/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (C) 2022 Intel Corporation.
 * All rights reserved.
 */

#include "bdev_raid.h"

#include "spdk/env.h"
#include "spdk/thread.h"
#include "spdk/string.h"
#include "spdk/util.h"
#include "spdk/likely.h"
#include "spdk/log.h"
#include "spdk/xor.h"

/* Maximum concurrent full stripe writes per io channel */
#define RAID5F_MAX_STRIPES 32

struct chunk {
        /* Corresponds to base_bdev index */
        uint8_t index;

        /* Array of iovecs */
        struct iovec *iovs;

        /* Number of used iovecs */
        int iovcnt;

        /* Total number of available iovecs in the array */
        int iovcnt_max;

        /* Pointer to buffer with I/O metadata */
        void *md_buf;

        /* Shallow copy of IO request parameters */
        struct spdk_bdev_ext_io_opts ext_opts;
};

struct stripe_request {
        struct raid5f_io_channel *r5ch;

        /* The associated raid_bdev_io */
        struct raid_bdev_io *raid_io;

        /* The stripe's index in the raid array. */
        uint64_t stripe_index;

        /* The stripe's parity chunk */
        struct chunk *parity_chunk;

        /* Buffer for stripe parity */
        void *parity_buf;

        /* Buffer for stripe io metadata parity */
        void *parity_md_buf;

        TAILQ_ENTRY(stripe_request) link;

        /* Array of chunks corresponding to base_bdevs */
        struct chunk chunks[0];
};

struct raid5f_info {
        /* The parent raid bdev */
        struct raid_bdev *raid_bdev;

        /* Number of data blocks in a stripe (without parity) */
        uint64_t stripe_blocks;

        /* Number of stripes on this array */
        uint64_t total_stripes;

        /* Alignment for buffer allocation */
        size_t buf_alignment;
};

struct raid5f_io_channel {
        /* All available stripe requests on this channel */
        TAILQ_HEAD(, stripe_request) free_stripe_requests;

        /* Array of iovec iterators for each data chunk */
        struct iov_iter {
                struct iovec *iovs;
                int iovcnt;
                int index;
                size_t offset;
        } *chunk_iov_iters;

        /* Array of source buffer pointers for parity calculation */
        void **chunk_xor_buffers;

        /* Array of source buffer pointers for parity calculation of io metadata */
        void **chunk_xor_md_buffers;

        /* Bounce buffers for parity calculation in case of unaligned source buffers */
        struct iovec *chunk_xor_bounce_buffers;
};

#define __CHUNK_IN_RANGE(req, c) \
        c < req->chunks + raid5f_ch_to_r5f_info(req->r5ch)->raid_bdev->num_base_bdevs

#define FOR_EACH_CHUNK_FROM(req, c, from) \
        for (c = from; __CHUNK_IN_RANGE(req, c); c++)

#define FOR_EACH_CHUNK(req, c) \
        FOR_EACH_CHUNK_FROM(req, c, req->chunks)

#define __NEXT_DATA_CHUNK(req, c) \
        c == req->parity_chunk ? c+1 : c

#define FOR_EACH_DATA_CHUNK(req, c) \
        for (c = __NEXT_DATA_CHUNK(req, req->chunks); __CHUNK_IN_RANGE(req, c); \
             c = __NEXT_DATA_CHUNK(req, c+1))

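/*
 * Chunk-to-bdev layout, illustrated with an assumed geometry of
 * num_base_bdevs = 4 (3 data chunks + 1 parity chunk per stripe).
 * raid5f_stripe_parity_chunk_index() below rotates the parity chunk
 * across the base bdevs from stripe to stripe:
 *
 *   stripe 0:  D  D  D  P   (parity chunk index 3)
 *   stripe 1:  D  D  P  D   (parity chunk index 2)
 *   stripe 2:  D  P  D  D   (parity chunk index 1)
 *   stripe 3:  P  D  D  D   (parity chunk index 0)
 *   stripe 4:  D  D  D  P   (pattern repeats)
 *
 * FOR_EACH_DATA_CHUNK() above iterates over a stripe's chunks while
 * skipping the parity chunk.
 */
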
static inline struct raid5f_info *
raid5f_ch_to_r5f_info(struct raid5f_io_channel *r5ch)
{
        return spdk_io_channel_get_io_device(spdk_io_channel_from_ctx(r5ch));
}

static inline struct stripe_request *
raid5f_chunk_stripe_req(struct chunk *chunk)
{
        return SPDK_CONTAINEROF((chunk - chunk->index), struct stripe_request, chunks);
}

static inline uint8_t
raid5f_stripe_data_chunks_num(const struct raid_bdev *raid_bdev)
{
        return raid_bdev->min_base_bdevs_operational;
}

static inline uint8_t
raid5f_stripe_parity_chunk_index(const struct raid_bdev *raid_bdev, uint64_t stripe_index)
{
        return raid5f_stripe_data_chunks_num(raid_bdev) - stripe_index % raid_bdev->num_base_bdevs;
}

static inline void
raid5f_stripe_request_release(struct stripe_request *stripe_req)
{
        TAILQ_INSERT_HEAD(&stripe_req->r5ch->free_stripe_requests, stripe_req, link);
}

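/*
 * Generate the stripe's parity into stripe_req->parity_buf by XORing the
 * data chunks together. The data chunks are walked in lockstep: each pass
 * picks the largest length that is contiguous in every chunk's current
 * iovec, XORs that much from all sources in a single spdk_xor_gen() call,
 * then advances the per-chunk iterators. Chunks whose buffers do not meet
 * the optimal XOR alignment are first copied into preallocated, aligned
 * bounce buffers.
 */
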
static int
raid5f_xor_stripe(struct stripe_request *stripe_req)
{
        struct raid_bdev_io *raid_io = stripe_req->raid_io;
        struct raid5f_io_channel *r5ch = stripe_req->r5ch;
        struct raid_bdev *raid_bdev = raid_io->raid_bdev;
        struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io);
        size_t remaining = raid_bdev->strip_size << raid_bdev->blocklen_shift;
        uint8_t n_src = raid5f_stripe_data_chunks_num(raid_bdev);
        void *dest = stripe_req->parity_buf;
        size_t alignment_mask = spdk_xor_get_optimal_alignment() - 1;
        void *raid_md = spdk_bdev_io_get_md_buf(bdev_io);
        uint32_t raid_md_size = spdk_bdev_get_md_size(&raid_bdev->bdev);
        struct chunk *chunk;
        int ret;
        uint8_t c;

        c = 0;
        FOR_EACH_DATA_CHUNK(stripe_req, chunk) {
                struct iov_iter *iov_iter = &r5ch->chunk_iov_iters[c];
                bool aligned = true;
                int i;

                for (i = 0; i < chunk->iovcnt; i++) {
                        if (((uintptr_t)chunk->iovs[i].iov_base & alignment_mask) ||
                            (chunk->iovs[i].iov_len & alignment_mask)) {
                                aligned = false;
                                break;
                        }
                }

                if (aligned) {
                        iov_iter->iovs = chunk->iovs;
                        iov_iter->iovcnt = chunk->iovcnt;
                } else {
                        iov_iter->iovs = &r5ch->chunk_xor_bounce_buffers[c];
                        iov_iter->iovcnt = 1;
                        spdk_iovcpy(chunk->iovs, chunk->iovcnt, iov_iter->iovs, iov_iter->iovcnt);
                }

                iov_iter->index = 0;
                iov_iter->offset = 0;

                c++;
        }

        while (remaining > 0) {
                size_t len = remaining;
                uint8_t i;

                for (i = 0; i < n_src; i++) {
                        struct iov_iter *iov_iter = &r5ch->chunk_iov_iters[i];
                        struct iovec *iov = &iov_iter->iovs[iov_iter->index];

                        len = spdk_min(len, iov->iov_len - iov_iter->offset);
                        r5ch->chunk_xor_buffers[i] = iov->iov_base + iov_iter->offset;
                }

                assert(len > 0);

                ret = spdk_xor_gen(dest, r5ch->chunk_xor_buffers, n_src, len);
                if (spdk_unlikely(ret)) {
                        SPDK_ERRLOG("stripe xor failed\n");
                        return ret;
                }

                for (i = 0; i < n_src; i++) {
                        struct iov_iter *iov_iter = &r5ch->chunk_iov_iters[i];
                        struct iovec *iov = &iov_iter->iovs[iov_iter->index];

                        iov_iter->offset += len;
                        if (iov_iter->offset == iov->iov_len) {
                                iov_iter->offset = 0;
                                iov_iter->index++;
                        }
                }
                dest += len;

                remaining -= len;
        }

        if (raid_md != NULL) {
                uint64_t len = raid_bdev->strip_size * raid_md_size;
                c = 0;
                FOR_EACH_DATA_CHUNK(stripe_req, chunk) {
                        r5ch->chunk_xor_md_buffers[c] = chunk->md_buf;
                        c++;
                }
                ret = spdk_xor_gen(stripe_req->parity_md_buf, r5ch->chunk_xor_md_buffers, n_src, len);
                if (spdk_unlikely(ret)) {
                        SPDK_ERRLOG("stripe io metadata xor failed\n");
                        return ret;
                }
        }

        return 0;
}

static void
raid5f_chunk_write_complete(struct chunk *chunk, enum spdk_bdev_io_status status)
{
        struct stripe_request *stripe_req = raid5f_chunk_stripe_req(chunk);

        if (raid_bdev_io_complete_part(stripe_req->raid_io, 1, status)) {
                raid5f_stripe_request_release(stripe_req);
        }
}

static void
raid5f_chunk_write_complete_bdev_io(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
        struct chunk *chunk = cb_arg;

        spdk_bdev_free_io(bdev_io);

        raid5f_chunk_write_complete(chunk, success ? SPDK_BDEV_IO_STATUS_SUCCESS :
                                    SPDK_BDEV_IO_STATUS_FAILED);
}

static void raid5f_stripe_request_submit_chunks(struct stripe_request *stripe_req);

static void
raid5f_chunk_write_retry(void *_raid_io)
{
        struct raid_bdev_io *raid_io = _raid_io;
        struct stripe_request *stripe_req = raid_io->module_private;

        raid5f_stripe_request_submit_chunks(stripe_req);
}

static inline void
copy_ext_io_opts(struct spdk_bdev_ext_io_opts *dst, struct spdk_bdev_ext_io_opts *src)
{
        memset(dst, 0, sizeof(*dst));
        memcpy(dst, src, src->size);
        dst->size = sizeof(*dst);
}

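/*
 * Submit the write of a single chunk to its base bdev. -ENOMEM from the
 * base bdev is not treated as a failure: the raid_io is parked on the base
 * bdev's io_wait queue and submission resumes in raid5f_chunk_write_retry()
 * with the first chunk that has not been submitted yet.
 */
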
static int
raid5f_chunk_write(struct chunk *chunk)
{
        struct stripe_request *stripe_req = raid5f_chunk_stripe_req(chunk);
        struct raid_bdev_io *raid_io = stripe_req->raid_io;
        struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io);
        struct raid_bdev *raid_bdev = raid_io->raid_bdev;
        struct raid_base_bdev_info *base_info = &raid_bdev->base_bdev_info[chunk->index];
        struct spdk_io_channel *base_ch = raid_io->raid_ch->base_channel[chunk->index];
        uint64_t base_offset_blocks = (stripe_req->stripe_index << raid_bdev->strip_size_shift);
        int ret;

        if (bdev_io->u.bdev.ext_opts != NULL) {
                copy_ext_io_opts(&chunk->ext_opts, bdev_io->u.bdev.ext_opts);
                chunk->ext_opts.metadata = chunk->md_buf;

                ret = spdk_bdev_writev_blocks_ext(base_info->desc, base_ch, chunk->iovs, chunk->iovcnt,
                                                  base_offset_blocks, raid_bdev->strip_size,
                                                  raid5f_chunk_write_complete_bdev_io, chunk,
                                                  &chunk->ext_opts);
        } else {
                ret = spdk_bdev_writev_blocks_with_md(base_info->desc, base_ch, chunk->iovs,
                                                      chunk->iovcnt, chunk->md_buf,
                                                      base_offset_blocks, raid_bdev->strip_size,
                                                      raid5f_chunk_write_complete_bdev_io, chunk);
        }

        if (spdk_unlikely(ret)) {
                if (ret == -ENOMEM) {
                        raid_bdev_queue_io_wait(raid_io, base_info->bdev, base_ch,
                                                raid5f_chunk_write_retry);
                } else {
                        /*
                         * Implicitly complete any I/Os not yet submitted as FAILED. If completing
                         * these means there are no more to complete for the stripe request, we can
                         * release the stripe request as well.
                         */
                        uint64_t base_bdev_io_not_submitted = raid_bdev->num_base_bdevs -
                                                              raid_io->base_bdev_io_submitted;

                        if (raid_bdev_io_complete_part(stripe_req->raid_io, base_bdev_io_not_submitted,
                                                       SPDK_BDEV_IO_STATUS_FAILED)) {
                                raid5f_stripe_request_release(stripe_req);
                        }
                }
        }

        return ret;
}

static int
raid5f_stripe_request_map_iovecs(struct stripe_request *stripe_req)
{
        struct raid_bdev *raid_bdev = stripe_req->raid_io->raid_bdev;
        struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(stripe_req->raid_io);
        const struct iovec *raid_io_iovs = bdev_io->u.bdev.iovs;
        int raid_io_iovcnt = bdev_io->u.bdev.iovcnt;
        void *raid_io_md = spdk_bdev_io_get_md_buf(bdev_io);
        uint32_t raid_io_md_size = spdk_bdev_get_md_size(&raid_bdev->bdev);
        struct chunk *chunk;
        int raid_io_iov_idx = 0;
        size_t raid_io_offset = 0;
        size_t raid_io_iov_offset = 0;
        int i;

        FOR_EACH_DATA_CHUNK(stripe_req, chunk) {
                int chunk_iovcnt = 0;
                uint64_t len = raid_bdev->strip_size << raid_bdev->blocklen_shift;
                size_t off = raid_io_iov_offset;

                for (i = raid_io_iov_idx; i < raid_io_iovcnt; i++) {
                        chunk_iovcnt++;
                        off += raid_io_iovs[i].iov_len;
                        if (off >= raid_io_offset + len) {
                                break;
                        }
                }

                assert(raid_io_iov_idx + chunk_iovcnt <= raid_io_iovcnt);

                if (chunk_iovcnt > chunk->iovcnt_max) {
                        struct iovec *iovs = chunk->iovs;

                        iovs = realloc(iovs, chunk_iovcnt * sizeof(*iovs));
                        if (!iovs) {
                                return -ENOMEM;
                        }
                        chunk->iovs = iovs;
                        chunk->iovcnt_max = chunk_iovcnt;
                }
                chunk->iovcnt = chunk_iovcnt;

                if (raid_io_md) {
                        chunk->md_buf = raid_io_md +
                                        (raid_io_offset >> raid_bdev->blocklen_shift) * raid_io_md_size;
                }

                for (i = 0; i < chunk_iovcnt; i++) {
                        struct iovec *chunk_iov = &chunk->iovs[i];
                        const struct iovec *raid_io_iov = &raid_io_iovs[raid_io_iov_idx];
                        size_t chunk_iov_offset = raid_io_offset - raid_io_iov_offset;

                        chunk_iov->iov_base = raid_io_iov->iov_base + chunk_iov_offset;
                        chunk_iov->iov_len = spdk_min(len, raid_io_iov->iov_len - chunk_iov_offset);
                        raid_io_offset += chunk_iov->iov_len;
                        len -= chunk_iov->iov_len;

                        if (raid_io_offset >= raid_io_iov_offset + raid_io_iov->iov_len) {
                                raid_io_iov_idx++;
                                raid_io_iov_offset += raid_io_iov->iov_len;
                        }
                }

                if (spdk_unlikely(len > 0)) {
                        return -EINVAL;
                }
        }

        stripe_req->parity_chunk->iovs[0].iov_base = stripe_req->parity_buf;
        stripe_req->parity_chunk->iovs[0].iov_len = raid_bdev->strip_size <<
                        raid_bdev->blocklen_shift;
        stripe_req->parity_chunk->md_buf = stripe_req->parity_md_buf;
        stripe_req->parity_chunk->iovcnt = 1;

        return 0;
}

static void
raid5f_stripe_request_submit_chunks(struct stripe_request *stripe_req)
{
        struct raid_bdev_io *raid_io = stripe_req->raid_io;
        struct chunk *start = &stripe_req->chunks[raid_io->base_bdev_io_submitted];
        struct chunk *chunk;

        FOR_EACH_CHUNK_FROM(stripe_req, chunk, start) {
                if (spdk_unlikely(raid5f_chunk_write(chunk) != 0)) {
                        break;
                }
                raid_io->base_bdev_io_submitted++;
        }
}

static void
raid5f_submit_stripe_request(struct stripe_request *stripe_req)
{
        if (spdk_unlikely(raid5f_xor_stripe(stripe_req) != 0)) {
                raid_bdev_io_complete(stripe_req->raid_io, SPDK_BDEV_IO_STATUS_FAILED);
                return;
        }

        raid5f_stripe_request_submit_chunks(stripe_req);
}

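/*
 * Writes reach this module only as aligned full-stripe requests (enforced
 * via write_unit_size/split_on_write_unit in raid5f_start()), so parity is
 * always computed from the new data alone and no read-modify-write cycle
 * is needed. If no stripe request is free, -ENOMEM is returned and the
 * caller completes the I/O with SPDK_BDEV_IO_STATUS_NOMEM for later retry.
 */
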
static int
raid5f_submit_write_request(struct raid_bdev_io *raid_io, uint64_t stripe_index)
{
        struct raid_bdev *raid_bdev = raid_io->raid_bdev;
        struct raid5f_io_channel *r5ch = spdk_io_channel_get_ctx(raid_io->raid_ch->module_channel);
        struct stripe_request *stripe_req;
        int ret;

        stripe_req = TAILQ_FIRST(&r5ch->free_stripe_requests);
        if (!stripe_req) {
                return -ENOMEM;
        }

        stripe_req->stripe_index = stripe_index;
        stripe_req->parity_chunk = stripe_req->chunks + raid5f_stripe_parity_chunk_index(raid_bdev,
                                   stripe_req->stripe_index);
        stripe_req->raid_io = raid_io;

        ret = raid5f_stripe_request_map_iovecs(stripe_req);
        if (spdk_unlikely(ret)) {
                return ret;
        }

        TAILQ_REMOVE(&r5ch->free_stripe_requests, stripe_req, link);

        raid_io->module_private = stripe_req;
        raid_io->base_bdev_io_remaining = raid_bdev->num_base_bdevs;

        raid5f_submit_stripe_request(stripe_req);

        return 0;
}

static void
raid5f_chunk_read_complete(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
        struct raid_bdev_io *raid_io = cb_arg;

        spdk_bdev_free_io(bdev_io);

        raid_bdev_io_complete(raid_io, success ? SPDK_BDEV_IO_STATUS_SUCCESS :
                              SPDK_BDEV_IO_STATUS_FAILED);
}

static void raid5f_submit_rw_request(struct raid_bdev_io *raid_io);

static void
_raid5f_submit_rw_request(void *_raid_io)
{
        struct raid_bdev_io *raid_io = _raid_io;

        raid5f_submit_rw_request(raid_io);
}

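/*
 * Reads are at most one strip long, so they map to exactly one base bdev.
 * Worked example with an assumed geometry of 4 base bdevs and strip_size
 * 64 (strip_size_shift 6): for stripe_index 1 the parity chunk index is
 * 3 - (1 % 4) = 2, and a read at stripe_offset 130 falls into data chunk
 * 130 >> 6 = 2. Since 2 is not less than the parity index 2, the chunk
 * maps to base bdev 3; chunk_offset is 130 - (2 << 6) = 2 and
 * base_offset_blocks is (1 << 6) + 2 = 66.
 */
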
static int
raid5f_submit_read_request(struct raid_bdev_io *raid_io, uint64_t stripe_index,
                           uint64_t stripe_offset)
{
        struct raid_bdev *raid_bdev = raid_io->raid_bdev;
        uint8_t chunk_data_idx = stripe_offset >> raid_bdev->strip_size_shift;
        uint8_t p_idx = raid5f_stripe_parity_chunk_index(raid_bdev, stripe_index);
        uint8_t chunk_idx = chunk_data_idx < p_idx ? chunk_data_idx : chunk_data_idx + 1;
        struct raid_base_bdev_info *base_info = &raid_bdev->base_bdev_info[chunk_idx];
        struct spdk_io_channel *base_ch = raid_io->raid_ch->base_channel[chunk_idx];
        uint64_t chunk_offset = stripe_offset - (chunk_data_idx << raid_bdev->strip_size_shift);
        uint64_t base_offset_blocks = (stripe_index << raid_bdev->strip_size_shift) + chunk_offset;
        struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io);
        int ret;

        if (bdev_io->u.bdev.ext_opts != NULL) {
                ret = spdk_bdev_readv_blocks_ext(base_info->desc, base_ch, bdev_io->u.bdev.iovs,
                                                 bdev_io->u.bdev.iovcnt,
                                                 base_offset_blocks, bdev_io->u.bdev.num_blocks,
                                                 raid5f_chunk_read_complete, raid_io,
                                                 bdev_io->u.bdev.ext_opts);
        } else {
                ret = spdk_bdev_readv_blocks_with_md(base_info->desc, base_ch,
                                                     bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
                                                     bdev_io->u.bdev.md_buf,
                                                     base_offset_blocks, bdev_io->u.bdev.num_blocks,
                                                     raid5f_chunk_read_complete, raid_io);
        }

        if (spdk_unlikely(ret == -ENOMEM)) {
                raid_bdev_queue_io_wait(raid_io, base_info->bdev, base_ch,
                                        _raid5f_submit_rw_request);
                return 0;
        }

        return ret;
}

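/*
 * Entry point for reads and writes from the generic raid layer. The bdev
 * is configured in raid5f_start() so that reads never cross a strip
 * boundary and writes always arrive as exactly one full stripe, which the
 * asserts below rely on.
 */
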
static void
raid5f_submit_rw_request(struct raid_bdev_io *raid_io)
{
        struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io);
        struct raid_bdev *raid_bdev = raid_io->raid_bdev;
        struct raid5f_info *r5f_info = raid_bdev->module_private;
        uint64_t offset_blocks = bdev_io->u.bdev.offset_blocks;
        uint64_t stripe_index = offset_blocks / r5f_info->stripe_blocks;
        uint64_t stripe_offset = offset_blocks % r5f_info->stripe_blocks;
        int ret;

        switch (bdev_io->type) {
        case SPDK_BDEV_IO_TYPE_READ:
                assert(bdev_io->u.bdev.num_blocks <= raid_bdev->strip_size);
                ret = raid5f_submit_read_request(raid_io, stripe_index, stripe_offset);
                break;
        case SPDK_BDEV_IO_TYPE_WRITE:
                assert(stripe_offset == 0);
                assert(bdev_io->u.bdev.num_blocks == r5f_info->stripe_blocks);
                ret = raid5f_submit_write_request(raid_io, stripe_index);
                break;
        default:
                ret = -EINVAL;
                break;
        }

        if (spdk_unlikely(ret)) {
                raid_bdev_io_complete(raid_io, ret == -ENOMEM ? SPDK_BDEV_IO_STATUS_NOMEM :
                                      SPDK_BDEV_IO_STATUS_FAILED);
        }
}

static void
raid5f_stripe_request_free(struct stripe_request *stripe_req)
{
        struct chunk *chunk;

        FOR_EACH_CHUNK(stripe_req, chunk) {
                free(chunk->iovs);
        }

        spdk_dma_free(stripe_req->parity_buf);
        spdk_dma_free(stripe_req->parity_md_buf);

        free(stripe_req);
}

static struct stripe_request *
raid5f_stripe_request_alloc(struct raid5f_io_channel *r5ch)
{
        struct raid5f_info *r5f_info = raid5f_ch_to_r5f_info(r5ch);
        struct raid_bdev *raid_bdev = r5f_info->raid_bdev;
        uint32_t raid_io_md_size = spdk_bdev_get_md_size(&raid_bdev->bdev);
        struct stripe_request *stripe_req;
        struct chunk *chunk;

        stripe_req = calloc(1, sizeof(*stripe_req) +
                            sizeof(struct chunk) * raid_bdev->num_base_bdevs);
        if (!stripe_req) {
                return NULL;
        }

        stripe_req->r5ch = r5ch;

        FOR_EACH_CHUNK(stripe_req, chunk) {
                chunk->index = chunk - stripe_req->chunks;
                chunk->iovcnt_max = 4;
                chunk->iovs = calloc(chunk->iovcnt_max, sizeof(chunk->iovs[0]));
                if (!chunk->iovs) {
                        goto err;
                }
        }

        stripe_req->parity_buf = spdk_dma_malloc(raid_bdev->strip_size << raid_bdev->blocklen_shift,
                                 r5f_info->buf_alignment, NULL);
        if (!stripe_req->parity_buf) {
                goto err;
        }

        if (raid_io_md_size != 0) {
                stripe_req->parity_md_buf = spdk_dma_malloc(raid_bdev->strip_size * raid_io_md_size,
                                            r5f_info->buf_alignment, NULL);
                if (!stripe_req->parity_md_buf) {
                        goto err;
                }
        }

        return stripe_req;
err:
        raid5f_stripe_request_free(stripe_req);
        return NULL;
}

static void
raid5f_ioch_destroy(void *io_device, void *ctx_buf)
{
        struct raid5f_io_channel *r5ch = ctx_buf;
        struct raid5f_info *r5f_info = io_device;
        struct raid_bdev *raid_bdev = r5f_info->raid_bdev;
        struct stripe_request *stripe_req;
        int i;

        while ((stripe_req = TAILQ_FIRST(&r5ch->free_stripe_requests))) {
                TAILQ_REMOVE(&r5ch->free_stripe_requests, stripe_req, link);
                raid5f_stripe_request_free(stripe_req);
        }

        if (r5ch->chunk_xor_bounce_buffers) {
                for (i = 0; i < raid5f_stripe_data_chunks_num(raid_bdev); i++) {
                        free(r5ch->chunk_xor_bounce_buffers[i].iov_base);
                }
                free(r5ch->chunk_xor_bounce_buffers);
        }

        free(r5ch->chunk_xor_buffers);
        free(r5ch->chunk_xor_md_buffers);
        free(r5ch->chunk_iov_iters);
}

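/*
 * Per-channel setup: preallocate RAID5F_MAX_STRIPES stripe requests along
 * with the iterator/scratch arrays and aligned XOR bounce buffers used for
 * parity generation, so the write hot path performs no allocations.
 */
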
static int
raid5f_ioch_create(void *io_device, void *ctx_buf)
{
        struct raid5f_io_channel *r5ch = ctx_buf;
        struct raid5f_info *r5f_info = io_device;
        struct raid_bdev *raid_bdev = r5f_info->raid_bdev;
        size_t chunk_len = raid_bdev->strip_size << raid_bdev->blocklen_shift;
        int status = 0;
        int i;

        TAILQ_INIT(&r5ch->free_stripe_requests);

        for (i = 0; i < RAID5F_MAX_STRIPES; i++) {
                struct stripe_request *stripe_req;

                stripe_req = raid5f_stripe_request_alloc(r5ch);
                if (!stripe_req) {
                        status = -ENOMEM;
                        goto out;
                }

                TAILQ_INSERT_HEAD(&r5ch->free_stripe_requests, stripe_req, link);
        }

        r5ch->chunk_iov_iters = calloc(raid5f_stripe_data_chunks_num(raid_bdev),
                                       sizeof(r5ch->chunk_iov_iters[0]));
        if (!r5ch->chunk_iov_iters) {
                status = -ENOMEM;
                goto out;
        }

        r5ch->chunk_xor_buffers = calloc(raid5f_stripe_data_chunks_num(raid_bdev),
                                         sizeof(r5ch->chunk_xor_buffers[0]));
        if (!r5ch->chunk_xor_buffers) {
                status = -ENOMEM;
                goto out;
        }

        r5ch->chunk_xor_md_buffers = calloc(raid5f_stripe_data_chunks_num(raid_bdev),
                                            sizeof(r5ch->chunk_xor_md_buffers[0]));
        if (!r5ch->chunk_xor_md_buffers) {
                status = -ENOMEM;
                goto out;
        }

        r5ch->chunk_xor_bounce_buffers = calloc(raid5f_stripe_data_chunks_num(raid_bdev),
                                                sizeof(r5ch->chunk_xor_bounce_buffers[0]));
        if (!r5ch->chunk_xor_bounce_buffers) {
                status = -ENOMEM;
                goto out;
        }

        for (i = 0; i < raid5f_stripe_data_chunks_num(raid_bdev); i++) {
                status = posix_memalign(&r5ch->chunk_xor_bounce_buffers[i].iov_base,
                                        spdk_xor_get_optimal_alignment(), chunk_len);
                if (status) {
                        goto out;
                }
                r5ch->chunk_xor_bounce_buffers[i].iov_len = chunk_len;
        }
out:
        if (status) {
                SPDK_ERRLOG("Failed to initialize io channel\n");
                raid5f_ioch_destroy(r5f_info, r5ch);
        }
        return status;
}

static int
raid5f_start(struct raid_bdev *raid_bdev)
{
        uint64_t min_blockcnt = UINT64_MAX;
        struct raid_base_bdev_info *base_info;
        struct raid5f_info *r5f_info;
        size_t alignment;

        r5f_info = calloc(1, sizeof(*r5f_info));
        if (!r5f_info) {
                SPDK_ERRLOG("Failed to allocate r5f_info\n");
                return -ENOMEM;
        }
        r5f_info->raid_bdev = raid_bdev;

        alignment = spdk_xor_get_optimal_alignment();
        RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
                min_blockcnt = spdk_min(min_blockcnt, base_info->bdev->blockcnt);
                alignment = spdk_max(alignment, spdk_bdev_get_buf_align(base_info->bdev));
        }

        r5f_info->total_stripes = min_blockcnt / raid_bdev->strip_size;
        r5f_info->stripe_blocks = raid_bdev->strip_size * raid5f_stripe_data_chunks_num(raid_bdev);
        r5f_info->buf_alignment = alignment;

        raid_bdev->bdev.blockcnt = r5f_info->stripe_blocks * r5f_info->total_stripes;
        raid_bdev->bdev.optimal_io_boundary = raid_bdev->strip_size;
        raid_bdev->bdev.split_on_optimal_io_boundary = true;
        raid_bdev->bdev.write_unit_size = r5f_info->stripe_blocks;
        raid_bdev->bdev.split_on_write_unit = true;

        raid_bdev->module_private = r5f_info;

        spdk_io_device_register(r5f_info, raid5f_ioch_create, raid5f_ioch_destroy,
                                sizeof(struct raid5f_io_channel), NULL);

        return 0;
}

static void
raid5f_io_device_unregister_done(void *io_device)
{
        struct raid5f_info *r5f_info = io_device;

        raid_bdev_module_stop_done(r5f_info->raid_bdev);

        free(r5f_info);
}

static bool
raid5f_stop(struct raid_bdev *raid_bdev)
{
        struct raid5f_info *r5f_info = raid_bdev->module_private;

        spdk_io_device_unregister(r5f_info, raid5f_io_device_unregister_done);

        return false;
}

static struct spdk_io_channel *
raid5f_get_io_channel(struct raid_bdev *raid_bdev)
{
        struct raid5f_info *r5f_info = raid_bdev->module_private;

        return spdk_get_io_channel(r5f_info);
}

static struct raid_bdev_module g_raid5f_module = {
        .level = RAID5F,
        .base_bdevs_min = 3,
        .base_bdevs_constraint = {CONSTRAINT_MAX_BASE_BDEVS_REMOVED, 1},
        .start = raid5f_start,
        .stop = raid5f_stop,
        .submit_rw_request = raid5f_submit_rw_request,
        .get_io_channel = raid5f_get_io_channel,
};
RAID_MODULE_REGISTER(&g_raid5f_module)

SPDK_LOG_REGISTER_COMPONENT(bdev_raid5f)
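
/*
 * Example usage (hypothetical bdev names; assumes SPDK was built with
 * raid5f support): a raid5f bdev striping over three NVMe bdevs with a
 * 64 KiB strip size can be created with the bdev_raid_create RPC:
 *
 *   scripts/rpc.py bdev_raid_create -n Raid5f0 -z 64 -r 5f \
 *       -b "Nvme0n1 Nvme1n1 Nvme2n1"
 */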