1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright (C) 2022 Intel Corporation. 3 * All rights reserved. 4 */ 5 6 #include "bdev_raid.h" 7 8 #include "spdk/env.h" 9 #include "spdk/thread.h" 10 #include "spdk/string.h" 11 #include "spdk/util.h" 12 #include "spdk/likely.h" 13 #include "spdk/log.h" 14 #include "spdk/accel.h" 15 16 /* Maximum concurrent full stripe writes per io channel */ 17 #define RAID5F_MAX_STRIPES 32 18 19 struct chunk { 20 /* Corresponds to base_bdev index */ 21 uint8_t index; 22 23 /* Array of iovecs */ 24 struct iovec *iovs; 25 26 /* Number of used iovecs */ 27 int iovcnt; 28 29 /* Total number of available iovecs in the array */ 30 int iovcnt_max; 31 32 /* Pointer to buffer with I/O metadata */ 33 void *md_buf; 34 35 /* Shallow copy of IO request parameters */ 36 struct spdk_bdev_ext_io_opts ext_opts; 37 }; 38 39 struct stripe_request { 40 struct raid5f_io_channel *r5ch; 41 42 /* The associated raid_bdev_io */ 43 struct raid_bdev_io *raid_io; 44 45 /* The stripe's index in the raid array. */ 46 uint64_t stripe_index; 47 48 /* The stripe's parity chunk */ 49 struct chunk *parity_chunk; 50 51 /* Buffer for stripe parity */ 52 void *parity_buf; 53 54 /* Buffer for stripe io metadata parity */ 55 void *parity_md_buf; 56 57 /* Array of iovec iterators for each data chunk */ 58 struct iov_iter { 59 struct iovec *iovs; 60 int iovcnt; 61 int index; 62 size_t offset; 63 } *chunk_iov_iters; 64 65 /* Array of source buffer pointers for parity calculation */ 66 void **chunk_xor_buffers; 67 68 /* Array of source buffer pointers for parity calculation of io metadata */ 69 void **chunk_xor_md_buffers; 70 71 struct { 72 void *dest; 73 size_t len; 74 size_t remaining; 75 size_t remaining_md; 76 int status; 77 } xor; 78 79 TAILQ_ENTRY(stripe_request) link; 80 81 /* Array of chunks corresponding to base_bdevs */ 82 struct chunk chunks[0]; 83 }; 84 85 struct raid5f_info { 86 /* The parent raid bdev */ 87 struct raid_bdev *raid_bdev; 88 89 /* Number of data blocks in a stripe (without parity) */ 90 uint64_t stripe_blocks; 91 92 /* Number of stripes on this array */ 93 uint64_t total_stripes; 94 95 /* Alignment for buffer allocation */ 96 size_t buf_alignment; 97 }; 98 99 struct raid5f_io_channel { 100 /* All available stripe requests on this channel */ 101 TAILQ_HEAD(, stripe_request) free_stripe_requests; 102 103 /* accel_fw channel */ 104 struct spdk_io_channel *accel_ch; 105 106 /* For retrying xor if accel_ch runs out of resources */ 107 TAILQ_HEAD(, stripe_request) xor_retry_queue; 108 }; 109 110 #define __CHUNK_IN_RANGE(req, c) \ 111 c < req->chunks + raid5f_ch_to_r5f_info(req->r5ch)->raid_bdev->num_base_bdevs 112 113 #define FOR_EACH_CHUNK_FROM(req, c, from) \ 114 for (c = from; __CHUNK_IN_RANGE(req, c); c++) 115 116 #define FOR_EACH_CHUNK(req, c) \ 117 FOR_EACH_CHUNK_FROM(req, c, req->chunks) 118 119 #define __NEXT_DATA_CHUNK(req, c) \ 120 c == req->parity_chunk ? c+1 : c 121 122 #define FOR_EACH_DATA_CHUNK(req, c) \ 123 for (c = __NEXT_DATA_CHUNK(req, req->chunks); __CHUNK_IN_RANGE(req, c); \ 124 c = __NEXT_DATA_CHUNK(req, c+1)) 125 126 static inline struct raid5f_info * 127 raid5f_ch_to_r5f_info(struct raid5f_io_channel *r5ch) 128 { 129 return spdk_io_channel_get_io_device(spdk_io_channel_from_ctx(r5ch)); 130 } 131 132 static inline struct stripe_request * 133 raid5f_chunk_stripe_req(struct chunk *chunk) 134 { 135 return SPDK_CONTAINEROF((chunk - chunk->index), struct stripe_request, chunks); 136 } 137 138 static inline uint8_t 139 raid5f_stripe_data_chunks_num(const struct raid_bdev *raid_bdev) 140 { 141 return raid_bdev->min_base_bdevs_operational; 142 } 143 144 static inline uint8_t 145 raid5f_stripe_parity_chunk_index(const struct raid_bdev *raid_bdev, uint64_t stripe_index) 146 { 147 return raid5f_stripe_data_chunks_num(raid_bdev) - stripe_index % raid_bdev->num_base_bdevs; 148 } 149 150 static inline void 151 raid5f_stripe_request_release(struct stripe_request *stripe_req) 152 { 153 TAILQ_INSERT_HEAD(&stripe_req->r5ch->free_stripe_requests, stripe_req, link); 154 } 155 156 static void raid5f_stripe_request_submit_chunks(struct stripe_request *stripe_req); 157 static void raid5f_xor_stripe_retry(struct stripe_request *stripe_req); 158 159 static void 160 raid5f_xor_stripe_done(struct stripe_request *stripe_req) 161 { 162 if (stripe_req->xor.status != 0) { 163 SPDK_ERRLOG("stripe xor failed: %s\n", spdk_strerror(-stripe_req->xor.status)); 164 raid_bdev_io_complete(stripe_req->raid_io, SPDK_BDEV_IO_STATUS_FAILED); 165 } else { 166 raid5f_stripe_request_submit_chunks(stripe_req); 167 } 168 169 if (!TAILQ_EMPTY(&stripe_req->r5ch->xor_retry_queue)) { 170 stripe_req = TAILQ_FIRST(&stripe_req->r5ch->xor_retry_queue); 171 TAILQ_REMOVE(&stripe_req->r5ch->xor_retry_queue, stripe_req, link); 172 raid5f_xor_stripe_retry(stripe_req); 173 } 174 } 175 176 static void raid5f_xor_stripe_continue(struct stripe_request *stripe_req); 177 178 static void 179 _raid5f_xor_stripe_cb(struct stripe_request *stripe_req, int status) 180 { 181 if (status != 0) { 182 stripe_req->xor.status = status; 183 } 184 185 if (stripe_req->xor.remaining + stripe_req->xor.remaining_md == 0) { 186 raid5f_xor_stripe_done(stripe_req); 187 } 188 } 189 190 static void 191 raid5f_xor_stripe_cb(void *_stripe_req, int status) 192 { 193 struct stripe_request *stripe_req = _stripe_req; 194 size_t len = stripe_req->xor.len; 195 196 stripe_req->xor.remaining -= len; 197 198 if (stripe_req->xor.remaining > 0) { 199 struct raid_bdev_io *raid_io = stripe_req->raid_io; 200 struct raid_bdev *raid_bdev = raid_io->raid_bdev; 201 uint8_t n_src = raid5f_stripe_data_chunks_num(raid_bdev); 202 uint8_t i; 203 204 for (i = 0; i < n_src; i++) { 205 struct iov_iter *iov_iter = &stripe_req->chunk_iov_iters[i]; 206 struct iovec *iov = &iov_iter->iovs[iov_iter->index]; 207 208 iov_iter->offset += len; 209 if (iov_iter->offset == iov->iov_len) { 210 iov_iter->offset = 0; 211 iov_iter->index++; 212 } 213 } 214 215 stripe_req->xor.dest += len; 216 217 raid5f_xor_stripe_continue(stripe_req); 218 } 219 220 _raid5f_xor_stripe_cb(stripe_req, status); 221 } 222 223 static void 224 raid5f_xor_stripe_md_cb(void *_stripe_req, int status) 225 { 226 struct stripe_request *stripe_req = _stripe_req; 227 228 stripe_req->xor.remaining_md = 0; 229 230 _raid5f_xor_stripe_cb(stripe_req, status); 231 } 232 233 static void 234 raid5f_xor_stripe_continue(struct stripe_request *stripe_req) 235 { 236 struct raid_bdev_io *raid_io = stripe_req->raid_io; 237 struct raid_bdev *raid_bdev = raid_io->raid_bdev; 238 uint8_t n_src = raid5f_stripe_data_chunks_num(raid_bdev); 239 size_t len = stripe_req->xor.remaining; 240 uint8_t i; 241 int ret; 242 243 assert(stripe_req->xor.remaining > 0); 244 245 for (i = 0; i < n_src; i++) { 246 struct iov_iter *iov_iter = &stripe_req->chunk_iov_iters[i]; 247 struct iovec *iov = &iov_iter->iovs[iov_iter->index]; 248 249 len = spdk_min(len, iov->iov_len - iov_iter->offset); 250 stripe_req->chunk_xor_buffers[i] = iov->iov_base + iov_iter->offset; 251 } 252 253 assert(len > 0); 254 stripe_req->xor.len = len; 255 256 ret = spdk_accel_submit_xor(stripe_req->r5ch->accel_ch, stripe_req->xor.dest, 257 stripe_req->chunk_xor_buffers, n_src, len, 258 raid5f_xor_stripe_cb, stripe_req); 259 if (spdk_unlikely(ret)) { 260 if (ret == -ENOMEM) { 261 TAILQ_INSERT_HEAD(&stripe_req->r5ch->xor_retry_queue, stripe_req, link); 262 } else { 263 stripe_req->xor.status = ret; 264 raid5f_xor_stripe_done(stripe_req); 265 } 266 return; 267 } 268 } 269 270 static void 271 raid5f_xor_stripe(struct stripe_request *stripe_req) 272 { 273 struct raid_bdev_io *raid_io = stripe_req->raid_io; 274 struct raid_bdev *raid_bdev = raid_io->raid_bdev; 275 struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io); 276 void *raid_md = spdk_bdev_io_get_md_buf(bdev_io); 277 uint32_t raid_md_size = spdk_bdev_get_md_size(&raid_bdev->bdev); 278 struct chunk *chunk; 279 uint8_t c; 280 281 c = 0; 282 FOR_EACH_DATA_CHUNK(stripe_req, chunk) { 283 struct iov_iter *iov_iter = &stripe_req->chunk_iov_iters[c++]; 284 285 iov_iter->iovs = chunk->iovs; 286 iov_iter->iovcnt = chunk->iovcnt; 287 iov_iter->index = 0; 288 iov_iter->offset = 0; 289 } 290 291 stripe_req->xor.dest = stripe_req->parity_buf; 292 stripe_req->xor.remaining = raid_bdev->strip_size << raid_bdev->blocklen_shift; 293 stripe_req->xor.status = 0; 294 295 if (raid_md != NULL) { 296 uint8_t n_src = raid5f_stripe_data_chunks_num(raid_bdev); 297 uint64_t len = raid_bdev->strip_size * raid_md_size; 298 int ret; 299 300 stripe_req->xor.remaining_md = len; 301 302 c = 0; 303 FOR_EACH_DATA_CHUNK(stripe_req, chunk) { 304 stripe_req->chunk_xor_md_buffers[c] = chunk->md_buf; 305 c++; 306 } 307 308 ret = spdk_accel_submit_xor(stripe_req->r5ch->accel_ch, stripe_req->parity_md_buf, 309 stripe_req->chunk_xor_md_buffers, n_src, len, 310 raid5f_xor_stripe_md_cb, stripe_req); 311 if (spdk_unlikely(ret)) { 312 if (ret == -ENOMEM) { 313 TAILQ_INSERT_HEAD(&stripe_req->r5ch->xor_retry_queue, stripe_req, link); 314 } else { 315 stripe_req->xor.status = ret; 316 raid5f_xor_stripe_done(stripe_req); 317 } 318 return; 319 } 320 } 321 322 raid5f_xor_stripe_continue(stripe_req); 323 } 324 325 static void 326 raid5f_xor_stripe_retry(struct stripe_request *stripe_req) 327 { 328 if (stripe_req->xor.remaining_md) { 329 raid5f_xor_stripe(stripe_req); 330 } else { 331 raid5f_xor_stripe_continue(stripe_req); 332 } 333 } 334 335 static void 336 raid5f_chunk_write_complete(struct chunk *chunk, enum spdk_bdev_io_status status) 337 { 338 struct stripe_request *stripe_req = raid5f_chunk_stripe_req(chunk); 339 340 if (raid_bdev_io_complete_part(stripe_req->raid_io, 1, status)) { 341 raid5f_stripe_request_release(stripe_req); 342 } 343 } 344 345 static void 346 raid5f_chunk_write_complete_bdev_io(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) 347 { 348 struct chunk *chunk = cb_arg; 349 350 spdk_bdev_free_io(bdev_io); 351 352 raid5f_chunk_write_complete(chunk, success ? SPDK_BDEV_IO_STATUS_SUCCESS : 353 SPDK_BDEV_IO_STATUS_FAILED); 354 } 355 356 static void 357 raid5f_chunk_write_retry(void *_raid_io) 358 { 359 struct raid_bdev_io *raid_io = _raid_io; 360 struct stripe_request *stripe_req = raid_io->module_private; 361 362 raid5f_stripe_request_submit_chunks(stripe_req); 363 } 364 365 static inline void 366 raid5f_init_ext_io_opts(struct spdk_bdev_io *bdev_io, struct spdk_bdev_ext_io_opts *opts) 367 { 368 memset(opts, 0, sizeof(*opts)); 369 opts->size = sizeof(*opts); 370 opts->memory_domain = bdev_io->u.bdev.memory_domain; 371 opts->memory_domain_ctx = bdev_io->u.bdev.memory_domain_ctx; 372 opts->metadata = bdev_io->u.bdev.md_buf; 373 } 374 375 static int 376 raid5f_chunk_write(struct chunk *chunk) 377 { 378 struct stripe_request *stripe_req = raid5f_chunk_stripe_req(chunk); 379 struct raid_bdev_io *raid_io = stripe_req->raid_io; 380 struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io); 381 struct raid_bdev *raid_bdev = raid_io->raid_bdev; 382 struct raid_base_bdev_info *base_info = &raid_bdev->base_bdev_info[chunk->index]; 383 struct spdk_io_channel *base_ch = raid_io->raid_ch->base_channel[chunk->index]; 384 uint64_t base_offset_blocks = (stripe_req->stripe_index << raid_bdev->strip_size_shift); 385 int ret; 386 387 raid5f_init_ext_io_opts(bdev_io, &chunk->ext_opts); 388 chunk->ext_opts.metadata = chunk->md_buf; 389 390 ret = spdk_bdev_writev_blocks_ext(base_info->desc, base_ch, chunk->iovs, chunk->iovcnt, 391 base_offset_blocks, raid_bdev->strip_size, raid5f_chunk_write_complete_bdev_io, 392 chunk, &chunk->ext_opts); 393 394 if (spdk_unlikely(ret)) { 395 if (ret == -ENOMEM) { 396 raid_bdev_queue_io_wait(raid_io, base_info->bdev, base_ch, 397 raid5f_chunk_write_retry); 398 } else { 399 /* 400 * Implicitly complete any I/Os not yet submitted as FAILED. If completing 401 * these means there are no more to complete for the stripe request, we can 402 * release the stripe request as well. 403 */ 404 uint64_t base_bdev_io_not_submitted = raid_bdev->num_base_bdevs - 405 raid_io->base_bdev_io_submitted; 406 407 if (raid_bdev_io_complete_part(stripe_req->raid_io, base_bdev_io_not_submitted, 408 SPDK_BDEV_IO_STATUS_FAILED)) { 409 raid5f_stripe_request_release(stripe_req); 410 } 411 } 412 } 413 414 return ret; 415 } 416 417 static int 418 raid5f_stripe_request_map_iovecs(struct stripe_request *stripe_req) 419 { 420 struct raid_bdev *raid_bdev = stripe_req->raid_io->raid_bdev; 421 struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(stripe_req->raid_io); 422 const struct iovec *raid_io_iovs = bdev_io->u.bdev.iovs; 423 int raid_io_iovcnt = bdev_io->u.bdev.iovcnt; 424 void *raid_io_md = spdk_bdev_io_get_md_buf(bdev_io); 425 uint32_t raid_io_md_size = spdk_bdev_get_md_size(&raid_bdev->bdev); 426 struct chunk *chunk; 427 int raid_io_iov_idx = 0; 428 size_t raid_io_offset = 0; 429 size_t raid_io_iov_offset = 0; 430 int i; 431 432 FOR_EACH_DATA_CHUNK(stripe_req, chunk) { 433 int chunk_iovcnt = 0; 434 uint64_t len = raid_bdev->strip_size << raid_bdev->blocklen_shift; 435 size_t off = raid_io_iov_offset; 436 437 for (i = raid_io_iov_idx; i < raid_io_iovcnt; i++) { 438 chunk_iovcnt++; 439 off += raid_io_iovs[i].iov_len; 440 if (off >= raid_io_offset + len) { 441 break; 442 } 443 } 444 445 assert(raid_io_iov_idx + chunk_iovcnt <= raid_io_iovcnt); 446 447 if (chunk_iovcnt > chunk->iovcnt_max) { 448 struct iovec *iovs = chunk->iovs; 449 450 iovs = realloc(iovs, chunk_iovcnt * sizeof(*iovs)); 451 if (!iovs) { 452 return -ENOMEM; 453 } 454 chunk->iovs = iovs; 455 chunk->iovcnt_max = chunk_iovcnt; 456 } 457 chunk->iovcnt = chunk_iovcnt; 458 459 if (raid_io_md) { 460 chunk->md_buf = raid_io_md + 461 (raid_io_offset >> raid_bdev->blocklen_shift) * raid_io_md_size; 462 } 463 464 for (i = 0; i < chunk_iovcnt; i++) { 465 struct iovec *chunk_iov = &chunk->iovs[i]; 466 const struct iovec *raid_io_iov = &raid_io_iovs[raid_io_iov_idx]; 467 size_t chunk_iov_offset = raid_io_offset - raid_io_iov_offset; 468 469 chunk_iov->iov_base = raid_io_iov->iov_base + chunk_iov_offset; 470 chunk_iov->iov_len = spdk_min(len, raid_io_iov->iov_len - chunk_iov_offset); 471 raid_io_offset += chunk_iov->iov_len; 472 len -= chunk_iov->iov_len; 473 474 if (raid_io_offset >= raid_io_iov_offset + raid_io_iov->iov_len) { 475 raid_io_iov_idx++; 476 raid_io_iov_offset += raid_io_iov->iov_len; 477 } 478 } 479 480 if (spdk_unlikely(len > 0)) { 481 return -EINVAL; 482 } 483 } 484 485 stripe_req->parity_chunk->iovs[0].iov_base = stripe_req->parity_buf; 486 stripe_req->parity_chunk->iovs[0].iov_len = raid_bdev->strip_size << 487 raid_bdev->blocklen_shift; 488 stripe_req->parity_chunk->md_buf = stripe_req->parity_md_buf; 489 stripe_req->parity_chunk->iovcnt = 1; 490 491 return 0; 492 } 493 494 static void 495 raid5f_stripe_request_submit_chunks(struct stripe_request *stripe_req) 496 { 497 struct raid_bdev_io *raid_io = stripe_req->raid_io; 498 struct chunk *start = &stripe_req->chunks[raid_io->base_bdev_io_submitted]; 499 struct chunk *chunk; 500 501 FOR_EACH_CHUNK_FROM(stripe_req, chunk, start) { 502 if (spdk_unlikely(raid5f_chunk_write(chunk) != 0)) { 503 break; 504 } 505 raid_io->base_bdev_io_submitted++; 506 } 507 } 508 509 static int 510 raid5f_submit_write_request(struct raid_bdev_io *raid_io, uint64_t stripe_index) 511 { 512 struct raid_bdev *raid_bdev = raid_io->raid_bdev; 513 struct raid5f_io_channel *r5ch = spdk_io_channel_get_ctx(raid_io->raid_ch->module_channel); 514 struct stripe_request *stripe_req; 515 int ret; 516 517 stripe_req = TAILQ_FIRST(&r5ch->free_stripe_requests); 518 if (!stripe_req) { 519 return -ENOMEM; 520 } 521 522 stripe_req->stripe_index = stripe_index; 523 stripe_req->parity_chunk = stripe_req->chunks + raid5f_stripe_parity_chunk_index(raid_bdev, 524 stripe_req->stripe_index); 525 stripe_req->raid_io = raid_io; 526 527 ret = raid5f_stripe_request_map_iovecs(stripe_req); 528 if (spdk_unlikely(ret)) { 529 return ret; 530 } 531 532 TAILQ_REMOVE(&r5ch->free_stripe_requests, stripe_req, link); 533 534 raid_io->module_private = stripe_req; 535 raid_io->base_bdev_io_remaining = raid_bdev->num_base_bdevs; 536 537 raid5f_xor_stripe(stripe_req); 538 539 return 0; 540 } 541 542 static void 543 raid5f_chunk_read_complete(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) 544 { 545 struct raid_bdev_io *raid_io = cb_arg; 546 547 spdk_bdev_free_io(bdev_io); 548 549 raid_bdev_io_complete(raid_io, success ? SPDK_BDEV_IO_STATUS_SUCCESS : 550 SPDK_BDEV_IO_STATUS_FAILED); 551 } 552 553 static void raid5f_submit_rw_request(struct raid_bdev_io *raid_io); 554 555 static void 556 _raid5f_submit_rw_request(void *_raid_io) 557 { 558 struct raid_bdev_io *raid_io = _raid_io; 559 560 raid5f_submit_rw_request(raid_io); 561 } 562 563 static int 564 raid5f_submit_read_request(struct raid_bdev_io *raid_io, uint64_t stripe_index, 565 uint64_t stripe_offset) 566 { 567 struct raid_bdev *raid_bdev = raid_io->raid_bdev; 568 uint8_t chunk_data_idx = stripe_offset >> raid_bdev->strip_size_shift; 569 uint8_t p_idx = raid5f_stripe_parity_chunk_index(raid_bdev, stripe_index); 570 uint8_t chunk_idx = chunk_data_idx < p_idx ? chunk_data_idx : chunk_data_idx + 1; 571 struct raid_base_bdev_info *base_info = &raid_bdev->base_bdev_info[chunk_idx]; 572 struct spdk_io_channel *base_ch = raid_io->raid_ch->base_channel[chunk_idx]; 573 uint64_t chunk_offset = stripe_offset - (chunk_data_idx << raid_bdev->strip_size_shift); 574 uint64_t base_offset_blocks = (stripe_index << raid_bdev->strip_size_shift) + chunk_offset; 575 struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io); 576 struct spdk_bdev_ext_io_opts io_opts; 577 int ret; 578 579 raid5f_init_ext_io_opts(bdev_io, &io_opts); 580 ret = spdk_bdev_readv_blocks_ext(base_info->desc, base_ch, bdev_io->u.bdev.iovs, 581 bdev_io->u.bdev.iovcnt, 582 base_offset_blocks, bdev_io->u.bdev.num_blocks, raid5f_chunk_read_complete, raid_io, 583 &io_opts); 584 585 if (spdk_unlikely(ret == -ENOMEM)) { 586 raid_bdev_queue_io_wait(raid_io, base_info->bdev, base_ch, 587 _raid5f_submit_rw_request); 588 return 0; 589 } 590 591 return ret; 592 } 593 594 static void 595 raid5f_submit_rw_request(struct raid_bdev_io *raid_io) 596 { 597 struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io); 598 struct raid_bdev *raid_bdev = raid_io->raid_bdev; 599 struct raid5f_info *r5f_info = raid_bdev->module_private; 600 uint64_t offset_blocks = bdev_io->u.bdev.offset_blocks; 601 uint64_t stripe_index = offset_blocks / r5f_info->stripe_blocks; 602 uint64_t stripe_offset = offset_blocks % r5f_info->stripe_blocks; 603 int ret; 604 605 switch (bdev_io->type) { 606 case SPDK_BDEV_IO_TYPE_READ: 607 assert(bdev_io->u.bdev.num_blocks <= raid_bdev->strip_size); 608 ret = raid5f_submit_read_request(raid_io, stripe_index, stripe_offset); 609 break; 610 case SPDK_BDEV_IO_TYPE_WRITE: 611 assert(stripe_offset == 0); 612 assert(bdev_io->u.bdev.num_blocks == r5f_info->stripe_blocks); 613 ret = raid5f_submit_write_request(raid_io, stripe_index); 614 break; 615 default: 616 ret = -EINVAL; 617 break; 618 } 619 620 if (spdk_unlikely(ret)) { 621 raid_bdev_io_complete(raid_io, ret == -ENOMEM ? SPDK_BDEV_IO_STATUS_NOMEM : 622 SPDK_BDEV_IO_STATUS_FAILED); 623 } 624 } 625 626 static void 627 raid5f_stripe_request_free(struct stripe_request *stripe_req) 628 { 629 struct chunk *chunk; 630 631 FOR_EACH_CHUNK(stripe_req, chunk) { 632 free(chunk->iovs); 633 } 634 635 spdk_dma_free(stripe_req->parity_buf); 636 spdk_dma_free(stripe_req->parity_md_buf); 637 638 free(stripe_req->chunk_xor_buffers); 639 free(stripe_req->chunk_xor_md_buffers); 640 free(stripe_req->chunk_iov_iters); 641 642 free(stripe_req); 643 } 644 645 static struct stripe_request * 646 raid5f_stripe_request_alloc(struct raid5f_io_channel *r5ch) 647 { 648 struct raid5f_info *r5f_info = raid5f_ch_to_r5f_info(r5ch); 649 struct raid_bdev *raid_bdev = r5f_info->raid_bdev; 650 uint32_t raid_io_md_size = spdk_bdev_get_md_size(&raid_bdev->bdev); 651 struct stripe_request *stripe_req; 652 struct chunk *chunk; 653 654 stripe_req = calloc(1, sizeof(*stripe_req) + 655 sizeof(struct chunk) * raid_bdev->num_base_bdevs); 656 if (!stripe_req) { 657 return NULL; 658 } 659 660 stripe_req->r5ch = r5ch; 661 662 FOR_EACH_CHUNK(stripe_req, chunk) { 663 chunk->index = chunk - stripe_req->chunks; 664 chunk->iovcnt_max = 4; 665 chunk->iovs = calloc(chunk->iovcnt_max, sizeof(chunk->iovs[0])); 666 if (!chunk->iovs) { 667 goto err; 668 } 669 } 670 671 stripe_req->parity_buf = spdk_dma_malloc(raid_bdev->strip_size << raid_bdev->blocklen_shift, 672 r5f_info->buf_alignment, NULL); 673 if (!stripe_req->parity_buf) { 674 goto err; 675 } 676 677 if (raid_io_md_size != 0) { 678 stripe_req->parity_md_buf = spdk_dma_malloc(raid_bdev->strip_size * raid_io_md_size, 679 r5f_info->buf_alignment, NULL); 680 if (!stripe_req->parity_md_buf) { 681 goto err; 682 } 683 } 684 685 stripe_req->chunk_iov_iters = calloc(raid5f_stripe_data_chunks_num(raid_bdev), 686 sizeof(stripe_req->chunk_iov_iters[0])); 687 if (!stripe_req->chunk_iov_iters) { 688 goto err; 689 } 690 691 stripe_req->chunk_xor_buffers = calloc(raid5f_stripe_data_chunks_num(raid_bdev), 692 sizeof(stripe_req->chunk_xor_buffers[0])); 693 if (!stripe_req->chunk_xor_buffers) { 694 goto err; 695 } 696 697 stripe_req->chunk_xor_md_buffers = calloc(raid5f_stripe_data_chunks_num(raid_bdev), 698 sizeof(stripe_req->chunk_xor_md_buffers[0])); 699 if (!stripe_req->chunk_xor_md_buffers) { 700 goto err; 701 } 702 703 return stripe_req; 704 err: 705 raid5f_stripe_request_free(stripe_req); 706 return NULL; 707 } 708 709 static void 710 raid5f_ioch_destroy(void *io_device, void *ctx_buf) 711 { 712 struct raid5f_io_channel *r5ch = ctx_buf; 713 struct stripe_request *stripe_req; 714 715 assert(TAILQ_EMPTY(&r5ch->xor_retry_queue)); 716 717 while ((stripe_req = TAILQ_FIRST(&r5ch->free_stripe_requests))) { 718 TAILQ_REMOVE(&r5ch->free_stripe_requests, stripe_req, link); 719 raid5f_stripe_request_free(stripe_req); 720 } 721 722 if (r5ch->accel_ch) { 723 spdk_put_io_channel(r5ch->accel_ch); 724 } 725 } 726 727 static int 728 raid5f_ioch_create(void *io_device, void *ctx_buf) 729 { 730 struct raid5f_io_channel *r5ch = ctx_buf; 731 struct raid5f_info *r5f_info = io_device; 732 int status = 0; 733 int i; 734 735 TAILQ_INIT(&r5ch->free_stripe_requests); 736 737 for (i = 0; i < RAID5F_MAX_STRIPES; i++) { 738 struct stripe_request *stripe_req; 739 740 stripe_req = raid5f_stripe_request_alloc(r5ch); 741 if (!stripe_req) { 742 status = -ENOMEM; 743 goto out; 744 } 745 746 TAILQ_INSERT_HEAD(&r5ch->free_stripe_requests, stripe_req, link); 747 } 748 749 r5ch->accel_ch = spdk_accel_get_io_channel(); 750 if (!r5ch->accel_ch) { 751 SPDK_ERRLOG("Failed to get accel framework's IO channel\n"); 752 goto out; 753 } 754 755 TAILQ_INIT(&r5ch->xor_retry_queue); 756 out: 757 if (status) { 758 SPDK_ERRLOG("Failed to initialize io channel\n"); 759 raid5f_ioch_destroy(r5f_info, r5ch); 760 } 761 return status; 762 } 763 764 static int 765 raid5f_start(struct raid_bdev *raid_bdev) 766 { 767 uint64_t min_blockcnt = UINT64_MAX; 768 struct raid_base_bdev_info *base_info; 769 struct raid5f_info *r5f_info; 770 size_t alignment = 0; 771 772 r5f_info = calloc(1, sizeof(*r5f_info)); 773 if (!r5f_info) { 774 SPDK_ERRLOG("Failed to allocate r5f_info\n"); 775 return -ENOMEM; 776 } 777 r5f_info->raid_bdev = raid_bdev; 778 779 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 780 min_blockcnt = spdk_min(min_blockcnt, base_info->bdev->blockcnt); 781 alignment = spdk_max(alignment, spdk_bdev_get_buf_align(base_info->bdev)); 782 } 783 784 r5f_info->total_stripes = min_blockcnt / raid_bdev->strip_size; 785 r5f_info->stripe_blocks = raid_bdev->strip_size * raid5f_stripe_data_chunks_num(raid_bdev); 786 r5f_info->buf_alignment = alignment; 787 788 raid_bdev->bdev.blockcnt = r5f_info->stripe_blocks * r5f_info->total_stripes; 789 raid_bdev->bdev.optimal_io_boundary = raid_bdev->strip_size; 790 raid_bdev->bdev.split_on_optimal_io_boundary = true; 791 raid_bdev->bdev.write_unit_size = r5f_info->stripe_blocks; 792 raid_bdev->bdev.split_on_write_unit = true; 793 794 raid_bdev->module_private = r5f_info; 795 796 spdk_io_device_register(r5f_info, raid5f_ioch_create, raid5f_ioch_destroy, 797 sizeof(struct raid5f_io_channel), NULL); 798 799 return 0; 800 } 801 802 static void 803 raid5f_io_device_unregister_done(void *io_device) 804 { 805 struct raid5f_info *r5f_info = io_device; 806 807 raid_bdev_module_stop_done(r5f_info->raid_bdev); 808 809 free(r5f_info); 810 } 811 812 static bool 813 raid5f_stop(struct raid_bdev *raid_bdev) 814 { 815 struct raid5f_info *r5f_info = raid_bdev->module_private; 816 817 spdk_io_device_unregister(r5f_info, raid5f_io_device_unregister_done); 818 819 return false; 820 } 821 822 static struct spdk_io_channel * 823 raid5f_get_io_channel(struct raid_bdev *raid_bdev) 824 { 825 struct raid5f_info *r5f_info = raid_bdev->module_private; 826 827 return spdk_get_io_channel(r5f_info); 828 } 829 830 static struct raid_bdev_module g_raid5f_module = { 831 .level = RAID5F, 832 .base_bdevs_min = 3, 833 .base_bdevs_constraint = {CONSTRAINT_MAX_BASE_BDEVS_REMOVED, 1}, 834 .start = raid5f_start, 835 .stop = raid5f_stop, 836 .submit_rw_request = raid5f_submit_rw_request, 837 .get_io_channel = raid5f_get_io_channel, 838 }; 839 RAID_MODULE_REGISTER(&g_raid5f_module) 840 841 SPDK_LOG_REGISTER_COMPONENT(bdev_raid5f) 842