/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (C) 2022 Intel Corporation.
 * All rights reserved.
 */

#include "bdev_raid.h"

#include "spdk/env.h"
#include "spdk/thread.h"
#include "spdk/string.h"
#include "spdk/util.h"
#include "spdk/likely.h"
#include "spdk/log.h"
#include "spdk/accel.h"

/* Maximum concurrent full stripe writes per io channel */
#define RAID5F_MAX_STRIPES 32

struct chunk {
	/* Corresponds to base_bdev index */
	uint8_t index;

	/* Array of iovecs */
	struct iovec *iovs;

	/* Number of used iovecs */
	int iovcnt;

	/* Total number of available iovecs in the array */
	int iovcnt_max;

	/* Pointer to buffer with I/O metadata */
	void *md_buf;

	/* Shallow copy of IO request parameters */
	struct spdk_bdev_ext_io_opts ext_opts;
};

struct stripe_request;
typedef void (*stripe_req_xor_cb)(struct stripe_request *stripe_req, int status);

struct stripe_request {
	enum stripe_request_type {
		STRIPE_REQ_WRITE,
	} type;

	struct raid5f_io_channel *r5ch;

	/* The associated raid_bdev_io */
	struct raid_bdev_io *raid_io;

	/* The stripe's index in the raid array. */
	uint64_t stripe_index;

	/* The stripe's parity chunk */
	struct chunk *parity_chunk;

	union {
		struct {
			/* Buffer for stripe parity */
			void *parity_buf;

			/* Buffer for stripe io metadata parity */
			void *parity_md_buf;
		} write;
	};

	/* Array of iovec iterators for each chunk */
	struct spdk_ioviter *chunk_iov_iters;

	/* Array of source buffer pointers for parity calculation */
	void **chunk_xor_buffers;

	/* Array of source buffer pointers for parity calculation of io metadata */
	void **chunk_xor_md_buffers;

	struct {
		size_t len;
		size_t remaining;
		size_t remaining_md;
		int status;
		stripe_req_xor_cb cb;
	} xor;

	TAILQ_ENTRY(stripe_request) link;

	/* Array of chunks corresponding to base_bdevs */
	struct chunk chunks[0];
};

struct raid5f_info {
	/* The parent raid bdev */
	struct raid_bdev *raid_bdev;

	/* Number of data blocks in a stripe (without parity) */
	uint64_t stripe_blocks;

	/* Number of stripes on this array */
	uint64_t total_stripes;

	/* Alignment for buffer allocation */
	size_t buf_alignment;
};

struct raid5f_io_channel {
	/* All available stripe requests on this channel */
	struct {
		TAILQ_HEAD(, stripe_request) write;
	} free_stripe_requests;

	/* accel_fw channel */
	struct spdk_io_channel *accel_ch;

	/* For retrying xor if accel_ch runs out of resources */
	TAILQ_HEAD(, stripe_request) xor_retry_queue;

	/* For iterating over chunk iovecs during xor calculation */
	void **chunk_xor_buffers;
	struct iovec **chunk_xor_iovs;
	size_t *chunk_xor_iovcnt;
};
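
/*
 * Chunk iteration helpers. A stripe request holds one chunk per base bdev;
 * FOR_EACH_DATA_CHUNK visits every chunk except the parity chunk by stepping
 * over stripe_req->parity_chunk. The parity chunk's position rotates with the
 * stripe index (see raid5f_stripe_parity_chunk_index() below); for example,
 * with 4 base bdevs, stripes 0, 1, 2 and 3 place parity on chunks 3, 2, 1 and
 * 0 respectively, and the pattern then repeats.
 */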
#define __CHUNK_IN_RANGE(req, c) \
	c < req->chunks + raid5f_ch_to_r5f_info(req->r5ch)->raid_bdev->num_base_bdevs

#define FOR_EACH_CHUNK_FROM(req, c, from) \
	for (c = from; __CHUNK_IN_RANGE(req, c); c++)

#define FOR_EACH_CHUNK(req, c) \
	FOR_EACH_CHUNK_FROM(req, c, req->chunks)

#define __NEXT_DATA_CHUNK(req, c) \
	c == req->parity_chunk ? c+1 : c

#define FOR_EACH_DATA_CHUNK(req, c) \
	for (c = __NEXT_DATA_CHUNK(req, req->chunks); __CHUNK_IN_RANGE(req, c); \
	     c = __NEXT_DATA_CHUNK(req, c+1))

static inline struct raid5f_info *
raid5f_ch_to_r5f_info(struct raid5f_io_channel *r5ch)
{
	return spdk_io_channel_get_io_device(spdk_io_channel_from_ctx(r5ch));
}

static inline struct stripe_request *
raid5f_chunk_stripe_req(struct chunk *chunk)
{
	return SPDK_CONTAINEROF((chunk - chunk->index), struct stripe_request, chunks);
}

static inline uint8_t
raid5f_stripe_data_chunks_num(const struct raid_bdev *raid_bdev)
{
	return raid_bdev->min_base_bdevs_operational;
}

static inline uint8_t
raid5f_stripe_parity_chunk_index(const struct raid_bdev *raid_bdev, uint64_t stripe_index)
{
	return raid5f_stripe_data_chunks_num(raid_bdev) - stripe_index % raid_bdev->num_base_bdevs;
}

static inline void
raid5f_stripe_request_release(struct stripe_request *stripe_req)
{
	if (spdk_likely(stripe_req->type == STRIPE_REQ_WRITE)) {
		TAILQ_INSERT_HEAD(&stripe_req->r5ch->free_stripe_requests.write, stripe_req, link);
	} else {
		assert(false);
	}
}

static void raid5f_xor_stripe_retry(struct stripe_request *stripe_req);

static void
raid5f_xor_stripe_done(struct stripe_request *stripe_req)
{
	struct raid5f_io_channel *r5ch = stripe_req->r5ch;

	if (stripe_req->xor.status != 0) {
		SPDK_ERRLOG("stripe xor failed: %s\n", spdk_strerror(-stripe_req->xor.status));
	}

	stripe_req->xor.cb(stripe_req, stripe_req->xor.status);

	if (!TAILQ_EMPTY(&r5ch->xor_retry_queue)) {
		stripe_req = TAILQ_FIRST(&r5ch->xor_retry_queue);
		TAILQ_REMOVE(&r5ch->xor_retry_queue, stripe_req, link);
		raid5f_xor_stripe_retry(stripe_req);
	}
}

static void raid5f_xor_stripe_continue(struct stripe_request *stripe_req);

static void
_raid5f_xor_stripe_cb(struct stripe_request *stripe_req, int status)
{
	if (status != 0) {
		stripe_req->xor.status = status;
	}

	if (stripe_req->xor.remaining + stripe_req->xor.remaining_md == 0) {
		raid5f_xor_stripe_done(stripe_req);
	}
}

static void
raid5f_xor_stripe_cb(void *_stripe_req, int status)
{
	struct stripe_request *stripe_req = _stripe_req;

	stripe_req->xor.remaining -= stripe_req->xor.len;

	if (stripe_req->xor.remaining > 0) {
		stripe_req->xor.len = spdk_ioviter_nextv(stripe_req->chunk_iov_iters,
				      stripe_req->r5ch->chunk_xor_buffers);
		raid5f_xor_stripe_continue(stripe_req);
	}

	_raid5f_xor_stripe_cb(stripe_req, status);
}

static void
raid5f_xor_stripe_md_cb(void *_stripe_req, int status)
{
	struct stripe_request *stripe_req = _stripe_req;

	stripe_req->xor.remaining_md = 0;

	_raid5f_xor_stripe_cb(stripe_req, status);
}
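
/*
 * Parity calculation. The data chunks are XORed into the parity buffer using
 * the accel framework. Because each chunk may be made up of several iovecs,
 * the stripe is processed in segments: spdk_ioviter_firstv()/_nextv() yield,
 * for each chunk, the largest contiguous length common to all chunks' iovecs,
 * and one accel xor operation is submitted per segment. xor.remaining counts
 * the data bytes still to be XORed; xor.remaining_md does the same for the
 * metadata buffers, which are contiguous and handled in a single operation.
 */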
static void
raid5f_xor_stripe_continue(struct stripe_request *stripe_req)
{
	struct raid5f_io_channel *r5ch = stripe_req->r5ch;
	struct raid_bdev_io *raid_io = stripe_req->raid_io;
	struct raid_bdev *raid_bdev = raid_io->raid_bdev;
	uint8_t n_src = raid5f_stripe_data_chunks_num(raid_bdev);
	uint8_t i;
	int ret;

	assert(stripe_req->xor.len > 0);

	for (i = 0; i < n_src; i++) {
		stripe_req->chunk_xor_buffers[i] = r5ch->chunk_xor_buffers[i];
	}

	ret = spdk_accel_submit_xor(r5ch->accel_ch, r5ch->chunk_xor_buffers[n_src],
				    stripe_req->chunk_xor_buffers, n_src, stripe_req->xor.len,
				    raid5f_xor_stripe_cb, stripe_req);
	if (spdk_unlikely(ret)) {
		if (ret == -ENOMEM) {
			TAILQ_INSERT_HEAD(&r5ch->xor_retry_queue, stripe_req, link);
		} else {
			stripe_req->xor.status = ret;
			raid5f_xor_stripe_done(stripe_req);
		}
	}
}

static void
raid5f_xor_stripe(struct stripe_request *stripe_req, stripe_req_xor_cb cb)
{
	struct raid5f_io_channel *r5ch = stripe_req->r5ch;
	struct raid_bdev_io *raid_io = stripe_req->raid_io;
	struct raid_bdev *raid_bdev = raid_io->raid_bdev;
	struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io);
	void *raid_md = spdk_bdev_io_get_md_buf(bdev_io);
	uint32_t raid_md_size = spdk_bdev_get_md_size(&raid_bdev->bdev);
	struct chunk *chunk;
	uint8_t c;

	assert(cb != NULL);
	assert(stripe_req->type == STRIPE_REQ_WRITE);

	c = 0;
	FOR_EACH_DATA_CHUNK(stripe_req, chunk) {
		r5ch->chunk_xor_iovs[c] = chunk->iovs;
		r5ch->chunk_xor_iovcnt[c] = chunk->iovcnt;
		c++;
	}
	r5ch->chunk_xor_iovs[c] = stripe_req->parity_chunk->iovs;
	r5ch->chunk_xor_iovcnt[c] = stripe_req->parity_chunk->iovcnt;

	stripe_req->xor.len = spdk_ioviter_firstv(stripe_req->chunk_iov_iters,
			      raid_bdev->num_base_bdevs,
			      r5ch->chunk_xor_iovs,
			      r5ch->chunk_xor_iovcnt,
			      r5ch->chunk_xor_buffers);
	stripe_req->xor.remaining = raid_bdev->strip_size << raid_bdev->blocklen_shift;
	stripe_req->xor.status = 0;
	stripe_req->xor.cb = cb;

	if (raid_md != NULL) {
		uint8_t n_src = raid5f_stripe_data_chunks_num(raid_bdev);
		uint64_t len = raid_bdev->strip_size * raid_md_size;
		int ret;

		stripe_req->xor.remaining_md = len;

		c = 0;
		FOR_EACH_DATA_CHUNK(stripe_req, chunk) {
			stripe_req->chunk_xor_md_buffers[c] = chunk->md_buf;
			c++;
		}

		ret = spdk_accel_submit_xor(stripe_req->r5ch->accel_ch, stripe_req->write.parity_md_buf,
					    stripe_req->chunk_xor_md_buffers, n_src, len,
					    raid5f_xor_stripe_md_cb, stripe_req);
		if (spdk_unlikely(ret)) {
			if (ret == -ENOMEM) {
				TAILQ_INSERT_HEAD(&stripe_req->r5ch->xor_retry_queue, stripe_req, link);
			} else {
				stripe_req->xor.status = ret;
				raid5f_xor_stripe_done(stripe_req);
			}
			return;
		}
	}

	raid5f_xor_stripe_continue(stripe_req);
}

static void
raid5f_xor_stripe_retry(struct stripe_request *stripe_req)
{
	if (stripe_req->xor.remaining_md) {
		raid5f_xor_stripe(stripe_req, stripe_req->xor.cb);
	} else {
		raid5f_xor_stripe_continue(stripe_req);
	}
}
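
/*
 * Write completion accounting: every chunk write completes through
 * raid_bdev_io_complete_part(), which counts down base_bdev_io_remaining and
 * completes the parent raid_bdev_io once all chunk writes, parity included,
 * have finished. Only then is the stripe request returned to the channel's
 * free list.
 */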
static void
raid5f_stripe_request_chunk_write_complete(struct stripe_request *stripe_req,
		enum spdk_bdev_io_status status)
{
	if (raid_bdev_io_complete_part(stripe_req->raid_io, 1, status)) {
		raid5f_stripe_request_release(stripe_req);
	}
}

static void
raid5f_chunk_complete_bdev_io(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct chunk *chunk = cb_arg;
	struct stripe_request *stripe_req = raid5f_chunk_stripe_req(chunk);
	enum spdk_bdev_io_status status = success ? SPDK_BDEV_IO_STATUS_SUCCESS :
					  SPDK_BDEV_IO_STATUS_FAILED;

	spdk_bdev_free_io(bdev_io);

	if (spdk_likely(stripe_req->type == STRIPE_REQ_WRITE)) {
		raid5f_stripe_request_chunk_write_complete(stripe_req, status);
	} else {
		assert(false);
	}
}

static void raid5f_stripe_request_submit_chunks(struct stripe_request *stripe_req);

static void
raid5f_chunk_submit_retry(void *_raid_io)
{
	struct raid_bdev_io *raid_io = _raid_io;
	struct stripe_request *stripe_req = raid_io->module_private;

	raid5f_stripe_request_submit_chunks(stripe_req);
}

static inline void
raid5f_init_ext_io_opts(struct spdk_bdev_io *bdev_io, struct spdk_bdev_ext_io_opts *opts)
{
	memset(opts, 0, sizeof(*opts));
	opts->size = sizeof(*opts);
	opts->memory_domain = bdev_io->u.bdev.memory_domain;
	opts->memory_domain_ctx = bdev_io->u.bdev.memory_domain_ctx;
	opts->metadata = bdev_io->u.bdev.md_buf;
}

static int
raid5f_chunk_submit(struct chunk *chunk)
{
	struct stripe_request *stripe_req = raid5f_chunk_stripe_req(chunk);
	struct raid_bdev_io *raid_io = stripe_req->raid_io;
	struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io);
	struct raid_bdev *raid_bdev = raid_io->raid_bdev;
	struct raid_base_bdev_info *base_info = &raid_bdev->base_bdev_info[chunk->index];
	struct spdk_io_channel *base_ch = raid_io->raid_ch->base_channel[chunk->index];
	uint64_t base_offset_blocks = (stripe_req->stripe_index << raid_bdev->strip_size_shift);
	int ret;

	raid5f_init_ext_io_opts(bdev_io, &chunk->ext_opts);
	chunk->ext_opts.metadata = chunk->md_buf;

	switch (stripe_req->type) {
	case STRIPE_REQ_WRITE:
		ret = spdk_bdev_writev_blocks_ext(base_info->desc, base_ch, chunk->iovs, chunk->iovcnt,
						  base_offset_blocks, raid_bdev->strip_size,
						  raid5f_chunk_complete_bdev_io, chunk,
						  &chunk->ext_opts);
		break;
	default:
		assert(false);
		ret = -EINVAL;
		break;
	}

	if (spdk_unlikely(ret)) {
		if (ret == -ENOMEM) {
			raid_bdev_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc),
						base_ch, raid5f_chunk_submit_retry);
		} else {
			/*
			 * Implicitly complete any I/Os not yet submitted as FAILED. If completing
			 * these means there are no more to complete for the stripe request, we can
			 * release the stripe request as well.
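
/*
 * Mapping of the parent I/O onto chunks: each data chunk receives exactly
 * strip_size blocks of the request. A chunk's iovec array is grown on demand
 * when the source data for that chunk is fragmented across more iovecs than
 * the chunk currently has room for. The parity chunk always points at the
 * preallocated contiguous parity (and parity metadata) buffers.
 */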
			 */
			uint64_t base_bdev_io_not_submitted = raid_bdev->num_base_bdevs -
							      raid_io->base_bdev_io_submitted;

			if (raid_bdev_io_complete_part(stripe_req->raid_io, base_bdev_io_not_submitted,
						       SPDK_BDEV_IO_STATUS_FAILED)) {
				raid5f_stripe_request_release(stripe_req);
			}
		}
	}

	return ret;
}

static int
raid5f_chunk_set_iovcnt(struct chunk *chunk, int iovcnt)
{
	if (iovcnt > chunk->iovcnt_max) {
		struct iovec *iovs = chunk->iovs;

		iovs = realloc(iovs, iovcnt * sizeof(*iovs));
		if (!iovs) {
			return -ENOMEM;
		}
		chunk->iovs = iovs;
		chunk->iovcnt_max = iovcnt;
	}
	chunk->iovcnt = iovcnt;

	return 0;
}

static int
raid5f_stripe_request_map_iovecs(struct stripe_request *stripe_req)
{
	struct raid_bdev *raid_bdev = stripe_req->raid_io->raid_bdev;
	struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(stripe_req->raid_io);
	const struct iovec *raid_io_iovs = bdev_io->u.bdev.iovs;
	int raid_io_iovcnt = bdev_io->u.bdev.iovcnt;
	void *raid_io_md = spdk_bdev_io_get_md_buf(bdev_io);
	uint32_t raid_io_md_size = spdk_bdev_get_md_size(&raid_bdev->bdev);
	struct chunk *chunk;
	int raid_io_iov_idx = 0;
	size_t raid_io_offset = 0;
	size_t raid_io_iov_offset = 0;
	int i;

	FOR_EACH_DATA_CHUNK(stripe_req, chunk) {
		int chunk_iovcnt = 0;
		uint64_t len = raid_bdev->strip_size << raid_bdev->blocklen_shift;
		size_t off = raid_io_iov_offset;
		int ret;

		for (i = raid_io_iov_idx; i < raid_io_iovcnt; i++) {
			chunk_iovcnt++;
			off += raid_io_iovs[i].iov_len;
			if (off >= raid_io_offset + len) {
				break;
			}
		}

		assert(raid_io_iov_idx + chunk_iovcnt <= raid_io_iovcnt);

		ret = raid5f_chunk_set_iovcnt(chunk, chunk_iovcnt);
		if (ret) {
			return ret;
		}

		if (raid_io_md) {
			chunk->md_buf = raid_io_md +
					(raid_io_offset >> raid_bdev->blocklen_shift) * raid_io_md_size;
		}

		for (i = 0; i < chunk_iovcnt; i++) {
			struct iovec *chunk_iov = &chunk->iovs[i];
			const struct iovec *raid_io_iov = &raid_io_iovs[raid_io_iov_idx];
			size_t chunk_iov_offset = raid_io_offset - raid_io_iov_offset;

			chunk_iov->iov_base = raid_io_iov->iov_base + chunk_iov_offset;
			chunk_iov->iov_len = spdk_min(len, raid_io_iov->iov_len - chunk_iov_offset);
			raid_io_offset += chunk_iov->iov_len;
			len -= chunk_iov->iov_len;

			if (raid_io_offset >= raid_io_iov_offset + raid_io_iov->iov_len) {
				raid_io_iov_idx++;
				raid_io_iov_offset += raid_io_iov->iov_len;
			}
		}

		if (spdk_unlikely(len > 0)) {
			return -EINVAL;
		}
	}

	stripe_req->parity_chunk->iovs[0].iov_base = stripe_req->write.parity_buf;
	stripe_req->parity_chunk->iovs[0].iov_len = raid_bdev->strip_size << raid_bdev->blocklen_shift;
	stripe_req->parity_chunk->iovcnt = 1;
	stripe_req->parity_chunk->md_buf = stripe_req->write.parity_md_buf;

	return 0;
}
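
/*
 * Full stripe write flow: take a free stripe request from the channel, map
 * the source iovecs onto the data chunks, compute parity, and only then
 * submit the chunk writes to the base bdevs. base_bdev_io_submitted records
 * how many chunks have been submitted so far, so that a submission that hits
 * -ENOMEM can be resumed from the first unsubmitted chunk by
 * raid5f_chunk_submit_retry().
 */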
static void
raid5f_stripe_request_submit_chunks(struct stripe_request *stripe_req)
{
	struct raid_bdev_io *raid_io = stripe_req->raid_io;
	struct chunk *start = &stripe_req->chunks[raid_io->base_bdev_io_submitted];
	struct chunk *chunk;

	FOR_EACH_CHUNK_FROM(stripe_req, chunk, start) {
		if (spdk_unlikely(raid5f_chunk_submit(chunk) != 0)) {
			break;
		}
		raid_io->base_bdev_io_submitted++;
	}
}

static void
raid5f_stripe_write_request_xor_done(struct stripe_request *stripe_req, int status)
{
	struct raid_bdev_io *raid_io = stripe_req->raid_io;

	if (status != 0) {
		raid5f_stripe_request_release(stripe_req);
		raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED);
	} else {
		raid5f_stripe_request_submit_chunks(stripe_req);
	}
}

static int
raid5f_submit_write_request(struct raid_bdev_io *raid_io, uint64_t stripe_index)
{
	struct raid_bdev *raid_bdev = raid_io->raid_bdev;
	struct raid5f_io_channel *r5ch = spdk_io_channel_get_ctx(raid_io->raid_ch->module_channel);
	struct stripe_request *stripe_req;
	int ret;

	stripe_req = TAILQ_FIRST(&r5ch->free_stripe_requests.write);
	if (!stripe_req) {
		return -ENOMEM;
	}

	stripe_req->stripe_index = stripe_index;
	stripe_req->parity_chunk = stripe_req->chunks + raid5f_stripe_parity_chunk_index(raid_bdev,
				   stripe_req->stripe_index);
	stripe_req->raid_io = raid_io;

	ret = raid5f_stripe_request_map_iovecs(stripe_req);
	if (spdk_unlikely(ret)) {
		return ret;
	}

	TAILQ_REMOVE(&r5ch->free_stripe_requests.write, stripe_req, link);

	raid_io->module_private = stripe_req;
	raid_io->base_bdev_io_remaining = raid_bdev->num_base_bdevs;

	raid5f_xor_stripe(stripe_req, raid5f_stripe_write_request_xor_done);

	return 0;
}
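
/*
 * Read path. Reads never span chunk boundaries (the generic bdev layer splits
 * them on optimal_io_boundary, which is set to strip_size), so each read maps
 * to a single base bdev I/O. The chunk index only needs to be adjusted to
 * skip the position occupied by the parity chunk in the target stripe.
 */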
static void
raid5f_chunk_read_complete(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct raid_bdev_io *raid_io = cb_arg;

	spdk_bdev_free_io(bdev_io);

	raid_bdev_io_complete(raid_io, success ? SPDK_BDEV_IO_STATUS_SUCCESS :
			      SPDK_BDEV_IO_STATUS_FAILED);
}

static void raid5f_submit_rw_request(struct raid_bdev_io *raid_io);

static void
_raid5f_submit_rw_request(void *_raid_io)
{
	struct raid_bdev_io *raid_io = _raid_io;

	raid5f_submit_rw_request(raid_io);
}

static int
raid5f_submit_read_request(struct raid_bdev_io *raid_io, uint64_t stripe_index,
			   uint64_t stripe_offset)
{
	struct raid_bdev *raid_bdev = raid_io->raid_bdev;
	uint8_t chunk_data_idx = stripe_offset >> raid_bdev->strip_size_shift;
	uint8_t p_idx = raid5f_stripe_parity_chunk_index(raid_bdev, stripe_index);
	uint8_t chunk_idx = chunk_data_idx < p_idx ? chunk_data_idx : chunk_data_idx + 1;
	struct raid_base_bdev_info *base_info = &raid_bdev->base_bdev_info[chunk_idx];
	struct spdk_io_channel *base_ch = raid_io->raid_ch->base_channel[chunk_idx];
	uint64_t chunk_offset = stripe_offset - (chunk_data_idx << raid_bdev->strip_size_shift);
	uint64_t base_offset_blocks = (stripe_index << raid_bdev->strip_size_shift) + chunk_offset;
	struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io);
	struct spdk_bdev_ext_io_opts io_opts;
	int ret;

	raid5f_init_ext_io_opts(bdev_io, &io_opts);
	ret = spdk_bdev_readv_blocks_ext(base_info->desc, base_ch, bdev_io->u.bdev.iovs,
					 bdev_io->u.bdev.iovcnt,
					 base_offset_blocks, bdev_io->u.bdev.num_blocks,
					 raid5f_chunk_read_complete, raid_io, &io_opts);

	if (spdk_unlikely(ret == -ENOMEM)) {
		raid_bdev_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc),
					base_ch, _raid5f_submit_rw_request);
		return 0;
	}

	return ret;
}
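
/*
 * I/O entry point. Writes are always exactly one full stripe: the raid bdev
 * advertises write_unit_size = stripe_blocks together with
 * split_on_write_unit, so the bdev layer only passes down stripe-aligned,
 * stripe-sized writes (see the asserts below).
 */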
static void
raid5f_submit_rw_request(struct raid_bdev_io *raid_io)
{
	struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io);
	struct raid_bdev *raid_bdev = raid_io->raid_bdev;
	struct raid5f_info *r5f_info = raid_bdev->module_private;
	uint64_t offset_blocks = bdev_io->u.bdev.offset_blocks;
	uint64_t stripe_index = offset_blocks / r5f_info->stripe_blocks;
	uint64_t stripe_offset = offset_blocks % r5f_info->stripe_blocks;
	int ret;

	switch (bdev_io->type) {
	case SPDK_BDEV_IO_TYPE_READ:
		assert(bdev_io->u.bdev.num_blocks <= raid_bdev->strip_size);
		ret = raid5f_submit_read_request(raid_io, stripe_index, stripe_offset);
		break;
	case SPDK_BDEV_IO_TYPE_WRITE:
		assert(stripe_offset == 0);
		assert(bdev_io->u.bdev.num_blocks == r5f_info->stripe_blocks);
		ret = raid5f_submit_write_request(raid_io, stripe_index);
		break;
	default:
		ret = -EINVAL;
		break;
	}

	if (spdk_unlikely(ret)) {
		raid_bdev_io_complete(raid_io, ret == -ENOMEM ? SPDK_BDEV_IO_STATUS_NOMEM :
				      SPDK_BDEV_IO_STATUS_FAILED);
	}
}
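
/*
 * Per-channel resources. Each io channel preallocates RAID5F_MAX_STRIPES
 * stripe requests together with their iovec iterators, xor source pointer
 * arrays and DMA-able parity buffers, so the write path never allocates
 * memory per I/O (apart from growing a chunk's iovec array on demand).
 */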
static void
raid5f_stripe_request_free(struct stripe_request *stripe_req)
{
	struct chunk *chunk;

	FOR_EACH_CHUNK(stripe_req, chunk) {
		free(chunk->iovs);
	}

	if (stripe_req->type == STRIPE_REQ_WRITE) {
		spdk_dma_free(stripe_req->write.parity_buf);
		spdk_dma_free(stripe_req->write.parity_md_buf);
	} else {
		assert(false);
	}

	free(stripe_req->chunk_xor_buffers);
	free(stripe_req->chunk_xor_md_buffers);
	free(stripe_req->chunk_iov_iters);

	free(stripe_req);
}

static struct stripe_request *
raid5f_stripe_request_alloc(struct raid5f_io_channel *r5ch, enum stripe_request_type type)
{
	struct raid5f_info *r5f_info = raid5f_ch_to_r5f_info(r5ch);
	struct raid_bdev *raid_bdev = r5f_info->raid_bdev;
	uint32_t raid_io_md_size = spdk_bdev_get_md_size(&raid_bdev->bdev);
	struct stripe_request *stripe_req;
	struct chunk *chunk;

	stripe_req = calloc(1, sizeof(*stripe_req) + sizeof(*chunk) * raid_bdev->num_base_bdevs);
	if (!stripe_req) {
		return NULL;
	}

	stripe_req->r5ch = r5ch;
	stripe_req->type = type;

	FOR_EACH_CHUNK(stripe_req, chunk) {
		chunk->index = chunk - stripe_req->chunks;
		chunk->iovcnt_max = 4;
		chunk->iovs = calloc(chunk->iovcnt_max, sizeof(chunk->iovs[0]));
		if (!chunk->iovs) {
			goto err;
		}
	}

	if (type == STRIPE_REQ_WRITE) {
		stripe_req->write.parity_buf = spdk_dma_malloc(raid_bdev->strip_size << raid_bdev->blocklen_shift,
					       r5f_info->buf_alignment, NULL);
		if (!stripe_req->write.parity_buf) {
			goto err;
		}

		if (raid_io_md_size != 0) {
			stripe_req->write.parity_md_buf = spdk_dma_malloc(raid_bdev->strip_size * raid_io_md_size,
							  r5f_info->buf_alignment, NULL);
			if (!stripe_req->write.parity_md_buf) {
				goto err;
			}
		}
	} else {
		assert(false);
		return NULL;
	}

	stripe_req->chunk_iov_iters = malloc(SPDK_IOVITER_SIZE(raid_bdev->num_base_bdevs));
	if (!stripe_req->chunk_iov_iters) {
		goto err;
	}

	stripe_req->chunk_xor_buffers = calloc(raid5f_stripe_data_chunks_num(raid_bdev),
					       sizeof(stripe_req->chunk_xor_buffers[0]));
	if (!stripe_req->chunk_xor_buffers) {
		goto err;
	}

	stripe_req->chunk_xor_md_buffers = calloc(raid5f_stripe_data_chunks_num(raid_bdev),
					   sizeof(stripe_req->chunk_xor_md_buffers[0]));
	if (!stripe_req->chunk_xor_md_buffers) {
		goto err;
	}

	return stripe_req;
err:
	raid5f_stripe_request_free(stripe_req);
	return NULL;
}

static void
raid5f_ioch_destroy(void *io_device, void *ctx_buf)
{
	struct raid5f_io_channel *r5ch = ctx_buf;
	struct stripe_request *stripe_req;

	assert(TAILQ_EMPTY(&r5ch->xor_retry_queue));

	while ((stripe_req = TAILQ_FIRST(&r5ch->free_stripe_requests.write))) {
		TAILQ_REMOVE(&r5ch->free_stripe_requests.write, stripe_req, link);
		raid5f_stripe_request_free(stripe_req);
	}

	if (r5ch->accel_ch) {
		spdk_put_io_channel(r5ch->accel_ch);
	}

	free(r5ch->chunk_xor_buffers);
	free(r5ch->chunk_xor_iovs);
	free(r5ch->chunk_xor_iovcnt);
}

static int
raid5f_ioch_create(void *io_device, void *ctx_buf)
{
	struct raid5f_io_channel *r5ch = ctx_buf;
	struct raid5f_info *r5f_info = io_device;
	struct raid_bdev *raid_bdev = r5f_info->raid_bdev;
	int i;

	TAILQ_INIT(&r5ch->free_stripe_requests.write);
	TAILQ_INIT(&r5ch->xor_retry_queue);

	for (i = 0; i < RAID5F_MAX_STRIPES; i++) {
		struct stripe_request *stripe_req;

		stripe_req = raid5f_stripe_request_alloc(r5ch, STRIPE_REQ_WRITE);
		if (!stripe_req) {
			goto err;
		}

		TAILQ_INSERT_HEAD(&r5ch->free_stripe_requests.write, stripe_req, link);
	}

	r5ch->accel_ch = spdk_accel_get_io_channel();
	if (!r5ch->accel_ch) {
		SPDK_ERRLOG("Failed to get accel framework's IO channel\n");
		goto err;
	}

	r5ch->chunk_xor_buffers = calloc(raid_bdev->num_base_bdevs, sizeof(*r5ch->chunk_xor_buffers));
	if (!r5ch->chunk_xor_buffers) {
		goto err;
	}

	r5ch->chunk_xor_iovs = calloc(raid_bdev->num_base_bdevs, sizeof(*r5ch->chunk_xor_iovs));
	if (!r5ch->chunk_xor_iovs) {
		goto err;
	}

	r5ch->chunk_xor_iovcnt = calloc(raid_bdev->num_base_bdevs, sizeof(*r5ch->chunk_xor_iovcnt));
	if (!r5ch->chunk_xor_iovcnt) {
		goto err;
	}

	return 0;
err:
	SPDK_ERRLOG("Failed to initialize io channel\n");
	raid5f_ioch_destroy(r5f_info, r5ch);
	return -ENOMEM;
}
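
/*
 * Array geometry. With n base bdevs, each stripe consists of (n - 1) data
 * chunks plus one parity chunk, so the exposed capacity is
 * stripe_blocks * total_stripes, where stripe_blocks = strip_size * (n - 1).
 * Illustrative example: 4 base bdevs of 1000 blocks each with a strip_size
 * of 100 blocks gives total_stripes = 10 and a raid bdev of
 * 10 * 300 = 3000 blocks.
 */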
static int
raid5f_start(struct raid_bdev *raid_bdev)
{
	uint64_t min_blockcnt = UINT64_MAX;
	struct raid_base_bdev_info *base_info;
	struct raid5f_info *r5f_info;
	size_t alignment = 0;

	r5f_info = calloc(1, sizeof(*r5f_info));
	if (!r5f_info) {
		SPDK_ERRLOG("Failed to allocate r5f_info\n");
		return -ENOMEM;
	}
	r5f_info->raid_bdev = raid_bdev;

	RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
		struct spdk_bdev *base_bdev;

		base_bdev = spdk_bdev_desc_get_bdev(base_info->desc);
		min_blockcnt = spdk_min(min_blockcnt, base_bdev->blockcnt);
		alignment = spdk_max(alignment, spdk_bdev_get_buf_align(base_bdev));
	}

	r5f_info->total_stripes = min_blockcnt / raid_bdev->strip_size;
	r5f_info->stripe_blocks = raid_bdev->strip_size * raid5f_stripe_data_chunks_num(raid_bdev);
	r5f_info->buf_alignment = alignment;

	raid_bdev->bdev.blockcnt = r5f_info->stripe_blocks * r5f_info->total_stripes;
	raid_bdev->bdev.optimal_io_boundary = raid_bdev->strip_size;
	raid_bdev->bdev.split_on_optimal_io_boundary = true;
	raid_bdev->bdev.write_unit_size = r5f_info->stripe_blocks;
	raid_bdev->bdev.split_on_write_unit = true;

	raid_bdev->module_private = r5f_info;

	spdk_io_device_register(r5f_info, raid5f_ioch_create, raid5f_ioch_destroy,
				sizeof(struct raid5f_io_channel), NULL);

	return 0;
}

static void
raid5f_io_device_unregister_done(void *io_device)
{
	struct raid5f_info *r5f_info = io_device;

	raid_bdev_module_stop_done(r5f_info->raid_bdev);

	free(r5f_info);
}

static bool
raid5f_stop(struct raid_bdev *raid_bdev)
{
	struct raid5f_info *r5f_info = raid_bdev->module_private;

	spdk_io_device_unregister(r5f_info, raid5f_io_device_unregister_done);

	return false;
}

static struct spdk_io_channel *
raid5f_get_io_channel(struct raid_bdev *raid_bdev)
{
	struct raid5f_info *r5f_info = raid_bdev->module_private;

	return spdk_get_io_channel(r5f_info);
}

static struct raid_bdev_module g_raid5f_module = {
	.level = RAID5F,
	.base_bdevs_min = 3,
	.base_bdevs_constraint = {CONSTRAINT_MAX_BASE_BDEVS_REMOVED, 1},
	.start = raid5f_start,
	.stop = raid5f_stop,
	.submit_rw_request = raid5f_submit_rw_request,
	.get_io_channel = raid5f_get_io_channel,
};
RAID_MODULE_REGISTER(&g_raid5f_module)

SPDK_LOG_REGISTER_COMPONENT(bdev_raid5f)