1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright (C) 2022 Intel Corporation. 3 * All rights reserved. 4 */ 5 6 #include "bdev_raid.h" 7 8 #include "spdk/env.h" 9 #include "spdk/thread.h" 10 #include "spdk/string.h" 11 #include "spdk/util.h" 12 #include "spdk/likely.h" 13 #include "spdk/log.h" 14 #include "spdk/accel.h" 15 16 /* Maximum concurrent full stripe writes per io channel */ 17 #define RAID5F_MAX_STRIPES 32 18 19 struct chunk { 20 /* Corresponds to base_bdev index */ 21 uint8_t index; 22 23 /* Array of iovecs */ 24 struct iovec *iovs; 25 26 /* Number of used iovecs */ 27 int iovcnt; 28 29 /* Total number of available iovecs in the array */ 30 int iovcnt_max; 31 32 /* Pointer to buffer with I/O metadata */ 33 void *md_buf; 34 35 /* Shallow copy of IO request parameters */ 36 struct spdk_bdev_ext_io_opts ext_opts; 37 }; 38 39 struct stripe_request { 40 enum stripe_request_type { 41 STRIPE_REQ_WRITE, 42 } type; 43 44 struct raid5f_io_channel *r5ch; 45 46 /* The associated raid_bdev_io */ 47 struct raid_bdev_io *raid_io; 48 49 /* The stripe's index in the raid array. */ 50 uint64_t stripe_index; 51 52 /* The stripe's parity chunk */ 53 struct chunk *parity_chunk; 54 55 union { 56 struct { 57 /* Buffer for stripe parity */ 58 void *parity_buf; 59 60 /* Buffer for stripe io metadata parity */ 61 void *parity_md_buf; 62 } write; 63 }; 64 65 /* Array of iovec iterators for each chunk */ 66 struct spdk_ioviter *chunk_iov_iters; 67 68 /* Array of source buffer pointers for parity calculation */ 69 void **chunk_xor_buffers; 70 71 /* Array of source buffer pointers for parity calculation of io metadata */ 72 void **chunk_xor_md_buffers; 73 74 struct { 75 size_t len; 76 size_t remaining; 77 size_t remaining_md; 78 int status; 79 } xor; 80 81 TAILQ_ENTRY(stripe_request) link; 82 83 /* Array of chunks corresponding to base_bdevs */ 84 struct chunk chunks[0]; 85 }; 86 87 struct raid5f_info { 88 /* The parent raid bdev */ 89 struct raid_bdev *raid_bdev; 90 91 /* Number of data blocks in a stripe (without parity) */ 92 uint64_t stripe_blocks; 93 94 /* Number of stripes on this array */ 95 uint64_t total_stripes; 96 97 /* Alignment for buffer allocation */ 98 size_t buf_alignment; 99 }; 100 101 struct raid5f_io_channel { 102 /* All available stripe requests on this channel */ 103 TAILQ_HEAD(, stripe_request) free_stripe_requests; 104 105 /* accel_fw channel */ 106 struct spdk_io_channel *accel_ch; 107 108 /* For retrying xor if accel_ch runs out of resources */ 109 TAILQ_HEAD(, stripe_request) xor_retry_queue; 110 111 /* For iterating over chunk iovecs during xor calculation */ 112 void **chunk_xor_buffers; 113 struct iovec **chunk_xor_iovs; 114 size_t *chunk_xor_iovcnt; 115 }; 116 117 #define __CHUNK_IN_RANGE(req, c) \ 118 c < req->chunks + raid5f_ch_to_r5f_info(req->r5ch)->raid_bdev->num_base_bdevs 119 120 #define FOR_EACH_CHUNK_FROM(req, c, from) \ 121 for (c = from; __CHUNK_IN_RANGE(req, c); c++) 122 123 #define FOR_EACH_CHUNK(req, c) \ 124 FOR_EACH_CHUNK_FROM(req, c, req->chunks) 125 126 #define __NEXT_DATA_CHUNK(req, c) \ 127 c == req->parity_chunk ? c+1 : c 128 129 #define FOR_EACH_DATA_CHUNK(req, c) \ 130 for (c = __NEXT_DATA_CHUNK(req, req->chunks); __CHUNK_IN_RANGE(req, c); \ 131 c = __NEXT_DATA_CHUNK(req, c+1)) 132 133 static inline struct raid5f_info * 134 raid5f_ch_to_r5f_info(struct raid5f_io_channel *r5ch) 135 { 136 return spdk_io_channel_get_io_device(spdk_io_channel_from_ctx(r5ch)); 137 } 138 139 static inline struct stripe_request * 140 raid5f_chunk_stripe_req(struct chunk *chunk) 141 { 142 return SPDK_CONTAINEROF((chunk - chunk->index), struct stripe_request, chunks); 143 } 144 145 static inline uint8_t 146 raid5f_stripe_data_chunks_num(const struct raid_bdev *raid_bdev) 147 { 148 return raid_bdev->min_base_bdevs_operational; 149 } 150 151 static inline uint8_t 152 raid5f_stripe_parity_chunk_index(const struct raid_bdev *raid_bdev, uint64_t stripe_index) 153 { 154 return raid5f_stripe_data_chunks_num(raid_bdev) - stripe_index % raid_bdev->num_base_bdevs; 155 } 156 157 static inline void 158 raid5f_stripe_request_release(struct stripe_request *stripe_req) 159 { 160 TAILQ_INSERT_HEAD(&stripe_req->r5ch->free_stripe_requests, stripe_req, link); 161 } 162 163 static void raid5f_stripe_request_submit_chunks(struct stripe_request *stripe_req); 164 static void raid5f_xor_stripe_retry(struct stripe_request *stripe_req); 165 166 static void 167 raid5f_xor_stripe_done(struct stripe_request *stripe_req) 168 { 169 struct raid5f_io_channel *r5ch = stripe_req->r5ch; 170 171 if (stripe_req->xor.status != 0) { 172 struct raid_bdev_io *raid_io = stripe_req->raid_io; 173 174 SPDK_ERRLOG("stripe xor failed: %s\n", spdk_strerror(-stripe_req->xor.status)); 175 raid5f_stripe_request_release(stripe_req); 176 raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED); 177 } else { 178 raid5f_stripe_request_submit_chunks(stripe_req); 179 } 180 181 if (!TAILQ_EMPTY(&r5ch->xor_retry_queue)) { 182 stripe_req = TAILQ_FIRST(&r5ch->xor_retry_queue); 183 TAILQ_REMOVE(&r5ch->xor_retry_queue, stripe_req, link); 184 raid5f_xor_stripe_retry(stripe_req); 185 } 186 } 187 188 static void raid5f_xor_stripe_continue(struct stripe_request *stripe_req); 189 190 static void 191 _raid5f_xor_stripe_cb(struct stripe_request *stripe_req, int status) 192 { 193 if (status != 0) { 194 stripe_req->xor.status = status; 195 } 196 197 if (stripe_req->xor.remaining + stripe_req->xor.remaining_md == 0) { 198 raid5f_xor_stripe_done(stripe_req); 199 } 200 } 201 202 static void 203 raid5f_xor_stripe_cb(void *_stripe_req, int status) 204 { 205 struct stripe_request *stripe_req = _stripe_req; 206 207 stripe_req->xor.remaining -= stripe_req->xor.len; 208 209 if (stripe_req->xor.remaining > 0) { 210 stripe_req->xor.len = spdk_ioviter_nextv(stripe_req->chunk_iov_iters, 211 stripe_req->r5ch->chunk_xor_buffers); 212 raid5f_xor_stripe_continue(stripe_req); 213 } 214 215 _raid5f_xor_stripe_cb(stripe_req, status); 216 } 217 218 static void 219 raid5f_xor_stripe_md_cb(void *_stripe_req, int status) 220 { 221 struct stripe_request *stripe_req = _stripe_req; 222 223 stripe_req->xor.remaining_md = 0; 224 225 _raid5f_xor_stripe_cb(stripe_req, status); 226 } 227 228 static void 229 raid5f_xor_stripe_continue(struct stripe_request *stripe_req) 230 { 231 struct raid5f_io_channel *r5ch = stripe_req->r5ch; 232 struct raid_bdev_io *raid_io = stripe_req->raid_io; 233 struct raid_bdev *raid_bdev = raid_io->raid_bdev; 234 uint8_t n_src = raid5f_stripe_data_chunks_num(raid_bdev); 235 uint8_t i; 236 int ret; 237 238 assert(stripe_req->xor.len > 0); 239 240 for (i = 0; i < n_src; i++) { 241 stripe_req->chunk_xor_buffers[i] = r5ch->chunk_xor_buffers[i]; 242 } 243 244 ret = spdk_accel_submit_xor(r5ch->accel_ch, r5ch->chunk_xor_buffers[n_src], 245 stripe_req->chunk_xor_buffers, n_src, stripe_req->xor.len, 246 raid5f_xor_stripe_cb, stripe_req); 247 if (spdk_unlikely(ret)) { 248 if (ret == -ENOMEM) { 249 TAILQ_INSERT_HEAD(&r5ch->xor_retry_queue, stripe_req, link); 250 } else { 251 stripe_req->xor.status = ret; 252 raid5f_xor_stripe_done(stripe_req); 253 } 254 } 255 } 256 257 static void 258 raid5f_xor_stripe(struct stripe_request *stripe_req) 259 { 260 struct raid5f_io_channel *r5ch = stripe_req->r5ch; 261 struct raid_bdev_io *raid_io = stripe_req->raid_io; 262 struct raid_bdev *raid_bdev = raid_io->raid_bdev; 263 struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io); 264 void *raid_md = spdk_bdev_io_get_md_buf(bdev_io); 265 uint32_t raid_md_size = spdk_bdev_get_md_size(&raid_bdev->bdev); 266 struct chunk *chunk; 267 uint8_t c; 268 269 assert(stripe_req->type == STRIPE_REQ_WRITE); 270 271 c = 0; 272 FOR_EACH_DATA_CHUNK(stripe_req, chunk) { 273 r5ch->chunk_xor_iovs[c] = chunk->iovs; 274 r5ch->chunk_xor_iovcnt[c] = chunk->iovcnt; 275 c++; 276 } 277 r5ch->chunk_xor_iovs[c] = stripe_req->parity_chunk->iovs; 278 r5ch->chunk_xor_iovcnt[c] = stripe_req->parity_chunk->iovcnt; 279 280 stripe_req->xor.len = spdk_ioviter_firstv(stripe_req->chunk_iov_iters, 281 raid_bdev->num_base_bdevs, 282 r5ch->chunk_xor_iovs, 283 r5ch->chunk_xor_iovcnt, 284 r5ch->chunk_xor_buffers); 285 stripe_req->xor.remaining = raid_bdev->strip_size << raid_bdev->blocklen_shift; 286 stripe_req->xor.status = 0; 287 288 if (raid_md != NULL) { 289 uint8_t n_src = raid5f_stripe_data_chunks_num(raid_bdev); 290 uint64_t len = raid_bdev->strip_size * raid_md_size; 291 int ret; 292 293 stripe_req->xor.remaining_md = len; 294 295 c = 0; 296 FOR_EACH_DATA_CHUNK(stripe_req, chunk) { 297 stripe_req->chunk_xor_md_buffers[c] = chunk->md_buf; 298 c++; 299 } 300 301 ret = spdk_accel_submit_xor(stripe_req->r5ch->accel_ch, stripe_req->write.parity_md_buf, 302 stripe_req->chunk_xor_md_buffers, n_src, len, 303 raid5f_xor_stripe_md_cb, stripe_req); 304 if (spdk_unlikely(ret)) { 305 if (ret == -ENOMEM) { 306 TAILQ_INSERT_HEAD(&stripe_req->r5ch->xor_retry_queue, stripe_req, link); 307 } else { 308 stripe_req->xor.status = ret; 309 raid5f_xor_stripe_done(stripe_req); 310 } 311 return; 312 } 313 } 314 315 raid5f_xor_stripe_continue(stripe_req); 316 } 317 318 static void 319 raid5f_xor_stripe_retry(struct stripe_request *stripe_req) 320 { 321 if (stripe_req->xor.remaining_md) { 322 raid5f_xor_stripe(stripe_req); 323 } else { 324 raid5f_xor_stripe_continue(stripe_req); 325 } 326 } 327 328 static void 329 raid5f_stripe_request_chunk_write_complete(struct stripe_request *stripe_req, 330 enum spdk_bdev_io_status status) 331 { 332 if (raid_bdev_io_complete_part(stripe_req->raid_io, 1, status)) { 333 raid5f_stripe_request_release(stripe_req); 334 } 335 } 336 337 static void 338 raid5f_chunk_complete_bdev_io(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) 339 { 340 struct chunk *chunk = cb_arg; 341 struct stripe_request *stripe_req = raid5f_chunk_stripe_req(chunk); 342 enum spdk_bdev_io_status status = success ? SPDK_BDEV_IO_STATUS_SUCCESS : 343 SPDK_BDEV_IO_STATUS_FAILED; 344 345 spdk_bdev_free_io(bdev_io); 346 347 if (spdk_likely(stripe_req->type == STRIPE_REQ_WRITE)) { 348 raid5f_stripe_request_chunk_write_complete(stripe_req, status); 349 } else { 350 assert(false); 351 } 352 } 353 354 static void 355 raid5f_chunk_submit_retry(void *_raid_io) 356 { 357 struct raid_bdev_io *raid_io = _raid_io; 358 struct stripe_request *stripe_req = raid_io->module_private; 359 360 raid5f_stripe_request_submit_chunks(stripe_req); 361 } 362 363 static inline void 364 raid5f_init_ext_io_opts(struct spdk_bdev_io *bdev_io, struct spdk_bdev_ext_io_opts *opts) 365 { 366 memset(opts, 0, sizeof(*opts)); 367 opts->size = sizeof(*opts); 368 opts->memory_domain = bdev_io->u.bdev.memory_domain; 369 opts->memory_domain_ctx = bdev_io->u.bdev.memory_domain_ctx; 370 opts->metadata = bdev_io->u.bdev.md_buf; 371 } 372 373 static int 374 raid5f_chunk_submit(struct chunk *chunk) 375 { 376 struct stripe_request *stripe_req = raid5f_chunk_stripe_req(chunk); 377 struct raid_bdev_io *raid_io = stripe_req->raid_io; 378 struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io); 379 struct raid_bdev *raid_bdev = raid_io->raid_bdev; 380 struct raid_base_bdev_info *base_info = &raid_bdev->base_bdev_info[chunk->index]; 381 struct spdk_io_channel *base_ch = raid_io->raid_ch->base_channel[chunk->index]; 382 uint64_t base_offset_blocks = (stripe_req->stripe_index << raid_bdev->strip_size_shift); 383 int ret; 384 385 raid5f_init_ext_io_opts(bdev_io, &chunk->ext_opts); 386 chunk->ext_opts.metadata = chunk->md_buf; 387 388 switch (stripe_req->type) { 389 case STRIPE_REQ_WRITE: 390 ret = spdk_bdev_writev_blocks_ext(base_info->desc, base_ch, chunk->iovs, chunk->iovcnt, 391 base_offset_blocks, raid_bdev->strip_size, 392 raid5f_chunk_complete_bdev_io, chunk, 393 &chunk->ext_opts); 394 break; 395 default: 396 assert(false); 397 ret = -EINVAL; 398 break; 399 } 400 401 if (spdk_unlikely(ret)) { 402 if (ret == -ENOMEM) { 403 raid_bdev_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc), 404 base_ch, raid5f_chunk_submit_retry); 405 } else { 406 /* 407 * Implicitly complete any I/Os not yet submitted as FAILED. If completing 408 * these means there are no more to complete for the stripe request, we can 409 * release the stripe request as well. 410 */ 411 uint64_t base_bdev_io_not_submitted = raid_bdev->num_base_bdevs - 412 raid_io->base_bdev_io_submitted; 413 414 if (raid_bdev_io_complete_part(stripe_req->raid_io, base_bdev_io_not_submitted, 415 SPDK_BDEV_IO_STATUS_FAILED)) { 416 raid5f_stripe_request_release(stripe_req); 417 } 418 } 419 } 420 421 return ret; 422 } 423 424 static int 425 raid5f_chunk_set_iovcnt(struct chunk *chunk, int iovcnt) 426 { 427 if (iovcnt > chunk->iovcnt_max) { 428 struct iovec *iovs = chunk->iovs; 429 430 iovs = realloc(iovs, iovcnt * sizeof(*iovs)); 431 if (!iovs) { 432 return -ENOMEM; 433 } 434 chunk->iovs = iovs; 435 chunk->iovcnt_max = iovcnt; 436 } 437 chunk->iovcnt = iovcnt; 438 439 return 0; 440 } 441 442 static int 443 raid5f_stripe_request_map_iovecs(struct stripe_request *stripe_req) 444 { 445 struct raid_bdev *raid_bdev = stripe_req->raid_io->raid_bdev; 446 struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(stripe_req->raid_io); 447 const struct iovec *raid_io_iovs = bdev_io->u.bdev.iovs; 448 int raid_io_iovcnt = bdev_io->u.bdev.iovcnt; 449 void *raid_io_md = spdk_bdev_io_get_md_buf(bdev_io); 450 uint32_t raid_io_md_size = spdk_bdev_get_md_size(&raid_bdev->bdev); 451 struct chunk *chunk; 452 int raid_io_iov_idx = 0; 453 size_t raid_io_offset = 0; 454 size_t raid_io_iov_offset = 0; 455 int i; 456 457 FOR_EACH_DATA_CHUNK(stripe_req, chunk) { 458 int chunk_iovcnt = 0; 459 uint64_t len = raid_bdev->strip_size << raid_bdev->blocklen_shift; 460 size_t off = raid_io_iov_offset; 461 int ret; 462 463 for (i = raid_io_iov_idx; i < raid_io_iovcnt; i++) { 464 chunk_iovcnt++; 465 off += raid_io_iovs[i].iov_len; 466 if (off >= raid_io_offset + len) { 467 break; 468 } 469 } 470 471 assert(raid_io_iov_idx + chunk_iovcnt <= raid_io_iovcnt); 472 473 ret = raid5f_chunk_set_iovcnt(chunk, chunk_iovcnt); 474 if (ret) { 475 return ret; 476 } 477 478 if (raid_io_md) { 479 chunk->md_buf = raid_io_md + 480 (raid_io_offset >> raid_bdev->blocklen_shift) * raid_io_md_size; 481 } 482 483 for (i = 0; i < chunk_iovcnt; i++) { 484 struct iovec *chunk_iov = &chunk->iovs[i]; 485 const struct iovec *raid_io_iov = &raid_io_iovs[raid_io_iov_idx]; 486 size_t chunk_iov_offset = raid_io_offset - raid_io_iov_offset; 487 488 chunk_iov->iov_base = raid_io_iov->iov_base + chunk_iov_offset; 489 chunk_iov->iov_len = spdk_min(len, raid_io_iov->iov_len - chunk_iov_offset); 490 raid_io_offset += chunk_iov->iov_len; 491 len -= chunk_iov->iov_len; 492 493 if (raid_io_offset >= raid_io_iov_offset + raid_io_iov->iov_len) { 494 raid_io_iov_idx++; 495 raid_io_iov_offset += raid_io_iov->iov_len; 496 } 497 } 498 499 if (spdk_unlikely(len > 0)) { 500 return -EINVAL; 501 } 502 } 503 504 stripe_req->parity_chunk->iovs[0].iov_base = stripe_req->write.parity_buf; 505 stripe_req->parity_chunk->iovs[0].iov_len = raid_bdev->strip_size << raid_bdev->blocklen_shift; 506 stripe_req->parity_chunk->iovcnt = 1; 507 stripe_req->parity_chunk->md_buf = stripe_req->write.parity_md_buf; 508 509 return 0; 510 } 511 512 static void 513 raid5f_stripe_request_submit_chunks(struct stripe_request *stripe_req) 514 { 515 struct raid_bdev_io *raid_io = stripe_req->raid_io; 516 struct chunk *start = &stripe_req->chunks[raid_io->base_bdev_io_submitted]; 517 struct chunk *chunk; 518 519 FOR_EACH_CHUNK_FROM(stripe_req, chunk, start) { 520 if (spdk_unlikely(raid5f_chunk_submit(chunk) != 0)) { 521 break; 522 } 523 raid_io->base_bdev_io_submitted++; 524 } 525 } 526 527 static int 528 raid5f_submit_write_request(struct raid_bdev_io *raid_io, uint64_t stripe_index) 529 { 530 struct raid_bdev *raid_bdev = raid_io->raid_bdev; 531 struct raid5f_io_channel *r5ch = spdk_io_channel_get_ctx(raid_io->raid_ch->module_channel); 532 struct stripe_request *stripe_req; 533 int ret; 534 535 stripe_req = TAILQ_FIRST(&r5ch->free_stripe_requests); 536 if (!stripe_req) { 537 return -ENOMEM; 538 } 539 540 stripe_req->stripe_index = stripe_index; 541 stripe_req->parity_chunk = stripe_req->chunks + raid5f_stripe_parity_chunk_index(raid_bdev, 542 stripe_req->stripe_index); 543 stripe_req->raid_io = raid_io; 544 545 ret = raid5f_stripe_request_map_iovecs(stripe_req); 546 if (spdk_unlikely(ret)) { 547 return ret; 548 } 549 550 TAILQ_REMOVE(&r5ch->free_stripe_requests, stripe_req, link); 551 552 raid_io->module_private = stripe_req; 553 raid_io->base_bdev_io_remaining = raid_bdev->num_base_bdevs; 554 555 raid5f_xor_stripe(stripe_req); 556 557 return 0; 558 } 559 560 static void 561 raid5f_chunk_read_complete(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) 562 { 563 struct raid_bdev_io *raid_io = cb_arg; 564 565 spdk_bdev_free_io(bdev_io); 566 567 raid_bdev_io_complete(raid_io, success ? SPDK_BDEV_IO_STATUS_SUCCESS : 568 SPDK_BDEV_IO_STATUS_FAILED); 569 } 570 571 static void raid5f_submit_rw_request(struct raid_bdev_io *raid_io); 572 573 static void 574 _raid5f_submit_rw_request(void *_raid_io) 575 { 576 struct raid_bdev_io *raid_io = _raid_io; 577 578 raid5f_submit_rw_request(raid_io); 579 } 580 581 static int 582 raid5f_submit_read_request(struct raid_bdev_io *raid_io, uint64_t stripe_index, 583 uint64_t stripe_offset) 584 { 585 struct raid_bdev *raid_bdev = raid_io->raid_bdev; 586 uint8_t chunk_data_idx = stripe_offset >> raid_bdev->strip_size_shift; 587 uint8_t p_idx = raid5f_stripe_parity_chunk_index(raid_bdev, stripe_index); 588 uint8_t chunk_idx = chunk_data_idx < p_idx ? chunk_data_idx : chunk_data_idx + 1; 589 struct raid_base_bdev_info *base_info = &raid_bdev->base_bdev_info[chunk_idx]; 590 struct spdk_io_channel *base_ch = raid_io->raid_ch->base_channel[chunk_idx]; 591 uint64_t chunk_offset = stripe_offset - (chunk_data_idx << raid_bdev->strip_size_shift); 592 uint64_t base_offset_blocks = (stripe_index << raid_bdev->strip_size_shift) + chunk_offset; 593 struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io); 594 struct spdk_bdev_ext_io_opts io_opts; 595 int ret; 596 597 raid5f_init_ext_io_opts(bdev_io, &io_opts); 598 ret = spdk_bdev_readv_blocks_ext(base_info->desc, base_ch, bdev_io->u.bdev.iovs, 599 bdev_io->u.bdev.iovcnt, 600 base_offset_blocks, bdev_io->u.bdev.num_blocks, raid5f_chunk_read_complete, raid_io, 601 &io_opts); 602 603 if (spdk_unlikely(ret == -ENOMEM)) { 604 raid_bdev_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc), 605 base_ch, _raid5f_submit_rw_request); 606 return 0; 607 } 608 609 return ret; 610 } 611 612 static void 613 raid5f_submit_rw_request(struct raid_bdev_io *raid_io) 614 { 615 struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io); 616 struct raid_bdev *raid_bdev = raid_io->raid_bdev; 617 struct raid5f_info *r5f_info = raid_bdev->module_private; 618 uint64_t offset_blocks = bdev_io->u.bdev.offset_blocks; 619 uint64_t stripe_index = offset_blocks / r5f_info->stripe_blocks; 620 uint64_t stripe_offset = offset_blocks % r5f_info->stripe_blocks; 621 int ret; 622 623 switch (bdev_io->type) { 624 case SPDK_BDEV_IO_TYPE_READ: 625 assert(bdev_io->u.bdev.num_blocks <= raid_bdev->strip_size); 626 ret = raid5f_submit_read_request(raid_io, stripe_index, stripe_offset); 627 break; 628 case SPDK_BDEV_IO_TYPE_WRITE: 629 assert(stripe_offset == 0); 630 assert(bdev_io->u.bdev.num_blocks == r5f_info->stripe_blocks); 631 ret = raid5f_submit_write_request(raid_io, stripe_index); 632 break; 633 default: 634 ret = -EINVAL; 635 break; 636 } 637 638 if (spdk_unlikely(ret)) { 639 raid_bdev_io_complete(raid_io, ret == -ENOMEM ? SPDK_BDEV_IO_STATUS_NOMEM : 640 SPDK_BDEV_IO_STATUS_FAILED); 641 } 642 } 643 644 static void 645 raid5f_stripe_request_free(struct stripe_request *stripe_req) 646 { 647 struct chunk *chunk; 648 649 FOR_EACH_CHUNK(stripe_req, chunk) { 650 free(chunk->iovs); 651 } 652 653 if (stripe_req->type == STRIPE_REQ_WRITE) { 654 spdk_dma_free(stripe_req->write.parity_buf); 655 spdk_dma_free(stripe_req->write.parity_md_buf); 656 } else { 657 assert(false); 658 } 659 660 free(stripe_req->chunk_xor_buffers); 661 free(stripe_req->chunk_xor_md_buffers); 662 free(stripe_req->chunk_iov_iters); 663 664 free(stripe_req); 665 } 666 667 static struct stripe_request * 668 raid5f_stripe_request_alloc(struct raid5f_io_channel *r5ch, enum stripe_request_type type) 669 { 670 struct raid5f_info *r5f_info = raid5f_ch_to_r5f_info(r5ch); 671 struct raid_bdev *raid_bdev = r5f_info->raid_bdev; 672 uint32_t raid_io_md_size = spdk_bdev_get_md_size(&raid_bdev->bdev); 673 struct stripe_request *stripe_req; 674 struct chunk *chunk; 675 676 stripe_req = calloc(1, sizeof(*stripe_req) + sizeof(*chunk) * raid_bdev->num_base_bdevs); 677 if (!stripe_req) { 678 return NULL; 679 } 680 681 stripe_req->r5ch = r5ch; 682 stripe_req->type = type; 683 684 FOR_EACH_CHUNK(stripe_req, chunk) { 685 chunk->index = chunk - stripe_req->chunks; 686 chunk->iovcnt_max = 4; 687 chunk->iovs = calloc(chunk->iovcnt_max, sizeof(chunk->iovs[0])); 688 if (!chunk->iovs) { 689 goto err; 690 } 691 } 692 693 if (type == STRIPE_REQ_WRITE) { 694 stripe_req->write.parity_buf = spdk_dma_malloc(raid_bdev->strip_size << raid_bdev->blocklen_shift, 695 r5f_info->buf_alignment, NULL); 696 if (!stripe_req->write.parity_buf) { 697 goto err; 698 } 699 700 if (raid_io_md_size != 0) { 701 stripe_req->write.parity_md_buf = spdk_dma_malloc(raid_bdev->strip_size * raid_io_md_size, 702 r5f_info->buf_alignment, NULL); 703 if (!stripe_req->write.parity_md_buf) { 704 goto err; 705 } 706 } 707 } else { 708 assert(false); 709 return NULL; 710 } 711 712 stripe_req->chunk_iov_iters = malloc(SPDK_IOVITER_SIZE(raid_bdev->num_base_bdevs)); 713 if (!stripe_req->chunk_iov_iters) { 714 goto err; 715 } 716 717 stripe_req->chunk_xor_buffers = calloc(raid5f_stripe_data_chunks_num(raid_bdev), 718 sizeof(stripe_req->chunk_xor_buffers[0])); 719 if (!stripe_req->chunk_xor_buffers) { 720 goto err; 721 } 722 723 stripe_req->chunk_xor_md_buffers = calloc(raid5f_stripe_data_chunks_num(raid_bdev), 724 sizeof(stripe_req->chunk_xor_md_buffers[0])); 725 if (!stripe_req->chunk_xor_md_buffers) { 726 goto err; 727 } 728 729 return stripe_req; 730 err: 731 raid5f_stripe_request_free(stripe_req); 732 return NULL; 733 } 734 735 static void 736 raid5f_ioch_destroy(void *io_device, void *ctx_buf) 737 { 738 struct raid5f_io_channel *r5ch = ctx_buf; 739 struct stripe_request *stripe_req; 740 741 assert(TAILQ_EMPTY(&r5ch->xor_retry_queue)); 742 743 while ((stripe_req = TAILQ_FIRST(&r5ch->free_stripe_requests))) { 744 TAILQ_REMOVE(&r5ch->free_stripe_requests, stripe_req, link); 745 raid5f_stripe_request_free(stripe_req); 746 } 747 748 if (r5ch->accel_ch) { 749 spdk_put_io_channel(r5ch->accel_ch); 750 } 751 752 free(r5ch->chunk_xor_buffers); 753 free(r5ch->chunk_xor_iovs); 754 free(r5ch->chunk_xor_iovcnt); 755 } 756 757 static int 758 raid5f_ioch_create(void *io_device, void *ctx_buf) 759 { 760 struct raid5f_io_channel *r5ch = ctx_buf; 761 struct raid5f_info *r5f_info = io_device; 762 struct raid_bdev *raid_bdev = r5f_info->raid_bdev; 763 int i; 764 765 TAILQ_INIT(&r5ch->free_stripe_requests); 766 TAILQ_INIT(&r5ch->xor_retry_queue); 767 768 for (i = 0; i < RAID5F_MAX_STRIPES; i++) { 769 struct stripe_request *stripe_req; 770 771 stripe_req = raid5f_stripe_request_alloc(r5ch, STRIPE_REQ_WRITE); 772 if (!stripe_req) { 773 goto err; 774 } 775 776 TAILQ_INSERT_HEAD(&r5ch->free_stripe_requests, stripe_req, link); 777 } 778 779 r5ch->accel_ch = spdk_accel_get_io_channel(); 780 if (!r5ch->accel_ch) { 781 SPDK_ERRLOG("Failed to get accel framework's IO channel\n"); 782 goto err; 783 } 784 785 r5ch->chunk_xor_buffers = calloc(raid_bdev->num_base_bdevs, sizeof(*r5ch->chunk_xor_buffers)); 786 if (!r5ch->chunk_xor_buffers) { 787 goto err; 788 } 789 790 r5ch->chunk_xor_iovs = calloc(raid_bdev->num_base_bdevs, sizeof(*r5ch->chunk_xor_iovs)); 791 if (!r5ch->chunk_xor_iovs) { 792 goto err; 793 } 794 795 r5ch->chunk_xor_iovcnt = calloc(raid_bdev->num_base_bdevs, sizeof(*r5ch->chunk_xor_iovcnt)); 796 if (!r5ch->chunk_xor_iovcnt) { 797 goto err; 798 } 799 800 return 0; 801 err: 802 SPDK_ERRLOG("Failed to initialize io channel\n"); 803 raid5f_ioch_destroy(r5f_info, r5ch); 804 return -ENOMEM; 805 } 806 807 static int 808 raid5f_start(struct raid_bdev *raid_bdev) 809 { 810 uint64_t min_blockcnt = UINT64_MAX; 811 struct raid_base_bdev_info *base_info; 812 struct raid5f_info *r5f_info; 813 size_t alignment = 0; 814 815 r5f_info = calloc(1, sizeof(*r5f_info)); 816 if (!r5f_info) { 817 SPDK_ERRLOG("Failed to allocate r5f_info\n"); 818 return -ENOMEM; 819 } 820 r5f_info->raid_bdev = raid_bdev; 821 822 RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) { 823 struct spdk_bdev *base_bdev; 824 825 base_bdev = spdk_bdev_desc_get_bdev(base_info->desc); 826 min_blockcnt = spdk_min(min_blockcnt, base_bdev->blockcnt); 827 alignment = spdk_max(alignment, spdk_bdev_get_buf_align(base_bdev)); 828 } 829 830 r5f_info->total_stripes = min_blockcnt / raid_bdev->strip_size; 831 r5f_info->stripe_blocks = raid_bdev->strip_size * raid5f_stripe_data_chunks_num(raid_bdev); 832 r5f_info->buf_alignment = alignment; 833 834 raid_bdev->bdev.blockcnt = r5f_info->stripe_blocks * r5f_info->total_stripes; 835 raid_bdev->bdev.optimal_io_boundary = raid_bdev->strip_size; 836 raid_bdev->bdev.split_on_optimal_io_boundary = true; 837 raid_bdev->bdev.write_unit_size = r5f_info->stripe_blocks; 838 raid_bdev->bdev.split_on_write_unit = true; 839 840 raid_bdev->module_private = r5f_info; 841 842 spdk_io_device_register(r5f_info, raid5f_ioch_create, raid5f_ioch_destroy, 843 sizeof(struct raid5f_io_channel), NULL); 844 845 return 0; 846 } 847 848 static void 849 raid5f_io_device_unregister_done(void *io_device) 850 { 851 struct raid5f_info *r5f_info = io_device; 852 853 raid_bdev_module_stop_done(r5f_info->raid_bdev); 854 855 free(r5f_info); 856 } 857 858 static bool 859 raid5f_stop(struct raid_bdev *raid_bdev) 860 { 861 struct raid5f_info *r5f_info = raid_bdev->module_private; 862 863 spdk_io_device_unregister(r5f_info, raid5f_io_device_unregister_done); 864 865 return false; 866 } 867 868 static struct spdk_io_channel * 869 raid5f_get_io_channel(struct raid_bdev *raid_bdev) 870 { 871 struct raid5f_info *r5f_info = raid_bdev->module_private; 872 873 return spdk_get_io_channel(r5f_info); 874 } 875 876 static struct raid_bdev_module g_raid5f_module = { 877 .level = RAID5F, 878 .base_bdevs_min = 3, 879 .base_bdevs_constraint = {CONSTRAINT_MAX_BASE_BDEVS_REMOVED, 1}, 880 .start = raid5f_start, 881 .stop = raid5f_stop, 882 .submit_rw_request = raid5f_submit_rw_request, 883 .get_io_channel = raid5f_get_io_channel, 884 }; 885 RAID_MODULE_REGISTER(&g_raid5f_module) 886 887 SPDK_LOG_REGISTER_COMPONENT(bdev_raid5f) 888