/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (C) 2022 Intel Corporation.
 * All rights reserved.
 */

#include "bdev_raid.h"

#include "spdk/env.h"
#include "spdk/thread.h"
#include "spdk/string.h"
#include "spdk/util.h"
#include "spdk/likely.h"
#include "spdk/log.h"
#include "spdk/accel.h"

/* Maximum concurrent full stripe writes per io channel */
#define RAID5F_MAX_STRIPES 32

struct chunk {
	/* Corresponds to base_bdev index */
	uint8_t index;

	/* Array of iovecs */
	struct iovec *iovs;

	/* Number of used iovecs */
	int iovcnt;

	/* Total number of available iovecs in the array */
	int iovcnt_max;

	/* Pointer to buffer with I/O metadata */
	void *md_buf;
};

struct stripe_request;
typedef void (*stripe_req_xor_cb)(struct stripe_request *stripe_req, int status);

struct stripe_request {
	enum stripe_request_type {
		STRIPE_REQ_WRITE,
		STRIPE_REQ_RECONSTRUCT,
	} type;

	struct raid5f_io_channel *r5ch;

	/* The associated raid_bdev_io */
	struct raid_bdev_io *raid_io;

	/* The stripe's index in the raid array. */
	uint64_t stripe_index;

	/* The stripe's parity chunk */
	struct chunk *parity_chunk;

	union {
		struct {
			/* Buffer for stripe parity */
			void *parity_buf;

			/* Buffer for stripe io metadata parity */
			void *parity_md_buf;
		} write;

		struct {
			/* Array of buffers for reading chunk data */
			void **chunk_buffers;

			/* Array of buffers for reading chunk metadata */
			void **chunk_md_buffers;

			/* Chunk to reconstruct from parity */
			struct chunk *chunk;

			/* Offset from chunk start */
			uint64_t chunk_offset;
		} reconstruct;
	};

	/* Array of iovec iterators for each chunk */
	struct spdk_ioviter *chunk_iov_iters;

	/* Array of source buffer pointers for parity calculation */
	void **chunk_xor_buffers;

	/* Array of source buffer pointers for parity calculation of io metadata */
	void **chunk_xor_md_buffers;

	struct {
		size_t len;
		size_t remaining;
		size_t remaining_md;
		int status;
		stripe_req_xor_cb cb;
	} xor;

	TAILQ_ENTRY(stripe_request) link;

	/* Array of chunks corresponding to base_bdevs */
	struct chunk chunks[0];
};

struct raid5f_info {
	/* The parent raid bdev */
	struct raid_bdev *raid_bdev;

	/* Number of data blocks in a stripe (without parity) */
	uint64_t stripe_blocks;

	/* Number of stripes on this array */
	uint64_t total_stripes;

	/* Alignment for buffer allocation */
	size_t buf_alignment;

	/* Block length bit shift for optimized calculation; only valid when there is no interleaved md */
	uint32_t blocklen_shift;
};

struct raid5f_io_channel {
	/* All available stripe requests on this channel */
	struct {
		TAILQ_HEAD(, stripe_request) write;
		TAILQ_HEAD(, stripe_request) reconstruct;
	} free_stripe_requests;

	/* accel_fw channel */
	struct spdk_io_channel *accel_ch;

	/* For retrying xor if accel_ch runs out of resources */
	TAILQ_HEAD(, stripe_request) xor_retry_queue;

	/* For iterating over chunk iovecs during xor calculation */
	void **chunk_xor_buffers;
	struct iovec **chunk_xor_iovs;
	size_t *chunk_xor_iovcnt;
};

#define __CHUNK_IN_RANGE(req, c) \
	c < req->chunks + raid5f_ch_to_r5f_info(req->r5ch)->raid_bdev->num_base_bdevs

#define FOR_EACH_CHUNK_FROM(req, c, from) \
	for (c = from; __CHUNK_IN_RANGE(req, c); c++)

#define FOR_EACH_CHUNK(req, c) \
	FOR_EACH_CHUNK_FROM(req, c, req->chunks)

#define __NEXT_DATA_CHUNK(req, c) \
	c == req->parity_chunk ? c + 1 : c

#define FOR_EACH_DATA_CHUNK(req, c) \
	for (c = __NEXT_DATA_CHUNK(req, req->chunks); __CHUNK_IN_RANGE(req, c); \
	     c = __NEXT_DATA_CHUNK(req, c + 1))

static inline struct raid5f_info *
raid5f_ch_to_r5f_info(struct raid5f_io_channel *r5ch)
{
	return spdk_io_channel_get_io_device(spdk_io_channel_from_ctx(r5ch));
}

static inline struct stripe_request *
raid5f_chunk_stripe_req(struct chunk *chunk)
{
	return SPDK_CONTAINEROF((chunk - chunk->index), struct stripe_request, chunks);
}

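/*
 * Number of data chunks in a stripe. For RAID5F this is the number of base
 * bdevs minus one, since a single chunk in each stripe holds parity.
 */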
static inline uint8_t
raid5f_stripe_data_chunks_num(const struct raid_bdev *raid_bdev)
{
	return raid_bdev->min_base_bdevs_operational;
}

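/*
 * Index of the parity chunk within a stripe. The parity chunk rotates across
 * the base bdevs: stripe 0 keeps it in the last chunk and each following
 * stripe moves it one position towards the first chunk, wrapping around.
 */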
static inline uint8_t
raid5f_stripe_parity_chunk_index(const struct raid_bdev *raid_bdev, uint64_t stripe_index)
{
	return raid5f_stripe_data_chunks_num(raid_bdev) - stripe_index % raid_bdev->num_base_bdevs;
}

static inline void
raid5f_stripe_request_release(struct stripe_request *stripe_req)
{
	if (spdk_likely(stripe_req->type == STRIPE_REQ_WRITE)) {
		TAILQ_INSERT_HEAD(&stripe_req->r5ch->free_stripe_requests.write, stripe_req, link);
	} else if (stripe_req->type == STRIPE_REQ_RECONSTRUCT) {
		TAILQ_INSERT_HEAD(&stripe_req->r5ch->free_stripe_requests.reconstruct, stripe_req, link);
	} else {
		assert(false);
	}
}

static void raid5f_xor_stripe_retry(struct stripe_request *stripe_req);

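/*
 * Called when both the data and the metadata XOR operations of a stripe
 * request have completed. Invokes the request's xor callback and, if other
 * requests are waiting on accel resources, kicks off the next retry.
 */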
static void
raid5f_xor_stripe_done(struct stripe_request *stripe_req)
{
	struct raid5f_io_channel *r5ch = stripe_req->r5ch;

	if (stripe_req->xor.status != 0) {
		SPDK_ERRLOG("stripe xor failed: %s\n", spdk_strerror(-stripe_req->xor.status));
	}

	stripe_req->xor.cb(stripe_req, stripe_req->xor.status);

	if (!TAILQ_EMPTY(&r5ch->xor_retry_queue)) {
		stripe_req = TAILQ_FIRST(&r5ch->xor_retry_queue);
		TAILQ_REMOVE(&r5ch->xor_retry_queue, stripe_req, link);
		raid5f_xor_stripe_retry(stripe_req);
	}
}

static void raid5f_xor_stripe_continue(struct stripe_request *stripe_req);

static void
_raid5f_xor_stripe_cb(struct stripe_request *stripe_req, int status)
{
	if (status != 0) {
		stripe_req->xor.status = status;
	}

	if (stripe_req->xor.remaining + stripe_req->xor.remaining_md == 0) {
		raid5f_xor_stripe_done(stripe_req);
	}
}

static void
raid5f_xor_stripe_cb(void *_stripe_req, int status)
{
	struct stripe_request *stripe_req = _stripe_req;

	stripe_req->xor.remaining -= stripe_req->xor.len;

	if (stripe_req->xor.remaining > 0) {
		stripe_req->xor.len = spdk_ioviter_nextv(stripe_req->chunk_iov_iters,
				      stripe_req->r5ch->chunk_xor_buffers);
		raid5f_xor_stripe_continue(stripe_req);
	}

	_raid5f_xor_stripe_cb(stripe_req, status);
}

static void
raid5f_xor_stripe_md_cb(void *_stripe_req, int status)
{
	struct stripe_request *stripe_req = _stripe_req;

	stripe_req->xor.remaining_md = 0;

	_raid5f_xor_stripe_cb(stripe_req, status);
}

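/*
 * Submit the XOR of the current iovec segment to the accel framework. The
 * source buffers were set up by spdk_ioviter_firstv()/nextv() and the
 * destination is the last of the iterated buffers. On -ENOMEM the request is
 * parked on the xor_retry_queue until another XOR completes.
 */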
static void
raid5f_xor_stripe_continue(struct stripe_request *stripe_req)
{
	struct raid5f_io_channel *r5ch = stripe_req->r5ch;
	struct raid_bdev_io *raid_io = stripe_req->raid_io;
	struct raid_bdev *raid_bdev = raid_io->raid_bdev;
	uint8_t n_src = raid5f_stripe_data_chunks_num(raid_bdev);
	uint8_t i;
	int ret;

	assert(stripe_req->xor.len > 0);

	for (i = 0; i < n_src; i++) {
		stripe_req->chunk_xor_buffers[i] = r5ch->chunk_xor_buffers[i];
	}

	ret = spdk_accel_submit_xor(r5ch->accel_ch, r5ch->chunk_xor_buffers[n_src],
				    stripe_req->chunk_xor_buffers, n_src, stripe_req->xor.len,
				    raid5f_xor_stripe_cb, stripe_req);
	if (spdk_unlikely(ret)) {
		if (ret == -ENOMEM) {
			TAILQ_INSERT_HEAD(&r5ch->xor_retry_queue, stripe_req, link);
		} else {
			stripe_req->xor.status = ret;
			raid5f_xor_stripe_done(stripe_req);
		}
	}
}

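/*
 * Start XOR-ing a stripe. For a write request the destination is the parity
 * chunk; for a reconstruct request it is the chunk being rebuilt. The
 * destination chunk's iovecs are placed last in the iterated arrays, so the
 * iovec iterator yields the source buffers followed by the destination. If
 * the bdev has separate metadata, its parity is XOR-ed as well.
 */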
static void
raid5f_xor_stripe(struct stripe_request *stripe_req, stripe_req_xor_cb cb)
{
	struct raid5f_io_channel *r5ch = stripe_req->r5ch;
	struct raid_bdev_io *raid_io = stripe_req->raid_io;
	struct raid_bdev *raid_bdev = raid_io->raid_bdev;
	struct chunk *chunk;
	struct chunk *dest_chunk = NULL;
	uint64_t num_blocks = 0;
	uint8_t c;

	assert(cb != NULL);

	if (spdk_likely(stripe_req->type == STRIPE_REQ_WRITE)) {
		num_blocks = raid_bdev->strip_size;
		dest_chunk = stripe_req->parity_chunk;
	} else if (stripe_req->type == STRIPE_REQ_RECONSTRUCT) {
		num_blocks = raid_io->num_blocks;
		dest_chunk = stripe_req->reconstruct.chunk;
	} else {
		assert(false);
	}

	c = 0;
	FOR_EACH_CHUNK(stripe_req, chunk) {
		if (chunk == dest_chunk) {
			continue;
		}
		r5ch->chunk_xor_iovs[c] = chunk->iovs;
		r5ch->chunk_xor_iovcnt[c] = chunk->iovcnt;
		c++;
	}
	r5ch->chunk_xor_iovs[c] = dest_chunk->iovs;
	r5ch->chunk_xor_iovcnt[c] = dest_chunk->iovcnt;

	stripe_req->xor.len = spdk_ioviter_firstv(stripe_req->chunk_iov_iters,
			      raid_bdev->num_base_bdevs,
			      r5ch->chunk_xor_iovs,
			      r5ch->chunk_xor_iovcnt,
			      r5ch->chunk_xor_buffers);
	stripe_req->xor.remaining = num_blocks * raid_bdev->bdev.blocklen;
	stripe_req->xor.status = 0;
	stripe_req->xor.cb = cb;

	if (raid_io->md_buf != NULL) {
		uint8_t n_src = raid5f_stripe_data_chunks_num(raid_bdev);
		uint64_t len = num_blocks * raid_bdev->bdev.md_len;
		int ret;

		stripe_req->xor.remaining_md = len;

		c = 0;
		FOR_EACH_CHUNK(stripe_req, chunk) {
			if (chunk != dest_chunk) {
				stripe_req->chunk_xor_md_buffers[c] = chunk->md_buf;
				c++;
			}
		}

		ret = spdk_accel_submit_xor(stripe_req->r5ch->accel_ch, dest_chunk->md_buf,
					    stripe_req->chunk_xor_md_buffers, n_src, len,
					    raid5f_xor_stripe_md_cb, stripe_req);
		if (spdk_unlikely(ret)) {
			if (ret == -ENOMEM) {
				TAILQ_INSERT_HEAD(&stripe_req->r5ch->xor_retry_queue, stripe_req, link);
			} else {
				stripe_req->xor.status = ret;
				raid5f_xor_stripe_done(stripe_req);
			}
			return;
		}
	}

	raid5f_xor_stripe_continue(stripe_req);
}

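/*
 * Retry a stripe XOR that was previously parked because of -ENOMEM. If the
 * metadata XOR is still outstanding, restart the whole stripe XOR; otherwise
 * resume the data XOR from the current iterator position.
 */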
static void
raid5f_xor_stripe_retry(struct stripe_request *stripe_req)
{
	if (stripe_req->xor.remaining_md) {
		raid5f_xor_stripe(stripe_req, stripe_req->xor.cb);
	} else {
		raid5f_xor_stripe_continue(stripe_req);
	}
}

static void
raid5f_stripe_request_chunk_write_complete(struct stripe_request *stripe_req,
		enum spdk_bdev_io_status status)
{
	if (raid_bdev_io_complete_part(stripe_req->raid_io, 1, status)) {
		raid5f_stripe_request_release(stripe_req);
	}
}

static void
raid5f_stripe_request_chunk_read_complete(struct stripe_request *stripe_req,
		enum spdk_bdev_io_status status)
{
	struct raid_bdev_io *raid_io = stripe_req->raid_io;

	raid_bdev_io_complete_part(raid_io, 1, status);
}

static void
raid5f_chunk_complete_bdev_io(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct chunk *chunk = cb_arg;
	struct stripe_request *stripe_req = raid5f_chunk_stripe_req(chunk);
	enum spdk_bdev_io_status status = success ? SPDK_BDEV_IO_STATUS_SUCCESS :
					  SPDK_BDEV_IO_STATUS_FAILED;

	spdk_bdev_free_io(bdev_io);

	if (spdk_likely(stripe_req->type == STRIPE_REQ_WRITE)) {
		raid5f_stripe_request_chunk_write_complete(stripe_req, status);
	} else if (stripe_req->type == STRIPE_REQ_RECONSTRUCT) {
		raid5f_stripe_request_chunk_read_complete(stripe_req, status);
	} else {
		assert(false);
	}
}

static void raid5f_stripe_request_submit_chunks(struct stripe_request *stripe_req);

static void
raid5f_chunk_submit_retry(void *_raid_io)
{
	struct raid_bdev_io *raid_io = _raid_io;
	struct stripe_request *stripe_req = raid_io->module_private;

	raid5f_stripe_request_submit_chunks(stripe_req);
}

static inline void
raid5f_init_ext_io_opts(struct spdk_bdev_ext_io_opts *opts, struct raid_bdev_io *raid_io)
{
	memset(opts, 0, sizeof(*opts));
	opts->size = sizeof(*opts);
	opts->memory_domain = raid_io->memory_domain;
	opts->memory_domain_ctx = raid_io->memory_domain_ctx;
	opts->metadata = raid_io->md_buf;
}

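/*
 * Submit the base bdev I/O for one chunk of a stripe request. A write to a
 * chunk whose base bdev is missing is skipped and counted as a success; on a
 * reconstruct, the chunk being rebuilt is likewise completed without I/O.
 * -ENOMEM from the base bdev queues a retry, while any other error
 * implicitly fails all chunks that have not been submitted yet.
 */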
static int
raid5f_chunk_submit(struct chunk *chunk)
{
	struct stripe_request *stripe_req = raid5f_chunk_stripe_req(chunk);
	struct raid_bdev_io *raid_io = stripe_req->raid_io;
	struct raid_bdev *raid_bdev = raid_io->raid_bdev;
	struct raid_base_bdev_info *base_info = &raid_bdev->base_bdev_info[chunk->index];
	struct spdk_io_channel *base_ch = raid_bdev_channel_get_base_channel(raid_io->raid_ch,
					  chunk->index);
	uint64_t base_offset_blocks = (stripe_req->stripe_index << raid_bdev->strip_size_shift);
	struct spdk_bdev_ext_io_opts io_opts;
	int ret;

	raid5f_init_ext_io_opts(&io_opts, raid_io);
	io_opts.metadata = chunk->md_buf;

	raid_io->base_bdev_io_submitted++;

	switch (stripe_req->type) {
	case STRIPE_REQ_WRITE:
		if (base_ch == NULL) {
			raid_bdev_io_complete_part(raid_io, 1, SPDK_BDEV_IO_STATUS_SUCCESS);
			return 0;
		}

		ret = raid_bdev_writev_blocks_ext(base_info, base_ch, chunk->iovs, chunk->iovcnt,
						  base_offset_blocks, raid_bdev->strip_size,
						  raid5f_chunk_complete_bdev_io, chunk, &io_opts);
		break;
	case STRIPE_REQ_RECONSTRUCT:
		if (chunk == stripe_req->reconstruct.chunk) {
			raid_bdev_io_complete_part(raid_io, 1, SPDK_BDEV_IO_STATUS_SUCCESS);
			return 0;
		}

		base_offset_blocks += stripe_req->reconstruct.chunk_offset;

		ret = raid_bdev_readv_blocks_ext(base_info, base_ch, chunk->iovs, chunk->iovcnt,
						 base_offset_blocks, raid_io->num_blocks,
						 raid5f_chunk_complete_bdev_io, chunk, &io_opts);
		break;
	default:
		assert(false);
		ret = -EINVAL;
		break;
	}

	if (spdk_unlikely(ret)) {
		raid_io->base_bdev_io_submitted--;
		if (ret == -ENOMEM) {
			raid_bdev_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc),
						base_ch, raid5f_chunk_submit_retry);
		} else {
			/*
			 * Implicitly complete any I/Os not yet submitted as FAILED. If completing
			 * these means there are no more to complete for the stripe request, we can
			 * release the stripe request as well.
			 */
			uint64_t base_bdev_io_not_submitted;

			if (stripe_req->type == STRIPE_REQ_WRITE) {
				base_bdev_io_not_submitted = raid_bdev->num_base_bdevs -
							     raid_io->base_bdev_io_submitted;
			} else {
				base_bdev_io_not_submitted = raid5f_stripe_data_chunks_num(raid_bdev) -
							     raid_io->base_bdev_io_submitted;
			}

			if (raid_bdev_io_complete_part(raid_io, base_bdev_io_not_submitted,
						       SPDK_BDEV_IO_STATUS_FAILED)) {
				raid5f_stripe_request_release(stripe_req);
			}
		}
	}

	return ret;
}

static int
raid5f_chunk_set_iovcnt(struct chunk *chunk, int iovcnt)
{
	if (iovcnt > chunk->iovcnt_max) {
		struct iovec *iovs = chunk->iovs;

		iovs = realloc(iovs, iovcnt * sizeof(*iovs));
		if (!iovs) {
			return -ENOMEM;
		}
		chunk->iovs = iovs;
		chunk->iovcnt_max = iovcnt;
	}
	chunk->iovcnt = iovcnt;

	return 0;
}

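/*
 * Split the parent I/O's iovecs across the stripe's data chunks, strip_size
 * blocks per chunk, growing a chunk's iovec array when the mapping needs
 * more entries. The parity chunk is pointed at the preallocated parity
 * buffer(s).
 */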
static int
raid5f_stripe_request_map_iovecs(struct stripe_request *stripe_req)
{
	struct raid_bdev_io *raid_io = stripe_req->raid_io;
	struct raid_bdev *raid_bdev = raid_io->raid_bdev;
	struct raid5f_info *r5f_info = raid_bdev->module_private;
	struct chunk *chunk;
	int raid_io_iov_idx = 0;
	size_t raid_io_offset = 0;
	size_t raid_io_iov_offset = 0;
	int i;

	FOR_EACH_DATA_CHUNK(stripe_req, chunk) {
		int chunk_iovcnt = 0;
		uint64_t len = raid_bdev->strip_size * raid_bdev->bdev.blocklen;
		size_t off = raid_io_iov_offset;
		int ret;

		for (i = raid_io_iov_idx; i < raid_io->iovcnt; i++) {
			chunk_iovcnt++;
			off += raid_io->iovs[i].iov_len;
			if (off >= raid_io_offset + len) {
				break;
			}
		}

		assert(raid_io_iov_idx + chunk_iovcnt <= raid_io->iovcnt);

		ret = raid5f_chunk_set_iovcnt(chunk, chunk_iovcnt);
		if (ret) {
			return ret;
		}

		if (raid_io->md_buf != NULL) {
			chunk->md_buf = raid_io->md_buf +
					(raid_io_offset >> r5f_info->blocklen_shift) * raid_bdev->bdev.md_len;
		}

		for (i = 0; i < chunk_iovcnt; i++) {
			struct iovec *chunk_iov = &chunk->iovs[i];
			const struct iovec *raid_io_iov = &raid_io->iovs[raid_io_iov_idx];
			size_t chunk_iov_offset = raid_io_offset - raid_io_iov_offset;

			chunk_iov->iov_base = raid_io_iov->iov_base + chunk_iov_offset;
			chunk_iov->iov_len = spdk_min(len, raid_io_iov->iov_len - chunk_iov_offset);
			raid_io_offset += chunk_iov->iov_len;
			len -= chunk_iov->iov_len;

			if (raid_io_offset >= raid_io_iov_offset + raid_io_iov->iov_len) {
				raid_io_iov_idx++;
				raid_io_iov_offset += raid_io_iov->iov_len;
			}
		}

		if (spdk_unlikely(len > 0)) {
			return -EINVAL;
		}
	}

	stripe_req->parity_chunk->iovs[0].iov_base = stripe_req->write.parity_buf;
	stripe_req->parity_chunk->iovs[0].iov_len = raid_bdev->strip_size * raid_bdev->bdev.blocklen;
	stripe_req->parity_chunk->iovcnt = 1;
	stripe_req->parity_chunk->md_buf = stripe_req->write.parity_md_buf;

	return 0;
}

static void
raid5f_stripe_request_submit_chunks(struct stripe_request *stripe_req)
{
	struct raid_bdev_io *raid_io = stripe_req->raid_io;
	struct chunk *start = &stripe_req->chunks[raid_io->base_bdev_io_submitted];
	struct chunk *chunk;

	FOR_EACH_CHUNK_FROM(stripe_req, chunk, start) {
		if (spdk_unlikely(raid5f_chunk_submit(chunk) != 0)) {
			break;
		}
	}
}

static inline void
raid5f_stripe_request_init(struct stripe_request *stripe_req, struct raid_bdev_io *raid_io,
			   uint64_t stripe_index)
{
	stripe_req->raid_io = raid_io;
	stripe_req->stripe_index = stripe_index;
	stripe_req->parity_chunk = &stripe_req->chunks[raid5f_stripe_parity_chunk_index(raid_io->raid_bdev,
				   stripe_index)];
}

static void
raid5f_stripe_write_request_xor_done(struct stripe_request *stripe_req, int status)
{
	struct raid_bdev_io *raid_io = stripe_req->raid_io;

	if (status != 0) {
		raid5f_stripe_request_release(stripe_req);
		raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED);
	} else {
		raid5f_stripe_request_submit_chunks(stripe_req);
	}
}

static int
raid5f_submit_write_request(struct raid_bdev_io *raid_io, uint64_t stripe_index)
{
	struct raid_bdev *raid_bdev = raid_io->raid_bdev;
	struct raid5f_io_channel *r5ch = raid_bdev_channel_get_module_ctx(raid_io->raid_ch);
	struct stripe_request *stripe_req;
	int ret;

	stripe_req = TAILQ_FIRST(&r5ch->free_stripe_requests.write);
	if (!stripe_req) {
		return -ENOMEM;
	}

	raid5f_stripe_request_init(stripe_req, raid_io, stripe_index);

	ret = raid5f_stripe_request_map_iovecs(stripe_req);
	if (spdk_unlikely(ret)) {
		return ret;
	}

	TAILQ_REMOVE(&r5ch->free_stripe_requests.write, stripe_req, link);

	raid_io->module_private = stripe_req;
	raid_io->base_bdev_io_remaining = raid_bdev->num_base_bdevs;

	if (raid_bdev_channel_get_base_channel(raid_io->raid_ch, stripe_req->parity_chunk->index) != NULL) {
		raid5f_xor_stripe(stripe_req, raid5f_stripe_write_request_xor_done);
	} else {
		raid5f_stripe_write_request_xor_done(stripe_req, 0);
	}

	return 0;
}

static void
raid5f_chunk_read_complete(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct raid_bdev_io *raid_io = cb_arg;

	spdk_bdev_free_io(bdev_io);

	raid_bdev_io_complete(raid_io, success ? SPDK_BDEV_IO_STATUS_SUCCESS :
			      SPDK_BDEV_IO_STATUS_FAILED);
}

static void raid5f_submit_rw_request(struct raid_bdev_io *raid_io);

static void
_raid5f_submit_rw_request(void *_raid_io)
{
	struct raid_bdev_io *raid_io = _raid_io;

	raid5f_submit_rw_request(raid_io);
}

static void
raid5f_stripe_request_reconstruct_xor_done(struct stripe_request *stripe_req, int status)
{
	struct raid_bdev_io *raid_io = stripe_req->raid_io;

	raid5f_stripe_request_release(stripe_req);

	raid_bdev_io_complete(raid_io,
			      status == 0 ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED);
}

static void
raid5f_reconstruct_reads_completed_cb(struct raid_bdev_io *raid_io, enum spdk_bdev_io_status status)
{
	struct stripe_request *stripe_req = raid_io->module_private;

	raid_io->completion_cb = NULL;

	if (status != SPDK_BDEV_IO_STATUS_SUCCESS) {
		stripe_req->xor.cb(stripe_req, -EIO);
		return;
	}

	raid5f_xor_stripe(stripe_req, stripe_req->xor.cb);
}

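/*
 * Service a read targeting a chunk whose base bdev is unavailable: read all
 * of the other chunks in the stripe into preallocated buffers, then XOR them
 * (see raid5f_reconstruct_reads_completed_cb) to rebuild the requested data
 * directly into the caller's iovecs.
 */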
static int
raid5f_submit_reconstruct_read(struct raid_bdev_io *raid_io, uint64_t stripe_index,
			       uint8_t chunk_idx, uint64_t chunk_offset, stripe_req_xor_cb cb)
{
	struct raid_bdev *raid_bdev = raid_io->raid_bdev;
	struct raid5f_io_channel *r5ch = raid_bdev_channel_get_module_ctx(raid_io->raid_ch);
	void *raid_io_md = raid_io->md_buf;
	struct stripe_request *stripe_req;
	struct chunk *chunk;
	int buf_idx;

	assert(cb != NULL);

	stripe_req = TAILQ_FIRST(&r5ch->free_stripe_requests.reconstruct);
	if (!stripe_req) {
		return -ENOMEM;
	}

	raid5f_stripe_request_init(stripe_req, raid_io, stripe_index);

	stripe_req->reconstruct.chunk = &stripe_req->chunks[chunk_idx];
	stripe_req->reconstruct.chunk_offset = chunk_offset;
	stripe_req->xor.cb = cb;
	buf_idx = 0;

	FOR_EACH_CHUNK(stripe_req, chunk) {
		if (chunk == stripe_req->reconstruct.chunk) {
			int i;
			int ret;

			ret = raid5f_chunk_set_iovcnt(chunk, raid_io->iovcnt);
			if (ret) {
				return ret;
			}

			for (i = 0; i < raid_io->iovcnt; i++) {
				chunk->iovs[i] = raid_io->iovs[i];
			}

			chunk->md_buf = raid_io_md;
		} else {
			struct iovec *iov = &chunk->iovs[0];

			iov->iov_base = stripe_req->reconstruct.chunk_buffers[buf_idx];
			iov->iov_len = raid_io->num_blocks * raid_bdev->bdev.blocklen;
			chunk->iovcnt = 1;

			if (raid_io_md) {
				chunk->md_buf = stripe_req->reconstruct.chunk_md_buffers[buf_idx];
			}

			buf_idx++;
		}
	}

	raid_io->module_private = stripe_req;
	raid_io->base_bdev_io_remaining = raid_bdev->num_base_bdevs;
	raid_io->completion_cb = raid5f_reconstruct_reads_completed_cb;

	TAILQ_REMOVE(&r5ch->free_stripe_requests.reconstruct, stripe_req, link);

	raid5f_stripe_request_submit_chunks(stripe_req);

	return 0;
}

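/*
 * Reads never span chunk boundaries (the bdev layer splits them on
 * optimal_io_boundary), so map the stripe offset to a single data chunk and
 * read from it directly. If its base bdev is unavailable, fall back to
 * reconstructing the data from the remaining chunks.
 */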
static int
raid5f_submit_read_request(struct raid_bdev_io *raid_io, uint64_t stripe_index,
			   uint64_t stripe_offset)
{
	struct raid_bdev *raid_bdev = raid_io->raid_bdev;
	uint8_t chunk_data_idx = stripe_offset >> raid_bdev->strip_size_shift;
	uint8_t p_idx = raid5f_stripe_parity_chunk_index(raid_bdev, stripe_index);
	uint8_t chunk_idx = chunk_data_idx < p_idx ? chunk_data_idx : chunk_data_idx + 1;
	struct raid_base_bdev_info *base_info = &raid_bdev->base_bdev_info[chunk_idx];
	struct spdk_io_channel *base_ch = raid_bdev_channel_get_base_channel(raid_io->raid_ch, chunk_idx);
	uint64_t chunk_offset = stripe_offset - (chunk_data_idx << raid_bdev->strip_size_shift);
	uint64_t base_offset_blocks = (stripe_index << raid_bdev->strip_size_shift) + chunk_offset;
	struct spdk_bdev_ext_io_opts io_opts;
	int ret;

	raid5f_init_ext_io_opts(&io_opts, raid_io);
	if (base_ch == NULL) {
		return raid5f_submit_reconstruct_read(raid_io, stripe_index, chunk_idx, chunk_offset,
						      raid5f_stripe_request_reconstruct_xor_done);
	}

	ret = raid_bdev_readv_blocks_ext(base_info, base_ch, raid_io->iovs, raid_io->iovcnt,
					 base_offset_blocks, raid_io->num_blocks,
					 raid5f_chunk_read_complete, raid_io, &io_opts);
	if (spdk_unlikely(ret == -ENOMEM)) {
		raid_bdev_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc),
					base_ch, _raid5f_submit_rw_request);
		return 0;
	}

	return ret;
}

static void
raid5f_submit_rw_request(struct raid_bdev_io *raid_io)
{
	struct raid_bdev *raid_bdev = raid_io->raid_bdev;
	struct raid5f_info *r5f_info = raid_bdev->module_private;
	uint64_t stripe_index = raid_io->offset_blocks / r5f_info->stripe_blocks;
	uint64_t stripe_offset = raid_io->offset_blocks % r5f_info->stripe_blocks;
	int ret;

	switch (raid_io->type) {
	case SPDK_BDEV_IO_TYPE_READ:
		assert(raid_io->num_blocks <= raid_bdev->strip_size);
		ret = raid5f_submit_read_request(raid_io, stripe_index, stripe_offset);
		break;
	case SPDK_BDEV_IO_TYPE_WRITE:
		assert(stripe_offset == 0);
		assert(raid_io->num_blocks == r5f_info->stripe_blocks);
		ret = raid5f_submit_write_request(raid_io, stripe_index);
		break;
	default:
		ret = -EINVAL;
		break;
	}

	if (spdk_unlikely(ret)) {
		raid_bdev_io_complete(raid_io, ret == -ENOMEM ? SPDK_BDEV_IO_STATUS_NOMEM :
				      SPDK_BDEV_IO_STATUS_FAILED);
	}
}

static void
raid5f_stripe_request_free(struct stripe_request *stripe_req)
{
	struct chunk *chunk;

	FOR_EACH_CHUNK(stripe_req, chunk) {
		free(chunk->iovs);
	}

	if (stripe_req->type == STRIPE_REQ_WRITE) {
		spdk_dma_free(stripe_req->write.parity_buf);
		spdk_dma_free(stripe_req->write.parity_md_buf);
	} else if (stripe_req->type == STRIPE_REQ_RECONSTRUCT) {
		struct raid5f_info *r5f_info = raid5f_ch_to_r5f_info(stripe_req->r5ch);
		struct raid_bdev *raid_bdev = r5f_info->raid_bdev;
		uint8_t i;

		if (stripe_req->reconstruct.chunk_buffers) {
			for (i = 0; i < raid5f_stripe_data_chunks_num(raid_bdev); i++) {
				spdk_dma_free(stripe_req->reconstruct.chunk_buffers[i]);
			}
			free(stripe_req->reconstruct.chunk_buffers);
		}

		if (stripe_req->reconstruct.chunk_md_buffers) {
			for (i = 0; i < raid5f_stripe_data_chunks_num(raid_bdev); i++) {
				spdk_dma_free(stripe_req->reconstruct.chunk_md_buffers[i]);
			}
			free(stripe_req->reconstruct.chunk_md_buffers);
		}
	} else {
		assert(false);
	}

	free(stripe_req->chunk_xor_buffers);
	free(stripe_req->chunk_xor_md_buffers);
	free(stripe_req->chunk_iov_iters);

	free(stripe_req);
}

static struct stripe_request *
raid5f_stripe_request_alloc(struct raid5f_io_channel *r5ch, enum stripe_request_type type)
{
	struct raid5f_info *r5f_info = raid5f_ch_to_r5f_info(r5ch);
	struct raid_bdev *raid_bdev = r5f_info->raid_bdev;
	uint32_t raid_io_md_size = raid_bdev->bdev.md_interleave ? 0 : raid_bdev->bdev.md_len;
	struct stripe_request *stripe_req;
	struct chunk *chunk;
	size_t chunk_len;

	stripe_req = calloc(1, sizeof(*stripe_req) + sizeof(*chunk) * raid_bdev->num_base_bdevs);
	if (!stripe_req) {
		return NULL;
	}

	stripe_req->r5ch = r5ch;
	stripe_req->type = type;

	FOR_EACH_CHUNK(stripe_req, chunk) {
		chunk->index = chunk - stripe_req->chunks;
		chunk->iovcnt_max = 4;
		chunk->iovs = calloc(chunk->iovcnt_max, sizeof(chunk->iovs[0]));
		if (!chunk->iovs) {
			goto err;
		}
	}

	chunk_len = raid_bdev->strip_size * raid_bdev->bdev.blocklen;

	if (type == STRIPE_REQ_WRITE) {
		stripe_req->write.parity_buf = spdk_dma_malloc(chunk_len, r5f_info->buf_alignment, NULL);
		if (!stripe_req->write.parity_buf) {
			goto err;
		}

		if (raid_io_md_size != 0) {
			stripe_req->write.parity_md_buf = spdk_dma_malloc(raid_bdev->strip_size * raid_io_md_size,
							  r5f_info->buf_alignment, NULL);
			if (!stripe_req->write.parity_md_buf) {
				goto err;
			}
		}
	} else if (type == STRIPE_REQ_RECONSTRUCT) {
		uint8_t n = raid5f_stripe_data_chunks_num(raid_bdev);
		void *buf;
		uint8_t i;

		stripe_req->reconstruct.chunk_buffers = calloc(n, sizeof(void *));
		if (!stripe_req->reconstruct.chunk_buffers) {
			goto err;
		}

		for (i = 0; i < n; i++) {
			buf = spdk_dma_malloc(chunk_len, r5f_info->buf_alignment, NULL);
			if (!buf) {
				goto err;
			}
			stripe_req->reconstruct.chunk_buffers[i] = buf;
		}

		if (raid_io_md_size != 0) {
			stripe_req->reconstruct.chunk_md_buffers = calloc(n, sizeof(void *));
			if (!stripe_req->reconstruct.chunk_md_buffers) {
				goto err;
			}

			for (i = 0; i < n; i++) {
				buf = spdk_dma_malloc(raid_bdev->strip_size * raid_io_md_size, r5f_info->buf_alignment, NULL);
				if (!buf) {
					goto err;
				}
				stripe_req->reconstruct.chunk_md_buffers[i] = buf;
			}
		}
	} else {
		assert(false);
		return NULL;
	}

	stripe_req->chunk_iov_iters = malloc(SPDK_IOVITER_SIZE(raid_bdev->num_base_bdevs));
	if (!stripe_req->chunk_iov_iters) {
		goto err;
	}

	stripe_req->chunk_xor_buffers = calloc(raid5f_stripe_data_chunks_num(raid_bdev),
					       sizeof(stripe_req->chunk_xor_buffers[0]));
	if (!stripe_req->chunk_xor_buffers) {
		goto err;
	}

	stripe_req->chunk_xor_md_buffers = calloc(raid5f_stripe_data_chunks_num(raid_bdev),
					   sizeof(stripe_req->chunk_xor_md_buffers[0]));
	if (!stripe_req->chunk_xor_md_buffers) {
		goto err;
	}

	return stripe_req;
err:
	raid5f_stripe_request_free(stripe_req);
	return NULL;
}

static void
raid5f_ioch_destroy(void *io_device, void *ctx_buf)
{
	struct raid5f_io_channel *r5ch = ctx_buf;
	struct stripe_request *stripe_req;

	assert(TAILQ_EMPTY(&r5ch->xor_retry_queue));

	while ((stripe_req = TAILQ_FIRST(&r5ch->free_stripe_requests.write))) {
		TAILQ_REMOVE(&r5ch->free_stripe_requests.write, stripe_req, link);
		raid5f_stripe_request_free(stripe_req);
	}

	while ((stripe_req = TAILQ_FIRST(&r5ch->free_stripe_requests.reconstruct))) {
		TAILQ_REMOVE(&r5ch->free_stripe_requests.reconstruct, stripe_req, link);
		raid5f_stripe_request_free(stripe_req);
	}

	if (r5ch->accel_ch) {
		spdk_put_io_channel(r5ch->accel_ch);
	}

	free(r5ch->chunk_xor_buffers);
	free(r5ch->chunk_xor_iovs);
	free(r5ch->chunk_xor_iovcnt);
}

static int
raid5f_ioch_create(void *io_device, void *ctx_buf)
{
	struct raid5f_io_channel *r5ch = ctx_buf;
	struct raid5f_info *r5f_info = io_device;
	struct raid_bdev *raid_bdev = r5f_info->raid_bdev;
	struct stripe_request *stripe_req;
	int i;

	TAILQ_INIT(&r5ch->free_stripe_requests.write);
	TAILQ_INIT(&r5ch->free_stripe_requests.reconstruct);
	TAILQ_INIT(&r5ch->xor_retry_queue);

	for (i = 0; i < RAID5F_MAX_STRIPES; i++) {
		stripe_req = raid5f_stripe_request_alloc(r5ch, STRIPE_REQ_WRITE);
		if (!stripe_req) {
			goto err;
		}

		TAILQ_INSERT_HEAD(&r5ch->free_stripe_requests.write, stripe_req, link);
	}

	for (i = 0; i < RAID5F_MAX_STRIPES; i++) {
		stripe_req = raid5f_stripe_request_alloc(r5ch, STRIPE_REQ_RECONSTRUCT);
		if (!stripe_req) {
			goto err;
		}

		TAILQ_INSERT_HEAD(&r5ch->free_stripe_requests.reconstruct, stripe_req, link);
	}

	r5ch->accel_ch = spdk_accel_get_io_channel();
	if (!r5ch->accel_ch) {
		SPDK_ERRLOG("Failed to get accel framework's IO channel\n");
		goto err;
	}

	r5ch->chunk_xor_buffers = calloc(raid_bdev->num_base_bdevs, sizeof(*r5ch->chunk_xor_buffers));
	if (!r5ch->chunk_xor_buffers) {
		goto err;
	}

	r5ch->chunk_xor_iovs = calloc(raid_bdev->num_base_bdevs, sizeof(*r5ch->chunk_xor_iovs));
	if (!r5ch->chunk_xor_iovs) {
		goto err;
	}

	r5ch->chunk_xor_iovcnt = calloc(raid_bdev->num_base_bdevs, sizeof(*r5ch->chunk_xor_iovcnt));
	if (!r5ch->chunk_xor_iovcnt) {
		goto err;
	}

	return 0;
err:
	SPDK_ERRLOG("Failed to initialize io channel\n");
	raid5f_ioch_destroy(r5f_info, r5ch);
	return -ENOMEM;
}

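/*
 * Called when the raid bdev is started. Derives the array geometry from the
 * smallest base bdev and configures the bdev so that writes are always
 * full-stripe: write_unit_size is a whole stripe and split_on_write_unit is
 * set, which is what lets raid5f avoid read-modify-write parity updates.
 */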
static int
raid5f_start(struct raid_bdev *raid_bdev)
{
	uint64_t min_blockcnt = UINT64_MAX;
	uint64_t base_bdev_data_size;
	struct raid_base_bdev_info *base_info;
	struct spdk_bdev *base_bdev;
	struct raid5f_info *r5f_info;
	size_t alignment = 0;

	r5f_info = calloc(1, sizeof(*r5f_info));
	if (!r5f_info) {
		SPDK_ERRLOG("Failed to allocate r5f_info\n");
		return -ENOMEM;
	}
	r5f_info->raid_bdev = raid_bdev;

	RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
		min_blockcnt = spdk_min(min_blockcnt, base_info->data_size);
		if (base_info->desc) {
			base_bdev = spdk_bdev_desc_get_bdev(base_info->desc);
			alignment = spdk_max(alignment, spdk_bdev_get_buf_align(base_bdev));
		}
	}

	base_bdev_data_size = (min_blockcnt / raid_bdev->strip_size) * raid_bdev->strip_size;

	RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
		base_info->data_size = base_bdev_data_size;
	}

	r5f_info->total_stripes = min_blockcnt / raid_bdev->strip_size;
	r5f_info->stripe_blocks = raid_bdev->strip_size * raid5f_stripe_data_chunks_num(raid_bdev);
	r5f_info->buf_alignment = alignment;
	if (!raid_bdev->bdev.md_interleave) {
		r5f_info->blocklen_shift = spdk_u32log2(raid_bdev->bdev.blocklen);
	}

	raid_bdev->bdev.blockcnt = r5f_info->stripe_blocks * r5f_info->total_stripes;
	raid_bdev->bdev.optimal_io_boundary = raid_bdev->strip_size;
	raid_bdev->bdev.split_on_optimal_io_boundary = true;
	raid_bdev->bdev.write_unit_size = r5f_info->stripe_blocks;
	raid_bdev->bdev.split_on_write_unit = true;

	raid_bdev->module_private = r5f_info;

	spdk_io_device_register(r5f_info, raid5f_ioch_create, raid5f_ioch_destroy,
				sizeof(struct raid5f_io_channel), NULL);

	return 0;
}

static void
raid5f_io_device_unregister_done(void *io_device)
{
	struct raid5f_info *r5f_info = io_device;

	raid_bdev_module_stop_done(r5f_info->raid_bdev);

	free(r5f_info);
}

static bool
raid5f_stop(struct raid_bdev *raid_bdev)
{
	struct raid5f_info *r5f_info = raid_bdev->module_private;

	spdk_io_device_unregister(r5f_info, raid5f_io_device_unregister_done);

	return false;
}

static struct spdk_io_channel *
raid5f_get_io_channel(struct raid_bdev *raid_bdev)
{
	struct raid5f_info *r5f_info = raid_bdev->module_private;

	return spdk_get_io_channel(r5f_info);
}

static void
raid5f_process_write_completed(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct raid_bdev_process_request *process_req = cb_arg;

	spdk_bdev_free_io(bdev_io);

	raid_bdev_process_request_complete(process_req, success ? 0 : -EIO);
}

static void raid5f_process_submit_write(struct raid_bdev_process_request *process_req);

static void
_raid5f_process_submit_write(void *ctx)
{
	struct raid_bdev_process_request *process_req = ctx;

	raid5f_process_submit_write(process_req);
}

static void
raid5f_process_submit_write(struct raid_bdev_process_request *process_req)
{
	struct raid_bdev_io *raid_io = &process_req->raid_io;
	struct raid_bdev *raid_bdev = raid_io->raid_bdev;
	struct raid5f_info *r5f_info = raid_bdev->module_private;
	uint64_t stripe_index = process_req->offset_blocks / r5f_info->stripe_blocks;
	struct spdk_bdev_ext_io_opts io_opts;
	int ret;

	raid5f_init_ext_io_opts(&io_opts, raid_io);
	ret = raid_bdev_writev_blocks_ext(process_req->target, process_req->target_ch,
					  raid_io->iovs, raid_io->iovcnt,
					  stripe_index << raid_bdev->strip_size_shift, raid_bdev->strip_size,
					  raid5f_process_write_completed, process_req, &io_opts);
	if (spdk_unlikely(ret != 0)) {
		if (ret == -ENOMEM) {
			raid_bdev_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(process_req->target->desc),
						process_req->target_ch, _raid5f_process_submit_write);
		} else {
			raid_bdev_process_request_complete(process_req, ret);
		}
	}
}

static void
raid5f_process_stripe_request_reconstruct_xor_done(struct stripe_request *stripe_req, int status)
{
	struct raid_bdev_io *raid_io = stripe_req->raid_io;
	struct raid_bdev_process_request *process_req = SPDK_CONTAINEROF(raid_io,
			struct raid_bdev_process_request, raid_io);

	raid5f_stripe_request_release(stripe_req);

	if (status != 0) {
		raid_bdev_process_request_complete(process_req, status);
		return;
	}

	raid5f_process_submit_write(process_req);
}

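/*
 * Rebuild-process hook: reconstruct one full stripe's worth of data for the
 * target base bdev from the other chunks, then write it out to the target.
 * Returns the number of blocks processed, or 0 to stop when less than a full
 * stripe remains.
 */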
static int
raid5f_submit_process_request(struct raid_bdev_process_request *process_req,
			      struct raid_bdev_io_channel *raid_ch)
{
	struct spdk_io_channel *ch = spdk_io_channel_from_ctx(raid_ch);
	struct raid_bdev *raid_bdev = spdk_io_channel_get_io_device(ch);
	struct raid5f_info *r5f_info = raid_bdev->module_private;
	struct raid_bdev_io *raid_io = &process_req->raid_io;
	uint8_t chunk_idx = raid_bdev_base_bdev_slot(process_req->target);
	uint64_t stripe_index = process_req->offset_blocks / r5f_info->stripe_blocks;
	int ret;

	assert((process_req->offset_blocks % r5f_info->stripe_blocks) == 0);

	if (process_req->num_blocks < r5f_info->stripe_blocks) {
		return 0;
	}

	raid_bdev_io_init(raid_io, raid_ch, SPDK_BDEV_IO_TYPE_READ,
			  process_req->offset_blocks, raid_bdev->strip_size,
			  &process_req->iov, 1, process_req->md_buf, NULL, NULL);

	ret = raid5f_submit_reconstruct_read(raid_io, stripe_index, chunk_idx, 0,
					     raid5f_process_stripe_request_reconstruct_xor_done);
	if (spdk_likely(ret == 0)) {
		return r5f_info->stripe_blocks;
	} else if (ret < 0) {
		return ret;
	} else {
		return -EINVAL;
	}
}

static struct raid_bdev_module g_raid5f_module = {
	.level = RAID5F,
	.base_bdevs_min = 3,
	.base_bdevs_constraint = {CONSTRAINT_MAX_BASE_BDEVS_REMOVED, 1},
	.start = raid5f_start,
	.stop = raid5f_stop,
	.submit_rw_request = raid5f_submit_rw_request,
	.get_io_channel = raid5f_get_io_channel,
	.submit_process_request = raid5f_submit_process_request,
};
RAID_MODULE_REGISTER(&g_raid5f_module)

SPDK_LOG_REGISTER_COMPONENT(bdev_raid5f)