/*   SPDX-License-Identifier: BSD-3-Clause
 *   Copyright (C) 2022 Intel Corporation.
 *   All rights reserved.
 */

#include "bdev_raid.h"

#include "spdk/env.h"
#include "spdk/thread.h"
#include "spdk/string.h"
#include "spdk/util.h"
#include "spdk/likely.h"
#include "spdk/log.h"
#include "spdk/accel.h"

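/*
 * raid5f: RAID5 with full-stripe writes only. Writes are accepted only as
 * whole stripes (see write_unit_size/split_on_write_unit in raid5f_start()),
 * so parity is always computed over a complete stripe and no
 * read-modify-write is needed. Reads span at most one strip and are served
 * from a single base bdev, falling back to reconstruction from parity when
 * that bdev is unavailable.
 */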
/* Maximum concurrent full stripe writes per io channel */
#define RAID5F_MAX_STRIPES 32

struct chunk {
	/* Corresponds to base_bdev index */
	uint8_t index;

	/* Array of iovecs */
	struct iovec *iovs;

	/* Number of used iovecs */
	int iovcnt;

	/* Total number of available iovecs in the array */
	int iovcnt_max;

	/* Pointer to buffer with I/O metadata */
	void *md_buf;
};

struct stripe_request;
typedef void (*stripe_req_xor_cb)(struct stripe_request *stripe_req, int status);

struct stripe_request {
	enum stripe_request_type {
		STRIPE_REQ_WRITE,
		STRIPE_REQ_RECONSTRUCT,
	} type;

	struct raid5f_io_channel *r5ch;

	/* The associated raid_bdev_io */
	struct raid_bdev_io *raid_io;

	/* The stripe's index in the raid array. */
	uint64_t stripe_index;

	/* The stripe's parity chunk */
	struct chunk *parity_chunk;

	union {
		struct {
			/* Buffer for stripe parity */
			void *parity_buf;

			/* Buffer for stripe io metadata parity */
			void *parity_md_buf;
		} write;

		struct {
			/* Array of buffers for reading chunk data */
			void **chunk_buffers;

			/* Array of buffers for reading chunk metadata */
			void **chunk_md_buffers;

			/* Chunk to reconstruct from parity */
			struct chunk *chunk;

			/* Offset from chunk start */
			uint64_t chunk_offset;
		} reconstruct;
	};

	/* Array of iovec iterators for each chunk */
	struct spdk_ioviter *chunk_iov_iters;

	/* Array of source buffer pointers for parity calculation */
	void **chunk_xor_buffers;

	/* Array of source buffer pointers for parity calculation of io metadata */
	void **chunk_xor_md_buffers;

	struct {
		size_t len;
		size_t remaining;
		size_t remaining_md;
		int status;
		stripe_req_xor_cb cb;
	} xor;

	TAILQ_ENTRY(stripe_request) link;

	/* Array of chunks corresponding to base_bdevs */
	struct chunk chunks[0];
};

struct raid5f_info {
	/* The parent raid bdev */
	struct raid_bdev *raid_bdev;

	/* Number of data blocks in a stripe (without parity) */
	uint64_t stripe_blocks;

	/* Number of stripes on this array */
	uint64_t total_stripes;

	/* Alignment for buffer allocation */
	size_t buf_alignment;
};

struct raid5f_io_channel {
	/* All available stripe requests on this channel */
	struct {
		TAILQ_HEAD(, stripe_request) write;
		TAILQ_HEAD(, stripe_request) reconstruct;
	} free_stripe_requests;

	/* accel_fw channel */
	struct spdk_io_channel *accel_ch;

	/* For retrying xor if accel_ch runs out of resources */
	TAILQ_HEAD(, stripe_request) xor_retry_queue;

	/* For iterating over chunk iovecs during xor calculation */
	void **chunk_xor_buffers;
	struct iovec **chunk_xor_iovs;
	size_t *chunk_xor_iovcnt;
};

#define __CHUNK_IN_RANGE(req, c) \
	c < req->chunks + raid5f_ch_to_r5f_info(req->r5ch)->raid_bdev->num_base_bdevs

#define FOR_EACH_CHUNK_FROM(req, c, from) \
	for (c = from; __CHUNK_IN_RANGE(req, c); c++)

#define FOR_EACH_CHUNK(req, c) \
	FOR_EACH_CHUNK_FROM(req, c, req->chunks)

#define __NEXT_DATA_CHUNK(req, c) \
	c == req->parity_chunk ? c+1 : c

#define FOR_EACH_DATA_CHUNK(req, c) \
	for (c = __NEXT_DATA_CHUNK(req, req->chunks); __CHUNK_IN_RANGE(req, c); \
	     c = __NEXT_DATA_CHUNK(req, c+1))

static inline struct raid5f_info *
raid5f_ch_to_r5f_info(struct raid5f_io_channel *r5ch)
{
	return spdk_io_channel_get_io_device(spdk_io_channel_from_ctx(r5ch));
}

static inline struct stripe_request *
raid5f_chunk_stripe_req(struct chunk *chunk)
{
	return SPDK_CONTAINEROF((chunk - chunk->index), struct stripe_request, chunks);
}

static inline uint8_t
raid5f_stripe_data_chunks_num(const struct raid_bdev *raid_bdev)
{
	return raid_bdev->min_base_bdevs_operational;
}

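/*
 * Parity placement rotates across stripes: stripe 0 puts parity on the last
 * base bdev, and each subsequent stripe moves it one slot towards the first,
 * wrapping around every num_base_bdevs stripes.
 */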
static inline uint8_t
raid5f_stripe_parity_chunk_index(const struct raid_bdev *raid_bdev, uint64_t stripe_index)
{
	return raid5f_stripe_data_chunks_num(raid_bdev) - stripe_index % raid_bdev->num_base_bdevs;
}

static inline void
raid5f_stripe_request_release(struct stripe_request *stripe_req)
{
	if (spdk_likely(stripe_req->type == STRIPE_REQ_WRITE)) {
		TAILQ_INSERT_HEAD(&stripe_req->r5ch->free_stripe_requests.write, stripe_req, link);
	} else if (stripe_req->type == STRIPE_REQ_RECONSTRUCT) {
		TAILQ_INSERT_HEAD(&stripe_req->r5ch->free_stripe_requests.reconstruct, stripe_req, link);
	} else {
		assert(false);
	}
}

static void raid5f_xor_stripe_retry(struct stripe_request *stripe_req);

static void
raid5f_xor_stripe_done(struct stripe_request *stripe_req)
{
	struct raid5f_io_channel *r5ch = stripe_req->r5ch;

	if (stripe_req->xor.status != 0) {
		SPDK_ERRLOG("stripe xor failed: %s\n", spdk_strerror(-stripe_req->xor.status));
	}

	stripe_req->xor.cb(stripe_req, stripe_req->xor.status);

	if (!TAILQ_EMPTY(&r5ch->xor_retry_queue)) {
		stripe_req = TAILQ_FIRST(&r5ch->xor_retry_queue);
		TAILQ_REMOVE(&r5ch->xor_retry_queue, stripe_req, link);
		raid5f_xor_stripe_retry(stripe_req);
	}
}

static void raid5f_xor_stripe_continue(struct stripe_request *stripe_req);

static void
_raid5f_xor_stripe_cb(struct stripe_request *stripe_req, int status)
{
	if (status != 0) {
		stripe_req->xor.status = status;
	}

	if (stripe_req->xor.remaining + stripe_req->xor.remaining_md == 0) {
		raid5f_xor_stripe_done(stripe_req);
	}
}

static void
raid5f_xor_stripe_cb(void *_stripe_req, int status)
{
	struct stripe_request *stripe_req = _stripe_req;

	stripe_req->xor.remaining -= stripe_req->xor.len;

	if (stripe_req->xor.remaining > 0) {
		stripe_req->xor.len = spdk_ioviter_nextv(stripe_req->chunk_iov_iters,
				      stripe_req->r5ch->chunk_xor_buffers);
		raid5f_xor_stripe_continue(stripe_req);
	}

	_raid5f_xor_stripe_cb(stripe_req, status);
}

static void
raid5f_xor_stripe_md_cb(void *_stripe_req, int status)
{
	struct stripe_request *stripe_req = _stripe_req;

	stripe_req->xor.remaining_md = 0;

	_raid5f_xor_stripe_cb(stripe_req, status);
}

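/*
 * Submit one accel xor operation covering the current contiguous segment of
 * the chunk iovecs: the first n_src entries of r5ch->chunk_xor_buffers are
 * the source buffers and entry n_src is the destination. The sources are
 * copied into the per-request array so the shared per-channel array can be
 * advanced by the ioviter while the operation is in flight.
 */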
static void
raid5f_xor_stripe_continue(struct stripe_request *stripe_req)
{
	struct raid5f_io_channel *r5ch = stripe_req->r5ch;
	struct raid_bdev_io *raid_io = stripe_req->raid_io;
	struct raid_bdev *raid_bdev = raid_io->raid_bdev;
	uint8_t n_src = raid5f_stripe_data_chunks_num(raid_bdev);
	uint8_t i;
	int ret;

	assert(stripe_req->xor.len > 0);

	for (i = 0; i < n_src; i++) {
		stripe_req->chunk_xor_buffers[i] = r5ch->chunk_xor_buffers[i];
	}

	ret = spdk_accel_submit_xor(r5ch->accel_ch, r5ch->chunk_xor_buffers[n_src],
				    stripe_req->chunk_xor_buffers, n_src, stripe_req->xor.len,
				    raid5f_xor_stripe_cb, stripe_req);
	if (spdk_unlikely(ret)) {
		if (ret == -ENOMEM) {
			TAILQ_INSERT_HEAD(&r5ch->xor_retry_queue, stripe_req, link);
		} else {
			stripe_req->xor.status = ret;
			raid5f_xor_stripe_done(stripe_req);
		}
	}
}

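/*
 * Set up and kick off the parity (or reconstruction) xor for a stripe
 * request. All chunks except the destination are xor sources; the
 * destination chunk's iovecs are placed last so that entry n_src of the
 * ioviter output is the destination buffer. Metadata, if present, is xored
 * in a single separate accel operation.
 */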
static void
raid5f_xor_stripe(struct stripe_request *stripe_req, stripe_req_xor_cb cb)
{
	struct raid5f_io_channel *r5ch = stripe_req->r5ch;
	struct raid_bdev_io *raid_io = stripe_req->raid_io;
	struct raid_bdev *raid_bdev = raid_io->raid_bdev;
	struct chunk *chunk;
	struct chunk *dest_chunk = NULL;
	uint64_t num_blocks = 0;
	uint8_t c;

	assert(cb != NULL);

	if (spdk_likely(stripe_req->type == STRIPE_REQ_WRITE)) {
		num_blocks = raid_bdev->strip_size;
		dest_chunk = stripe_req->parity_chunk;
	} else if (stripe_req->type == STRIPE_REQ_RECONSTRUCT) {
		num_blocks = raid_io->num_blocks;
		dest_chunk = stripe_req->reconstruct.chunk;
	} else {
		assert(false);
	}

	c = 0;
	FOR_EACH_CHUNK(stripe_req, chunk) {
		if (chunk == dest_chunk) {
			continue;
		}
		r5ch->chunk_xor_iovs[c] = chunk->iovs;
		r5ch->chunk_xor_iovcnt[c] = chunk->iovcnt;
		c++;
	}
	r5ch->chunk_xor_iovs[c] = dest_chunk->iovs;
	r5ch->chunk_xor_iovcnt[c] = dest_chunk->iovcnt;

	stripe_req->xor.len = spdk_ioviter_firstv(stripe_req->chunk_iov_iters,
			      raid_bdev->num_base_bdevs,
			      r5ch->chunk_xor_iovs,
			      r5ch->chunk_xor_iovcnt,
			      r5ch->chunk_xor_buffers);
	stripe_req->xor.remaining = num_blocks << raid_bdev->blocklen_shift;
	stripe_req->xor.status = 0;
	stripe_req->xor.cb = cb;

	if (raid_io->md_buf != NULL) {
		uint8_t n_src = raid5f_stripe_data_chunks_num(raid_bdev);
		uint64_t len = num_blocks * spdk_bdev_get_md_size(&raid_bdev->bdev);
		int ret;

		stripe_req->xor.remaining_md = len;

		c = 0;
		FOR_EACH_CHUNK(stripe_req, chunk) {
			if (chunk != dest_chunk) {
				stripe_req->chunk_xor_md_buffers[c] = chunk->md_buf;
				c++;
			}
		}

		ret = spdk_accel_submit_xor(stripe_req->r5ch->accel_ch, dest_chunk->md_buf,
					    stripe_req->chunk_xor_md_buffers, n_src, len,
					    raid5f_xor_stripe_md_cb, stripe_req);
		if (spdk_unlikely(ret)) {
			if (ret == -ENOMEM) {
				TAILQ_INSERT_HEAD(&stripe_req->r5ch->xor_retry_queue, stripe_req, link);
			} else {
				stripe_req->xor.status = ret;
				raid5f_xor_stripe_done(stripe_req);
			}
			return;
		}
	}

	raid5f_xor_stripe_continue(stripe_req);
}

static void
raid5f_xor_stripe_retry(struct stripe_request *stripe_req)
{
	if (stripe_req->xor.remaining_md) {
		raid5f_xor_stripe(stripe_req, stripe_req->xor.cb);
	} else {
		raid5f_xor_stripe_continue(stripe_req);
	}
}

static void
raid5f_stripe_request_chunk_write_complete(struct stripe_request *stripe_req,
		enum spdk_bdev_io_status status)
{
	if (raid_bdev_io_complete_part(stripe_req->raid_io, 1, status)) {
		raid5f_stripe_request_release(stripe_req);
	}
}

static void
raid5f_stripe_request_chunk_read_complete(struct stripe_request *stripe_req,
		enum spdk_bdev_io_status status)
{
	struct raid_bdev_io *raid_io = stripe_req->raid_io;

	raid_bdev_io_complete_part(raid_io, 1, status);
}

static void
raid5f_chunk_complete_bdev_io(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct chunk *chunk = cb_arg;
	struct stripe_request *stripe_req = raid5f_chunk_stripe_req(chunk);
	enum spdk_bdev_io_status status = success ? SPDK_BDEV_IO_STATUS_SUCCESS :
					  SPDK_BDEV_IO_STATUS_FAILED;

	spdk_bdev_free_io(bdev_io);

	if (spdk_likely(stripe_req->type == STRIPE_REQ_WRITE)) {
		raid5f_stripe_request_chunk_write_complete(stripe_req, status);
	} else if (stripe_req->type == STRIPE_REQ_RECONSTRUCT) {
		raid5f_stripe_request_chunk_read_complete(stripe_req, status);
	} else {
		assert(false);
	}
}

static void raid5f_stripe_request_submit_chunks(struct stripe_request *stripe_req);

static void
raid5f_chunk_submit_retry(void *_raid_io)
{
	struct raid_bdev_io *raid_io = _raid_io;
	struct stripe_request *stripe_req = raid_io->module_private;

	raid5f_stripe_request_submit_chunks(stripe_req);
}

static inline void
raid5f_init_ext_io_opts(struct spdk_bdev_ext_io_opts *opts, struct raid_bdev_io *raid_io)
{
	memset(opts, 0, sizeof(*opts));
	opts->size = sizeof(*opts);
	opts->memory_domain = raid_io->memory_domain;
	opts->memory_domain_ctx = raid_io->memory_domain_ctx;
	opts->metadata = raid_io->md_buf;
}

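/*
 * Submit the base bdev I/O for a single chunk: a write of strip_size blocks
 * for full-stripe writes, or a read of the requested range for
 * reconstruction. A missing base channel (degraded write) or the chunk
 * being reconstructed (read) is completed immediately as a no-op.
 */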
static int
raid5f_chunk_submit(struct chunk *chunk)
{
	struct stripe_request *stripe_req = raid5f_chunk_stripe_req(chunk);
	struct raid_bdev_io *raid_io = stripe_req->raid_io;
	struct raid_bdev *raid_bdev = raid_io->raid_bdev;
	struct raid_base_bdev_info *base_info = &raid_bdev->base_bdev_info[chunk->index];
	struct spdk_io_channel *base_ch = raid_bdev_channel_get_base_channel(raid_io->raid_ch,
					  chunk->index);
	uint64_t base_offset_blocks = (stripe_req->stripe_index << raid_bdev->strip_size_shift);
	struct spdk_bdev_ext_io_opts io_opts;
	int ret;

	raid5f_init_ext_io_opts(&io_opts, raid_io);
	io_opts.metadata = chunk->md_buf;

	raid_io->base_bdev_io_submitted++;

	switch (stripe_req->type) {
	case STRIPE_REQ_WRITE:
		if (base_ch == NULL) {
			raid_bdev_io_complete_part(raid_io, 1, SPDK_BDEV_IO_STATUS_SUCCESS);
			return 0;
		}

		ret = raid_bdev_writev_blocks_ext(base_info, base_ch, chunk->iovs, chunk->iovcnt,
						  base_offset_blocks, raid_bdev->strip_size,
						  raid5f_chunk_complete_bdev_io, chunk, &io_opts);
		break;
	case STRIPE_REQ_RECONSTRUCT:
		if (chunk == stripe_req->reconstruct.chunk) {
			raid_bdev_io_complete_part(raid_io, 1, SPDK_BDEV_IO_STATUS_SUCCESS);
			return 0;
		}

		base_offset_blocks += stripe_req->reconstruct.chunk_offset;

		ret = raid_bdev_readv_blocks_ext(base_info, base_ch, chunk->iovs, chunk->iovcnt,
						 base_offset_blocks, raid_io->num_blocks,
						 raid5f_chunk_complete_bdev_io, chunk, &io_opts);
		break;
	default:
		assert(false);
		ret = -EINVAL;
		break;
	}

	if (spdk_unlikely(ret)) {
		raid_io->base_bdev_io_submitted--;
		if (ret == -ENOMEM) {
			raid_bdev_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc),
						base_ch, raid5f_chunk_submit_retry);
		} else {
			/*
			 * Implicitly complete any I/Os not yet submitted as FAILED. If completing
			 * these means there are no more to complete for the stripe request, we can
			 * release the stripe request as well.
			 */
			uint64_t base_bdev_io_not_submitted;

			if (stripe_req->type == STRIPE_REQ_WRITE) {
				base_bdev_io_not_submitted = raid_bdev->num_base_bdevs -
							     raid_io->base_bdev_io_submitted;
			} else {
				base_bdev_io_not_submitted = raid5f_stripe_data_chunks_num(raid_bdev) -
							     raid_io->base_bdev_io_submitted;
			}

			if (raid_bdev_io_complete_part(raid_io, base_bdev_io_not_submitted,
						       SPDK_BDEV_IO_STATUS_FAILED)) {
				raid5f_stripe_request_release(stripe_req);
			}
		}
	}

	return ret;
}

static int
raid5f_chunk_set_iovcnt(struct chunk *chunk, int iovcnt)
{
	if (iovcnt > chunk->iovcnt_max) {
		struct iovec *iovs = chunk->iovs;

		iovs = realloc(iovs, iovcnt * sizeof(*iovs));
		if (!iovs) {
			return -ENOMEM;
		}
		chunk->iovs = iovs;
		chunk->iovcnt_max = iovcnt;
	}
	chunk->iovcnt = iovcnt;

	return 0;
}

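/*
 * Split the parent I/O's iovecs across the stripe's data chunks: each data
 * chunk gets the slice of the iovec array covering its strip_size worth of
 * data, with partial iovecs adjusted via base/length offsets. The parity
 * chunk is pointed at the preallocated parity (and parity metadata) buffers.
 */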
static int
raid5f_stripe_request_map_iovecs(struct stripe_request *stripe_req)
{
	struct raid_bdev_io *raid_io = stripe_req->raid_io;
	struct raid_bdev *raid_bdev = raid_io->raid_bdev;
	void *raid_io_md = raid_io->md_buf;
	uint32_t raid_io_md_size = spdk_bdev_get_md_size(&raid_bdev->bdev);
	struct chunk *chunk;
	int raid_io_iov_idx = 0;
	size_t raid_io_offset = 0;
	size_t raid_io_iov_offset = 0;
	int i;

	FOR_EACH_DATA_CHUNK(stripe_req, chunk) {
		int chunk_iovcnt = 0;
		uint64_t len = raid_bdev->strip_size << raid_bdev->blocklen_shift;
		size_t off = raid_io_iov_offset;
		int ret;

		for (i = raid_io_iov_idx; i < raid_io->iovcnt; i++) {
			chunk_iovcnt++;
			off += raid_io->iovs[i].iov_len;
			if (off >= raid_io_offset + len) {
				break;
			}
		}

		assert(raid_io_iov_idx + chunk_iovcnt <= raid_io->iovcnt);

		ret = raid5f_chunk_set_iovcnt(chunk, chunk_iovcnt);
		if (ret) {
			return ret;
		}

		if (raid_io_md) {
			chunk->md_buf = raid_io_md +
					(raid_io_offset >> raid_bdev->blocklen_shift) * raid_io_md_size;
		}

		for (i = 0; i < chunk_iovcnt; i++) {
			struct iovec *chunk_iov = &chunk->iovs[i];
			const struct iovec *raid_io_iov = &raid_io->iovs[raid_io_iov_idx];
			size_t chunk_iov_offset = raid_io_offset - raid_io_iov_offset;

			chunk_iov->iov_base = raid_io_iov->iov_base + chunk_iov_offset;
			chunk_iov->iov_len = spdk_min(len, raid_io_iov->iov_len - chunk_iov_offset);
			raid_io_offset += chunk_iov->iov_len;
			len -= chunk_iov->iov_len;

			if (raid_io_offset >= raid_io_iov_offset + raid_io_iov->iov_len) {
				raid_io_iov_idx++;
				raid_io_iov_offset += raid_io_iov->iov_len;
			}
		}

		if (spdk_unlikely(len > 0)) {
			return -EINVAL;
		}
	}

	stripe_req->parity_chunk->iovs[0].iov_base = stripe_req->write.parity_buf;
	stripe_req->parity_chunk->iovs[0].iov_len = raid_bdev->strip_size << raid_bdev->blocklen_shift;
	stripe_req->parity_chunk->iovcnt = 1;
	stripe_req->parity_chunk->md_buf = stripe_req->write.parity_md_buf;

	return 0;
}

static void
raid5f_stripe_request_submit_chunks(struct stripe_request *stripe_req)
{
	struct raid_bdev_io *raid_io = stripe_req->raid_io;
	struct chunk *start = &stripe_req->chunks[raid_io->base_bdev_io_submitted];
	struct chunk *chunk;

	FOR_EACH_CHUNK_FROM(stripe_req, chunk, start) {
		if (spdk_unlikely(raid5f_chunk_submit(chunk) != 0)) {
			break;
		}
	}
}

static inline void
raid5f_stripe_request_init(struct stripe_request *stripe_req, struct raid_bdev_io *raid_io,
			   uint64_t stripe_index)
{
	stripe_req->raid_io = raid_io;
	stripe_req->stripe_index = stripe_index;
	stripe_req->parity_chunk = &stripe_req->chunks[raid5f_stripe_parity_chunk_index(raid_io->raid_bdev,
				   stripe_index)];
}

static void
raid5f_stripe_write_request_xor_done(struct stripe_request *stripe_req, int status)
{
	struct raid_bdev_io *raid_io = stripe_req->raid_io;

	if (status != 0) {
		raid5f_stripe_request_release(stripe_req);
		raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED);
	} else {
		raid5f_stripe_request_submit_chunks(stripe_req);
	}
}

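/*
 * Full-stripe write: take a preallocated stripe request, map the parent
 * iovecs onto the data chunks, compute parity via accel xor (skipped when
 * the parity chunk's base bdev is missing), then write all chunks to their
 * base bdevs.
 */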
static int
raid5f_submit_write_request(struct raid_bdev_io *raid_io, uint64_t stripe_index)
{
	struct raid_bdev *raid_bdev = raid_io->raid_bdev;
	struct raid5f_io_channel *r5ch = raid_bdev_channel_get_module_ctx(raid_io->raid_ch);
	struct stripe_request *stripe_req;
	int ret;

	stripe_req = TAILQ_FIRST(&r5ch->free_stripe_requests.write);
	if (!stripe_req) {
		return -ENOMEM;
	}

	raid5f_stripe_request_init(stripe_req, raid_io, stripe_index);

	ret = raid5f_stripe_request_map_iovecs(stripe_req);
	if (spdk_unlikely(ret)) {
		return ret;
	}

	TAILQ_REMOVE(&r5ch->free_stripe_requests.write, stripe_req, link);

	raid_io->module_private = stripe_req;
	raid_io->base_bdev_io_remaining = raid_bdev->num_base_bdevs;

	if (raid_bdev_channel_get_base_channel(raid_io->raid_ch, stripe_req->parity_chunk->index) != NULL) {
		raid5f_xor_stripe(stripe_req, raid5f_stripe_write_request_xor_done);
	} else {
		raid5f_stripe_write_request_xor_done(stripe_req, 0);
	}

	return 0;
}

static void
raid5f_chunk_read_complete(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct raid_bdev_io *raid_io = cb_arg;

	spdk_bdev_free_io(bdev_io);

	raid_bdev_io_complete(raid_io, success ? SPDK_BDEV_IO_STATUS_SUCCESS :
			      SPDK_BDEV_IO_STATUS_FAILED);
}

static void raid5f_submit_rw_request(struct raid_bdev_io *raid_io);

static void
_raid5f_submit_rw_request(void *_raid_io)
{
	struct raid_bdev_io *raid_io = _raid_io;

	raid5f_submit_rw_request(raid_io);
}

static void
raid5f_stripe_request_reconstruct_xor_done(struct stripe_request *stripe_req, int status)
{
	struct raid_bdev_io *raid_io = stripe_req->raid_io;

	raid5f_stripe_request_release(stripe_req);

	raid_bdev_io_complete(raid_io,
			      status == 0 ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED);
}

static void
raid5f_reconstruct_reads_completed_cb(struct raid_bdev_io *raid_io, enum spdk_bdev_io_status status)
{
	struct stripe_request *stripe_req = raid_io->module_private;

	raid_io->completion_cb = NULL;

	if (status != SPDK_BDEV_IO_STATUS_SUCCESS) {
		raid5f_stripe_request_release(stripe_req);
		raid_bdev_io_complete(raid_io, status);
		return;
	}

	raid5f_xor_stripe(stripe_req, raid5f_stripe_request_reconstruct_xor_done);
}

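/*
 * Reconstruct read: read the corresponding range of every surviving chunk
 * (data and parity) into preallocated buffers, then xor them into the
 * parent I/O's iovecs to recover the missing chunk's data. completion_cb
 * fires once all chunk reads have completed.
 */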
static int
raid5f_submit_reconstruct_read(struct raid_bdev_io *raid_io, uint64_t stripe_index,
			       uint8_t chunk_idx, uint64_t chunk_offset,
			       raid_bdev_io_completion_cb completion_cb)
{
	struct raid_bdev *raid_bdev = raid_io->raid_bdev;
	struct raid5f_io_channel *r5ch = raid_bdev_channel_get_module_ctx(raid_io->raid_ch);
	void *raid_io_md = raid_io->md_buf;
	struct stripe_request *stripe_req;
	struct chunk *chunk;
	int buf_idx;

	stripe_req = TAILQ_FIRST(&r5ch->free_stripe_requests.reconstruct);
	if (!stripe_req) {
		return -ENOMEM;
	}

	raid5f_stripe_request_init(stripe_req, raid_io, stripe_index);

	stripe_req->reconstruct.chunk = &stripe_req->chunks[chunk_idx];
	stripe_req->reconstruct.chunk_offset = chunk_offset;
	buf_idx = 0;

	FOR_EACH_CHUNK(stripe_req, chunk) {
		if (chunk == stripe_req->reconstruct.chunk) {
			int i;
			int ret;

			ret = raid5f_chunk_set_iovcnt(chunk, raid_io->iovcnt);
			if (ret) {
				return ret;
			}

			for (i = 0; i < raid_io->iovcnt; i++) {
				chunk->iovs[i] = raid_io->iovs[i];
			}

			chunk->md_buf = raid_io_md;
		} else {
			struct iovec *iov = &chunk->iovs[0];

			iov->iov_base = stripe_req->reconstruct.chunk_buffers[buf_idx];
			iov->iov_len = raid_io->num_blocks << raid_bdev->blocklen_shift;
			chunk->iovcnt = 1;

			if (raid_io_md) {
				chunk->md_buf = stripe_req->reconstruct.chunk_md_buffers[buf_idx];
			}

			buf_idx++;
		}
	}

	raid_io->module_private = stripe_req;
	raid_io->base_bdev_io_remaining = raid_bdev->num_base_bdevs;
	raid_io->completion_cb = completion_cb;

	TAILQ_REMOVE(&r5ch->free_stripe_requests.reconstruct, stripe_req, link);

	raid5f_stripe_request_submit_chunks(stripe_req);

	return 0;
}

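/*
 * Read request: locate the data chunk holding the requested blocks (skipping
 * the stripe's parity slot) and read directly from that base bdev. If its
 * channel is unavailable, fall back to a reconstruct read.
 */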
static int
raid5f_submit_read_request(struct raid_bdev_io *raid_io, uint64_t stripe_index,
			   uint64_t stripe_offset)
{
	struct raid_bdev *raid_bdev = raid_io->raid_bdev;
	uint8_t chunk_data_idx = stripe_offset >> raid_bdev->strip_size_shift;
	uint8_t p_idx = raid5f_stripe_parity_chunk_index(raid_bdev, stripe_index);
	uint8_t chunk_idx = chunk_data_idx < p_idx ? chunk_data_idx : chunk_data_idx + 1;
	struct raid_base_bdev_info *base_info = &raid_bdev->base_bdev_info[chunk_idx];
	struct spdk_io_channel *base_ch = raid_bdev_channel_get_base_channel(raid_io->raid_ch, chunk_idx);
	uint64_t chunk_offset = stripe_offset - (chunk_data_idx << raid_bdev->strip_size_shift);
	uint64_t base_offset_blocks = (stripe_index << raid_bdev->strip_size_shift) + chunk_offset;
	struct spdk_bdev_ext_io_opts io_opts;
	int ret;

	raid5f_init_ext_io_opts(&io_opts, raid_io);
	if (base_ch == NULL) {
		return raid5f_submit_reconstruct_read(raid_io, stripe_index, chunk_idx, chunk_offset,
						      raid5f_reconstruct_reads_completed_cb);
	}

	ret = raid_bdev_readv_blocks_ext(base_info, base_ch, raid_io->iovs, raid_io->iovcnt,
					 base_offset_blocks, raid_io->num_blocks,
					 raid5f_chunk_read_complete, raid_io, &io_opts);
	if (spdk_unlikely(ret == -ENOMEM)) {
		raid_bdev_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc),
					base_ch, _raid5f_submit_rw_request);
		return 0;
	}

	return ret;
}

static void
raid5f_submit_rw_request(struct raid_bdev_io *raid_io)
{
	struct raid_bdev *raid_bdev = raid_io->raid_bdev;
	struct raid5f_info *r5f_info = raid_bdev->module_private;
	uint64_t stripe_index = raid_io->offset_blocks / r5f_info->stripe_blocks;
	uint64_t stripe_offset = raid_io->offset_blocks % r5f_info->stripe_blocks;
	int ret;

	switch (raid_io->type) {
	case SPDK_BDEV_IO_TYPE_READ:
		assert(raid_io->num_blocks <= raid_bdev->strip_size);
		ret = raid5f_submit_read_request(raid_io, stripe_index, stripe_offset);
		break;
	case SPDK_BDEV_IO_TYPE_WRITE:
		assert(stripe_offset == 0);
		assert(raid_io->num_blocks == r5f_info->stripe_blocks);
		ret = raid5f_submit_write_request(raid_io, stripe_index);
		break;
	default:
		ret = -EINVAL;
		break;
	}

	if (spdk_unlikely(ret)) {
		raid_bdev_io_complete(raid_io, ret == -ENOMEM ? SPDK_BDEV_IO_STATUS_NOMEM :
				      SPDK_BDEV_IO_STATUS_FAILED);
	}
}

static void
raid5f_stripe_request_free(struct stripe_request *stripe_req)
{
	struct chunk *chunk;

	FOR_EACH_CHUNK(stripe_req, chunk) {
		free(chunk->iovs);
	}

	if (stripe_req->type == STRIPE_REQ_WRITE) {
		spdk_dma_free(stripe_req->write.parity_buf);
		spdk_dma_free(stripe_req->write.parity_md_buf);
	} else if (stripe_req->type == STRIPE_REQ_RECONSTRUCT) {
		struct raid5f_info *r5f_info = raid5f_ch_to_r5f_info(stripe_req->r5ch);
		struct raid_bdev *raid_bdev = r5f_info->raid_bdev;
		uint8_t i;

		if (stripe_req->reconstruct.chunk_buffers) {
			for (i = 0; i < raid5f_stripe_data_chunks_num(raid_bdev); i++) {
				spdk_dma_free(stripe_req->reconstruct.chunk_buffers[i]);
			}
			free(stripe_req->reconstruct.chunk_buffers);
		}

		if (stripe_req->reconstruct.chunk_md_buffers) {
			for (i = 0; i < raid5f_stripe_data_chunks_num(raid_bdev); i++) {
				spdk_dma_free(stripe_req->reconstruct.chunk_md_buffers[i]);
			}
			free(stripe_req->reconstruct.chunk_md_buffers);
		}
	} else {
		assert(false);
	}

	free(stripe_req->chunk_xor_buffers);
	free(stripe_req->chunk_xor_md_buffers);
	free(stripe_req->chunk_iov_iters);

	free(stripe_req);
}

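/*
 * Allocate a stripe request with one chunk per base bdev. Write requests get
 * a DMA-able parity buffer (and metadata parity buffer); reconstruct
 * requests get one DMA-able buffer per chunk that has to be read instead.
 */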
static struct stripe_request *
raid5f_stripe_request_alloc(struct raid5f_io_channel *r5ch, enum stripe_request_type type)
{
	struct raid5f_info *r5f_info = raid5f_ch_to_r5f_info(r5ch);
	struct raid_bdev *raid_bdev = r5f_info->raid_bdev;
	uint32_t raid_io_md_size = spdk_bdev_get_md_size(&raid_bdev->bdev);
	struct stripe_request *stripe_req;
	struct chunk *chunk;
	size_t chunk_len;

	stripe_req = calloc(1, sizeof(*stripe_req) + sizeof(*chunk) * raid_bdev->num_base_bdevs);
	if (!stripe_req) {
		return NULL;
	}

	stripe_req->r5ch = r5ch;
	stripe_req->type = type;

	FOR_EACH_CHUNK(stripe_req, chunk) {
		chunk->index = chunk - stripe_req->chunks;
		chunk->iovcnt_max = 4;
		chunk->iovs = calloc(chunk->iovcnt_max, sizeof(chunk->iovs[0]));
		if (!chunk->iovs) {
			goto err;
		}
	}

	chunk_len = raid_bdev->strip_size << raid_bdev->blocklen_shift;

	if (type == STRIPE_REQ_WRITE) {
		stripe_req->write.parity_buf = spdk_dma_malloc(chunk_len, r5f_info->buf_alignment, NULL);
		if (!stripe_req->write.parity_buf) {
			goto err;
		}

		if (raid_io_md_size != 0) {
			stripe_req->write.parity_md_buf = spdk_dma_malloc(raid_bdev->strip_size * raid_io_md_size,
							  r5f_info->buf_alignment, NULL);
			if (!stripe_req->write.parity_md_buf) {
				goto err;
			}
		}
	} else if (type == STRIPE_REQ_RECONSTRUCT) {
		uint8_t n = raid5f_stripe_data_chunks_num(raid_bdev);
		void *buf;
		uint8_t i;

		stripe_req->reconstruct.chunk_buffers = calloc(n, sizeof(void *));
		if (!stripe_req->reconstruct.chunk_buffers) {
			goto err;
		}

		for (i = 0; i < n; i++) {
			buf = spdk_dma_malloc(chunk_len, r5f_info->buf_alignment, NULL);
			if (!buf) {
				goto err;
			}
			stripe_req->reconstruct.chunk_buffers[i] = buf;
		}

		if (raid_io_md_size != 0) {
			stripe_req->reconstruct.chunk_md_buffers = calloc(n, sizeof(void *));
			if (!stripe_req->reconstruct.chunk_md_buffers) {
				goto err;
			}

			for (i = 0; i < n; i++) {
				buf = spdk_dma_malloc(raid_bdev->strip_size * raid_io_md_size, r5f_info->buf_alignment, NULL);
				if (!buf) {
					goto err;
				}
				stripe_req->reconstruct.chunk_md_buffers[i] = buf;
			}
		}
	} else {
		assert(false);
		return NULL;
	}

	stripe_req->chunk_iov_iters = malloc(SPDK_IOVITER_SIZE(raid_bdev->num_base_bdevs));
	if (!stripe_req->chunk_iov_iters) {
		goto err;
	}

	stripe_req->chunk_xor_buffers = calloc(raid5f_stripe_data_chunks_num(raid_bdev),
					       sizeof(stripe_req->chunk_xor_buffers[0]));
	if (!stripe_req->chunk_xor_buffers) {
		goto err;
	}

	stripe_req->chunk_xor_md_buffers = calloc(raid5f_stripe_data_chunks_num(raid_bdev),
					   sizeof(stripe_req->chunk_xor_md_buffers[0]));
	if (!stripe_req->chunk_xor_md_buffers) {
		goto err;
	}

	return stripe_req;
err:
	raid5f_stripe_request_free(stripe_req);
	return NULL;
}

static void
raid5f_ioch_destroy(void *io_device, void *ctx_buf)
{
	struct raid5f_io_channel *r5ch = ctx_buf;
	struct stripe_request *stripe_req;

	assert(TAILQ_EMPTY(&r5ch->xor_retry_queue));

	while ((stripe_req = TAILQ_FIRST(&r5ch->free_stripe_requests.write))) {
		TAILQ_REMOVE(&r5ch->free_stripe_requests.write, stripe_req, link);
		raid5f_stripe_request_free(stripe_req);
	}

	while ((stripe_req = TAILQ_FIRST(&r5ch->free_stripe_requests.reconstruct))) {
		TAILQ_REMOVE(&r5ch->free_stripe_requests.reconstruct, stripe_req, link);
		raid5f_stripe_request_free(stripe_req);
	}

	if (r5ch->accel_ch) {
		spdk_put_io_channel(r5ch->accel_ch);
	}

	free(r5ch->chunk_xor_buffers);
	free(r5ch->chunk_xor_iovs);
	free(r5ch->chunk_xor_iovcnt);
}

static int
raid5f_ioch_create(void *io_device, void *ctx_buf)
{
	struct raid5f_io_channel *r5ch = ctx_buf;
	struct raid5f_info *r5f_info = io_device;
	struct raid_bdev *raid_bdev = r5f_info->raid_bdev;
	struct stripe_request *stripe_req;
	int i;

	TAILQ_INIT(&r5ch->free_stripe_requests.write);
	TAILQ_INIT(&r5ch->free_stripe_requests.reconstruct);
	TAILQ_INIT(&r5ch->xor_retry_queue);

	for (i = 0; i < RAID5F_MAX_STRIPES; i++) {
		stripe_req = raid5f_stripe_request_alloc(r5ch, STRIPE_REQ_WRITE);
		if (!stripe_req) {
			goto err;
		}

		TAILQ_INSERT_HEAD(&r5ch->free_stripe_requests.write, stripe_req, link);
	}

	for (i = 0; i < RAID5F_MAX_STRIPES; i++) {
		stripe_req = raid5f_stripe_request_alloc(r5ch, STRIPE_REQ_RECONSTRUCT);
		if (!stripe_req) {
			goto err;
		}

		TAILQ_INSERT_HEAD(&r5ch->free_stripe_requests.reconstruct, stripe_req, link);
	}

	r5ch->accel_ch = spdk_accel_get_io_channel();
	if (!r5ch->accel_ch) {
		SPDK_ERRLOG("Failed to get accel framework's IO channel\n");
		goto err;
	}

	r5ch->chunk_xor_buffers = calloc(raid_bdev->num_base_bdevs, sizeof(*r5ch->chunk_xor_buffers));
	if (!r5ch->chunk_xor_buffers) {
		goto err;
	}

	r5ch->chunk_xor_iovs = calloc(raid_bdev->num_base_bdevs, sizeof(*r5ch->chunk_xor_iovs));
	if (!r5ch->chunk_xor_iovs) {
		goto err;
	}

	r5ch->chunk_xor_iovcnt = calloc(raid_bdev->num_base_bdevs, sizeof(*r5ch->chunk_xor_iovcnt));
	if (!r5ch->chunk_xor_iovcnt) {
		goto err;
	}

	return 0;
err:
	SPDK_ERRLOG("Failed to initialize io channel\n");
	raid5f_ioch_destroy(r5f_info, r5ch);
	return -ENOMEM;
}

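/*
 * Compute the array geometry at startup: clamp every base bdev to the same
 * usable size (a whole number of strips of the smallest member), derive the
 * stripe counts, and configure the bdev layer to split I/O on strip
 * boundaries for reads and on full write units (whole stripes) for writes.
 */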
static int
raid5f_start(struct raid_bdev *raid_bdev)
{
	uint64_t min_blockcnt = UINT64_MAX;
	uint64_t base_bdev_data_size;
	struct raid_base_bdev_info *base_info;
	struct spdk_bdev *base_bdev;
	struct raid5f_info *r5f_info;
	size_t alignment = 0;

	r5f_info = calloc(1, sizeof(*r5f_info));
	if (!r5f_info) {
		SPDK_ERRLOG("Failed to allocate r5f_info\n");
		return -ENOMEM;
	}
	r5f_info->raid_bdev = raid_bdev;

	RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
		min_blockcnt = spdk_min(min_blockcnt, base_info->data_size);
		if (base_info->desc) {
			base_bdev = spdk_bdev_desc_get_bdev(base_info->desc);
			alignment = spdk_max(alignment, spdk_bdev_get_buf_align(base_bdev));
		}
	}

	base_bdev_data_size = (min_blockcnt / raid_bdev->strip_size) * raid_bdev->strip_size;

	RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
		base_info->data_size = base_bdev_data_size;
	}

	r5f_info->total_stripes = min_blockcnt / raid_bdev->strip_size;
	r5f_info->stripe_blocks = raid_bdev->strip_size * raid5f_stripe_data_chunks_num(raid_bdev);
	r5f_info->buf_alignment = alignment;

	raid_bdev->bdev.blockcnt = r5f_info->stripe_blocks * r5f_info->total_stripes;
	raid_bdev->bdev.optimal_io_boundary = raid_bdev->strip_size;
	raid_bdev->bdev.split_on_optimal_io_boundary = true;
	raid_bdev->bdev.write_unit_size = r5f_info->stripe_blocks;
	raid_bdev->bdev.split_on_write_unit = true;

	raid_bdev->module_private = r5f_info;

	spdk_io_device_register(r5f_info, raid5f_ioch_create, raid5f_ioch_destroy,
				sizeof(struct raid5f_io_channel), NULL);

	return 0;
}

static void
raid5f_io_device_unregister_done(void *io_device)
{
	struct raid5f_info *r5f_info = io_device;

	raid_bdev_module_stop_done(r5f_info->raid_bdev);

	free(r5f_info);
}

static bool
raid5f_stop(struct raid_bdev *raid_bdev)
{
	struct raid5f_info *r5f_info = raid_bdev->module_private;

	spdk_io_device_unregister(r5f_info, raid5f_io_device_unregister_done);

	return false;
}

static struct spdk_io_channel *
raid5f_get_io_channel(struct raid_bdev *raid_bdev)
{
	struct raid5f_info *r5f_info = raid_bdev->module_private;

	return spdk_get_io_channel(r5f_info);
}

static void
raid5f_process_write_completed(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct raid_bdev_process_request *process_req = cb_arg;

	spdk_bdev_free_io(bdev_io);

	raid_bdev_process_request_complete(process_req, success ? 0 : -EIO);
}

static void raid5f_process_submit_write(struct raid_bdev_process_request *process_req);

static void
_raid5f_process_submit_write(void *ctx)
{
	struct raid_bdev_process_request *process_req = ctx;

	raid5f_process_submit_write(process_req);
}

static void
raid5f_process_submit_write(struct raid_bdev_process_request *process_req)
{
	struct raid_bdev_io *raid_io = &process_req->raid_io;
	struct raid_bdev *raid_bdev = raid_io->raid_bdev;
	struct raid5f_info *r5f_info = raid_bdev->module_private;
	uint64_t stripe_index = process_req->offset_blocks / r5f_info->stripe_blocks;
	struct spdk_bdev_ext_io_opts io_opts;
	int ret;

	raid5f_init_ext_io_opts(&io_opts, raid_io);
	ret = raid_bdev_writev_blocks_ext(process_req->target, process_req->target_ch,
					  raid_io->iovs, raid_io->iovcnt,
					  stripe_index << raid_bdev->strip_size_shift, raid_bdev->strip_size,
					  raid5f_process_write_completed, process_req, &io_opts);
	if (spdk_unlikely(ret != 0)) {
		if (ret == -ENOMEM) {
			raid_bdev_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(process_req->target->desc),
						process_req->target_ch, _raid5f_process_submit_write);
		} else {
			raid_bdev_process_request_complete(process_req, ret);
		}
	}
}

static void
raid5f_process_stripe_request_reconstruct_xor_done(struct stripe_request *stripe_req, int status)
{
	struct raid_bdev_io *raid_io = stripe_req->raid_io;
	struct raid_bdev_process_request *process_req = SPDK_CONTAINEROF(raid_io,
			struct raid_bdev_process_request, raid_io);

	raid5f_stripe_request_release(stripe_req);

	if (status != 0) {
		raid_bdev_process_request_complete(process_req, status);
		return;
	}

	raid5f_process_submit_write(process_req);
}

static void
raid5f_process_read_completed(struct raid_bdev_io *raid_io, enum spdk_bdev_io_status status)
{
	struct raid_bdev_process_request *process_req = SPDK_CONTAINEROF(raid_io,
			struct raid_bdev_process_request, raid_io);
	struct stripe_request *stripe_req = raid_io->module_private;

	if (status != SPDK_BDEV_IO_STATUS_SUCCESS) {
		raid5f_stripe_request_release(stripe_req);
		raid_bdev_process_request_complete(process_req, -EIO);
		return;
	}

	raid5f_xor_stripe(stripe_req, raid5f_process_stripe_request_reconstruct_xor_done);
}

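/*
 * Rebuild (process) path: for each stripe, reconstruct the target bdev's
 * strip from the surviving members into the process buffer, then write it
 * to the rebuild target. Handles one strip per call and reports
 * stripe_blocks of logical progress on success.
 */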
static int
raid5f_submit_process_request(struct raid_bdev_process_request *process_req,
			      struct raid_bdev_io_channel *raid_ch)
{
	struct spdk_io_channel *ch = spdk_io_channel_from_ctx(raid_ch);
	struct raid_bdev *raid_bdev = spdk_io_channel_get_io_device(ch);
	struct raid5f_info *r5f_info = raid_bdev->module_private;
	struct raid_bdev_io *raid_io = &process_req->raid_io;
	uint8_t chunk_idx = raid_bdev_base_bdev_slot(process_req->target);
	uint64_t stripe_index = process_req->offset_blocks / r5f_info->stripe_blocks;
	int ret;

	assert((process_req->offset_blocks % r5f_info->stripe_blocks) == 0);

	if (process_req->num_blocks < r5f_info->stripe_blocks) {
		return 0;
	}

	raid_bdev_io_init(raid_io, raid_ch, SPDK_BDEV_IO_TYPE_READ,
			  process_req->offset_blocks, raid_bdev->strip_size,
			  &process_req->iov, 1, process_req->md_buf, NULL, NULL);

	ret = raid5f_submit_reconstruct_read(raid_io, stripe_index, chunk_idx, 0,
					     raid5f_process_read_completed);
	if (spdk_likely(ret == 0)) {
		return r5f_info->stripe_blocks;
	} else if (ret < 0) {
		return ret;
	} else {
		return -EINVAL;
	}
}

static struct raid_bdev_module g_raid5f_module = {
	.level = RAID5F,
	.base_bdevs_min = 3,
	.base_bdevs_constraint = {CONSTRAINT_MAX_BASE_BDEVS_REMOVED, 1},
	.start = raid5f_start,
	.stop = raid5f_stop,
	.submit_rw_request = raid5f_submit_rw_request,
	.get_io_channel = raid5f_get_io_channel,
	.submit_process_request = raid5f_submit_process_request,
};
RAID_MODULE_REGISTER(&g_raid5f_module)

SPDK_LOG_REGISTER_COMPONENT(bdev_raid5f)