/*   SPDX-License-Identifier: BSD-3-Clause
 *   Copyright (C) 2022 Intel Corporation.
 *   All rights reserved.
 */

#include "bdev_raid.h"

#include "spdk/env.h"
#include "spdk/thread.h"
#include "spdk/string.h"
#include "spdk/util.h"
#include "spdk/likely.h"
#include "spdk/log.h"
#include "spdk/accel.h"

/* Maximum concurrent full stripe writes per io channel */
#define RAID5F_MAX_STRIPES 32

struct chunk {
	/* Corresponds to base_bdev index */
	uint8_t index;

	/* Array of iovecs */
	struct iovec *iovs;

	/* Number of used iovecs */
	int iovcnt;

	/* Total number of available iovecs in the array */
	int iovcnt_max;

	/* Pointer to buffer with I/O metadata */
	void *md_buf;
};

struct stripe_request;
typedef void (*stripe_req_xor_cb)(struct stripe_request *stripe_req, int status);

struct stripe_request {
	enum stripe_request_type {
		STRIPE_REQ_WRITE,
		STRIPE_REQ_RECONSTRUCT,
	} type;

	struct raid5f_io_channel *r5ch;

	/* The associated raid_bdev_io */
	struct raid_bdev_io *raid_io;

	/* The stripe's index in the raid array. */
	uint64_t stripe_index;

	/* The stripe's parity chunk */
	struct chunk *parity_chunk;

	union {
		struct {
			/* Buffer for stripe parity */
			void *parity_buf;

			/* Buffer for stripe io metadata parity */
			void *parity_md_buf;
		} write;

		struct {
			/* Array of buffers for reading chunk data */
			void **chunk_buffers;

			/* Array of buffers for reading chunk metadata */
			void **chunk_md_buffers;

			/* Chunk to reconstruct from parity */
			struct chunk *chunk;

			/* Offset from chunk start */
			uint64_t chunk_offset;
		} reconstruct;
	};

	/* Array of iovec iterators for each chunk */
	struct spdk_ioviter *chunk_iov_iters;

	/* Array of source buffer pointers for parity calculation */
	void **chunk_xor_buffers;

	/* Array of source buffer pointers for parity calculation of io metadata */
	void **chunk_xor_md_buffers;

	struct {
		size_t len;
		size_t remaining;
		size_t remaining_md;
		int status;
		stripe_req_xor_cb cb;
	} xor;

	TAILQ_ENTRY(stripe_request) link;

	/* Array of chunks corresponding to base_bdevs */
	struct chunk chunks[0];
};

struct raid5f_info {
	/* The parent raid bdev */
	struct raid_bdev *raid_bdev;

	/* Number of data blocks in a stripe (without parity) */
	uint64_t stripe_blocks;

	/* Number of stripes on this array */
	uint64_t total_stripes;

	/* Alignment for buffer allocation */
	size_t buf_alignment;

	/* block length bit shift for optimized calculation, only valid when no interleaved md */
	uint32_t blocklen_shift;
};

struct raid5f_io_channel {
	/* All available stripe requests on this channel */
	struct {
		TAILQ_HEAD(, stripe_request) write;
		TAILQ_HEAD(, stripe_request) reconstruct;
	} free_stripe_requests;

	/* accel_fw channel */
	struct spdk_io_channel *accel_ch;

	/* For retrying xor if accel_ch runs out of resources */
	TAILQ_HEAD(, stripe_request) xor_retry_queue;

	/* For iterating over chunk iovecs during xor calculation */
	void **chunk_xor_buffers;
	struct iovec **chunk_xor_iovs;
	size_t *chunk_xor_iovcnt;
};

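/*
 * Chunk iteration helpers. FOR_EACH_CHUNK() visits every chunk of a stripe
 * request, including parity; FOR_EACH_DATA_CHUNK() skips the parity chunk.
 */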
#define __CHUNK_IN_RANGE(req, c) \
	c < req->chunks + raid5f_ch_to_r5f_info(req->r5ch)->raid_bdev->num_base_bdevs

#define FOR_EACH_CHUNK_FROM(req, c, from) \
	for (c = from; __CHUNK_IN_RANGE(req, c); c++)

#define FOR_EACH_CHUNK(req, c) \
	FOR_EACH_CHUNK_FROM(req, c, req->chunks)

#define __NEXT_DATA_CHUNK(req, c) \
	c == req->parity_chunk ? c+1 : c

#define FOR_EACH_DATA_CHUNK(req, c) \
	for (c = __NEXT_DATA_CHUNK(req, req->chunks); __CHUNK_IN_RANGE(req, c); \
	     c = __NEXT_DATA_CHUNK(req, c+1))

static inline struct raid5f_info *
raid5f_ch_to_r5f_info(struct raid5f_io_channel *r5ch)
{
	return spdk_io_channel_get_io_device(spdk_io_channel_from_ctx(r5ch));
}

static inline struct stripe_request *
raid5f_chunk_stripe_req(struct chunk *chunk)
{
	return SPDK_CONTAINEROF((chunk - chunk->index), struct stripe_request, chunks);
}

static inline uint8_t
raid5f_stripe_data_chunks_num(const struct raid_bdev *raid_bdev)
{
	return raid_bdev->min_base_bdevs_operational;
}

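/*
 * The parity chunk rotates across the base bdevs as the stripe index grows:
 * stripe 0 places parity on the last chunk, stripe 1 on the one before it,
 * and so on, wrapping around every num_base_bdevs stripes. For example, with
 * 4 base bdevs, stripes 0 through 4 put parity on chunks 3, 2, 1, 0, 3.
 */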
static inline uint8_t
raid5f_stripe_parity_chunk_index(const struct raid_bdev *raid_bdev, uint64_t stripe_index)
{
	return raid5f_stripe_data_chunks_num(raid_bdev) - stripe_index % raid_bdev->num_base_bdevs;
}

static inline void
raid5f_stripe_request_release(struct stripe_request *stripe_req)
{
	if (spdk_likely(stripe_req->type == STRIPE_REQ_WRITE)) {
		TAILQ_INSERT_HEAD(&stripe_req->r5ch->free_stripe_requests.write, stripe_req, link);
	} else if (stripe_req->type == STRIPE_REQ_RECONSTRUCT) {
		TAILQ_INSERT_HEAD(&stripe_req->r5ch->free_stripe_requests.reconstruct, stripe_req, link);
	} else {
		assert(false);
	}
}

static void raid5f_xor_stripe_retry(struct stripe_request *stripe_req);

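/*
 * Completes the xor phase of a stripe request and, if other requests had
 * their accel submissions fail with -ENOMEM in the meantime, kicks off a
 * retry for the first of them.
 */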
static void
raid5f_xor_stripe_done(struct stripe_request *stripe_req)
{
	struct raid5f_io_channel *r5ch = stripe_req->r5ch;

	if (stripe_req->xor.status != 0) {
		SPDK_ERRLOG("stripe xor failed: %s\n", spdk_strerror(-stripe_req->xor.status));
	}

	stripe_req->xor.cb(stripe_req, stripe_req->xor.status);

	if (!TAILQ_EMPTY(&r5ch->xor_retry_queue)) {
		stripe_req = TAILQ_FIRST(&r5ch->xor_retry_queue);
		TAILQ_REMOVE(&r5ch->xor_retry_queue, stripe_req, link);
		raid5f_xor_stripe_retry(stripe_req);
	}
}

static void raid5f_xor_stripe_continue(struct stripe_request *stripe_req);

static void
_raid5f_xor_stripe_cb(struct stripe_request *stripe_req, int status)
{
	if (status != 0) {
		stripe_req->xor.status = status;
	}

	if (stripe_req->xor.remaining + stripe_req->xor.remaining_md == 0) {
		raid5f_xor_stripe_done(stripe_req);
	}
}

static void
raid5f_xor_stripe_cb(void *_stripe_req, int status)
{
	struct stripe_request *stripe_req = _stripe_req;

	stripe_req->xor.remaining -= stripe_req->xor.len;

	if (stripe_req->xor.remaining > 0) {
		stripe_req->xor.len = spdk_ioviter_nextv(stripe_req->chunk_iov_iters,
				      stripe_req->r5ch->chunk_xor_buffers);
		raid5f_xor_stripe_continue(stripe_req);
	}

	_raid5f_xor_stripe_cb(stripe_req, status);
}

static void
raid5f_xor_stripe_md_cb(void *_stripe_req, int status)
{
	struct stripe_request *stripe_req = _stripe_req;

	stripe_req->xor.remaining_md = 0;

	_raid5f_xor_stripe_cb(stripe_req, status);
}

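/*
 * Submits one xor operation to the accel framework covering the current
 * contiguous segment (xor.len bytes) of all source chunks. On -ENOMEM the
 * request is parked on the channel's xor_retry_queue and resumed later from
 * raid5f_xor_stripe_done().
 */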
static void
raid5f_xor_stripe_continue(struct stripe_request *stripe_req)
{
	struct raid5f_io_channel *r5ch = stripe_req->r5ch;
	struct raid_bdev_io *raid_io = stripe_req->raid_io;
	struct raid_bdev *raid_bdev = raid_io->raid_bdev;
	uint8_t n_src = raid5f_stripe_data_chunks_num(raid_bdev);
	uint8_t i;
	int ret;

	assert(stripe_req->xor.len > 0);

	for (i = 0; i < n_src; i++) {
		stripe_req->chunk_xor_buffers[i] = r5ch->chunk_xor_buffers[i];
	}

	ret = spdk_accel_submit_xor(r5ch->accel_ch, r5ch->chunk_xor_buffers[n_src],
				    stripe_req->chunk_xor_buffers, n_src, stripe_req->xor.len,
				    raid5f_xor_stripe_cb, stripe_req);
	if (spdk_unlikely(ret)) {
		if (ret == -ENOMEM) {
			TAILQ_INSERT_HEAD(&r5ch->xor_retry_queue, stripe_req, link);
		} else {
			stripe_req->xor.status = ret;
			raid5f_xor_stripe_done(stripe_req);
		}
	}
}

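/*
 * Calculates the stripe's parity (for writes) or reconstructs a missing chunk
 * (for degraded reads) by xoring all other chunks into the destination chunk.
 * Chunk iovecs may be fragmented differently, so spdk_ioviter is used to walk
 * them in contiguous segments. Metadata buffers are contiguous and are xored
 * with a single accel operation.
 */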
static void
raid5f_xor_stripe(struct stripe_request *stripe_req, stripe_req_xor_cb cb)
{
	struct raid5f_io_channel *r5ch = stripe_req->r5ch;
	struct raid_bdev_io *raid_io = stripe_req->raid_io;
	struct raid_bdev *raid_bdev = raid_io->raid_bdev;
	struct chunk *chunk;
	struct chunk *dest_chunk = NULL;
	uint64_t num_blocks = 0;
	uint8_t c;

	assert(cb != NULL);

	if (spdk_likely(stripe_req->type == STRIPE_REQ_WRITE)) {
		num_blocks = raid_bdev->strip_size;
		dest_chunk = stripe_req->parity_chunk;
	} else if (stripe_req->type == STRIPE_REQ_RECONSTRUCT) {
		num_blocks = raid_io->num_blocks;
		dest_chunk = stripe_req->reconstruct.chunk;
	} else {
		assert(false);
	}

	c = 0;
	FOR_EACH_CHUNK(stripe_req, chunk) {
		if (chunk == dest_chunk) {
			continue;
		}
		r5ch->chunk_xor_iovs[c] = chunk->iovs;
		r5ch->chunk_xor_iovcnt[c] = chunk->iovcnt;
		c++;
	}
	r5ch->chunk_xor_iovs[c] = dest_chunk->iovs;
	r5ch->chunk_xor_iovcnt[c] = dest_chunk->iovcnt;

	stripe_req->xor.len = spdk_ioviter_firstv(stripe_req->chunk_iov_iters,
			      raid_bdev->num_base_bdevs,
			      r5ch->chunk_xor_iovs,
			      r5ch->chunk_xor_iovcnt,
			      r5ch->chunk_xor_buffers);
	stripe_req->xor.remaining = num_blocks * raid_bdev->bdev.blocklen;
	stripe_req->xor.status = 0;
	stripe_req->xor.cb = cb;

	if (raid_io->md_buf != NULL) {
		uint8_t n_src = raid5f_stripe_data_chunks_num(raid_bdev);
		uint64_t len = num_blocks * raid_bdev->bdev.md_len;
		int ret;

		stripe_req->xor.remaining_md = len;

		c = 0;
		FOR_EACH_CHUNK(stripe_req, chunk) {
			if (chunk != dest_chunk) {
				stripe_req->chunk_xor_md_buffers[c] = chunk->md_buf;
				c++;
			}
		}

		ret = spdk_accel_submit_xor(stripe_req->r5ch->accel_ch, dest_chunk->md_buf,
					    stripe_req->chunk_xor_md_buffers, n_src, len,
					    raid5f_xor_stripe_md_cb, stripe_req);
		if (spdk_unlikely(ret)) {
			if (ret == -ENOMEM) {
				TAILQ_INSERT_HEAD(&stripe_req->r5ch->xor_retry_queue, stripe_req, link);
			} else {
				stripe_req->xor.status = ret;
				raid5f_xor_stripe_done(stripe_req);
			}
			return;
		}
	}

	raid5f_xor_stripe_continue(stripe_req);
}

static void
raid5f_xor_stripe_retry(struct stripe_request *stripe_req)
{
	if (stripe_req->xor.remaining_md) {
		raid5f_xor_stripe(stripe_req, stripe_req->xor.cb);
	} else {
		raid5f_xor_stripe_continue(stripe_req);
	}
}

static void
raid5f_stripe_request_chunk_write_complete(struct stripe_request *stripe_req,
		enum spdk_bdev_io_status status)
{
	if (raid_bdev_io_complete_part(stripe_req->raid_io, 1, status)) {
		raid5f_stripe_request_release(stripe_req);
	}
}

static void
raid5f_stripe_request_chunk_read_complete(struct stripe_request *stripe_req,
		enum spdk_bdev_io_status status)
{
	struct raid_bdev_io *raid_io = stripe_req->raid_io;

	raid_bdev_io_complete_part(raid_io, 1, status);
}

static void
raid5f_chunk_complete_bdev_io(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct chunk *chunk = cb_arg;
	struct stripe_request *stripe_req = raid5f_chunk_stripe_req(chunk);
	enum spdk_bdev_io_status status = success ? SPDK_BDEV_IO_STATUS_SUCCESS :
					  SPDK_BDEV_IO_STATUS_FAILED;

	spdk_bdev_free_io(bdev_io);

	if (spdk_likely(stripe_req->type == STRIPE_REQ_WRITE)) {
		raid5f_stripe_request_chunk_write_complete(stripe_req, status);
	} else if (stripe_req->type == STRIPE_REQ_RECONSTRUCT) {
		raid5f_stripe_request_chunk_read_complete(stripe_req, status);
	} else {
		assert(false);
	}
}

static void raid5f_stripe_request_submit_chunks(struct stripe_request *stripe_req);

static void
raid5f_chunk_submit_retry(void *_raid_io)
{
	struct raid_bdev_io *raid_io = _raid_io;
	struct stripe_request *stripe_req = raid_io->module_private;

	raid5f_stripe_request_submit_chunks(stripe_req);
}

static inline void
raid5f_init_ext_io_opts(struct spdk_bdev_ext_io_opts *opts, struct raid_bdev_io *raid_io)
{
	memset(opts, 0, sizeof(*opts));
	opts->size = sizeof(*opts);
	opts->memory_domain = raid_io->memory_domain;
	opts->memory_domain_ctx = raid_io->memory_domain_ctx;
	opts->metadata = raid_io->md_buf;
}

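/*
 * Submits the I/O for a single chunk to its base bdev: a write of strip_size
 * blocks for write requests, or a read of the needed blocks for reconstruct
 * requests (the chunk being reconstructed itself is skipped). A missing base
 * bdev on the write path is counted as success (degraded write). On -ENOMEM
 * the stripe request is retried via the bdev io_wait mechanism; on other
 * errors the remaining unsubmitted chunk I/Os are completed as failed.
 */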
static int
raid5f_chunk_submit(struct chunk *chunk)
{
	struct stripe_request *stripe_req = raid5f_chunk_stripe_req(chunk);
	struct raid_bdev_io *raid_io = stripe_req->raid_io;
	struct raid_bdev *raid_bdev = raid_io->raid_bdev;
	struct raid_base_bdev_info *base_info = &raid_bdev->base_bdev_info[chunk->index];
	struct spdk_io_channel *base_ch = raid_bdev_channel_get_base_channel(raid_io->raid_ch,
					  chunk->index);
	uint64_t base_offset_blocks = (stripe_req->stripe_index << raid_bdev->strip_size_shift);
	struct spdk_bdev_ext_io_opts io_opts;
	int ret;

	raid5f_init_ext_io_opts(&io_opts, raid_io);
	io_opts.metadata = chunk->md_buf;

	raid_io->base_bdev_io_submitted++;

	switch (stripe_req->type) {
	case STRIPE_REQ_WRITE:
		if (base_ch == NULL) {
			raid_bdev_io_complete_part(raid_io, 1, SPDK_BDEV_IO_STATUS_SUCCESS);
			return 0;
		}

		ret = raid_bdev_writev_blocks_ext(base_info, base_ch, chunk->iovs, chunk->iovcnt,
						  base_offset_blocks, raid_bdev->strip_size,
						  raid5f_chunk_complete_bdev_io, chunk, &io_opts);
		break;
	case STRIPE_REQ_RECONSTRUCT:
		if (chunk == stripe_req->reconstruct.chunk) {
			raid_bdev_io_complete_part(raid_io, 1, SPDK_BDEV_IO_STATUS_SUCCESS);
			return 0;
		}

		base_offset_blocks += stripe_req->reconstruct.chunk_offset;

		ret = raid_bdev_readv_blocks_ext(base_info, base_ch, chunk->iovs, chunk->iovcnt,
						 base_offset_blocks, raid_io->num_blocks,
						 raid5f_chunk_complete_bdev_io, chunk, &io_opts);
		break;
	default:
		assert(false);
		ret = -EINVAL;
		break;
	}

	if (spdk_unlikely(ret)) {
		raid_io->base_bdev_io_submitted--;
		if (ret == -ENOMEM) {
			raid_bdev_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc),
						base_ch, raid5f_chunk_submit_retry);
		} else {
			/*
			 * Implicitly complete any I/Os not yet submitted as FAILED. If completing
			 * these means there are no more to complete for the stripe request, we can
			 * release the stripe request as well.
			 */
			uint64_t base_bdev_io_not_submitted;

			if (stripe_req->type == STRIPE_REQ_WRITE) {
				base_bdev_io_not_submitted = raid_bdev->num_base_bdevs -
							     raid_io->base_bdev_io_submitted;
			} else {
				base_bdev_io_not_submitted = raid5f_stripe_data_chunks_num(raid_bdev) -
							     raid_io->base_bdev_io_submitted;
			}

			if (raid_bdev_io_complete_part(raid_io, base_bdev_io_not_submitted,
						       SPDK_BDEV_IO_STATUS_FAILED)) {
				raid5f_stripe_request_release(stripe_req);
			}
		}
	}

	return ret;
}

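/* Grows the chunk's iovec array if needed to hold iovcnt entries. */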
static int
raid5f_chunk_set_iovcnt(struct chunk *chunk, int iovcnt)
{
	if (iovcnt > chunk->iovcnt_max) {
		struct iovec *iovs = chunk->iovs;

		iovs = realloc(iovs, iovcnt * sizeof(*iovs));
		if (!iovs) {
			return -ENOMEM;
		}
		chunk->iovs = iovs;
		chunk->iovcnt_max = iovcnt;
	}
	chunk->iovcnt = iovcnt;

	return 0;
}

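/*
 * Splits the parent raid_io's iovecs across the stripe's data chunks, giving
 * each data chunk strip_size * blocklen bytes, and points the parity chunk at
 * the request's preallocated parity buffers. A raid_io iovec may span chunk
 * boundaries, in which case it is sliced between consecutive chunks.
 */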
static int
raid5f_stripe_request_map_iovecs(struct stripe_request *stripe_req)
{
	struct raid_bdev_io *raid_io = stripe_req->raid_io;
	struct raid_bdev *raid_bdev = raid_io->raid_bdev;
	struct raid5f_info *r5f_info = raid_bdev->module_private;
	struct chunk *chunk;
	int raid_io_iov_idx = 0;
	size_t raid_io_offset = 0;
	size_t raid_io_iov_offset = 0;
	int i;

	FOR_EACH_DATA_CHUNK(stripe_req, chunk) {
		int chunk_iovcnt = 0;
		uint64_t len = raid_bdev->strip_size * raid_bdev->bdev.blocklen;
		size_t off = raid_io_iov_offset;
		int ret;

		for (i = raid_io_iov_idx; i < raid_io->iovcnt; i++) {
			chunk_iovcnt++;
			off += raid_io->iovs[i].iov_len;
			if (off >= raid_io_offset + len) {
				break;
			}
		}

		assert(raid_io_iov_idx + chunk_iovcnt <= raid_io->iovcnt);

		ret = raid5f_chunk_set_iovcnt(chunk, chunk_iovcnt);
		if (ret) {
			return ret;
		}

		if (raid_io->md_buf != NULL) {
			chunk->md_buf = raid_io->md_buf +
					(raid_io_offset >> r5f_info->blocklen_shift) * raid_bdev->bdev.md_len;
		}

		for (i = 0; i < chunk_iovcnt; i++) {
			struct iovec *chunk_iov = &chunk->iovs[i];
			const struct iovec *raid_io_iov = &raid_io->iovs[raid_io_iov_idx];
			size_t chunk_iov_offset = raid_io_offset - raid_io_iov_offset;

			chunk_iov->iov_base = raid_io_iov->iov_base + chunk_iov_offset;
			chunk_iov->iov_len = spdk_min(len, raid_io_iov->iov_len - chunk_iov_offset);
			raid_io_offset += chunk_iov->iov_len;
			len -= chunk_iov->iov_len;

			if (raid_io_offset >= raid_io_iov_offset + raid_io_iov->iov_len) {
				raid_io_iov_idx++;
				raid_io_iov_offset += raid_io_iov->iov_len;
			}
		}

		if (spdk_unlikely(len > 0)) {
			return -EINVAL;
		}
	}

	stripe_req->parity_chunk->iovs[0].iov_base = stripe_req->write.parity_buf;
	stripe_req->parity_chunk->iovs[0].iov_len = raid_bdev->strip_size * raid_bdev->bdev.blocklen;
	stripe_req->parity_chunk->iovcnt = 1;
	stripe_req->parity_chunk->md_buf = stripe_req->write.parity_md_buf;

	return 0;
}

static void
raid5f_stripe_request_submit_chunks(struct stripe_request *stripe_req)
{
	struct raid_bdev_io *raid_io = stripe_req->raid_io;
	struct chunk *start = &stripe_req->chunks[raid_io->base_bdev_io_submitted];
	struct chunk *chunk;

	FOR_EACH_CHUNK_FROM(stripe_req, chunk, start) {
		if (spdk_unlikely(raid5f_chunk_submit(chunk) != 0)) {
			break;
		}
	}
}

static inline void
raid5f_stripe_request_init(struct stripe_request *stripe_req, struct raid_bdev_io *raid_io,
			   uint64_t stripe_index)
{
	stripe_req->raid_io = raid_io;
	stripe_req->stripe_index = stripe_index;
	stripe_req->parity_chunk = &stripe_req->chunks[raid5f_stripe_parity_chunk_index(raid_io->raid_bdev,
				   stripe_index)];
}

static void
raid5f_stripe_write_request_xor_done(struct stripe_request *stripe_req, int status)
{
	struct raid_bdev_io *raid_io = stripe_req->raid_io;

	if (status != 0) {
		raid5f_stripe_request_release(stripe_req);
		raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED);
	} else {
		raid5f_stripe_request_submit_chunks(stripe_req);
	}
}

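/*
 * Full stripe write: raid5f only handles writes of exactly one stripe (the
 * bdev layer is expected to deliver them that way via the write_unit_size and
 * split_on_write_unit settings made in raid5f_start()), so there is no
 * read-modify-write path. Parity is xored first, then all chunks are written
 * out; the parity xor is skipped when the parity chunk's base bdev is missing.
 */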
static int
raid5f_submit_write_request(struct raid_bdev_io *raid_io, uint64_t stripe_index)
{
	struct raid_bdev *raid_bdev = raid_io->raid_bdev;
	struct raid5f_io_channel *r5ch = raid_bdev_channel_get_module_ctx(raid_io->raid_ch);
	struct stripe_request *stripe_req;
	int ret;

	stripe_req = TAILQ_FIRST(&r5ch->free_stripe_requests.write);
	if (!stripe_req) {
		return -ENOMEM;
	}

	raid5f_stripe_request_init(stripe_req, raid_io, stripe_index);

	ret = raid5f_stripe_request_map_iovecs(stripe_req);
	if (spdk_unlikely(ret)) {
		return ret;
	}

	TAILQ_REMOVE(&r5ch->free_stripe_requests.write, stripe_req, link);

	raid_io->module_private = stripe_req;
	raid_io->base_bdev_io_remaining = raid_bdev->num_base_bdevs;

	if (raid_bdev_channel_get_base_channel(raid_io->raid_ch, stripe_req->parity_chunk->index) != NULL) {
		raid5f_xor_stripe(stripe_req, raid5f_stripe_write_request_xor_done);
	} else {
		raid5f_stripe_write_request_xor_done(stripe_req, 0);
	}

	return 0;
}

static void
raid5f_chunk_read_complete(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct raid_bdev_io *raid_io = cb_arg;

	spdk_bdev_free_io(bdev_io);

	raid_bdev_io_complete(raid_io, success ? SPDK_BDEV_IO_STATUS_SUCCESS :
			      SPDK_BDEV_IO_STATUS_FAILED);
}

static void raid5f_submit_rw_request(struct raid_bdev_io *raid_io);

static void
_raid5f_submit_rw_request(void *_raid_io)
{
	struct raid_bdev_io *raid_io = _raid_io;

	raid5f_submit_rw_request(raid_io);
}

static void
raid5f_stripe_request_reconstruct_xor_done(struct stripe_request *stripe_req, int status)
{
	struct raid_bdev_io *raid_io = stripe_req->raid_io;

	raid5f_stripe_request_release(stripe_req);

	raid_bdev_io_complete(raid_io,
			      status == 0 ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED);
}

static void
raid5f_reconstruct_reads_completed_cb(struct raid_bdev_io *raid_io, enum spdk_bdev_io_status status)
{
	struct stripe_request *stripe_req = raid_io->module_private;

	raid_io->completion_cb = NULL;

	if (status != SPDK_BDEV_IO_STATUS_SUCCESS) {
		stripe_req->xor.cb(stripe_req, -EIO);
		return;
	}

	raid5f_xor_stripe(stripe_req, stripe_req->xor.cb);
}

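/*
 * Degraded read: the requested chunk's base bdev is unavailable, so read the
 * corresponding range of every other chunk (including parity) into the
 * request's preallocated buffers and xor them into the caller's iovecs once
 * all reads complete.
 */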
static int
raid5f_submit_reconstruct_read(struct raid_bdev_io *raid_io, uint64_t stripe_index,
			       uint8_t chunk_idx, uint64_t chunk_offset, stripe_req_xor_cb cb)
{
	struct raid_bdev *raid_bdev = raid_io->raid_bdev;
	struct raid5f_io_channel *r5ch = raid_bdev_channel_get_module_ctx(raid_io->raid_ch);
	void *raid_io_md = raid_io->md_buf;
	struct stripe_request *stripe_req;
	struct chunk *chunk;
	int buf_idx;

	assert(cb != NULL);

	stripe_req = TAILQ_FIRST(&r5ch->free_stripe_requests.reconstruct);
	if (!stripe_req) {
		return -ENOMEM;
	}

	raid5f_stripe_request_init(stripe_req, raid_io, stripe_index);

	stripe_req->reconstruct.chunk = &stripe_req->chunks[chunk_idx];
	stripe_req->reconstruct.chunk_offset = chunk_offset;
	stripe_req->xor.cb = cb;
	buf_idx = 0;

	FOR_EACH_CHUNK(stripe_req, chunk) {
		if (chunk == stripe_req->reconstruct.chunk) {
			int i;
			int ret;

			ret = raid5f_chunk_set_iovcnt(chunk, raid_io->iovcnt);
			if (ret) {
				return ret;
			}

			for (i = 0; i < raid_io->iovcnt; i++) {
				chunk->iovs[i] = raid_io->iovs[i];
			}

			chunk->md_buf = raid_io_md;
		} else {
			struct iovec *iov = &chunk->iovs[0];

			iov->iov_base = stripe_req->reconstruct.chunk_buffers[buf_idx];
			iov->iov_len = raid_io->num_blocks * raid_bdev->bdev.blocklen;
			chunk->iovcnt = 1;

			if (raid_io_md) {
				chunk->md_buf = stripe_req->reconstruct.chunk_md_buffers[buf_idx];
			}

			buf_idx++;
		}
	}

	raid_io->module_private = stripe_req;
	raid_io->base_bdev_io_remaining = raid_bdev->num_base_bdevs;
	raid_io->completion_cb = raid5f_reconstruct_reads_completed_cb;

	TAILQ_REMOVE(&r5ch->free_stripe_requests.reconstruct, stripe_req, link);

	raid5f_stripe_request_submit_chunks(stripe_req);

	return 0;
}

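/*
 * Reads never span a strip boundary (optimal_io_boundary splits them), so the
 * request maps to a single chunk. The data chunk index is adjusted to skip
 * over the parity chunk's position in this stripe. If the target base bdev's
 * channel is unavailable, fall back to a reconstruct read.
 */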
static int
raid5f_submit_read_request(struct raid_bdev_io *raid_io, uint64_t stripe_index,
			   uint64_t stripe_offset)
{
	struct raid_bdev *raid_bdev = raid_io->raid_bdev;
	uint8_t chunk_data_idx = stripe_offset >> raid_bdev->strip_size_shift;
	uint8_t p_idx = raid5f_stripe_parity_chunk_index(raid_bdev, stripe_index);
	uint8_t chunk_idx = chunk_data_idx < p_idx ? chunk_data_idx : chunk_data_idx + 1;
	struct raid_base_bdev_info *base_info = &raid_bdev->base_bdev_info[chunk_idx];
	struct spdk_io_channel *base_ch = raid_bdev_channel_get_base_channel(raid_io->raid_ch, chunk_idx);
	uint64_t chunk_offset = stripe_offset - (chunk_data_idx << raid_bdev->strip_size_shift);
	uint64_t base_offset_blocks = (stripe_index << raid_bdev->strip_size_shift) + chunk_offset;
	struct spdk_bdev_ext_io_opts io_opts;
	int ret;

	raid5f_init_ext_io_opts(&io_opts, raid_io);
	if (base_ch == NULL) {
		return raid5f_submit_reconstruct_read(raid_io, stripe_index, chunk_idx, chunk_offset,
						      raid5f_stripe_request_reconstruct_xor_done);
	}

	ret = raid_bdev_readv_blocks_ext(base_info, base_ch, raid_io->iovs, raid_io->iovcnt,
					 base_offset_blocks, raid_io->num_blocks,
					 raid5f_chunk_read_complete, raid_io, &io_opts);
	if (spdk_unlikely(ret == -ENOMEM)) {
		raid_bdev_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc),
					base_ch, _raid5f_submit_rw_request);
		return 0;
	}

	return ret;
}

static void
raid5f_submit_rw_request(struct raid_bdev_io *raid_io)
{
	struct raid_bdev *raid_bdev = raid_io->raid_bdev;
	struct raid5f_info *r5f_info = raid_bdev->module_private;
	uint64_t stripe_index = raid_io->offset_blocks / r5f_info->stripe_blocks;
	uint64_t stripe_offset = raid_io->offset_blocks % r5f_info->stripe_blocks;
	int ret;

	switch (raid_io->type) {
	case SPDK_BDEV_IO_TYPE_READ:
		assert(raid_io->num_blocks <= raid_bdev->strip_size);
		ret = raid5f_submit_read_request(raid_io, stripe_index, stripe_offset);
		break;
	case SPDK_BDEV_IO_TYPE_WRITE:
		assert(stripe_offset == 0);
		assert(raid_io->num_blocks == r5f_info->stripe_blocks);
		ret = raid5f_submit_write_request(raid_io, stripe_index);
		break;
	default:
		ret = -EINVAL;
		break;
	}

	if (spdk_unlikely(ret)) {
		raid_bdev_io_complete(raid_io, ret == -ENOMEM ? SPDK_BDEV_IO_STATUS_NOMEM :
				      SPDK_BDEV_IO_STATUS_FAILED);
	}
}

static void
raid5f_stripe_request_free(struct stripe_request *stripe_req)
{
	struct chunk *chunk;

	FOR_EACH_CHUNK(stripe_req, chunk) {
		free(chunk->iovs);
	}

	if (stripe_req->type == STRIPE_REQ_WRITE) {
		spdk_dma_free(stripe_req->write.parity_buf);
		spdk_dma_free(stripe_req->write.parity_md_buf);
	} else if (stripe_req->type == STRIPE_REQ_RECONSTRUCT) {
		struct raid5f_info *r5f_info = raid5f_ch_to_r5f_info(stripe_req->r5ch);
		struct raid_bdev *raid_bdev = r5f_info->raid_bdev;
		uint8_t i;

		if (stripe_req->reconstruct.chunk_buffers) {
			for (i = 0; i < raid5f_stripe_data_chunks_num(raid_bdev); i++) {
				spdk_dma_free(stripe_req->reconstruct.chunk_buffers[i]);
			}
			free(stripe_req->reconstruct.chunk_buffers);
		}

		if (stripe_req->reconstruct.chunk_md_buffers) {
			for (i = 0; i < raid5f_stripe_data_chunks_num(raid_bdev); i++) {
				spdk_dma_free(stripe_req->reconstruct.chunk_md_buffers[i]);
			}
			free(stripe_req->reconstruct.chunk_md_buffers);
		}
	} else {
		assert(false);
	}

	free(stripe_req->chunk_xor_buffers);
	free(stripe_req->chunk_xor_md_buffers);
	free(stripe_req->chunk_iov_iters);

	free(stripe_req);
}

static struct stripe_request *
raid5f_stripe_request_alloc(struct raid5f_io_channel *r5ch, enum stripe_request_type type)
{
	struct raid5f_info *r5f_info = raid5f_ch_to_r5f_info(r5ch);
	struct raid_bdev *raid_bdev = r5f_info->raid_bdev;
	uint32_t raid_io_md_size = raid_bdev->bdev.md_interleave ? 0 : raid_bdev->bdev.md_len;
	struct stripe_request *stripe_req;
	struct chunk *chunk;
	size_t chunk_len;

	stripe_req = calloc(1, sizeof(*stripe_req) + sizeof(*chunk) * raid_bdev->num_base_bdevs);
	if (!stripe_req) {
		return NULL;
	}

	stripe_req->r5ch = r5ch;
	stripe_req->type = type;

	FOR_EACH_CHUNK(stripe_req, chunk) {
		chunk->index = chunk - stripe_req->chunks;
		chunk->iovcnt_max = 4;
		chunk->iovs = calloc(chunk->iovcnt_max, sizeof(chunk->iovs[0]));
		if (!chunk->iovs) {
			goto err;
		}
	}

	chunk_len = raid_bdev->strip_size * raid_bdev->bdev.blocklen;

	if (type == STRIPE_REQ_WRITE) {
		stripe_req->write.parity_buf = spdk_dma_malloc(chunk_len, r5f_info->buf_alignment, NULL);
		if (!stripe_req->write.parity_buf) {
			goto err;
		}

		if (raid_io_md_size != 0) {
			stripe_req->write.parity_md_buf = spdk_dma_malloc(raid_bdev->strip_size * raid_io_md_size,
							  r5f_info->buf_alignment, NULL);
			if (!stripe_req->write.parity_md_buf) {
				goto err;
			}
		}
	} else if (type == STRIPE_REQ_RECONSTRUCT) {
		uint8_t n = raid5f_stripe_data_chunks_num(raid_bdev);
		void *buf;
		uint8_t i;

		stripe_req->reconstruct.chunk_buffers = calloc(n, sizeof(void *));
		if (!stripe_req->reconstruct.chunk_buffers) {
			goto err;
		}

		for (i = 0; i < n; i++) {
			buf = spdk_dma_malloc(chunk_len, r5f_info->buf_alignment, NULL);
			if (!buf) {
				goto err;
			}
			stripe_req->reconstruct.chunk_buffers[i] = buf;
		}

		if (raid_io_md_size != 0) {
			stripe_req->reconstruct.chunk_md_buffers = calloc(n, sizeof(void *));
			if (!stripe_req->reconstruct.chunk_md_buffers) {
				goto err;
			}

			for (i = 0; i < n; i++) {
				buf = spdk_dma_malloc(raid_bdev->strip_size * raid_io_md_size, r5f_info->buf_alignment, NULL);
				if (!buf) {
					goto err;
				}
				stripe_req->reconstruct.chunk_md_buffers[i] = buf;
			}
		}
	} else {
		assert(false);
		return NULL;
	}

	stripe_req->chunk_iov_iters = malloc(SPDK_IOVITER_SIZE(raid_bdev->num_base_bdevs));
	if (!stripe_req->chunk_iov_iters) {
		goto err;
	}

	stripe_req->chunk_xor_buffers = calloc(raid5f_stripe_data_chunks_num(raid_bdev),
					       sizeof(stripe_req->chunk_xor_buffers[0]));
	if (!stripe_req->chunk_xor_buffers) {
		goto err;
	}

	stripe_req->chunk_xor_md_buffers = calloc(raid5f_stripe_data_chunks_num(raid_bdev),
					   sizeof(stripe_req->chunk_xor_md_buffers[0]));
	if (!stripe_req->chunk_xor_md_buffers) {
		goto err;
	}

	return stripe_req;
err:
	raid5f_stripe_request_free(stripe_req);
	return NULL;
}

static void
raid5f_ioch_destroy(void *io_device, void *ctx_buf)
{
	struct raid5f_io_channel *r5ch = ctx_buf;
	struct stripe_request *stripe_req;

	assert(TAILQ_EMPTY(&r5ch->xor_retry_queue));

	while ((stripe_req = TAILQ_FIRST(&r5ch->free_stripe_requests.write))) {
		TAILQ_REMOVE(&r5ch->free_stripe_requests.write, stripe_req, link);
		raid5f_stripe_request_free(stripe_req);
	}

	while ((stripe_req = TAILQ_FIRST(&r5ch->free_stripe_requests.reconstruct))) {
		TAILQ_REMOVE(&r5ch->free_stripe_requests.reconstruct, stripe_req, link);
		raid5f_stripe_request_free(stripe_req);
	}

	if (r5ch->accel_ch) {
		spdk_put_io_channel(r5ch->accel_ch);
	}

	free(r5ch->chunk_xor_buffers);
	free(r5ch->chunk_xor_iovs);
	free(r5ch->chunk_xor_iovcnt);
}

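/*
 * Per-channel setup: preallocates RAID5F_MAX_STRIPES stripe requests of each
 * type and the scratch arrays used during xor, so that no allocations are
 * needed on the I/O path, and gets a reference to the accel framework's
 * channel for offloading the xor operations.
 */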
static int
raid5f_ioch_create(void *io_device, void *ctx_buf)
{
	struct raid5f_io_channel *r5ch = ctx_buf;
	struct raid5f_info *r5f_info = io_device;
	struct raid_bdev *raid_bdev = r5f_info->raid_bdev;
	struct stripe_request *stripe_req;
	int i;

	TAILQ_INIT(&r5ch->free_stripe_requests.write);
	TAILQ_INIT(&r5ch->free_stripe_requests.reconstruct);
	TAILQ_INIT(&r5ch->xor_retry_queue);

	for (i = 0; i < RAID5F_MAX_STRIPES; i++) {
		stripe_req = raid5f_stripe_request_alloc(r5ch, STRIPE_REQ_WRITE);
		if (!stripe_req) {
			goto err;
		}

		TAILQ_INSERT_HEAD(&r5ch->free_stripe_requests.write, stripe_req, link);
	}

	for (i = 0; i < RAID5F_MAX_STRIPES; i++) {
		stripe_req = raid5f_stripe_request_alloc(r5ch, STRIPE_REQ_RECONSTRUCT);
		if (!stripe_req) {
			goto err;
		}

		TAILQ_INSERT_HEAD(&r5ch->free_stripe_requests.reconstruct, stripe_req, link);
	}

	r5ch->accel_ch = spdk_accel_get_io_channel();
	if (!r5ch->accel_ch) {
		SPDK_ERRLOG("Failed to get accel framework's IO channel\n");
		goto err;
	}

	r5ch->chunk_xor_buffers = calloc(raid_bdev->num_base_bdevs, sizeof(*r5ch->chunk_xor_buffers));
	if (!r5ch->chunk_xor_buffers) {
		goto err;
	}

	r5ch->chunk_xor_iovs = calloc(raid_bdev->num_base_bdevs, sizeof(*r5ch->chunk_xor_iovs));
	if (!r5ch->chunk_xor_iovs) {
		goto err;
	}

	r5ch->chunk_xor_iovcnt = calloc(raid_bdev->num_base_bdevs, sizeof(*r5ch->chunk_xor_iovcnt));
	if (!r5ch->chunk_xor_iovcnt) {
		goto err;
	}

	return 0;
err:
	SPDK_ERRLOG("Failed to initialize io channel\n");
	raid5f_ioch_destroy(r5f_info, r5ch);
	return -ENOMEM;
}

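/*
 * Module start: sizes the array off the smallest base bdev, rounds each base
 * bdev's usable size down to a multiple of the strip size, and configures the
 * bdev so that reads are split on strip boundaries and writes arrive as full
 * stripes (write_unit_size = stripe_blocks, split_on_write_unit = true).
 */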
static int
raid5f_start(struct raid_bdev *raid_bdev)
{
	uint64_t min_blockcnt = UINT64_MAX;
	uint64_t base_bdev_data_size;
	struct raid_base_bdev_info *base_info;
	struct spdk_bdev *base_bdev;
	struct raid5f_info *r5f_info;
	size_t alignment = 0;

	r5f_info = calloc(1, sizeof(*r5f_info));
	if (!r5f_info) {
		SPDK_ERRLOG("Failed to allocate r5f_info\n");
		return -ENOMEM;
	}
	r5f_info->raid_bdev = raid_bdev;

	RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
		min_blockcnt = spdk_min(min_blockcnt, base_info->data_size);
		if (base_info->desc) {
			base_bdev = spdk_bdev_desc_get_bdev(base_info->desc);
			alignment = spdk_max(alignment, spdk_bdev_get_buf_align(base_bdev));
		}
	}

	base_bdev_data_size = (min_blockcnt / raid_bdev->strip_size) * raid_bdev->strip_size;

	RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
		base_info->data_size = base_bdev_data_size;
	}

	r5f_info->total_stripes = min_blockcnt / raid_bdev->strip_size;
	r5f_info->stripe_blocks = raid_bdev->strip_size * raid5f_stripe_data_chunks_num(raid_bdev);
	r5f_info->buf_alignment = alignment;
	if (!raid_bdev->bdev.md_interleave) {
		r5f_info->blocklen_shift = spdk_u32log2(raid_bdev->bdev.blocklen);
	}

	raid_bdev->bdev.blockcnt = r5f_info->stripe_blocks * r5f_info->total_stripes;
	raid_bdev->bdev.optimal_io_boundary = raid_bdev->strip_size;
	raid_bdev->bdev.split_on_optimal_io_boundary = true;
	raid_bdev->bdev.write_unit_size = r5f_info->stripe_blocks;
	raid_bdev->bdev.split_on_write_unit = true;

	raid_bdev->module_private = r5f_info;

	spdk_io_device_register(r5f_info, raid5f_ioch_create, raid5f_ioch_destroy,
				sizeof(struct raid5f_io_channel), NULL);

	return 0;
}

static void
raid5f_io_device_unregister_done(void *io_device)
{
	struct raid5f_info *r5f_info = io_device;

	raid_bdev_module_stop_done(r5f_info->raid_bdev);

	free(r5f_info);
}

static bool
raid5f_stop(struct raid_bdev *raid_bdev)
{
	struct raid5f_info *r5f_info = raid_bdev->module_private;

	spdk_io_device_unregister(r5f_info, raid5f_io_device_unregister_done);

	return false;
}

static struct spdk_io_channel *
raid5f_get_io_channel(struct raid_bdev *raid_bdev)
{
	struct raid5f_info *r5f_info = raid_bdev->module_private;

	return spdk_get_io_channel(r5f_info);
}

static void
raid5f_process_write_completed(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct raid_bdev_process_request *process_req = cb_arg;

	spdk_bdev_free_io(bdev_io);

	raid_bdev_process_request_complete(process_req, success ? 0 : -EIO);
}

static void raid5f_process_submit_write(struct raid_bdev_process_request *process_req);

static void
_raid5f_process_submit_write(void *ctx)
{
	struct raid_bdev_process_request *process_req = ctx;

	raid5f_process_submit_write(process_req);
}

static void
raid5f_process_submit_write(struct raid_bdev_process_request *process_req)
{
	struct raid_bdev_io *raid_io = &process_req->raid_io;
	struct raid_bdev *raid_bdev = raid_io->raid_bdev;
	struct raid5f_info *r5f_info = raid_bdev->module_private;
	uint64_t stripe_index = process_req->offset_blocks / r5f_info->stripe_blocks;
	struct spdk_bdev_ext_io_opts io_opts;
	int ret;

	raid5f_init_ext_io_opts(&io_opts, raid_io);
	ret = raid_bdev_writev_blocks_ext(process_req->target, process_req->target_ch,
					  raid_io->iovs, raid_io->iovcnt,
					  stripe_index << raid_bdev->strip_size_shift, raid_bdev->strip_size,
					  raid5f_process_write_completed, process_req, &io_opts);
	if (spdk_unlikely(ret != 0)) {
		if (ret == -ENOMEM) {
			raid_bdev_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(process_req->target->desc),
						process_req->target_ch, _raid5f_process_submit_write);
		} else {
			raid_bdev_process_request_complete(process_req, ret);
		}
	}
}

static void
raid5f_process_stripe_request_reconstruct_xor_done(struct stripe_request *stripe_req, int status)
{
	struct raid_bdev_io *raid_io = stripe_req->raid_io;
	struct raid_bdev_process_request *process_req = SPDK_CONTAINEROF(raid_io,
			struct raid_bdev_process_request, raid_io);

	raid5f_stripe_request_release(stripe_req);

	if (status != 0) {
		raid_bdev_process_request_complete(process_req, status);
		return;
	}

	raid5f_process_submit_write(process_req);
}

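/*
 * Rebuild process: reconstructs the target base bdev's strip of one stripe
 * via a reconstruct read, then writes the result to the target. Returns the
 * number of blocks processed (one full stripe) or a negative errno.
 */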
static int
raid5f_submit_process_request(struct raid_bdev_process_request *process_req,
			      struct raid_bdev_io_channel *raid_ch)
{
	struct spdk_io_channel *ch = spdk_io_channel_from_ctx(raid_ch);
	struct raid_bdev *raid_bdev = spdk_io_channel_get_io_device(ch);
	struct raid5f_info *r5f_info = raid_bdev->module_private;
	struct raid_bdev_io *raid_io = &process_req->raid_io;
	uint8_t chunk_idx = raid_bdev_base_bdev_slot(process_req->target);
	uint64_t stripe_index = process_req->offset_blocks / r5f_info->stripe_blocks;
	int ret;

	assert((process_req->offset_blocks % r5f_info->stripe_blocks) == 0);

	if (process_req->num_blocks < r5f_info->stripe_blocks) {
		return 0;
	}

	raid_bdev_io_init(raid_io, raid_ch, SPDK_BDEV_IO_TYPE_READ,
			  process_req->offset_blocks, raid_bdev->strip_size,
			  &process_req->iov, 1, process_req->md_buf, NULL, NULL);

	ret = raid5f_submit_reconstruct_read(raid_io, stripe_index, chunk_idx, 0,
					     raid5f_process_stripe_request_reconstruct_xor_done);
	if (spdk_likely(ret == 0)) {
		return r5f_info->stripe_blocks;
	} else if (ret < 0) {
		return ret;
	} else {
		return -EINVAL;
	}
}

static struct raid_bdev_module g_raid5f_module = {
	.level = RAID5F,
	.base_bdevs_min = 3,
	.base_bdevs_constraint = {CONSTRAINT_MAX_BASE_BDEVS_REMOVED, 1},
	.start = raid5f_start,
	.stop = raid5f_stop,
	.submit_rw_request = raid5f_submit_rw_request,
	.get_io_channel = raid5f_get_io_channel,
	.submit_process_request = raid5f_submit_process_request,
};
RAID_MODULE_REGISTER(&g_raid5f_module)

SPDK_LOG_REGISTER_COMPONENT(bdev_raid5f)