/* /spdk/module/bdev/raid/raid5f.c (revision 588dfe314bb83d86effdf67ec42837b11c2620bf) */
/*   SPDX-License-Identifier: BSD-3-Clause
 *   Copyright (C) 2022 Intel Corporation.
 *   All rights reserved.
 */

#include "bdev_raid.h"

#include "spdk/env.h"
#include "spdk/thread.h"
#include "spdk/string.h"
#include "spdk/util.h"
#include "spdk/likely.h"
#include "spdk/log.h"
#include "spdk/xor.h"

/* Maximum concurrent full stripe writes per io channel */
#define RAID5F_MAX_STRIPES 32

struct chunk {
	/* Index of the corresponding base bdev */
	uint8_t index;

	/* Array of iovecs */
	struct iovec *iovs;

	/* Number of used iovecs */
	int iovcnt;

	/* Total number of available iovecs in the array */
	int iovcnt_max;
};

struct stripe_request {
	struct raid5f_io_channel *r5ch;

	/* The associated raid_bdev_io */
	struct raid_bdev_io *raid_io;

	/* The stripe's index in the raid array */
	uint64_t stripe_index;

	/* The stripe's parity chunk */
	struct chunk *parity_chunk;

	/* Buffer for stripe parity */
	void *parity_buf;

	TAILQ_ENTRY(stripe_request) link;

	/* Array of chunks corresponding to base_bdevs; sized to num_base_bdevs at allocation */
	struct chunk chunks[0];
};

struct raid5f_info {
	/* The parent raid bdev */
	struct raid_bdev *raid_bdev;

	/* Number of data blocks in a stripe (without parity) */
	uint64_t stripe_blocks;

	/* Total number of stripes in the array */
	uint64_t total_stripes;

	/* Alignment for buffer allocation */
	size_t buf_alignment;
};

struct raid5f_io_channel {
	/* All available stripe requests on this channel */
	TAILQ_HEAD(, stripe_request) free_stripe_requests;

	/* Array of iovec iterators for each data chunk */
	struct iov_iter {
		struct iovec *iovs;
		int iovcnt;
		int index;
		size_t offset;
	} *chunk_iov_iters;

	/* Array of source buffer pointers for parity calculation */
	void **chunk_xor_buffers;

	/* Bounce buffers for parity calculation in case of unaligned source buffers */
	struct iovec *chunk_xor_bounce_buffers;
};

#define __CHUNK_IN_RANGE(req, c) \
	c < req->chunks + raid5f_ch_to_r5f_info(req->r5ch)->raid_bdev->num_base_bdevs

#define FOR_EACH_CHUNK_FROM(req, c, from) \
	for (c = from; __CHUNK_IN_RANGE(req, c); c++)

#define FOR_EACH_CHUNK(req, c) \
	FOR_EACH_CHUNK_FROM(req, c, req->chunks)

#define __NEXT_DATA_CHUNK(req, c) \
	c == req->parity_chunk ? c+1 : c

#define FOR_EACH_DATA_CHUNK(req, c) \
	for (c = __NEXT_DATA_CHUNK(req, req->chunks); __CHUNK_IN_RANGE(req, c); \
	     c = __NEXT_DATA_CHUNK(req, c+1))
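
/*
 * __NEXT_DATA_CHUNK steps over the stripe's parity chunk, so
 * FOR_EACH_DATA_CHUNK visits only data chunks. For example, with 4 base
 * bdevs and the parity chunk at index 2, it visits chunks 0, 1 and 3.
 */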

static inline struct raid5f_info *
raid5f_ch_to_r5f_info(struct raid5f_io_channel *r5ch)
{
	return spdk_io_channel_get_io_device(spdk_io_channel_from_ctx(r5ch));
}

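/*
 * A chunk's index equals its position in the stripe request's trailing
 * chunks[] array, so subtracting the index yields the start of the array and
 * SPDK_CONTAINEROF() then recovers the enclosing stripe_request.
 */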
static inline struct stripe_request *
raid5f_chunk_stripe_req(struct chunk *chunk)
{
	return SPDK_CONTAINEROF((chunk - chunk->index), struct stripe_request, chunks);
}

static inline uint8_t
raid5f_stripe_data_chunks_num(const struct raid_bdev *raid_bdev)
{
	return raid_bdev->num_base_bdevs - raid_bdev->module->base_bdevs_max_degraded;
}

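/*
 * Parity placement rotates across the base bdevs from stripe to stripe.
 * For example, with 4 base bdevs (3 data chunks per stripe):
 *
 *   stripe 0 -> parity on chunk 3
 *   stripe 1 -> parity on chunk 2
 *   stripe 2 -> parity on chunk 1
 *   stripe 3 -> parity on chunk 0
 *   stripe 4 -> parity on chunk 3 (pattern repeats)
 */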
static inline uint8_t
raid5f_stripe_parity_chunk_index(const struct raid_bdev *raid_bdev, uint64_t stripe_index)
{
	return raid5f_stripe_data_chunks_num(raid_bdev) - stripe_index % raid_bdev->num_base_bdevs;
}

static inline void
raid5f_stripe_request_release(struct stripe_request *stripe_req)
{
	TAILQ_INSERT_HEAD(&stripe_req->r5ch->free_stripe_requests, stripe_req, link);
}

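/*
 * Generate the stripe's parity by XOR-ing all data chunks into
 * stripe_req->parity_buf. Source buffers that do not satisfy the XOR engine's
 * alignment requirements are first copied into the channel's bounce buffers.
 */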
static int
raid5f_xor_stripe(struct stripe_request *stripe_req)
{
	struct raid_bdev_io *raid_io = stripe_req->raid_io;
	struct raid5f_io_channel *r5ch = stripe_req->r5ch;
	struct raid_bdev *raid_bdev = raid_io->raid_bdev;
	size_t remaining = raid_bdev->strip_size << raid_bdev->blocklen_shift;
	uint8_t n_src = raid5f_stripe_data_chunks_num(raid_bdev);
	void *dest = stripe_req->parity_buf;
	size_t alignment_mask = spdk_xor_get_optimal_alignment() - 1;
	struct chunk *chunk;
	int ret;
	uint8_t c;

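	/*
	 * First pass: set up an iovec iterator over each data chunk. A chunk
	 * whose iovecs are not suitably aligned is copied into the per-channel
	 * bounce buffer and iterated as a single iovec instead.
	 */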
	c = 0;
	FOR_EACH_DATA_CHUNK(stripe_req, chunk) {
		struct iov_iter *iov_iter = &r5ch->chunk_iov_iters[c];
		bool aligned = true;
		int i;

		for (i = 0; i < chunk->iovcnt; i++) {
			if (((uintptr_t)chunk->iovs[i].iov_base & alignment_mask) ||
			    (chunk->iovs[i].iov_len & alignment_mask)) {
				aligned = false;
				break;
			}
		}

		if (aligned) {
			iov_iter->iovs = chunk->iovs;
			iov_iter->iovcnt = chunk->iovcnt;
		} else {
			iov_iter->iovs = &r5ch->chunk_xor_bounce_buffers[c];
			iov_iter->iovcnt = 1;
			spdk_iovcpy(chunk->iovs, chunk->iovcnt, iov_iter->iovs, iov_iter->iovcnt);
		}

		iov_iter->index = 0;
		iov_iter->offset = 0;

		c++;
	}

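	/*
	 * Second pass: advance all iterators in lockstep, XOR-ing the largest
	 * run that is contiguous in every source chunk into the parity buffer.
	 */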
	while (remaining > 0) {
		size_t len = remaining;
		uint8_t i;

		for (i = 0; i < n_src; i++) {
			struct iov_iter *iov_iter = &r5ch->chunk_iov_iters[i];
			struct iovec *iov = &iov_iter->iovs[iov_iter->index];

			len = spdk_min(len, iov->iov_len - iov_iter->offset);
			r5ch->chunk_xor_buffers[i] = iov->iov_base + iov_iter->offset;
		}

		assert(len > 0);

		ret = spdk_xor_gen(dest, r5ch->chunk_xor_buffers, n_src, len);
		if (spdk_unlikely(ret)) {
			SPDK_ERRLOG("stripe xor failed\n");
			return ret;
		}

		for (i = 0; i < n_src; i++) {
			struct iov_iter *iov_iter = &r5ch->chunk_iov_iters[i];
			struct iovec *iov = &iov_iter->iovs[iov_iter->index];

			iov_iter->offset += len;
			if (iov_iter->offset == iov->iov_len) {
				iov_iter->offset = 0;
				iov_iter->index++;
			}
		}
		dest += len;

		remaining -= len;
	}

	return 0;
}

static void
raid5f_chunk_write_complete(struct chunk *chunk, enum spdk_bdev_io_status status)
{
	struct stripe_request *stripe_req = raid5f_chunk_stripe_req(chunk);

	if (raid_bdev_io_complete_part(stripe_req->raid_io, 1, status)) {
		raid5f_stripe_request_release(stripe_req);
	}
}

static void
raid5f_chunk_write_complete_bdev_io(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct chunk *chunk = cb_arg;

	spdk_bdev_free_io(bdev_io);

	raid5f_chunk_write_complete(chunk, success ? SPDK_BDEV_IO_STATUS_SUCCESS :
				    SPDK_BDEV_IO_STATUS_FAILED);
}

static void raid5f_stripe_request_submit_chunks(struct stripe_request *stripe_req);

static void
raid5f_chunk_write_retry(void *_raid_io)
{
	struct raid_bdev_io *raid_io = _raid_io;
	struct stripe_request *stripe_req = raid_io->module_private;

	raid5f_stripe_request_submit_chunks(stripe_req);
}

static int
raid5f_chunk_write(struct chunk *chunk)
{
	struct stripe_request *stripe_req = raid5f_chunk_stripe_req(chunk);
	struct raid_bdev_io *raid_io = stripe_req->raid_io;
	struct raid_bdev *raid_bdev = raid_io->raid_bdev;
	struct raid_base_bdev_info *base_info = &raid_bdev->base_bdev_info[chunk->index];
	struct spdk_io_channel *base_ch = raid_io->raid_ch->base_channel[chunk->index];
	uint64_t base_offset_blocks = (stripe_req->stripe_index << raid_bdev->strip_size_shift);
	int ret;

	ret = spdk_bdev_writev_blocks(base_info->desc, base_ch, chunk->iovs, chunk->iovcnt,
				      base_offset_blocks, raid_bdev->strip_size,
				      raid5f_chunk_write_complete_bdev_io, chunk);
	if (spdk_unlikely(ret)) {
		if (ret == -ENOMEM) {
			raid_bdev_queue_io_wait(raid_io, base_info->bdev, base_ch,
						raid5f_chunk_write_retry);
		} else {
			/*
			 * Implicitly complete any I/Os not yet submitted as FAILED. If completing
			 * these means there are no more to complete for the stripe request, we can
			 * release the stripe request as well.
			 */
			uint64_t base_bdev_io_not_submitted = raid_bdev->num_base_bdevs -
							      raid_io->base_bdev_io_submitted;

			if (raid_bdev_io_complete_part(stripe_req->raid_io, base_bdev_io_not_submitted,
						       SPDK_BDEV_IO_STATUS_FAILED)) {
				raid5f_stripe_request_release(stripe_req);
			}
		}
	}

	return ret;
}

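/*
 * Split the raid I/O's iovecs among the stripe's data chunks, strip_size
 * blocks per chunk, growing a chunk's iovec array on demand. The parity
 * chunk is pointed at the stripe request's parity buffer.
 */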
static int
raid5f_stripe_request_map_iovecs(struct stripe_request *stripe_req,
				 const struct iovec *raid_io_iovs,
				 int raid_io_iovcnt)
{
	struct raid_bdev *raid_bdev = stripe_req->raid_io->raid_bdev;
	struct chunk *chunk;
	int raid_io_iov_idx = 0;
	size_t raid_io_offset = 0;
	size_t raid_io_iov_offset = 0;
	int i;

	FOR_EACH_DATA_CHUNK(stripe_req, chunk) {
		int chunk_iovcnt = 0;
		uint64_t len = raid_bdev->strip_size << raid_bdev->blocklen_shift;
		size_t off = raid_io_iov_offset;

		for (i = raid_io_iov_idx; i < raid_io_iovcnt; i++) {
			chunk_iovcnt++;
			off += raid_io_iovs[i].iov_len;
			if (off >= raid_io_offset + len) {
				break;
			}
		}

		assert(raid_io_iov_idx + chunk_iovcnt <= raid_io_iovcnt);

		if (chunk_iovcnt > chunk->iovcnt_max) {
			struct iovec *iovs = chunk->iovs;

			iovs = realloc(iovs, chunk_iovcnt * sizeof(*iovs));
			if (!iovs) {
				return -ENOMEM;
			}
			chunk->iovs = iovs;
			chunk->iovcnt_max = chunk_iovcnt;
		}
		chunk->iovcnt = chunk_iovcnt;

		for (i = 0; i < chunk_iovcnt; i++) {
			struct iovec *chunk_iov = &chunk->iovs[i];
			const struct iovec *raid_io_iov = &raid_io_iovs[raid_io_iov_idx];
			size_t chunk_iov_offset = raid_io_offset - raid_io_iov_offset;

			chunk_iov->iov_base = raid_io_iov->iov_base + chunk_iov_offset;
			chunk_iov->iov_len = spdk_min(len, raid_io_iov->iov_len - chunk_iov_offset);
			raid_io_offset += chunk_iov->iov_len;
			len -= chunk_iov->iov_len;

			if (raid_io_offset >= raid_io_iov_offset + raid_io_iov->iov_len) {
				raid_io_iov_idx++;
				raid_io_iov_offset += raid_io_iov->iov_len;
			}
		}

		if (spdk_unlikely(len > 0)) {
			return -EINVAL;
		}
	}

	stripe_req->parity_chunk->iovs[0].iov_base = stripe_req->parity_buf;
	stripe_req->parity_chunk->iovs[0].iov_len = raid_bdev->strip_size <<
			raid_bdev->blocklen_shift;
	stripe_req->parity_chunk->iovcnt = 1;

	return 0;
}

static void
raid5f_stripe_request_submit_chunks(struct stripe_request *stripe_req)
{
	struct raid_bdev_io *raid_io = stripe_req->raid_io;
	struct chunk *start = &stripe_req->chunks[raid_io->base_bdev_io_submitted];
	struct chunk *chunk;

	FOR_EACH_CHUNK_FROM(stripe_req, chunk, start) {
		if (spdk_unlikely(raid5f_chunk_write(chunk) != 0)) {
			break;
		}
		raid_io->base_bdev_io_submitted++;
	}
}

static void
raid5f_submit_stripe_request(struct stripe_request *stripe_req)
{
	if (spdk_unlikely(raid5f_xor_stripe(stripe_req) != 0)) {
		raid_bdev_io_complete(stripe_req->raid_io, SPDK_BDEV_IO_STATUS_FAILED);
		return;
	}

	raid5f_stripe_request_submit_chunks(stripe_req);
}

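/*
 * Writes are always full-stripe - the bdev layer splits submissions on
 * write_unit_size, see raid5f_start() - so the parity can be computed from
 * the new data alone, without reading back the old data or parity.
 */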
static int
raid5f_submit_write_request(struct raid_bdev_io *raid_io, uint64_t stripe_index)
{
	struct raid_bdev *raid_bdev = raid_io->raid_bdev;
	struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io);
	struct raid5f_io_channel *r5ch = spdk_io_channel_get_ctx(raid_io->raid_ch->module_channel);
	struct stripe_request *stripe_req;
	int ret;

	stripe_req = TAILQ_FIRST(&r5ch->free_stripe_requests);
	if (!stripe_req) {
		return -ENOMEM;
	}

	stripe_req->stripe_index = stripe_index;
	stripe_req->parity_chunk = stripe_req->chunks + raid5f_stripe_parity_chunk_index(raid_bdev,
				   stripe_req->stripe_index);
	stripe_req->raid_io = raid_io;

	ret = raid5f_stripe_request_map_iovecs(stripe_req, bdev_io->u.bdev.iovs,
					       bdev_io->u.bdev.iovcnt);
	if (spdk_unlikely(ret)) {
		return ret;
	}

	TAILQ_REMOVE(&r5ch->free_stripe_requests, stripe_req, link);

	raid_io->module_private = stripe_req;
	raid_io->base_bdev_io_remaining = raid_bdev->num_base_bdevs;

	raid5f_submit_stripe_request(stripe_req);

	return 0;
}

static void
raid5f_chunk_read_complete(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct raid_bdev_io *raid_io = cb_arg;

	spdk_bdev_free_io(bdev_io);

	raid_bdev_io_complete(raid_io, success ? SPDK_BDEV_IO_STATUS_SUCCESS :
			      SPDK_BDEV_IO_STATUS_FAILED);
}

static void raid5f_submit_rw_request(struct raid_bdev_io *raid_io);

static void
_raid5f_submit_rw_request(void *_raid_io)
{
	struct raid_bdev_io *raid_io = _raid_io;

	raid5f_submit_rw_request(raid_io);
}

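/*
 * Reads map directly to a single data chunk - the bdev layer splits reads on
 * the strip_size optimal_io_boundary - skipping over the stripe's parity
 * chunk when translating the data chunk index to a base bdev index.
 */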
static int
raid5f_submit_read_request(struct raid_bdev_io *raid_io, uint64_t stripe_index,
			   uint64_t stripe_offset)
{
	struct raid_bdev *raid_bdev = raid_io->raid_bdev;
	uint8_t chunk_data_idx = stripe_offset >> raid_bdev->strip_size_shift;
	uint8_t p_idx = raid5f_stripe_parity_chunk_index(raid_bdev, stripe_index);
	uint8_t chunk_idx = chunk_data_idx < p_idx ? chunk_data_idx : chunk_data_idx + 1;
	struct raid_base_bdev_info *base_info = &raid_bdev->base_bdev_info[chunk_idx];
	struct spdk_io_channel *base_ch = raid_io->raid_ch->base_channel[chunk_idx];
	uint64_t chunk_offset = stripe_offset - (chunk_data_idx << raid_bdev->strip_size_shift);
	uint64_t base_offset_blocks = (stripe_index << raid_bdev->strip_size_shift) + chunk_offset;
	struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io);
	int ret;

	ret = spdk_bdev_readv_blocks(base_info->desc, base_ch,
				     bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
				     base_offset_blocks, bdev_io->u.bdev.num_blocks,
				     raid5f_chunk_read_complete, raid_io);
	if (spdk_unlikely(ret == -ENOMEM)) {
		raid_bdev_queue_io_wait(raid_io, base_info->bdev, base_ch,
					_raid5f_submit_rw_request);
		return 0;
	}

	return ret;
}

static void
raid5f_submit_rw_request(struct raid_bdev_io *raid_io)
{
	struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io);
	struct raid_bdev *raid_bdev = raid_io->raid_bdev;
	struct raid5f_info *r5f_info = raid_bdev->module_private;
	uint64_t offset_blocks = bdev_io->u.bdev.offset_blocks;
	uint64_t stripe_index = offset_blocks / r5f_info->stripe_blocks;
	uint64_t stripe_offset = offset_blocks % r5f_info->stripe_blocks;
	int ret;

	switch (bdev_io->type) {
	case SPDK_BDEV_IO_TYPE_READ:
		assert(bdev_io->u.bdev.num_blocks <= raid_bdev->strip_size);
		ret = raid5f_submit_read_request(raid_io, stripe_index, stripe_offset);
		break;
	case SPDK_BDEV_IO_TYPE_WRITE:
		assert(stripe_offset == 0);
		assert(bdev_io->u.bdev.num_blocks == r5f_info->stripe_blocks);
		ret = raid5f_submit_write_request(raid_io, stripe_index);
		break;
	default:
		ret = -EINVAL;
		break;
	}

	if (spdk_unlikely(ret)) {
		raid_bdev_io_complete(raid_io, ret == -ENOMEM ? SPDK_BDEV_IO_STATUS_NOMEM :
				      SPDK_BDEV_IO_STATUS_FAILED);
	}
}

static void
raid5f_stripe_request_free(struct stripe_request *stripe_req)
{
	struct chunk *chunk;

	FOR_EACH_CHUNK(stripe_req, chunk) {
		free(chunk->iovs);
	}

	spdk_dma_free(stripe_req->parity_buf);

	free(stripe_req);
}

static struct stripe_request *
raid5f_stripe_request_alloc(struct raid5f_io_channel *r5ch)
{
	struct raid5f_info *r5f_info = raid5f_ch_to_r5f_info(r5ch);
	struct raid_bdev *raid_bdev = r5f_info->raid_bdev;
	struct stripe_request *stripe_req;
	struct chunk *chunk;

	stripe_req = calloc(1, sizeof(*stripe_req) +
			    sizeof(struct chunk) * raid_bdev->num_base_bdevs);
	if (!stripe_req) {
		return NULL;
	}

	stripe_req->r5ch = r5ch;

	FOR_EACH_CHUNK(stripe_req, chunk) {
		chunk->index = chunk - stripe_req->chunks;
		chunk->iovcnt_max = 4;
		chunk->iovs = calloc(chunk->iovcnt_max, sizeof(chunk->iovs[0]));
		if (!chunk->iovs) {
			goto err;
		}
	}

	stripe_req->parity_buf = spdk_dma_malloc(raid_bdev->strip_size << raid_bdev->blocklen_shift,
				 r5f_info->buf_alignment, NULL);
	if (!stripe_req->parity_buf) {
		goto err;
	}

	return stripe_req;
err:
	raid5f_stripe_request_free(stripe_req);
	return NULL;
}

static void
raid5f_ioch_destroy(void *io_device, void *ctx_buf)
{
	struct raid5f_io_channel *r5ch = ctx_buf;
	struct raid5f_info *r5f_info = io_device;
	struct raid_bdev *raid_bdev = r5f_info->raid_bdev;
	struct stripe_request *stripe_req;
	int i;

	while ((stripe_req = TAILQ_FIRST(&r5ch->free_stripe_requests))) {
		TAILQ_REMOVE(&r5ch->free_stripe_requests, stripe_req, link);
		raid5f_stripe_request_free(stripe_req);
	}

	if (r5ch->chunk_xor_bounce_buffers) {
		for (i = 0; i < raid5f_stripe_data_chunks_num(raid_bdev); i++) {
			free(r5ch->chunk_xor_bounce_buffers[i].iov_base);
		}
		free(r5ch->chunk_xor_bounce_buffers);
	}

	free(r5ch->chunk_xor_buffers);
	free(r5ch->chunk_iov_iters);
}

static int
raid5f_ioch_create(void *io_device, void *ctx_buf)
{
	struct raid5f_io_channel *r5ch = ctx_buf;
	struct raid5f_info *r5f_info = io_device;
	struct raid_bdev *raid_bdev = r5f_info->raid_bdev;
	size_t chunk_len = raid_bdev->strip_size << raid_bdev->blocklen_shift;
	int status = 0;
	int i;

	TAILQ_INIT(&r5ch->free_stripe_requests);

	for (i = 0; i < RAID5F_MAX_STRIPES; i++) {
		struct stripe_request *stripe_req;

		stripe_req = raid5f_stripe_request_alloc(r5ch);
		if (!stripe_req) {
			status = -ENOMEM;
			goto out;
		}

		TAILQ_INSERT_HEAD(&r5ch->free_stripe_requests, stripe_req, link);
	}

	r5ch->chunk_iov_iters = calloc(raid5f_stripe_data_chunks_num(raid_bdev),
				       sizeof(r5ch->chunk_iov_iters[0]));
	if (!r5ch->chunk_iov_iters) {
		status = -ENOMEM;
		goto out;
	}

	r5ch->chunk_xor_buffers = calloc(raid5f_stripe_data_chunks_num(raid_bdev),
					 sizeof(r5ch->chunk_xor_buffers[0]));
	if (!r5ch->chunk_xor_buffers) {
		status = -ENOMEM;
		goto out;
	}

	r5ch->chunk_xor_bounce_buffers = calloc(raid5f_stripe_data_chunks_num(raid_bdev),
						sizeof(r5ch->chunk_xor_bounce_buffers[0]));
	if (!r5ch->chunk_xor_bounce_buffers) {
		status = -ENOMEM;
		goto out;
	}

	for (i = 0; i < raid5f_stripe_data_chunks_num(raid_bdev); i++) {
		status = posix_memalign(&r5ch->chunk_xor_bounce_buffers[i].iov_base,
					spdk_xor_get_optimal_alignment(), chunk_len);
		if (status) {
			goto out;
		}
		r5ch->chunk_xor_bounce_buffers[i].iov_len = chunk_len;
	}
out:
	if (status) {
		SPDK_ERRLOG("Failed to initialize io channel\n");
		raid5f_ioch_destroy(r5f_info, r5ch);
	}
	return status;
}

static int
raid5f_start(struct raid_bdev *raid_bdev)
{
	uint64_t min_blockcnt = UINT64_MAX;
	struct raid_base_bdev_info *base_info;
	struct raid5f_info *r5f_info;
	size_t alignment;

	r5f_info = calloc(1, sizeof(*r5f_info));
	if (!r5f_info) {
		SPDK_ERRLOG("Failed to allocate r5f_info\n");
		return -ENOMEM;
	}
	r5f_info->raid_bdev = raid_bdev;

	alignment = spdk_xor_get_optimal_alignment();
	RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
		min_blockcnt = spdk_min(min_blockcnt, base_info->bdev->blockcnt);
		alignment = spdk_max(alignment, spdk_bdev_get_buf_align(base_info->bdev));
	}

	r5f_info->total_stripes = min_blockcnt / raid_bdev->strip_size;
	r5f_info->stripe_blocks = raid_bdev->strip_size * raid5f_stripe_data_chunks_num(raid_bdev);
	r5f_info->buf_alignment = alignment;

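	/*
	 * Expose only the data capacity - parity consumes one chunk per stripe.
	 * Splitting on the strip size boundary and on the write unit (a full
	 * stripe) guarantees the assumptions made in raid5f_submit_rw_request().
	 */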
	raid_bdev->bdev.blockcnt = r5f_info->stripe_blocks * r5f_info->total_stripes;
	raid_bdev->bdev.optimal_io_boundary = raid_bdev->strip_size;
	raid_bdev->bdev.split_on_optimal_io_boundary = true;
	raid_bdev->bdev.write_unit_size = r5f_info->stripe_blocks;
	raid_bdev->bdev.split_on_write_unit = true;

	raid_bdev->module_private = r5f_info;

	spdk_io_device_register(r5f_info, raid5f_ioch_create, raid5f_ioch_destroy,
				sizeof(struct raid5f_io_channel), NULL);

	return 0;
}

static void
raid5f_io_device_unregister_done(void *io_device)
{
	struct raid5f_info *r5f_info = io_device;

	raid_bdev_module_stop_done(r5f_info->raid_bdev);

	free(r5f_info);
}

static bool
raid5f_stop(struct raid_bdev *raid_bdev)
{
	struct raid5f_info *r5f_info = raid_bdev->module_private;

	spdk_io_device_unregister(r5f_info, raid5f_io_device_unregister_done);

	return false;
}

static struct spdk_io_channel *
raid5f_get_io_channel(struct raid_bdev *raid_bdev)
{
	struct raid5f_info *r5f_info = raid_bdev->module_private;

	return spdk_get_io_channel(r5f_info);
}

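/*
 * RAID5F requires at least 3 base bdevs (2 data + 1 parity); one chunk per
 * stripe is parity, reflected in base_bdevs_max_degraded = 1.
 */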
static struct raid_bdev_module g_raid5f_module = {
	.level = RAID5F,
	.base_bdevs_min = 3,
	.base_bdevs_max_degraded = 1,
	.start = raid5f_start,
	.stop = raid5f_stop,
	.submit_rw_request = raid5f_submit_rw_request,
	.get_io_channel = raid5f_get_io_channel,
};
RAID_MODULE_REGISTER(&g_raid5f_module)

SPDK_LOG_REGISTER_COMPONENT(bdev_raid5f)