xref: /spdk/module/bdev/raid/raid5f.c (revision a1dfa7ec92a6c49538482c8bb73f0b1ce040441f)
/*   SPDX-License-Identifier: BSD-3-Clause
 *   Copyright (C) 2022 Intel Corporation.
 *   All rights reserved.
 */

#include "bdev_raid.h"

#include "spdk/env.h"
#include "spdk/thread.h"
#include "spdk/string.h"
#include "spdk/util.h"
#include "spdk/likely.h"
#include "spdk/log.h"
#include "spdk/xor.h"

/* Maximum concurrent full stripe writes per io channel */
#define RAID5F_MAX_STRIPES 32

struct chunk {
	/* Corresponds to base_bdev index */
	uint8_t index;

	/* Array of iovecs */
	struct iovec *iovs;

	/* Number of used iovecs */
	int iovcnt;

	/* Total number of available iovecs in the array */
	int iovcnt_max;

	/* Pointer to buffer with I/O metadata */
	void *md_buf;

	/* Shallow copy of IO request parameters */
	struct spdk_bdev_ext_io_opts ext_opts;
};

struct stripe_request {
	struct raid5f_io_channel *r5ch;

	/* The associated raid_bdev_io */
	struct raid_bdev_io *raid_io;

	/* The stripe's index in the raid array. */
	uint64_t stripe_index;

	/* The stripe's parity chunk */
	struct chunk *parity_chunk;

	/* Buffer for stripe parity */
	void *parity_buf;

	/* Buffer for stripe io metadata parity */
	void *parity_md_buf;

	TAILQ_ENTRY(stripe_request) link;

	/* Array of chunks corresponding to base_bdevs */
	struct chunk chunks[0];
};

struct raid5f_info {
	/* The parent raid bdev */
	struct raid_bdev *raid_bdev;

	/* Number of data blocks in a stripe (without parity) */
	uint64_t stripe_blocks;

	/* Number of stripes on this array */
	uint64_t total_stripes;

	/* Alignment for buffer allocation */
	size_t buf_alignment;
};

struct raid5f_io_channel {
	/* All available stripe requests on this channel */
	TAILQ_HEAD(, stripe_request) free_stripe_requests;

	/* Array of iovec iterators for each data chunk */
	struct iov_iter {
		struct iovec *iovs;
		int iovcnt;
		int index;
		size_t offset;
	} *chunk_iov_iters;

	/* Array of source buffer pointers for parity calculation */
	void **chunk_xor_buffers;

	/* Array of source buffer pointers for parity calculation of io metadata */
	void **chunk_xor_md_buffers;

	/* Bounce buffers for parity calculation in case of unaligned source buffers */
	struct iovec *chunk_xor_bounce_buffers;
};

#define __CHUNK_IN_RANGE(req, c) \
	c < req->chunks + raid5f_ch_to_r5f_info(req->r5ch)->raid_bdev->num_base_bdevs

#define FOR_EACH_CHUNK_FROM(req, c, from) \
	for (c = from; __CHUNK_IN_RANGE(req, c); c++)

#define FOR_EACH_CHUNK(req, c) \
	FOR_EACH_CHUNK_FROM(req, c, req->chunks)

#define __NEXT_DATA_CHUNK(req, c) \
	c == req->parity_chunk ? c+1 : c

#define FOR_EACH_DATA_CHUNK(req, c) \
	for (c = __NEXT_DATA_CHUNK(req, req->chunks); __CHUNK_IN_RANGE(req, c); \
	     c = __NEXT_DATA_CHUNK(req, c+1))
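
/*
 * Example: with num_base_bdevs = 4 and the parity chunk at index 1,
 * FOR_EACH_DATA_CHUNK() visits chunks 0, 2 and 3 - __NEXT_DATA_CHUNK()
 * steps over the parity chunk wherever it falls in the array.
 */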

static inline struct raid5f_info *
raid5f_ch_to_r5f_info(struct raid5f_io_channel *r5ch)
{
	return spdk_io_channel_get_io_device(spdk_io_channel_from_ctx(r5ch));
}

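/*
 * Since chunk->index equals the chunk's position in stripe_req->chunks,
 * (chunk - chunk->index) points back at the start of the chunks[] array,
 * from which SPDK_CONTAINEROF() recovers the enclosing stripe_request.
 */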
static inline struct stripe_request *
raid5f_chunk_stripe_req(struct chunk *chunk)
{
	return SPDK_CONTAINEROF((chunk - chunk->index), struct stripe_request, chunks);
}

static inline uint8_t
raid5f_stripe_data_chunks_num(const struct raid_bdev *raid_bdev)
{
	return raid_bdev->min_base_bdevs_operational;
}

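/*
 * The parity chunk position rotates with the stripe index. For example,
 * with num_base_bdevs = 4 (3 data chunks): stripe 0 -> chunk 3,
 * stripe 1 -> chunk 2, stripe 2 -> chunk 1, stripe 3 -> chunk 0,
 * stripe 4 -> chunk 3 again.
 */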
static inline uint8_t
raid5f_stripe_parity_chunk_index(const struct raid_bdev *raid_bdev, uint64_t stripe_index)
{
	return raid5f_stripe_data_chunks_num(raid_bdev) - stripe_index % raid_bdev->num_base_bdevs;
}

static inline void
raid5f_stripe_request_release(struct stripe_request *stripe_req)
{
	TAILQ_INSERT_HEAD(&stripe_req->r5ch->free_stripe_requests, stripe_req, link);
}

static int
raid5f_xor_stripe(struct stripe_request *stripe_req)
{
	struct raid_bdev_io *raid_io = stripe_req->raid_io;
	struct raid5f_io_channel *r5ch = stripe_req->r5ch;
	struct raid_bdev *raid_bdev = raid_io->raid_bdev;
	struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io);
	size_t remaining = raid_bdev->strip_size << raid_bdev->blocklen_shift;
	uint8_t n_src = raid5f_stripe_data_chunks_num(raid_bdev);
	void *dest = stripe_req->parity_buf;
	size_t alignment_mask = spdk_xor_get_optimal_alignment() - 1;
	void *raid_md = spdk_bdev_io_get_md_buf(bdev_io);
	uint32_t raid_md_size = spdk_bdev_get_md_size(&raid_bdev->bdev);
	struct chunk *chunk;
	int ret;
	uint8_t c;

	c = 0;
	FOR_EACH_DATA_CHUNK(stripe_req, chunk) {
		struct iov_iter *iov_iter = &r5ch->chunk_iov_iters[c];
		bool aligned = true;
		int i;

		for (i = 0; i < chunk->iovcnt; i++) {
			if (((uintptr_t)chunk->iovs[i].iov_base & alignment_mask) ||
			    (chunk->iovs[i].iov_len & alignment_mask)) {
				aligned = false;
				break;
			}
		}

		if (aligned) {
			iov_iter->iovs = chunk->iovs;
			iov_iter->iovcnt = chunk->iovcnt;
		} else {
			iov_iter->iovs = &r5ch->chunk_xor_bounce_buffers[c];
			iov_iter->iovcnt = 1;
			spdk_iovcpy(chunk->iovs, chunk->iovcnt, iov_iter->iovs, iov_iter->iovcnt);
		}

		iov_iter->index = 0;
		iov_iter->offset = 0;

		c++;
	}

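	/*
	 * Generate parity in runs. Each run length is the minimum of the
	 * remaining length of the current iovec segment of every data chunk,
	 * so each source pointer passed to spdk_xor_gen() covers a single
	 * contiguous buffer.
	 */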
	while (remaining > 0) {
		size_t len = remaining;
		uint8_t i;

		for (i = 0; i < n_src; i++) {
			struct iov_iter *iov_iter = &r5ch->chunk_iov_iters[i];
			struct iovec *iov = &iov_iter->iovs[iov_iter->index];

			len = spdk_min(len, iov->iov_len - iov_iter->offset);
			r5ch->chunk_xor_buffers[i] = iov->iov_base + iov_iter->offset;
		}

		assert(len > 0);

		ret = spdk_xor_gen(dest, r5ch->chunk_xor_buffers, n_src, len);
		if (spdk_unlikely(ret)) {
			SPDK_ERRLOG("stripe xor failed\n");
			return ret;
		}

		for (i = 0; i < n_src; i++) {
			struct iov_iter *iov_iter = &r5ch->chunk_iov_iters[i];
			struct iovec *iov = &iov_iter->iovs[iov_iter->index];

			iov_iter->offset += len;
			if (iov_iter->offset == iov->iov_len) {
				iov_iter->offset = 0;
				iov_iter->index++;
			}
		}
		dest += len;

		remaining -= len;
	}

	if (raid_md != NULL) {
		uint64_t len = raid_bdev->strip_size * raid_md_size;
		c = 0;
		FOR_EACH_DATA_CHUNK(stripe_req, chunk) {
			r5ch->chunk_xor_md_buffers[c] = chunk->md_buf;
			c++;
		}
		ret = spdk_xor_gen(stripe_req->parity_md_buf, r5ch->chunk_xor_md_buffers, n_src, len);
		if (spdk_unlikely(ret)) {
			SPDK_ERRLOG("stripe io metadata xor failed\n");
			return ret;
		}
	}

	return 0;
}

static void
raid5f_chunk_write_complete(struct chunk *chunk, enum spdk_bdev_io_status status)
{
	struct stripe_request *stripe_req = raid5f_chunk_stripe_req(chunk);

	if (raid_bdev_io_complete_part(stripe_req->raid_io, 1, status)) {
		raid5f_stripe_request_release(stripe_req);
	}
}

static void
raid5f_chunk_write_complete_bdev_io(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct chunk *chunk = cb_arg;

	spdk_bdev_free_io(bdev_io);

	raid5f_chunk_write_complete(chunk, success ? SPDK_BDEV_IO_STATUS_SUCCESS :
				    SPDK_BDEV_IO_STATUS_FAILED);
}

static void raid5f_stripe_request_submit_chunks(struct stripe_request *stripe_req);

static void
raid5f_chunk_write_retry(void *_raid_io)
{
	struct raid_bdev_io *raid_io = _raid_io;
	struct stripe_request *stripe_req = raid_io->module_private;

	raid5f_stripe_request_submit_chunks(stripe_req);
}

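/*
 * The caller's ext_opts may come from a binary built against a smaller
 * version of the struct, so only src->size bytes are safe to copy;
 * dst->size is then set to the full size this module was built with.
 */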
static inline void
copy_ext_io_opts(struct spdk_bdev_ext_io_opts *dst, struct spdk_bdev_ext_io_opts *src)
{
	memset(dst, 0, sizeof(*dst));
	memcpy(dst, src, src->size);
	dst->size = sizeof(*dst);
}

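/*
 * Submit one chunk of a stripe write to its base bdev. On -ENOMEM the
 * raid_io is queued to retry submission from where it left off
 * (raid_io->base_bdev_io_submitted tracks progress); any other error
 * implicitly fails all chunks not yet submitted.
 */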
static int
raid5f_chunk_write(struct chunk *chunk)
{
	struct stripe_request *stripe_req = raid5f_chunk_stripe_req(chunk);
	struct raid_bdev_io *raid_io = stripe_req->raid_io;
	struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io);
	struct raid_bdev *raid_bdev = raid_io->raid_bdev;
	struct raid_base_bdev_info *base_info = &raid_bdev->base_bdev_info[chunk->index];
	struct spdk_io_channel *base_ch = raid_io->raid_ch->base_channel[chunk->index];
	uint64_t base_offset_blocks = (stripe_req->stripe_index << raid_bdev->strip_size_shift);
	int ret;

	if (bdev_io->u.bdev.ext_opts != NULL) {
		copy_ext_io_opts(&chunk->ext_opts, bdev_io->u.bdev.ext_opts);
		chunk->ext_opts.metadata = chunk->md_buf;

		ret = spdk_bdev_writev_blocks_ext(base_info->desc, base_ch, chunk->iovs, chunk->iovcnt,
						  base_offset_blocks, raid_bdev->strip_size, raid5f_chunk_write_complete_bdev_io,
						  chunk, &chunk->ext_opts);
	} else {
		ret = spdk_bdev_writev_blocks_with_md(base_info->desc, base_ch, chunk->iovs, chunk->iovcnt,
						      chunk->md_buf, base_offset_blocks, raid_bdev->strip_size,
						      raid5f_chunk_write_complete_bdev_io, chunk);
	}

	if (spdk_unlikely(ret)) {
		if (ret == -ENOMEM) {
			raid_bdev_queue_io_wait(raid_io, base_info->bdev, base_ch,
						raid5f_chunk_write_retry);
		} else {
			/*
			 * Implicitly complete any I/Os not yet submitted as FAILED. If completing
			 * these means there are no more to complete for the stripe request, we can
			 * release the stripe request as well.
			 */
			uint64_t base_bdev_io_not_submitted = raid_bdev->num_base_bdevs -
							      raid_io->base_bdev_io_submitted;

			if (raid_bdev_io_complete_part(stripe_req->raid_io, base_bdev_io_not_submitted,
						       SPDK_BDEV_IO_STATUS_FAILED)) {
				raid5f_stripe_request_release(stripe_req);
			}
		}
	}

	return ret;
}

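/*
 * Split the parent I/O's iovecs among the data chunks, strip_size blocks
 * per chunk. A single large source iovec may span several chunks, and a
 * chunk may be assembled from several small source iovecs; each chunk's
 * iovs[] array is grown on demand to fit.
 */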
static int
raid5f_stripe_request_map_iovecs(struct stripe_request *stripe_req)
{
	struct raid_bdev *raid_bdev = stripe_req->raid_io->raid_bdev;
	struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(stripe_req->raid_io);
	const struct iovec *raid_io_iovs = bdev_io->u.bdev.iovs;
	int raid_io_iovcnt = bdev_io->u.bdev.iovcnt;
	void *raid_io_md = spdk_bdev_io_get_md_buf(bdev_io);
	uint32_t raid_io_md_size = spdk_bdev_get_md_size(&raid_bdev->bdev);
	struct chunk *chunk;
	int raid_io_iov_idx = 0;
	size_t raid_io_offset = 0;
	size_t raid_io_iov_offset = 0;
	int i;

	FOR_EACH_DATA_CHUNK(stripe_req, chunk) {
		int chunk_iovcnt = 0;
		uint64_t len = raid_bdev->strip_size << raid_bdev->blocklen_shift;
		size_t off = raid_io_iov_offset;

		for (i = raid_io_iov_idx; i < raid_io_iovcnt; i++) {
			chunk_iovcnt++;
			off += raid_io_iovs[i].iov_len;
			if (off >= raid_io_offset + len) {
				break;
			}
		}

		assert(raid_io_iov_idx + chunk_iovcnt <= raid_io_iovcnt);

		if (chunk_iovcnt > chunk->iovcnt_max) {
			struct iovec *iovs = chunk->iovs;

			iovs = realloc(iovs, chunk_iovcnt * sizeof(*iovs));
			if (!iovs) {
				return -ENOMEM;
			}
			chunk->iovs = iovs;
			chunk->iovcnt_max = chunk_iovcnt;
		}
		chunk->iovcnt = chunk_iovcnt;

		if (raid_io_md) {
			chunk->md_buf = raid_io_md +
					(raid_io_offset >> raid_bdev->blocklen_shift) * raid_io_md_size;
		}

		for (i = 0; i < chunk_iovcnt; i++) {
			struct iovec *chunk_iov = &chunk->iovs[i];
			const struct iovec *raid_io_iov = &raid_io_iovs[raid_io_iov_idx];
			size_t chunk_iov_offset = raid_io_offset - raid_io_iov_offset;

			chunk_iov->iov_base = raid_io_iov->iov_base + chunk_iov_offset;
			chunk_iov->iov_len = spdk_min(len, raid_io_iov->iov_len - chunk_iov_offset);
			raid_io_offset += chunk_iov->iov_len;
			len -= chunk_iov->iov_len;

			if (raid_io_offset >= raid_io_iov_offset + raid_io_iov->iov_len) {
				raid_io_iov_idx++;
				raid_io_iov_offset += raid_io_iov->iov_len;
			}
		}

		if (spdk_unlikely(len > 0)) {
			return -EINVAL;
		}
	}

	stripe_req->parity_chunk->iovs[0].iov_base = stripe_req->parity_buf;
	stripe_req->parity_chunk->iovs[0].iov_len = raid_bdev->strip_size <<
			raid_bdev->blocklen_shift;
	stripe_req->parity_chunk->md_buf = stripe_req->parity_md_buf;
	stripe_req->parity_chunk->iovcnt = 1;

	return 0;
}

static void
raid5f_stripe_request_submit_chunks(struct stripe_request *stripe_req)
{
	struct raid_bdev_io *raid_io = stripe_req->raid_io;
	struct chunk *start = &stripe_req->chunks[raid_io->base_bdev_io_submitted];
	struct chunk *chunk;

	FOR_EACH_CHUNK_FROM(stripe_req, chunk, start) {
		if (spdk_unlikely(raid5f_chunk_write(chunk) != 0)) {
			break;
		}
		raid_io->base_bdev_io_submitted++;
	}
}

static void
raid5f_submit_stripe_request(struct stripe_request *stripe_req)
{
	if (spdk_unlikely(raid5f_xor_stripe(stripe_req) != 0)) {
		raid_bdev_io_complete(stripe_req->raid_io, SPDK_BDEV_IO_STATUS_FAILED);
		return;
	}

	raid5f_stripe_request_submit_chunks(stripe_req);
}

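/*
 * Writes are always full-stripe here (raid5f_start() sets
 * split_on_write_unit with write_unit_size equal to the stripe size), so
 * a write takes a preallocated stripe_request from the channel's free
 * list, generates parity and fans the chunks out to all base bdevs.
 */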
static int
raid5f_submit_write_request(struct raid_bdev_io *raid_io, uint64_t stripe_index)
{
	struct raid_bdev *raid_bdev = raid_io->raid_bdev;
	struct raid5f_io_channel *r5ch = spdk_io_channel_get_ctx(raid_io->raid_ch->module_channel);
	struct stripe_request *stripe_req;
	int ret;

	stripe_req = TAILQ_FIRST(&r5ch->free_stripe_requests);
	if (!stripe_req) {
		return -ENOMEM;
	}

	stripe_req->stripe_index = stripe_index;
	stripe_req->parity_chunk = stripe_req->chunks + raid5f_stripe_parity_chunk_index(raid_bdev,
				   stripe_req->stripe_index);
	stripe_req->raid_io = raid_io;

	ret = raid5f_stripe_request_map_iovecs(stripe_req);
	if (spdk_unlikely(ret)) {
		return ret;
	}

	TAILQ_REMOVE(&r5ch->free_stripe_requests, stripe_req, link);

	raid_io->module_private = stripe_req;
	raid_io->base_bdev_io_remaining = raid_bdev->num_base_bdevs;

	raid5f_submit_stripe_request(stripe_req);

	return 0;
}

static void
raid5f_chunk_read_complete(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct raid_bdev_io *raid_io = cb_arg;

	spdk_bdev_free_io(bdev_io);

	raid_bdev_io_complete(raid_io, success ? SPDK_BDEV_IO_STATUS_SUCCESS :
			      SPDK_BDEV_IO_STATUS_FAILED);
}

static void raid5f_submit_rw_request(struct raid_bdev_io *raid_io);

static void
_raid5f_submit_rw_request(void *_raid_io)
{
	struct raid_bdev_io *raid_io = _raid_io;

	raid5f_submit_rw_request(raid_io);
}

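/*
 * Reads never cross a chunk boundary (the bdev layer splits on
 * optimal_io_boundary), so each read maps to exactly one base bdev. Data
 * chunk indices at or past the parity chunk are shifted by one, e.g. with
 * parity at index 1, data chunks 0, 1, 2 map to base bdevs 0, 2, 3.
 */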
static int
raid5f_submit_read_request(struct raid_bdev_io *raid_io, uint64_t stripe_index,
			   uint64_t stripe_offset)
{
	struct raid_bdev *raid_bdev = raid_io->raid_bdev;
	uint8_t chunk_data_idx = stripe_offset >> raid_bdev->strip_size_shift;
	uint8_t p_idx = raid5f_stripe_parity_chunk_index(raid_bdev, stripe_index);
	uint8_t chunk_idx = chunk_data_idx < p_idx ? chunk_data_idx : chunk_data_idx + 1;
	struct raid_base_bdev_info *base_info = &raid_bdev->base_bdev_info[chunk_idx];
	struct spdk_io_channel *base_ch = raid_io->raid_ch->base_channel[chunk_idx];
	uint64_t chunk_offset = stripe_offset - (chunk_data_idx << raid_bdev->strip_size_shift);
	uint64_t base_offset_blocks = (stripe_index << raid_bdev->strip_size_shift) + chunk_offset;
	struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io);
	int ret;

	if (bdev_io->u.bdev.ext_opts != NULL) {
		ret = spdk_bdev_readv_blocks_ext(base_info->desc, base_ch, bdev_io->u.bdev.iovs,
						 bdev_io->u.bdev.iovcnt,
						 base_offset_blocks, bdev_io->u.bdev.num_blocks, raid5f_chunk_read_complete, raid_io,
						 bdev_io->u.bdev.ext_opts);
	} else {
		ret = spdk_bdev_readv_blocks_with_md(base_info->desc, base_ch,
						     bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
						     bdev_io->u.bdev.md_buf,
						     base_offset_blocks, bdev_io->u.bdev.num_blocks,
						     raid5f_chunk_read_complete, raid_io);
	}

	if (spdk_unlikely(ret == -ENOMEM)) {
		raid_bdev_queue_io_wait(raid_io, base_info->bdev, base_ch,
					_raid5f_submit_rw_request);
		return 0;
	}

	return ret;
}

static void
raid5f_submit_rw_request(struct raid_bdev_io *raid_io)
{
	struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io);
	struct raid_bdev *raid_bdev = raid_io->raid_bdev;
	struct raid5f_info *r5f_info = raid_bdev->module_private;
	uint64_t offset_blocks = bdev_io->u.bdev.offset_blocks;
	uint64_t stripe_index = offset_blocks / r5f_info->stripe_blocks;
	uint64_t stripe_offset = offset_blocks % r5f_info->stripe_blocks;
	int ret;

	switch (bdev_io->type) {
	case SPDK_BDEV_IO_TYPE_READ:
		assert(bdev_io->u.bdev.num_blocks <= raid_bdev->strip_size);
		ret = raid5f_submit_read_request(raid_io, stripe_index, stripe_offset);
		break;
	case SPDK_BDEV_IO_TYPE_WRITE:
		assert(stripe_offset == 0);
		assert(bdev_io->u.bdev.num_blocks == r5f_info->stripe_blocks);
		ret = raid5f_submit_write_request(raid_io, stripe_index);
		break;
	default:
		ret = -EINVAL;
		break;
	}

	if (spdk_unlikely(ret)) {
		raid_bdev_io_complete(raid_io, ret == -ENOMEM ? SPDK_BDEV_IO_STATUS_NOMEM :
				      SPDK_BDEV_IO_STATUS_FAILED);
	}
}

static void
raid5f_stripe_request_free(struct stripe_request *stripe_req)
{
	struct chunk *chunk;

	FOR_EACH_CHUNK(stripe_req, chunk) {
		free(chunk->iovs);
	}

	spdk_dma_free(stripe_req->parity_buf);
	spdk_dma_free(stripe_req->parity_md_buf);

	free(stripe_req);
}

static struct stripe_request *
raid5f_stripe_request_alloc(struct raid5f_io_channel *r5ch)
{
	struct raid5f_info *r5f_info = raid5f_ch_to_r5f_info(r5ch);
	struct raid_bdev *raid_bdev = r5f_info->raid_bdev;
	uint32_t raid_io_md_size = spdk_bdev_get_md_size(&raid_bdev->bdev);
	struct stripe_request *stripe_req;
	struct chunk *chunk;

	stripe_req = calloc(1, sizeof(*stripe_req) +
			    sizeof(struct chunk) * raid_bdev->num_base_bdevs);
	if (!stripe_req) {
		return NULL;
	}

	stripe_req->r5ch = r5ch;

	FOR_EACH_CHUNK(stripe_req, chunk) {
		chunk->index = chunk - stripe_req->chunks;
		chunk->iovcnt_max = 4;
		chunk->iovs = calloc(chunk->iovcnt_max, sizeof(chunk->iovs[0]));
		if (!chunk->iovs) {
			goto err;
		}
	}

	stripe_req->parity_buf = spdk_dma_malloc(raid_bdev->strip_size << raid_bdev->blocklen_shift,
				 r5f_info->buf_alignment, NULL);
	if (!stripe_req->parity_buf) {
		goto err;
	}

	if (raid_io_md_size != 0) {
		stripe_req->parity_md_buf = spdk_dma_malloc(raid_bdev->strip_size * raid_io_md_size,
					    r5f_info->buf_alignment, NULL);
		if (!stripe_req->parity_md_buf) {
			goto err;
		}
	}

	return stripe_req;
err:
	raid5f_stripe_request_free(stripe_req);
	return NULL;
}

static void
raid5f_ioch_destroy(void *io_device, void *ctx_buf)
{
	struct raid5f_io_channel *r5ch = ctx_buf;
	struct raid5f_info *r5f_info = io_device;
	struct raid_bdev *raid_bdev = r5f_info->raid_bdev;
	struct stripe_request *stripe_req;
	int i;

	while ((stripe_req = TAILQ_FIRST(&r5ch->free_stripe_requests))) {
		TAILQ_REMOVE(&r5ch->free_stripe_requests, stripe_req, link);
		raid5f_stripe_request_free(stripe_req);
	}

	if (r5ch->chunk_xor_bounce_buffers) {
		for (i = 0; i < raid5f_stripe_data_chunks_num(raid_bdev); i++) {
			free(r5ch->chunk_xor_bounce_buffers[i].iov_base);
		}
		free(r5ch->chunk_xor_bounce_buffers);
	}

	free(r5ch->chunk_xor_buffers);
	free(r5ch->chunk_xor_md_buffers);
	free(r5ch->chunk_iov_iters);
}

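/*
 * Per-channel setup: preallocate RAID5F_MAX_STRIPES stripe requests
 * (bounding the number of in-flight full-stripe writes per channel) and
 * the scratch arrays used by raid5f_xor_stripe(), including one
 * XOR-aligned bounce buffer per data chunk for unaligned source buffers.
 */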
static int
raid5f_ioch_create(void *io_device, void *ctx_buf)
{
	struct raid5f_io_channel *r5ch = ctx_buf;
	struct raid5f_info *r5f_info = io_device;
	struct raid_bdev *raid_bdev = r5f_info->raid_bdev;
	size_t chunk_len = raid_bdev->strip_size << raid_bdev->blocklen_shift;
	int status = 0;
	int i;

	TAILQ_INIT(&r5ch->free_stripe_requests);

	for (i = 0; i < RAID5F_MAX_STRIPES; i++) {
		struct stripe_request *stripe_req;

		stripe_req = raid5f_stripe_request_alloc(r5ch);
		if (!stripe_req) {
			status = -ENOMEM;
			goto out;
		}

		TAILQ_INSERT_HEAD(&r5ch->free_stripe_requests, stripe_req, link);
	}

	r5ch->chunk_iov_iters = calloc(raid5f_stripe_data_chunks_num(raid_bdev),
				       sizeof(r5ch->chunk_iov_iters[0]));
	if (!r5ch->chunk_iov_iters) {
		status = -ENOMEM;
		goto out;
	}

	r5ch->chunk_xor_buffers = calloc(raid5f_stripe_data_chunks_num(raid_bdev),
					 sizeof(r5ch->chunk_xor_buffers[0]));
	if (!r5ch->chunk_xor_buffers) {
		status = -ENOMEM;
		goto out;
	}

	r5ch->chunk_xor_md_buffers = calloc(raid5f_stripe_data_chunks_num(raid_bdev),
					    sizeof(r5ch->chunk_xor_md_buffers[0]));
	if (!r5ch->chunk_xor_md_buffers) {
		status = -ENOMEM;
		goto out;
	}

	r5ch->chunk_xor_bounce_buffers = calloc(raid5f_stripe_data_chunks_num(raid_bdev),
						sizeof(r5ch->chunk_xor_bounce_buffers[0]));
	if (!r5ch->chunk_xor_bounce_buffers) {
		status = -ENOMEM;
		goto out;
	}

	for (i = 0; i < raid5f_stripe_data_chunks_num(raid_bdev); i++) {
		status = posix_memalign(&r5ch->chunk_xor_bounce_buffers[i].iov_base,
					spdk_xor_get_optimal_alignment(), chunk_len);
		if (status) {
			goto out;
		}
		r5ch->chunk_xor_bounce_buffers[i].iov_len = chunk_len;
	}
out:
	if (status) {
		SPDK_ERRLOG("Failed to initialize io channel\n");
		raid5f_ioch_destroy(r5f_info, r5ch);
	}
	return status;
}

static int
raid5f_start(struct raid_bdev *raid_bdev)
{
	uint64_t min_blockcnt = UINT64_MAX;
	struct raid_base_bdev_info *base_info;
	struct raid5f_info *r5f_info;
	size_t alignment;

	r5f_info = calloc(1, sizeof(*r5f_info));
	if (!r5f_info) {
		SPDK_ERRLOG("Failed to allocate r5f_info\n");
		return -ENOMEM;
	}
	r5f_info->raid_bdev = raid_bdev;

	alignment = spdk_xor_get_optimal_alignment();
	RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
		min_blockcnt = spdk_min(min_blockcnt, base_info->bdev->blockcnt);
		alignment = spdk_max(alignment, spdk_bdev_get_buf_align(base_info->bdev));
	}

	r5f_info->total_stripes = min_blockcnt / raid_bdev->strip_size;
	r5f_info->stripe_blocks = raid_bdev->strip_size * raid5f_stripe_data_chunks_num(raid_bdev);
	r5f_info->buf_alignment = alignment;

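	/*
	 * Example: 4 base bdevs with strip_size = 128 blocks yield
	 * stripe_blocks = 3 * 128 = 384 data blocks per stripe. The exposed
	 * bdev then holds 384 * total_stripes blocks, and writes are split
	 * into full-stripe units of 384 blocks.
	 */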
	raid_bdev->bdev.blockcnt = r5f_info->stripe_blocks * r5f_info->total_stripes;
	raid_bdev->bdev.optimal_io_boundary = raid_bdev->strip_size;
	raid_bdev->bdev.split_on_optimal_io_boundary = true;
	raid_bdev->bdev.write_unit_size = r5f_info->stripe_blocks;
	raid_bdev->bdev.split_on_write_unit = true;

	raid_bdev->module_private = r5f_info;

	spdk_io_device_register(r5f_info, raid5f_ioch_create, raid5f_ioch_destroy,
				sizeof(struct raid5f_io_channel), NULL);

	return 0;
}

static void
raid5f_io_device_unregister_done(void *io_device)
{
	struct raid5f_info *r5f_info = io_device;

	raid_bdev_module_stop_done(r5f_info->raid_bdev);

	free(r5f_info);
}

static bool
raid5f_stop(struct raid_bdev *raid_bdev)
{
	struct raid5f_info *r5f_info = raid_bdev->module_private;

	spdk_io_device_unregister(r5f_info, raid5f_io_device_unregister_done);

	return false;
}

static struct spdk_io_channel *
raid5f_get_io_channel(struct raid_bdev *raid_bdev)
{
	struct raid5f_info *r5f_info = raid_bdev->module_private;

	return spdk_get_io_channel(r5f_info);
}

static struct raid_bdev_module g_raid5f_module = {
	.level = RAID5F,
	.base_bdevs_min = 3,
	.base_bdevs_constraint = {CONSTRAINT_MAX_BASE_BDEVS_REMOVED, 1},
	.start = raid5f_start,
	.stop = raid5f_stop,
	.submit_rw_request = raid5f_submit_rw_request,
	.get_io_channel = raid5f_get_io_channel,
};
RAID_MODULE_REGISTER(&g_raid5f_module)

SPDK_LOG_REGISTER_COMPONENT(bdev_raid5f)