xref: /spdk/module/bdev/raid/bdev_raid.c (revision 9889ab2dc80e40dae92dcef361d53dcba722043d)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright (c) Intel Corporation.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #include "bdev_raid.h"
35 #include "spdk/env.h"
36 #include "spdk/io_channel.h"
37 #include "spdk/conf.h"
38 #include "spdk_internal/log.h"
39 #include "spdk/string.h"
40 #include "spdk/util.h"
41 #include "spdk/json.h"
42 #include "spdk/string.h"
43 
44 static bool g_shutdown_started = false;
45 
46 /* raid bdev config as read from config file */
47 struct raid_config	g_raid_config = {
48 	.raid_bdev_config_head = TAILQ_HEAD_INITIALIZER(g_raid_config.raid_bdev_config_head),
49 };
50 
51 /*
52  * List of raid bdev in configured list, these raid bdevs are registered with
53  * bdev layer
54  */
55 struct raid_configured_tailq	g_raid_bdev_configured_list = TAILQ_HEAD_INITIALIZER(
56 			g_raid_bdev_configured_list);
57 
58 /* List of raid bdev in configuring list */
59 struct raid_configuring_tailq	g_raid_bdev_configuring_list = TAILQ_HEAD_INITIALIZER(
60 			g_raid_bdev_configuring_list);
61 
62 /* List of all raid bdevs */
63 struct raid_all_tailq		g_raid_bdev_list = TAILQ_HEAD_INITIALIZER(g_raid_bdev_list);
64 
65 /* List of all raid bdevs that are offline */
66 struct raid_offline_tailq	g_raid_bdev_offline_list = TAILQ_HEAD_INITIALIZER(
67 			g_raid_bdev_offline_list);
68 
69 /* Function declarations */
70 static void	raid_bdev_examine(struct spdk_bdev *bdev);
71 static int	raid_bdev_init(void);
72 static void	raid_bdev_deconfigure(struct raid_bdev *raid_bdev,
73 				      raid_bdev_destruct_cb cb_fn, void *cb_arg);
74 static void	raid_bdev_remove_base_bdev(void *ctx);
75 
76 /*
77  * brief:
78  * raid_bdev_create_cb function is a cb function for raid bdev which creates the
79  * hierarchy from raid bdev to base bdev io channels. It will be called per core
80  * params:
81  * io_device - pointer to raid bdev io device represented by raid_bdev
82  * ctx_buf - pointer to context buffer for raid bdev io channel
83  * returns:
84  * 0 - success
85  * non zero - failure
86  */
87 static int
88 raid_bdev_create_cb(void *io_device, void *ctx_buf)
89 {
90 	struct raid_bdev            *raid_bdev = io_device;
91 	struct raid_bdev_io_channel *raid_ch = ctx_buf;
92 
93 	SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_create_cb, %p\n", raid_ch);
94 
95 	assert(raid_bdev != NULL);
96 	assert(raid_bdev->state == RAID_BDEV_STATE_ONLINE);
97 
98 	raid_ch->num_channels = raid_bdev->num_base_bdevs;
99 
100 	raid_ch->base_channel = calloc(raid_ch->num_channels,
101 				       sizeof(struct spdk_io_channel *));
102 	if (!raid_ch->base_channel) {
103 		SPDK_ERRLOG("Unable to allocate base bdevs io channel\n");
104 		return -ENOMEM;
105 	}
106 	for (uint8_t i = 0; i < raid_ch->num_channels; i++) {
107 		/*
108 		 * Get the spdk_io_channel for all the base bdevs. This is used during
109 		 * split logic to send the respective child bdev ios to respective base
110 		 * bdev io channel.
111 		 */
112 		raid_ch->base_channel[i] = spdk_bdev_get_io_channel(
113 						   raid_bdev->base_bdev_info[i].desc);
114 		if (!raid_ch->base_channel[i]) {
115 			for (uint8_t j = 0; j < i; j++) {
116 				spdk_put_io_channel(raid_ch->base_channel[j]);
117 			}
118 			free(raid_ch->base_channel);
119 			raid_ch->base_channel = NULL;
120 			SPDK_ERRLOG("Unable to create io channel for base bdev\n");
121 			return -ENOMEM;
122 		}
123 	}
124 
125 	return 0;
126 }
127 
128 /*
129  * brief:
130  * raid_bdev_destroy_cb function is a cb function for raid bdev which deletes the
131  * hierarchy from raid bdev to base bdev io channels. It will be called per core
132  * params:
133  * io_device - pointer to raid bdev io device represented by raid_bdev
134  * ctx_buf - pointer to context buffer for raid bdev io channel
135  * returns:
136  * none
137  */
138 static void
139 raid_bdev_destroy_cb(void *io_device, void *ctx_buf)
140 {
141 	struct raid_bdev_io_channel *raid_ch = ctx_buf;
142 
143 	SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_destroy_cb\n");
144 
145 	assert(raid_ch != NULL);
146 	assert(raid_ch->base_channel);
147 	for (uint8_t i = 0; i < raid_ch->num_channels; i++) {
148 		/* Free base bdev channels */
149 		assert(raid_ch->base_channel[i] != NULL);
150 		spdk_put_io_channel(raid_ch->base_channel[i]);
151 	}
152 	free(raid_ch->base_channel);
153 	raid_ch->base_channel = NULL;
154 }
155 
156 /*
157  * brief:
158  * raid_bdev_cleanup is used to cleanup and free raid_bdev related data
159  * structures.
160  * params:
161  * raid_bdev - pointer to raid_bdev
162  * returns:
163  * none
164  */
165 static void
166 raid_bdev_cleanup(struct raid_bdev *raid_bdev)
167 {
168 	SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_cleanup, %p name %s, state %u, config %p\n",
169 		      raid_bdev,
170 		      raid_bdev->bdev.name, raid_bdev->state, raid_bdev->config);
171 	if (raid_bdev->state == RAID_BDEV_STATE_CONFIGURING) {
172 		TAILQ_REMOVE(&g_raid_bdev_configuring_list, raid_bdev, state_link);
173 	} else if (raid_bdev->state == RAID_BDEV_STATE_OFFLINE) {
174 		TAILQ_REMOVE(&g_raid_bdev_offline_list, raid_bdev, state_link);
175 	} else {
176 		assert(0);
177 	}
178 	TAILQ_REMOVE(&g_raid_bdev_list, raid_bdev, global_link);
179 	free(raid_bdev->bdev.name);
180 	free(raid_bdev->base_bdev_info);
181 	if (raid_bdev->config) {
182 		raid_bdev->config->raid_bdev = NULL;
183 	}
184 	free(raid_bdev);
185 }
186 
187 /*
188  * brief:
189  * free resource of base bdev for raid bdev
190  * params:
191  * raid_bdev - pointer to raid bdev
192  * base_bdev_slot - position to base bdev in raid bdev
193  * returns:
194  * 0 - success
195  * non zero - failure
196  */
197 static void
198 raid_bdev_free_base_bdev_resource(struct raid_bdev *raid_bdev, uint8_t base_bdev_slot)
199 {
200 	struct raid_base_bdev_info *info;
201 
202 	info = &raid_bdev->base_bdev_info[base_bdev_slot];
203 
204 	spdk_bdev_module_release_bdev(info->bdev);
205 	spdk_bdev_close(info->desc);
206 	info->desc = NULL;
207 	info->bdev = NULL;
208 
209 	assert(raid_bdev->num_base_bdevs_discovered);
210 	raid_bdev->num_base_bdevs_discovered--;
211 }
212 
213 /*
214  * brief:
215  * raid_bdev_destruct is the destruct function table pointer for raid bdev
216  * params:
217  * ctxt - pointer to raid_bdev
218  * returns:
219  * 0 - success
220  * non zero - failure
221  */
222 static int
223 raid_bdev_destruct(void *ctxt)
224 {
225 	struct raid_bdev *raid_bdev = ctxt;
226 
227 	SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_destruct\n");
228 
229 	raid_bdev->destruct_called = true;
230 	for (uint8_t i = 0; i < raid_bdev->num_base_bdevs; i++) {
231 		/*
232 		 * Close all base bdev descriptors for which call has come from below
233 		 * layers.  Also close the descriptors if we have started shutdown.
234 		 */
235 		if (g_shutdown_started ||
236 		    ((raid_bdev->base_bdev_info[i].remove_scheduled == true) &&
237 		     (raid_bdev->base_bdev_info[i].bdev != NULL))) {
238 			raid_bdev_free_base_bdev_resource(raid_bdev, i);
239 		}
240 	}
241 
242 	if (g_shutdown_started) {
243 		TAILQ_REMOVE(&g_raid_bdev_configured_list, raid_bdev, state_link);
244 		raid_bdev->state = RAID_BDEV_STATE_OFFLINE;
245 		TAILQ_INSERT_TAIL(&g_raid_bdev_offline_list, raid_bdev, state_link);
246 	}
247 
248 	spdk_io_device_unregister(raid_bdev, NULL);
249 
250 	if (raid_bdev->num_base_bdevs_discovered == 0) {
251 		/* Free raid_bdev when there are no base bdevs left */
252 		SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid bdev base bdevs is 0, going to free all in destruct\n");
253 		raid_bdev_cleanup(raid_bdev);
254 	}
255 
256 	return 0;
257 }
258 
259 /*
260  * brief:
261  * raid_bdev_base_io_completion is the completion callback for member disk requests
262  * params:
263  * bdev_io - pointer to member disk requested bdev_io
264  * success - true if successful, false if unsuccessful
265  * cb_arg - callback argument (parent raid bdev_io)
266  * returns:
267  * none
268  */
269 void
270 raid_bdev_base_io_completion(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
271 {
272 	struct spdk_bdev_io *parent_io = cb_arg;
273 	struct raid_bdev_io *raid_io = (struct raid_bdev_io *)parent_io->driver_ctx;
274 
275 	spdk_bdev_free_io(bdev_io);
276 
277 	if (!success) {
278 		raid_io->base_bdev_io_status = SPDK_BDEV_IO_STATUS_FAILED;
279 	}
280 
281 	raid_io->base_bdev_io_completed++;
282 	if (raid_io->base_bdev_io_completed == raid_io->base_bdev_io_expected) {
283 		spdk_bdev_io_complete(parent_io, raid_io->base_bdev_io_status);
284 	}
285 }
286 
287 /*
288  * brief:
289  * raid_bdev_queue_io_wait function processes the IO which failed to submit.
290  * It will try to queue the IOs after storing the context to bdev wait queue logic.
291  * params:
292  * raid_bdev_io - pointer to raid bdev_io
293  * pd_idx - base_dev index in raid_bdev
294  * cb_fn - callback when the spdk_bdev_io for base_bdev becomes available
295  * ret - return code
296  * returns:
297  * none
298  */
299 void
300 raid_bdev_queue_io_wait(struct spdk_bdev_io *raid_bdev_io, uint8_t pd_idx,
301 			spdk_bdev_io_wait_cb cb_fn, int ret)
302 {
303 	struct raid_bdev_io *raid_io = (struct raid_bdev_io *)raid_bdev_io->driver_ctx;
304 	struct raid_bdev_io_channel *raid_ch = spdk_io_channel_get_ctx(raid_io->ch);
305 	struct raid_bdev *raid_bdev = (struct raid_bdev *)raid_bdev_io->bdev->ctxt;
306 
307 	assert(ret != 0);
308 
309 	if (ret == -ENOMEM) {
310 		raid_io->waitq_entry.bdev = raid_bdev->base_bdev_info[pd_idx].bdev;
311 		raid_io->waitq_entry.cb_fn = cb_fn;
312 		raid_io->waitq_entry.cb_arg = raid_bdev_io;
313 		spdk_bdev_queue_io_wait(raid_bdev->base_bdev_info[pd_idx].bdev,
314 					raid_ch->base_channel[pd_idx],
315 					&raid_io->waitq_entry);
316 		return;
317 	}
318 
319 	SPDK_ERRLOG("bdev io submit error not due to ENOMEM, it should not happen\n");
320 	assert(false);
321 	spdk_bdev_io_complete(raid_bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
322 }
323 
324 /*
325  * brief:
326  * _raid_bdev_submit_reset_request_next function submits the next batch of reset requests
327  * to member disks; it will submit as many as possible unless a reset fails with -ENOMEM, in
328  * which case it will queue it for later submission
329  * params:
330  * bdev_io - pointer to parent bdev_io on raid bdev device
331  * returns:
332  * none
333  */
334 static void
335 _raid_bdev_submit_reset_request_next(void *_bdev_io)
336 {
337 	struct spdk_bdev_io		*bdev_io = _bdev_io;
338 	struct raid_bdev_io		*raid_io;
339 	struct raid_bdev		*raid_bdev;
340 	struct raid_bdev_io_channel	*raid_ch;
341 	int				ret;
342 	uint8_t				i;
343 
344 	raid_bdev = (struct raid_bdev *)bdev_io->bdev->ctxt;
345 	raid_io = (struct raid_bdev_io *)bdev_io->driver_ctx;
346 	raid_ch = spdk_io_channel_get_ctx(raid_io->ch);
347 
348 	while (raid_io->base_bdev_io_submitted < raid_bdev->num_base_bdevs) {
349 		i = raid_io->base_bdev_io_submitted;
350 		ret = spdk_bdev_reset(raid_bdev->base_bdev_info[i].desc,
351 				      raid_ch->base_channel[i],
352 				      raid_bdev_base_io_completion, bdev_io);
353 		if (ret == 0) {
354 			raid_io->base_bdev_io_submitted++;
355 		} else {
356 			raid_bdev_queue_io_wait(bdev_io, i,
357 						_raid_bdev_submit_reset_request_next, ret);
358 			return;
359 		}
360 	}
361 }
362 
363 /*
364  * brief:
365  * _raid_bdev_submit_reset_request function is the submit_request function for
366  * reset requests
367  * params:
368  * ch - pointer to raid bdev io channel
369  * bdev_io - pointer to parent bdev_io on raid bdev device
370  * returns:
371  * none
372  */
373 static void
374 _raid_bdev_submit_reset_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
375 {
376 	struct raid_bdev_io		*raid_io;
377 	struct raid_bdev		*raid_bdev;
378 
379 	raid_bdev = (struct raid_bdev *)bdev_io->bdev->ctxt;
380 	raid_io = (struct raid_bdev_io *)bdev_io->driver_ctx;
381 	raid_io->ch = ch;
382 	raid_io->base_bdev_io_submitted = 0;
383 	raid_io->base_bdev_io_completed = 0;
384 	raid_io->base_bdev_io_expected = raid_bdev->num_base_bdevs;
385 	raid_io->base_bdev_io_status = SPDK_BDEV_IO_STATUS_SUCCESS;
386 	_raid_bdev_submit_reset_request_next(bdev_io);
387 }
388 
389 /*
390  * brief:
391  * _raid_bdev_submit_null_payload_request function is the submit_request function
392  * for io requests with range but without payload, like UNMAP and FLUSH.
393  * params:
394  * ch - pointer to raid bdev io channel
395  * bdev_io - pointer to parent bdev_io on raid bdev device
396  * returns:
397  * none
398  */
399 static void
400 _raid_bdev_submit_null_payload_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
401 {
402 	struct raid_bdev_io		*raid_io;
403 
404 	raid_io = (struct raid_bdev_io *)bdev_io->driver_ctx;
405 	raid_io->ch = ch;
406 	raid_io->base_bdev_io_submitted = 0;
407 	raid_io->base_bdev_io_completed = 0;
408 	raid_io->base_bdev_io_status = SPDK_BDEV_IO_STATUS_SUCCESS;
409 
410 	SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev: type %d, range (0x%lx, 0x%lx)\n",
411 		      bdev_io->type, bdev_io->u.bdev.offset_blocks, bdev_io->u.bdev.num_blocks);
412 
413 	raid0_submit_null_payload_request(bdev_io);
414 }
415 
416 /*
417  * brief:
418  * Callback function to spdk_bdev_io_get_buf.
419  * params:
420  * ch - pointer to raid bdev io channel
421  * bdev_io - pointer to parent bdev_io on raid bdev device
422  * success - True if buffer is allocated or false otherwise.
423  * returns:
424  * none
425  */
426 static void
427 raid_bdev_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io,
428 		     bool success)
429 {
430 	if (!success) {
431 		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
432 		return;
433 	}
434 
435 	raid0_start_rw_request(ch, bdev_io);
436 }
437 
438 /*
439  * brief:
440  * raid_bdev_submit_request function is the submit_request function pointer of
441  * raid bdev function table. This is used to submit the io on raid_bdev to below
442  * layers.
443  * params:
444  * ch - pointer to raid bdev io channel
445  * bdev_io - pointer to parent bdev_io on raid bdev device
446  * returns:
447  * none
448  */
449 static void
450 raid_bdev_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
451 {
452 	switch (bdev_io->type) {
453 	case SPDK_BDEV_IO_TYPE_READ:
454 		spdk_bdev_io_get_buf(bdev_io, raid_bdev_get_buf_cb,
455 				     bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen);
456 		break;
457 	case SPDK_BDEV_IO_TYPE_WRITE:
458 		raid0_start_rw_request(ch, bdev_io);
459 		break;
460 
461 	case SPDK_BDEV_IO_TYPE_RESET:
462 		_raid_bdev_submit_reset_request(ch, bdev_io);
463 		break;
464 
465 	case SPDK_BDEV_IO_TYPE_FLUSH:
466 	case SPDK_BDEV_IO_TYPE_UNMAP:
467 		_raid_bdev_submit_null_payload_request(ch, bdev_io);
468 		break;
469 
470 	default:
471 		SPDK_ERRLOG("submit request, invalid io type %u\n", bdev_io->type);
472 		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
473 		break;
474 	}
475 
476 }
477 
478 /*
479  * brief:
480  * _raid_bdev_io_type_supported checks whether io_type is supported in
481  * all base bdev modules of raid bdev module. If anyone among the base_bdevs
482  * doesn't support, the raid device doesn't supports.
483  *
484  * params:
485  * raid_bdev - pointer to raid bdev context
486  * io_type - io type
487  * returns:
488  * true - io_type is supported
489  * false - io_type is not supported
490  */
491 inline static bool
492 _raid_bdev_io_type_supported(struct raid_bdev *raid_bdev, enum spdk_bdev_io_type io_type)
493 {
494 	uint8_t i;
495 
496 	for (i = 0; i < raid_bdev->num_base_bdevs; i++) {
497 		if (raid_bdev->base_bdev_info[i].bdev == NULL) {
498 			assert(false);
499 			continue;
500 		}
501 
502 		if (spdk_bdev_io_type_supported(raid_bdev->base_bdev_info[i].bdev,
503 						io_type) == false) {
504 			return false;
505 		}
506 	}
507 
508 	return true;
509 }
510 
511 /*
512  * brief:
513  * raid_bdev_io_type_supported is the io_supported function for bdev function
514  * table which returns whether the particular io type is supported or not by
515  * raid bdev module
516  * params:
517  * ctx - pointer to raid bdev context
518  * type - io type
519  * returns:
520  * true - io_type is supported
521  * false - io_type is not supported
522  */
523 static bool
524 raid_bdev_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
525 {
526 	switch (io_type) {
527 	case SPDK_BDEV_IO_TYPE_READ:
528 	case SPDK_BDEV_IO_TYPE_WRITE:
529 		return true;
530 
531 	case SPDK_BDEV_IO_TYPE_FLUSH:
532 	case SPDK_BDEV_IO_TYPE_RESET:
533 	case SPDK_BDEV_IO_TYPE_UNMAP:
534 		return _raid_bdev_io_type_supported(ctx, io_type);
535 
536 	default:
537 		return false;
538 	}
539 
540 	return false;
541 }
542 
/*
 * brief:
 * raid_bdev_get_io_channel is the get_io_channel function table pointer for
 * raid bdev. The raid_bdev pointer itself is used as the io device key.
 * params:
 * ctxt - pointer to raid_bdev
 * returns:
 * pointer to io channel for raid bdev
 */
static struct spdk_io_channel *
raid_bdev_get_io_channel(void *ctxt)
{
	return spdk_get_io_channel(ctxt);
}
559 
560 /*
561  * brief:
562  * raid_bdev_dump_info_json is the function table pointer for raid bdev
563  * params:
564  * ctx - pointer to raid_bdev
565  * w - pointer to json context
566  * returns:
567  * 0 - success
568  * non zero - failure
569  */
570 static int
571 raid_bdev_dump_info_json(void *ctx, struct spdk_json_write_ctx *w)
572 {
573 	struct raid_bdev *raid_bdev = ctx;
574 
575 	SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_dump_config_json\n");
576 	assert(raid_bdev != NULL);
577 
578 	/* Dump the raid bdev configuration related information */
579 	spdk_json_write_named_object_begin(w, "raid");
580 	spdk_json_write_named_uint32(w, "strip_size", raid_bdev->strip_size);
581 	spdk_json_write_named_uint32(w, "strip_size_kb", raid_bdev->strip_size_kb);
582 	spdk_json_write_named_uint32(w, "state", raid_bdev->state);
583 	spdk_json_write_named_string(w, "raid_level", raid_bdev_level_to_str(raid_bdev->level));
584 	spdk_json_write_named_uint32(w, "destruct_called", raid_bdev->destruct_called);
585 	spdk_json_write_named_uint32(w, "num_base_bdevs", raid_bdev->num_base_bdevs);
586 	spdk_json_write_named_uint32(w, "num_base_bdevs_discovered", raid_bdev->num_base_bdevs_discovered);
587 	spdk_json_write_name(w, "base_bdevs_list");
588 	spdk_json_write_array_begin(w);
589 	for (uint8_t i = 0; i < raid_bdev->num_base_bdevs; i++) {
590 		if (raid_bdev->base_bdev_info[i].bdev) {
591 			spdk_json_write_string(w, raid_bdev->base_bdev_info[i].bdev->name);
592 		} else {
593 			spdk_json_write_null(w);
594 		}
595 	}
596 	spdk_json_write_array_end(w);
597 	spdk_json_write_object_end(w);
598 
599 	return 0;
600 }
601 
602 /*
603  * brief:
604  * raid_bdev_write_config_json is the function table pointer for raid bdev
605  * params:
606  * bdev - pointer to spdk_bdev
607  * w - pointer to json context
608  * returns:
609  * none
610  */
611 static void
612 raid_bdev_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
613 {
614 	struct raid_bdev *raid_bdev = bdev->ctxt;
615 	struct spdk_bdev *base;
616 	uint8_t i;
617 
618 	spdk_json_write_object_begin(w);
619 
620 	spdk_json_write_named_string(w, "method", "bdev_raid_create");
621 
622 	spdk_json_write_named_object_begin(w, "params");
623 	spdk_json_write_named_string(w, "name", bdev->name);
624 	spdk_json_write_named_uint32(w, "strip_size", raid_bdev->strip_size_kb);
625 	spdk_json_write_named_string(w, "raid_level", raid_bdev_level_to_str(raid_bdev->level));
626 
627 	spdk_json_write_named_array_begin(w, "base_bdevs");
628 	for (i = 0; i < raid_bdev->num_base_bdevs; i++) {
629 		base = raid_bdev->base_bdev_info[i].bdev;
630 		if (base) {
631 			spdk_json_write_string(w, base->name);
632 		}
633 	}
634 	spdk_json_write_array_end(w);
635 	spdk_json_write_object_end(w);
636 
637 	spdk_json_write_object_end(w);
638 }
639 
640 /* g_raid_bdev_fn_table is the function table for raid bdev */
641 static const struct spdk_bdev_fn_table g_raid_bdev_fn_table = {
642 	.destruct		= raid_bdev_destruct,
643 	.submit_request		= raid_bdev_submit_request,
644 	.io_type_supported	= raid_bdev_io_type_supported,
645 	.get_io_channel		= raid_bdev_get_io_channel,
646 	.dump_info_json		= raid_bdev_dump_info_json,
647 	.write_config_json	= raid_bdev_write_config_json,
648 };
649 
650 /*
651  * brief:
652  * raid_bdev_config_cleanup function is used to free memory for one raid_bdev in configuration
653  * params:
654  * raid_cfg - pointer to raid_bdev_config structure
655  * returns:
656  * none
657  */
658 void
659 raid_bdev_config_cleanup(struct raid_bdev_config *raid_cfg)
660 {
661 	uint8_t i;
662 
663 	TAILQ_REMOVE(&g_raid_config.raid_bdev_config_head, raid_cfg, link);
664 	g_raid_config.total_raid_bdev--;
665 
666 	if (raid_cfg->base_bdev) {
667 		for (i = 0; i < raid_cfg->num_base_bdevs; i++) {
668 			free(raid_cfg->base_bdev[i].name);
669 		}
670 		free(raid_cfg->base_bdev);
671 	}
672 	free(raid_cfg->name);
673 	free(raid_cfg);
674 }
675 
676 /*
677  * brief:
678  * raid_bdev_free is the raid bdev function table function pointer. This is
679  * called on bdev free path
680  * params:
681  * none
682  * returns:
683  * none
684  */
685 static void
686 raid_bdev_free(void)
687 {
688 	struct raid_bdev_config *raid_cfg, *tmp;
689 
690 	SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_free\n");
691 	TAILQ_FOREACH_SAFE(raid_cfg, &g_raid_config.raid_bdev_config_head, link, tmp) {
692 		raid_bdev_config_cleanup(raid_cfg);
693 	}
694 }
695 
696 /* brief
697  * raid_bdev_config_find_by_name is a helper function to find raid bdev config
698  * by name as key.
699  *
700  * params:
701  * raid_name - name for raid bdev.
702  */
703 struct raid_bdev_config *
704 raid_bdev_config_find_by_name(const char *raid_name)
705 {
706 	struct raid_bdev_config *raid_cfg;
707 
708 	TAILQ_FOREACH(raid_cfg, &g_raid_config.raid_bdev_config_head, link) {
709 		if (!strcmp(raid_cfg->name, raid_name)) {
710 			return raid_cfg;
711 		}
712 	}
713 
714 	return raid_cfg;
715 }
716 
717 /*
718  * brief
719  * raid_bdev_config_add function adds config for newly created raid bdev.
720  *
721  * params:
722  * raid_name - name for raid bdev.
723  * strip_size - strip size in KB
724  * num_base_bdevs - number of base bdevs.
725  * level - raid level, only raid level 0 is supported.
726  * _raid_cfg - Pointer to newly added configuration
727  */
728 int
729 raid_bdev_config_add(const char *raid_name, uint32_t strip_size, uint8_t num_base_bdevs,
730 		     enum raid_level level, struct raid_bdev_config **_raid_cfg)
731 {
732 	struct raid_bdev_config *raid_cfg;
733 
734 	raid_cfg = raid_bdev_config_find_by_name(raid_name);
735 	if (raid_cfg != NULL) {
736 		SPDK_ERRLOG("Duplicate raid bdev name found in config file %s\n",
737 			    raid_name);
738 		return -EEXIST;
739 	}
740 
741 	if (spdk_u32_is_pow2(strip_size) == false) {
742 		SPDK_ERRLOG("Invalid strip size %" PRIu32 "\n", strip_size);
743 		return -EINVAL;
744 	}
745 
746 	if (num_base_bdevs == 0) {
747 		SPDK_ERRLOG("Invalid base device count %u\n", num_base_bdevs);
748 		return -EINVAL;
749 	}
750 
751 	if (level != RAID0) {
752 		SPDK_ERRLOG("invalid raid level %u, only raid level 0 is supported\n",
753 			    level);
754 		return -EINVAL;
755 	}
756 
757 	raid_cfg = calloc(1, sizeof(*raid_cfg));
758 	if (raid_cfg == NULL) {
759 		SPDK_ERRLOG("unable to allocate memory\n");
760 		return -ENOMEM;
761 	}
762 
763 	raid_cfg->name = strdup(raid_name);
764 	if (!raid_cfg->name) {
765 		free(raid_cfg);
766 		SPDK_ERRLOG("unable to allocate memory\n");
767 		return -ENOMEM;
768 	}
769 	raid_cfg->strip_size = strip_size;
770 	raid_cfg->num_base_bdevs = num_base_bdevs;
771 	raid_cfg->level = level;
772 
773 	raid_cfg->base_bdev = calloc(num_base_bdevs, sizeof(*raid_cfg->base_bdev));
774 	if (raid_cfg->base_bdev == NULL) {
775 		free(raid_cfg->name);
776 		free(raid_cfg);
777 		SPDK_ERRLOG("unable to allocate memory\n");
778 		return -ENOMEM;
779 	}
780 
781 	TAILQ_INSERT_TAIL(&g_raid_config.raid_bdev_config_head, raid_cfg, link);
782 	g_raid_config.total_raid_bdev++;
783 
784 	*_raid_cfg = raid_cfg;
785 	return 0;
786 }
787 
788 /*
789  * brief:
790  * raid_bdev_config_add_base_bdev function add base bdev to raid bdev config.
791  *
792  * params:
793  * raid_cfg - pointer to raid bdev configuration
794  * base_bdev_name - name of base bdev
795  * slot - Position to add base bdev
796  */
797 int
798 raid_bdev_config_add_base_bdev(struct raid_bdev_config *raid_cfg, const char *base_bdev_name,
799 			       uint8_t slot)
800 {
801 	uint8_t i;
802 	struct raid_bdev_config *tmp;
803 
804 	if (slot >= raid_cfg->num_base_bdevs) {
805 		return -EINVAL;
806 	}
807 
808 	TAILQ_FOREACH(tmp, &g_raid_config.raid_bdev_config_head, link) {
809 		for (i = 0; i < tmp->num_base_bdevs; i++) {
810 			if (tmp->base_bdev[i].name != NULL) {
811 				if (!strcmp(tmp->base_bdev[i].name, base_bdev_name)) {
812 					SPDK_ERRLOG("duplicate base bdev name %s mentioned\n",
813 						    base_bdev_name);
814 					return -EEXIST;
815 				}
816 			}
817 		}
818 	}
819 
820 	raid_cfg->base_bdev[slot].name = strdup(base_bdev_name);
821 	if (raid_cfg->base_bdev[slot].name == NULL) {
822 		SPDK_ERRLOG("unable to allocate memory\n");
823 		return -ENOMEM;
824 	}
825 
826 	return 0;
827 }
828 
829 static struct {
830 	const char *name;
831 	enum raid_level value;
832 } g_raid_level_names[] = {
833 	{ "raid0", RAID0 },
834 	{ "0", RAID0 },
835 	{ }
836 };
837 
838 enum raid_level raid_bdev_parse_raid_level(const char *str)
839 {
840 	unsigned int i;
841 
842 	for (i = 0; g_raid_level_names[i].name != NULL; i++) {
843 		if (strcasecmp(g_raid_level_names[i].name, str) == 0) {
844 			return g_raid_level_names[i].value;
845 		}
846 	}
847 
848 	return INVALID_RAID_LEVEL;
849 }
850 
851 const char *
852 raid_bdev_level_to_str(enum raid_level level)
853 {
854 	unsigned int i;
855 
856 	for (i = 0; g_raid_level_names[i].name != NULL; i++) {
857 		if (g_raid_level_names[i].value == level) {
858 			return g_raid_level_names[i].name;
859 		}
860 	}
861 
862 	return "";
863 }
864 
865 /*
866  * brief:
867  * raid_bdev_parse_raid is used to parse the raid bdev from config file based on
868  * pre-defined raid bdev format in config file.
869  * Format of config file:
870  *   [RAID1]
871  *   Name raid1
872  *   StripSize 64
873  *   NumDevices 2
874  *   RaidLevel 0
875  *   Devices Nvme0n1 Nvme1n1
876  *
877  *   [RAID2]
878  *   Name raid2
879  *   StripSize 64
880  *   NumDevices 3
881  *   RaidLevel 0
882  *   Devices Nvme2n1 Nvme3n1 Nvme4n1
883  *
884  * params:
885  * conf_section - pointer to config section
886  * returns:
887  * 0 - success
888  * non zero - failure
889  */
890 static int
891 raid_bdev_parse_raid(struct spdk_conf_section *conf_section)
892 {
893 	const char *raid_name;
894 	uint32_t strip_size;
895 	uint8_t num_base_bdevs;
896 	const char *raid_level_str;
897 	enum raid_level level;
898 	const char *base_bdev_name;
899 	struct raid_bdev_config *raid_cfg;
900 	int rc, i, val;
901 
902 	raid_name = spdk_conf_section_get_val(conf_section, "Name");
903 	if (raid_name == NULL) {
904 		SPDK_ERRLOG("raid_name is null\n");
905 		return -EINVAL;
906 	}
907 
908 	val = spdk_conf_section_get_intval(conf_section, "StripSize");
909 	if (val < 0) {
910 		return -EINVAL;
911 	}
912 	strip_size = val;
913 
914 	val = spdk_conf_section_get_intval(conf_section, "NumDevices");
915 	if (val < 0) {
916 		return -EINVAL;
917 	}
918 	num_base_bdevs = val;
919 
920 	raid_level_str = spdk_conf_section_get_val(conf_section, "RaidLevel");
921 	if (raid_level_str == NULL) {
922 		SPDK_ERRLOG("Missing RaidLevel\n");
923 		return -EINVAL;
924 	}
925 	level = raid_bdev_parse_raid_level(raid_level_str);
926 	if (level == INVALID_RAID_LEVEL) {
927 		SPDK_ERRLOG("Invalid RaidLevel\n");
928 		return -EINVAL;
929 	}
930 
931 	SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "%s %" PRIu32 " %u %u\n",
932 		      raid_name, strip_size, num_base_bdevs, level);
933 
934 	rc = raid_bdev_config_add(raid_name, strip_size, num_base_bdevs, level,
935 				  &raid_cfg);
936 	if (rc != 0) {
937 		SPDK_ERRLOG("Failed to add raid bdev config\n");
938 		return rc;
939 	}
940 
941 	for (i = 0; true; i++) {
942 		base_bdev_name = spdk_conf_section_get_nmval(conf_section, "Devices", 0, i);
943 		if (base_bdev_name == NULL) {
944 			break;
945 		}
946 		if (i >= num_base_bdevs) {
947 			raid_bdev_config_cleanup(raid_cfg);
948 			SPDK_ERRLOG("Number of devices mentioned is more than count\n");
949 			return -EINVAL;
950 		}
951 
952 		rc = raid_bdev_config_add_base_bdev(raid_cfg, base_bdev_name, i);
953 		if (rc != 0) {
954 			raid_bdev_config_cleanup(raid_cfg);
955 			SPDK_ERRLOG("Failed to add base bdev to raid bdev config\n");
956 			return rc;
957 		}
958 	}
959 
960 	if (i != raid_cfg->num_base_bdevs) {
961 		raid_bdev_config_cleanup(raid_cfg);
962 		SPDK_ERRLOG("Number of devices mentioned is less than count\n");
963 		return -EINVAL;
964 	}
965 
966 	rc = raid_bdev_create(raid_cfg);
967 	if (rc != 0) {
968 		raid_bdev_config_cleanup(raid_cfg);
969 		SPDK_ERRLOG("Failed to create raid bdev\n");
970 		return rc;
971 	}
972 
973 	rc = raid_bdev_add_base_devices(raid_cfg);
974 	if (rc != 0) {
975 		SPDK_ERRLOG("Failed to add any base bdev to raid bdev\n");
976 		/* Config is not removed in this case. */
977 	}
978 
979 	return 0;
980 }
981 
/*
 * brief:
 * raid_bdev_parse_config walks all config sections and parses every section
 * whose name matches the "RAID" prefix. Parsing stops at the first section
 * that fails.
 * params:
 * none
 * returns:
 * 0 - success
 * non zero - failure
 */
static int
raid_bdev_parse_config(void)
{
	struct spdk_conf_section *section;
	int rc;

	for (section = spdk_conf_first_section(NULL); section != NULL;
	     section = spdk_conf_next_section(section)) {
		if (!spdk_conf_section_match_prefix(section, "RAID")) {
			continue;
		}

		rc = raid_bdev_parse_raid(section);
		if (rc < 0) {
			SPDK_ERRLOG("Unable to parse raid bdev section\n");
			return rc;
		}
	}

	return 0;
}
1012 
/*
 * brief:
 * raid_bdev_fini_start is called when bdev layer is starting the
 * shutdown process. It is installed as the fini_start callback of g_raid_if
 * and only records the event by setting g_shutdown_started.
 * params:
 * none
 * returns:
 * none
 */
static void
raid_bdev_fini_start(void)
{
	SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_fini_start\n");
	/* Remember that shutdown has begun. */
	g_shutdown_started = true;
}
1028 
/*
 * brief:
 * raid_bdev_exit is called on raid bdev module exit time by bdev layer. It is
 * installed as the module_fini callback of g_raid_if and delegates all work
 * to raid_bdev_free.
 * params:
 * none
 * returns:
 * none
 */
static void
raid_bdev_exit(void)
{
	SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_exit\n");
	raid_bdev_free();
}
1043 
/*
 * brief:
 * raid_bdev_get_ctx_size is used to return the context size of bdev_io for raid
 * module. It is installed as the get_ctx_size callback of g_raid_if.
 * params:
 * none
 * returns:
 * size of spdk_bdev_io context for raid, i.e. sizeof(struct raid_bdev_io)
 */
static int
raid_bdev_get_ctx_size(void)
{
	SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_get_ctx_size\n");
	return sizeof(struct raid_bdev_io);
}
1059 
1060 /*
1061  * brief:
1062  * raid_bdev_get_running_config is used to get the configuration options.
1063  *
1064  * params:
1065  * fp - The pointer to a file that will be written to the configuration options.
1066  * returns:
1067  * none
1068  */
1069 static void
1070 raid_bdev_get_running_config(FILE *fp)
1071 {
1072 	struct raid_bdev *raid_bdev;
1073 	struct spdk_bdev *base;
1074 	int index = 1;
1075 	uint8_t i;
1076 
1077 	TAILQ_FOREACH(raid_bdev, &g_raid_bdev_configured_list, state_link) {
1078 		fprintf(fp,
1079 			"\n"
1080 			"[RAID%d]\n"
1081 			"  Name %s\n"
1082 			"  StripSize %" PRIu32 "\n"
1083 			"  NumDevices %u\n"
1084 			"  RaidLevel %s\n",
1085 			index, raid_bdev->bdev.name, raid_bdev->strip_size_kb,
1086 			raid_bdev->num_base_bdevs,
1087 			raid_bdev_level_to_str(raid_bdev->level));
1088 		fprintf(fp,
1089 			"  Devices ");
1090 		for (i = 0; i < raid_bdev->num_base_bdevs; i++) {
1091 			base = raid_bdev->base_bdev_info[i].bdev;
1092 			if (base) {
1093 				fprintf(fp,
1094 					"%s ",
1095 					base->name);
1096 			}
1097 		}
1098 		fprintf(fp,
1099 			"\n");
1100 		index++;
1101 	}
1102 }
1103 
1104 /*
1105  * brief:
1106  * raid_bdev_can_claim_bdev is the function to check if this base_bdev can be
1107  * claimed by raid bdev or not.
1108  * params:
1109  * bdev_name - represents base bdev name
1110  * _raid_cfg - pointer to raid bdev config parsed from config file
1111  * base_bdev_slot - if bdev can be claimed, it represents the base_bdev correct
1112  * slot. This field is only valid if return value of this function is true
1113  * returns:
1114  * true - if bdev can be claimed
1115  * false - if bdev can't be claimed
1116  */
1117 static bool
1118 raid_bdev_can_claim_bdev(const char *bdev_name, struct raid_bdev_config **_raid_cfg,
1119 			 uint8_t *base_bdev_slot)
1120 {
1121 	struct raid_bdev_config *raid_cfg;
1122 	uint8_t i;
1123 
1124 	TAILQ_FOREACH(raid_cfg, &g_raid_config.raid_bdev_config_head, link) {
1125 		for (i = 0; i < raid_cfg->num_base_bdevs; i++) {
1126 			/*
1127 			 * Check if the base bdev name is part of raid bdev configuration.
1128 			 * If match is found then return true and the slot information where
1129 			 * this base bdev should be inserted in raid bdev
1130 			 */
1131 			if (!strcmp(bdev_name, raid_cfg->base_bdev[i].name)) {
1132 				*_raid_cfg = raid_cfg;
1133 				*base_bdev_slot = i;
1134 				return true;
1135 			}
1136 		}
1137 	}
1138 
1139 	return false;
1140 }
1141 
1142 
/*
 * Module descriptor of the raid bdev module. It names the module "raid",
 * lists the callbacks implemented in this file and is handed to
 * SPDK_BDEV_MODULE_REGISTER right below.
 */
static struct spdk_bdev_module g_raid_if = {
	.name = "raid",
	.module_init = raid_bdev_init,
	.fini_start = raid_bdev_fini_start,
	.module_fini = raid_bdev_exit,
	.get_ctx_size = raid_bdev_get_ctx_size,
	.examine_config = raid_bdev_examine,
	.config_text = raid_bdev_get_running_config,
	/* Neither init nor fini is flagged as asynchronous. */
	.async_init = false,
	.async_fini = false,
};
SPDK_BDEV_MODULE_REGISTER(raid, &g_raid_if)
1155 
1156 /*
1157  * brief:
1158  * raid_bdev_init is the initialization function for raid bdev module
1159  * params:
1160  * none
1161  * returns:
1162  * 0 - success
1163  * non zero - failure
1164  */
1165 static int
1166 raid_bdev_init(void)
1167 {
1168 	int ret;
1169 
1170 	/* Parse config file for raids */
1171 	ret = raid_bdev_parse_config();
1172 	if (ret < 0) {
1173 		SPDK_ERRLOG("raid bdev init failed parsing\n");
1174 		raid_bdev_free();
1175 		return ret;
1176 	}
1177 
1178 	SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_init completed successfully\n");
1179 
1180 	return 0;
1181 }
1182 
1183 /*
1184  * brief:
1185  * raid_bdev_create allocates raid bdev based on passed configuration
1186  * params:
1187  * raid_cfg - configuration of raid bdev
1188  * returns:
1189  * 0 - success
1190  * non zero - failure
1191  */
1192 int
1193 raid_bdev_create(struct raid_bdev_config *raid_cfg)
1194 {
1195 	struct raid_bdev *raid_bdev;
1196 	struct spdk_bdev *raid_bdev_gen;
1197 
1198 	raid_bdev = calloc(1, sizeof(*raid_bdev));
1199 	if (!raid_bdev) {
1200 		SPDK_ERRLOG("Unable to allocate memory for raid bdev\n");
1201 		return -ENOMEM;
1202 	}
1203 
1204 	assert(raid_cfg->num_base_bdevs != 0);
1205 	raid_bdev->num_base_bdevs = raid_cfg->num_base_bdevs;
1206 	raid_bdev->base_bdev_info = calloc(raid_bdev->num_base_bdevs,
1207 					   sizeof(struct raid_base_bdev_info));
1208 	if (!raid_bdev->base_bdev_info) {
1209 		SPDK_ERRLOG("Unable able to allocate base bdev info\n");
1210 		free(raid_bdev);
1211 		return -ENOMEM;
1212 	}
1213 
1214 	/* strip_size_kb is from the rpc param.  strip_size is in blocks and used
1215 	 * intnerally and set later.
1216 	 */
1217 	raid_bdev->strip_size = 0;
1218 	raid_bdev->strip_size_kb = raid_cfg->strip_size;
1219 	raid_bdev->state = RAID_BDEV_STATE_CONFIGURING;
1220 	raid_bdev->config = raid_cfg;
1221 	raid_bdev->level = raid_cfg->level;
1222 
1223 	switch (raid_bdev->level) {
1224 	case RAID0:
1225 		break;
1226 	default:
1227 		SPDK_ERRLOG("invalid raid level %u\n", raid_bdev->level);
1228 		free(raid_bdev);
1229 		return -EINVAL;
1230 	}
1231 
1232 	raid_bdev_gen = &raid_bdev->bdev;
1233 
1234 	raid_bdev_gen->name = strdup(raid_cfg->name);
1235 	if (!raid_bdev_gen->name) {
1236 		SPDK_ERRLOG("Unable to allocate name for raid\n");
1237 		free(raid_bdev->base_bdev_info);
1238 		free(raid_bdev);
1239 		return -ENOMEM;
1240 	}
1241 
1242 	raid_bdev_gen->product_name = "Raid Volume";
1243 	raid_bdev_gen->ctxt = raid_bdev;
1244 	raid_bdev_gen->fn_table = &g_raid_bdev_fn_table;
1245 	raid_bdev_gen->module = &g_raid_if;
1246 	raid_bdev_gen->write_cache = 0;
1247 
1248 	TAILQ_INSERT_TAIL(&g_raid_bdev_configuring_list, raid_bdev, state_link);
1249 	TAILQ_INSERT_TAIL(&g_raid_bdev_list, raid_bdev, global_link);
1250 
1251 	raid_cfg->raid_bdev = raid_bdev;
1252 
1253 	return 0;
1254 }
1255 
1256 /*
1257  * brief
1258  * raid_bdev_alloc_base_bdev_resource allocates resource of base bdev.
1259  * params:
1260  * raid_bdev - pointer to raid bdev
1261  * bdev - pointer to base bdev
1262  * base_bdev_slot - position to add base bdev
1263  * returns:
1264  * 0 - success
1265  * non zero - failure
1266  */
1267 static int
1268 raid_bdev_alloc_base_bdev_resource(struct raid_bdev *raid_bdev, struct spdk_bdev *bdev,
1269 				   uint8_t base_bdev_slot)
1270 {
1271 	struct spdk_bdev_desc *desc;
1272 	int rc;
1273 
1274 	rc = spdk_bdev_open(bdev, true, raid_bdev_remove_base_bdev, bdev, &desc);
1275 	if (rc != 0) {
1276 		SPDK_ERRLOG("Unable to create desc on bdev '%s'\n", bdev->name);
1277 		return rc;
1278 	}
1279 
1280 	rc = spdk_bdev_module_claim_bdev(bdev, NULL, &g_raid_if);
1281 	if (rc != 0) {
1282 		SPDK_ERRLOG("Unable to claim this bdev as it is already claimed\n");
1283 		spdk_bdev_close(desc);
1284 		return rc;
1285 	}
1286 
1287 	SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "bdev %s is claimed\n", bdev->name);
1288 
1289 	assert(raid_bdev->state != RAID_BDEV_STATE_ONLINE);
1290 	assert(base_bdev_slot < raid_bdev->num_base_bdevs);
1291 
1292 	raid_bdev->base_bdev_info[base_bdev_slot].bdev = bdev;
1293 	raid_bdev->base_bdev_info[base_bdev_slot].desc = desc;
1294 	raid_bdev->num_base_bdevs_discovered++;
1295 	assert(raid_bdev->num_base_bdevs_discovered <= raid_bdev->num_base_bdevs);
1296 
1297 	return 0;
1298 }
1299 
1300 /*
1301  * brief:
1302  * If raid bdev config is complete, then only register the raid bdev to
1303  * bdev layer and remove this raid bdev from configuring list and
1304  * insert the raid bdev to configured list
1305  * params:
1306  * raid_bdev - pointer to raid bdev
1307  * returns:
1308  * 0 - success
1309  * non zero - failure
1310  */
1311 static int
1312 raid_bdev_configure(struct raid_bdev *raid_bdev)
1313 {
1314 	uint32_t		blocklen;
1315 	uint64_t		min_blockcnt;
1316 	struct spdk_bdev	*raid_bdev_gen;
1317 	int rc = 0;
1318 
1319 	blocklen = raid_bdev->base_bdev_info[0].bdev->blocklen;
1320 	min_blockcnt = raid_bdev->base_bdev_info[0].bdev->blockcnt;
1321 	for (uint8_t i = 1; i < raid_bdev->num_base_bdevs; i++) {
1322 		/* Calculate minimum block count from all base bdevs */
1323 		if (raid_bdev->base_bdev_info[i].bdev->blockcnt < min_blockcnt) {
1324 			min_blockcnt = raid_bdev->base_bdev_info[i].bdev->blockcnt;
1325 		}
1326 
1327 		/* Check blocklen for all base bdevs that it should be same */
1328 		if (blocklen != raid_bdev->base_bdev_info[i].bdev->blocklen) {
1329 			/*
1330 			 * Assumption is that all the base bdevs for any raid bdev should
1331 			 * have same blocklen
1332 			 */
1333 			SPDK_ERRLOG("Blocklen of various bdevs not matching\n");
1334 			return -EINVAL;
1335 		}
1336 	}
1337 
1338 	/* The strip_size_kb is read in from user in KB. Convert to blocks here for
1339 	 * internal use.
1340 	 */
1341 	raid_bdev->strip_size = (raid_bdev->strip_size_kb * 1024) / blocklen;
1342 	raid_bdev->strip_size_shift = spdk_u32log2(raid_bdev->strip_size);
1343 	raid_bdev->blocklen_shift = spdk_u32log2(blocklen);
1344 
1345 	raid_bdev_gen = &raid_bdev->bdev;
1346 	raid_bdev_gen->blocklen = blocklen;
1347 	if (raid_bdev->num_base_bdevs > 1) {
1348 		raid_bdev_gen->optimal_io_boundary = raid_bdev->strip_size;
1349 		raid_bdev_gen->split_on_optimal_io_boundary = true;
1350 	} else {
1351 		/* Do not need to split reads/writes on single bdev RAID modules. */
1352 		raid_bdev_gen->optimal_io_boundary = 0;
1353 		raid_bdev_gen->split_on_optimal_io_boundary = false;
1354 	}
1355 
1356 	/*
1357 	 * RAID bdev logic is for striping so take the minimum block count based
1358 	 * approach where total block count of raid bdev is the number of base
1359 	 * bdev times the minimum block count of any base bdev
1360 	 */
1361 	SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "min blockcount %lu,  numbasedev %u, strip size shift %u\n",
1362 		      min_blockcnt,
1363 		      raid_bdev->num_base_bdevs, raid_bdev->strip_size_shift);
1364 	raid_bdev_gen->blockcnt = ((min_blockcnt >> raid_bdev->strip_size_shift) <<
1365 				   raid_bdev->strip_size_shift)  * raid_bdev->num_base_bdevs;
1366 	SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "io device register %p\n", raid_bdev);
1367 	SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "blockcnt %lu, blocklen %u\n", raid_bdev_gen->blockcnt,
1368 		      raid_bdev_gen->blocklen);
1369 	if (raid_bdev->state == RAID_BDEV_STATE_CONFIGURING) {
1370 		raid_bdev->state = RAID_BDEV_STATE_ONLINE;
1371 		spdk_io_device_register(raid_bdev, raid_bdev_create_cb, raid_bdev_destroy_cb,
1372 					sizeof(struct raid_bdev_io_channel),
1373 					raid_bdev->bdev.name);
1374 		rc = spdk_bdev_register(raid_bdev_gen);
1375 		if (rc != 0) {
1376 			SPDK_ERRLOG("Unable to register raid bdev and stay at configuring state\n");
1377 			spdk_io_device_unregister(raid_bdev, NULL);
1378 			raid_bdev->state = RAID_BDEV_STATE_CONFIGURING;
1379 			return rc;
1380 		}
1381 		SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid bdev generic %p\n", raid_bdev_gen);
1382 		TAILQ_REMOVE(&g_raid_bdev_configuring_list, raid_bdev, state_link);
1383 		TAILQ_INSERT_TAIL(&g_raid_bdev_configured_list, raid_bdev, state_link);
1384 		SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid bdev is created with name %s, raid_bdev %p\n",
1385 			      raid_bdev_gen->name, raid_bdev);
1386 	}
1387 
1388 	return 0;
1389 }
1390 
1391 /*
1392  * brief:
1393  * If raid bdev is online and registered, change the bdev state to
1394  * configuring and unregister this raid device. Queue this raid device
1395  * in configuring list
1396  * params:
1397  * raid_bdev - pointer to raid bdev
1398  * cb_fn - callback function
1399  * cb_arg - argument to callback function
1400  * returns:
1401  * none
1402  */
1403 static void
1404 raid_bdev_deconfigure(struct raid_bdev *raid_bdev, raid_bdev_destruct_cb cb_fn,
1405 		      void *cb_arg)
1406 {
1407 	if (raid_bdev->state != RAID_BDEV_STATE_ONLINE) {
1408 		if (cb_fn) {
1409 			cb_fn(cb_arg, 0);
1410 		}
1411 		return;
1412 	}
1413 
1414 	assert(raid_bdev->num_base_bdevs == raid_bdev->num_base_bdevs_discovered);
1415 	TAILQ_REMOVE(&g_raid_bdev_configured_list, raid_bdev, state_link);
1416 	raid_bdev->state = RAID_BDEV_STATE_OFFLINE;
1417 	assert(raid_bdev->num_base_bdevs_discovered);
1418 	TAILQ_INSERT_TAIL(&g_raid_bdev_offline_list, raid_bdev, state_link);
1419 	SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid bdev state chaning from online to offline\n");
1420 
1421 	spdk_bdev_unregister(&raid_bdev->bdev, cb_fn, cb_arg);
1422 }
1423 
1424 /*
1425  * brief:
1426  * raid_bdev_find_by_base_bdev function finds the raid bdev which has
1427  *  claimed the base bdev.
1428  * params:
1429  * base_bdev - pointer to base bdev pointer
1430  * _raid_bdev - Referenct to pointer to raid bdev
1431  * _base_bdev_slot - Reference to the slot of the base bdev.
1432  * returns:
1433  * true - if the raid bdev is found.
1434  * false - if the raid bdev is not found.
1435  */
1436 static bool
1437 raid_bdev_find_by_base_bdev(struct spdk_bdev *base_bdev, struct raid_bdev **_raid_bdev,
1438 			    uint8_t *_base_bdev_slot)
1439 {
1440 	struct raid_bdev	*raid_bdev;
1441 	uint8_t			i;
1442 
1443 	TAILQ_FOREACH(raid_bdev, &g_raid_bdev_list, global_link) {
1444 		for (i = 0; i < raid_bdev->num_base_bdevs; i++) {
1445 			if (raid_bdev->base_bdev_info[i].bdev == base_bdev) {
1446 				*_raid_bdev = raid_bdev;
1447 				*_base_bdev_slot = i;
1448 				return true;
1449 			}
1450 		}
1451 	}
1452 
1453 	return false;
1454 }
1455 
1456 /*
1457  * brief:
1458  * raid_bdev_remove_base_bdev function is called by below layers when base_bdev
1459  * is removed. This function checks if this base bdev is part of any raid bdev
1460  * or not. If yes, it takes necessary action on that particular raid bdev.
1461  * params:
1462  * ctx - pointer to base bdev pointer which got removed
1463  * returns:
1464  * none
1465  */
1466 static void
1467 raid_bdev_remove_base_bdev(void *ctx)
1468 {
1469 	struct spdk_bdev	*base_bdev = ctx;
1470 	struct raid_bdev	*raid_bdev = NULL;
1471 	uint8_t			base_bdev_slot = 0;
1472 
1473 	SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_remove_base_bdev\n");
1474 
1475 	/* Find the raid_bdev which has claimed this base_bdev */
1476 	if (!raid_bdev_find_by_base_bdev(base_bdev, &raid_bdev, &base_bdev_slot)) {
1477 		SPDK_ERRLOG("bdev to remove '%s' not found\n", base_bdev->name);
1478 		return;
1479 	}
1480 
1481 	assert(raid_bdev->base_bdev_info[base_bdev_slot].desc);
1482 	raid_bdev->base_bdev_info[base_bdev_slot].remove_scheduled = true;
1483 
1484 	if (raid_bdev->destruct_called == true ||
1485 	    raid_bdev->state == RAID_BDEV_STATE_CONFIGURING) {
1486 		/*
1487 		 * As raid bdev is not registered yet or already unregistered,
1488 		 * so cleanup should be done here itself.
1489 		 */
1490 		raid_bdev_free_base_bdev_resource(raid_bdev, base_bdev_slot);
1491 		if (raid_bdev->num_base_bdevs_discovered == 0) {
1492 			/* There is no base bdev for this raid, so free the raid device. */
1493 			raid_bdev_cleanup(raid_bdev);
1494 			return;
1495 		}
1496 	}
1497 
1498 	raid_bdev_deconfigure(raid_bdev, NULL, NULL);
1499 }
1500 
1501 /*
1502  * brief:
1503  * Remove base bdevs from the raid bdev one by one.  Skip any base bdev which
1504  *  doesn't exist.
1505  * params:
1506  * raid_cfg - pointer to raid bdev config.
1507  * cb_fn - callback function
1508  * cb_ctx - argument to callback function
1509  */
1510 void
1511 raid_bdev_remove_base_devices(struct raid_bdev_config *raid_cfg,
1512 			      raid_bdev_destruct_cb cb_fn, void *cb_arg)
1513 {
1514 	struct raid_bdev		*raid_bdev;
1515 	struct raid_base_bdev_info	*info;
1516 	uint8_t				i;
1517 
1518 	SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_remove_base_devices\n");
1519 
1520 	raid_bdev = raid_cfg->raid_bdev;
1521 	if (raid_bdev == NULL) {
1522 		SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid bdev %s doesn't exist now\n", raid_cfg->name);
1523 		if (cb_fn) {
1524 			cb_fn(cb_arg, 0);
1525 		}
1526 		return;
1527 	}
1528 
1529 	if (raid_bdev->destroy_started) {
1530 		SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "destroying raid bdev %s is already started\n",
1531 			      raid_cfg->name);
1532 		if (cb_fn) {
1533 			cb_fn(cb_arg, -EALREADY);
1534 		}
1535 		return;
1536 	}
1537 
1538 	raid_bdev->destroy_started = true;
1539 
1540 	for (i = 0; i < raid_bdev->num_base_bdevs; i++) {
1541 		info = &raid_bdev->base_bdev_info[i];
1542 
1543 		if (info->bdev == NULL) {
1544 			continue;
1545 		}
1546 
1547 		assert(info->desc);
1548 		info->remove_scheduled = true;
1549 
1550 		if (raid_bdev->destruct_called == true ||
1551 		    raid_bdev->state == RAID_BDEV_STATE_CONFIGURING) {
1552 			/*
1553 			 * As raid bdev is not registered yet or already unregistered,
1554 			 * so cleanup should be done here itself.
1555 			 */
1556 			raid_bdev_free_base_bdev_resource(raid_bdev, i);
1557 			if (raid_bdev->num_base_bdevs_discovered == 0) {
1558 				/* There is no base bdev for this raid, so free the raid device. */
1559 				raid_bdev_cleanup(raid_bdev);
1560 				if (cb_fn) {
1561 					cb_fn(cb_arg, 0);
1562 				}
1563 				return;
1564 			}
1565 		}
1566 	}
1567 
1568 	raid_bdev_deconfigure(raid_bdev, cb_fn, cb_arg);
1569 }
1570 
1571 /*
1572  * brief:
1573  * raid_bdev_add_base_device function is the actual function which either adds
1574  * the nvme base device to existing raid bdev or create a new raid bdev. It also claims
1575  * the base device and keep the open descriptor.
1576  * params:
1577  * raid_cfg - pointer to raid bdev config
1578  * bdev - pointer to base bdev
1579  * base_bdev_slot - position to add base bdev
1580  * returns:
1581  * 0 - success
1582  * non zero - failure
1583  */
1584 static int
1585 raid_bdev_add_base_device(struct raid_bdev_config *raid_cfg, struct spdk_bdev *bdev,
1586 			  uint8_t base_bdev_slot)
1587 {
1588 	struct raid_bdev	*raid_bdev;
1589 	int			rc;
1590 
1591 	raid_bdev = raid_cfg->raid_bdev;
1592 	if (!raid_bdev) {
1593 		SPDK_ERRLOG("Raid bdev '%s' is not created yet\n", raid_cfg->name);
1594 		return -ENODEV;
1595 	}
1596 
1597 	rc = raid_bdev_alloc_base_bdev_resource(raid_bdev, bdev, base_bdev_slot);
1598 	if (rc != 0) {
1599 		SPDK_ERRLOG("Failed to allocate resource for bdev '%s'\n", bdev->name);
1600 		return rc;
1601 	}
1602 
1603 	assert(raid_bdev->num_base_bdevs_discovered <= raid_bdev->num_base_bdevs);
1604 
1605 	if (raid_bdev->num_base_bdevs_discovered == raid_bdev->num_base_bdevs) {
1606 		rc = raid_bdev_configure(raid_bdev);
1607 		if (rc != 0) {
1608 			SPDK_ERRLOG("Failed to configure raid bdev\n");
1609 			return rc;
1610 		}
1611 	}
1612 
1613 	return 0;
1614 }
1615 
1616 /*
1617  * brief:
1618  * Add base bdevs to the raid bdev one by one.  Skip any base bdev which doesn't
1619  *  exist or fails to add. If all base bdevs are successfully added, the raid bdev
1620  *  moves to the configured state and becomes available. Otherwise, the raid bdev
1621  *  stays at the configuring state with added base bdevs.
1622  * params:
1623  * raid_cfg - pointer to raid bdev config
1624  * returns:
1625  * 0 - The raid bdev moves to the configured state or stays at the configuring
1626  *     state with added base bdevs due to any nonexistent base bdev.
1627  * non zero - Failed to add any base bdev and stays at the configuring state with
1628  *            added base bdevs.
1629  */
1630 int
1631 raid_bdev_add_base_devices(struct raid_bdev_config *raid_cfg)
1632 {
1633 	struct spdk_bdev	*base_bdev;
1634 	uint8_t			i;
1635 	int			rc = 0, _rc;
1636 
1637 	for (i = 0; i < raid_cfg->num_base_bdevs; i++) {
1638 		base_bdev = spdk_bdev_get_by_name(raid_cfg->base_bdev[i].name);
1639 		if (base_bdev == NULL) {
1640 			SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "base bdev %s doesn't exist now\n",
1641 				      raid_cfg->base_bdev[i].name);
1642 			continue;
1643 		}
1644 
1645 		_rc = raid_bdev_add_base_device(raid_cfg, base_bdev, i);
1646 		if (_rc != 0) {
1647 			SPDK_ERRLOG("Failed to add base bdev %s to RAID bdev %s: %s\n",
1648 				    raid_cfg->base_bdev[i].name, raid_cfg->name,
1649 				    spdk_strerror(-_rc));
1650 			if (rc == 0) {
1651 				rc = _rc;
1652 			}
1653 		}
1654 	}
1655 
1656 	return rc;
1657 }
1658 
1659 /*
1660  * brief:
1661  * raid_bdev_examine function is the examine function call by the below layers
1662  * like bdev_nvme layer. This function will check if this base bdev can be
1663  * claimed by this raid bdev or not.
1664  * params:
1665  * bdev - pointer to base bdev
1666  * returns:
1667  * none
1668  */
1669 static void
1670 raid_bdev_examine(struct spdk_bdev *bdev)
1671 {
1672 	struct raid_bdev_config	*raid_cfg;
1673 	uint8_t			base_bdev_slot;
1674 
1675 	if (raid_bdev_can_claim_bdev(bdev->name, &raid_cfg, &base_bdev_slot)) {
1676 		raid_bdev_add_base_device(raid_cfg, bdev, base_bdev_slot);
1677 	} else {
1678 		SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "bdev %s can't be claimed\n",
1679 			      bdev->name);
1680 	}
1681 
1682 	spdk_bdev_module_examine_done(&g_raid_if);
1683 }
1684 
/*
 * Log component for the raid bdev module: registered under the name
 * "bdev_raid" for the SPDK_LOG_BDEV_RAID flag that the SPDK_DEBUGLOG calls
 * in this file pass.
 */
SPDK_LOG_REGISTER_COMPONENT("bdev_raid", SPDK_LOG_BDEV_RAID)
1687