xref: /spdk/module/bdev/raid/bdev_raid.c (revision 927f1fd57bd004df581518466ec4c1b8083e5d23)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright (c) Intel Corporation.
5  *   All rights reserved.
6  *   Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
7  *
8  *   Redistribution and use in source and binary forms, with or without
9  *   modification, are permitted provided that the following conditions
10  *   are met:
11  *
12  *     * Redistributions of source code must retain the above copyright
13  *       notice, this list of conditions and the following disclaimer.
14  *     * Redistributions in binary form must reproduce the above copyright
15  *       notice, this list of conditions and the following disclaimer in
16  *       the documentation and/or other materials provided with the
17  *       distribution.
18  *     * Neither the name of Intel Corporation nor the names of its
19  *       contributors may be used to endorse or promote products derived
20  *       from this software without specific prior written permission.
21  *
22  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  */
34 
35 #include "bdev_raid.h"
36 #include "spdk/env.h"
37 #include "spdk/thread.h"
38 #include "spdk/log.h"
39 #include "spdk/string.h"
40 #include "spdk/util.h"
41 #include "spdk/json.h"
42 #include "spdk/string.h"
43 
44 static bool g_shutdown_started = false;
45 
46 /* raid bdev config as read from config file */
47 struct raid_config	g_raid_config = {
48 	.raid_bdev_config_head = TAILQ_HEAD_INITIALIZER(g_raid_config.raid_bdev_config_head),
49 };
50 
51 /*
52  * List of raid bdev in configured list, these raid bdevs are registered with
53  * bdev layer
54  */
55 struct raid_configured_tailq	g_raid_bdev_configured_list = TAILQ_HEAD_INITIALIZER(
56 			g_raid_bdev_configured_list);
57 
58 /* List of raid bdev in configuring list */
59 struct raid_configuring_tailq	g_raid_bdev_configuring_list = TAILQ_HEAD_INITIALIZER(
60 			g_raid_bdev_configuring_list);
61 
62 /* List of all raid bdevs */
63 struct raid_all_tailq		g_raid_bdev_list = TAILQ_HEAD_INITIALIZER(g_raid_bdev_list);
64 
65 /* List of all raid bdevs that are offline */
66 struct raid_offline_tailq	g_raid_bdev_offline_list = TAILQ_HEAD_INITIALIZER(
67 			g_raid_bdev_offline_list);
68 
69 static TAILQ_HEAD(, raid_bdev_module) g_raid_modules = TAILQ_HEAD_INITIALIZER(g_raid_modules);
70 
71 static struct raid_bdev_module *raid_bdev_module_find(enum raid_level level)
72 {
73 	struct raid_bdev_module *raid_module;
74 
75 	TAILQ_FOREACH(raid_module, &g_raid_modules, link) {
76 		if (raid_module->level == level) {
77 			return raid_module;
78 		}
79 	}
80 
81 	return NULL;
82 }
83 
84 void raid_bdev_module_list_add(struct raid_bdev_module *raid_module)
85 {
86 	if (raid_bdev_module_find(raid_module->level) != NULL) {
87 		SPDK_ERRLOG("module for raid level '%s' already registered.\n",
88 			    raid_bdev_level_to_str(raid_module->level));
89 		assert(false);
90 	} else {
91 		TAILQ_INSERT_TAIL(&g_raid_modules, raid_module, link);
92 	}
93 }
94 
95 /* Function declarations */
96 static void	raid_bdev_examine(struct spdk_bdev *bdev);
97 static int	raid_bdev_init(void);
98 static void	raid_bdev_deconfigure(struct raid_bdev *raid_bdev,
99 				      raid_bdev_destruct_cb cb_fn, void *cb_arg);
100 static void	raid_bdev_event_base_bdev(enum spdk_bdev_event_type type, struct spdk_bdev *bdev,
101 		void *event_ctx);
102 
103 /*
104  * brief:
105  * raid_bdev_create_cb function is a cb function for raid bdev which creates the
106  * hierarchy from raid bdev to base bdev io channels. It will be called per core
107  * params:
108  * io_device - pointer to raid bdev io device represented by raid_bdev
109  * ctx_buf - pointer to context buffer for raid bdev io channel
110  * returns:
111  * 0 - success
112  * non zero - failure
113  */
114 static int
115 raid_bdev_create_cb(void *io_device, void *ctx_buf)
116 {
117 	struct raid_bdev            *raid_bdev = io_device;
118 	struct raid_bdev_io_channel *raid_ch = ctx_buf;
119 	uint8_t i;
120 
121 	SPDK_DEBUGLOG(bdev_raid, "raid_bdev_create_cb, %p\n", raid_ch);
122 
123 	assert(raid_bdev != NULL);
124 	assert(raid_bdev->state == RAID_BDEV_STATE_ONLINE);
125 
126 	raid_ch->num_channels = raid_bdev->num_base_bdevs;
127 
128 	raid_ch->base_channel = calloc(raid_ch->num_channels,
129 				       sizeof(struct spdk_io_channel *));
130 	if (!raid_ch->base_channel) {
131 		SPDK_ERRLOG("Unable to allocate base bdevs io channel\n");
132 		return -ENOMEM;
133 	}
134 	for (i = 0; i < raid_ch->num_channels; i++) {
135 		/*
136 		 * Get the spdk_io_channel for all the base bdevs. This is used during
137 		 * split logic to send the respective child bdev ios to respective base
138 		 * bdev io channel.
139 		 */
140 		raid_ch->base_channel[i] = spdk_bdev_get_io_channel(
141 						   raid_bdev->base_bdev_info[i].desc);
142 		if (!raid_ch->base_channel[i]) {
143 			uint8_t j;
144 
145 			for (j = 0; j < i; j++) {
146 				spdk_put_io_channel(raid_ch->base_channel[j]);
147 			}
148 			free(raid_ch->base_channel);
149 			raid_ch->base_channel = NULL;
150 			SPDK_ERRLOG("Unable to create io channel for base bdev\n");
151 			return -ENOMEM;
152 		}
153 	}
154 
155 	return 0;
156 }
157 
158 /*
159  * brief:
160  * raid_bdev_destroy_cb function is a cb function for raid bdev which deletes the
161  * hierarchy from raid bdev to base bdev io channels. It will be called per core
162  * params:
163  * io_device - pointer to raid bdev io device represented by raid_bdev
164  * ctx_buf - pointer to context buffer for raid bdev io channel
165  * returns:
166  * none
167  */
168 static void
169 raid_bdev_destroy_cb(void *io_device, void *ctx_buf)
170 {
171 	struct raid_bdev_io_channel *raid_ch = ctx_buf;
172 	uint8_t i;
173 
174 	SPDK_DEBUGLOG(bdev_raid, "raid_bdev_destroy_cb\n");
175 
176 	assert(raid_ch != NULL);
177 	assert(raid_ch->base_channel);
178 	for (i = 0; i < raid_ch->num_channels; i++) {
179 		/* Free base bdev channels */
180 		assert(raid_ch->base_channel[i] != NULL);
181 		spdk_put_io_channel(raid_ch->base_channel[i]);
182 	}
183 	free(raid_ch->base_channel);
184 	raid_ch->base_channel = NULL;
185 }
186 
187 /*
188  * brief:
189  * raid_bdev_cleanup is used to cleanup and free raid_bdev related data
190  * structures.
191  * params:
192  * raid_bdev - pointer to raid_bdev
193  * returns:
194  * none
195  */
196 static void
197 raid_bdev_cleanup(struct raid_bdev *raid_bdev)
198 {
199 	SPDK_DEBUGLOG(bdev_raid, "raid_bdev_cleanup, %p name %s, state %u, config %p\n",
200 		      raid_bdev,
201 		      raid_bdev->bdev.name, raid_bdev->state, raid_bdev->config);
202 	if (raid_bdev->state == RAID_BDEV_STATE_CONFIGURING) {
203 		TAILQ_REMOVE(&g_raid_bdev_configuring_list, raid_bdev, state_link);
204 	} else if (raid_bdev->state == RAID_BDEV_STATE_OFFLINE) {
205 		TAILQ_REMOVE(&g_raid_bdev_offline_list, raid_bdev, state_link);
206 	} else {
207 		assert(0);
208 	}
209 	TAILQ_REMOVE(&g_raid_bdev_list, raid_bdev, global_link);
210 	free(raid_bdev->bdev.name);
211 	free(raid_bdev->base_bdev_info);
212 	if (raid_bdev->config) {
213 		raid_bdev->config->raid_bdev = NULL;
214 	}
215 	free(raid_bdev);
216 }
217 
/*
 * brief:
 * message handler wrapping the bdev close operation so that it can be run via
 * spdk_thread_send_msg
 * params:
 * ctx - base bdev descriptor (struct spdk_bdev_desc *) to close
 * returns:
 * none
 */
static void
_raid_bdev_free_base_bdev_resource(void *ctx)
{
	spdk_bdev_close(ctx);
}
232 
233 
234 /*
235  * brief:
236  * free resource of base bdev for raid bdev
237  * params:
238  * raid_bdev - pointer to raid bdev
239  * base_info - raid base bdev info
240  * returns:
241  * 0 - success
242  * non zero - failure
243  */
244 static void
245 raid_bdev_free_base_bdev_resource(struct raid_bdev *raid_bdev,
246 				  struct raid_base_bdev_info *base_info)
247 {
248 	spdk_bdev_module_release_bdev(base_info->bdev);
249 	if (base_info->thread && base_info->thread != spdk_get_thread()) {
250 		spdk_thread_send_msg(base_info->thread, _raid_bdev_free_base_bdev_resource, base_info->desc);
251 	} else {
252 		spdk_bdev_close(base_info->desc);
253 	}
254 	base_info->desc = NULL;
255 	base_info->bdev = NULL;
256 
257 	assert(raid_bdev->num_base_bdevs_discovered);
258 	raid_bdev->num_base_bdevs_discovered--;
259 }
260 
261 /*
262  * brief:
263  * raid_bdev_destruct is the destruct function table pointer for raid bdev
264  * params:
265  * ctxt - pointer to raid_bdev
266  * returns:
267  * 0 - success
268  * non zero - failure
269  */
270 static int
271 raid_bdev_destruct(void *ctxt)
272 {
273 	struct raid_bdev *raid_bdev = ctxt;
274 	struct raid_base_bdev_info *base_info;
275 
276 	SPDK_DEBUGLOG(bdev_raid, "raid_bdev_destruct\n");
277 
278 	raid_bdev->destruct_called = true;
279 	RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
280 		/*
281 		 * Close all base bdev descriptors for which call has come from below
282 		 * layers.  Also close the descriptors if we have started shutdown.
283 		 */
284 		if (g_shutdown_started ||
285 		    ((base_info->remove_scheduled == true) &&
286 		     (base_info->bdev != NULL))) {
287 			raid_bdev_free_base_bdev_resource(raid_bdev, base_info);
288 		}
289 	}
290 
291 	if (g_shutdown_started) {
292 		TAILQ_REMOVE(&g_raid_bdev_configured_list, raid_bdev, state_link);
293 		raid_bdev->state = RAID_BDEV_STATE_OFFLINE;
294 		TAILQ_INSERT_TAIL(&g_raid_bdev_offline_list, raid_bdev, state_link);
295 	}
296 
297 	if (raid_bdev->module->stop != NULL) {
298 		raid_bdev->module->stop(raid_bdev);
299 	}
300 
301 	spdk_io_device_unregister(raid_bdev, NULL);
302 
303 	if (raid_bdev->num_base_bdevs_discovered == 0) {
304 		/* Free raid_bdev when there are no base bdevs left */
305 		SPDK_DEBUGLOG(bdev_raid, "raid bdev base bdevs is 0, going to free all in destruct\n");
306 		raid_bdev_cleanup(raid_bdev);
307 	}
308 
309 	return 0;
310 }
311 
312 void
313 raid_bdev_io_complete(struct raid_bdev_io *raid_io, enum spdk_bdev_io_status status)
314 {
315 	struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io);
316 
317 	spdk_bdev_io_complete(bdev_io, status);
318 }
319 
320 /*
321  * brief:
322  * raid_bdev_io_complete_part - signal the completion of a part of the expected
323  * base bdev IOs and complete the raid_io if this is the final expected IO.
324  * The caller should first set raid_io->base_bdev_io_remaining. This function
325  * will decrement this counter by the value of the 'completed' parameter and
326  * complete the raid_io if the counter reaches 0. The caller is free to
327  * interpret the 'base_bdev_io_remaining' and 'completed' values as needed,
328  * it can represent e.g. blocks or IOs.
329  * params:
330  * raid_io - pointer to raid_bdev_io
331  * completed - the part of the raid_io that has been completed
332  * status - status of the base IO
333  * returns:
334  * true - if the raid_io is completed
335  * false - otherwise
336  */
337 bool
338 raid_bdev_io_complete_part(struct raid_bdev_io *raid_io, uint64_t completed,
339 			   enum spdk_bdev_io_status status)
340 {
341 	assert(raid_io->base_bdev_io_remaining >= completed);
342 	raid_io->base_bdev_io_remaining -= completed;
343 
344 	if (status != SPDK_BDEV_IO_STATUS_SUCCESS) {
345 		raid_io->base_bdev_io_status = status;
346 	}
347 
348 	if (raid_io->base_bdev_io_remaining == 0) {
349 		raid_bdev_io_complete(raid_io, raid_io->base_bdev_io_status);
350 		return true;
351 	} else {
352 		return false;
353 	}
354 }
355 
356 /*
357  * brief:
358  * raid_bdev_queue_io_wait function processes the IO which failed to submit.
359  * It will try to queue the IOs after storing the context to bdev wait queue logic.
360  * params:
361  * raid_io - pointer to raid_bdev_io
362  * bdev - the block device that the IO is submitted to
363  * ch - io channel
364  * cb_fn - callback when the spdk_bdev_io for bdev becomes available
365  * returns:
366  * none
367  */
368 void
369 raid_bdev_queue_io_wait(struct raid_bdev_io *raid_io, struct spdk_bdev *bdev,
370 			struct spdk_io_channel *ch, spdk_bdev_io_wait_cb cb_fn)
371 {
372 	raid_io->waitq_entry.bdev = bdev;
373 	raid_io->waitq_entry.cb_fn = cb_fn;
374 	raid_io->waitq_entry.cb_arg = raid_io;
375 	spdk_bdev_queue_io_wait(bdev, ch, &raid_io->waitq_entry);
376 }
377 
378 static void
379 raid_base_bdev_reset_complete(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
380 {
381 	struct raid_bdev_io *raid_io = cb_arg;
382 
383 	spdk_bdev_free_io(bdev_io);
384 
385 	raid_bdev_io_complete_part(raid_io, 1, success ?
386 				   SPDK_BDEV_IO_STATUS_SUCCESS :
387 				   SPDK_BDEV_IO_STATUS_FAILED);
388 }
389 
static void raid_bdev_submit_reset_request(struct raid_bdev_io *raid_io);

/*
 * brief:
 * void pointer adapter around raid_bdev_submit_reset_request so that it can be
 * used as the io wait callback
 * params:
 * _raid_io - pointer to raid_bdev_io
 * returns:
 * none
 */
static void
_raid_bdev_submit_reset_request(void *_raid_io)
{
	raid_bdev_submit_reset_request(_raid_io);
}
400 
401 /*
402  * brief:
403  * raid_bdev_submit_reset_request function submits reset requests
404  * to member disks; it will submit as many as possible unless a reset fails with -ENOMEM, in
405  * which case it will queue it for later submission
406  * params:
407  * raid_io
408  * returns:
409  * none
410  */
411 static void
412 raid_bdev_submit_reset_request(struct raid_bdev_io *raid_io)
413 {
414 	struct raid_bdev		*raid_bdev;
415 	int				ret;
416 	uint8_t				i;
417 	struct raid_base_bdev_info	*base_info;
418 	struct spdk_io_channel		*base_ch;
419 
420 	raid_bdev = raid_io->raid_bdev;
421 
422 	if (raid_io->base_bdev_io_remaining == 0) {
423 		raid_io->base_bdev_io_remaining = raid_bdev->num_base_bdevs;
424 	}
425 
426 	while (raid_io->base_bdev_io_submitted < raid_bdev->num_base_bdevs) {
427 		i = raid_io->base_bdev_io_submitted;
428 		base_info = &raid_bdev->base_bdev_info[i];
429 		base_ch = raid_io->raid_ch->base_channel[i];
430 		ret = spdk_bdev_reset(base_info->desc, base_ch,
431 				      raid_base_bdev_reset_complete, raid_io);
432 		if (ret == 0) {
433 			raid_io->base_bdev_io_submitted++;
434 		} else if (ret == -ENOMEM) {
435 			raid_bdev_queue_io_wait(raid_io, base_info->bdev, base_ch,
436 						_raid_bdev_submit_reset_request);
437 			return;
438 		} else {
439 			SPDK_ERRLOG("bdev io submit error not due to ENOMEM, it should not happen\n");
440 			assert(false);
441 			raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED);
442 			return;
443 		}
444 	}
445 }
446 
447 /*
448  * brief:
449  * Callback function to spdk_bdev_io_get_buf.
450  * params:
451  * ch - pointer to raid bdev io channel
452  * bdev_io - pointer to parent bdev_io on raid bdev device
453  * success - True if buffer is allocated or false otherwise.
454  * returns:
455  * none
456  */
457 static void
458 raid_bdev_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io,
459 		     bool success)
460 {
461 	struct raid_bdev_io *raid_io = (struct raid_bdev_io *)bdev_io->driver_ctx;
462 
463 	if (!success) {
464 		raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED);
465 		return;
466 	}
467 
468 	raid_io->raid_bdev->module->submit_rw_request(raid_io);
469 }
470 
471 /*
472  * brief:
473  * raid_bdev_submit_request function is the submit_request function pointer of
474  * raid bdev function table. This is used to submit the io on raid_bdev to below
475  * layers.
476  * params:
477  * ch - pointer to raid bdev io channel
478  * bdev_io - pointer to parent bdev_io on raid bdev device
479  * returns:
480  * none
481  */
482 static void
483 raid_bdev_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
484 {
485 	struct raid_bdev_io *raid_io = (struct raid_bdev_io *)bdev_io->driver_ctx;
486 
487 	raid_io->raid_bdev = bdev_io->bdev->ctxt;
488 	raid_io->raid_ch = spdk_io_channel_get_ctx(ch);
489 	raid_io->base_bdev_io_remaining = 0;
490 	raid_io->base_bdev_io_submitted = 0;
491 	raid_io->base_bdev_io_status = SPDK_BDEV_IO_STATUS_SUCCESS;
492 
493 	switch (bdev_io->type) {
494 	case SPDK_BDEV_IO_TYPE_READ:
495 		spdk_bdev_io_get_buf(bdev_io, raid_bdev_get_buf_cb,
496 				     bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen);
497 		break;
498 	case SPDK_BDEV_IO_TYPE_WRITE:
499 		raid_io->raid_bdev->module->submit_rw_request(raid_io);
500 		break;
501 
502 	case SPDK_BDEV_IO_TYPE_RESET:
503 		raid_bdev_submit_reset_request(raid_io);
504 		break;
505 
506 	case SPDK_BDEV_IO_TYPE_FLUSH:
507 	case SPDK_BDEV_IO_TYPE_UNMAP:
508 		raid_io->raid_bdev->module->submit_null_payload_request(raid_io);
509 		break;
510 
511 	default:
512 		SPDK_ERRLOG("submit request, invalid io type %u\n", bdev_io->type);
513 		raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED);
514 		break;
515 	}
516 }
517 
518 /*
519  * brief:
520  * _raid_bdev_io_type_supported checks whether io_type is supported in
521  * all base bdev modules of raid bdev module. If anyone among the base_bdevs
522  * doesn't support, the raid device doesn't supports.
523  *
524  * params:
525  * raid_bdev - pointer to raid bdev context
526  * io_type - io type
527  * returns:
528  * true - io_type is supported
529  * false - io_type is not supported
530  */
531 inline static bool
532 _raid_bdev_io_type_supported(struct raid_bdev *raid_bdev, enum spdk_bdev_io_type io_type)
533 {
534 	struct raid_base_bdev_info *base_info;
535 
536 	if (io_type == SPDK_BDEV_IO_TYPE_FLUSH ||
537 	    io_type == SPDK_BDEV_IO_TYPE_UNMAP) {
538 		if (raid_bdev->module->submit_null_payload_request == NULL) {
539 			return false;
540 		}
541 	}
542 
543 	RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
544 		if (base_info->bdev == NULL) {
545 			assert(false);
546 			continue;
547 		}
548 
549 		if (spdk_bdev_io_type_supported(base_info->bdev, io_type) == false) {
550 			return false;
551 		}
552 	}
553 
554 	return true;
555 }
556 
557 /*
558  * brief:
559  * raid_bdev_io_type_supported is the io_supported function for bdev function
560  * table which returns whether the particular io type is supported or not by
561  * raid bdev module
562  * params:
563  * ctx - pointer to raid bdev context
564  * type - io type
565  * returns:
566  * true - io_type is supported
567  * false - io_type is not supported
568  */
569 static bool
570 raid_bdev_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
571 {
572 	switch (io_type) {
573 	case SPDK_BDEV_IO_TYPE_READ:
574 	case SPDK_BDEV_IO_TYPE_WRITE:
575 		return true;
576 
577 	case SPDK_BDEV_IO_TYPE_FLUSH:
578 	case SPDK_BDEV_IO_TYPE_RESET:
579 	case SPDK_BDEV_IO_TYPE_UNMAP:
580 		return _raid_bdev_io_type_supported(ctx, io_type);
581 
582 	default:
583 		return false;
584 	}
585 
586 	return false;
587 }
588 
/*
 * brief:
 * raid_bdev_get_io_channel is the get_io_channel function table pointer for
 * raid bdev. The raid_bdev itself is used as the io device handle.
 * params:
 * ctxt - pointer to raid_bdev
 * returns:
 * pointer to io channel for raid bdev
 */
static struct spdk_io_channel *
raid_bdev_get_io_channel(void *ctxt)
{
	return spdk_get_io_channel(ctxt);
}
605 
606 /*
607  * brief:
608  * raid_bdev_dump_info_json is the function table pointer for raid bdev
609  * params:
610  * ctx - pointer to raid_bdev
611  * w - pointer to json context
612  * returns:
613  * 0 - success
614  * non zero - failure
615  */
616 static int
617 raid_bdev_dump_info_json(void *ctx, struct spdk_json_write_ctx *w)
618 {
619 	struct raid_bdev *raid_bdev = ctx;
620 	struct raid_base_bdev_info *base_info;
621 
622 	SPDK_DEBUGLOG(bdev_raid, "raid_bdev_dump_config_json\n");
623 	assert(raid_bdev != NULL);
624 
625 	/* Dump the raid bdev configuration related information */
626 	spdk_json_write_named_object_begin(w, "raid");
627 	spdk_json_write_named_uint32(w, "strip_size_kb", raid_bdev->strip_size_kb);
628 	spdk_json_write_named_uint32(w, "state", raid_bdev->state);
629 	spdk_json_write_named_string(w, "raid_level", raid_bdev_level_to_str(raid_bdev->level));
630 	spdk_json_write_named_uint32(w, "destruct_called", raid_bdev->destruct_called);
631 	spdk_json_write_named_uint32(w, "num_base_bdevs", raid_bdev->num_base_bdevs);
632 	spdk_json_write_named_uint32(w, "num_base_bdevs_discovered", raid_bdev->num_base_bdevs_discovered);
633 	spdk_json_write_name(w, "base_bdevs_list");
634 	spdk_json_write_array_begin(w);
635 	RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
636 		if (base_info->bdev) {
637 			spdk_json_write_string(w, base_info->bdev->name);
638 		} else {
639 			spdk_json_write_null(w);
640 		}
641 	}
642 	spdk_json_write_array_end(w);
643 	spdk_json_write_object_end(w);
644 
645 	return 0;
646 }
647 
648 /*
649  * brief:
650  * raid_bdev_write_config_json is the function table pointer for raid bdev
651  * params:
652  * bdev - pointer to spdk_bdev
653  * w - pointer to json context
654  * returns:
655  * none
656  */
657 static void
658 raid_bdev_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
659 {
660 	struct raid_bdev *raid_bdev = bdev->ctxt;
661 	struct raid_base_bdev_info *base_info;
662 
663 	spdk_json_write_object_begin(w);
664 
665 	spdk_json_write_named_string(w, "method", "bdev_raid_create");
666 
667 	spdk_json_write_named_object_begin(w, "params");
668 	spdk_json_write_named_string(w, "name", bdev->name);
669 	spdk_json_write_named_uint32(w, "strip_size_kb", raid_bdev->strip_size_kb);
670 	spdk_json_write_named_string(w, "raid_level", raid_bdev_level_to_str(raid_bdev->level));
671 
672 	spdk_json_write_named_array_begin(w, "base_bdevs");
673 	RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
674 		if (base_info->bdev) {
675 			spdk_json_write_string(w, base_info->bdev->name);
676 		}
677 	}
678 	spdk_json_write_array_end(w);
679 	spdk_json_write_object_end(w);
680 
681 	spdk_json_write_object_end(w);
682 }
683 
684 static int
685 raid_bdev_get_memory_domains(void *ctx, struct spdk_memory_domain **domains, int array_size)
686 {
687 	struct raid_bdev *raid_bdev = ctx;
688 	struct spdk_bdev *base_bdev;
689 	uint32_t i;
690 	int domains_count = 0, rc;
691 
692 	/* First loop to get the number of memory domains */
693 	for (i = 0; i < raid_bdev->num_base_bdevs; i++) {
694 		base_bdev = raid_bdev->base_bdev_info[i].bdev;
695 		rc = spdk_bdev_get_memory_domains(base_bdev, NULL, 0);
696 		if (rc < 0) {
697 			return rc;
698 		}
699 		domains_count += rc;
700 	}
701 
702 	if (!domains || array_size < domains_count) {
703 		return domains_count;
704 	}
705 
706 	for (i = 0; i < raid_bdev->num_base_bdevs; i++) {
707 		base_bdev = raid_bdev->base_bdev_info[i].bdev;
708 		rc = spdk_bdev_get_memory_domains(base_bdev, domains, array_size);
709 		if (rc < 0) {
710 			return rc;
711 		}
712 		domains += rc;
713 		array_size -= rc;
714 	}
715 
716 	return domains_count;
717 }
718 
719 /* g_raid_bdev_fn_table is the function table for raid bdev */
720 static const struct spdk_bdev_fn_table g_raid_bdev_fn_table = {
721 	.destruct		= raid_bdev_destruct,
722 	.submit_request		= raid_bdev_submit_request,
723 	.io_type_supported	= raid_bdev_io_type_supported,
724 	.get_io_channel		= raid_bdev_get_io_channel,
725 	.dump_info_json		= raid_bdev_dump_info_json,
726 	.write_config_json	= raid_bdev_write_config_json,
727 	.get_memory_domains	= raid_bdev_get_memory_domains,
728 };
729 
730 /*
731  * brief:
732  * raid_bdev_config_cleanup function is used to free memory for one raid_bdev in configuration
733  * params:
734  * raid_cfg - pointer to raid_bdev_config structure
735  * returns:
736  * none
737  */
738 void
739 raid_bdev_config_cleanup(struct raid_bdev_config *raid_cfg)
740 {
741 	uint8_t i;
742 
743 	TAILQ_REMOVE(&g_raid_config.raid_bdev_config_head, raid_cfg, link);
744 	g_raid_config.total_raid_bdev--;
745 
746 	if (raid_cfg->base_bdev) {
747 		for (i = 0; i < raid_cfg->num_base_bdevs; i++) {
748 			free(raid_cfg->base_bdev[i].name);
749 		}
750 		free(raid_cfg->base_bdev);
751 	}
752 	free(raid_cfg->name);
753 	free(raid_cfg);
754 }
755 
756 /*
757  * brief:
758  * raid_bdev_free is the raid bdev function table function pointer. This is
759  * called on bdev free path
760  * params:
761  * none
762  * returns:
763  * none
764  */
765 static void
766 raid_bdev_free(void)
767 {
768 	struct raid_bdev_config *raid_cfg, *tmp;
769 
770 	SPDK_DEBUGLOG(bdev_raid, "raid_bdev_free\n");
771 	TAILQ_FOREACH_SAFE(raid_cfg, &g_raid_config.raid_bdev_config_head, link, tmp) {
772 		raid_bdev_config_cleanup(raid_cfg);
773 	}
774 }
775 
776 /* brief
777  * raid_bdev_config_find_by_name is a helper function to find raid bdev config
778  * by name as key.
779  *
780  * params:
781  * raid_name - name for raid bdev.
782  */
783 struct raid_bdev_config *
784 raid_bdev_config_find_by_name(const char *raid_name)
785 {
786 	struct raid_bdev_config *raid_cfg;
787 
788 	TAILQ_FOREACH(raid_cfg, &g_raid_config.raid_bdev_config_head, link) {
789 		if (!strcmp(raid_cfg->name, raid_name)) {
790 			return raid_cfg;
791 		}
792 	}
793 
794 	return raid_cfg;
795 }
796 
797 /*
798  * brief
799  * raid_bdev_config_add function adds config for newly created raid bdev.
800  *
801  * params:
802  * raid_name - name for raid bdev.
803  * strip_size - strip size in KB
804  * num_base_bdevs - number of base bdevs.
805  * level - raid level.
806  * _raid_cfg - Pointer to newly added configuration
807  */
808 int
809 raid_bdev_config_add(const char *raid_name, uint32_t strip_size, uint8_t num_base_bdevs,
810 		     enum raid_level level, struct raid_bdev_config **_raid_cfg)
811 {
812 	struct raid_bdev_config *raid_cfg;
813 
814 	raid_cfg = raid_bdev_config_find_by_name(raid_name);
815 	if (raid_cfg != NULL) {
816 		SPDK_ERRLOG("Duplicate raid bdev name found in config file %s\n",
817 			    raid_name);
818 		return -EEXIST;
819 	}
820 
821 	if (spdk_u32_is_pow2(strip_size) == false) {
822 		SPDK_ERRLOG("Invalid strip size %" PRIu32 "\n", strip_size);
823 		return -EINVAL;
824 	}
825 
826 	if (num_base_bdevs == 0) {
827 		SPDK_ERRLOG("Invalid base device count %u\n", num_base_bdevs);
828 		return -EINVAL;
829 	}
830 
831 	raid_cfg = calloc(1, sizeof(*raid_cfg));
832 	if (raid_cfg == NULL) {
833 		SPDK_ERRLOG("unable to allocate memory\n");
834 		return -ENOMEM;
835 	}
836 
837 	raid_cfg->name = strdup(raid_name);
838 	if (!raid_cfg->name) {
839 		free(raid_cfg);
840 		SPDK_ERRLOG("unable to allocate memory\n");
841 		return -ENOMEM;
842 	}
843 	raid_cfg->strip_size = strip_size;
844 	raid_cfg->num_base_bdevs = num_base_bdevs;
845 	raid_cfg->level = level;
846 
847 	raid_cfg->base_bdev = calloc(num_base_bdevs, sizeof(*raid_cfg->base_bdev));
848 	if (raid_cfg->base_bdev == NULL) {
849 		free(raid_cfg->name);
850 		free(raid_cfg);
851 		SPDK_ERRLOG("unable to allocate memory\n");
852 		return -ENOMEM;
853 	}
854 
855 	TAILQ_INSERT_TAIL(&g_raid_config.raid_bdev_config_head, raid_cfg, link);
856 	g_raid_config.total_raid_bdev++;
857 
858 	*_raid_cfg = raid_cfg;
859 	return 0;
860 }
861 
862 /*
863  * brief:
864  * raid_bdev_config_add_base_bdev function add base bdev to raid bdev config.
865  *
866  * params:
867  * raid_cfg - pointer to raid bdev configuration
868  * base_bdev_name - name of base bdev
869  * slot - Position to add base bdev
870  */
871 int
872 raid_bdev_config_add_base_bdev(struct raid_bdev_config *raid_cfg, const char *base_bdev_name,
873 			       uint8_t slot)
874 {
875 	uint8_t i;
876 	struct raid_bdev_config *tmp;
877 
878 	if (slot >= raid_cfg->num_base_bdevs) {
879 		return -EINVAL;
880 	}
881 
882 	TAILQ_FOREACH(tmp, &g_raid_config.raid_bdev_config_head, link) {
883 		for (i = 0; i < tmp->num_base_bdevs; i++) {
884 			if (tmp->base_bdev[i].name != NULL) {
885 				if (!strcmp(tmp->base_bdev[i].name, base_bdev_name)) {
886 					SPDK_ERRLOG("duplicate base bdev name %s mentioned\n",
887 						    base_bdev_name);
888 					return -EEXIST;
889 				}
890 			}
891 		}
892 	}
893 
894 	raid_cfg->base_bdev[slot].name = strdup(base_bdev_name);
895 	if (raid_cfg->base_bdev[slot].name == NULL) {
896 		SPDK_ERRLOG("unable to allocate memory\n");
897 		return -ENOMEM;
898 	}
899 
900 	return 0;
901 }
902 
903 static struct {
904 	const char *name;
905 	enum raid_level value;
906 } g_raid_level_names[] = {
907 	{ "raid0", RAID0 },
908 	{ "0", RAID0 },
909 	{ "raid5", RAID5 },
910 	{ "5", RAID5 },
911 	{ "concat", CONCAT },
912 	{ }
913 };
914 
915 enum raid_level raid_bdev_parse_raid_level(const char *str)
916 {
917 	unsigned int i;
918 
919 	assert(str != NULL);
920 
921 	for (i = 0; g_raid_level_names[i].name != NULL; i++) {
922 		if (strcasecmp(g_raid_level_names[i].name, str) == 0) {
923 			return g_raid_level_names[i].value;
924 		}
925 	}
926 
927 	return INVALID_RAID_LEVEL;
928 }
929 
930 const char *
931 raid_bdev_level_to_str(enum raid_level level)
932 {
933 	unsigned int i;
934 
935 	for (i = 0; g_raid_level_names[i].name != NULL; i++) {
936 		if (g_raid_level_names[i].value == level) {
937 			return g_raid_level_names[i].name;
938 		}
939 	}
940 
941 	return "";
942 }
943 
944 /*
945  * brief:
946  * raid_bdev_fini_start is called when bdev layer is starting the
947  * shutdown process
948  * params:
949  * none
950  * returns:
951  * none
952  */
953 static void
954 raid_bdev_fini_start(void)
955 {
956 	SPDK_DEBUGLOG(bdev_raid, "raid_bdev_fini_start\n");
957 	g_shutdown_started = true;
958 }
959 
960 /*
961  * brief:
962  * raid_bdev_exit is called on raid bdev module exit time by bdev layer
963  * params:
964  * none
965  * returns:
966  * none
967  */
968 static void
969 raid_bdev_exit(void)
970 {
971 	SPDK_DEBUGLOG(bdev_raid, "raid_bdev_exit\n");
972 	raid_bdev_free();
973 }
974 
975 /*
976  * brief:
977  * raid_bdev_get_ctx_size is used to return the context size of bdev_io for raid
978  * module
979  * params:
980  * none
981  * returns:
982  * size of spdk_bdev_io context for raid
983  */
984 static int
985 raid_bdev_get_ctx_size(void)
986 {
987 	SPDK_DEBUGLOG(bdev_raid, "raid_bdev_get_ctx_size\n");
988 	return sizeof(struct raid_bdev_io);
989 }
990 
991 /*
992  * brief:
993  * raid_bdev_can_claim_bdev is the function to check if this base_bdev can be
994  * claimed by raid bdev or not.
995  * params:
996  * bdev_name - represents base bdev name
997  * _raid_cfg - pointer to raid bdev config parsed from config file
998  * base_bdev_slot - if bdev can be claimed, it represents the base_bdev correct
999  * slot. This field is only valid if return value of this function is true
1000  * returns:
1001  * true - if bdev can be claimed
1002  * false - if bdev can't be claimed
1003  */
1004 static bool
1005 raid_bdev_can_claim_bdev(const char *bdev_name, struct raid_bdev_config **_raid_cfg,
1006 			 uint8_t *base_bdev_slot)
1007 {
1008 	struct raid_bdev_config *raid_cfg;
1009 	uint8_t i;
1010 
1011 	TAILQ_FOREACH(raid_cfg, &g_raid_config.raid_bdev_config_head, link) {
1012 		for (i = 0; i < raid_cfg->num_base_bdevs; i++) {
1013 			/*
1014 			 * Check if the base bdev name is part of raid bdev configuration.
1015 			 * If match is found then return true and the slot information where
1016 			 * this base bdev should be inserted in raid bdev
1017 			 */
1018 			if (!strcmp(bdev_name, raid_cfg->base_bdev[i].name)) {
1019 				*_raid_cfg = raid_cfg;
1020 				*base_bdev_slot = i;
1021 				return true;
1022 			}
1023 		}
1024 	}
1025 
1026 	return false;
1027 }
1028 
1029 
1030 static struct spdk_bdev_module g_raid_if = {
1031 	.name = "raid",
1032 	.module_init = raid_bdev_init,
1033 	.fini_start = raid_bdev_fini_start,
1034 	.module_fini = raid_bdev_exit,
1035 	.get_ctx_size = raid_bdev_get_ctx_size,
1036 	.examine_config = raid_bdev_examine,
1037 	.async_init = false,
1038 	.async_fini = false,
1039 };
1040 SPDK_BDEV_MODULE_REGISTER(raid, &g_raid_if)
1041 
/*
 * brief:
 * raid_bdev_init is the module_init callback of the raid bdev module. It
 * performs no work and always reports success.
 * params:
 * none
 * returns:
 * 0 - always
 */
static int
raid_bdev_init(void)
{
	/* Nothing to set up at module init time */
	return 0;
}
1056 
1057 /*
1058  * brief:
1059  * raid_bdev_create allocates raid bdev based on passed configuration
1060  * params:
1061  * raid_cfg - configuration of raid bdev
1062  * returns:
1063  * 0 - success
1064  * non zero - failure
1065  */
1066 int
1067 raid_bdev_create(struct raid_bdev_config *raid_cfg)
1068 {
1069 	struct raid_bdev *raid_bdev;
1070 	struct spdk_bdev *raid_bdev_gen;
1071 	struct raid_bdev_module *module;
1072 
1073 	module = raid_bdev_module_find(raid_cfg->level);
1074 	if (module == NULL) {
1075 		SPDK_ERRLOG("Unsupported raid level '%d'\n", raid_cfg->level);
1076 		return -EINVAL;
1077 	}
1078 
1079 	assert(module->base_bdevs_min != 0);
1080 	if (raid_cfg->num_base_bdevs < module->base_bdevs_min) {
1081 		SPDK_ERRLOG("At least %u base devices required for %s\n",
1082 			    module->base_bdevs_min,
1083 			    raid_bdev_level_to_str(raid_cfg->level));
1084 		return -EINVAL;
1085 	}
1086 
1087 	raid_bdev = calloc(1, sizeof(*raid_bdev));
1088 	if (!raid_bdev) {
1089 		SPDK_ERRLOG("Unable to allocate memory for raid bdev\n");
1090 		return -ENOMEM;
1091 	}
1092 
1093 	raid_bdev->module = module;
1094 	raid_bdev->num_base_bdevs = raid_cfg->num_base_bdevs;
1095 	raid_bdev->base_bdev_info = calloc(raid_bdev->num_base_bdevs,
1096 					   sizeof(struct raid_base_bdev_info));
1097 	if (!raid_bdev->base_bdev_info) {
1098 		SPDK_ERRLOG("Unable able to allocate base bdev info\n");
1099 		free(raid_bdev);
1100 		return -ENOMEM;
1101 	}
1102 
1103 	/* strip_size_kb is from the rpc param.  strip_size is in blocks and used
1104 	 * internally and set later.
1105 	 */
1106 	raid_bdev->strip_size = 0;
1107 	raid_bdev->strip_size_kb = raid_cfg->strip_size;
1108 	raid_bdev->state = RAID_BDEV_STATE_CONFIGURING;
1109 	raid_bdev->config = raid_cfg;
1110 	raid_bdev->level = raid_cfg->level;
1111 
1112 	raid_bdev_gen = &raid_bdev->bdev;
1113 
1114 	raid_bdev_gen->name = strdup(raid_cfg->name);
1115 	if (!raid_bdev_gen->name) {
1116 		SPDK_ERRLOG("Unable to allocate name for raid\n");
1117 		free(raid_bdev->base_bdev_info);
1118 		free(raid_bdev);
1119 		return -ENOMEM;
1120 	}
1121 
1122 	raid_bdev_gen->product_name = "Raid Volume";
1123 	raid_bdev_gen->ctxt = raid_bdev;
1124 	raid_bdev_gen->fn_table = &g_raid_bdev_fn_table;
1125 	raid_bdev_gen->module = &g_raid_if;
1126 	raid_bdev_gen->write_cache = 0;
1127 
1128 	TAILQ_INSERT_TAIL(&g_raid_bdev_configuring_list, raid_bdev, state_link);
1129 	TAILQ_INSERT_TAIL(&g_raid_bdev_list, raid_bdev, global_link);
1130 
1131 	raid_cfg->raid_bdev = raid_bdev;
1132 
1133 	return 0;
1134 }
1135 
1136 /*
1137  * brief
1138  * raid_bdev_alloc_base_bdev_resource allocates resource of base bdev.
1139  * params:
1140  * raid_bdev - pointer to raid bdev
1141  * bdev_name - base bdev name
1142  * base_bdev_slot - position to add base bdev
1143  * returns:
1144  * 0 - success
1145  * non zero - failure
1146  */
1147 static int
1148 raid_bdev_alloc_base_bdev_resource(struct raid_bdev *raid_bdev, const char *bdev_name,
1149 				   uint8_t base_bdev_slot)
1150 {
1151 	struct spdk_bdev_desc *desc;
1152 	struct spdk_bdev *bdev;
1153 	int rc;
1154 
1155 	rc = spdk_bdev_open_ext(bdev_name, true, raid_bdev_event_base_bdev, NULL, &desc);
1156 	if (rc != 0) {
1157 		if (rc != -ENODEV) {
1158 			SPDK_ERRLOG("Unable to create desc on bdev '%s'\n", bdev_name);
1159 		}
1160 		return rc;
1161 	}
1162 
1163 	bdev = spdk_bdev_desc_get_bdev(desc);
1164 
1165 	rc = spdk_bdev_module_claim_bdev(bdev, NULL, &g_raid_if);
1166 	if (rc != 0) {
1167 		SPDK_ERRLOG("Unable to claim this bdev as it is already claimed\n");
1168 		spdk_bdev_close(desc);
1169 		return rc;
1170 	}
1171 
1172 	SPDK_DEBUGLOG(bdev_raid, "bdev %s is claimed\n", bdev_name);
1173 
1174 	assert(raid_bdev->state != RAID_BDEV_STATE_ONLINE);
1175 	assert(base_bdev_slot < raid_bdev->num_base_bdevs);
1176 
1177 	raid_bdev->base_bdev_info[base_bdev_slot].thread = spdk_get_thread();
1178 	raid_bdev->base_bdev_info[base_bdev_slot].bdev = bdev;
1179 	raid_bdev->base_bdev_info[base_bdev_slot].desc = desc;
1180 	raid_bdev->num_base_bdevs_discovered++;
1181 	assert(raid_bdev->num_base_bdevs_discovered <= raid_bdev->num_base_bdevs);
1182 
1183 	return 0;
1184 }
1185 
1186 /*
1187  * brief:
1188  * If raid bdev config is complete, then only register the raid bdev to
1189  * bdev layer and remove this raid bdev from configuring list and
1190  * insert the raid bdev to configured list
1191  * params:
1192  * raid_bdev - pointer to raid bdev
1193  * returns:
1194  * 0 - success
1195  * non zero - failure
1196  */
1197 static int
1198 raid_bdev_configure(struct raid_bdev *raid_bdev)
1199 {
1200 	uint32_t blocklen = 0;
1201 	struct spdk_bdev *raid_bdev_gen;
1202 	struct raid_base_bdev_info *base_info;
1203 	int rc = 0;
1204 
1205 	assert(raid_bdev->state == RAID_BDEV_STATE_CONFIGURING);
1206 	assert(raid_bdev->num_base_bdevs_discovered == raid_bdev->num_base_bdevs);
1207 
1208 	RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
1209 		/* Check blocklen for all base bdevs that it should be same */
1210 		if (blocklen == 0) {
1211 			blocklen = base_info->bdev->blocklen;
1212 		} else if (blocklen != base_info->bdev->blocklen) {
1213 			/*
1214 			 * Assumption is that all the base bdevs for any raid bdev should
1215 			 * have same blocklen
1216 			 */
1217 			SPDK_ERRLOG("Blocklen of various bdevs not matching\n");
1218 			return -EINVAL;
1219 		}
1220 	}
1221 	assert(blocklen > 0);
1222 
1223 	/* The strip_size_kb is read in from user in KB. Convert to blocks here for
1224 	 * internal use.
1225 	 */
1226 	raid_bdev->strip_size = (raid_bdev->strip_size_kb * 1024) / blocklen;
1227 	raid_bdev->strip_size_shift = spdk_u32log2(raid_bdev->strip_size);
1228 	raid_bdev->blocklen_shift = spdk_u32log2(blocklen);
1229 
1230 	raid_bdev_gen = &raid_bdev->bdev;
1231 	raid_bdev_gen->blocklen = blocklen;
1232 
1233 	rc = raid_bdev->module->start(raid_bdev);
1234 	if (rc != 0) {
1235 		SPDK_ERRLOG("raid module startup callback failed\n");
1236 		return rc;
1237 	}
1238 	raid_bdev->state = RAID_BDEV_STATE_ONLINE;
1239 	SPDK_DEBUGLOG(bdev_raid, "io device register %p\n", raid_bdev);
1240 	SPDK_DEBUGLOG(bdev_raid, "blockcnt %" PRIu64 ", blocklen %u\n",
1241 		      raid_bdev_gen->blockcnt, raid_bdev_gen->blocklen);
1242 	spdk_io_device_register(raid_bdev, raid_bdev_create_cb, raid_bdev_destroy_cb,
1243 				sizeof(struct raid_bdev_io_channel),
1244 				raid_bdev->bdev.name);
1245 	rc = spdk_bdev_register(raid_bdev_gen);
1246 	if (rc != 0) {
1247 		SPDK_ERRLOG("Unable to register raid bdev and stay at configuring state\n");
1248 		if (raid_bdev->module->stop != NULL) {
1249 			raid_bdev->module->stop(raid_bdev);
1250 		}
1251 		spdk_io_device_unregister(raid_bdev, NULL);
1252 		raid_bdev->state = RAID_BDEV_STATE_CONFIGURING;
1253 		return rc;
1254 	}
1255 	SPDK_DEBUGLOG(bdev_raid, "raid bdev generic %p\n", raid_bdev_gen);
1256 	TAILQ_REMOVE(&g_raid_bdev_configuring_list, raid_bdev, state_link);
1257 	TAILQ_INSERT_TAIL(&g_raid_bdev_configured_list, raid_bdev, state_link);
1258 	SPDK_DEBUGLOG(bdev_raid, "raid bdev is created with name %s, raid_bdev %p\n",
1259 		      raid_bdev_gen->name, raid_bdev);
1260 
1261 	return 0;
1262 }
1263 
1264 /*
1265  * brief:
1266  * If raid bdev is online and registered, change the bdev state to
1267  * configuring and unregister this raid device. Queue this raid device
1268  * in configuring list
1269  * params:
1270  * raid_bdev - pointer to raid bdev
1271  * cb_fn - callback function
1272  * cb_arg - argument to callback function
1273  * returns:
1274  * none
1275  */
1276 static void
1277 raid_bdev_deconfigure(struct raid_bdev *raid_bdev, raid_bdev_destruct_cb cb_fn,
1278 		      void *cb_arg)
1279 {
1280 	if (raid_bdev->state != RAID_BDEV_STATE_ONLINE) {
1281 		if (cb_fn) {
1282 			cb_fn(cb_arg, 0);
1283 		}
1284 		return;
1285 	}
1286 
1287 	assert(raid_bdev->num_base_bdevs == raid_bdev->num_base_bdevs_discovered);
1288 	TAILQ_REMOVE(&g_raid_bdev_configured_list, raid_bdev, state_link);
1289 	raid_bdev->state = RAID_BDEV_STATE_OFFLINE;
1290 	assert(raid_bdev->num_base_bdevs_discovered);
1291 	TAILQ_INSERT_TAIL(&g_raid_bdev_offline_list, raid_bdev, state_link);
1292 	SPDK_DEBUGLOG(bdev_raid, "raid bdev state changing from online to offline\n");
1293 
1294 	spdk_bdev_unregister(&raid_bdev->bdev, cb_fn, cb_arg);
1295 }
1296 
1297 /*
1298  * brief:
1299  * raid_bdev_find_by_base_bdev function finds the raid bdev which has
1300  *  claimed the base bdev.
1301  * params:
1302  * base_bdev - pointer to base bdev pointer
1303  * _raid_bdev - Reference to pointer to raid bdev
1304  * _base_info - Reference to the raid base bdev info.
1305  * returns:
1306  * true - if the raid bdev is found.
1307  * false - if the raid bdev is not found.
1308  */
1309 static bool
1310 raid_bdev_find_by_base_bdev(struct spdk_bdev *base_bdev, struct raid_bdev **_raid_bdev,
1311 			    struct raid_base_bdev_info **_base_info)
1312 {
1313 	struct raid_bdev *raid_bdev;
1314 	struct raid_base_bdev_info *base_info;
1315 
1316 	TAILQ_FOREACH(raid_bdev, &g_raid_bdev_list, global_link) {
1317 		RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
1318 			if (base_info->bdev == base_bdev) {
1319 				*_raid_bdev = raid_bdev;
1320 				*_base_info = base_info;
1321 				return true;
1322 			}
1323 		}
1324 	}
1325 
1326 	return false;
1327 }
1328 
1329 /*
1330  * brief:
1331  * raid_bdev_remove_base_bdev function is called by below layers when base_bdev
1332  * is removed. This function checks if this base bdev is part of any raid bdev
1333  * or not. If yes, it takes necessary action on that particular raid bdev.
1334  * params:
1335  * base_bdev - pointer to base bdev pointer which got removed
1336  * returns:
1337  * none
1338  */
1339 static void
1340 raid_bdev_remove_base_bdev(struct spdk_bdev *base_bdev)
1341 {
1342 	struct raid_bdev	*raid_bdev = NULL;
1343 	struct raid_base_bdev_info *base_info;
1344 
1345 	SPDK_DEBUGLOG(bdev_raid, "raid_bdev_remove_base_bdev\n");
1346 
1347 	/* Find the raid_bdev which has claimed this base_bdev */
1348 	if (!raid_bdev_find_by_base_bdev(base_bdev, &raid_bdev, &base_info)) {
1349 		SPDK_ERRLOG("bdev to remove '%s' not found\n", base_bdev->name);
1350 		return;
1351 	}
1352 
1353 	assert(base_info->desc);
1354 	base_info->remove_scheduled = true;
1355 
1356 	if (raid_bdev->destruct_called == true ||
1357 	    raid_bdev->state == RAID_BDEV_STATE_CONFIGURING) {
1358 		/*
1359 		 * As raid bdev is not registered yet or already unregistered,
1360 		 * so cleanup should be done here itself.
1361 		 */
1362 		raid_bdev_free_base_bdev_resource(raid_bdev, base_info);
1363 		if (raid_bdev->num_base_bdevs_discovered == 0) {
1364 			/* There is no base bdev for this raid, so free the raid device. */
1365 			raid_bdev_cleanup(raid_bdev);
1366 			return;
1367 		}
1368 	}
1369 
1370 	raid_bdev_deconfigure(raid_bdev, NULL, NULL);
1371 }
1372 
1373 /*
1374  * brief:
1375  * raid_bdev_event_base_bdev function is called by below layers when base_bdev
1376  * triggers asynchronous event.
1377  * params:
1378  * type - event details.
1379  * bdev - bdev that triggered event.
1380  * event_ctx - context for event.
1381  * returns:
1382  * none
1383  */
1384 static void
1385 raid_bdev_event_base_bdev(enum spdk_bdev_event_type type, struct spdk_bdev *bdev,
1386 			  void *event_ctx)
1387 {
1388 	switch (type) {
1389 	case SPDK_BDEV_EVENT_REMOVE:
1390 		raid_bdev_remove_base_bdev(bdev);
1391 		break;
1392 	default:
1393 		SPDK_NOTICELOG("Unsupported bdev event: type %d\n", type);
1394 		break;
1395 	}
1396 }
1397 
1398 /*
1399  * brief:
1400  * Remove base bdevs from the raid bdev one by one.  Skip any base bdev which
1401  *  doesn't exist.
1402  * params:
1403  * raid_cfg - pointer to raid bdev config.
1404  * cb_fn - callback function
1405  * cb_ctx - argument to callback function
1406  */
1407 void
1408 raid_bdev_remove_base_devices(struct raid_bdev_config *raid_cfg,
1409 			      raid_bdev_destruct_cb cb_fn, void *cb_arg)
1410 {
1411 	struct raid_bdev		*raid_bdev;
1412 	struct raid_base_bdev_info	*base_info;
1413 
1414 	SPDK_DEBUGLOG(bdev_raid, "raid_bdev_remove_base_devices\n");
1415 
1416 	raid_bdev = raid_cfg->raid_bdev;
1417 	if (raid_bdev == NULL) {
1418 		SPDK_DEBUGLOG(bdev_raid, "raid bdev %s doesn't exist now\n", raid_cfg->name);
1419 		if (cb_fn) {
1420 			cb_fn(cb_arg, 0);
1421 		}
1422 		return;
1423 	}
1424 
1425 	if (raid_bdev->destroy_started) {
1426 		SPDK_DEBUGLOG(bdev_raid, "destroying raid bdev %s is already started\n",
1427 			      raid_cfg->name);
1428 		if (cb_fn) {
1429 			cb_fn(cb_arg, -EALREADY);
1430 		}
1431 		return;
1432 	}
1433 
1434 	raid_bdev->destroy_started = true;
1435 
1436 	RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
1437 		if (base_info->bdev == NULL) {
1438 			continue;
1439 		}
1440 
1441 		assert(base_info->desc);
1442 		base_info->remove_scheduled = true;
1443 
1444 		if (raid_bdev->destruct_called == true ||
1445 		    raid_bdev->state == RAID_BDEV_STATE_CONFIGURING) {
1446 			/*
1447 			 * As raid bdev is not registered yet or already unregistered,
1448 			 * so cleanup should be done here itself.
1449 			 */
1450 			raid_bdev_free_base_bdev_resource(raid_bdev, base_info);
1451 			if (raid_bdev->num_base_bdevs_discovered == 0) {
1452 				/* There is no base bdev for this raid, so free the raid device. */
1453 				raid_bdev_cleanup(raid_bdev);
1454 				if (cb_fn) {
1455 					cb_fn(cb_arg, 0);
1456 				}
1457 				return;
1458 			}
1459 		}
1460 	}
1461 
1462 	raid_bdev_deconfigure(raid_bdev, cb_fn, cb_arg);
1463 }
1464 
1465 /*
1466  * brief:
1467  * raid_bdev_add_base_device function is the actual function which either adds
1468  * the nvme base device to existing raid bdev or create a new raid bdev. It also claims
1469  * the base device and keep the open descriptor.
1470  * params:
1471  * raid_cfg - pointer to raid bdev config
1472  * bdev - pointer to base bdev
1473  * base_bdev_slot - position to add base bdev
1474  * returns:
1475  * 0 - success
1476  * non zero - failure
1477  */
1478 static int
1479 raid_bdev_add_base_device(struct raid_bdev_config *raid_cfg, const char *bdev_name,
1480 			  uint8_t base_bdev_slot)
1481 {
1482 	struct raid_bdev	*raid_bdev;
1483 	int			rc;
1484 
1485 	raid_bdev = raid_cfg->raid_bdev;
1486 	if (!raid_bdev) {
1487 		SPDK_ERRLOG("Raid bdev '%s' is not created yet\n", raid_cfg->name);
1488 		return -ENODEV;
1489 	}
1490 
1491 	rc = raid_bdev_alloc_base_bdev_resource(raid_bdev, bdev_name, base_bdev_slot);
1492 	if (rc != 0) {
1493 		if (rc != -ENODEV) {
1494 			SPDK_ERRLOG("Failed to allocate resource for bdev '%s'\n", bdev_name);
1495 		}
1496 		return rc;
1497 	}
1498 
1499 	assert(raid_bdev->num_base_bdevs_discovered <= raid_bdev->num_base_bdevs);
1500 
1501 	if (raid_bdev->num_base_bdevs_discovered == raid_bdev->num_base_bdevs) {
1502 		rc = raid_bdev_configure(raid_bdev);
1503 		if (rc != 0) {
1504 			SPDK_ERRLOG("Failed to configure raid bdev\n");
1505 			return rc;
1506 		}
1507 	}
1508 
1509 	return 0;
1510 }
1511 
1512 /*
1513  * brief:
1514  * Add base bdevs to the raid bdev one by one.  Skip any base bdev which doesn't
1515  *  exist or fails to add. If all base bdevs are successfully added, the raid bdev
1516  *  moves to the configured state and becomes available. Otherwise, the raid bdev
1517  *  stays at the configuring state with added base bdevs.
1518  * params:
1519  * raid_cfg - pointer to raid bdev config
1520  * returns:
1521  * 0 - The raid bdev moves to the configured state or stays at the configuring
1522  *     state with added base bdevs due to any nonexistent base bdev.
1523  * non zero - Failed to add any base bdev and stays at the configuring state with
1524  *            added base bdevs.
1525  */
1526 int
1527 raid_bdev_add_base_devices(struct raid_bdev_config *raid_cfg)
1528 {
1529 	uint8_t	i;
1530 	int	rc = 0, _rc;
1531 
1532 	for (i = 0; i < raid_cfg->num_base_bdevs; i++) {
1533 		_rc = raid_bdev_add_base_device(raid_cfg, raid_cfg->base_bdev[i].name, i);
1534 		if (_rc == -ENODEV) {
1535 			SPDK_DEBUGLOG(bdev_raid, "base bdev %s doesn't exist now\n",
1536 				      raid_cfg->base_bdev[i].name);
1537 		} else if (_rc != 0) {
1538 			SPDK_ERRLOG("Failed to add base bdev %s to RAID bdev %s: %s\n",
1539 				    raid_cfg->base_bdev[i].name, raid_cfg->name,
1540 				    spdk_strerror(-_rc));
1541 			if (rc == 0) {
1542 				rc = _rc;
1543 			}
1544 		}
1545 	}
1546 
1547 	return rc;
1548 }
1549 
1550 /*
1551  * brief:
1552  * raid_bdev_examine function is the examine function call by the below layers
1553  * like bdev_nvme layer. This function will check if this base bdev can be
1554  * claimed by this raid bdev or not.
1555  * params:
1556  * bdev - pointer to base bdev
1557  * returns:
1558  * none
1559  */
1560 static void
1561 raid_bdev_examine(struct spdk_bdev *bdev)
1562 {
1563 	struct raid_bdev_config	*raid_cfg;
1564 	uint8_t			base_bdev_slot;
1565 
1566 	if (raid_bdev_can_claim_bdev(bdev->name, &raid_cfg, &base_bdev_slot)) {
1567 		raid_bdev_add_base_device(raid_cfg, bdev->name, base_bdev_slot);
1568 	} else {
1569 		SPDK_DEBUGLOG(bdev_raid, "bdev %s can't be claimed\n",
1570 			      bdev->name);
1571 	}
1572 
1573 	spdk_bdev_module_examine_done(&g_raid_if);
1574 }
1575 
1576 /* Log component for bdev raid bdev module */
1577 SPDK_LOG_REGISTER_COMPONENT(bdev_raid)
1578