xref: /spdk/module/bdev/raid/bdev_raid.c (revision 307b8c112ffd90a26d53dd15fad67bd9038ef526)
1 /*   SPDX-License-Identifier: BSD-3-Clause
2  *   Copyright (c) Intel Corporation.
3  *   All rights reserved.
4  *   Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
5  */
6 
7 #include "bdev_raid.h"
8 #include "spdk/env.h"
9 #include "spdk/thread.h"
10 #include "spdk/log.h"
11 #include "spdk/string.h"
12 #include "spdk/util.h"
13 #include "spdk/json.h"
14 #include "spdk/string.h"
15 
16 static bool g_shutdown_started = false;
17 
18 /* raid bdev config as read from config file */
19 struct raid_config	g_raid_config = {
20 	.raid_bdev_config_head = TAILQ_HEAD_INITIALIZER(g_raid_config.raid_bdev_config_head),
21 };
22 
23 /*
24  * List of raid bdev in configured list, these raid bdevs are registered with
25  * bdev layer
26  */
27 struct raid_configured_tailq	g_raid_bdev_configured_list = TAILQ_HEAD_INITIALIZER(
28 			g_raid_bdev_configured_list);
29 
30 /* List of raid bdev in configuring list */
31 struct raid_configuring_tailq	g_raid_bdev_configuring_list = TAILQ_HEAD_INITIALIZER(
32 			g_raid_bdev_configuring_list);
33 
34 /* List of all raid bdevs */
35 struct raid_all_tailq		g_raid_bdev_list = TAILQ_HEAD_INITIALIZER(g_raid_bdev_list);
36 
37 /* List of all raid bdevs that are offline */
38 struct raid_offline_tailq	g_raid_bdev_offline_list = TAILQ_HEAD_INITIALIZER(
39 			g_raid_bdev_offline_list);
40 
41 static TAILQ_HEAD(, raid_bdev_module) g_raid_modules = TAILQ_HEAD_INITIALIZER(g_raid_modules);
42 
43 static struct raid_bdev_module *
44 raid_bdev_module_find(enum raid_level level)
45 {
46 	struct raid_bdev_module *raid_module;
47 
48 	TAILQ_FOREACH(raid_module, &g_raid_modules, link) {
49 		if (raid_module->level == level) {
50 			return raid_module;
51 		}
52 	}
53 
54 	return NULL;
55 }
56 
57 void
58 raid_bdev_module_list_add(struct raid_bdev_module *raid_module)
59 {
60 	if (raid_bdev_module_find(raid_module->level) != NULL) {
61 		SPDK_ERRLOG("module for raid level '%s' already registered.\n",
62 			    raid_bdev_level_to_str(raid_module->level));
63 		assert(false);
64 	} else {
65 		TAILQ_INSERT_TAIL(&g_raid_modules, raid_module, link);
66 	}
67 }
68 
69 /* Function declarations */
70 static void	raid_bdev_examine(struct spdk_bdev *bdev);
71 static int	raid_bdev_init(void);
72 static void	raid_bdev_deconfigure(struct raid_bdev *raid_bdev,
73 				      raid_bdev_destruct_cb cb_fn, void *cb_arg);
74 static void	raid_bdev_event_base_bdev(enum spdk_bdev_event_type type, struct spdk_bdev *bdev,
75 		void *event_ctx);
76 
77 /*
78  * brief:
79  * raid_bdev_create_cb function is a cb function for raid bdev which creates the
80  * hierarchy from raid bdev to base bdev io channels. It will be called per core
81  * params:
82  * io_device - pointer to raid bdev io device represented by raid_bdev
83  * ctx_buf - pointer to context buffer for raid bdev io channel
84  * returns:
85  * 0 - success
86  * non zero - failure
87  */
88 static int
89 raid_bdev_create_cb(void *io_device, void *ctx_buf)
90 {
91 	struct raid_bdev            *raid_bdev = io_device;
92 	struct raid_bdev_io_channel *raid_ch = ctx_buf;
93 	uint8_t i;
94 	int ret = 0;
95 
96 	SPDK_DEBUGLOG(bdev_raid, "raid_bdev_create_cb, %p\n", raid_ch);
97 
98 	assert(raid_bdev != NULL);
99 	assert(raid_bdev->state == RAID_BDEV_STATE_ONLINE);
100 
101 	raid_ch->num_channels = raid_bdev->num_base_bdevs;
102 
103 	raid_ch->base_channel = calloc(raid_ch->num_channels,
104 				       sizeof(struct spdk_io_channel *));
105 	if (!raid_ch->base_channel) {
106 		SPDK_ERRLOG("Unable to allocate base bdevs io channel\n");
107 		return -ENOMEM;
108 	}
109 	for (i = 0; i < raid_ch->num_channels; i++) {
110 		/*
111 		 * Get the spdk_io_channel for all the base bdevs. This is used during
112 		 * split logic to send the respective child bdev ios to respective base
113 		 * bdev io channel.
114 		 */
115 		raid_ch->base_channel[i] = spdk_bdev_get_io_channel(
116 						   raid_bdev->base_bdev_info[i].desc);
117 		if (!raid_ch->base_channel[i]) {
118 			SPDK_ERRLOG("Unable to create io channel for base bdev\n");
119 			ret = -ENOMEM;
120 			break;
121 		}
122 	}
123 
124 	if (!ret && raid_bdev->module->get_io_channel) {
125 		raid_ch->module_channel = raid_bdev->module->get_io_channel(raid_bdev);
126 		if (!raid_ch->module_channel) {
127 			SPDK_ERRLOG("Unable to create io channel for raid module\n");
128 			ret = -ENOMEM;
129 		}
130 	}
131 
132 	if (ret) {
133 		uint8_t j;
134 
135 		for (j = 0; j < i; j++) {
136 			spdk_put_io_channel(raid_ch->base_channel[j]);
137 		}
138 		free(raid_ch->base_channel);
139 		raid_ch->base_channel = NULL;
140 	}
141 	return ret;
142 }
143 
144 /*
145  * brief:
146  * raid_bdev_destroy_cb function is a cb function for raid bdev which deletes the
147  * hierarchy from raid bdev to base bdev io channels. It will be called per core
148  * params:
149  * io_device - pointer to raid bdev io device represented by raid_bdev
150  * ctx_buf - pointer to context buffer for raid bdev io channel
151  * returns:
152  * none
153  */
154 static void
155 raid_bdev_destroy_cb(void *io_device, void *ctx_buf)
156 {
157 	struct raid_bdev_io_channel *raid_ch = ctx_buf;
158 	uint8_t i;
159 
160 	SPDK_DEBUGLOG(bdev_raid, "raid_bdev_destroy_cb\n");
161 
162 	assert(raid_ch != NULL);
163 	assert(raid_ch->base_channel);
164 
165 	if (raid_ch->module_channel) {
166 		spdk_put_io_channel(raid_ch->module_channel);
167 	}
168 
169 	for (i = 0; i < raid_ch->num_channels; i++) {
170 		/* Free base bdev channels */
171 		assert(raid_ch->base_channel[i] != NULL);
172 		spdk_put_io_channel(raid_ch->base_channel[i]);
173 	}
174 	free(raid_ch->base_channel);
175 	raid_ch->base_channel = NULL;
176 }
177 
178 /*
179  * brief:
180  * raid_bdev_cleanup is used to cleanup raid_bdev related data
181  * structures.
182  * params:
183  * raid_bdev - pointer to raid_bdev
184  * returns:
185  * none
186  */
187 static void
188 raid_bdev_cleanup(struct raid_bdev *raid_bdev)
189 {
190 	SPDK_DEBUGLOG(bdev_raid, "raid_bdev_cleanup, %p name %s, state %u, config %p\n",
191 		      raid_bdev,
192 		      raid_bdev->bdev.name, raid_bdev->state, raid_bdev->config);
193 	if (raid_bdev->state == RAID_BDEV_STATE_CONFIGURING) {
194 		TAILQ_REMOVE(&g_raid_bdev_configuring_list, raid_bdev, state_link);
195 	} else if (raid_bdev->state == RAID_BDEV_STATE_OFFLINE) {
196 		TAILQ_REMOVE(&g_raid_bdev_offline_list, raid_bdev, state_link);
197 	} else {
198 		assert(0);
199 	}
200 	TAILQ_REMOVE(&g_raid_bdev_list, raid_bdev, global_link);
201 	free(raid_bdev->base_bdev_info);
202 	if (raid_bdev->config) {
203 		raid_bdev->config->raid_bdev = NULL;
204 	}
205 }
206 
207 static void
208 raid_bdev_free(struct raid_bdev *raid_bdev)
209 {
210 	free(raid_bdev->bdev.name);
211 	free(raid_bdev);
212 }
213 
/*
 * brief:
 * raid_bdev_cleanup_and_free runs raid_bdev_cleanup followed by
 * raid_bdev_free on the same raid_bdev.
 * params:
 * raid_bdev - pointer to raid_bdev
 * returns:
 * none
 */
static void
raid_bdev_cleanup_and_free(struct raid_bdev *raid_bdev)
{
	/* Unlink from the lists first, then release the memory */
	raid_bdev_cleanup(raid_bdev);
	raid_bdev_free(raid_bdev);
}
220 
/*
 * brief:
 * message handler that closes a base bdev descriptor; used by
 * raid_bdev_free_base_bdev_resource when the close is sent to another thread
 * params:
 * ctx - the bdev descriptor (struct spdk_bdev_desc *) to close
 * returns:
 * none
 */
static void
_raid_bdev_free_base_bdev_resource(void *ctx)
{
	spdk_bdev_close(ctx);
}
235 
236 
237 /*
238  * brief:
239  * free resource of base bdev for raid bdev
240  * params:
241  * raid_bdev - pointer to raid bdev
242  * base_info - raid base bdev info
243  * returns:
244  * 0 - success
245  * non zero - failure
246  */
247 static void
248 raid_bdev_free_base_bdev_resource(struct raid_bdev *raid_bdev,
249 				  struct raid_base_bdev_info *base_info)
250 {
251 	spdk_bdev_module_release_bdev(base_info->bdev);
252 	if (base_info->thread && base_info->thread != spdk_get_thread()) {
253 		spdk_thread_send_msg(base_info->thread, _raid_bdev_free_base_bdev_resource, base_info->desc);
254 	} else {
255 		spdk_bdev_close(base_info->desc);
256 	}
257 	base_info->desc = NULL;
258 	base_info->bdev = NULL;
259 
260 	assert(raid_bdev->num_base_bdevs_discovered);
261 	raid_bdev->num_base_bdevs_discovered--;
262 }
263 
264 static void
265 raid_bdev_io_device_unregister_cb(void *io_device)
266 {
267 	struct raid_bdev *raid_bdev = io_device;
268 
269 	if (raid_bdev->num_base_bdevs_discovered == 0) {
270 		/* Free raid_bdev when there are no base bdevs left */
271 		SPDK_DEBUGLOG(bdev_raid, "raid bdev base bdevs is 0, going to free all in destruct\n");
272 		raid_bdev_cleanup(raid_bdev);
273 		spdk_bdev_destruct_done(&raid_bdev->bdev, 0);
274 		raid_bdev_free(raid_bdev);
275 	} else {
276 		spdk_bdev_destruct_done(&raid_bdev->bdev, 0);
277 	}
278 }
279 
280 void
281 raid_bdev_module_stop_done(struct raid_bdev *raid_bdev)
282 {
283 	if (raid_bdev->state != RAID_BDEV_STATE_CONFIGURING) {
284 		spdk_io_device_unregister(raid_bdev, raid_bdev_io_device_unregister_cb);
285 	}
286 }
287 
288 /*
289  * brief:
290  * raid_bdev_destruct is the destruct function table pointer for raid bdev
291  * params:
292  * ctxt - pointer to raid_bdev
293  * returns:
294  * 1 - success (deferred completion)
295  */
296 static int
297 raid_bdev_destruct(void *ctxt)
298 {
299 	struct raid_bdev *raid_bdev = ctxt;
300 	struct raid_base_bdev_info *base_info;
301 
302 	SPDK_DEBUGLOG(bdev_raid, "raid_bdev_destruct\n");
303 
304 	raid_bdev->destruct_called = true;
305 	RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
306 		/*
307 		 * Close all base bdev descriptors for which call has come from below
308 		 * layers.  Also close the descriptors if we have started shutdown.
309 		 */
310 		if (g_shutdown_started ||
311 		    ((base_info->remove_scheduled == true) &&
312 		     (base_info->bdev != NULL))) {
313 			raid_bdev_free_base_bdev_resource(raid_bdev, base_info);
314 		}
315 	}
316 
317 	if (g_shutdown_started) {
318 		TAILQ_REMOVE(&g_raid_bdev_configured_list, raid_bdev, state_link);
319 		raid_bdev->state = RAID_BDEV_STATE_OFFLINE;
320 		TAILQ_INSERT_TAIL(&g_raid_bdev_offline_list, raid_bdev, state_link);
321 	}
322 
323 	if (raid_bdev->module->stop != NULL) {
324 		if (raid_bdev->module->stop(raid_bdev) == false) {
325 			return 1;
326 		}
327 	}
328 
329 	raid_bdev_module_stop_done(raid_bdev);
330 
331 	return 1;
332 }
333 
334 void
335 raid_bdev_io_complete(struct raid_bdev_io *raid_io, enum spdk_bdev_io_status status)
336 {
337 	struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io);
338 
339 	spdk_bdev_io_complete(bdev_io, status);
340 }
341 
342 /*
343  * brief:
344  * raid_bdev_io_complete_part - signal the completion of a part of the expected
345  * base bdev IOs and complete the raid_io if this is the final expected IO.
346  * The caller should first set raid_io->base_bdev_io_remaining. This function
347  * will decrement this counter by the value of the 'completed' parameter and
348  * complete the raid_io if the counter reaches 0. The caller is free to
349  * interpret the 'base_bdev_io_remaining' and 'completed' values as needed,
350  * it can represent e.g. blocks or IOs.
351  * params:
352  * raid_io - pointer to raid_bdev_io
353  * completed - the part of the raid_io that has been completed
354  * status - status of the base IO
355  * returns:
356  * true - if the raid_io is completed
357  * false - otherwise
358  */
359 bool
360 raid_bdev_io_complete_part(struct raid_bdev_io *raid_io, uint64_t completed,
361 			   enum spdk_bdev_io_status status)
362 {
363 	assert(raid_io->base_bdev_io_remaining >= completed);
364 	raid_io->base_bdev_io_remaining -= completed;
365 
366 	if (status != SPDK_BDEV_IO_STATUS_SUCCESS) {
367 		raid_io->base_bdev_io_status = status;
368 	}
369 
370 	if (raid_io->base_bdev_io_remaining == 0) {
371 		raid_bdev_io_complete(raid_io, raid_io->base_bdev_io_status);
372 		return true;
373 	} else {
374 		return false;
375 	}
376 }
377 
378 /*
379  * brief:
380  * raid_bdev_queue_io_wait function processes the IO which failed to submit.
381  * It will try to queue the IOs after storing the context to bdev wait queue logic.
382  * params:
383  * raid_io - pointer to raid_bdev_io
384  * bdev - the block device that the IO is submitted to
385  * ch - io channel
386  * cb_fn - callback when the spdk_bdev_io for bdev becomes available
387  * returns:
388  * none
389  */
390 void
391 raid_bdev_queue_io_wait(struct raid_bdev_io *raid_io, struct spdk_bdev *bdev,
392 			struct spdk_io_channel *ch, spdk_bdev_io_wait_cb cb_fn)
393 {
394 	raid_io->waitq_entry.bdev = bdev;
395 	raid_io->waitq_entry.cb_fn = cb_fn;
396 	raid_io->waitq_entry.cb_arg = raid_io;
397 	spdk_bdev_queue_io_wait(bdev, ch, &raid_io->waitq_entry);
398 }
399 
400 static void
401 raid_base_bdev_reset_complete(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
402 {
403 	struct raid_bdev_io *raid_io = cb_arg;
404 
405 	spdk_bdev_free_io(bdev_io);
406 
407 	raid_bdev_io_complete_part(raid_io, 1, success ?
408 				   SPDK_BDEV_IO_STATUS_SUCCESS :
409 				   SPDK_BDEV_IO_STATUS_FAILED);
410 }
411 
static void raid_bdev_submit_reset_request(struct raid_bdev_io *raid_io);

/*
 * brief:
 * void-pointer adapter around raid_bdev_submit_reset_request, used as the
 * io wait callback when reset submission has to be retried
 * params:
 * _raid_io - pointer to raid_bdev_io
 * returns:
 * none
 */
static void
_raid_bdev_submit_reset_request(void *_raid_io)
{
	raid_bdev_submit_reset_request(_raid_io);
}
421 
422 /*
423  * brief:
424  * raid_bdev_submit_reset_request function submits reset requests
425  * to member disks; it will submit as many as possible unless a reset fails with -ENOMEM, in
426  * which case it will queue it for later submission
427  * params:
428  * raid_io
429  * returns:
430  * none
431  */
432 static void
433 raid_bdev_submit_reset_request(struct raid_bdev_io *raid_io)
434 {
435 	struct raid_bdev		*raid_bdev;
436 	int				ret;
437 	uint8_t				i;
438 	struct raid_base_bdev_info	*base_info;
439 	struct spdk_io_channel		*base_ch;
440 
441 	raid_bdev = raid_io->raid_bdev;
442 
443 	if (raid_io->base_bdev_io_remaining == 0) {
444 		raid_io->base_bdev_io_remaining = raid_bdev->num_base_bdevs;
445 	}
446 
447 	while (raid_io->base_bdev_io_submitted < raid_bdev->num_base_bdevs) {
448 		i = raid_io->base_bdev_io_submitted;
449 		base_info = &raid_bdev->base_bdev_info[i];
450 		base_ch = raid_io->raid_ch->base_channel[i];
451 		ret = spdk_bdev_reset(base_info->desc, base_ch,
452 				      raid_base_bdev_reset_complete, raid_io);
453 		if (ret == 0) {
454 			raid_io->base_bdev_io_submitted++;
455 		} else if (ret == -ENOMEM) {
456 			raid_bdev_queue_io_wait(raid_io, base_info->bdev, base_ch,
457 						_raid_bdev_submit_reset_request);
458 			return;
459 		} else {
460 			SPDK_ERRLOG("bdev io submit error not due to ENOMEM, it should not happen\n");
461 			assert(false);
462 			raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED);
463 			return;
464 		}
465 	}
466 }
467 
468 /*
469  * brief:
470  * Callback function to spdk_bdev_io_get_buf.
471  * params:
472  * ch - pointer to raid bdev io channel
473  * bdev_io - pointer to parent bdev_io on raid bdev device
474  * success - True if buffer is allocated or false otherwise.
475  * returns:
476  * none
477  */
478 static void
479 raid_bdev_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io,
480 		     bool success)
481 {
482 	struct raid_bdev_io *raid_io = (struct raid_bdev_io *)bdev_io->driver_ctx;
483 
484 	if (!success) {
485 		raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED);
486 		return;
487 	}
488 
489 	raid_io->raid_bdev->module->submit_rw_request(raid_io);
490 }
491 
492 /*
493  * brief:
494  * raid_bdev_submit_request function is the submit_request function pointer of
495  * raid bdev function table. This is used to submit the io on raid_bdev to below
496  * layers.
497  * params:
498  * ch - pointer to raid bdev io channel
499  * bdev_io - pointer to parent bdev_io on raid bdev device
500  * returns:
501  * none
502  */
503 static void
504 raid_bdev_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
505 {
506 	struct raid_bdev_io *raid_io = (struct raid_bdev_io *)bdev_io->driver_ctx;
507 
508 	raid_io->raid_bdev = bdev_io->bdev->ctxt;
509 	raid_io->raid_ch = spdk_io_channel_get_ctx(ch);
510 	raid_io->base_bdev_io_remaining = 0;
511 	raid_io->base_bdev_io_submitted = 0;
512 	raid_io->base_bdev_io_status = SPDK_BDEV_IO_STATUS_SUCCESS;
513 
514 	switch (bdev_io->type) {
515 	case SPDK_BDEV_IO_TYPE_READ:
516 		spdk_bdev_io_get_buf(bdev_io, raid_bdev_get_buf_cb,
517 				     bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen);
518 		break;
519 	case SPDK_BDEV_IO_TYPE_WRITE:
520 		raid_io->raid_bdev->module->submit_rw_request(raid_io);
521 		break;
522 
523 	case SPDK_BDEV_IO_TYPE_RESET:
524 		raid_bdev_submit_reset_request(raid_io);
525 		break;
526 
527 	case SPDK_BDEV_IO_TYPE_FLUSH:
528 	case SPDK_BDEV_IO_TYPE_UNMAP:
529 		raid_io->raid_bdev->module->submit_null_payload_request(raid_io);
530 		break;
531 
532 	default:
533 		SPDK_ERRLOG("submit request, invalid io type %u\n", bdev_io->type);
534 		raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED);
535 		break;
536 	}
537 }
538 
539 /*
540  * brief:
541  * _raid_bdev_io_type_supported checks whether io_type is supported in
542  * all base bdev modules of raid bdev module. If anyone among the base_bdevs
543  * doesn't support, the raid device doesn't supports.
544  *
545  * params:
546  * raid_bdev - pointer to raid bdev context
547  * io_type - io type
548  * returns:
549  * true - io_type is supported
550  * false - io_type is not supported
551  */
552 inline static bool
553 _raid_bdev_io_type_supported(struct raid_bdev *raid_bdev, enum spdk_bdev_io_type io_type)
554 {
555 	struct raid_base_bdev_info *base_info;
556 
557 	if (io_type == SPDK_BDEV_IO_TYPE_FLUSH ||
558 	    io_type == SPDK_BDEV_IO_TYPE_UNMAP) {
559 		if (raid_bdev->module->submit_null_payload_request == NULL) {
560 			return false;
561 		}
562 	}
563 
564 	RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
565 		if (base_info->bdev == NULL) {
566 			assert(false);
567 			continue;
568 		}
569 
570 		if (spdk_bdev_io_type_supported(base_info->bdev, io_type) == false) {
571 			return false;
572 		}
573 	}
574 
575 	return true;
576 }
577 
578 /*
579  * brief:
580  * raid_bdev_io_type_supported is the io_supported function for bdev function
581  * table which returns whether the particular io type is supported or not by
582  * raid bdev module
583  * params:
584  * ctx - pointer to raid bdev context
585  * type - io type
586  * returns:
587  * true - io_type is supported
588  * false - io_type is not supported
589  */
590 static bool
591 raid_bdev_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
592 {
593 	switch (io_type) {
594 	case SPDK_BDEV_IO_TYPE_READ:
595 	case SPDK_BDEV_IO_TYPE_WRITE:
596 		return true;
597 
598 	case SPDK_BDEV_IO_TYPE_FLUSH:
599 	case SPDK_BDEV_IO_TYPE_RESET:
600 	case SPDK_BDEV_IO_TYPE_UNMAP:
601 		return _raid_bdev_io_type_supported(ctx, io_type);
602 
603 	default:
604 		return false;
605 	}
606 
607 	return false;
608 }
609 
/*
 * brief:
 * raid_bdev_get_io_channel is the get_io_channel function table pointer for
 * raid bdev. The raid_bdev pointer itself is the io device passed to
 * spdk_get_io_channel.
 * params:
 * ctxt - pointer to raid_bdev
 * returns:
 * pointer to io channel for raid bdev
 */
static struct spdk_io_channel *
raid_bdev_get_io_channel(void *ctxt)
{
	return spdk_get_io_channel(ctxt);
}
626 
627 /*
628  * brief:
629  * raid_bdev_dump_info_json is the function table pointer for raid bdev
630  * params:
631  * ctx - pointer to raid_bdev
632  * w - pointer to json context
633  * returns:
634  * 0 - success
635  * non zero - failure
636  */
637 static int
638 raid_bdev_dump_info_json(void *ctx, struct spdk_json_write_ctx *w)
639 {
640 	struct raid_bdev *raid_bdev = ctx;
641 	struct raid_base_bdev_info *base_info;
642 
643 	SPDK_DEBUGLOG(bdev_raid, "raid_bdev_dump_config_json\n");
644 	assert(raid_bdev != NULL);
645 
646 	/* Dump the raid bdev configuration related information */
647 	spdk_json_write_named_object_begin(w, "raid");
648 	spdk_json_write_named_uint32(w, "strip_size_kb", raid_bdev->strip_size_kb);
649 	spdk_json_write_named_uint32(w, "state", raid_bdev->state);
650 	spdk_json_write_named_string(w, "raid_level", raid_bdev_level_to_str(raid_bdev->level));
651 	spdk_json_write_named_uint32(w, "destruct_called", raid_bdev->destruct_called);
652 	spdk_json_write_named_uint32(w, "num_base_bdevs", raid_bdev->num_base_bdevs);
653 	spdk_json_write_named_uint32(w, "num_base_bdevs_discovered", raid_bdev->num_base_bdevs_discovered);
654 	spdk_json_write_name(w, "base_bdevs_list");
655 	spdk_json_write_array_begin(w);
656 	RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
657 		if (base_info->bdev) {
658 			spdk_json_write_string(w, base_info->bdev->name);
659 		} else {
660 			spdk_json_write_null(w);
661 		}
662 	}
663 	spdk_json_write_array_end(w);
664 	spdk_json_write_object_end(w);
665 
666 	return 0;
667 }
668 
669 /*
670  * brief:
671  * raid_bdev_write_config_json is the function table pointer for raid bdev
672  * params:
673  * bdev - pointer to spdk_bdev
674  * w - pointer to json context
675  * returns:
676  * none
677  */
678 static void
679 raid_bdev_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
680 {
681 	struct raid_bdev *raid_bdev = bdev->ctxt;
682 	struct raid_base_bdev_info *base_info;
683 
684 	spdk_json_write_object_begin(w);
685 
686 	spdk_json_write_named_string(w, "method", "bdev_raid_create");
687 
688 	spdk_json_write_named_object_begin(w, "params");
689 	spdk_json_write_named_string(w, "name", bdev->name);
690 	spdk_json_write_named_uint32(w, "strip_size_kb", raid_bdev->strip_size_kb);
691 	spdk_json_write_named_string(w, "raid_level", raid_bdev_level_to_str(raid_bdev->level));
692 
693 	spdk_json_write_named_array_begin(w, "base_bdevs");
694 	RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
695 		if (base_info->bdev) {
696 			spdk_json_write_string(w, base_info->bdev->name);
697 		}
698 	}
699 	spdk_json_write_array_end(w);
700 	spdk_json_write_object_end(w);
701 
702 	spdk_json_write_object_end(w);
703 }
704 
705 static int
706 raid_bdev_get_memory_domains(void *ctx, struct spdk_memory_domain **domains, int array_size)
707 {
708 	struct raid_bdev *raid_bdev = ctx;
709 	struct spdk_bdev *base_bdev;
710 	uint32_t i;
711 	int domains_count = 0, rc;
712 
713 	/* First loop to get the number of memory domains */
714 	for (i = 0; i < raid_bdev->num_base_bdevs; i++) {
715 		base_bdev = raid_bdev->base_bdev_info[i].bdev;
716 		rc = spdk_bdev_get_memory_domains(base_bdev, NULL, 0);
717 		if (rc < 0) {
718 			return rc;
719 		}
720 		domains_count += rc;
721 	}
722 
723 	if (!domains || array_size < domains_count) {
724 		return domains_count;
725 	}
726 
727 	for (i = 0; i < raid_bdev->num_base_bdevs; i++) {
728 		base_bdev = raid_bdev->base_bdev_info[i].bdev;
729 		rc = spdk_bdev_get_memory_domains(base_bdev, domains, array_size);
730 		if (rc < 0) {
731 			return rc;
732 		}
733 		domains += rc;
734 		array_size -= rc;
735 	}
736 
737 	return domains_count;
738 }
739 
740 /* g_raid_bdev_fn_table is the function table for raid bdev */
741 static const struct spdk_bdev_fn_table g_raid_bdev_fn_table = {
742 	.destruct		= raid_bdev_destruct,
743 	.submit_request		= raid_bdev_submit_request,
744 	.io_type_supported	= raid_bdev_io_type_supported,
745 	.get_io_channel		= raid_bdev_get_io_channel,
746 	.dump_info_json		= raid_bdev_dump_info_json,
747 	.write_config_json	= raid_bdev_write_config_json,
748 	.get_memory_domains	= raid_bdev_get_memory_domains,
749 };
750 
751 /*
752  * brief:
753  * raid_bdev_config_cleanup function is used to free memory for one raid_bdev in configuration
754  * params:
755  * raid_cfg - pointer to raid_bdev_config structure
756  * returns:
757  * none
758  */
759 void
760 raid_bdev_config_cleanup(struct raid_bdev_config *raid_cfg)
761 {
762 	uint8_t i;
763 
764 	TAILQ_REMOVE(&g_raid_config.raid_bdev_config_head, raid_cfg, link);
765 	g_raid_config.total_raid_bdev--;
766 
767 	if (raid_cfg->base_bdev) {
768 		for (i = 0; i < raid_cfg->num_base_bdevs; i++) {
769 			free(raid_cfg->base_bdev[i].name);
770 		}
771 		free(raid_cfg->base_bdev);
772 	}
773 	free(raid_cfg->name);
774 	free(raid_cfg);
775 }
776 
777 /* brief
778  * raid_bdev_config_find_by_name is a helper function to find raid bdev config
779  * by name as key.
780  *
781  * params:
782  * raid_name - name for raid bdev.
783  */
784 struct raid_bdev_config *
785 raid_bdev_config_find_by_name(const char *raid_name)
786 {
787 	struct raid_bdev_config *raid_cfg;
788 
789 	TAILQ_FOREACH(raid_cfg, &g_raid_config.raid_bdev_config_head, link) {
790 		if (!strcmp(raid_cfg->name, raid_name)) {
791 			return raid_cfg;
792 		}
793 	}
794 
795 	return raid_cfg;
796 }
797 
798 /*
799  * brief
800  * raid_bdev_config_add function adds config for newly created raid bdev.
801  *
802  * params:
803  * raid_name - name for raid bdev.
804  * strip_size - strip size in KB
805  * num_base_bdevs - number of base bdevs.
806  * level - raid level.
807  * _raid_cfg - Pointer to newly added configuration
808  */
809 int
810 raid_bdev_config_add(const char *raid_name, uint32_t strip_size, uint8_t num_base_bdevs,
811 		     enum raid_level level, struct raid_bdev_config **_raid_cfg)
812 {
813 	struct raid_bdev_config *raid_cfg;
814 
815 	raid_cfg = raid_bdev_config_find_by_name(raid_name);
816 	if (raid_cfg != NULL) {
817 		SPDK_ERRLOG("Duplicate raid bdev name found in config file %s\n",
818 			    raid_name);
819 		return -EEXIST;
820 	}
821 
822 	if (spdk_u32_is_pow2(strip_size) == false) {
823 		SPDK_ERRLOG("Invalid strip size %" PRIu32 "\n", strip_size);
824 		return -EINVAL;
825 	}
826 
827 	if (num_base_bdevs == 0) {
828 		SPDK_ERRLOG("Invalid base device count %u\n", num_base_bdevs);
829 		return -EINVAL;
830 	}
831 
832 	raid_cfg = calloc(1, sizeof(*raid_cfg));
833 	if (raid_cfg == NULL) {
834 		SPDK_ERRLOG("unable to allocate memory\n");
835 		return -ENOMEM;
836 	}
837 
838 	raid_cfg->name = strdup(raid_name);
839 	if (!raid_cfg->name) {
840 		free(raid_cfg);
841 		SPDK_ERRLOG("unable to allocate memory\n");
842 		return -ENOMEM;
843 	}
844 	raid_cfg->strip_size = strip_size;
845 	raid_cfg->num_base_bdevs = num_base_bdevs;
846 	raid_cfg->level = level;
847 
848 	raid_cfg->base_bdev = calloc(num_base_bdevs, sizeof(*raid_cfg->base_bdev));
849 	if (raid_cfg->base_bdev == NULL) {
850 		free(raid_cfg->name);
851 		free(raid_cfg);
852 		SPDK_ERRLOG("unable to allocate memory\n");
853 		return -ENOMEM;
854 	}
855 
856 	TAILQ_INSERT_TAIL(&g_raid_config.raid_bdev_config_head, raid_cfg, link);
857 	g_raid_config.total_raid_bdev++;
858 
859 	*_raid_cfg = raid_cfg;
860 	return 0;
861 }
862 
863 /*
864  * brief:
865  * raid_bdev_config_add_base_bdev function add base bdev to raid bdev config.
866  *
867  * params:
868  * raid_cfg - pointer to raid bdev configuration
869  * base_bdev_name - name of base bdev
870  * slot - Position to add base bdev
871  */
872 int
873 raid_bdev_config_add_base_bdev(struct raid_bdev_config *raid_cfg, const char *base_bdev_name,
874 			       uint8_t slot)
875 {
876 	uint8_t i;
877 	struct raid_bdev_config *tmp;
878 
879 	if (slot >= raid_cfg->num_base_bdevs) {
880 		return -EINVAL;
881 	}
882 
883 	TAILQ_FOREACH(tmp, &g_raid_config.raid_bdev_config_head, link) {
884 		for (i = 0; i < tmp->num_base_bdevs; i++) {
885 			if (tmp->base_bdev[i].name != NULL) {
886 				if (!strcmp(tmp->base_bdev[i].name, base_bdev_name)) {
887 					SPDK_ERRLOG("duplicate base bdev name %s mentioned\n",
888 						    base_bdev_name);
889 					return -EEXIST;
890 				}
891 			}
892 		}
893 	}
894 
895 	raid_cfg->base_bdev[slot].name = strdup(base_bdev_name);
896 	if (raid_cfg->base_bdev[slot].name == NULL) {
897 		SPDK_ERRLOG("unable to allocate memory\n");
898 		return -ENOMEM;
899 	}
900 
901 	return 0;
902 }
903 
904 static struct {
905 	const char *name;
906 	enum raid_level value;
907 } g_raid_level_names[] = {
908 	{ "raid0", RAID0 },
909 	{ "0", RAID0 },
910 	{ "raid5f", RAID5F },
911 	{ "5f", RAID5F },
912 	{ "concat", CONCAT },
913 	{ }
914 };
915 
916 /* We have to use the typedef in the function declaration to appease astyle. */
917 typedef enum raid_level raid_level_t;
918 
919 raid_level_t
920 raid_bdev_parse_raid_level(const char *str)
921 {
922 	unsigned int i;
923 
924 	assert(str != NULL);
925 
926 	for (i = 0; g_raid_level_names[i].name != NULL; i++) {
927 		if (strcasecmp(g_raid_level_names[i].name, str) == 0) {
928 			return g_raid_level_names[i].value;
929 		}
930 	}
931 
932 	return INVALID_RAID_LEVEL;
933 }
934 
935 const char *
936 raid_bdev_level_to_str(enum raid_level level)
937 {
938 	unsigned int i;
939 
940 	for (i = 0; g_raid_level_names[i].name != NULL; i++) {
941 		if (g_raid_level_names[i].value == level) {
942 			return g_raid_level_names[i].name;
943 		}
944 	}
945 
946 	return "";
947 }
948 
949 /*
950  * brief:
951  * raid_bdev_fini_start is called when bdev layer is starting the
952  * shutdown process
953  * params:
954  * none
955  * returns:
956  * none
957  */
958 static void
959 raid_bdev_fini_start(void)
960 {
961 	SPDK_DEBUGLOG(bdev_raid, "raid_bdev_fini_start\n");
962 	g_shutdown_started = true;
963 }
964 
965 /*
966  * brief:
967  * raid_bdev_exit is called on raid bdev module exit time by bdev layer
968  * params:
969  * none
970  * returns:
971  * none
972  */
973 static void
974 raid_bdev_exit(void)
975 {
976 	struct raid_bdev_config *raid_cfg, *tmp;
977 
978 	SPDK_DEBUGLOG(bdev_raid, "raid_bdev_exit\n");
979 	TAILQ_FOREACH_SAFE(raid_cfg, &g_raid_config.raid_bdev_config_head, link, tmp) {
980 		raid_bdev_config_cleanup(raid_cfg);
981 	}
982 }
983 
984 /*
985  * brief:
986  * raid_bdev_get_ctx_size is used to return the context size of bdev_io for raid
987  * module
988  * params:
989  * none
990  * returns:
991  * size of spdk_bdev_io context for raid
992  */
993 static int
994 raid_bdev_get_ctx_size(void)
995 {
996 	SPDK_DEBUGLOG(bdev_raid, "raid_bdev_get_ctx_size\n");
997 	return sizeof(struct raid_bdev_io);
998 }
999 
1000 /*
1001  * brief:
1002  * raid_bdev_can_claim_bdev is the function to check if this base_bdev can be
1003  * claimed by raid bdev or not.
1004  * params:
1005  * bdev_name - represents base bdev name
1006  * _raid_cfg - pointer to raid bdev config parsed from config file
1007  * base_bdev_slot - if bdev can be claimed, it represents the base_bdev correct
1008  * slot. This field is only valid if return value of this function is true
1009  * returns:
1010  * true - if bdev can be claimed
1011  * false - if bdev can't be claimed
1012  */
1013 static bool
1014 raid_bdev_can_claim_bdev(const char *bdev_name, struct raid_bdev_config **_raid_cfg,
1015 			 uint8_t *base_bdev_slot)
1016 {
1017 	struct raid_bdev_config *raid_cfg;
1018 	uint8_t i;
1019 
1020 	TAILQ_FOREACH(raid_cfg, &g_raid_config.raid_bdev_config_head, link) {
1021 		for (i = 0; i < raid_cfg->num_base_bdevs; i++) {
1022 			/*
1023 			 * Check if the base bdev name is part of raid bdev configuration.
1024 			 * If match is found then return true and the slot information where
1025 			 * this base bdev should be inserted in raid bdev
1026 			 */
1027 			if (!strcmp(bdev_name, raid_cfg->base_bdev[i].name)) {
1028 				*_raid_cfg = raid_cfg;
1029 				*base_bdev_slot = i;
1030 				return true;
1031 			}
1032 		}
1033 	}
1034 
1035 	return false;
1036 }
1037 
1038 
1039 static struct spdk_bdev_module g_raid_if = {
1040 	.name = "raid",
1041 	.module_init = raid_bdev_init,
1042 	.fini_start = raid_bdev_fini_start,
1043 	.module_fini = raid_bdev_exit,
1044 	.get_ctx_size = raid_bdev_get_ctx_size,
1045 	.examine_config = raid_bdev_examine,
1046 	.async_init = false,
1047 	.async_fini = false,
1048 };
1049 SPDK_BDEV_MODULE_REGISTER(raid, &g_raid_if)
1050 
/*
 * brief:
 * raid_bdev_init is the module_init callback of the raid bdev module. It has
 * no work to do and always reports success.
 * params:
 * none
 * returns:
 * 0 - always
 */
static int
raid_bdev_init(void)
{
	/* Nothing to set up at module init time */
	return 0;
}
1065 
1066 /*
1067  * brief:
1068  * raid_bdev_create allocates raid bdev based on passed configuration
1069  * params:
1070  * raid_cfg - configuration of raid bdev
1071  * returns:
1072  * 0 - success
1073  * non zero - failure
1074  */
1075 int
1076 raid_bdev_create(struct raid_bdev_config *raid_cfg)
1077 {
1078 	struct raid_bdev *raid_bdev;
1079 	struct spdk_bdev *raid_bdev_gen;
1080 	struct raid_bdev_module *module;
1081 
1082 	module = raid_bdev_module_find(raid_cfg->level);
1083 	if (module == NULL) {
1084 		SPDK_ERRLOG("Unsupported raid level '%d'\n", raid_cfg->level);
1085 		return -EINVAL;
1086 	}
1087 
1088 	assert(module->base_bdevs_min != 0);
1089 	if (raid_cfg->num_base_bdevs < module->base_bdevs_min) {
1090 		SPDK_ERRLOG("At least %u base devices required for %s\n",
1091 			    module->base_bdevs_min,
1092 			    raid_bdev_level_to_str(raid_cfg->level));
1093 		return -EINVAL;
1094 	}
1095 
1096 	raid_bdev = calloc(1, sizeof(*raid_bdev));
1097 	if (!raid_bdev) {
1098 		SPDK_ERRLOG("Unable to allocate memory for raid bdev\n");
1099 		return -ENOMEM;
1100 	}
1101 
1102 	raid_bdev->module = module;
1103 	raid_bdev->num_base_bdevs = raid_cfg->num_base_bdevs;
1104 	raid_bdev->base_bdev_info = calloc(raid_bdev->num_base_bdevs,
1105 					   sizeof(struct raid_base_bdev_info));
1106 	if (!raid_bdev->base_bdev_info) {
1107 		SPDK_ERRLOG("Unable able to allocate base bdev info\n");
1108 		free(raid_bdev);
1109 		return -ENOMEM;
1110 	}
1111 
1112 	/* strip_size_kb is from the rpc param.  strip_size is in blocks and used
1113 	 * internally and set later.
1114 	 */
1115 	raid_bdev->strip_size = 0;
1116 	raid_bdev->strip_size_kb = raid_cfg->strip_size;
1117 	raid_bdev->state = RAID_BDEV_STATE_CONFIGURING;
1118 	raid_bdev->config = raid_cfg;
1119 	raid_bdev->level = raid_cfg->level;
1120 
1121 	raid_bdev_gen = &raid_bdev->bdev;
1122 
1123 	raid_bdev_gen->name = strdup(raid_cfg->name);
1124 	if (!raid_bdev_gen->name) {
1125 		SPDK_ERRLOG("Unable to allocate name for raid\n");
1126 		free(raid_bdev->base_bdev_info);
1127 		free(raid_bdev);
1128 		return -ENOMEM;
1129 	}
1130 
1131 	raid_bdev_gen->product_name = "Raid Volume";
1132 	raid_bdev_gen->ctxt = raid_bdev;
1133 	raid_bdev_gen->fn_table = &g_raid_bdev_fn_table;
1134 	raid_bdev_gen->module = &g_raid_if;
1135 	raid_bdev_gen->write_cache = 0;
1136 
1137 	TAILQ_INSERT_TAIL(&g_raid_bdev_configuring_list, raid_bdev, state_link);
1138 	TAILQ_INSERT_TAIL(&g_raid_bdev_list, raid_bdev, global_link);
1139 
1140 	raid_cfg->raid_bdev = raid_bdev;
1141 
1142 	return 0;
1143 }
1144 
1145 /*
1146  * brief
1147  * raid_bdev_alloc_base_bdev_resource allocates resource of base bdev.
1148  * params:
1149  * raid_bdev - pointer to raid bdev
1150  * bdev_name - base bdev name
1151  * base_bdev_slot - position to add base bdev
1152  * returns:
1153  * 0 - success
1154  * non zero - failure
1155  */
1156 static int
1157 raid_bdev_alloc_base_bdev_resource(struct raid_bdev *raid_bdev, const char *bdev_name,
1158 				   uint8_t base_bdev_slot)
1159 {
1160 	struct spdk_bdev_desc *desc;
1161 	struct spdk_bdev *bdev;
1162 	int rc;
1163 
1164 	rc = spdk_bdev_open_ext(bdev_name, true, raid_bdev_event_base_bdev, NULL, &desc);
1165 	if (rc != 0) {
1166 		if (rc != -ENODEV) {
1167 			SPDK_ERRLOG("Unable to create desc on bdev '%s'\n", bdev_name);
1168 		}
1169 		return rc;
1170 	}
1171 
1172 	bdev = spdk_bdev_desc_get_bdev(desc);
1173 
1174 	rc = spdk_bdev_module_claim_bdev(bdev, NULL, &g_raid_if);
1175 	if (rc != 0) {
1176 		SPDK_ERRLOG("Unable to claim this bdev as it is already claimed\n");
1177 		spdk_bdev_close(desc);
1178 		return rc;
1179 	}
1180 
1181 	SPDK_DEBUGLOG(bdev_raid, "bdev %s is claimed\n", bdev_name);
1182 
1183 	assert(raid_bdev->state != RAID_BDEV_STATE_ONLINE);
1184 	assert(base_bdev_slot < raid_bdev->num_base_bdevs);
1185 
1186 	raid_bdev->base_bdev_info[base_bdev_slot].thread = spdk_get_thread();
1187 	raid_bdev->base_bdev_info[base_bdev_slot].bdev = bdev;
1188 	raid_bdev->base_bdev_info[base_bdev_slot].desc = desc;
1189 	raid_bdev->num_base_bdevs_discovered++;
1190 	assert(raid_bdev->num_base_bdevs_discovered <= raid_bdev->num_base_bdevs);
1191 
1192 	return 0;
1193 }
1194 
1195 /*
1196  * brief:
1197  * If raid bdev config is complete, then only register the raid bdev to
1198  * bdev layer and remove this raid bdev from configuring list and
1199  * insert the raid bdev to configured list
1200  * params:
1201  * raid_bdev - pointer to raid bdev
1202  * returns:
1203  * 0 - success
1204  * non zero - failure
1205  */
1206 static int
1207 raid_bdev_configure(struct raid_bdev *raid_bdev)
1208 {
1209 	uint32_t blocklen = 0;
1210 	struct spdk_bdev *raid_bdev_gen;
1211 	struct raid_base_bdev_info *base_info;
1212 	int rc = 0;
1213 
1214 	assert(raid_bdev->state == RAID_BDEV_STATE_CONFIGURING);
1215 	assert(raid_bdev->num_base_bdevs_discovered == raid_bdev->num_base_bdevs);
1216 
1217 	RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
1218 		/* Check blocklen for all base bdevs that it should be same */
1219 		if (blocklen == 0) {
1220 			blocklen = base_info->bdev->blocklen;
1221 		} else if (blocklen != base_info->bdev->blocklen) {
1222 			/*
1223 			 * Assumption is that all the base bdevs for any raid bdev should
1224 			 * have same blocklen
1225 			 */
1226 			SPDK_ERRLOG("Blocklen of various bdevs not matching\n");
1227 			return -EINVAL;
1228 		}
1229 	}
1230 	assert(blocklen > 0);
1231 
1232 	/* The strip_size_kb is read in from user in KB. Convert to blocks here for
1233 	 * internal use.
1234 	 */
1235 	raid_bdev->strip_size = (raid_bdev->strip_size_kb * 1024) / blocklen;
1236 	raid_bdev->strip_size_shift = spdk_u32log2(raid_bdev->strip_size);
1237 	raid_bdev->blocklen_shift = spdk_u32log2(blocklen);
1238 
1239 	raid_bdev_gen = &raid_bdev->bdev;
1240 	raid_bdev_gen->blocklen = blocklen;
1241 
1242 	rc = raid_bdev->module->start(raid_bdev);
1243 	if (rc != 0) {
1244 		SPDK_ERRLOG("raid module startup callback failed\n");
1245 		return rc;
1246 	}
1247 	raid_bdev->state = RAID_BDEV_STATE_ONLINE;
1248 	SPDK_DEBUGLOG(bdev_raid, "io device register %p\n", raid_bdev);
1249 	SPDK_DEBUGLOG(bdev_raid, "blockcnt %" PRIu64 ", blocklen %u\n",
1250 		      raid_bdev_gen->blockcnt, raid_bdev_gen->blocklen);
1251 	spdk_io_device_register(raid_bdev, raid_bdev_create_cb, raid_bdev_destroy_cb,
1252 				sizeof(struct raid_bdev_io_channel),
1253 				raid_bdev->bdev.name);
1254 	rc = spdk_bdev_register(raid_bdev_gen);
1255 	if (rc != 0) {
1256 		SPDK_ERRLOG("Unable to register raid bdev and stay at configuring state\n");
1257 		if (raid_bdev->module->stop != NULL) {
1258 			raid_bdev->module->stop(raid_bdev);
1259 		}
1260 		spdk_io_device_unregister(raid_bdev, NULL);
1261 		raid_bdev->state = RAID_BDEV_STATE_CONFIGURING;
1262 		return rc;
1263 	}
1264 	SPDK_DEBUGLOG(bdev_raid, "raid bdev generic %p\n", raid_bdev_gen);
1265 	TAILQ_REMOVE(&g_raid_bdev_configuring_list, raid_bdev, state_link);
1266 	TAILQ_INSERT_TAIL(&g_raid_bdev_configured_list, raid_bdev, state_link);
1267 	SPDK_DEBUGLOG(bdev_raid, "raid bdev is created with name %s, raid_bdev %p\n",
1268 		      raid_bdev_gen->name, raid_bdev);
1269 
1270 	return 0;
1271 }
1272 
1273 /*
1274  * brief:
1275  * If raid bdev is online and registered, change the bdev state to
1276  * configuring and unregister this raid device. Queue this raid device
1277  * in configuring list
1278  * params:
1279  * raid_bdev - pointer to raid bdev
1280  * cb_fn - callback function
1281  * cb_arg - argument to callback function
1282  * returns:
1283  * none
1284  */
1285 static void
1286 raid_bdev_deconfigure(struct raid_bdev *raid_bdev, raid_bdev_destruct_cb cb_fn,
1287 		      void *cb_arg)
1288 {
1289 	if (raid_bdev->state != RAID_BDEV_STATE_ONLINE) {
1290 		if (cb_fn) {
1291 			cb_fn(cb_arg, 0);
1292 		}
1293 		return;
1294 	}
1295 
1296 	assert(raid_bdev->num_base_bdevs == raid_bdev->num_base_bdevs_discovered);
1297 	TAILQ_REMOVE(&g_raid_bdev_configured_list, raid_bdev, state_link);
1298 	raid_bdev->state = RAID_BDEV_STATE_OFFLINE;
1299 	assert(raid_bdev->num_base_bdevs_discovered);
1300 	TAILQ_INSERT_TAIL(&g_raid_bdev_offline_list, raid_bdev, state_link);
1301 	SPDK_DEBUGLOG(bdev_raid, "raid bdev state changing from online to offline\n");
1302 
1303 	spdk_bdev_unregister(&raid_bdev->bdev, cb_fn, cb_arg);
1304 }
1305 
1306 /*
1307  * brief:
1308  * raid_bdev_find_by_base_bdev function finds the raid bdev which has
1309  *  claimed the base bdev.
1310  * params:
1311  * base_bdev - pointer to base bdev pointer
1312  * _raid_bdev - Reference to pointer to raid bdev
1313  * _base_info - Reference to the raid base bdev info.
1314  * returns:
1315  * true - if the raid bdev is found.
1316  * false - if the raid bdev is not found.
1317  */
1318 static bool
1319 raid_bdev_find_by_base_bdev(struct spdk_bdev *base_bdev, struct raid_bdev **_raid_bdev,
1320 			    struct raid_base_bdev_info **_base_info)
1321 {
1322 	struct raid_bdev *raid_bdev;
1323 	struct raid_base_bdev_info *base_info;
1324 
1325 	TAILQ_FOREACH(raid_bdev, &g_raid_bdev_list, global_link) {
1326 		RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
1327 			if (base_info->bdev == base_bdev) {
1328 				*_raid_bdev = raid_bdev;
1329 				*_base_info = base_info;
1330 				return true;
1331 			}
1332 		}
1333 	}
1334 
1335 	return false;
1336 }
1337 
1338 /*
1339  * brief:
1340  * raid_bdev_remove_base_bdev function is called by below layers when base_bdev
1341  * is removed. This function checks if this base bdev is part of any raid bdev
1342  * or not. If yes, it takes necessary action on that particular raid bdev.
1343  * params:
1344  * base_bdev - pointer to base bdev pointer which got removed
1345  * returns:
1346  * none
1347  */
1348 static void
1349 raid_bdev_remove_base_bdev(struct spdk_bdev *base_bdev)
1350 {
1351 	struct raid_bdev	*raid_bdev = NULL;
1352 	struct raid_base_bdev_info *base_info;
1353 
1354 	SPDK_DEBUGLOG(bdev_raid, "raid_bdev_remove_base_bdev\n");
1355 
1356 	/* Find the raid_bdev which has claimed this base_bdev */
1357 	if (!raid_bdev_find_by_base_bdev(base_bdev, &raid_bdev, &base_info)) {
1358 		SPDK_ERRLOG("bdev to remove '%s' not found\n", base_bdev->name);
1359 		return;
1360 	}
1361 
1362 	assert(base_info->desc);
1363 	base_info->remove_scheduled = true;
1364 
1365 	if (raid_bdev->destruct_called == true ||
1366 	    raid_bdev->state == RAID_BDEV_STATE_CONFIGURING) {
1367 		/*
1368 		 * As raid bdev is not registered yet or already unregistered,
1369 		 * so cleanup should be done here itself.
1370 		 */
1371 		raid_bdev_free_base_bdev_resource(raid_bdev, base_info);
1372 		if (raid_bdev->num_base_bdevs_discovered == 0) {
1373 			/* There is no base bdev for this raid, so free the raid device. */
1374 			raid_bdev_cleanup_and_free(raid_bdev);
1375 			return;
1376 		}
1377 	}
1378 
1379 	raid_bdev_deconfigure(raid_bdev, NULL, NULL);
1380 }
1381 
1382 /*
1383  * brief:
1384  * raid_bdev_event_base_bdev function is called by below layers when base_bdev
1385  * triggers asynchronous event.
1386  * params:
1387  * type - event details.
1388  * bdev - bdev that triggered event.
1389  * event_ctx - context for event.
1390  * returns:
1391  * none
1392  */
1393 static void
1394 raid_bdev_event_base_bdev(enum spdk_bdev_event_type type, struct spdk_bdev *bdev,
1395 			  void *event_ctx)
1396 {
1397 	switch (type) {
1398 	case SPDK_BDEV_EVENT_REMOVE:
1399 		raid_bdev_remove_base_bdev(bdev);
1400 		break;
1401 	default:
1402 		SPDK_NOTICELOG("Unsupported bdev event: type %d\n", type);
1403 		break;
1404 	}
1405 }
1406 
1407 /*
1408  * brief:
1409  * Remove base bdevs from the raid bdev one by one.  Skip any base bdev which
1410  *  doesn't exist.
1411  * params:
1412  * raid_cfg - pointer to raid bdev config.
1413  * cb_fn - callback function
1414  * cb_ctx - argument to callback function
1415  */
1416 void
1417 raid_bdev_remove_base_devices(struct raid_bdev_config *raid_cfg,
1418 			      raid_bdev_destruct_cb cb_fn, void *cb_arg)
1419 {
1420 	struct raid_bdev		*raid_bdev;
1421 	struct raid_base_bdev_info	*base_info;
1422 
1423 	SPDK_DEBUGLOG(bdev_raid, "raid_bdev_remove_base_devices\n");
1424 
1425 	raid_bdev = raid_cfg->raid_bdev;
1426 	if (raid_bdev == NULL) {
1427 		SPDK_DEBUGLOG(bdev_raid, "raid bdev %s doesn't exist now\n", raid_cfg->name);
1428 		if (cb_fn) {
1429 			cb_fn(cb_arg, 0);
1430 		}
1431 		return;
1432 	}
1433 
1434 	if (raid_bdev->destroy_started) {
1435 		SPDK_DEBUGLOG(bdev_raid, "destroying raid bdev %s is already started\n",
1436 			      raid_cfg->name);
1437 		if (cb_fn) {
1438 			cb_fn(cb_arg, -EALREADY);
1439 		}
1440 		return;
1441 	}
1442 
1443 	raid_bdev->destroy_started = true;
1444 
1445 	RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
1446 		if (base_info->bdev == NULL) {
1447 			continue;
1448 		}
1449 
1450 		assert(base_info->desc);
1451 		base_info->remove_scheduled = true;
1452 
1453 		if (raid_bdev->destruct_called == true ||
1454 		    raid_bdev->state == RAID_BDEV_STATE_CONFIGURING) {
1455 			/*
1456 			 * As raid bdev is not registered yet or already unregistered,
1457 			 * so cleanup should be done here itself.
1458 			 */
1459 			raid_bdev_free_base_bdev_resource(raid_bdev, base_info);
1460 		}
1461 	}
1462 
1463 	if (raid_bdev->num_base_bdevs_discovered == 0) {
1464 		/* There is no base bdev for this raid, so free the raid device. */
1465 		raid_bdev_cleanup_and_free(raid_bdev);
1466 		if (cb_fn) {
1467 			cb_fn(cb_arg, 0);
1468 		}
1469 	} else {
1470 		raid_bdev_deconfigure(raid_bdev, cb_fn, cb_arg);
1471 	}
1472 }
1473 
1474 /*
1475  * brief:
1476  * raid_bdev_add_base_device function is the actual function which either adds
1477  * the nvme base device to existing raid bdev or create a new raid bdev. It also claims
1478  * the base device and keep the open descriptor.
1479  * params:
1480  * raid_cfg - pointer to raid bdev config
1481  * bdev - pointer to base bdev
1482  * base_bdev_slot - position to add base bdev
1483  * returns:
1484  * 0 - success
1485  * non zero - failure
1486  */
1487 static int
1488 raid_bdev_add_base_device(struct raid_bdev_config *raid_cfg, const char *bdev_name,
1489 			  uint8_t base_bdev_slot)
1490 {
1491 	struct raid_bdev	*raid_bdev;
1492 	int			rc;
1493 
1494 	raid_bdev = raid_cfg->raid_bdev;
1495 	if (!raid_bdev) {
1496 		SPDK_ERRLOG("Raid bdev '%s' is not created yet\n", raid_cfg->name);
1497 		return -ENODEV;
1498 	}
1499 
1500 	rc = raid_bdev_alloc_base_bdev_resource(raid_bdev, bdev_name, base_bdev_slot);
1501 	if (rc != 0) {
1502 		if (rc != -ENODEV) {
1503 			SPDK_ERRLOG("Failed to allocate resource for bdev '%s'\n", bdev_name);
1504 		}
1505 		return rc;
1506 	}
1507 
1508 	assert(raid_bdev->num_base_bdevs_discovered <= raid_bdev->num_base_bdevs);
1509 
1510 	if (raid_bdev->num_base_bdevs_discovered == raid_bdev->num_base_bdevs) {
1511 		rc = raid_bdev_configure(raid_bdev);
1512 		if (rc != 0) {
1513 			SPDK_ERRLOG("Failed to configure raid bdev\n");
1514 			return rc;
1515 		}
1516 	}
1517 
1518 	return 0;
1519 }
1520 
1521 /*
1522  * brief:
1523  * Add base bdevs to the raid bdev one by one.  Skip any base bdev which doesn't
1524  *  exist or fails to add. If all base bdevs are successfully added, the raid bdev
1525  *  moves to the configured state and becomes available. Otherwise, the raid bdev
1526  *  stays at the configuring state with added base bdevs.
1527  * params:
1528  * raid_cfg - pointer to raid bdev config
1529  * returns:
1530  * 0 - The raid bdev moves to the configured state or stays at the configuring
1531  *     state with added base bdevs due to any nonexistent base bdev.
1532  * non zero - Failed to add any base bdev and stays at the configuring state with
1533  *            added base bdevs.
1534  */
1535 int
1536 raid_bdev_add_base_devices(struct raid_bdev_config *raid_cfg)
1537 {
1538 	uint8_t	i;
1539 	int	rc = 0, _rc;
1540 
1541 	for (i = 0; i < raid_cfg->num_base_bdevs; i++) {
1542 		_rc = raid_bdev_add_base_device(raid_cfg, raid_cfg->base_bdev[i].name, i);
1543 		if (_rc == -ENODEV) {
1544 			SPDK_DEBUGLOG(bdev_raid, "base bdev %s doesn't exist now\n",
1545 				      raid_cfg->base_bdev[i].name);
1546 		} else if (_rc != 0) {
1547 			SPDK_ERRLOG("Failed to add base bdev %s to RAID bdev %s: %s\n",
1548 				    raid_cfg->base_bdev[i].name, raid_cfg->name,
1549 				    spdk_strerror(-_rc));
1550 			if (rc == 0) {
1551 				rc = _rc;
1552 			}
1553 		}
1554 	}
1555 
1556 	return rc;
1557 }
1558 
1559 /*
1560  * brief:
1561  * raid_bdev_examine function is the examine function call by the below layers
1562  * like bdev_nvme layer. This function will check if this base bdev can be
1563  * claimed by this raid bdev or not.
1564  * params:
1565  * bdev - pointer to base bdev
1566  * returns:
1567  * none
1568  */
1569 static void
1570 raid_bdev_examine(struct spdk_bdev *bdev)
1571 {
1572 	struct raid_bdev_config	*raid_cfg;
1573 	uint8_t			base_bdev_slot;
1574 
1575 	if (raid_bdev_can_claim_bdev(bdev->name, &raid_cfg, &base_bdev_slot)) {
1576 		raid_bdev_add_base_device(raid_cfg, bdev->name, base_bdev_slot);
1577 	} else {
1578 		SPDK_DEBUGLOG(bdev_raid, "bdev %s can't be claimed\n",
1579 			      bdev->name);
1580 	}
1581 
1582 	spdk_bdev_module_examine_done(&g_raid_if);
1583 }
1584 
/* Register the bdev_raid log component named by the SPDK_DEBUGLOG calls in this file */
SPDK_LOG_REGISTER_COMPONENT(bdev_raid)
1587