xref: /spdk/module/bdev/raid/bdev_raid.c (revision 0098e636761237b77c12c30c2408263a5d2260cc)
1 /*   SPDX-License-Identifier: BSD-3-Clause
2  *   Copyright (c) Intel Corporation.
3  *   All rights reserved.
4  *   Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
5  */
6 
7 #include "bdev_raid.h"
8 #include "spdk/env.h"
9 #include "spdk/thread.h"
10 #include "spdk/log.h"
11 #include "spdk/string.h"
12 #include "spdk/util.h"
13 #include "spdk/json.h"
14 #include "spdk/string.h"
15 
16 static bool g_shutdown_started = false;
17 
18 /* raid bdev config as read from config file */
19 struct raid_config	g_raid_config = {
20 	.raid_bdev_config_head = TAILQ_HEAD_INITIALIZER(g_raid_config.raid_bdev_config_head),
21 };
22 
23 /*
24  * List of raid bdev in configured list, these raid bdevs are registered with
25  * bdev layer
26  */
27 struct raid_configured_tailq	g_raid_bdev_configured_list = TAILQ_HEAD_INITIALIZER(
28 			g_raid_bdev_configured_list);
29 
30 /* List of raid bdev in configuring list */
31 struct raid_configuring_tailq	g_raid_bdev_configuring_list = TAILQ_HEAD_INITIALIZER(
32 			g_raid_bdev_configuring_list);
33 
34 /* List of all raid bdevs */
35 struct raid_all_tailq		g_raid_bdev_list = TAILQ_HEAD_INITIALIZER(g_raid_bdev_list);
36 
37 /* List of all raid bdevs that are offline */
38 struct raid_offline_tailq	g_raid_bdev_offline_list = TAILQ_HEAD_INITIALIZER(
39 			g_raid_bdev_offline_list);
40 
41 static TAILQ_HEAD(, raid_bdev_module) g_raid_modules = TAILQ_HEAD_INITIALIZER(g_raid_modules);
42 
43 static struct raid_bdev_module *
44 raid_bdev_module_find(enum raid_level level)
45 {
46 	struct raid_bdev_module *raid_module;
47 
48 	TAILQ_FOREACH(raid_module, &g_raid_modules, link) {
49 		if (raid_module->level == level) {
50 			return raid_module;
51 		}
52 	}
53 
54 	return NULL;
55 }
56 
57 void
58 raid_bdev_module_list_add(struct raid_bdev_module *raid_module)
59 {
60 	if (raid_bdev_module_find(raid_module->level) != NULL) {
61 		SPDK_ERRLOG("module for raid level '%s' already registered.\n",
62 			    raid_bdev_level_to_str(raid_module->level));
63 		assert(false);
64 	} else {
65 		TAILQ_INSERT_TAIL(&g_raid_modules, raid_module, link);
66 	}
67 }
68 
69 /* Function declarations */
70 static void	raid_bdev_examine(struct spdk_bdev *bdev);
71 static int	raid_bdev_init(void);
72 static void	raid_bdev_deconfigure(struct raid_bdev *raid_bdev,
73 				      raid_bdev_destruct_cb cb_fn, void *cb_arg);
74 static void	raid_bdev_event_base_bdev(enum spdk_bdev_event_type type, struct spdk_bdev *bdev,
75 		void *event_ctx);
76 
77 /*
78  * brief:
79  * raid_bdev_create_cb function is a cb function for raid bdev which creates the
80  * hierarchy from raid bdev to base bdev io channels. It will be called per core
81  * params:
82  * io_device - pointer to raid bdev io device represented by raid_bdev
83  * ctx_buf - pointer to context buffer for raid bdev io channel
84  * returns:
85  * 0 - success
86  * non zero - failure
87  */
88 static int
89 raid_bdev_create_cb(void *io_device, void *ctx_buf)
90 {
91 	struct raid_bdev            *raid_bdev = io_device;
92 	struct raid_bdev_io_channel *raid_ch = ctx_buf;
93 	uint8_t i;
94 
95 	SPDK_DEBUGLOG(bdev_raid, "raid_bdev_create_cb, %p\n", raid_ch);
96 
97 	assert(raid_bdev != NULL);
98 	assert(raid_bdev->state == RAID_BDEV_STATE_ONLINE);
99 
100 	raid_ch->num_channels = raid_bdev->num_base_bdevs;
101 
102 	raid_ch->base_channel = calloc(raid_ch->num_channels,
103 				       sizeof(struct spdk_io_channel *));
104 	if (!raid_ch->base_channel) {
105 		SPDK_ERRLOG("Unable to allocate base bdevs io channel\n");
106 		return -ENOMEM;
107 	}
108 	for (i = 0; i < raid_ch->num_channels; i++) {
109 		/*
110 		 * Get the spdk_io_channel for all the base bdevs. This is used during
111 		 * split logic to send the respective child bdev ios to respective base
112 		 * bdev io channel.
113 		 */
114 		raid_ch->base_channel[i] = spdk_bdev_get_io_channel(
115 						   raid_bdev->base_bdev_info[i].desc);
116 		if (!raid_ch->base_channel[i]) {
117 			uint8_t j;
118 
119 			for (j = 0; j < i; j++) {
120 				spdk_put_io_channel(raid_ch->base_channel[j]);
121 			}
122 			free(raid_ch->base_channel);
123 			raid_ch->base_channel = NULL;
124 			SPDK_ERRLOG("Unable to create io channel for base bdev\n");
125 			return -ENOMEM;
126 		}
127 	}
128 
129 	return 0;
130 }
131 
132 /*
133  * brief:
134  * raid_bdev_destroy_cb function is a cb function for raid bdev which deletes the
135  * hierarchy from raid bdev to base bdev io channels. It will be called per core
136  * params:
137  * io_device - pointer to raid bdev io device represented by raid_bdev
138  * ctx_buf - pointer to context buffer for raid bdev io channel
139  * returns:
140  * none
141  */
142 static void
143 raid_bdev_destroy_cb(void *io_device, void *ctx_buf)
144 {
145 	struct raid_bdev_io_channel *raid_ch = ctx_buf;
146 	uint8_t i;
147 
148 	SPDK_DEBUGLOG(bdev_raid, "raid_bdev_destroy_cb\n");
149 
150 	assert(raid_ch != NULL);
151 	assert(raid_ch->base_channel);
152 	for (i = 0; i < raid_ch->num_channels; i++) {
153 		/* Free base bdev channels */
154 		assert(raid_ch->base_channel[i] != NULL);
155 		spdk_put_io_channel(raid_ch->base_channel[i]);
156 	}
157 	free(raid_ch->base_channel);
158 	raid_ch->base_channel = NULL;
159 }
160 
161 /*
162  * brief:
163  * raid_bdev_cleanup is used to cleanup and free raid_bdev related data
164  * structures.
165  * params:
166  * raid_bdev - pointer to raid_bdev
167  * returns:
168  * none
169  */
170 static void
171 raid_bdev_cleanup(struct raid_bdev *raid_bdev)
172 {
173 	SPDK_DEBUGLOG(bdev_raid, "raid_bdev_cleanup, %p name %s, state %u, config %p\n",
174 		      raid_bdev,
175 		      raid_bdev->bdev.name, raid_bdev->state, raid_bdev->config);
176 	if (raid_bdev->state == RAID_BDEV_STATE_CONFIGURING) {
177 		TAILQ_REMOVE(&g_raid_bdev_configuring_list, raid_bdev, state_link);
178 	} else if (raid_bdev->state == RAID_BDEV_STATE_OFFLINE) {
179 		TAILQ_REMOVE(&g_raid_bdev_offline_list, raid_bdev, state_link);
180 	} else {
181 		assert(0);
182 	}
183 	TAILQ_REMOVE(&g_raid_bdev_list, raid_bdev, global_link);
184 	free(raid_bdev->bdev.name);
185 	free(raid_bdev->base_bdev_info);
186 	if (raid_bdev->config) {
187 		raid_bdev->config->raid_bdev = NULL;
188 	}
189 	free(raid_bdev);
190 }
191 
/*
 * brief:
 * wrapper for the bdev close operation with a void pointer argument, so that
 * it can be handed to spdk_thread_send_msg
 * params:
 * ctx - base bdev descriptor (struct spdk_bdev_desc *) to close
 * returns:
 * none
 */
static void
_raid_bdev_free_base_bdev_resource(void *ctx)
{
	spdk_bdev_close((struct spdk_bdev_desc *)ctx);
}
206 
207 
208 /*
209  * brief:
210  * free resource of base bdev for raid bdev
211  * params:
212  * raid_bdev - pointer to raid bdev
213  * base_info - raid base bdev info
214  * returns:
215  * 0 - success
216  * non zero - failure
217  */
218 static void
219 raid_bdev_free_base_bdev_resource(struct raid_bdev *raid_bdev,
220 				  struct raid_base_bdev_info *base_info)
221 {
222 	spdk_bdev_module_release_bdev(base_info->bdev);
223 	if (base_info->thread && base_info->thread != spdk_get_thread()) {
224 		spdk_thread_send_msg(base_info->thread, _raid_bdev_free_base_bdev_resource, base_info->desc);
225 	} else {
226 		spdk_bdev_close(base_info->desc);
227 	}
228 	base_info->desc = NULL;
229 	base_info->bdev = NULL;
230 
231 	assert(raid_bdev->num_base_bdevs_discovered);
232 	raid_bdev->num_base_bdevs_discovered--;
233 }
234 
235 /*
236  * brief:
237  * raid_bdev_destruct is the destruct function table pointer for raid bdev
238  * params:
239  * ctxt - pointer to raid_bdev
240  * returns:
241  * 0 - success
242  * non zero - failure
243  */
244 static int
245 raid_bdev_destruct(void *ctxt)
246 {
247 	struct raid_bdev *raid_bdev = ctxt;
248 	struct raid_base_bdev_info *base_info;
249 
250 	SPDK_DEBUGLOG(bdev_raid, "raid_bdev_destruct\n");
251 
252 	raid_bdev->destruct_called = true;
253 	RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
254 		/*
255 		 * Close all base bdev descriptors for which call has come from below
256 		 * layers.  Also close the descriptors if we have started shutdown.
257 		 */
258 		if (g_shutdown_started ||
259 		    ((base_info->remove_scheduled == true) &&
260 		     (base_info->bdev != NULL))) {
261 			raid_bdev_free_base_bdev_resource(raid_bdev, base_info);
262 		}
263 	}
264 
265 	if (g_shutdown_started) {
266 		TAILQ_REMOVE(&g_raid_bdev_configured_list, raid_bdev, state_link);
267 		raid_bdev->state = RAID_BDEV_STATE_OFFLINE;
268 		TAILQ_INSERT_TAIL(&g_raid_bdev_offline_list, raid_bdev, state_link);
269 	}
270 
271 	if (raid_bdev->module->stop != NULL) {
272 		raid_bdev->module->stop(raid_bdev);
273 	}
274 
275 	spdk_io_device_unregister(raid_bdev, NULL);
276 
277 	if (raid_bdev->num_base_bdevs_discovered == 0) {
278 		/* Free raid_bdev when there are no base bdevs left */
279 		SPDK_DEBUGLOG(bdev_raid, "raid bdev base bdevs is 0, going to free all in destruct\n");
280 		raid_bdev_cleanup(raid_bdev);
281 	}
282 
283 	return 0;
284 }
285 
286 void
287 raid_bdev_io_complete(struct raid_bdev_io *raid_io, enum spdk_bdev_io_status status)
288 {
289 	struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io);
290 
291 	spdk_bdev_io_complete(bdev_io, status);
292 }
293 
294 /*
295  * brief:
296  * raid_bdev_io_complete_part - signal the completion of a part of the expected
297  * base bdev IOs and complete the raid_io if this is the final expected IO.
298  * The caller should first set raid_io->base_bdev_io_remaining. This function
299  * will decrement this counter by the value of the 'completed' parameter and
300  * complete the raid_io if the counter reaches 0. The caller is free to
301  * interpret the 'base_bdev_io_remaining' and 'completed' values as needed,
302  * it can represent e.g. blocks or IOs.
303  * params:
304  * raid_io - pointer to raid_bdev_io
305  * completed - the part of the raid_io that has been completed
306  * status - status of the base IO
307  * returns:
308  * true - if the raid_io is completed
309  * false - otherwise
310  */
311 bool
312 raid_bdev_io_complete_part(struct raid_bdev_io *raid_io, uint64_t completed,
313 			   enum spdk_bdev_io_status status)
314 {
315 	assert(raid_io->base_bdev_io_remaining >= completed);
316 	raid_io->base_bdev_io_remaining -= completed;
317 
318 	if (status != SPDK_BDEV_IO_STATUS_SUCCESS) {
319 		raid_io->base_bdev_io_status = status;
320 	}
321 
322 	if (raid_io->base_bdev_io_remaining == 0) {
323 		raid_bdev_io_complete(raid_io, raid_io->base_bdev_io_status);
324 		return true;
325 	} else {
326 		return false;
327 	}
328 }
329 
330 /*
331  * brief:
332  * raid_bdev_queue_io_wait function processes the IO which failed to submit.
333  * It will try to queue the IOs after storing the context to bdev wait queue logic.
334  * params:
335  * raid_io - pointer to raid_bdev_io
336  * bdev - the block device that the IO is submitted to
337  * ch - io channel
338  * cb_fn - callback when the spdk_bdev_io for bdev becomes available
339  * returns:
340  * none
341  */
342 void
343 raid_bdev_queue_io_wait(struct raid_bdev_io *raid_io, struct spdk_bdev *bdev,
344 			struct spdk_io_channel *ch, spdk_bdev_io_wait_cb cb_fn)
345 {
346 	raid_io->waitq_entry.bdev = bdev;
347 	raid_io->waitq_entry.cb_fn = cb_fn;
348 	raid_io->waitq_entry.cb_arg = raid_io;
349 	spdk_bdev_queue_io_wait(bdev, ch, &raid_io->waitq_entry);
350 }
351 
352 static void
353 raid_base_bdev_reset_complete(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
354 {
355 	struct raid_bdev_io *raid_io = cb_arg;
356 
357 	spdk_bdev_free_io(bdev_io);
358 
359 	raid_bdev_io_complete_part(raid_io, 1, success ?
360 				   SPDK_BDEV_IO_STATUS_SUCCESS :
361 				   SPDK_BDEV_IO_STATUS_FAILED);
362 }
363 
static void raid_bdev_submit_reset_request(struct raid_bdev_io *raid_io);

/*
 * brief:
 * _raid_bdev_submit_reset_request forwards to raid_bdev_submit_reset_request;
 * it exists so the latter can be used where a callback taking a void pointer
 * is required.
 * params:
 * _raid_io - pointer to raid_bdev_io
 * returns:
 * none
 */
static void
_raid_bdev_submit_reset_request(void *_raid_io)
{
	raid_bdev_submit_reset_request((struct raid_bdev_io *)_raid_io);
}
373 
374 /*
375  * brief:
376  * raid_bdev_submit_reset_request function submits reset requests
377  * to member disks; it will submit as many as possible unless a reset fails with -ENOMEM, in
378  * which case it will queue it for later submission
379  * params:
380  * raid_io
381  * returns:
382  * none
383  */
384 static void
385 raid_bdev_submit_reset_request(struct raid_bdev_io *raid_io)
386 {
387 	struct raid_bdev		*raid_bdev;
388 	int				ret;
389 	uint8_t				i;
390 	struct raid_base_bdev_info	*base_info;
391 	struct spdk_io_channel		*base_ch;
392 
393 	raid_bdev = raid_io->raid_bdev;
394 
395 	if (raid_io->base_bdev_io_remaining == 0) {
396 		raid_io->base_bdev_io_remaining = raid_bdev->num_base_bdevs;
397 	}
398 
399 	while (raid_io->base_bdev_io_submitted < raid_bdev->num_base_bdevs) {
400 		i = raid_io->base_bdev_io_submitted;
401 		base_info = &raid_bdev->base_bdev_info[i];
402 		base_ch = raid_io->raid_ch->base_channel[i];
403 		ret = spdk_bdev_reset(base_info->desc, base_ch,
404 				      raid_base_bdev_reset_complete, raid_io);
405 		if (ret == 0) {
406 			raid_io->base_bdev_io_submitted++;
407 		} else if (ret == -ENOMEM) {
408 			raid_bdev_queue_io_wait(raid_io, base_info->bdev, base_ch,
409 						_raid_bdev_submit_reset_request);
410 			return;
411 		} else {
412 			SPDK_ERRLOG("bdev io submit error not due to ENOMEM, it should not happen\n");
413 			assert(false);
414 			raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED);
415 			return;
416 		}
417 	}
418 }
419 
420 /*
421  * brief:
422  * Callback function to spdk_bdev_io_get_buf.
423  * params:
424  * ch - pointer to raid bdev io channel
425  * bdev_io - pointer to parent bdev_io on raid bdev device
426  * success - True if buffer is allocated or false otherwise.
427  * returns:
428  * none
429  */
430 static void
431 raid_bdev_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io,
432 		     bool success)
433 {
434 	struct raid_bdev_io *raid_io = (struct raid_bdev_io *)bdev_io->driver_ctx;
435 
436 	if (!success) {
437 		raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED);
438 		return;
439 	}
440 
441 	raid_io->raid_bdev->module->submit_rw_request(raid_io);
442 }
443 
444 /*
445  * brief:
446  * raid_bdev_submit_request function is the submit_request function pointer of
447  * raid bdev function table. This is used to submit the io on raid_bdev to below
448  * layers.
449  * params:
450  * ch - pointer to raid bdev io channel
451  * bdev_io - pointer to parent bdev_io on raid bdev device
452  * returns:
453  * none
454  */
455 static void
456 raid_bdev_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
457 {
458 	struct raid_bdev_io *raid_io = (struct raid_bdev_io *)bdev_io->driver_ctx;
459 
460 	raid_io->raid_bdev = bdev_io->bdev->ctxt;
461 	raid_io->raid_ch = spdk_io_channel_get_ctx(ch);
462 	raid_io->base_bdev_io_remaining = 0;
463 	raid_io->base_bdev_io_submitted = 0;
464 	raid_io->base_bdev_io_status = SPDK_BDEV_IO_STATUS_SUCCESS;
465 
466 	switch (bdev_io->type) {
467 	case SPDK_BDEV_IO_TYPE_READ:
468 		spdk_bdev_io_get_buf(bdev_io, raid_bdev_get_buf_cb,
469 				     bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen);
470 		break;
471 	case SPDK_BDEV_IO_TYPE_WRITE:
472 		raid_io->raid_bdev->module->submit_rw_request(raid_io);
473 		break;
474 
475 	case SPDK_BDEV_IO_TYPE_RESET:
476 		raid_bdev_submit_reset_request(raid_io);
477 		break;
478 
479 	case SPDK_BDEV_IO_TYPE_FLUSH:
480 	case SPDK_BDEV_IO_TYPE_UNMAP:
481 		raid_io->raid_bdev->module->submit_null_payload_request(raid_io);
482 		break;
483 
484 	default:
485 		SPDK_ERRLOG("submit request, invalid io type %u\n", bdev_io->type);
486 		raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED);
487 		break;
488 	}
489 }
490 
491 /*
492  * brief:
493  * _raid_bdev_io_type_supported checks whether io_type is supported in
494  * all base bdev modules of raid bdev module. If anyone among the base_bdevs
495  * doesn't support, the raid device doesn't supports.
496  *
497  * params:
498  * raid_bdev - pointer to raid bdev context
499  * io_type - io type
500  * returns:
501  * true - io_type is supported
502  * false - io_type is not supported
503  */
504 inline static bool
505 _raid_bdev_io_type_supported(struct raid_bdev *raid_bdev, enum spdk_bdev_io_type io_type)
506 {
507 	struct raid_base_bdev_info *base_info;
508 
509 	if (io_type == SPDK_BDEV_IO_TYPE_FLUSH ||
510 	    io_type == SPDK_BDEV_IO_TYPE_UNMAP) {
511 		if (raid_bdev->module->submit_null_payload_request == NULL) {
512 			return false;
513 		}
514 	}
515 
516 	RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
517 		if (base_info->bdev == NULL) {
518 			assert(false);
519 			continue;
520 		}
521 
522 		if (spdk_bdev_io_type_supported(base_info->bdev, io_type) == false) {
523 			return false;
524 		}
525 	}
526 
527 	return true;
528 }
529 
530 /*
531  * brief:
532  * raid_bdev_io_type_supported is the io_supported function for bdev function
533  * table which returns whether the particular io type is supported or not by
534  * raid bdev module
535  * params:
536  * ctx - pointer to raid bdev context
537  * type - io type
538  * returns:
539  * true - io_type is supported
540  * false - io_type is not supported
541  */
542 static bool
543 raid_bdev_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
544 {
545 	switch (io_type) {
546 	case SPDK_BDEV_IO_TYPE_READ:
547 	case SPDK_BDEV_IO_TYPE_WRITE:
548 		return true;
549 
550 	case SPDK_BDEV_IO_TYPE_FLUSH:
551 	case SPDK_BDEV_IO_TYPE_RESET:
552 	case SPDK_BDEV_IO_TYPE_UNMAP:
553 		return _raid_bdev_io_type_supported(ctx, io_type);
554 
555 	default:
556 		return false;
557 	}
558 
559 	return false;
560 }
561 
/*
 * brief:
 * raid_bdev_get_io_channel is the get_io_channel function table pointer for
 * raid bdev. The raid_bdev itself serves as the io device the channel is
 * requested for.
 * params:
 * ctxt - pointer to raid_bdev
 * returns:
 * pointer to io channel for raid bdev
 */
static struct spdk_io_channel *
raid_bdev_get_io_channel(void *ctxt)
{
	return spdk_get_io_channel(ctxt);
}
578 
579 /*
580  * brief:
581  * raid_bdev_dump_info_json is the function table pointer for raid bdev
582  * params:
583  * ctx - pointer to raid_bdev
584  * w - pointer to json context
585  * returns:
586  * 0 - success
587  * non zero - failure
588  */
589 static int
590 raid_bdev_dump_info_json(void *ctx, struct spdk_json_write_ctx *w)
591 {
592 	struct raid_bdev *raid_bdev = ctx;
593 	struct raid_base_bdev_info *base_info;
594 
595 	SPDK_DEBUGLOG(bdev_raid, "raid_bdev_dump_config_json\n");
596 	assert(raid_bdev != NULL);
597 
598 	/* Dump the raid bdev configuration related information */
599 	spdk_json_write_named_object_begin(w, "raid");
600 	spdk_json_write_named_uint32(w, "strip_size_kb", raid_bdev->strip_size_kb);
601 	spdk_json_write_named_uint32(w, "state", raid_bdev->state);
602 	spdk_json_write_named_string(w, "raid_level", raid_bdev_level_to_str(raid_bdev->level));
603 	spdk_json_write_named_uint32(w, "destruct_called", raid_bdev->destruct_called);
604 	spdk_json_write_named_uint32(w, "num_base_bdevs", raid_bdev->num_base_bdevs);
605 	spdk_json_write_named_uint32(w, "num_base_bdevs_discovered", raid_bdev->num_base_bdevs_discovered);
606 	spdk_json_write_name(w, "base_bdevs_list");
607 	spdk_json_write_array_begin(w);
608 	RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
609 		if (base_info->bdev) {
610 			spdk_json_write_string(w, base_info->bdev->name);
611 		} else {
612 			spdk_json_write_null(w);
613 		}
614 	}
615 	spdk_json_write_array_end(w);
616 	spdk_json_write_object_end(w);
617 
618 	return 0;
619 }
620 
621 /*
622  * brief:
623  * raid_bdev_write_config_json is the function table pointer for raid bdev
624  * params:
625  * bdev - pointer to spdk_bdev
626  * w - pointer to json context
627  * returns:
628  * none
629  */
630 static void
631 raid_bdev_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
632 {
633 	struct raid_bdev *raid_bdev = bdev->ctxt;
634 	struct raid_base_bdev_info *base_info;
635 
636 	spdk_json_write_object_begin(w);
637 
638 	spdk_json_write_named_string(w, "method", "bdev_raid_create");
639 
640 	spdk_json_write_named_object_begin(w, "params");
641 	spdk_json_write_named_string(w, "name", bdev->name);
642 	spdk_json_write_named_uint32(w, "strip_size_kb", raid_bdev->strip_size_kb);
643 	spdk_json_write_named_string(w, "raid_level", raid_bdev_level_to_str(raid_bdev->level));
644 
645 	spdk_json_write_named_array_begin(w, "base_bdevs");
646 	RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
647 		if (base_info->bdev) {
648 			spdk_json_write_string(w, base_info->bdev->name);
649 		}
650 	}
651 	spdk_json_write_array_end(w);
652 	spdk_json_write_object_end(w);
653 
654 	spdk_json_write_object_end(w);
655 }
656 
657 static int
658 raid_bdev_get_memory_domains(void *ctx, struct spdk_memory_domain **domains, int array_size)
659 {
660 	struct raid_bdev *raid_bdev = ctx;
661 	struct spdk_bdev *base_bdev;
662 	uint32_t i;
663 	int domains_count = 0, rc;
664 
665 	/* First loop to get the number of memory domains */
666 	for (i = 0; i < raid_bdev->num_base_bdevs; i++) {
667 		base_bdev = raid_bdev->base_bdev_info[i].bdev;
668 		rc = spdk_bdev_get_memory_domains(base_bdev, NULL, 0);
669 		if (rc < 0) {
670 			return rc;
671 		}
672 		domains_count += rc;
673 	}
674 
675 	if (!domains || array_size < domains_count) {
676 		return domains_count;
677 	}
678 
679 	for (i = 0; i < raid_bdev->num_base_bdevs; i++) {
680 		base_bdev = raid_bdev->base_bdev_info[i].bdev;
681 		rc = spdk_bdev_get_memory_domains(base_bdev, domains, array_size);
682 		if (rc < 0) {
683 			return rc;
684 		}
685 		domains += rc;
686 		array_size -= rc;
687 	}
688 
689 	return domains_count;
690 }
691 
692 /* g_raid_bdev_fn_table is the function table for raid bdev */
693 static const struct spdk_bdev_fn_table g_raid_bdev_fn_table = {
694 	.destruct		= raid_bdev_destruct,
695 	.submit_request		= raid_bdev_submit_request,
696 	.io_type_supported	= raid_bdev_io_type_supported,
697 	.get_io_channel		= raid_bdev_get_io_channel,
698 	.dump_info_json		= raid_bdev_dump_info_json,
699 	.write_config_json	= raid_bdev_write_config_json,
700 	.get_memory_domains	= raid_bdev_get_memory_domains,
701 };
702 
703 /*
704  * brief:
705  * raid_bdev_config_cleanup function is used to free memory for one raid_bdev in configuration
706  * params:
707  * raid_cfg - pointer to raid_bdev_config structure
708  * returns:
709  * none
710  */
711 void
712 raid_bdev_config_cleanup(struct raid_bdev_config *raid_cfg)
713 {
714 	uint8_t i;
715 
716 	TAILQ_REMOVE(&g_raid_config.raid_bdev_config_head, raid_cfg, link);
717 	g_raid_config.total_raid_bdev--;
718 
719 	if (raid_cfg->base_bdev) {
720 		for (i = 0; i < raid_cfg->num_base_bdevs; i++) {
721 			free(raid_cfg->base_bdev[i].name);
722 		}
723 		free(raid_cfg->base_bdev);
724 	}
725 	free(raid_cfg->name);
726 	free(raid_cfg);
727 }
728 
729 /*
730  * brief:
731  * raid_bdev_free is the raid bdev function table function pointer. This is
732  * called on bdev free path
733  * params:
734  * none
735  * returns:
736  * none
737  */
738 static void
739 raid_bdev_free(void)
740 {
741 	struct raid_bdev_config *raid_cfg, *tmp;
742 
743 	SPDK_DEBUGLOG(bdev_raid, "raid_bdev_free\n");
744 	TAILQ_FOREACH_SAFE(raid_cfg, &g_raid_config.raid_bdev_config_head, link, tmp) {
745 		raid_bdev_config_cleanup(raid_cfg);
746 	}
747 }
748 
749 /* brief
750  * raid_bdev_config_find_by_name is a helper function to find raid bdev config
751  * by name as key.
752  *
753  * params:
754  * raid_name - name for raid bdev.
755  */
756 struct raid_bdev_config *
757 raid_bdev_config_find_by_name(const char *raid_name)
758 {
759 	struct raid_bdev_config *raid_cfg;
760 
761 	TAILQ_FOREACH(raid_cfg, &g_raid_config.raid_bdev_config_head, link) {
762 		if (!strcmp(raid_cfg->name, raid_name)) {
763 			return raid_cfg;
764 		}
765 	}
766 
767 	return raid_cfg;
768 }
769 
770 /*
771  * brief
772  * raid_bdev_config_add function adds config for newly created raid bdev.
773  *
774  * params:
775  * raid_name - name for raid bdev.
776  * strip_size - strip size in KB
777  * num_base_bdevs - number of base bdevs.
778  * level - raid level.
779  * _raid_cfg - Pointer to newly added configuration
780  */
781 int
782 raid_bdev_config_add(const char *raid_name, uint32_t strip_size, uint8_t num_base_bdevs,
783 		     enum raid_level level, struct raid_bdev_config **_raid_cfg)
784 {
785 	struct raid_bdev_config *raid_cfg;
786 
787 	raid_cfg = raid_bdev_config_find_by_name(raid_name);
788 	if (raid_cfg != NULL) {
789 		SPDK_ERRLOG("Duplicate raid bdev name found in config file %s\n",
790 			    raid_name);
791 		return -EEXIST;
792 	}
793 
794 	if (spdk_u32_is_pow2(strip_size) == false) {
795 		SPDK_ERRLOG("Invalid strip size %" PRIu32 "\n", strip_size);
796 		return -EINVAL;
797 	}
798 
799 	if (num_base_bdevs == 0) {
800 		SPDK_ERRLOG("Invalid base device count %u\n", num_base_bdevs);
801 		return -EINVAL;
802 	}
803 
804 	raid_cfg = calloc(1, sizeof(*raid_cfg));
805 	if (raid_cfg == NULL) {
806 		SPDK_ERRLOG("unable to allocate memory\n");
807 		return -ENOMEM;
808 	}
809 
810 	raid_cfg->name = strdup(raid_name);
811 	if (!raid_cfg->name) {
812 		free(raid_cfg);
813 		SPDK_ERRLOG("unable to allocate memory\n");
814 		return -ENOMEM;
815 	}
816 	raid_cfg->strip_size = strip_size;
817 	raid_cfg->num_base_bdevs = num_base_bdevs;
818 	raid_cfg->level = level;
819 
820 	raid_cfg->base_bdev = calloc(num_base_bdevs, sizeof(*raid_cfg->base_bdev));
821 	if (raid_cfg->base_bdev == NULL) {
822 		free(raid_cfg->name);
823 		free(raid_cfg);
824 		SPDK_ERRLOG("unable to allocate memory\n");
825 		return -ENOMEM;
826 	}
827 
828 	TAILQ_INSERT_TAIL(&g_raid_config.raid_bdev_config_head, raid_cfg, link);
829 	g_raid_config.total_raid_bdev++;
830 
831 	*_raid_cfg = raid_cfg;
832 	return 0;
833 }
834 
835 /*
836  * brief:
837  * raid_bdev_config_add_base_bdev function add base bdev to raid bdev config.
838  *
839  * params:
840  * raid_cfg - pointer to raid bdev configuration
841  * base_bdev_name - name of base bdev
842  * slot - Position to add base bdev
843  */
844 int
845 raid_bdev_config_add_base_bdev(struct raid_bdev_config *raid_cfg, const char *base_bdev_name,
846 			       uint8_t slot)
847 {
848 	uint8_t i;
849 	struct raid_bdev_config *tmp;
850 
851 	if (slot >= raid_cfg->num_base_bdevs) {
852 		return -EINVAL;
853 	}
854 
855 	TAILQ_FOREACH(tmp, &g_raid_config.raid_bdev_config_head, link) {
856 		for (i = 0; i < tmp->num_base_bdevs; i++) {
857 			if (tmp->base_bdev[i].name != NULL) {
858 				if (!strcmp(tmp->base_bdev[i].name, base_bdev_name)) {
859 					SPDK_ERRLOG("duplicate base bdev name %s mentioned\n",
860 						    base_bdev_name);
861 					return -EEXIST;
862 				}
863 			}
864 		}
865 	}
866 
867 	raid_cfg->base_bdev[slot].name = strdup(base_bdev_name);
868 	if (raid_cfg->base_bdev[slot].name == NULL) {
869 		SPDK_ERRLOG("unable to allocate memory\n");
870 		return -ENOMEM;
871 	}
872 
873 	return 0;
874 }
875 
876 static struct {
877 	const char *name;
878 	enum raid_level value;
879 } g_raid_level_names[] = {
880 	{ "raid0", RAID0 },
881 	{ "0", RAID0 },
882 	{ "raid5", RAID5 },
883 	{ "5", RAID5 },
884 	{ "concat", CONCAT },
885 	{ }
886 };
887 
888 /* We have to use the typedef in the function declaration to appease astyle. */
889 typedef enum raid_level raid_level_t;
890 
891 raid_level_t
892 raid_bdev_parse_raid_level(const char *str)
893 {
894 	unsigned int i;
895 
896 	assert(str != NULL);
897 
898 	for (i = 0; g_raid_level_names[i].name != NULL; i++) {
899 		if (strcasecmp(g_raid_level_names[i].name, str) == 0) {
900 			return g_raid_level_names[i].value;
901 		}
902 	}
903 
904 	return INVALID_RAID_LEVEL;
905 }
906 
907 const char *
908 raid_bdev_level_to_str(enum raid_level level)
909 {
910 	unsigned int i;
911 
912 	for (i = 0; g_raid_level_names[i].name != NULL; i++) {
913 		if (g_raid_level_names[i].value == level) {
914 			return g_raid_level_names[i].name;
915 		}
916 	}
917 
918 	return "";
919 }
920 
921 /*
922  * brief:
923  * raid_bdev_fini_start is called when bdev layer is starting the
924  * shutdown process
925  * params:
926  * none
927  * returns:
928  * none
929  */
930 static void
931 raid_bdev_fini_start(void)
932 {
933 	SPDK_DEBUGLOG(bdev_raid, "raid_bdev_fini_start\n");
934 	g_shutdown_started = true;
935 }
936 
937 /*
938  * brief:
939  * raid_bdev_exit is called on raid bdev module exit time by bdev layer
940  * params:
941  * none
942  * returns:
943  * none
944  */
945 static void
946 raid_bdev_exit(void)
947 {
948 	SPDK_DEBUGLOG(bdev_raid, "raid_bdev_exit\n");
949 	raid_bdev_free();
950 }
951 
952 /*
953  * brief:
954  * raid_bdev_get_ctx_size is used to return the context size of bdev_io for raid
955  * module
956  * params:
957  * none
958  * returns:
959  * size of spdk_bdev_io context for raid
960  */
961 static int
962 raid_bdev_get_ctx_size(void)
963 {
964 	SPDK_DEBUGLOG(bdev_raid, "raid_bdev_get_ctx_size\n");
965 	return sizeof(struct raid_bdev_io);
966 }
967 
968 /*
969  * brief:
970  * raid_bdev_can_claim_bdev is the function to check if this base_bdev can be
971  * claimed by raid bdev or not.
972  * params:
973  * bdev_name - represents base bdev name
974  * _raid_cfg - pointer to raid bdev config parsed from config file
975  * base_bdev_slot - if bdev can be claimed, it represents the base_bdev correct
976  * slot. This field is only valid if return value of this function is true
977  * returns:
978  * true - if bdev can be claimed
979  * false - if bdev can't be claimed
980  */
981 static bool
982 raid_bdev_can_claim_bdev(const char *bdev_name, struct raid_bdev_config **_raid_cfg,
983 			 uint8_t *base_bdev_slot)
984 {
985 	struct raid_bdev_config *raid_cfg;
986 	uint8_t i;
987 
988 	TAILQ_FOREACH(raid_cfg, &g_raid_config.raid_bdev_config_head, link) {
989 		for (i = 0; i < raid_cfg->num_base_bdevs; i++) {
990 			/*
991 			 * Check if the base bdev name is part of raid bdev configuration.
992 			 * If match is found then return true and the slot information where
993 			 * this base bdev should be inserted in raid bdev
994 			 */
995 			if (!strcmp(bdev_name, raid_cfg->base_bdev[i].name)) {
996 				*_raid_cfg = raid_cfg;
997 				*base_bdev_slot = i;
998 				return true;
999 			}
1000 		}
1001 	}
1002 
1003 	return false;
1004 }
1005 
1006 
1007 static struct spdk_bdev_module g_raid_if = {
1008 	.name = "raid",
1009 	.module_init = raid_bdev_init,
1010 	.fini_start = raid_bdev_fini_start,
1011 	.module_fini = raid_bdev_exit,
1012 	.get_ctx_size = raid_bdev_get_ctx_size,
1013 	.examine_config = raid_bdev_examine,
1014 	.async_init = false,
1015 	.async_fini = false,
1016 };
1017 SPDK_BDEV_MODULE_REGISTER(raid, &g_raid_if)
1018 
/*
 * brief:
 * raid_bdev_init is the initialization function for raid bdev module. There
 * is nothing to set up, so it always succeeds.
 * params:
 * none
 * returns:
 * 0 - success
 */
static int
raid_bdev_init(void)
{
	return 0;
}
1033 
1034 /*
1035  * brief:
1036  * raid_bdev_create allocates raid bdev based on passed configuration
1037  * params:
1038  * raid_cfg - configuration of raid bdev
1039  * returns:
1040  * 0 - success
1041  * non zero - failure
1042  */
1043 int
1044 raid_bdev_create(struct raid_bdev_config *raid_cfg)
1045 {
1046 	struct raid_bdev *raid_bdev;
1047 	struct spdk_bdev *raid_bdev_gen;
1048 	struct raid_bdev_module *module;
1049 
1050 	module = raid_bdev_module_find(raid_cfg->level);
1051 	if (module == NULL) {
1052 		SPDK_ERRLOG("Unsupported raid level '%d'\n", raid_cfg->level);
1053 		return -EINVAL;
1054 	}
1055 
1056 	assert(module->base_bdevs_min != 0);
1057 	if (raid_cfg->num_base_bdevs < module->base_bdevs_min) {
1058 		SPDK_ERRLOG("At least %u base devices required for %s\n",
1059 			    module->base_bdevs_min,
1060 			    raid_bdev_level_to_str(raid_cfg->level));
1061 		return -EINVAL;
1062 	}
1063 
1064 	raid_bdev = calloc(1, sizeof(*raid_bdev));
1065 	if (!raid_bdev) {
1066 		SPDK_ERRLOG("Unable to allocate memory for raid bdev\n");
1067 		return -ENOMEM;
1068 	}
1069 
1070 	raid_bdev->module = module;
1071 	raid_bdev->num_base_bdevs = raid_cfg->num_base_bdevs;
1072 	raid_bdev->base_bdev_info = calloc(raid_bdev->num_base_bdevs,
1073 					   sizeof(struct raid_base_bdev_info));
1074 	if (!raid_bdev->base_bdev_info) {
1075 		SPDK_ERRLOG("Unable able to allocate base bdev info\n");
1076 		free(raid_bdev);
1077 		return -ENOMEM;
1078 	}
1079 
1080 	/* strip_size_kb is from the rpc param.  strip_size is in blocks and used
1081 	 * internally and set later.
1082 	 */
1083 	raid_bdev->strip_size = 0;
1084 	raid_bdev->strip_size_kb = raid_cfg->strip_size;
1085 	raid_bdev->state = RAID_BDEV_STATE_CONFIGURING;
1086 	raid_bdev->config = raid_cfg;
1087 	raid_bdev->level = raid_cfg->level;
1088 
1089 	raid_bdev_gen = &raid_bdev->bdev;
1090 
1091 	raid_bdev_gen->name = strdup(raid_cfg->name);
1092 	if (!raid_bdev_gen->name) {
1093 		SPDK_ERRLOG("Unable to allocate name for raid\n");
1094 		free(raid_bdev->base_bdev_info);
1095 		free(raid_bdev);
1096 		return -ENOMEM;
1097 	}
1098 
1099 	raid_bdev_gen->product_name = "Raid Volume";
1100 	raid_bdev_gen->ctxt = raid_bdev;
1101 	raid_bdev_gen->fn_table = &g_raid_bdev_fn_table;
1102 	raid_bdev_gen->module = &g_raid_if;
1103 	raid_bdev_gen->write_cache = 0;
1104 
1105 	TAILQ_INSERT_TAIL(&g_raid_bdev_configuring_list, raid_bdev, state_link);
1106 	TAILQ_INSERT_TAIL(&g_raid_bdev_list, raid_bdev, global_link);
1107 
1108 	raid_cfg->raid_bdev = raid_bdev;
1109 
1110 	return 0;
1111 }
1112 
1113 /*
1114  * brief
1115  * raid_bdev_alloc_base_bdev_resource allocates resource of base bdev.
1116  * params:
1117  * raid_bdev - pointer to raid bdev
1118  * bdev_name - base bdev name
1119  * base_bdev_slot - position to add base bdev
1120  * returns:
1121  * 0 - success
1122  * non zero - failure
1123  */
1124 static int
1125 raid_bdev_alloc_base_bdev_resource(struct raid_bdev *raid_bdev, const char *bdev_name,
1126 				   uint8_t base_bdev_slot)
1127 {
1128 	struct spdk_bdev_desc *desc;
1129 	struct spdk_bdev *bdev;
1130 	int rc;
1131 
1132 	rc = spdk_bdev_open_ext(bdev_name, true, raid_bdev_event_base_bdev, NULL, &desc);
1133 	if (rc != 0) {
1134 		if (rc != -ENODEV) {
1135 			SPDK_ERRLOG("Unable to create desc on bdev '%s'\n", bdev_name);
1136 		}
1137 		return rc;
1138 	}
1139 
1140 	bdev = spdk_bdev_desc_get_bdev(desc);
1141 
1142 	rc = spdk_bdev_module_claim_bdev(bdev, NULL, &g_raid_if);
1143 	if (rc != 0) {
1144 		SPDK_ERRLOG("Unable to claim this bdev as it is already claimed\n");
1145 		spdk_bdev_close(desc);
1146 		return rc;
1147 	}
1148 
1149 	SPDK_DEBUGLOG(bdev_raid, "bdev %s is claimed\n", bdev_name);
1150 
1151 	assert(raid_bdev->state != RAID_BDEV_STATE_ONLINE);
1152 	assert(base_bdev_slot < raid_bdev->num_base_bdevs);
1153 
1154 	raid_bdev->base_bdev_info[base_bdev_slot].thread = spdk_get_thread();
1155 	raid_bdev->base_bdev_info[base_bdev_slot].bdev = bdev;
1156 	raid_bdev->base_bdev_info[base_bdev_slot].desc = desc;
1157 	raid_bdev->num_base_bdevs_discovered++;
1158 	assert(raid_bdev->num_base_bdevs_discovered <= raid_bdev->num_base_bdevs);
1159 
1160 	return 0;
1161 }
1162 
1163 /*
1164  * brief:
1165  * If raid bdev config is complete, then only register the raid bdev to
1166  * bdev layer and remove this raid bdev from configuring list and
1167  * insert the raid bdev to configured list
1168  * params:
1169  * raid_bdev - pointer to raid bdev
1170  * returns:
1171  * 0 - success
1172  * non zero - failure
1173  */
1174 static int
1175 raid_bdev_configure(struct raid_bdev *raid_bdev)
1176 {
1177 	uint32_t blocklen = 0;
1178 	struct spdk_bdev *raid_bdev_gen;
1179 	struct raid_base_bdev_info *base_info;
1180 	int rc = 0;
1181 
1182 	assert(raid_bdev->state == RAID_BDEV_STATE_CONFIGURING);
1183 	assert(raid_bdev->num_base_bdevs_discovered == raid_bdev->num_base_bdevs);
1184 
1185 	RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
1186 		/* Check blocklen for all base bdevs that it should be same */
1187 		if (blocklen == 0) {
1188 			blocklen = base_info->bdev->blocklen;
1189 		} else if (blocklen != base_info->bdev->blocklen) {
1190 			/*
1191 			 * Assumption is that all the base bdevs for any raid bdev should
1192 			 * have same blocklen
1193 			 */
1194 			SPDK_ERRLOG("Blocklen of various bdevs not matching\n");
1195 			return -EINVAL;
1196 		}
1197 	}
1198 	assert(blocklen > 0);
1199 
1200 	/* The strip_size_kb is read in from user in KB. Convert to blocks here for
1201 	 * internal use.
1202 	 */
1203 	raid_bdev->strip_size = (raid_bdev->strip_size_kb * 1024) / blocklen;
1204 	raid_bdev->strip_size_shift = spdk_u32log2(raid_bdev->strip_size);
1205 	raid_bdev->blocklen_shift = spdk_u32log2(blocklen);
1206 
1207 	raid_bdev_gen = &raid_bdev->bdev;
1208 	raid_bdev_gen->blocklen = blocklen;
1209 
1210 	rc = raid_bdev->module->start(raid_bdev);
1211 	if (rc != 0) {
1212 		SPDK_ERRLOG("raid module startup callback failed\n");
1213 		return rc;
1214 	}
1215 	raid_bdev->state = RAID_BDEV_STATE_ONLINE;
1216 	SPDK_DEBUGLOG(bdev_raid, "io device register %p\n", raid_bdev);
1217 	SPDK_DEBUGLOG(bdev_raid, "blockcnt %" PRIu64 ", blocklen %u\n",
1218 		      raid_bdev_gen->blockcnt, raid_bdev_gen->blocklen);
1219 	spdk_io_device_register(raid_bdev, raid_bdev_create_cb, raid_bdev_destroy_cb,
1220 				sizeof(struct raid_bdev_io_channel),
1221 				raid_bdev->bdev.name);
1222 	rc = spdk_bdev_register(raid_bdev_gen);
1223 	if (rc != 0) {
1224 		SPDK_ERRLOG("Unable to register raid bdev and stay at configuring state\n");
1225 		if (raid_bdev->module->stop != NULL) {
1226 			raid_bdev->module->stop(raid_bdev);
1227 		}
1228 		spdk_io_device_unregister(raid_bdev, NULL);
1229 		raid_bdev->state = RAID_BDEV_STATE_CONFIGURING;
1230 		return rc;
1231 	}
1232 	SPDK_DEBUGLOG(bdev_raid, "raid bdev generic %p\n", raid_bdev_gen);
1233 	TAILQ_REMOVE(&g_raid_bdev_configuring_list, raid_bdev, state_link);
1234 	TAILQ_INSERT_TAIL(&g_raid_bdev_configured_list, raid_bdev, state_link);
1235 	SPDK_DEBUGLOG(bdev_raid, "raid bdev is created with name %s, raid_bdev %p\n",
1236 		      raid_bdev_gen->name, raid_bdev);
1237 
1238 	return 0;
1239 }
1240 
1241 /*
1242  * brief:
1243  * If raid bdev is online and registered, change the bdev state to
1244  * configuring and unregister this raid device. Queue this raid device
1245  * in configuring list
1246  * params:
1247  * raid_bdev - pointer to raid bdev
1248  * cb_fn - callback function
1249  * cb_arg - argument to callback function
1250  * returns:
1251  * none
1252  */
1253 static void
1254 raid_bdev_deconfigure(struct raid_bdev *raid_bdev, raid_bdev_destruct_cb cb_fn,
1255 		      void *cb_arg)
1256 {
1257 	if (raid_bdev->state != RAID_BDEV_STATE_ONLINE) {
1258 		if (cb_fn) {
1259 			cb_fn(cb_arg, 0);
1260 		}
1261 		return;
1262 	}
1263 
1264 	assert(raid_bdev->num_base_bdevs == raid_bdev->num_base_bdevs_discovered);
1265 	TAILQ_REMOVE(&g_raid_bdev_configured_list, raid_bdev, state_link);
1266 	raid_bdev->state = RAID_BDEV_STATE_OFFLINE;
1267 	assert(raid_bdev->num_base_bdevs_discovered);
1268 	TAILQ_INSERT_TAIL(&g_raid_bdev_offline_list, raid_bdev, state_link);
1269 	SPDK_DEBUGLOG(bdev_raid, "raid bdev state changing from online to offline\n");
1270 
1271 	spdk_bdev_unregister(&raid_bdev->bdev, cb_fn, cb_arg);
1272 }
1273 
1274 /*
1275  * brief:
1276  * raid_bdev_find_by_base_bdev function finds the raid bdev which has
1277  *  claimed the base bdev.
1278  * params:
1279  * base_bdev - pointer to base bdev pointer
1280  * _raid_bdev - Reference to pointer to raid bdev
1281  * _base_info - Reference to the raid base bdev info.
1282  * returns:
1283  * true - if the raid bdev is found.
1284  * false - if the raid bdev is not found.
1285  */
1286 static bool
1287 raid_bdev_find_by_base_bdev(struct spdk_bdev *base_bdev, struct raid_bdev **_raid_bdev,
1288 			    struct raid_base_bdev_info **_base_info)
1289 {
1290 	struct raid_bdev *raid_bdev;
1291 	struct raid_base_bdev_info *base_info;
1292 
1293 	TAILQ_FOREACH(raid_bdev, &g_raid_bdev_list, global_link) {
1294 		RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
1295 			if (base_info->bdev == base_bdev) {
1296 				*_raid_bdev = raid_bdev;
1297 				*_base_info = base_info;
1298 				return true;
1299 			}
1300 		}
1301 	}
1302 
1303 	return false;
1304 }
1305 
1306 /*
1307  * brief:
1308  * raid_bdev_remove_base_bdev function is called by below layers when base_bdev
1309  * is removed. This function checks if this base bdev is part of any raid bdev
1310  * or not. If yes, it takes necessary action on that particular raid bdev.
1311  * params:
1312  * base_bdev - pointer to base bdev pointer which got removed
1313  * returns:
1314  * none
1315  */
1316 static void
1317 raid_bdev_remove_base_bdev(struct spdk_bdev *base_bdev)
1318 {
1319 	struct raid_bdev	*raid_bdev = NULL;
1320 	struct raid_base_bdev_info *base_info;
1321 
1322 	SPDK_DEBUGLOG(bdev_raid, "raid_bdev_remove_base_bdev\n");
1323 
1324 	/* Find the raid_bdev which has claimed this base_bdev */
1325 	if (!raid_bdev_find_by_base_bdev(base_bdev, &raid_bdev, &base_info)) {
1326 		SPDK_ERRLOG("bdev to remove '%s' not found\n", base_bdev->name);
1327 		return;
1328 	}
1329 
1330 	assert(base_info->desc);
1331 	base_info->remove_scheduled = true;
1332 
1333 	if (raid_bdev->destruct_called == true ||
1334 	    raid_bdev->state == RAID_BDEV_STATE_CONFIGURING) {
1335 		/*
1336 		 * As raid bdev is not registered yet or already unregistered,
1337 		 * so cleanup should be done here itself.
1338 		 */
1339 		raid_bdev_free_base_bdev_resource(raid_bdev, base_info);
1340 		if (raid_bdev->num_base_bdevs_discovered == 0) {
1341 			/* There is no base bdev for this raid, so free the raid device. */
1342 			raid_bdev_cleanup(raid_bdev);
1343 			return;
1344 		}
1345 	}
1346 
1347 	raid_bdev_deconfigure(raid_bdev, NULL, NULL);
1348 }
1349 
1350 /*
1351  * brief:
1352  * raid_bdev_event_base_bdev function is called by below layers when base_bdev
1353  * triggers asynchronous event.
1354  * params:
1355  * type - event details.
1356  * bdev - bdev that triggered event.
1357  * event_ctx - context for event.
1358  * returns:
1359  * none
1360  */
1361 static void
1362 raid_bdev_event_base_bdev(enum spdk_bdev_event_type type, struct spdk_bdev *bdev,
1363 			  void *event_ctx)
1364 {
1365 	switch (type) {
1366 	case SPDK_BDEV_EVENT_REMOVE:
1367 		raid_bdev_remove_base_bdev(bdev);
1368 		break;
1369 	default:
1370 		SPDK_NOTICELOG("Unsupported bdev event: type %d\n", type);
1371 		break;
1372 	}
1373 }
1374 
1375 /*
1376  * brief:
1377  * Remove base bdevs from the raid bdev one by one.  Skip any base bdev which
1378  *  doesn't exist.
1379  * params:
1380  * raid_cfg - pointer to raid bdev config.
1381  * cb_fn - callback function
1382  * cb_ctx - argument to callback function
1383  */
1384 void
1385 raid_bdev_remove_base_devices(struct raid_bdev_config *raid_cfg,
1386 			      raid_bdev_destruct_cb cb_fn, void *cb_arg)
1387 {
1388 	struct raid_bdev		*raid_bdev;
1389 	struct raid_base_bdev_info	*base_info;
1390 
1391 	SPDK_DEBUGLOG(bdev_raid, "raid_bdev_remove_base_devices\n");
1392 
1393 	raid_bdev = raid_cfg->raid_bdev;
1394 	if (raid_bdev == NULL) {
1395 		SPDK_DEBUGLOG(bdev_raid, "raid bdev %s doesn't exist now\n", raid_cfg->name);
1396 		if (cb_fn) {
1397 			cb_fn(cb_arg, 0);
1398 		}
1399 		return;
1400 	}
1401 
1402 	if (raid_bdev->destroy_started) {
1403 		SPDK_DEBUGLOG(bdev_raid, "destroying raid bdev %s is already started\n",
1404 			      raid_cfg->name);
1405 		if (cb_fn) {
1406 			cb_fn(cb_arg, -EALREADY);
1407 		}
1408 		return;
1409 	}
1410 
1411 	raid_bdev->destroy_started = true;
1412 
1413 	RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
1414 		if (base_info->bdev == NULL) {
1415 			continue;
1416 		}
1417 
1418 		assert(base_info->desc);
1419 		base_info->remove_scheduled = true;
1420 
1421 		if (raid_bdev->destruct_called == true ||
1422 		    raid_bdev->state == RAID_BDEV_STATE_CONFIGURING) {
1423 			/*
1424 			 * As raid bdev is not registered yet or already unregistered,
1425 			 * so cleanup should be done here itself.
1426 			 */
1427 			raid_bdev_free_base_bdev_resource(raid_bdev, base_info);
1428 		}
1429 	}
1430 
1431 	if (raid_bdev->num_base_bdevs_discovered == 0) {
1432 		/* There is no base bdev for this raid, so free the raid device. */
1433 		raid_bdev_cleanup(raid_bdev);
1434 		if (cb_fn) {
1435 			cb_fn(cb_arg, 0);
1436 		}
1437 	} else {
1438 		raid_bdev_deconfigure(raid_bdev, cb_fn, cb_arg);
1439 	}
1440 }
1441 
1442 /*
1443  * brief:
1444  * raid_bdev_add_base_device function is the actual function which either adds
1445  * the nvme base device to existing raid bdev or create a new raid bdev. It also claims
1446  * the base device and keep the open descriptor.
1447  * params:
1448  * raid_cfg - pointer to raid bdev config
1449  * bdev - pointer to base bdev
1450  * base_bdev_slot - position to add base bdev
1451  * returns:
1452  * 0 - success
1453  * non zero - failure
1454  */
1455 static int
1456 raid_bdev_add_base_device(struct raid_bdev_config *raid_cfg, const char *bdev_name,
1457 			  uint8_t base_bdev_slot)
1458 {
1459 	struct raid_bdev	*raid_bdev;
1460 	int			rc;
1461 
1462 	raid_bdev = raid_cfg->raid_bdev;
1463 	if (!raid_bdev) {
1464 		SPDK_ERRLOG("Raid bdev '%s' is not created yet\n", raid_cfg->name);
1465 		return -ENODEV;
1466 	}
1467 
1468 	rc = raid_bdev_alloc_base_bdev_resource(raid_bdev, bdev_name, base_bdev_slot);
1469 	if (rc != 0) {
1470 		if (rc != -ENODEV) {
1471 			SPDK_ERRLOG("Failed to allocate resource for bdev '%s'\n", bdev_name);
1472 		}
1473 		return rc;
1474 	}
1475 
1476 	assert(raid_bdev->num_base_bdevs_discovered <= raid_bdev->num_base_bdevs);
1477 
1478 	if (raid_bdev->num_base_bdevs_discovered == raid_bdev->num_base_bdevs) {
1479 		rc = raid_bdev_configure(raid_bdev);
1480 		if (rc != 0) {
1481 			SPDK_ERRLOG("Failed to configure raid bdev\n");
1482 			return rc;
1483 		}
1484 	}
1485 
1486 	return 0;
1487 }
1488 
1489 /*
1490  * brief:
1491  * Add base bdevs to the raid bdev one by one.  Skip any base bdev which doesn't
1492  *  exist or fails to add. If all base bdevs are successfully added, the raid bdev
1493  *  moves to the configured state and becomes available. Otherwise, the raid bdev
1494  *  stays at the configuring state with added base bdevs.
1495  * params:
1496  * raid_cfg - pointer to raid bdev config
1497  * returns:
1498  * 0 - The raid bdev moves to the configured state or stays at the configuring
1499  *     state with added base bdevs due to any nonexistent base bdev.
1500  * non zero - Failed to add any base bdev and stays at the configuring state with
1501  *            added base bdevs.
1502  */
1503 int
1504 raid_bdev_add_base_devices(struct raid_bdev_config *raid_cfg)
1505 {
1506 	uint8_t	i;
1507 	int	rc = 0, _rc;
1508 
1509 	for (i = 0; i < raid_cfg->num_base_bdevs; i++) {
1510 		_rc = raid_bdev_add_base_device(raid_cfg, raid_cfg->base_bdev[i].name, i);
1511 		if (_rc == -ENODEV) {
1512 			SPDK_DEBUGLOG(bdev_raid, "base bdev %s doesn't exist now\n",
1513 				      raid_cfg->base_bdev[i].name);
1514 		} else if (_rc != 0) {
1515 			SPDK_ERRLOG("Failed to add base bdev %s to RAID bdev %s: %s\n",
1516 				    raid_cfg->base_bdev[i].name, raid_cfg->name,
1517 				    spdk_strerror(-_rc));
1518 			if (rc == 0) {
1519 				rc = _rc;
1520 			}
1521 		}
1522 	}
1523 
1524 	return rc;
1525 }
1526 
1527 /*
1528  * brief:
1529  * raid_bdev_examine function is the examine function call by the below layers
1530  * like bdev_nvme layer. This function will check if this base bdev can be
1531  * claimed by this raid bdev or not.
1532  * params:
1533  * bdev - pointer to base bdev
1534  * returns:
1535  * none
1536  */
1537 static void
1538 raid_bdev_examine(struct spdk_bdev *bdev)
1539 {
1540 	struct raid_bdev_config	*raid_cfg;
1541 	uint8_t			base_bdev_slot;
1542 
1543 	if (raid_bdev_can_claim_bdev(bdev->name, &raid_cfg, &base_bdev_slot)) {
1544 		raid_bdev_add_base_device(raid_cfg, bdev->name, base_bdev_slot);
1545 	} else {
1546 		SPDK_DEBUGLOG(bdev_raid, "bdev %s can't be claimed\n",
1547 			      bdev->name);
1548 	}
1549 
1550 	spdk_bdev_module_examine_done(&g_raid_if);
1551 }
1552 
/* Register the "bdev_raid" log component named by this module's SPDK_DEBUGLOG() calls */
SPDK_LOG_REGISTER_COMPONENT(bdev_raid)
1555