xref: /spdk/module/bdev/raid/bdev_raid.c (revision 712a3f69d32632bf6c862f00200f7f437d3f7529)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright (c) Intel Corporation.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #include "bdev_raid.h"
35 #include "spdk/env.h"
36 #include "spdk/io_channel.h"
37 #include "spdk/conf.h"
38 #include "spdk_internal/log.h"
39 #include "spdk/string.h"
40 #include "spdk/util.h"
41 #include "spdk/json.h"
42 #include "spdk/string.h"
43 
44 static bool g_shutdown_started = false;
45 
46 /* raid bdev config as read from config file */
47 struct raid_config	g_raid_config = {
48 	.raid_bdev_config_head = TAILQ_HEAD_INITIALIZER(g_raid_config.raid_bdev_config_head),
49 };
50 
51 /*
52  * List of raid bdev in configured list, these raid bdevs are registered with
53  * bdev layer
54  */
55 struct raid_configured_tailq	g_raid_bdev_configured_list = TAILQ_HEAD_INITIALIZER(
56 			g_raid_bdev_configured_list);
57 
58 /* List of raid bdev in configuring list */
59 struct raid_configuring_tailq	g_raid_bdev_configuring_list = TAILQ_HEAD_INITIALIZER(
60 			g_raid_bdev_configuring_list);
61 
62 /* List of all raid bdevs */
63 struct raid_all_tailq		g_raid_bdev_list = TAILQ_HEAD_INITIALIZER(g_raid_bdev_list);
64 
65 /* List of all raid bdevs that are offline */
66 struct raid_offline_tailq	g_raid_bdev_offline_list = TAILQ_HEAD_INITIALIZER(
67 			g_raid_bdev_offline_list);
68 
69 static TAILQ_HEAD(, raid_bdev_module) g_raid_modules = TAILQ_HEAD_INITIALIZER(g_raid_modules);
70 
71 static struct raid_bdev_module *raid_bdev_module_find(enum raid_level level)
72 {
73 	struct raid_bdev_module *raid_module;
74 
75 	TAILQ_FOREACH(raid_module, &g_raid_modules, link) {
76 		if (raid_module->level == level) {
77 			return raid_module;
78 		}
79 	}
80 
81 	return NULL;
82 }
83 
84 void raid_bdev_module_list_add(struct raid_bdev_module *raid_module)
85 {
86 	if (raid_bdev_module_find(raid_module->level) != NULL) {
87 		SPDK_ERRLOG("module for raid level '%s' already registered.\n",
88 			    raid_bdev_level_to_str(raid_module->level));
89 		assert(false);
90 	} else {
91 		TAILQ_INSERT_TAIL(&g_raid_modules, raid_module, link);
92 	}
93 }
94 
95 /* Function declarations */
96 static void	raid_bdev_examine(struct spdk_bdev *bdev);
97 static int	raid_bdev_init(void);
98 static void	raid_bdev_deconfigure(struct raid_bdev *raid_bdev,
99 				      raid_bdev_destruct_cb cb_fn, void *cb_arg);
100 static void	raid_bdev_remove_base_bdev(void *ctx);
101 
102 /*
103  * brief:
104  * raid_bdev_create_cb function is a cb function for raid bdev which creates the
105  * hierarchy from raid bdev to base bdev io channels. It will be called per core
106  * params:
107  * io_device - pointer to raid bdev io device represented by raid_bdev
108  * ctx_buf - pointer to context buffer for raid bdev io channel
109  * returns:
110  * 0 - success
111  * non zero - failure
112  */
113 static int
114 raid_bdev_create_cb(void *io_device, void *ctx_buf)
115 {
116 	struct raid_bdev            *raid_bdev = io_device;
117 	struct raid_bdev_io_channel *raid_ch = ctx_buf;
118 	uint8_t i;
119 
120 	SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_create_cb, %p\n", raid_ch);
121 
122 	assert(raid_bdev != NULL);
123 	assert(raid_bdev->state == RAID_BDEV_STATE_ONLINE);
124 
125 	raid_ch->num_channels = raid_bdev->num_base_bdevs;
126 
127 	raid_ch->base_channel = calloc(raid_ch->num_channels,
128 				       sizeof(struct spdk_io_channel *));
129 	if (!raid_ch->base_channel) {
130 		SPDK_ERRLOG("Unable to allocate base bdevs io channel\n");
131 		return -ENOMEM;
132 	}
133 	for (i = 0; i < raid_ch->num_channels; i++) {
134 		/*
135 		 * Get the spdk_io_channel for all the base bdevs. This is used during
136 		 * split logic to send the respective child bdev ios to respective base
137 		 * bdev io channel.
138 		 */
139 		raid_ch->base_channel[i] = spdk_bdev_get_io_channel(
140 						   raid_bdev->base_bdev_info[i].desc);
141 		if (!raid_ch->base_channel[i]) {
142 			uint8_t j;
143 
144 			for (j = 0; j < i; j++) {
145 				spdk_put_io_channel(raid_ch->base_channel[j]);
146 			}
147 			free(raid_ch->base_channel);
148 			raid_ch->base_channel = NULL;
149 			SPDK_ERRLOG("Unable to create io channel for base bdev\n");
150 			return -ENOMEM;
151 		}
152 	}
153 
154 	return 0;
155 }
156 
157 /*
158  * brief:
159  * raid_bdev_destroy_cb function is a cb function for raid bdev which deletes the
160  * hierarchy from raid bdev to base bdev io channels. It will be called per core
161  * params:
162  * io_device - pointer to raid bdev io device represented by raid_bdev
163  * ctx_buf - pointer to context buffer for raid bdev io channel
164  * returns:
165  * none
166  */
167 static void
168 raid_bdev_destroy_cb(void *io_device, void *ctx_buf)
169 {
170 	struct raid_bdev_io_channel *raid_ch = ctx_buf;
171 	uint8_t i;
172 
173 	SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_destroy_cb\n");
174 
175 	assert(raid_ch != NULL);
176 	assert(raid_ch->base_channel);
177 	for (i = 0; i < raid_ch->num_channels; i++) {
178 		/* Free base bdev channels */
179 		assert(raid_ch->base_channel[i] != NULL);
180 		spdk_put_io_channel(raid_ch->base_channel[i]);
181 	}
182 	free(raid_ch->base_channel);
183 	raid_ch->base_channel = NULL;
184 }
185 
186 /*
187  * brief:
188  * raid_bdev_cleanup is used to cleanup and free raid_bdev related data
189  * structures.
190  * params:
191  * raid_bdev - pointer to raid_bdev
192  * returns:
193  * none
194  */
195 static void
196 raid_bdev_cleanup(struct raid_bdev *raid_bdev)
197 {
198 	SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_cleanup, %p name %s, state %u, config %p\n",
199 		      raid_bdev,
200 		      raid_bdev->bdev.name, raid_bdev->state, raid_bdev->config);
201 	if (raid_bdev->state == RAID_BDEV_STATE_CONFIGURING) {
202 		TAILQ_REMOVE(&g_raid_bdev_configuring_list, raid_bdev, state_link);
203 	} else if (raid_bdev->state == RAID_BDEV_STATE_OFFLINE) {
204 		TAILQ_REMOVE(&g_raid_bdev_offline_list, raid_bdev, state_link);
205 	} else {
206 		assert(0);
207 	}
208 	TAILQ_REMOVE(&g_raid_bdev_list, raid_bdev, global_link);
209 	free(raid_bdev->bdev.name);
210 	free(raid_bdev->base_bdev_info);
211 	if (raid_bdev->config) {
212 		raid_bdev->config->raid_bdev = NULL;
213 	}
214 	free(raid_bdev);
215 }
216 
217 /*
218  * brief:
219  * free resource of base bdev for raid bdev
220  * params:
221  * raid_bdev - pointer to raid bdev
222  * base_info - raid base bdev info
223  * returns:
224  * 0 - success
225  * non zero - failure
226  */
227 static void
228 raid_bdev_free_base_bdev_resource(struct raid_bdev *raid_bdev,
229 				  struct raid_base_bdev_info *base_info)
230 {
231 	spdk_bdev_module_release_bdev(base_info->bdev);
232 	spdk_bdev_close(base_info->desc);
233 	base_info->desc = NULL;
234 	base_info->bdev = NULL;
235 
236 	assert(raid_bdev->num_base_bdevs_discovered);
237 	raid_bdev->num_base_bdevs_discovered--;
238 }
239 
240 /*
241  * brief:
242  * raid_bdev_destruct is the destruct function table pointer for raid bdev
243  * params:
244  * ctxt - pointer to raid_bdev
245  * returns:
246  * 0 - success
247  * non zero - failure
248  */
249 static int
250 raid_bdev_destruct(void *ctxt)
251 {
252 	struct raid_bdev *raid_bdev = ctxt;
253 	struct raid_base_bdev_info *base_info;
254 
255 	SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_destruct\n");
256 
257 	raid_bdev->destruct_called = true;
258 	RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
259 		/*
260 		 * Close all base bdev descriptors for which call has come from below
261 		 * layers.  Also close the descriptors if we have started shutdown.
262 		 */
263 		if (g_shutdown_started ||
264 		    ((base_info->remove_scheduled == true) &&
265 		     (base_info->bdev != NULL))) {
266 			raid_bdev_free_base_bdev_resource(raid_bdev, base_info);
267 		}
268 	}
269 
270 	if (g_shutdown_started) {
271 		TAILQ_REMOVE(&g_raid_bdev_configured_list, raid_bdev, state_link);
272 		if (raid_bdev->module->stop != NULL) {
273 			raid_bdev->module->stop(raid_bdev);
274 		}
275 		raid_bdev->state = RAID_BDEV_STATE_OFFLINE;
276 		TAILQ_INSERT_TAIL(&g_raid_bdev_offline_list, raid_bdev, state_link);
277 	}
278 
279 	spdk_io_device_unregister(raid_bdev, NULL);
280 
281 	if (raid_bdev->num_base_bdevs_discovered == 0) {
282 		/* Free raid_bdev when there are no base bdevs left */
283 		SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid bdev base bdevs is 0, going to free all in destruct\n");
284 		raid_bdev_cleanup(raid_bdev);
285 	}
286 
287 	return 0;
288 }
289 
290 void
291 raid_bdev_io_complete(struct raid_bdev_io *raid_io, enum spdk_bdev_io_status status)
292 {
293 	struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io);
294 
295 	spdk_bdev_io_complete(bdev_io, status);
296 }
297 
298 /*
299  * brief:
300  * raid_bdev_base_io_completion is the completion callback for member disk requests
301  * params:
302  * bdev_io - pointer to member disk requested bdev_io
303  * success - true if successful, false if unsuccessful
304  * cb_arg - callback argument (parent raid_bdev_io)
305  * returns:
306  * none
307  */
308 void
309 raid_bdev_base_io_completion(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
310 {
311 	struct raid_bdev_io *raid_io = cb_arg;
312 
313 	spdk_bdev_free_io(bdev_io);
314 
315 	if (!success) {
316 		raid_io->base_bdev_io_status = SPDK_BDEV_IO_STATUS_FAILED;
317 	}
318 
319 	raid_io->base_bdev_io_completed++;
320 	if (raid_io->base_bdev_io_completed == raid_io->base_bdev_io_expected) {
321 		raid_bdev_io_complete(raid_io, raid_io->base_bdev_io_status);
322 	}
323 }
324 
325 /*
326  * brief:
327  * raid_bdev_queue_io_wait function processes the IO which failed to submit.
328  * It will try to queue the IOs after storing the context to bdev wait queue logic.
329  * params:
330  * raid_io - pointer to raid_bdev_io
331  * bdev - the block device that the IO is submitted to
332  * ch - io channel
333  * cb_fn - callback when the spdk_bdev_io for bdev becomes available
334  * returns:
335  * none
336  */
337 void
338 raid_bdev_queue_io_wait(struct raid_bdev_io *raid_io, struct spdk_bdev *bdev,
339 			struct spdk_io_channel *ch, spdk_bdev_io_wait_cb cb_fn)
340 {
341 	raid_io->waitq_entry.bdev = bdev;
342 	raid_io->waitq_entry.cb_fn = cb_fn;
343 	raid_io->waitq_entry.cb_arg = raid_io;
344 	spdk_bdev_queue_io_wait(bdev, ch, &raid_io->waitq_entry);
345 }
346 
static void raid_bdev_submit_reset_request(struct raid_bdev_io *raid_io);

/*
 * brief:
 * _raid_bdev_submit_reset_request adapts raid_bdev_submit_reset_request to a
 * callback that takes a void pointer argument.
 * params:
 * _raid_io - pointer to raid_bdev_io, passed as void pointer
 * returns:
 * none
 */
static void
_raid_bdev_submit_reset_request(void *_raid_io)
{
	raid_bdev_submit_reset_request(_raid_io);
}
357 
358 /*
359  * brief:
360  * raid_bdev_submit_reset_request function submits reset requests
361  * to member disks; it will submit as many as possible unless a reset fails with -ENOMEM, in
362  * which case it will queue it for later submission
363  * params:
364  * raid_io
365  * returns:
366  * none
367  */
368 static void
369 raid_bdev_submit_reset_request(struct raid_bdev_io *raid_io)
370 {
371 	struct raid_bdev		*raid_bdev;
372 	int				ret;
373 	uint8_t				i;
374 	struct raid_base_bdev_info	*base_info;
375 	struct spdk_io_channel		*base_ch;
376 
377 	raid_bdev = raid_io->raid_bdev;
378 
379 	raid_io->base_bdev_io_expected = raid_bdev->num_base_bdevs;
380 
381 	while (raid_io->base_bdev_io_submitted < raid_bdev->num_base_bdevs) {
382 		i = raid_io->base_bdev_io_submitted;
383 		base_info = &raid_bdev->base_bdev_info[i];
384 		base_ch = raid_io->raid_ch->base_channel[i];
385 		ret = spdk_bdev_reset(base_info->desc, base_ch,
386 				      raid_bdev_base_io_completion, raid_io);
387 		if (ret == 0) {
388 			raid_io->base_bdev_io_submitted++;
389 		} else if (ret == -ENOMEM) {
390 			raid_bdev_queue_io_wait(raid_io, base_info->bdev, base_ch,
391 						_raid_bdev_submit_reset_request);
392 			return;
393 		} else {
394 			SPDK_ERRLOG("bdev io submit error not due to ENOMEM, it should not happen\n");
395 			assert(false);
396 			raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED);
397 			return;
398 		}
399 	}
400 }
401 
402 /*
403  * brief:
404  * Callback function to spdk_bdev_io_get_buf.
405  * params:
406  * ch - pointer to raid bdev io channel
407  * bdev_io - pointer to parent bdev_io on raid bdev device
408  * success - True if buffer is allocated or false otherwise.
409  * returns:
410  * none
411  */
412 static void
413 raid_bdev_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io,
414 		     bool success)
415 {
416 	struct raid_bdev_io *raid_io = (struct raid_bdev_io *)bdev_io->driver_ctx;
417 
418 	if (!success) {
419 		raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED);
420 		return;
421 	}
422 
423 	raid_io->raid_bdev->module->submit_rw_request(raid_io);
424 }
425 
426 /*
427  * brief:
428  * raid_bdev_submit_request function is the submit_request function pointer of
429  * raid bdev function table. This is used to submit the io on raid_bdev to below
430  * layers.
431  * params:
432  * ch - pointer to raid bdev io channel
433  * bdev_io - pointer to parent bdev_io on raid bdev device
434  * returns:
435  * none
436  */
437 static void
438 raid_bdev_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
439 {
440 	struct raid_bdev_io *raid_io = (struct raid_bdev_io *)bdev_io->driver_ctx;
441 
442 	raid_io->raid_bdev = bdev_io->bdev->ctxt;
443 	raid_io->raid_ch = spdk_io_channel_get_ctx(ch);
444 	raid_io->base_bdev_io_submitted = 0;
445 	raid_io->base_bdev_io_completed = 0;
446 	raid_io->base_bdev_io_status = SPDK_BDEV_IO_STATUS_SUCCESS;
447 
448 	switch (bdev_io->type) {
449 	case SPDK_BDEV_IO_TYPE_READ:
450 		spdk_bdev_io_get_buf(bdev_io, raid_bdev_get_buf_cb,
451 				     bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen);
452 		break;
453 	case SPDK_BDEV_IO_TYPE_WRITE:
454 		raid_io->raid_bdev->module->submit_rw_request(raid_io);
455 		break;
456 
457 	case SPDK_BDEV_IO_TYPE_RESET:
458 		raid_bdev_submit_reset_request(raid_io);
459 		break;
460 
461 	case SPDK_BDEV_IO_TYPE_FLUSH:
462 	case SPDK_BDEV_IO_TYPE_UNMAP:
463 		raid_io->raid_bdev->module->submit_null_payload_request(raid_io);
464 		break;
465 
466 	default:
467 		SPDK_ERRLOG("submit request, invalid io type %u\n", bdev_io->type);
468 		raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED);
469 		break;
470 	}
471 }
472 
473 /*
474  * brief:
475  * _raid_bdev_io_type_supported checks whether io_type is supported in
476  * all base bdev modules of raid bdev module. If anyone among the base_bdevs
477  * doesn't support, the raid device doesn't supports.
478  *
479  * params:
480  * raid_bdev - pointer to raid bdev context
481  * io_type - io type
482  * returns:
483  * true - io_type is supported
484  * false - io_type is not supported
485  */
486 inline static bool
487 _raid_bdev_io_type_supported(struct raid_bdev *raid_bdev, enum spdk_bdev_io_type io_type)
488 {
489 	struct raid_base_bdev_info *base_info;
490 
491 	if (io_type == SPDK_BDEV_IO_TYPE_FLUSH ||
492 	    io_type == SPDK_BDEV_IO_TYPE_UNMAP) {
493 		if (raid_bdev->module->submit_null_payload_request == NULL) {
494 			return false;
495 		}
496 	}
497 
498 	RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
499 		if (base_info->bdev == NULL) {
500 			assert(false);
501 			continue;
502 		}
503 
504 		if (spdk_bdev_io_type_supported(base_info->bdev, io_type) == false) {
505 			return false;
506 		}
507 	}
508 
509 	return true;
510 }
511 
512 /*
513  * brief:
514  * raid_bdev_io_type_supported is the io_supported function for bdev function
515  * table which returns whether the particular io type is supported or not by
516  * raid bdev module
517  * params:
518  * ctx - pointer to raid bdev context
519  * type - io type
520  * returns:
521  * true - io_type is supported
522  * false - io_type is not supported
523  */
524 static bool
525 raid_bdev_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
526 {
527 	switch (io_type) {
528 	case SPDK_BDEV_IO_TYPE_READ:
529 	case SPDK_BDEV_IO_TYPE_WRITE:
530 		return true;
531 
532 	case SPDK_BDEV_IO_TYPE_FLUSH:
533 	case SPDK_BDEV_IO_TYPE_RESET:
534 	case SPDK_BDEV_IO_TYPE_UNMAP:
535 		return _raid_bdev_io_type_supported(ctx, io_type);
536 
537 	default:
538 		return false;
539 	}
540 
541 	return false;
542 }
543 
/*
 * brief:
 * raid_bdev_get_io_channel is the get_io_channel function table pointer for
 * raid bdev. The raid_bdev pointer itself is the io device that is handed to
 * spdk_get_io_channel.
 * params:
 * ctxt - pointer to raid_bdev
 * returns:
 * pointer to io channel for raid bdev
 */
static struct spdk_io_channel *
raid_bdev_get_io_channel(void *ctxt)
{
	return spdk_get_io_channel(ctxt);
}
560 
561 /*
562  * brief:
563  * raid_bdev_dump_info_json is the function table pointer for raid bdev
564  * params:
565  * ctx - pointer to raid_bdev
566  * w - pointer to json context
567  * returns:
568  * 0 - success
569  * non zero - failure
570  */
571 static int
572 raid_bdev_dump_info_json(void *ctx, struct spdk_json_write_ctx *w)
573 {
574 	struct raid_bdev *raid_bdev = ctx;
575 	struct raid_base_bdev_info *base_info;
576 
577 	SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_dump_config_json\n");
578 	assert(raid_bdev != NULL);
579 
580 	/* Dump the raid bdev configuration related information */
581 	spdk_json_write_named_object_begin(w, "raid");
582 	spdk_json_write_named_uint32(w, "strip_size", raid_bdev->strip_size);
583 	spdk_json_write_named_uint32(w, "strip_size_kb", raid_bdev->strip_size_kb);
584 	spdk_json_write_named_uint32(w, "state", raid_bdev->state);
585 	spdk_json_write_named_string(w, "raid_level", raid_bdev_level_to_str(raid_bdev->level));
586 	spdk_json_write_named_uint32(w, "destruct_called", raid_bdev->destruct_called);
587 	spdk_json_write_named_uint32(w, "num_base_bdevs", raid_bdev->num_base_bdevs);
588 	spdk_json_write_named_uint32(w, "num_base_bdevs_discovered", raid_bdev->num_base_bdevs_discovered);
589 	spdk_json_write_name(w, "base_bdevs_list");
590 	spdk_json_write_array_begin(w);
591 	RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
592 		if (base_info->bdev) {
593 			spdk_json_write_string(w, base_info->bdev->name);
594 		} else {
595 			spdk_json_write_null(w);
596 		}
597 	}
598 	spdk_json_write_array_end(w);
599 	spdk_json_write_object_end(w);
600 
601 	return 0;
602 }
603 
604 /*
605  * brief:
606  * raid_bdev_write_config_json is the function table pointer for raid bdev
607  * params:
608  * bdev - pointer to spdk_bdev
609  * w - pointer to json context
610  * returns:
611  * none
612  */
613 static void
614 raid_bdev_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
615 {
616 	struct raid_bdev *raid_bdev = bdev->ctxt;
617 	struct raid_base_bdev_info *base_info;
618 
619 	spdk_json_write_object_begin(w);
620 
621 	spdk_json_write_named_string(w, "method", "bdev_raid_create");
622 
623 	spdk_json_write_named_object_begin(w, "params");
624 	spdk_json_write_named_string(w, "name", bdev->name);
625 	spdk_json_write_named_uint32(w, "strip_size", raid_bdev->strip_size_kb);
626 	spdk_json_write_named_string(w, "raid_level", raid_bdev_level_to_str(raid_bdev->level));
627 
628 	spdk_json_write_named_array_begin(w, "base_bdevs");
629 	RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
630 		if (base_info->bdev) {
631 			spdk_json_write_string(w, base_info->bdev->name);
632 		}
633 	}
634 	spdk_json_write_array_end(w);
635 	spdk_json_write_object_end(w);
636 
637 	spdk_json_write_object_end(w);
638 }
639 
640 /* g_raid_bdev_fn_table is the function table for raid bdev */
641 static const struct spdk_bdev_fn_table g_raid_bdev_fn_table = {
642 	.destruct		= raid_bdev_destruct,
643 	.submit_request		= raid_bdev_submit_request,
644 	.io_type_supported	= raid_bdev_io_type_supported,
645 	.get_io_channel		= raid_bdev_get_io_channel,
646 	.dump_info_json		= raid_bdev_dump_info_json,
647 	.write_config_json	= raid_bdev_write_config_json,
648 };
649 
650 /*
651  * brief:
652  * raid_bdev_config_cleanup function is used to free memory for one raid_bdev in configuration
653  * params:
654  * raid_cfg - pointer to raid_bdev_config structure
655  * returns:
656  * none
657  */
658 void
659 raid_bdev_config_cleanup(struct raid_bdev_config *raid_cfg)
660 {
661 	uint8_t i;
662 
663 	TAILQ_REMOVE(&g_raid_config.raid_bdev_config_head, raid_cfg, link);
664 	g_raid_config.total_raid_bdev--;
665 
666 	if (raid_cfg->base_bdev) {
667 		for (i = 0; i < raid_cfg->num_base_bdevs; i++) {
668 			free(raid_cfg->base_bdev[i].name);
669 		}
670 		free(raid_cfg->base_bdev);
671 	}
672 	free(raid_cfg->name);
673 	free(raid_cfg);
674 }
675 
676 /*
677  * brief:
678  * raid_bdev_free is the raid bdev function table function pointer. This is
679  * called on bdev free path
680  * params:
681  * none
682  * returns:
683  * none
684  */
685 static void
686 raid_bdev_free(void)
687 {
688 	struct raid_bdev_config *raid_cfg, *tmp;
689 
690 	SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_free\n");
691 	TAILQ_FOREACH_SAFE(raid_cfg, &g_raid_config.raid_bdev_config_head, link, tmp) {
692 		raid_bdev_config_cleanup(raid_cfg);
693 	}
694 }
695 
696 /* brief
697  * raid_bdev_config_find_by_name is a helper function to find raid bdev config
698  * by name as key.
699  *
700  * params:
701  * raid_name - name for raid bdev.
702  */
703 struct raid_bdev_config *
704 raid_bdev_config_find_by_name(const char *raid_name)
705 {
706 	struct raid_bdev_config *raid_cfg;
707 
708 	TAILQ_FOREACH(raid_cfg, &g_raid_config.raid_bdev_config_head, link) {
709 		if (!strcmp(raid_cfg->name, raid_name)) {
710 			return raid_cfg;
711 		}
712 	}
713 
714 	return raid_cfg;
715 }
716 
717 /*
718  * brief
719  * raid_bdev_config_add function adds config for newly created raid bdev.
720  *
721  * params:
722  * raid_name - name for raid bdev.
723  * strip_size - strip size in KB
724  * num_base_bdevs - number of base bdevs.
725  * level - raid level.
726  * _raid_cfg - Pointer to newly added configuration
727  */
728 int
729 raid_bdev_config_add(const char *raid_name, uint32_t strip_size, uint8_t num_base_bdevs,
730 		     enum raid_level level, struct raid_bdev_config **_raid_cfg)
731 {
732 	struct raid_bdev_config *raid_cfg;
733 
734 	raid_cfg = raid_bdev_config_find_by_name(raid_name);
735 	if (raid_cfg != NULL) {
736 		SPDK_ERRLOG("Duplicate raid bdev name found in config file %s\n",
737 			    raid_name);
738 		return -EEXIST;
739 	}
740 
741 	if (spdk_u32_is_pow2(strip_size) == false) {
742 		SPDK_ERRLOG("Invalid strip size %" PRIu32 "\n", strip_size);
743 		return -EINVAL;
744 	}
745 
746 	if (num_base_bdevs == 0) {
747 		SPDK_ERRLOG("Invalid base device count %u\n", num_base_bdevs);
748 		return -EINVAL;
749 	}
750 
751 	raid_cfg = calloc(1, sizeof(*raid_cfg));
752 	if (raid_cfg == NULL) {
753 		SPDK_ERRLOG("unable to allocate memory\n");
754 		return -ENOMEM;
755 	}
756 
757 	raid_cfg->name = strdup(raid_name);
758 	if (!raid_cfg->name) {
759 		free(raid_cfg);
760 		SPDK_ERRLOG("unable to allocate memory\n");
761 		return -ENOMEM;
762 	}
763 	raid_cfg->strip_size = strip_size;
764 	raid_cfg->num_base_bdevs = num_base_bdevs;
765 	raid_cfg->level = level;
766 
767 	raid_cfg->base_bdev = calloc(num_base_bdevs, sizeof(*raid_cfg->base_bdev));
768 	if (raid_cfg->base_bdev == NULL) {
769 		free(raid_cfg->name);
770 		free(raid_cfg);
771 		SPDK_ERRLOG("unable to allocate memory\n");
772 		return -ENOMEM;
773 	}
774 
775 	TAILQ_INSERT_TAIL(&g_raid_config.raid_bdev_config_head, raid_cfg, link);
776 	g_raid_config.total_raid_bdev++;
777 
778 	*_raid_cfg = raid_cfg;
779 	return 0;
780 }
781 
782 /*
783  * brief:
784  * raid_bdev_config_add_base_bdev function add base bdev to raid bdev config.
785  *
786  * params:
787  * raid_cfg - pointer to raid bdev configuration
788  * base_bdev_name - name of base bdev
789  * slot - Position to add base bdev
790  */
791 int
792 raid_bdev_config_add_base_bdev(struct raid_bdev_config *raid_cfg, const char *base_bdev_name,
793 			       uint8_t slot)
794 {
795 	uint8_t i;
796 	struct raid_bdev_config *tmp;
797 
798 	if (slot >= raid_cfg->num_base_bdevs) {
799 		return -EINVAL;
800 	}
801 
802 	TAILQ_FOREACH(tmp, &g_raid_config.raid_bdev_config_head, link) {
803 		for (i = 0; i < tmp->num_base_bdevs; i++) {
804 			if (tmp->base_bdev[i].name != NULL) {
805 				if (!strcmp(tmp->base_bdev[i].name, base_bdev_name)) {
806 					SPDK_ERRLOG("duplicate base bdev name %s mentioned\n",
807 						    base_bdev_name);
808 					return -EEXIST;
809 				}
810 			}
811 		}
812 	}
813 
814 	raid_cfg->base_bdev[slot].name = strdup(base_bdev_name);
815 	if (raid_cfg->base_bdev[slot].name == NULL) {
816 		SPDK_ERRLOG("unable to allocate memory\n");
817 		return -ENOMEM;
818 	}
819 
820 	return 0;
821 }
822 
823 static struct {
824 	const char *name;
825 	enum raid_level value;
826 } g_raid_level_names[] = {
827 	{ "raid0", RAID0 },
828 	{ "0", RAID0 },
829 	{ }
830 };
831 
832 enum raid_level raid_bdev_parse_raid_level(const char *str)
833 {
834 	unsigned int i;
835 
836 	for (i = 0; g_raid_level_names[i].name != NULL; i++) {
837 		if (strcasecmp(g_raid_level_names[i].name, str) == 0) {
838 			return g_raid_level_names[i].value;
839 		}
840 	}
841 
842 	return INVALID_RAID_LEVEL;
843 }
844 
845 const char *
846 raid_bdev_level_to_str(enum raid_level level)
847 {
848 	unsigned int i;
849 
850 	for (i = 0; g_raid_level_names[i].name != NULL; i++) {
851 		if (g_raid_level_names[i].value == level) {
852 			return g_raid_level_names[i].name;
853 		}
854 	}
855 
856 	return "";
857 }
858 
859 /*
860  * brief:
861  * raid_bdev_parse_raid is used to parse the raid bdev from config file based on
862  * pre-defined raid bdev format in config file.
863  * Format of config file:
864  *   [RAID1]
865  *   Name raid1
866  *   StripSize 64
867  *   NumDevices 2
868  *   RaidLevel 0
869  *   Devices Nvme0n1 Nvme1n1
870  *
871  *   [RAID2]
872  *   Name raid2
873  *   StripSize 64
874  *   NumDevices 3
875  *   RaidLevel 0
876  *   Devices Nvme2n1 Nvme3n1 Nvme4n1
877  *
878  * params:
879  * conf_section - pointer to config section
880  * returns:
881  * 0 - success
882  * non zero - failure
883  */
884 static int
885 raid_bdev_parse_raid(struct spdk_conf_section *conf_section)
886 {
887 	const char *raid_name;
888 	uint32_t strip_size;
889 	uint8_t num_base_bdevs;
890 	const char *raid_level_str;
891 	enum raid_level level;
892 	const char *base_bdev_name;
893 	struct raid_bdev_config *raid_cfg;
894 	int rc, i, val;
895 
896 	raid_name = spdk_conf_section_get_val(conf_section, "Name");
897 	if (raid_name == NULL) {
898 		SPDK_ERRLOG("raid_name is null\n");
899 		return -EINVAL;
900 	}
901 
902 	val = spdk_conf_section_get_intval(conf_section, "StripSize");
903 	if (val < 0) {
904 		return -EINVAL;
905 	}
906 	strip_size = val;
907 
908 	val = spdk_conf_section_get_intval(conf_section, "NumDevices");
909 	if (val < 0) {
910 		return -EINVAL;
911 	}
912 	num_base_bdevs = val;
913 
914 	raid_level_str = spdk_conf_section_get_val(conf_section, "RaidLevel");
915 	if (raid_level_str == NULL) {
916 		SPDK_ERRLOG("Missing RaidLevel\n");
917 		return -EINVAL;
918 	}
919 	level = raid_bdev_parse_raid_level(raid_level_str);
920 	if (level == INVALID_RAID_LEVEL) {
921 		SPDK_ERRLOG("Invalid RaidLevel\n");
922 		return -EINVAL;
923 	}
924 
925 	SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "%s %" PRIu32 " %u %u\n",
926 		      raid_name, strip_size, num_base_bdevs, level);
927 
928 	rc = raid_bdev_config_add(raid_name, strip_size, num_base_bdevs, level,
929 				  &raid_cfg);
930 	if (rc != 0) {
931 		SPDK_ERRLOG("Failed to add raid bdev config\n");
932 		return rc;
933 	}
934 
935 	for (i = 0; true; i++) {
936 		base_bdev_name = spdk_conf_section_get_nmval(conf_section, "Devices", 0, i);
937 		if (base_bdev_name == NULL) {
938 			break;
939 		}
940 		if (i >= num_base_bdevs) {
941 			raid_bdev_config_cleanup(raid_cfg);
942 			SPDK_ERRLOG("Number of devices mentioned is more than count\n");
943 			return -EINVAL;
944 		}
945 
946 		rc = raid_bdev_config_add_base_bdev(raid_cfg, base_bdev_name, i);
947 		if (rc != 0) {
948 			raid_bdev_config_cleanup(raid_cfg);
949 			SPDK_ERRLOG("Failed to add base bdev to raid bdev config\n");
950 			return rc;
951 		}
952 	}
953 
954 	if (i != raid_cfg->num_base_bdevs) {
955 		raid_bdev_config_cleanup(raid_cfg);
956 		SPDK_ERRLOG("Number of devices mentioned is less than count\n");
957 		return -EINVAL;
958 	}
959 
960 	rc = raid_bdev_create(raid_cfg);
961 	if (rc != 0) {
962 		raid_bdev_config_cleanup(raid_cfg);
963 		SPDK_ERRLOG("Failed to create raid bdev\n");
964 		return rc;
965 	}
966 
967 	rc = raid_bdev_add_base_devices(raid_cfg);
968 	if (rc != 0) {
969 		SPDK_ERRLOG("Failed to add any base bdev to raid bdev\n");
970 		/* Config is not removed in this case. */
971 	}
972 
973 	return 0;
974 }
975 
/*
 * brief:
 * raid_bdev_parse_config walks all config sections and parses every section
 * whose name has the "RAID" prefix. Parsing stops at the first section that
 * fails.
 * params:
 * none
 * returns:
 * 0 - success
 * non zero - failure
 */
static int
raid_bdev_parse_config(void)
{
	struct spdk_conf_section *section;
	int                      rc;

	for (section = spdk_conf_first_section(NULL); section != NULL;
	     section = spdk_conf_next_section(section)) {
		if (!spdk_conf_section_match_prefix(section, "RAID")) {
			continue;
		}

		rc = raid_bdev_parse_raid(section);
		if (rc < 0) {
			SPDK_ERRLOG("Unable to parse raid bdev section\n");
			return rc;
		}
	}

	return 0;
}
1006 
1007 /*
1008  * brief:
1009  * raid_bdev_fini_start is called when bdev layer is starting the
1010  * shutdown process
1011  * params:
1012  * none
1013  * returns:
1014  * none
1015  */
1016 static void
1017 raid_bdev_fini_start(void)
1018 {
1019 	SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_fini_start\n");
1020 	g_shutdown_started = true;
1021 }
1022 
1023 /*
1024  * brief:
1025  * raid_bdev_exit is called on raid bdev module exit time by bdev layer
1026  * params:
1027  * none
1028  * returns:
1029  * none
1030  */
1031 static void
1032 raid_bdev_exit(void)
1033 {
1034 	SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_exit\n");
1035 	raid_bdev_free();
1036 }
1037 
1038 /*
1039  * brief:
1040  * raid_bdev_get_ctx_size is used to return the context size of bdev_io for raid
1041  * module
1042  * params:
1043  * none
1044  * returns:
1045  * size of spdk_bdev_io context for raid
1046  */
1047 static int
1048 raid_bdev_get_ctx_size(void)
1049 {
1050 	SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_get_ctx_size\n");
1051 	return sizeof(struct raid_bdev_io);
1052 }
1053 
1054 /*
1055  * brief:
1056  * raid_bdev_get_running_config is used to get the configuration options.
1057  *
1058  * params:
1059  * fp - The pointer to a file that will be written to the configuration options.
1060  * returns:
1061  * none
1062  */
1063 static void
1064 raid_bdev_get_running_config(FILE *fp)
1065 {
1066 	struct raid_bdev *raid_bdev;
1067 	struct raid_base_bdev_info *base_info;
1068 	int index = 1;
1069 
1070 	TAILQ_FOREACH(raid_bdev, &g_raid_bdev_configured_list, state_link) {
1071 		fprintf(fp,
1072 			"\n"
1073 			"[RAID%d]\n"
1074 			"  Name %s\n"
1075 			"  StripSize %" PRIu32 "\n"
1076 			"  NumDevices %u\n"
1077 			"  RaidLevel %s\n",
1078 			index, raid_bdev->bdev.name, raid_bdev->strip_size_kb,
1079 			raid_bdev->num_base_bdevs,
1080 			raid_bdev_level_to_str(raid_bdev->level));
1081 		fprintf(fp,
1082 			"  Devices ");
1083 		RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
1084 			if (base_info->bdev) {
1085 				fprintf(fp,
1086 					"%s ",
1087 					base_info->bdev->name);
1088 			}
1089 		}
1090 		fprintf(fp,
1091 			"\n");
1092 		index++;
1093 	}
1094 }
1095 
1096 /*
1097  * brief:
1098  * raid_bdev_can_claim_bdev is the function to check if this base_bdev can be
1099  * claimed by raid bdev or not.
1100  * params:
1101  * bdev_name - represents base bdev name
1102  * _raid_cfg - pointer to raid bdev config parsed from config file
1103  * base_bdev_slot - if bdev can be claimed, it represents the base_bdev correct
1104  * slot. This field is only valid if return value of this function is true
1105  * returns:
1106  * true - if bdev can be claimed
1107  * false - if bdev can't be claimed
1108  */
1109 static bool
1110 raid_bdev_can_claim_bdev(const char *bdev_name, struct raid_bdev_config **_raid_cfg,
1111 			 uint8_t *base_bdev_slot)
1112 {
1113 	struct raid_bdev_config *raid_cfg;
1114 	uint8_t i;
1115 
1116 	TAILQ_FOREACH(raid_cfg, &g_raid_config.raid_bdev_config_head, link) {
1117 		for (i = 0; i < raid_cfg->num_base_bdevs; i++) {
1118 			/*
1119 			 * Check if the base bdev name is part of raid bdev configuration.
1120 			 * If match is found then return true and the slot information where
1121 			 * this base bdev should be inserted in raid bdev
1122 			 */
1123 			if (!strcmp(bdev_name, raid_cfg->base_bdev[i].name)) {
1124 				*_raid_cfg = raid_cfg;
1125 				*base_bdev_slot = i;
1126 				return true;
1127 			}
1128 		}
1129 	}
1130 
1131 	return false;
1132 }
1133 
1134 
1135 static struct spdk_bdev_module g_raid_if = {
1136 	.name = "raid",
1137 	.module_init = raid_bdev_init,
1138 	.fini_start = raid_bdev_fini_start,
1139 	.module_fini = raid_bdev_exit,
1140 	.get_ctx_size = raid_bdev_get_ctx_size,
1141 	.examine_config = raid_bdev_examine,
1142 	.config_text = raid_bdev_get_running_config,
1143 	.async_init = false,
1144 	.async_fini = false,
1145 };
1146 SPDK_BDEV_MODULE_REGISTER(raid, &g_raid_if)
1147 
1148 /*
1149  * brief:
1150  * raid_bdev_init is the initialization function for raid bdev module
1151  * params:
1152  * none
1153  * returns:
1154  * 0 - success
1155  * non zero - failure
1156  */
1157 static int
1158 raid_bdev_init(void)
1159 {
1160 	int ret;
1161 
1162 	/* Parse config file for raids */
1163 	ret = raid_bdev_parse_config();
1164 	if (ret < 0) {
1165 		SPDK_ERRLOG("raid bdev init failed parsing\n");
1166 		raid_bdev_free();
1167 		return ret;
1168 	}
1169 
1170 	SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_init completed successfully\n");
1171 
1172 	return 0;
1173 }
1174 
1175 /*
1176  * brief:
1177  * raid_bdev_create allocates raid bdev based on passed configuration
1178  * params:
1179  * raid_cfg - configuration of raid bdev
1180  * returns:
1181  * 0 - success
1182  * non zero - failure
1183  */
1184 int
1185 raid_bdev_create(struct raid_bdev_config *raid_cfg)
1186 {
1187 	struct raid_bdev *raid_bdev;
1188 	struct spdk_bdev *raid_bdev_gen;
1189 	struct raid_bdev_module *module;
1190 
1191 	module = raid_bdev_module_find(raid_cfg->level);
1192 	if (module == NULL) {
1193 		SPDK_ERRLOG("Unsupported raid level '%d'\n", raid_cfg->level);
1194 		return -EINVAL;
1195 	}
1196 
1197 	assert(module->base_bdevs_min != 0);
1198 	if (raid_cfg->num_base_bdevs < module->base_bdevs_min) {
1199 		SPDK_ERRLOG("At least %u base devices required for %s\n",
1200 			    module->base_bdevs_min,
1201 			    raid_bdev_level_to_str(raid_cfg->level));
1202 		return -EINVAL;
1203 	}
1204 
1205 	raid_bdev = calloc(1, sizeof(*raid_bdev));
1206 	if (!raid_bdev) {
1207 		SPDK_ERRLOG("Unable to allocate memory for raid bdev\n");
1208 		return -ENOMEM;
1209 	}
1210 
1211 	raid_bdev->module = module;
1212 	raid_bdev->num_base_bdevs = raid_cfg->num_base_bdevs;
1213 	raid_bdev->base_bdev_info = calloc(raid_bdev->num_base_bdevs,
1214 					   sizeof(struct raid_base_bdev_info));
1215 	if (!raid_bdev->base_bdev_info) {
1216 		SPDK_ERRLOG("Unable able to allocate base bdev info\n");
1217 		free(raid_bdev);
1218 		return -ENOMEM;
1219 	}
1220 
1221 	/* strip_size_kb is from the rpc param.  strip_size is in blocks and used
1222 	 * internally and set later.
1223 	 */
1224 	raid_bdev->strip_size = 0;
1225 	raid_bdev->strip_size_kb = raid_cfg->strip_size;
1226 	raid_bdev->state = RAID_BDEV_STATE_CONFIGURING;
1227 	raid_bdev->config = raid_cfg;
1228 	raid_bdev->level = raid_cfg->level;
1229 
1230 	raid_bdev_gen = &raid_bdev->bdev;
1231 
1232 	raid_bdev_gen->name = strdup(raid_cfg->name);
1233 	if (!raid_bdev_gen->name) {
1234 		SPDK_ERRLOG("Unable to allocate name for raid\n");
1235 		free(raid_bdev->base_bdev_info);
1236 		free(raid_bdev);
1237 		return -ENOMEM;
1238 	}
1239 
1240 	raid_bdev_gen->product_name = "Raid Volume";
1241 	raid_bdev_gen->ctxt = raid_bdev;
1242 	raid_bdev_gen->fn_table = &g_raid_bdev_fn_table;
1243 	raid_bdev_gen->module = &g_raid_if;
1244 	raid_bdev_gen->write_cache = 0;
1245 
1246 	TAILQ_INSERT_TAIL(&g_raid_bdev_configuring_list, raid_bdev, state_link);
1247 	TAILQ_INSERT_TAIL(&g_raid_bdev_list, raid_bdev, global_link);
1248 
1249 	raid_cfg->raid_bdev = raid_bdev;
1250 
1251 	return 0;
1252 }
1253 
1254 /*
1255  * brief
1256  * raid_bdev_alloc_base_bdev_resource allocates resource of base bdev.
1257  * params:
1258  * raid_bdev - pointer to raid bdev
1259  * bdev - pointer to base bdev
1260  * base_bdev_slot - position to add base bdev
1261  * returns:
1262  * 0 - success
1263  * non zero - failure
1264  */
1265 static int
1266 raid_bdev_alloc_base_bdev_resource(struct raid_bdev *raid_bdev, struct spdk_bdev *bdev,
1267 				   uint8_t base_bdev_slot)
1268 {
1269 	struct spdk_bdev_desc *desc;
1270 	int rc;
1271 
1272 	rc = spdk_bdev_open(bdev, true, raid_bdev_remove_base_bdev, bdev, &desc);
1273 	if (rc != 0) {
1274 		SPDK_ERRLOG("Unable to create desc on bdev '%s'\n", bdev->name);
1275 		return rc;
1276 	}
1277 
1278 	rc = spdk_bdev_module_claim_bdev(bdev, NULL, &g_raid_if);
1279 	if (rc != 0) {
1280 		SPDK_ERRLOG("Unable to claim this bdev as it is already claimed\n");
1281 		spdk_bdev_close(desc);
1282 		return rc;
1283 	}
1284 
1285 	SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "bdev %s is claimed\n", bdev->name);
1286 
1287 	assert(raid_bdev->state != RAID_BDEV_STATE_ONLINE);
1288 	assert(base_bdev_slot < raid_bdev->num_base_bdevs);
1289 
1290 	raid_bdev->base_bdev_info[base_bdev_slot].bdev = bdev;
1291 	raid_bdev->base_bdev_info[base_bdev_slot].desc = desc;
1292 	raid_bdev->num_base_bdevs_discovered++;
1293 	assert(raid_bdev->num_base_bdevs_discovered <= raid_bdev->num_base_bdevs);
1294 
1295 	return 0;
1296 }
1297 
1298 /*
1299  * brief:
1300  * If raid bdev config is complete, then only register the raid bdev to
1301  * bdev layer and remove this raid bdev from configuring list and
1302  * insert the raid bdev to configured list
1303  * params:
1304  * raid_bdev - pointer to raid bdev
1305  * returns:
1306  * 0 - success
1307  * non zero - failure
1308  */
1309 static int
1310 raid_bdev_configure(struct raid_bdev *raid_bdev)
1311 {
1312 	uint32_t blocklen = 0;
1313 	struct spdk_bdev *raid_bdev_gen;
1314 	struct raid_base_bdev_info *base_info;
1315 	int rc = 0;
1316 
1317 	assert(raid_bdev->state == RAID_BDEV_STATE_CONFIGURING);
1318 	assert(raid_bdev->num_base_bdevs_discovered == raid_bdev->num_base_bdevs);
1319 
1320 	RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
1321 		/* Check blocklen for all base bdevs that it should be same */
1322 		if (blocklen == 0) {
1323 			blocklen = base_info->bdev->blocklen;
1324 		} else if (blocklen != base_info->bdev->blocklen) {
1325 			/*
1326 			 * Assumption is that all the base bdevs for any raid bdev should
1327 			 * have same blocklen
1328 			 */
1329 			SPDK_ERRLOG("Blocklen of various bdevs not matching\n");
1330 			return -EINVAL;
1331 		}
1332 	}
1333 	assert(blocklen > 0);
1334 
1335 	/* The strip_size_kb is read in from user in KB. Convert to blocks here for
1336 	 * internal use.
1337 	 */
1338 	raid_bdev->strip_size = (raid_bdev->strip_size_kb * 1024) / blocklen;
1339 	raid_bdev->strip_size_shift = spdk_u32log2(raid_bdev->strip_size);
1340 	raid_bdev->blocklen_shift = spdk_u32log2(blocklen);
1341 
1342 	raid_bdev_gen = &raid_bdev->bdev;
1343 	raid_bdev_gen->blocklen = blocklen;
1344 
1345 	rc = raid_bdev->module->start(raid_bdev);
1346 	if (rc != 0) {
1347 		SPDK_ERRLOG("raid module startup callback failed\n");
1348 		return rc;
1349 	}
1350 	raid_bdev->state = RAID_BDEV_STATE_ONLINE;
1351 	SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "io device register %p\n", raid_bdev);
1352 	SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "blockcnt %lu, blocklen %u\n",
1353 		      raid_bdev_gen->blockcnt, raid_bdev_gen->blocklen);
1354 	spdk_io_device_register(raid_bdev, raid_bdev_create_cb, raid_bdev_destroy_cb,
1355 				sizeof(struct raid_bdev_io_channel),
1356 				raid_bdev->bdev.name);
1357 	rc = spdk_bdev_register(raid_bdev_gen);
1358 	if (rc != 0) {
1359 		SPDK_ERRLOG("Unable to register raid bdev and stay at configuring state\n");
1360 		if (raid_bdev->module->stop != NULL) {
1361 			raid_bdev->module->stop(raid_bdev);
1362 		}
1363 		spdk_io_device_unregister(raid_bdev, NULL);
1364 		raid_bdev->state = RAID_BDEV_STATE_CONFIGURING;
1365 		return rc;
1366 	}
1367 	SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid bdev generic %p\n", raid_bdev_gen);
1368 	TAILQ_REMOVE(&g_raid_bdev_configuring_list, raid_bdev, state_link);
1369 	TAILQ_INSERT_TAIL(&g_raid_bdev_configured_list, raid_bdev, state_link);
1370 	SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid bdev is created with name %s, raid_bdev %p\n",
1371 		      raid_bdev_gen->name, raid_bdev);
1372 
1373 	return 0;
1374 }
1375 
1376 /*
1377  * brief:
1378  * If raid bdev is online and registered, change the bdev state to
1379  * configuring and unregister this raid device. Queue this raid device
1380  * in configuring list
1381  * params:
1382  * raid_bdev - pointer to raid bdev
1383  * cb_fn - callback function
1384  * cb_arg - argument to callback function
1385  * returns:
1386  * none
1387  */
1388 static void
1389 raid_bdev_deconfigure(struct raid_bdev *raid_bdev, raid_bdev_destruct_cb cb_fn,
1390 		      void *cb_arg)
1391 {
1392 	if (raid_bdev->state != RAID_BDEV_STATE_ONLINE) {
1393 		if (cb_fn) {
1394 			cb_fn(cb_arg, 0);
1395 		}
1396 		return;
1397 	}
1398 
1399 	assert(raid_bdev->num_base_bdevs == raid_bdev->num_base_bdevs_discovered);
1400 	TAILQ_REMOVE(&g_raid_bdev_configured_list, raid_bdev, state_link);
1401 	if (raid_bdev->module->stop != NULL) {
1402 		raid_bdev->module->stop(raid_bdev);
1403 	}
1404 	raid_bdev->state = RAID_BDEV_STATE_OFFLINE;
1405 	assert(raid_bdev->num_base_bdevs_discovered);
1406 	TAILQ_INSERT_TAIL(&g_raid_bdev_offline_list, raid_bdev, state_link);
1407 	SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid bdev state chaning from online to offline\n");
1408 
1409 	spdk_bdev_unregister(&raid_bdev->bdev, cb_fn, cb_arg);
1410 }
1411 
1412 /*
1413  * brief:
1414  * raid_bdev_find_by_base_bdev function finds the raid bdev which has
1415  *  claimed the base bdev.
1416  * params:
1417  * base_bdev - pointer to base bdev pointer
1418  * _raid_bdev - Reference to pointer to raid bdev
1419  * _base_info - Reference to the raid base bdev info.
1420  * returns:
1421  * true - if the raid bdev is found.
1422  * false - if the raid bdev is not found.
1423  */
1424 static bool
1425 raid_bdev_find_by_base_bdev(struct spdk_bdev *base_bdev, struct raid_bdev **_raid_bdev,
1426 			    struct raid_base_bdev_info **_base_info)
1427 {
1428 	struct raid_bdev *raid_bdev;
1429 	struct raid_base_bdev_info *base_info;
1430 
1431 	TAILQ_FOREACH(raid_bdev, &g_raid_bdev_list, global_link) {
1432 		RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
1433 			if (base_info->bdev == base_bdev) {
1434 				*_raid_bdev = raid_bdev;
1435 				*_base_info = base_info;
1436 				return true;
1437 			}
1438 		}
1439 	}
1440 
1441 	return false;
1442 }
1443 
1444 /*
1445  * brief:
1446  * raid_bdev_remove_base_bdev function is called by below layers when base_bdev
1447  * is removed. This function checks if this base bdev is part of any raid bdev
1448  * or not. If yes, it takes necessary action on that particular raid bdev.
1449  * params:
1450  * ctx - pointer to base bdev pointer which got removed
1451  * returns:
1452  * none
1453  */
1454 static void
1455 raid_bdev_remove_base_bdev(void *ctx)
1456 {
1457 	struct spdk_bdev	*base_bdev = ctx;
1458 	struct raid_bdev	*raid_bdev = NULL;
1459 	struct raid_base_bdev_info *base_info;
1460 
1461 	SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_remove_base_bdev\n");
1462 
1463 	/* Find the raid_bdev which has claimed this base_bdev */
1464 	if (!raid_bdev_find_by_base_bdev(base_bdev, &raid_bdev, &base_info)) {
1465 		SPDK_ERRLOG("bdev to remove '%s' not found\n", base_bdev->name);
1466 		return;
1467 	}
1468 
1469 	assert(base_info->desc);
1470 	base_info->remove_scheduled = true;
1471 
1472 	if (raid_bdev->destruct_called == true ||
1473 	    raid_bdev->state == RAID_BDEV_STATE_CONFIGURING) {
1474 		/*
1475 		 * As raid bdev is not registered yet or already unregistered,
1476 		 * so cleanup should be done here itself.
1477 		 */
1478 		raid_bdev_free_base_bdev_resource(raid_bdev, base_info);
1479 		if (raid_bdev->num_base_bdevs_discovered == 0) {
1480 			/* There is no base bdev for this raid, so free the raid device. */
1481 			raid_bdev_cleanup(raid_bdev);
1482 			return;
1483 		}
1484 	}
1485 
1486 	raid_bdev_deconfigure(raid_bdev, NULL, NULL);
1487 }
1488 
1489 /*
1490  * brief:
1491  * Remove base bdevs from the raid bdev one by one.  Skip any base bdev which
1492  *  doesn't exist.
1493  * params:
1494  * raid_cfg - pointer to raid bdev config.
1495  * cb_fn - callback function
1496  * cb_ctx - argument to callback function
1497  */
1498 void
1499 raid_bdev_remove_base_devices(struct raid_bdev_config *raid_cfg,
1500 			      raid_bdev_destruct_cb cb_fn, void *cb_arg)
1501 {
1502 	struct raid_bdev		*raid_bdev;
1503 	struct raid_base_bdev_info	*base_info;
1504 
1505 	SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev_remove_base_devices\n");
1506 
1507 	raid_bdev = raid_cfg->raid_bdev;
1508 	if (raid_bdev == NULL) {
1509 		SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid bdev %s doesn't exist now\n", raid_cfg->name);
1510 		if (cb_fn) {
1511 			cb_fn(cb_arg, 0);
1512 		}
1513 		return;
1514 	}
1515 
1516 	if (raid_bdev->destroy_started) {
1517 		SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "destroying raid bdev %s is already started\n",
1518 			      raid_cfg->name);
1519 		if (cb_fn) {
1520 			cb_fn(cb_arg, -EALREADY);
1521 		}
1522 		return;
1523 	}
1524 
1525 	raid_bdev->destroy_started = true;
1526 
1527 	RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
1528 		if (base_info->bdev == NULL) {
1529 			continue;
1530 		}
1531 
1532 		assert(base_info->desc);
1533 		base_info->remove_scheduled = true;
1534 
1535 		if (raid_bdev->destruct_called == true ||
1536 		    raid_bdev->state == RAID_BDEV_STATE_CONFIGURING) {
1537 			/*
1538 			 * As raid bdev is not registered yet or already unregistered,
1539 			 * so cleanup should be done here itself.
1540 			 */
1541 			raid_bdev_free_base_bdev_resource(raid_bdev, base_info);
1542 			if (raid_bdev->num_base_bdevs_discovered == 0) {
1543 				/* There is no base bdev for this raid, so free the raid device. */
1544 				raid_bdev_cleanup(raid_bdev);
1545 				if (cb_fn) {
1546 					cb_fn(cb_arg, 0);
1547 				}
1548 				return;
1549 			}
1550 		}
1551 	}
1552 
1553 	raid_bdev_deconfigure(raid_bdev, cb_fn, cb_arg);
1554 }
1555 
1556 /*
1557  * brief:
1558  * raid_bdev_add_base_device function is the actual function which either adds
1559  * the nvme base device to existing raid bdev or create a new raid bdev. It also claims
1560  * the base device and keep the open descriptor.
1561  * params:
1562  * raid_cfg - pointer to raid bdev config
1563  * bdev - pointer to base bdev
1564  * base_bdev_slot - position to add base bdev
1565  * returns:
1566  * 0 - success
1567  * non zero - failure
1568  */
1569 static int
1570 raid_bdev_add_base_device(struct raid_bdev_config *raid_cfg, struct spdk_bdev *bdev,
1571 			  uint8_t base_bdev_slot)
1572 {
1573 	struct raid_bdev	*raid_bdev;
1574 	int			rc;
1575 
1576 	raid_bdev = raid_cfg->raid_bdev;
1577 	if (!raid_bdev) {
1578 		SPDK_ERRLOG("Raid bdev '%s' is not created yet\n", raid_cfg->name);
1579 		return -ENODEV;
1580 	}
1581 
1582 	rc = raid_bdev_alloc_base_bdev_resource(raid_bdev, bdev, base_bdev_slot);
1583 	if (rc != 0) {
1584 		SPDK_ERRLOG("Failed to allocate resource for bdev '%s'\n", bdev->name);
1585 		return rc;
1586 	}
1587 
1588 	assert(raid_bdev->num_base_bdevs_discovered <= raid_bdev->num_base_bdevs);
1589 
1590 	if (raid_bdev->num_base_bdevs_discovered == raid_bdev->num_base_bdevs) {
1591 		rc = raid_bdev_configure(raid_bdev);
1592 		if (rc != 0) {
1593 			SPDK_ERRLOG("Failed to configure raid bdev\n");
1594 			return rc;
1595 		}
1596 	}
1597 
1598 	return 0;
1599 }
1600 
1601 /*
1602  * brief:
1603  * Add base bdevs to the raid bdev one by one.  Skip any base bdev which doesn't
1604  *  exist or fails to add. If all base bdevs are successfully added, the raid bdev
1605  *  moves to the configured state and becomes available. Otherwise, the raid bdev
1606  *  stays at the configuring state with added base bdevs.
1607  * params:
1608  * raid_cfg - pointer to raid bdev config
1609  * returns:
1610  * 0 - The raid bdev moves to the configured state or stays at the configuring
1611  *     state with added base bdevs due to any nonexistent base bdev.
1612  * non zero - Failed to add any base bdev and stays at the configuring state with
1613  *            added base bdevs.
1614  */
1615 int
1616 raid_bdev_add_base_devices(struct raid_bdev_config *raid_cfg)
1617 {
1618 	struct spdk_bdev	*base_bdev;
1619 	uint8_t			i;
1620 	int			rc = 0, _rc;
1621 
1622 	for (i = 0; i < raid_cfg->num_base_bdevs; i++) {
1623 		base_bdev = spdk_bdev_get_by_name(raid_cfg->base_bdev[i].name);
1624 		if (base_bdev == NULL) {
1625 			SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "base bdev %s doesn't exist now\n",
1626 				      raid_cfg->base_bdev[i].name);
1627 			continue;
1628 		}
1629 
1630 		_rc = raid_bdev_add_base_device(raid_cfg, base_bdev, i);
1631 		if (_rc != 0) {
1632 			SPDK_ERRLOG("Failed to add base bdev %s to RAID bdev %s: %s\n",
1633 				    raid_cfg->base_bdev[i].name, raid_cfg->name,
1634 				    spdk_strerror(-_rc));
1635 			if (rc == 0) {
1636 				rc = _rc;
1637 			}
1638 		}
1639 	}
1640 
1641 	return rc;
1642 }
1643 
1644 /*
1645  * brief:
1646  * raid_bdev_examine function is the examine function call by the below layers
1647  * like bdev_nvme layer. This function will check if this base bdev can be
1648  * claimed by this raid bdev or not.
1649  * params:
1650  * bdev - pointer to base bdev
1651  * returns:
1652  * none
1653  */
1654 static void
1655 raid_bdev_examine(struct spdk_bdev *bdev)
1656 {
1657 	struct raid_bdev_config	*raid_cfg;
1658 	uint8_t			base_bdev_slot;
1659 
1660 	if (raid_bdev_can_claim_bdev(bdev->name, &raid_cfg, &base_bdev_slot)) {
1661 		raid_bdev_add_base_device(raid_cfg, bdev, base_bdev_slot);
1662 	} else {
1663 		SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "bdev %s can't be claimed\n",
1664 			      bdev->name);
1665 	}
1666 
1667 	spdk_bdev_module_examine_done(&g_raid_if);
1668 }
1669 
/* Registers the "bdev_raid" log component behind the SPDK_LOG_BDEV_RAID flag used by the debug logs in this file */
SPDK_LOG_REGISTER_COMPONENT("bdev_raid", SPDK_LOG_BDEV_RAID)
1672