xref: /spdk/module/bdev/raid/bdev_raid.h (revision 927f1fd57bd004df581518466ec4c1b8083e5d23)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright (c) Intel Corporation.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #ifndef SPDK_BDEV_RAID_INTERNAL_H
35 #define SPDK_BDEV_RAID_INTERNAL_H
36 
37 #include "spdk/bdev_module.h"
38 
/*
 * RAID levels understood by the raid bdev layer. Every enumerator carries an
 * explicit numeric value; INVALID_RAID_LEVEL is the "no such level" sentinel.
 */
enum raid_level {
	INVALID_RAID_LEVEL = -1,	/* sentinel: not a usable level */
	RAID0 = 0,
	RAID5 = 5,
	CONCAT = 99,
};
45 
/*
 * State of a raid bdev. A raid bdev that is not offline sits either on the
 * configured list or on the configuring list.
 */
enum raid_bdev_state {
	/* The raid bdev is ready and visible to upper layers. */
	RAID_BDEV_STATE_ONLINE = 0,

	/*
	 * The raid bdev is still being configured: not all underlying bdevs
	 * are present yet, and upper layers cannot see it.
	 */
	RAID_BDEV_STATE_CONFIGURING,

	/*
	 * The raid bdev layer completes every incoming command itself, without
	 * submitting anything to the underlying base bdevs.
	 */
	RAID_BDEV_STATE_OFFLINE,

	/* Upper bound of the enum; add new states above this line. */
	RAID_BDEV_MAX
};
69 
/*
 * Per-base-bdev bookkeeping for a raid bdev. One instance exists for each
 * base bdev that is a member of a raid; anything the raid bdev needs to track
 * per member device belongs here.
 */
struct raid_base_bdev_info {
	/* The base spdk bdev itself */
	struct spdk_bdev	*bdev;

	/* Descriptor the raid bdev obtained when it opened the base bdev */
	struct spdk_bdev_desc	*desc;

	/*
	 * Set when the underlying base device reports a hot-remove. The base
	 * bdev descriptor is closed later, once the follow-up processing is
	 * done.
	 */
	bool			remove_scheduled;

	/* Thread on which the base device was opened */
	struct spdk_thread	*thread;
};
92 
93 /*
94  * raid_bdev_io is the context part of bdev_io. It contains the information
95  * related to bdev_io for a raid bdev
96  */
97 struct raid_bdev_io {
98 	/* The raid bdev associated with this IO */
99 	struct raid_bdev *raid_bdev;
100 
101 	/* WaitQ entry, used only in waitq logic */
102 	struct spdk_bdev_io_wait_entry	waitq_entry;
103 
104 	/* Context of the original channel for this IO */
105 	struct raid_bdev_io_channel	*raid_ch;
106 
107 	/* Used for tracking progress on io requests sent to member disks. */
108 	uint64_t			base_bdev_io_remaining;
109 	uint8_t				base_bdev_io_submitted;
110 	uint8_t				base_bdev_io_status;
111 };
112 
113 /*
114  * raid_bdev is the single entity structure which contains SPDK block device
115  * and the information related to any raid bdev either configured or
116  * in configuring list. io device is created on this.
117  */
118 struct raid_bdev {
119 	/* raid bdev device, this will get registered in bdev layer */
120 	struct spdk_bdev		bdev;
121 
122 	/* link of raid bdev to link it to configured, configuring or offline list */
123 	TAILQ_ENTRY(raid_bdev)		state_link;
124 
125 	/* link of raid bdev to link it to global raid bdev list */
126 	TAILQ_ENTRY(raid_bdev)		global_link;
127 
128 	/* pointer to config file entry */
129 	struct raid_bdev_config		*config;
130 
131 	/* array of base bdev info */
132 	struct raid_base_bdev_info	*base_bdev_info;
133 
134 	/* strip size of raid bdev in blocks */
135 	uint32_t			strip_size;
136 
137 	/* strip size of raid bdev in KB */
138 	uint32_t			strip_size_kb;
139 
140 	/* strip size bit shift for optimized calculation */
141 	uint32_t			strip_size_shift;
142 
143 	/* block length bit shift for optimized calculation */
144 	uint32_t			blocklen_shift;
145 
146 	/* state of raid bdev */
147 	enum raid_bdev_state		state;
148 
149 	/* number of base bdevs comprising raid bdev  */
150 	uint8_t				num_base_bdevs;
151 
152 	/* number of base bdevs discovered */
153 	uint8_t				num_base_bdevs_discovered;
154 
155 	/* Raid Level of this raid bdev */
156 	enum raid_level			level;
157 
158 	/* Set to true if destruct is called for this raid bdev */
159 	bool				destruct_called;
160 
161 	/* Set to true if destroy of this raid bdev is started. */
162 	bool				destroy_started;
163 
164 	/* Module for RAID-level specific operations */
165 	struct raid_bdev_module		*module;
166 
167 	/* Private data for the raid module */
168 	void				*module_private;
169 };
170 
/*
 * Iterate over every entry of a raid bdev's base_bdev_info array.
 *
 *   r - pointer expression yielding the raid bdev to walk; it must provide
 *       the base_bdev_info array and the num_base_bdevs count
 *   i - pointer lvalue used as the loop cursor over base_bdev_info entries
 *
 * Both arguments are parenthesized in the expansion, so expressions such as
 * `&raid` or `cond ? a : b` can be passed for r. The arguments are evaluated
 * several times per iteration and therefore must not have side effects.
 */
#define RAID_FOR_EACH_BASE_BDEV(r, i) \
	for ((i) = (r)->base_bdev_info; \
	     (i) < (r)->base_bdev_info + (r)->num_base_bdevs; \
	     (i)++)
173 
/*
 * Per-base-bdev configuration record, filled in while the config is parsed.
 */
struct raid_base_bdev_config {
	/* Name of the base bdev as given in the config file */
	char				*name;
};
182 
183 /*
184  * raid_bdev_config contains the raid bdev config related information after
185  * parsing the config file
186  */
187 struct raid_bdev_config {
188 	/* base bdev config per underlying bdev */
189 	struct raid_base_bdev_config	*base_bdev;
190 
191 	/* Points to already created raid bdev  */
192 	struct raid_bdev		*raid_bdev;
193 
194 	char				*name;
195 
196 	/* strip size of this raid bdev in KB */
197 	uint32_t			strip_size;
198 
199 	/* number of base bdevs */
200 	uint8_t				num_base_bdevs;
201 
202 	/* raid level */
203 	enum raid_level			level;
204 
205 	TAILQ_ENTRY(raid_bdev_config)	link;
206 };
207 
/*
 * Top-level container for the raid bdev configuration of all raids, as read
 * from the config file.
 */
struct raid_config {
	/* Head of the list of per-raid config entries from the config file */
	TAILQ_HEAD(, raid_bdev_config) raid_bdev_config_head;

	/* Total number of raid bdevs from the config file */
	uint8_t total_raid_bdev;
};
219 
/*
 * Context of the spdk_io_channel belonging to a raid bdev device. It ties the
 * raid bdev io channel to the io channels of the base bdevs.
 */
struct raid_bdev_io_channel {
	/* IO channels of the base bdevs, stored as an array */
	struct spdk_io_channel	**base_channel;

	/* Number of IO channels */
	uint8_t			num_channels;
};
231 
/*
 * TAILQ head types for the raid bdev lists: one per state list (configured,
 * configuring, offline) plus one for the list of all raid bdevs.
 */
TAILQ_HEAD(raid_configured_tailq, raid_bdev);
TAILQ_HEAD(raid_configuring_tailq, raid_bdev);
TAILQ_HEAD(raid_all_tailq, raid_bdev);
TAILQ_HEAD(raid_offline_tailq, raid_bdev);

/* List heads and the parsed raid configuration; defined outside this header. */
extern struct raid_configured_tailq	g_raid_bdev_configured_list;
extern struct raid_configuring_tailq	g_raid_bdev_configuring_list;
extern struct raid_all_tailq		g_raid_bdev_list;
extern struct raid_offline_tailq	g_raid_bdev_offline_list;
extern struct raid_config		g_raid_config;
243 
/*
 * Callback type accepted by raid_bdev_remove_base_devices(). It is invoked
 * with a context pointer and an integer result code.
 * NOTE(review): presumably cb_ctx is the pointer given to
 * raid_bdev_remove_base_devices() and rc is 0 on success - confirm against
 * the implementation.
 */
typedef void (*raid_bdev_destruct_cb)(void *cb_ctx, int rc);

/* Operations on a raid bdev described by a config entry. */
int raid_bdev_create(struct raid_bdev_config *raid_cfg);
int raid_bdev_add_base_devices(struct raid_bdev_config *raid_cfg);
void raid_bdev_remove_base_devices(struct raid_bdev_config *raid_cfg, raid_bdev_destruct_cb cb_fn,
				   void *cb_ctx);

/*
 * Management of raid_bdev_config entries.
 * NOTE(review): strip_size is presumably in KB, matching
 * raid_bdev_config.strip_size - confirm against the implementation.
 */
int raid_bdev_config_add(const char *raid_name, uint32_t strip_size,
			 uint8_t num_base_bdevs, enum raid_level level,
			 struct raid_bdev_config **_raid_cfg);
int raid_bdev_config_add_base_bdev(struct raid_bdev_config *raid_cfg, const char *base_bdev_name,
				   uint8_t slot);
void raid_bdev_config_cleanup(struct raid_bdev_config *raid_cfg);
struct raid_bdev_config *raid_bdev_config_find_by_name(const char *raid_name);

/* Conversions between enum raid_level and its string form. */
enum raid_level raid_bdev_parse_raid_level(const char *str);
const char *raid_bdev_level_to_str(enum raid_level level);
258 
259 /*
260  * RAID module descriptor
261  */
262 struct raid_bdev_module {
263 	/* RAID level implemented by this module */
264 	enum raid_level level;
265 
266 	/* Minimum required number of base bdevs. Must be > 0. */
267 	uint8_t base_bdevs_min;
268 
269 	/*
270 	 * Maximum number of base bdevs that can be removed without failing
271 	 * the array.
272 	 */
273 	uint8_t base_bdevs_max_degraded;
274 
275 	/*
276 	 * Called when the raid is starting, right before changing the state to
277 	 * online and registering the bdev. Parameters of the bdev like blockcnt
278 	 * should be set here.
279 	 *
280 	 * Non-zero return value will abort the startup process.
281 	 */
282 	int (*start)(struct raid_bdev *raid_bdev);
283 
284 	/*
285 	 * Called when the raid is stopping, right before changing the state to
286 	 * offline and unregistering the bdev. Optional.
287 	 */
288 	void (*stop)(struct raid_bdev *raid_bdev);
289 
290 	/* Handler for R/W requests */
291 	void (*submit_rw_request)(struct raid_bdev_io *raid_io);
292 
293 	/* Handler for requests without payload (flush, unmap). Optional. */
294 	void (*submit_null_payload_request)(struct raid_bdev_io *raid_io);
295 
296 	TAILQ_ENTRY(raid_bdev_module) link;
297 };
298 
/* Registration entry point called by the function RAID_MODULE_REGISTER() generates. */
void raid_bdev_module_list_add(struct raid_bdev_module *raid_module);

/*
 * Helpers that build the name raid_module_register_<line>. The extra level of
 * expansion makes sure __LINE__ is replaced by its number before it is pasted.
 */
#define __RAID_MODULE_REGISTER_(line) raid_module_register_##line
#define __RAID_MODULE_REGISTER(line) __RAID_MODULE_REGISTER_(line)

/*
 * Emit a static constructor function that passes _module to
 * raid_bdev_module_list_add(). The function name is derived from the line on
 * which the macro is used.
 */
#define RAID_MODULE_REGISTER(_module)					\
static void __attribute__((constructor))				\
__RAID_MODULE_REGISTER(__LINE__)(void)					\
{									\
	raid_bdev_module_list_add(_module);				\
}
310 
311 bool
312 raid_bdev_io_complete_part(struct raid_bdev_io *raid_io, uint64_t completed,
313 			   enum spdk_bdev_io_status status);
314 void
315 raid_bdev_queue_io_wait(struct raid_bdev_io *raid_io, struct spdk_bdev *bdev,
316 			struct spdk_io_channel *ch, spdk_bdev_io_wait_cb cb_fn);
317 void
318 raid_bdev_io_complete(struct raid_bdev_io *raid_io, enum spdk_bdev_io_status status);
319 
320 #endif /* SPDK_BDEV_RAID_INTERNAL_H */
321