xref: /spdk/module/bdev/raid/bdev_raid.h (revision f8abbede89d30584d2a4f8427b13896f8591b873)
1 /*   SPDX-License-Identifier: BSD-3-Clause
2  *   Copyright (C) 2018 Intel Corporation.
3  *   All rights reserved.
4  */
5 
6 #ifndef SPDK_BDEV_RAID_INTERNAL_H
7 #define SPDK_BDEV_RAID_INTERNAL_H
8 
9 #include "spdk/bdev_module.h"
10 #include "spdk/uuid.h"
11 
/* Minimum data offset size: 1 MiB */
#define RAID_BDEV_MIN_DATA_OFFSET_SIZE	(1024 * 1024)
13 
/* Numeric identifiers of the RAID levels */
enum raid_level {
	INVALID_RAID_LEVEL = -1,
	RAID0 = 0,
	RAID1 = 1,
	RAID5F = 0x5f,	/* 95 decimal */
	CONCAT = 99,
};
21 
/*
 * State of a raid bdev. A raid bdev is either on the configured list or on
 * the configuring list.
 */
enum raid_bdev_state {
	/* The raid bdev is ready and visible to upper layers */
	RAID_BDEV_STATE_ONLINE = 0,

	/*
	 * The raid bdev is still being configured: not all underlying bdevs
	 * are present yet, and upper layers cannot see it.
	 */
	RAID_BDEV_STATE_CONFIGURING,

	/*
	 * While offline, the raid bdev layer completes every incoming command
	 * without submitting it to the underlying base nvme bdevs.
	 */
	RAID_BDEV_STATE_OFFLINE,

	/* Upper bound marker; add new states before this entry */
	RAID_BDEV_STATE_MAX
};
45 
/* Callback type for base bdev removal; receives the caller's context and a status. */
typedef void (*raid_bdev_remove_base_bdev_cb)(void *ctx, int status);
47 
48 /*
49  * raid_base_bdev_info contains information for the base bdevs which are part of some
50  * raid. This structure contains the per base bdev information. Whatever is
51  * required per base device for raid bdev will be kept here
52  */
53 struct raid_base_bdev_info {
54 	/* The raid bdev that this base bdev belongs to */
55 	struct raid_bdev	*raid_bdev;
56 
57 	/* name of the bdev */
58 	char			*name;
59 
60 	/* pointer to base bdev descriptor opened by raid bdev */
61 	struct spdk_bdev_desc	*desc;
62 
63 	/* offset in blocks from the start of the base bdev to the start of the data region */
64 	uint64_t		data_offset;
65 
66 	/* size in blocks of the base bdev's data region */
67 	uint64_t		data_size;
68 
69 	/*
70 	 * When underlying base device calls the hot plug function on drive removal,
71 	 * this flag will be set and later after doing some processing, base device
72 	 * descriptor will be closed
73 	 */
74 	bool			remove_scheduled;
75 
76 	/* callback for base bdev removal */
77 	raid_bdev_remove_base_bdev_cb remove_cb;
78 
79 	/* context of the callback */
80 	void			*remove_cb_ctx;
81 
82 	/* Hold the number of blocks to know how large the base bdev is resized. */
83 	uint64_t		blockcnt;
84 };
85 
86 /*
87  * raid_bdev_io is the context part of bdev_io. It contains the information
88  * related to bdev_io for a raid bdev
89  */
90 struct raid_bdev_io {
91 	/* The raid bdev associated with this IO */
92 	struct raid_bdev *raid_bdev;
93 
94 	/* WaitQ entry, used only in waitq logic */
95 	struct spdk_bdev_io_wait_entry	waitq_entry;
96 
97 	/* Context of the original channel for this IO */
98 	struct raid_bdev_io_channel	*raid_ch;
99 
100 	/* Used for tracking progress on io requests sent to member disks. */
101 	uint64_t			base_bdev_io_remaining;
102 	uint8_t				base_bdev_io_submitted;
103 	uint8_t				base_bdev_io_status;
104 
105 	/* Private data for the raid module */
106 	void				*module_private;
107 };
108 
109 /*
110  * raid_bdev is the single entity structure which contains SPDK block device
111  * and the information related to any raid bdev either configured or
112  * in configuring list. io device is created on this.
113  */
114 struct raid_bdev {
115 	/* raid bdev device, this will get registered in bdev layer */
116 	struct spdk_bdev		bdev;
117 
118 	/* link of raid bdev to link it to global raid bdev list */
119 	TAILQ_ENTRY(raid_bdev)		global_link;
120 
121 	/* array of base bdev info */
122 	struct raid_base_bdev_info	*base_bdev_info;
123 
124 	/* lock to protect the base bdev array */
125 	struct spdk_spinlock		base_bdev_lock;
126 
127 	/* strip size of raid bdev in blocks */
128 	uint32_t			strip_size;
129 
130 	/* strip size of raid bdev in KB */
131 	uint32_t			strip_size_kb;
132 
133 	/* strip size bit shift for optimized calculation */
134 	uint32_t			strip_size_shift;
135 
136 	/* block length bit shift for optimized calculation */
137 	uint32_t			blocklen_shift;
138 
139 	/* state of raid bdev */
140 	enum raid_bdev_state		state;
141 
142 	/* number of base bdevs comprising raid bdev  */
143 	uint8_t				num_base_bdevs;
144 
145 	/* number of base bdevs discovered */
146 	uint8_t				num_base_bdevs_discovered;
147 
148 	/* minimum number of viable base bdevs that are required by array to operate */
149 	uint8_t				min_base_bdevs_operational;
150 
151 	/* Raid Level of this raid bdev */
152 	enum raid_level			level;
153 
154 	/* Set to true if destroy of this raid bdev is started. */
155 	bool				destroy_started;
156 
157 	/* Set to true if superblock metadata is enabled on this raid bdev */
158 	bool				superblock_enabled;
159 
160 	/* Module for RAID-level specific operations */
161 	struct raid_bdev_module		*module;
162 
163 	/* Private data for the raid module */
164 	void				*module_private;
165 };
166 
/*
 * Iterate over the base_bdev_info array of a raid bdev.
 *
 * r - pointer to an object with base_bdev_info and num_base_bdevs members
 * i - iterator lvalue, a pointer to an element of r->base_bdev_info
 *
 * Both arguments are parenthesized so that expressions (e.g. "p + 1") expand
 * correctly. Note that each argument is evaluated more than once, so neither
 * should have side effects.
 */
#define RAID_FOR_EACH_BASE_BDEV(r, i) \
	for ((i) = (r)->base_bdev_info; (i) < (r)->base_bdev_info + (r)->num_base_bdevs; (i)++)
169 
/*
 * raid_bdev_io_channel is the spdk_io_channel context of a raid bdev device.
 * It ties the raid bdev io channel to the io channels of the base bdevs.
 */
struct raid_bdev_io_channel {
	/* Array holding the IO channels of the base bdevs */
	struct spdk_io_channel	**base_channel;

	/* Number of IO channels */
	uint8_t			num_channels;

	/* Private IO channel of the raid module */
	struct spdk_io_channel	*module_channel;
};
184 
185 /* TAIL head for raid bdev list */
186 TAILQ_HEAD(raid_all_tailq, raid_bdev);
187 
188 extern struct raid_all_tailq		g_raid_bdev_list;
189 
190 typedef void (*raid_bdev_destruct_cb)(void *cb_ctx, int rc);
191 
192 int raid_bdev_create(const char *name, uint32_t strip_size, uint8_t num_base_bdevs,
193 		     enum raid_level level, bool superblock, const struct spdk_uuid *uuid,
194 		     struct raid_bdev **raid_bdev_out);
195 void raid_bdev_delete(struct raid_bdev *raid_bdev, raid_bdev_destruct_cb cb_fn, void *cb_ctx);
196 int raid_bdev_add_base_device(struct raid_bdev *raid_bdev, const char *name, uint8_t slot);
197 struct raid_bdev *raid_bdev_find_by_name(const char *name);
198 enum raid_level raid_bdev_str_to_level(const char *str);
199 const char *raid_bdev_level_to_str(enum raid_level level);
200 enum raid_bdev_state raid_bdev_str_to_state(const char *str);
201 const char *raid_bdev_state_to_str(enum raid_bdev_state state);
202 void raid_bdev_write_info_json(struct raid_bdev *raid_bdev, struct spdk_json_write_ctx *w);
203 int raid_bdev_remove_base_bdev(struct spdk_bdev *base_bdev, raid_bdev_remove_base_bdev_cb cb_fn,
204 			       void *cb_ctx);
205 
206 /*
207  * RAID module descriptor
208  */
209 struct raid_bdev_module {
210 	/* RAID level implemented by this module */
211 	enum raid_level level;
212 
213 	/* Minimum required number of base bdevs. Must be > 0. */
214 	uint8_t base_bdevs_min;
215 
216 	/*
217 	 * RAID constraint. Determines number of base bdevs that can be removed
218 	 * without failing the array.
219 	 */
220 	struct {
221 		enum {
222 			CONSTRAINT_UNSET = 0,
223 			CONSTRAINT_MAX_BASE_BDEVS_REMOVED,
224 			CONSTRAINT_MIN_BASE_BDEVS_OPERATIONAL,
225 		} type;
226 		uint8_t value;
227 	} base_bdevs_constraint;
228 
229 	/* Set to true if this module supports memory domains. */
230 	bool memory_domains_supported;
231 
232 	/*
233 	 * Called when the raid is starting, right before changing the state to
234 	 * online and registering the bdev. Parameters of the bdev like blockcnt
235 	 * should be set here.
236 	 *
237 	 * Non-zero return value will abort the startup process.
238 	 */
239 	int (*start)(struct raid_bdev *raid_bdev);
240 
241 	/*
242 	 * Called when the raid is stopping, right before changing the state to
243 	 * offline and unregistering the bdev. Optional.
244 	 *
245 	 * The function should return false if it is asynchronous. Then, after
246 	 * the async operation has completed and the module is fully stopped
247 	 * raid_bdev_module_stop_done() must be called.
248 	 */
249 	bool (*stop)(struct raid_bdev *raid_bdev);
250 
251 	/* Handler for R/W requests */
252 	void (*submit_rw_request)(struct raid_bdev_io *raid_io);
253 
254 	/* Handler for requests without payload (flush, unmap). Optional. */
255 	void (*submit_null_payload_request)(struct raid_bdev_io *raid_io);
256 
257 	/*
258 	 * Called when the bdev's IO channel is created to get the module's private IO channel.
259 	 * Optional.
260 	 */
261 	struct spdk_io_channel *(*get_io_channel)(struct raid_bdev *raid_bdev);
262 
263 	/*
264 	 * Called when a base_bdev is resized to resize the raid if the condition
265 	 * is satisfied.
266 	 */
267 	void (*resize)(struct raid_bdev *raid_bdev);
268 
269 	TAILQ_ENTRY(raid_bdev_module) link;
270 };
271 
/* Adds a raid module descriptor to the module list; defined outside this header. */
void raid_bdev_module_list_add(struct raid_bdev_module *raid_module);

/*
 * Two-step helper macros: the outer one expands its argument (e.g. __LINE__)
 * before the inner one pastes it onto the raid_module_register_ prefix.
 */
#define __RAID_MODULE_REGISTER_(line) raid_module_register_##line
#define __RAID_MODULE_REGISTER(line) __RAID_MODULE_REGISTER_(line)

/*
 * Emits a static constructor function that passes _module to
 * raid_bdev_module_list_add(). The function name embeds the source line of
 * the macro invocation.
 */
#define RAID_MODULE_REGISTER(_module)					\
__attribute__((constructor)) static void				\
__RAID_MODULE_REGISTER(__LINE__)(void)					\
{									\
	raid_bdev_module_list_add(_module);				\
}
283 
284 bool raid_bdev_io_complete_part(struct raid_bdev_io *raid_io, uint64_t completed,
285 				enum spdk_bdev_io_status status);
286 void raid_bdev_queue_io_wait(struct raid_bdev_io *raid_io, struct spdk_bdev *bdev,
287 			     struct spdk_io_channel *ch, spdk_bdev_io_wait_cb cb_fn);
288 void raid_bdev_io_complete(struct raid_bdev_io *raid_io, enum spdk_bdev_io_status status);
289 void raid_bdev_module_stop_done(struct raid_bdev *raid_bdev);
290 
291 /**
292  * Raid bdev I/O read/write wrapper for spdk_bdev_readv_blocks_ext function.
293  */
294 static inline int
295 raid_bdev_readv_blocks_ext(struct raid_base_bdev_info *base_info, struct spdk_io_channel *ch,
296 			   struct iovec *iov, int iovcnt, uint64_t offset_blocks,
297 			   uint64_t num_blocks, spdk_bdev_io_completion_cb cb, void *cb_arg,
298 			   struct spdk_bdev_ext_io_opts *opts)
299 {
300 	return spdk_bdev_readv_blocks_ext(base_info->desc, ch, iov, iovcnt,
301 					  base_info->data_offset + offset_blocks, num_blocks, cb, cb_arg, opts);
302 }
303 
304 /**
305  * Raid bdev I/O read/write wrapper for spdk_bdev_writev_blocks_ext function.
306  */
307 static inline int
308 raid_bdev_writev_blocks_ext(struct raid_base_bdev_info *base_info, struct spdk_io_channel *ch,
309 			    struct iovec *iov, int iovcnt, uint64_t offset_blocks,
310 			    uint64_t num_blocks, spdk_bdev_io_completion_cb cb, void *cb_arg,
311 			    struct spdk_bdev_ext_io_opts *opts)
312 {
313 	return spdk_bdev_writev_blocks_ext(base_info->desc, ch, iov, iovcnt,
314 					   base_info->data_offset + offset_blocks, num_blocks, cb, cb_arg, opts);
315 }
316 
317 /**
318  * Raid bdev I/O read/write wrapper for spdk_bdev_unmap_blocks function.
319  */
320 static inline int
321 raid_bdev_unmap_blocks(struct raid_base_bdev_info *base_info, struct spdk_io_channel *ch,
322 		       uint64_t offset_blocks, uint64_t num_blocks,
323 		       spdk_bdev_io_completion_cb cb, void *cb_arg)
324 {
325 	return spdk_bdev_unmap_blocks(base_info->desc, ch, base_info->data_offset + offset_blocks,
326 				      num_blocks, cb, cb_arg);
327 }
328 
329 /**
330  * Raid bdev I/O read/write wrapper for spdk_bdev_flush_blocks function.
331  */
332 static inline int
333 raid_bdev_flush_blocks(struct raid_base_bdev_info *base_info, struct spdk_io_channel *ch,
334 		       uint64_t offset_blocks, uint64_t num_blocks,
335 		       spdk_bdev_io_completion_cb cb, void *cb_arg)
336 {
337 	return spdk_bdev_flush_blocks(base_info->desc, ch, base_info->data_offset + offset_blocks,
338 				      num_blocks, cb, cb_arg);
339 }
340 
341 #endif /* SPDK_BDEV_RAID_INTERNAL_H */
342