xref: /spdk/module/bdev/raid/bdev_raid.h (revision 5558f3f5022a22f65bfbb6e3a9fc67602f6d0ca8)
/*   SPDX-License-Identifier: BSD-3-Clause
 *   Copyright (C) 2018 Intel Corporation.
 *   All rights reserved.
 */
507fe6a43SSeth Howell 
607fe6a43SSeth Howell #ifndef SPDK_BDEV_RAID_INTERNAL_H
707fe6a43SSeth Howell #define SPDK_BDEV_RAID_INTERNAL_H
807fe6a43SSeth Howell 
907fe6a43SSeth Howell #include "spdk/bdev_module.h"
101db41324SKrzysztof Karas #include "spdk/uuid.h"
1107fe6a43SSeth Howell 
/*
 * Minimum data offset size in bytes (1 MiB). A static assertion later in this
 * header requires the maximum superblock length (RAID_BDEV_SB_MAX_LENGTH) to
 * be smaller than this value.
 */
#define RAID_BDEV_MIN_DATA_OFFSET_SIZE	(1024 * 1024) /* 1 MiB */
13deed7d2fSArtur Paszkiewicz 
/*
 * RAID levels known to the raid bdev code. Every enumerator carries an
 * explicit numeric value; INVALID_RAID_LEVEL is the only negative one.
 */
enum raid_level {
	INVALID_RAID_LEVEL	= -1,
	RAID0			= 0,
	RAID1			= 1,
	RAID5F			= 95, /* 0x5f */
	CONCAT			= 99,
};
2175cb691cSpaul luse 
/*
 * Raid state describes the state of the raid. This raid bdev can be either in
 * configured list or configuring list
 */
enum raid_bdev_state {
	/* raid bdev is ready and is seen by upper layers */
	RAID_BDEV_STATE_ONLINE,

	/*
	 * raid bdev is configuring, not all underlying bdevs are present.
	 * And can't be seen by upper layers.
	 */
	RAID_BDEV_STATE_CONFIGURING,

	/*
	 * In offline state, raid bdev layer will complete all incoming commands without
	 * submitting to underlying base nvme bdevs
	 */
	RAID_BDEV_STATE_OFFLINE,

	/* raid bdev state max, new states should be added before this */
	RAID_BDEV_STATE_MAX
};
4507fe6a43SSeth Howell 
/*
 * Types of raid bdev background process. RAID_PROCESS_MAX is the end marker
 * and must stay last.
 */
enum raid_process_type {
	RAID_PROCESS_NONE,
	RAID_PROCESS_REBUILD,
	RAID_PROCESS_MAX
};
5150d58ff3SArtur Paszkiewicz 
/*
 * Callback type used for base bdev operations (see the remove_cb and
 * configure_cb members of struct raid_base_bdev_info). Receives the
 * caller-supplied context and an integer status.
 */
typedef void (*raid_base_bdev_cb)(void *ctx, int status);
5323850b03SKrzysztof Smolinski 
5407fe6a43SSeth Howell /*
5507fe6a43SSeth Howell  * raid_base_bdev_info contains information for the base bdevs which are part of some
5607fe6a43SSeth Howell  * raid. This structure contains the per base bdev information. Whatever is
5707fe6a43SSeth Howell  * required per base device for raid bdev will be kept here
5807fe6a43SSeth Howell  */
5907fe6a43SSeth Howell struct raid_base_bdev_info {
6026861b70SArtur Paszkiewicz 	/* The raid bdev that this base bdev belongs to */
6126861b70SArtur Paszkiewicz 	struct raid_bdev	*raid_bdev;
6226861b70SArtur Paszkiewicz 
63dccdd1e5SArtur Paszkiewicz 	/* name of the bdev */
64dccdd1e5SArtur Paszkiewicz 	char			*name;
65dccdd1e5SArtur Paszkiewicz 
668a6bb6a8SArtur Paszkiewicz 	/* uuid of the bdev */
678a6bb6a8SArtur Paszkiewicz 	struct spdk_uuid	uuid;
688a6bb6a8SArtur Paszkiewicz 
69255d0786SArtur Paszkiewicz 	/*
70255d0786SArtur Paszkiewicz 	 * Pointer to base bdev descriptor opened by raid bdev. This is NULL when the bdev for
71255d0786SArtur Paszkiewicz 	 * this slot is missing.
72255d0786SArtur Paszkiewicz 	 */
7307fe6a43SSeth Howell 	struct spdk_bdev_desc	*desc;
7407fe6a43SSeth Howell 
75deed7d2fSArtur Paszkiewicz 	/* offset in blocks from the start of the base bdev to the start of the data region */
76deed7d2fSArtur Paszkiewicz 	uint64_t		data_offset;
77deed7d2fSArtur Paszkiewicz 
78deed7d2fSArtur Paszkiewicz 	/* size in blocks of the base bdev's data region */
79deed7d2fSArtur Paszkiewicz 	uint64_t		data_size;
80deed7d2fSArtur Paszkiewicz 
8107fe6a43SSeth Howell 	/*
8207fe6a43SSeth Howell 	 * When underlying base device calls the hot plug function on drive removal,
8307fe6a43SSeth Howell 	 * this flag will be set and later after doing some processing, base device
8407fe6a43SSeth Howell 	 * descriptor will be closed
8507fe6a43SSeth Howell 	 */
8607fe6a43SSeth Howell 	bool			remove_scheduled;
87c9224e26SShuhei Matsumoto 
8823850b03SKrzysztof Smolinski 	/* callback for base bdev removal */
89887b8ec4SArtur Paszkiewicz 	raid_base_bdev_cb	remove_cb;
9023850b03SKrzysztof Smolinski 
9123850b03SKrzysztof Smolinski 	/* context of the callback */
9223850b03SKrzysztof Smolinski 	void			*remove_cb_ctx;
9323850b03SKrzysztof Smolinski 
94c9224e26SShuhei Matsumoto 	/* Hold the number of blocks to know how large the base bdev is resized. */
95c9224e26SShuhei Matsumoto 	uint64_t		blockcnt;
9677b8f7b6SArtur Paszkiewicz 
9777b8f7b6SArtur Paszkiewicz 	/* io channel for the app thread */
9877b8f7b6SArtur Paszkiewicz 	struct spdk_io_channel	*app_thread_ch;
999222ff97SArtur Paszkiewicz 
1009222ff97SArtur Paszkiewicz 	/* Set to true when base bdev has completed the configuration process */
1019222ff97SArtur Paszkiewicz 	bool			is_configured;
102887b8ec4SArtur Paszkiewicz 
103a6708553SArtur Paszkiewicz 	/* Set to true if this base bdev is the target of a background process */
104a6708553SArtur Paszkiewicz 	bool			is_process_target;
105a6708553SArtur Paszkiewicz 
106f1a03e33SArtur Paszkiewicz 	/* Set to true to indicate that the base bdev is being removed because of a failure */
107f1a03e33SArtur Paszkiewicz 	bool			is_failed;
108f1a03e33SArtur Paszkiewicz 
109887b8ec4SArtur Paszkiewicz 	/* callback for base bdev configuration */
110887b8ec4SArtur Paszkiewicz 	raid_base_bdev_cb	configure_cb;
111887b8ec4SArtur Paszkiewicz 
112887b8ec4SArtur Paszkiewicz 	/* context of the callback */
113887b8ec4SArtur Paszkiewicz 	void			*configure_cb_ctx;
11407fe6a43SSeth Howell };
11507fe6a43SSeth Howell 
struct raid_bdev_io;

/*
 * Custom completion callback type for a raid_bdev_io (stored in the
 * completion_cb member of struct raid_bdev_io). Receives the raid_io and its
 * completion status.
 */
typedef void (*raid_bdev_io_completion_cb)(struct raid_bdev_io *raid_io,
		enum spdk_bdev_io_status status);
1192b867a50SArtur Paszkiewicz 
12007fe6a43SSeth Howell /*
12184bd6704Spaul luse  * raid_bdev_io is the context part of bdev_io. It contains the information
12284bd6704Spaul luse  * related to bdev_io for a raid bdev
12384bd6704Spaul luse  */
12484bd6704Spaul luse struct raid_bdev_io {
125cb8dbf17SArtur Paszkiewicz 	/* The raid bdev associated with this IO */
126cb8dbf17SArtur Paszkiewicz 	struct raid_bdev *raid_bdev;
127cb8dbf17SArtur Paszkiewicz 
128a4e1703eSArtur Paszkiewicz 	uint64_t offset_blocks;
129a4e1703eSArtur Paszkiewicz 	uint64_t num_blocks;
130a4e1703eSArtur Paszkiewicz 	struct iovec *iovs;
131a4e1703eSArtur Paszkiewicz 	int iovcnt;
13270401f58SArtur Paszkiewicz 	enum spdk_bdev_io_type type;
133a4e1703eSArtur Paszkiewicz 	struct spdk_memory_domain *memory_domain;
134a4e1703eSArtur Paszkiewicz 	void *memory_domain_ctx;
135a4e1703eSArtur Paszkiewicz 	void *md_buf;
136a4e1703eSArtur Paszkiewicz 
13784bd6704Spaul luse 	/* WaitQ entry, used only in waitq logic */
13884bd6704Spaul luse 	struct spdk_bdev_io_wait_entry	waitq_entry;
13984bd6704Spaul luse 
140152cce0dSArtur Paszkiewicz 	/* Context of the original channel for this IO */
141152cce0dSArtur Paszkiewicz 	struct raid_bdev_io_channel	*raid_ch;
14284bd6704Spaul luse 
14384bd6704Spaul luse 	/* Used for tracking progress on io requests sent to member disks. */
1442efe6d92SArtur Paszkiewicz 	uint64_t			base_bdev_io_remaining;
14584bd6704Spaul luse 	uint8_t				base_bdev_io_submitted;
146d8a22322SArtur Paszkiewicz 	enum spdk_bdev_io_status	base_bdev_io_status;
1479820a949SArtur Paszkiewicz 	/* This will be the raid_io completion status unless any base io's status is different. */
1489820a949SArtur Paszkiewicz 	enum spdk_bdev_io_status	base_bdev_io_status_default;
1497131bec4SArtur Paszkiewicz 
1507131bec4SArtur Paszkiewicz 	/* Private data for the raid module */
1517131bec4SArtur Paszkiewicz 	void				*module_private;
1522b867a50SArtur Paszkiewicz 
1532b867a50SArtur Paszkiewicz 	/* Custom completion callback. Overrides bdev_io completion if set. */
1542b867a50SArtur Paszkiewicz 	raid_bdev_io_completion_cb	completion_cb;
155aae5e04fSArtur Paszkiewicz 
156aae5e04fSArtur Paszkiewicz 	struct {
157aae5e04fSArtur Paszkiewicz 		uint64_t		offset;
158aae5e04fSArtur Paszkiewicz 		struct iovec		*iov;
159aae5e04fSArtur Paszkiewicz 		struct iovec		iov_copy;
160aae5e04fSArtur Paszkiewicz 	} split;
16184bd6704Spaul luse };
16284bd6704Spaul luse 
16350d58ff3SArtur Paszkiewicz struct raid_bdev_process_request {
16450d58ff3SArtur Paszkiewicz 	struct raid_bdev_process *process;
16550d58ff3SArtur Paszkiewicz 	struct raid_base_bdev_info *target;
16650d58ff3SArtur Paszkiewicz 	struct spdk_io_channel *target_ch;
16750d58ff3SArtur Paszkiewicz 	uint64_t offset_blocks;
16850d58ff3SArtur Paszkiewicz 	uint32_t num_blocks;
16950d58ff3SArtur Paszkiewicz 	struct iovec iov;
17050d58ff3SArtur Paszkiewicz 	void *md_buf;
17150d58ff3SArtur Paszkiewicz 	/* bdev_io is raid_io's driver_ctx - don't reorder them!
17250d58ff3SArtur Paszkiewicz 	 * These are needed for re-using raid module I/O functions for process I/O. */
17350d58ff3SArtur Paszkiewicz 	struct spdk_bdev_io bdev_io;
17450d58ff3SArtur Paszkiewicz 	struct raid_bdev_io raid_io;
17550d58ff3SArtur Paszkiewicz 	TAILQ_ENTRY(raid_bdev_process_request) link;
17650d58ff3SArtur Paszkiewicz };
17750d58ff3SArtur Paszkiewicz 
/*
 * Callback type for raid_bdev configuration (stored in the configure_cb
 * member of struct raid_bdev). Receives the caller-supplied context and an
 * integer result code.
 */
typedef void (*raid_bdev_configure_cb)(void *cb_ctx, int rc);
179*5558f3f5SArtur Paszkiewicz 
18084bd6704Spaul luse /*
18107fe6a43SSeth Howell  * raid_bdev is the single entity structure which contains SPDK block device
18207fe6a43SSeth Howell  * and the information related to any raid bdev either configured or
18307fe6a43SSeth Howell  * in configuring list. io device is created on this.
18407fe6a43SSeth Howell  */
18507fe6a43SSeth Howell struct raid_bdev {
18607fe6a43SSeth Howell 	/* raid bdev device, this will get registered in bdev layer */
18707fe6a43SSeth Howell 	struct spdk_bdev		bdev;
18807fe6a43SSeth Howell 
1899d2b7b41SArtur Paszkiewicz 	/* the raid bdev descriptor, opened for internal use */
1909d2b7b41SArtur Paszkiewicz 	struct spdk_bdev_desc		*self_desc;
1919d2b7b41SArtur Paszkiewicz 
19207fe6a43SSeth Howell 	/* link of raid bdev to link it to global raid bdev list */
19307fe6a43SSeth Howell 	TAILQ_ENTRY(raid_bdev)		global_link;
19407fe6a43SSeth Howell 
19507fe6a43SSeth Howell 	/* array of base bdev info */
19607fe6a43SSeth Howell 	struct raid_base_bdev_info	*base_bdev_info;
19707fe6a43SSeth Howell 
19807fe6a43SSeth Howell 	/* strip size of raid bdev in blocks */
19907fe6a43SSeth Howell 	uint32_t			strip_size;
20007fe6a43SSeth Howell 
20107fe6a43SSeth Howell 	/* strip size of raid bdev in KB */
20207fe6a43SSeth Howell 	uint32_t			strip_size_kb;
20307fe6a43SSeth Howell 
20407fe6a43SSeth Howell 	/* strip size bit shift for optimized calculation */
20507fe6a43SSeth Howell 	uint32_t			strip_size_shift;
20607fe6a43SSeth Howell 
20707fe6a43SSeth Howell 	/* state of raid bdev */
20807fe6a43SSeth Howell 	enum raid_bdev_state		state;
20907fe6a43SSeth Howell 
21007fe6a43SSeth Howell 	/* number of base bdevs comprising raid bdev  */
21107fe6a43SSeth Howell 	uint8_t				num_base_bdevs;
21207fe6a43SSeth Howell 
21307fe6a43SSeth Howell 	/* number of base bdevs discovered */
21407fe6a43SSeth Howell 	uint8_t				num_base_bdevs_discovered;
21507fe6a43SSeth Howell 
216255d0786SArtur Paszkiewicz 	/*
217255d0786SArtur Paszkiewicz 	 * Number of operational base bdevs, i.e. how many we know/expect to be working. This
218255d0786SArtur Paszkiewicz 	 * will be less than num_base_bdevs when starting a degraded array.
219255d0786SArtur Paszkiewicz 	 */
220255d0786SArtur Paszkiewicz 	uint8_t				num_base_bdevs_operational;
221255d0786SArtur Paszkiewicz 
222ad94094fSKrzysztof Smolinski 	/* minimum number of viable base bdevs that are required by array to operate */
223ad94094fSKrzysztof Smolinski 	uint8_t				min_base_bdevs_operational;
224ad94094fSKrzysztof Smolinski 
22507fe6a43SSeth Howell 	/* Raid Level of this raid bdev */
226445e667fSArtur Paszkiewicz 	enum raid_level			level;
22707fe6a43SSeth Howell 
22807fe6a43SSeth Howell 	/* Set to true if destroy of this raid bdev is started. */
22907fe6a43SSeth Howell 	bool				destroy_started;
230706029d5SArtur Paszkiewicz 
231706029d5SArtur Paszkiewicz 	/* Module for RAID-level specific operations */
232706029d5SArtur Paszkiewicz 	struct raid_bdev_module		*module;
23373763d40SArtur Paszkiewicz 
23473763d40SArtur Paszkiewicz 	/* Private data for the raid module */
23573763d40SArtur Paszkiewicz 	void				*module_private;
23677b8f7b6SArtur Paszkiewicz 
23777b8f7b6SArtur Paszkiewicz 	/* Superblock */
2383bb8256aSArtur Paszkiewicz 	bool				superblock_enabled;
23977b8f7b6SArtur Paszkiewicz 	struct raid_bdev_superblock	*sb;
24050d58ff3SArtur Paszkiewicz 
241fa8b2fd7SArtur Paszkiewicz 	/* Superblock buffer used for I/O */
242fa8b2fd7SArtur Paszkiewicz 	void				*sb_io_buf;
243fa8b2fd7SArtur Paszkiewicz 	uint32_t			sb_io_buf_size;
244fa8b2fd7SArtur Paszkiewicz 
24550d58ff3SArtur Paszkiewicz 	/* Raid bdev background process, e.g. rebuild */
24650d58ff3SArtur Paszkiewicz 	struct raid_bdev_process	*process;
247*5558f3f5SArtur Paszkiewicz 
248*5558f3f5SArtur Paszkiewicz 	/* Callback and context for raid_bdev configuration */
249*5558f3f5SArtur Paszkiewicz 	raid_bdev_configure_cb		configure_cb;
250*5558f3f5SArtur Paszkiewicz 	void				*configure_cb_ctx;
25107fe6a43SSeth Howell };
25207fe6a43SSeth Howell 
/*
 * Iterate over every entry of a raid bdev's base_bdev_info array.
 *
 * r - pointer expression to the raid bdev; it is evaluated more than once,
 *     so it must not have side effects.
 * i - iterator lvalue of type struct raid_base_bdev_info *.
 *
 * Both arguments are parenthesized in the expansion so that expressions such
 * as `&raid` or `ctx->raid_bdev` can be passed safely.
 */
#define RAID_FOR_EACH_BASE_BDEV(r, i) \
	for ((i) = (r)->base_bdev_info; \
	     (i) < (r)->base_bdev_info + (r)->num_base_bdevs; \
	     (i)++)
255a193dcb8SArtur Paszkiewicz 
struct raid_bdev_io_channel;

/* TAIL head for raid bdev list */
TAILQ_HEAD(raid_all_tailq, raid_bdev);

/* Global raid bdev list; declared here, defined outside this header */
extern struct raid_all_tailq		g_raid_bdev_list;

/* Callback type passed to raid_bdev_delete() */
typedef void (*raid_bdev_destruct_cb)(void *cb_ctx, int rc);
26407fe6a43SSeth Howell 
265dccdd1e5SArtur Paszkiewicz int raid_bdev_create(const char *name, uint32_t strip_size, uint8_t num_base_bdevs,
266deed7d2fSArtur Paszkiewicz 		     enum raid_level level, bool superblock, const struct spdk_uuid *uuid,
267deed7d2fSArtur Paszkiewicz 		     struct raid_bdev **raid_bdev_out);
268dccdd1e5SArtur Paszkiewicz void raid_bdev_delete(struct raid_bdev *raid_bdev, raid_bdev_destruct_cb cb_fn, void *cb_ctx);
269e9af23deSArtur Paszkiewicz int raid_bdev_add_base_bdev(struct raid_bdev *raid_bdev, const char *name,
2706eed119eSArtur Paszkiewicz 			    raid_base_bdev_cb cb_fn, void *cb_ctx);
271dccdd1e5SArtur Paszkiewicz struct raid_bdev *raid_bdev_find_by_name(const char *name);
27246ff15a6SArtur Paszkiewicz enum raid_level raid_bdev_str_to_level(const char *str);
273445e667fSArtur Paszkiewicz const char *raid_bdev_level_to_str(enum raid_level level);
27446ff15a6SArtur Paszkiewicz enum raid_bdev_state raid_bdev_str_to_state(const char *str);
27546ff15a6SArtur Paszkiewicz const char *raid_bdev_state_to_str(enum raid_bdev_state state);
27650d58ff3SArtur Paszkiewicz const char *raid_bdev_process_to_str(enum raid_process_type value);
277ec6d94b6SArtur Paszkiewicz void raid_bdev_write_info_json(struct raid_bdev *raid_bdev, struct spdk_json_write_ctx *w);
278887b8ec4SArtur Paszkiewicz int raid_bdev_remove_base_bdev(struct spdk_bdev *base_bdev, raid_base_bdev_cb cb_fn, void *cb_ctx);
27907fe6a43SSeth Howell 
280706029d5SArtur Paszkiewicz /*
281706029d5SArtur Paszkiewicz  * RAID module descriptor
282706029d5SArtur Paszkiewicz  */
283706029d5SArtur Paszkiewicz struct raid_bdev_module {
284706029d5SArtur Paszkiewicz 	/* RAID level implemented by this module */
285706029d5SArtur Paszkiewicz 	enum raid_level level;
286706029d5SArtur Paszkiewicz 
2870a5194faSArtur Paszkiewicz 	/* Minimum required number of base bdevs. Must be > 0. */
2880a5194faSArtur Paszkiewicz 	uint8_t base_bdevs_min;
2890a5194faSArtur Paszkiewicz 
2900b4ca522SArtur Paszkiewicz 	/*
291ad94094fSKrzysztof Smolinski 	 * RAID constraint. Determines number of base bdevs that can be removed
292ad94094fSKrzysztof Smolinski 	 * without failing the array.
29373763d40SArtur Paszkiewicz 	 */
294ad94094fSKrzysztof Smolinski 	struct {
295ad94094fSKrzysztof Smolinski 		enum {
296ad94094fSKrzysztof Smolinski 			CONSTRAINT_UNSET = 0,
297ad94094fSKrzysztof Smolinski 			CONSTRAINT_MAX_BASE_BDEVS_REMOVED,
298ad94094fSKrzysztof Smolinski 			CONSTRAINT_MIN_BASE_BDEVS_OPERATIONAL,
299ad94094fSKrzysztof Smolinski 		} type;
300ad94094fSKrzysztof Smolinski 		uint8_t value;
301ad94094fSKrzysztof Smolinski 	} base_bdevs_constraint;
30273763d40SArtur Paszkiewicz 
30372672d49SArtur Paszkiewicz 	/* Set to true if this module supports memory domains. */
30472672d49SArtur Paszkiewicz 	bool memory_domains_supported;
30572672d49SArtur Paszkiewicz 
306192db8deSSlawomir Ptak 	/* Set to true if this module supports DIF/DIX */
307192db8deSSlawomir Ptak 	bool dif_supported;
308192db8deSSlawomir Ptak 
30973763d40SArtur Paszkiewicz 	/*
3100b4ca522SArtur Paszkiewicz 	 * Called when the raid is starting, right before changing the state to
3110b4ca522SArtur Paszkiewicz 	 * online and registering the bdev. Parameters of the bdev like blockcnt
3120b4ca522SArtur Paszkiewicz 	 * should be set here.
3130b4ca522SArtur Paszkiewicz 	 *
3140b4ca522SArtur Paszkiewicz 	 * Non-zero return value will abort the startup process.
3150b4ca522SArtur Paszkiewicz 	 */
3160b4ca522SArtur Paszkiewicz 	int (*start)(struct raid_bdev *raid_bdev);
3170b4ca522SArtur Paszkiewicz 
3180b4ca522SArtur Paszkiewicz 	/*
3190b4ca522SArtur Paszkiewicz 	 * Called when the raid is stopping, right before changing the state to
3200b4ca522SArtur Paszkiewicz 	 * offline and unregistering the bdev. Optional.
3219cf1ab5bSArtur Paszkiewicz 	 *
3229cf1ab5bSArtur Paszkiewicz 	 * The function should return false if it is asynchronous. Then, after
3239cf1ab5bSArtur Paszkiewicz 	 * the async operation has completed and the module is fully stopped
3249cf1ab5bSArtur Paszkiewicz 	 * raid_bdev_module_stop_done() must be called.
3250b4ca522SArtur Paszkiewicz 	 */
3269cf1ab5bSArtur Paszkiewicz 	bool (*stop)(struct raid_bdev *raid_bdev);
3270b4ca522SArtur Paszkiewicz 
328182278c0SArtur Paszkiewicz 	/* Handler for R/W requests */
329182278c0SArtur Paszkiewicz 	void (*submit_rw_request)(struct raid_bdev_io *raid_io);
330182278c0SArtur Paszkiewicz 
3311d3ca5c4SArtur Paszkiewicz 	/* Handler for requests without payload (flush, unmap). Optional. */
332182278c0SArtur Paszkiewicz 	void (*submit_null_payload_request)(struct raid_bdev_io *raid_io);
333182278c0SArtur Paszkiewicz 
334c89e2008SArtur Paszkiewicz 	/*
335c89e2008SArtur Paszkiewicz 	 * Called when the bdev's IO channel is created to get the module's private IO channel.
336c89e2008SArtur Paszkiewicz 	 * Optional.
337c89e2008SArtur Paszkiewicz 	 */
338c89e2008SArtur Paszkiewicz 	struct spdk_io_channel *(*get_io_channel)(struct raid_bdev *raid_bdev);
339c89e2008SArtur Paszkiewicz 
340c9224e26SShuhei Matsumoto 	/*
341c9224e26SShuhei Matsumoto 	 * Called when a base_bdev is resized to resize the raid if the condition
342ceb2ad0cSArtur Paszkiewicz 	 * is satisfied. Optional.
343ceb2ad0cSArtur Paszkiewicz 	 *
344ceb2ad0cSArtur Paszkiewicz 	 * Returns true if the resize was performed.
345c9224e26SShuhei Matsumoto 	 */
346ceb2ad0cSArtur Paszkiewicz 	bool (*resize)(struct raid_bdev *raid_bdev);
347c9224e26SShuhei Matsumoto 
34850d58ff3SArtur Paszkiewicz 	/* Handler for raid process requests. Required for raid modules with redundancy. */
34950d58ff3SArtur Paszkiewicz 	int (*submit_process_request)(struct raid_bdev_process_request *process_req,
35050d58ff3SArtur Paszkiewicz 				      struct raid_bdev_io_channel *raid_ch);
35150d58ff3SArtur Paszkiewicz 
352706029d5SArtur Paszkiewicz 	TAILQ_ENTRY(raid_bdev_module) link;
353706029d5SArtur Paszkiewicz };
354706029d5SArtur Paszkiewicz 
/* Adds a module descriptor to the module list; defined outside this header. */
void raid_bdev_module_list_add(struct raid_bdev_module *raid_module);

/*
 * Two-level expansion so that __LINE__ is expanded before token pasting,
 * yielding a function name of the form raid_module_register_<line>.
 */
#define __RAID_MODULE_REGISTER(line) __RAID_MODULE_REGISTER_(line)
#define __RAID_MODULE_REGISTER_(line) raid_module_register_##line

/*
 * Defines a static constructor function that passes _module to
 * raid_bdev_module_list_add(). _module must be a pointer to a
 * struct raid_bdev_module.
 */
#define RAID_MODULE_REGISTER(_module)					\
__attribute__((constructor)) static void				\
__RAID_MODULE_REGISTER(__LINE__)(void)					\
{									\
	raid_bdev_module_list_add(_module);				\
}
366706029d5SArtur Paszkiewicz 
3678dd1cd21SBen Walker bool raid_bdev_io_complete_part(struct raid_bdev_io *raid_io, uint64_t completed,
3682efe6d92SArtur Paszkiewicz 				enum spdk_bdev_io_status status);
3698dd1cd21SBen Walker void raid_bdev_queue_io_wait(struct raid_bdev_io *raid_io, struct spdk_bdev *bdev,
370ae70d6a4SArtur Paszkiewicz 			     struct spdk_io_channel *ch, spdk_bdev_io_wait_cb cb_fn);
3718dd1cd21SBen Walker void raid_bdev_io_complete(struct raid_bdev_io *raid_io, enum spdk_bdev_io_status status);
3729cf1ab5bSArtur Paszkiewicz void raid_bdev_module_stop_done(struct raid_bdev *raid_bdev);
37350d58ff3SArtur Paszkiewicz struct spdk_io_channel *raid_bdev_channel_get_base_channel(struct raid_bdev_io_channel *raid_ch,
37450d58ff3SArtur Paszkiewicz 		uint8_t idx);
37550d58ff3SArtur Paszkiewicz void *raid_bdev_channel_get_module_ctx(struct raid_bdev_io_channel *raid_ch);
37635a054dbSArtur Paszkiewicz struct raid_base_bdev_info *raid_bdev_channel_get_base_info(struct raid_bdev_io_channel *raid_ch,
37735a054dbSArtur Paszkiewicz 		struct spdk_bdev *base_bdev);
37850d58ff3SArtur Paszkiewicz void raid_bdev_process_request_complete(struct raid_bdev_process_request *process_req, int status);
379a4e1703eSArtur Paszkiewicz void raid_bdev_io_init(struct raid_bdev_io *raid_io, struct raid_bdev_io_channel *raid_ch,
380a4e1703eSArtur Paszkiewicz 		       enum spdk_bdev_io_type type, uint64_t offset_blocks,
381a4e1703eSArtur Paszkiewicz 		       uint64_t num_blocks, struct iovec *iovs, int iovcnt, void *md_buf,
382a4e1703eSArtur Paszkiewicz 		       struct spdk_memory_domain *memory_domain, void *memory_domain_ctx);
383f1a03e33SArtur Paszkiewicz void raid_bdev_fail_base_bdev(struct raid_base_bdev_info *base_info);
384fc9ca1d0SArtur Paszkiewicz 
385fd92e702SArtur Paszkiewicz static inline uint8_t
386fd92e702SArtur Paszkiewicz raid_bdev_base_bdev_slot(struct raid_base_bdev_info *base_info)
387fd92e702SArtur Paszkiewicz {
388fd92e702SArtur Paszkiewicz 	return base_info - base_info->raid_bdev->base_bdev_info;
389fd92e702SArtur Paszkiewicz }
390fd92e702SArtur Paszkiewicz 
3919820a949SArtur Paszkiewicz static inline void
3929820a949SArtur Paszkiewicz raid_bdev_io_set_default_status(struct raid_bdev_io *raid_io, enum spdk_bdev_io_status status)
3939820a949SArtur Paszkiewicz {
3949820a949SArtur Paszkiewicz 	assert(raid_io->base_bdev_io_submitted == 0);
3959820a949SArtur Paszkiewicz 	raid_io->base_bdev_io_status = status;
3969820a949SArtur Paszkiewicz 	raid_io->base_bdev_io_status_default = status;
3979820a949SArtur Paszkiewicz }
3989820a949SArtur Paszkiewicz 
/* DIX reference tag helpers; the definitions are not part of this header. */
int raid_bdev_remap_dix_reftag(void *md_buf, uint64_t num_blocks,
			       struct spdk_bdev *bdev, uint32_t remapped_offset);
int raid_bdev_verify_dix_reftag(struct iovec *iovs, int iovcnt, void *md_buf,
				uint64_t num_blocks, struct spdk_bdev *bdev, uint32_t offset_blocks);
403192db8deSSlawomir Ptak 
404614ca6d2SArtur Paszkiewicz /**
405614ca6d2SArtur Paszkiewicz  * Raid bdev I/O read/write wrapper for spdk_bdev_readv_blocks_ext function.
406614ca6d2SArtur Paszkiewicz  */
4071b9c5629SArtur Paszkiewicz static inline int
4081b9c5629SArtur Paszkiewicz raid_bdev_readv_blocks_ext(struct raid_base_bdev_info *base_info, struct spdk_io_channel *ch,
409614ca6d2SArtur Paszkiewicz 			   struct iovec *iov, int iovcnt, uint64_t offset_blocks,
410614ca6d2SArtur Paszkiewicz 			   uint64_t num_blocks, spdk_bdev_io_completion_cb cb, void *cb_arg,
4111b9c5629SArtur Paszkiewicz 			   struct spdk_bdev_ext_io_opts *opts)
4121b9c5629SArtur Paszkiewicz {
4131b9c5629SArtur Paszkiewicz 	return spdk_bdev_readv_blocks_ext(base_info->desc, ch, iov, iovcnt,
4141b9c5629SArtur Paszkiewicz 					  base_info->data_offset + offset_blocks, num_blocks, cb, cb_arg, opts);
4151b9c5629SArtur Paszkiewicz }
416614ca6d2SArtur Paszkiewicz 
417614ca6d2SArtur Paszkiewicz /**
418614ca6d2SArtur Paszkiewicz  * Raid bdev I/O read/write wrapper for spdk_bdev_writev_blocks_ext function.
419614ca6d2SArtur Paszkiewicz  */
4201b9c5629SArtur Paszkiewicz static inline int
4211b9c5629SArtur Paszkiewicz raid_bdev_writev_blocks_ext(struct raid_base_bdev_info *base_info, struct spdk_io_channel *ch,
422614ca6d2SArtur Paszkiewicz 			    struct iovec *iov, int iovcnt, uint64_t offset_blocks,
423614ca6d2SArtur Paszkiewicz 			    uint64_t num_blocks, spdk_bdev_io_completion_cb cb, void *cb_arg,
4241b9c5629SArtur Paszkiewicz 			    struct spdk_bdev_ext_io_opts *opts)
4251b9c5629SArtur Paszkiewicz {
4261b9c5629SArtur Paszkiewicz 	int rc;
4271b9c5629SArtur Paszkiewicz 	uint64_t remapped_offset_blocks = base_info->data_offset + offset_blocks;
4281b9c5629SArtur Paszkiewicz 
4291b9c5629SArtur Paszkiewicz 	if (spdk_unlikely(spdk_bdev_get_dif_type(&base_info->raid_bdev->bdev) != SPDK_DIF_DISABLE &&
4301b9c5629SArtur Paszkiewicz 			  (base_info->raid_bdev->bdev.dif_check_flags & SPDK_DIF_FLAGS_REFTAG_CHECK))) {
4311b9c5629SArtur Paszkiewicz 		rc = raid_bdev_remap_dix_reftag(opts->metadata, num_blocks, &base_info->raid_bdev->bdev,
4321b9c5629SArtur Paszkiewicz 						remapped_offset_blocks);
4331b9c5629SArtur Paszkiewicz 		if (rc != 0) {
4341b9c5629SArtur Paszkiewicz 			return rc;
4351b9c5629SArtur Paszkiewicz 		}
4361b9c5629SArtur Paszkiewicz 	}
4371b9c5629SArtur Paszkiewicz 
4381b9c5629SArtur Paszkiewicz 	return spdk_bdev_writev_blocks_ext(base_info->desc, ch, iov, iovcnt,
4391b9c5629SArtur Paszkiewicz 					   remapped_offset_blocks, num_blocks, cb, cb_arg, opts);
4401b9c5629SArtur Paszkiewicz }
441614ca6d2SArtur Paszkiewicz 
442614ca6d2SArtur Paszkiewicz /**
443614ca6d2SArtur Paszkiewicz  * Raid bdev I/O read/write wrapper for spdk_bdev_unmap_blocks function.
444614ca6d2SArtur Paszkiewicz  */
445614ca6d2SArtur Paszkiewicz static inline int
446614ca6d2SArtur Paszkiewicz raid_bdev_unmap_blocks(struct raid_base_bdev_info *base_info, struct spdk_io_channel *ch,
447614ca6d2SArtur Paszkiewicz 		       uint64_t offset_blocks, uint64_t num_blocks,
448614ca6d2SArtur Paszkiewicz 		       spdk_bdev_io_completion_cb cb, void *cb_arg)
449614ca6d2SArtur Paszkiewicz {
450deed7d2fSArtur Paszkiewicz 	return spdk_bdev_unmap_blocks(base_info->desc, ch, base_info->data_offset + offset_blocks,
451deed7d2fSArtur Paszkiewicz 				      num_blocks, cb, cb_arg);
452614ca6d2SArtur Paszkiewicz }
453614ca6d2SArtur Paszkiewicz 
454614ca6d2SArtur Paszkiewicz /**
455614ca6d2SArtur Paszkiewicz  * Raid bdev I/O read/write wrapper for spdk_bdev_flush_blocks function.
456614ca6d2SArtur Paszkiewicz  */
457614ca6d2SArtur Paszkiewicz static inline int
458614ca6d2SArtur Paszkiewicz raid_bdev_flush_blocks(struct raid_base_bdev_info *base_info, struct spdk_io_channel *ch,
459614ca6d2SArtur Paszkiewicz 		       uint64_t offset_blocks, uint64_t num_blocks,
460614ca6d2SArtur Paszkiewicz 		       spdk_bdev_io_completion_cb cb, void *cb_arg)
461614ca6d2SArtur Paszkiewicz {
462deed7d2fSArtur Paszkiewicz 	return spdk_bdev_flush_blocks(base_info->desc, ch, base_info->data_offset + offset_blocks,
463deed7d2fSArtur Paszkiewicz 				      num_blocks, cb, cb_arg);
464614ca6d2SArtur Paszkiewicz }
465614ca6d2SArtur Paszkiewicz 
/*
 * Definitions related to raid bdev superblock
 */

/* Superblock format version */
#define RAID_BDEV_SB_VERSION_MAJOR	1
#define RAID_BDEV_SB_VERSION_MINOR	0

/* Size in bytes of the name field in struct raid_bdev_superblock */
#define RAID_BDEV_SB_NAME_SIZE		64
47477b8f7b6SArtur Paszkiewicz 
/*
 * State of a base bdev as recorded in the superblock. The numeric values are
 * explicit because they are stored in the state field of
 * struct raid_bdev_sb_base_bdev.
 */
enum raid_bdev_sb_base_bdev_state {
	RAID_SB_BASE_BDEV_MISSING	= 0,
	RAID_SB_BASE_BDEV_CONFIGURED	= 1,
	RAID_SB_BASE_BDEV_FAILED	= 2,
	RAID_SB_BASE_BDEV_SPARE		= 3,
};
48177b8f7b6SArtur Paszkiewicz 
48277b8f7b6SArtur Paszkiewicz struct raid_bdev_sb_base_bdev {
48377b8f7b6SArtur Paszkiewicz 	/* uuid of the base bdev */
48477b8f7b6SArtur Paszkiewicz 	struct spdk_uuid	uuid;
48577b8f7b6SArtur Paszkiewicz 	/* offset in blocks from base device start to the start of raid data area */
48677b8f7b6SArtur Paszkiewicz 	uint64_t		data_offset;
48777b8f7b6SArtur Paszkiewicz 	/* size in blocks of the base device raid data area */
48877b8f7b6SArtur Paszkiewicz 	uint64_t		data_size;
48977b8f7b6SArtur Paszkiewicz 	/* state of the base bdev */
49077b8f7b6SArtur Paszkiewicz 	uint32_t		state;
49177b8f7b6SArtur Paszkiewicz 	/* feature/status flags */
49277b8f7b6SArtur Paszkiewicz 	uint32_t		flags;
49377b8f7b6SArtur Paszkiewicz 	/* slot number of this base bdev in the raid */
49477b8f7b6SArtur Paszkiewicz 	uint8_t			slot;
49577b8f7b6SArtur Paszkiewicz 
49677b8f7b6SArtur Paszkiewicz 	uint8_t			reserved[23];
49777b8f7b6SArtur Paszkiewicz };
49877b8f7b6SArtur Paszkiewicz SPDK_STATIC_ASSERT(sizeof(struct raid_bdev_sb_base_bdev) == 64, "incorrect size");
49977b8f7b6SArtur Paszkiewicz 
50077b8f7b6SArtur Paszkiewicz struct raid_bdev_superblock {
50177b8f7b6SArtur Paszkiewicz #define RAID_BDEV_SB_SIG "SPDKRAID"
50277b8f7b6SArtur Paszkiewicz 	uint8_t			signature[8];
50377b8f7b6SArtur Paszkiewicz 	struct {
50477b8f7b6SArtur Paszkiewicz 		/* incremented when a breaking change in the superblock structure is made */
50577b8f7b6SArtur Paszkiewicz 		uint16_t	major;
50677b8f7b6SArtur Paszkiewicz 		/* incremented for changes in the superblock that are backward compatible */
50777b8f7b6SArtur Paszkiewicz 		uint16_t	minor;
50877b8f7b6SArtur Paszkiewicz 	} version;
50977b8f7b6SArtur Paszkiewicz 	/* length in bytes of the entire superblock */
51077b8f7b6SArtur Paszkiewicz 	uint32_t		length;
51177b8f7b6SArtur Paszkiewicz 	/* crc32c checksum of the entire superblock */
51277b8f7b6SArtur Paszkiewicz 	uint32_t		crc;
51377b8f7b6SArtur Paszkiewicz 	/* feature/status flags */
51477b8f7b6SArtur Paszkiewicz 	uint32_t		flags;
51577b8f7b6SArtur Paszkiewicz 	/* unique id of the raid bdev */
51677b8f7b6SArtur Paszkiewicz 	struct spdk_uuid	uuid;
51777b8f7b6SArtur Paszkiewicz 	/* name of the raid bdev */
51877b8f7b6SArtur Paszkiewicz 	uint8_t			name[RAID_BDEV_SB_NAME_SIZE];
51977b8f7b6SArtur Paszkiewicz 	/* size of the raid bdev in blocks */
52077b8f7b6SArtur Paszkiewicz 	uint64_t		raid_size;
52177b8f7b6SArtur Paszkiewicz 	/* the raid bdev block size - must be the same for all base bdevs */
52277b8f7b6SArtur Paszkiewicz 	uint32_t		block_size;
52377b8f7b6SArtur Paszkiewicz 	/* the raid level */
52477b8f7b6SArtur Paszkiewicz 	uint32_t		level;
52577b8f7b6SArtur Paszkiewicz 	/* strip (chunk) size in blocks */
52677b8f7b6SArtur Paszkiewicz 	uint32_t		strip_size;
52777b8f7b6SArtur Paszkiewicz 	/* state of the raid */
52877b8f7b6SArtur Paszkiewicz 	uint32_t		state;
52977b8f7b6SArtur Paszkiewicz 	/* sequence number, incremented on every superblock update */
53077b8f7b6SArtur Paszkiewicz 	uint64_t		seq_number;
53177b8f7b6SArtur Paszkiewicz 	/* number of raid base devices */
53277b8f7b6SArtur Paszkiewicz 	uint8_t			num_base_bdevs;
53377b8f7b6SArtur Paszkiewicz 
53477b8f7b6SArtur Paszkiewicz 	uint8_t			reserved[118];
53577b8f7b6SArtur Paszkiewicz 
53677b8f7b6SArtur Paszkiewicz 	/* size of the base bdevs array */
53777b8f7b6SArtur Paszkiewicz 	uint8_t			base_bdevs_size;
53877b8f7b6SArtur Paszkiewicz 	/* array of base bdev descriptors */
53977b8f7b6SArtur Paszkiewicz 	struct raid_bdev_sb_base_bdev base_bdevs[];
54077b8f7b6SArtur Paszkiewicz };
54177b8f7b6SArtur Paszkiewicz SPDK_STATIC_ASSERT(sizeof(struct raid_bdev_superblock) == 256, "incorrect size");
54277b8f7b6SArtur Paszkiewicz 
54346adadcfSArtur Paszkiewicz #define RAID_BDEV_SB_MAX_LENGTH (sizeof(struct raid_bdev_superblock) + UINT8_MAX * sizeof(struct raid_bdev_sb_base_bdev))
54477b8f7b6SArtur Paszkiewicz 
54577b8f7b6SArtur Paszkiewicz SPDK_STATIC_ASSERT(RAID_BDEV_SB_MAX_LENGTH < RAID_BDEV_MIN_DATA_OFFSET_SIZE,
54677b8f7b6SArtur Paszkiewicz 		   "Incorrect min data offset");
54777b8f7b6SArtur Paszkiewicz 
/* Callback types passed to raid_bdev_write_superblock() and raid_bdev_load_base_bdev_superblock() */
typedef void (*raid_bdev_write_sb_cb)(int status, struct raid_bdev *raid_bdev, void *ctx);
typedef void (*raid_bdev_load_sb_cb)(const struct raid_bdev_superblock *sb, int status, void *ctx);

/* Superblock functions; the definitions are not part of this header. */
int raid_bdev_alloc_superblock(struct raid_bdev *raid_bdev, uint32_t block_size);
void raid_bdev_free_superblock(struct raid_bdev *raid_bdev);
void raid_bdev_init_superblock(struct raid_bdev *raid_bdev);
void raid_bdev_write_superblock(struct raid_bdev *raid_bdev, raid_bdev_write_sb_cb cb,
				void *cb_ctx);
int raid_bdev_load_base_bdev_superblock(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
					raid_bdev_load_sb_cb cb, void *cb_ctx);
55877b8f7b6SArtur Paszkiewicz 
/* Options read and written with raid_bdev_get_opts() / raid_bdev_set_opts() */
struct spdk_raid_bdev_opts {
	/* Size of the background process window in KiB */
	uint32_t process_window_size_kb;
	/* Maximum bandwidth in MiB to process per second */
	uint32_t process_max_bandwidth_mb_sec;
};
565f39350beSArtur Paszkiewicz 
/* Accessors for the raid bdev options; the definitions are not part of this header. */
void raid_bdev_get_opts(struct spdk_raid_bdev_opts *opts);
int raid_bdev_set_opts(const struct spdk_raid_bdev_opts *opts);
568f39350beSArtur Paszkiewicz 
56907fe6a43SSeth Howell #endif /* SPDK_BDEV_RAID_INTERNAL_H */
570