1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright (C) 2018 Intel Corporation. 3 * All rights reserved. 4 */ 5 6 #ifndef SPDK_BDEV_RAID_INTERNAL_H 7 #define SPDK_BDEV_RAID_INTERNAL_H 8 9 #include "spdk/bdev_module.h" 10 #include "spdk/uuid.h" 11 12 #define RAID_BDEV_MIN_DATA_OFFSET_SIZE (1024*1024) /* 1 MiB */ 13 14 enum raid_level { 15 INVALID_RAID_LEVEL = -1, 16 RAID0 = 0, 17 RAID1 = 1, 18 RAID5F = 95, /* 0x5f */ 19 CONCAT = 99, 20 }; 21 22 /* 23 * Raid state describes the state of the raid. This raid bdev can be either in 24 * configured list or configuring list 25 */ 26 enum raid_bdev_state { 27 /* raid bdev is ready and is seen by upper layers */ 28 RAID_BDEV_STATE_ONLINE, 29 30 /* 31 * raid bdev is configuring, not all underlying bdevs are present. 32 * And can't be seen by upper layers. 33 */ 34 RAID_BDEV_STATE_CONFIGURING, 35 36 /* 37 * In offline state, raid bdev layer will complete all incoming commands without 38 * submitting to underlying base nvme bdevs 39 */ 40 RAID_BDEV_STATE_OFFLINE, 41 42 /* raid bdev state max, new states should be added before this */ 43 RAID_BDEV_STATE_MAX 44 }; 45 46 typedef void (*raid_bdev_remove_base_bdev_cb)(void *ctx, int status); 47 48 /* 49 * raid_base_bdev_info contains information for the base bdevs which are part of some 50 * raid. This structure contains the per base bdev information. Whatever is 51 * required per base device for raid bdev will be kept here 52 */ 53 struct raid_base_bdev_info { 54 /* The raid bdev that this base bdev belongs to */ 55 struct raid_bdev *raid_bdev; 56 57 /* name of the bdev */ 58 char *name; 59 60 /* pointer to base bdev descriptor opened by raid bdev */ 61 struct spdk_bdev_desc *desc; 62 63 /* offset in blocks from the start of the base bdev to the start of the data region */ 64 uint64_t data_offset; 65 66 /* size in blocks of the base bdev's data region */ 67 uint64_t data_size; 68 69 /* 70 * When underlying base device calls the hot plug function on drive removal, 71 * this flag will be set and later after doing some processing, base device 72 * descriptor will be closed 73 */ 74 bool remove_scheduled; 75 76 /* callback for base bdev removal */ 77 raid_bdev_remove_base_bdev_cb remove_cb; 78 79 /* context of the callback */ 80 void *remove_cb_ctx; 81 82 /* Hold the number of blocks to know how large the base bdev is resized. */ 83 uint64_t blockcnt; 84 }; 85 86 /* 87 * raid_bdev_io is the context part of bdev_io. It contains the information 88 * related to bdev_io for a raid bdev 89 */ 90 struct raid_bdev_io { 91 /* The raid bdev associated with this IO */ 92 struct raid_bdev *raid_bdev; 93 94 /* WaitQ entry, used only in waitq logic */ 95 struct spdk_bdev_io_wait_entry waitq_entry; 96 97 /* Context of the original channel for this IO */ 98 struct raid_bdev_io_channel *raid_ch; 99 100 /* Used for tracking progress on io requests sent to member disks. */ 101 uint64_t base_bdev_io_remaining; 102 uint8_t base_bdev_io_submitted; 103 uint8_t base_bdev_io_status; 104 105 /* Private data for the raid module */ 106 void *module_private; 107 }; 108 109 /* 110 * raid_bdev is the single entity structure which contains SPDK block device 111 * and the information related to any raid bdev either configured or 112 * in configuring list. io device is created on this. 113 */ 114 struct raid_bdev { 115 /* raid bdev device, this will get registered in bdev layer */ 116 struct spdk_bdev bdev; 117 118 /* link of raid bdev to link it to global raid bdev list */ 119 TAILQ_ENTRY(raid_bdev) global_link; 120 121 /* array of base bdev info */ 122 struct raid_base_bdev_info *base_bdev_info; 123 124 /* lock to protect the base bdev array */ 125 struct spdk_spinlock base_bdev_lock; 126 127 /* strip size of raid bdev in blocks */ 128 uint32_t strip_size; 129 130 /* strip size of raid bdev in KB */ 131 uint32_t strip_size_kb; 132 133 /* strip size bit shift for optimized calculation */ 134 uint32_t strip_size_shift; 135 136 /* block length bit shift for optimized calculation */ 137 uint32_t blocklen_shift; 138 139 /* state of raid bdev */ 140 enum raid_bdev_state state; 141 142 /* number of base bdevs comprising raid bdev */ 143 uint8_t num_base_bdevs; 144 145 /* number of base bdevs discovered */ 146 uint8_t num_base_bdevs_discovered; 147 148 /* minimum number of viable base bdevs that are required by array to operate */ 149 uint8_t min_base_bdevs_operational; 150 151 /* Raid Level of this raid bdev */ 152 enum raid_level level; 153 154 /* Set to true if destroy of this raid bdev is started. */ 155 bool destroy_started; 156 157 /* Set to true if superblock metadata is enabled on this raid bdev */ 158 bool superblock_enabled; 159 160 /* Module for RAID-level specific operations */ 161 struct raid_bdev_module *module; 162 163 /* Private data for the raid module */ 164 void *module_private; 165 }; 166 167 #define RAID_FOR_EACH_BASE_BDEV(r, i) \ 168 for (i = r->base_bdev_info; i < r->base_bdev_info + r->num_base_bdevs; i++) 169 170 /* 171 * raid_bdev_io_channel is the context of spdk_io_channel for raid bdev device. It 172 * contains the relationship of raid bdev io channel with base bdev io channels. 173 */ 174 struct raid_bdev_io_channel { 175 /* Array of IO channels of base bdevs */ 176 struct spdk_io_channel **base_channel; 177 178 /* Number of IO channels */ 179 uint8_t num_channels; 180 181 /* Private raid module IO channel */ 182 struct spdk_io_channel *module_channel; 183 }; 184 185 /* TAIL head for raid bdev list */ 186 TAILQ_HEAD(raid_all_tailq, raid_bdev); 187 188 extern struct raid_all_tailq g_raid_bdev_list; 189 190 typedef void (*raid_bdev_destruct_cb)(void *cb_ctx, int rc); 191 192 int raid_bdev_create(const char *name, uint32_t strip_size, uint8_t num_base_bdevs, 193 enum raid_level level, bool superblock, const struct spdk_uuid *uuid, 194 struct raid_bdev **raid_bdev_out); 195 void raid_bdev_delete(struct raid_bdev *raid_bdev, raid_bdev_destruct_cb cb_fn, void *cb_ctx); 196 int raid_bdev_add_base_device(struct raid_bdev *raid_bdev, const char *name, uint8_t slot); 197 struct raid_bdev *raid_bdev_find_by_name(const char *name); 198 enum raid_level raid_bdev_str_to_level(const char *str); 199 const char *raid_bdev_level_to_str(enum raid_level level); 200 enum raid_bdev_state raid_bdev_str_to_state(const char *str); 201 const char *raid_bdev_state_to_str(enum raid_bdev_state state); 202 void raid_bdev_write_info_json(struct raid_bdev *raid_bdev, struct spdk_json_write_ctx *w); 203 int raid_bdev_remove_base_bdev(struct spdk_bdev *base_bdev, raid_bdev_remove_base_bdev_cb cb_fn, 204 void *cb_ctx); 205 206 /* 207 * RAID module descriptor 208 */ 209 struct raid_bdev_module { 210 /* RAID level implemented by this module */ 211 enum raid_level level; 212 213 /* Minimum required number of base bdevs. Must be > 0. */ 214 uint8_t base_bdevs_min; 215 216 /* 217 * RAID constraint. Determines number of base bdevs that can be removed 218 * without failing the array. 219 */ 220 struct { 221 enum { 222 CONSTRAINT_UNSET = 0, 223 CONSTRAINT_MAX_BASE_BDEVS_REMOVED, 224 CONSTRAINT_MIN_BASE_BDEVS_OPERATIONAL, 225 } type; 226 uint8_t value; 227 } base_bdevs_constraint; 228 229 /* Set to true if this module supports memory domains. */ 230 bool memory_domains_supported; 231 232 /* 233 * Called when the raid is starting, right before changing the state to 234 * online and registering the bdev. Parameters of the bdev like blockcnt 235 * should be set here. 236 * 237 * Non-zero return value will abort the startup process. 238 */ 239 int (*start)(struct raid_bdev *raid_bdev); 240 241 /* 242 * Called when the raid is stopping, right before changing the state to 243 * offline and unregistering the bdev. Optional. 244 * 245 * The function should return false if it is asynchronous. Then, after 246 * the async operation has completed and the module is fully stopped 247 * raid_bdev_module_stop_done() must be called. 248 */ 249 bool (*stop)(struct raid_bdev *raid_bdev); 250 251 /* Handler for R/W requests */ 252 void (*submit_rw_request)(struct raid_bdev_io *raid_io); 253 254 /* Handler for requests without payload (flush, unmap). Optional. */ 255 void (*submit_null_payload_request)(struct raid_bdev_io *raid_io); 256 257 /* 258 * Called when the bdev's IO channel is created to get the module's private IO channel. 259 * Optional. 260 */ 261 struct spdk_io_channel *(*get_io_channel)(struct raid_bdev *raid_bdev); 262 263 /* 264 * Called when a base_bdev is resized to resize the raid if the condition 265 * is satisfied. 266 */ 267 void (*resize)(struct raid_bdev *raid_bdev); 268 269 TAILQ_ENTRY(raid_bdev_module) link; 270 }; 271 272 void raid_bdev_module_list_add(struct raid_bdev_module *raid_module); 273 274 #define __RAID_MODULE_REGISTER(line) __RAID_MODULE_REGISTER_(line) 275 #define __RAID_MODULE_REGISTER_(line) raid_module_register_##line 276 277 #define RAID_MODULE_REGISTER(_module) \ 278 __attribute__((constructor)) static void \ 279 __RAID_MODULE_REGISTER(__LINE__)(void) \ 280 { \ 281 raid_bdev_module_list_add(_module); \ 282 } 283 284 bool raid_bdev_io_complete_part(struct raid_bdev_io *raid_io, uint64_t completed, 285 enum spdk_bdev_io_status status); 286 void raid_bdev_queue_io_wait(struct raid_bdev_io *raid_io, struct spdk_bdev *bdev, 287 struct spdk_io_channel *ch, spdk_bdev_io_wait_cb cb_fn); 288 void raid_bdev_io_complete(struct raid_bdev_io *raid_io, enum spdk_bdev_io_status status); 289 void raid_bdev_module_stop_done(struct raid_bdev *raid_bdev); 290 291 /** 292 * Raid bdev I/O read/write wrapper for spdk_bdev_readv_blocks_ext function. 293 */ 294 static inline int 295 raid_bdev_readv_blocks_ext(struct raid_base_bdev_info *base_info, struct spdk_io_channel *ch, 296 struct iovec *iov, int iovcnt, uint64_t offset_blocks, 297 uint64_t num_blocks, spdk_bdev_io_completion_cb cb, void *cb_arg, 298 struct spdk_bdev_ext_io_opts *opts) 299 { 300 return spdk_bdev_readv_blocks_ext(base_info->desc, ch, iov, iovcnt, 301 base_info->data_offset + offset_blocks, num_blocks, cb, cb_arg, opts); 302 } 303 304 /** 305 * Raid bdev I/O read/write wrapper for spdk_bdev_writev_blocks_ext function. 306 */ 307 static inline int 308 raid_bdev_writev_blocks_ext(struct raid_base_bdev_info *base_info, struct spdk_io_channel *ch, 309 struct iovec *iov, int iovcnt, uint64_t offset_blocks, 310 uint64_t num_blocks, spdk_bdev_io_completion_cb cb, void *cb_arg, 311 struct spdk_bdev_ext_io_opts *opts) 312 { 313 return spdk_bdev_writev_blocks_ext(base_info->desc, ch, iov, iovcnt, 314 base_info->data_offset + offset_blocks, num_blocks, cb, cb_arg, opts); 315 } 316 317 /** 318 * Raid bdev I/O read/write wrapper for spdk_bdev_unmap_blocks function. 319 */ 320 static inline int 321 raid_bdev_unmap_blocks(struct raid_base_bdev_info *base_info, struct spdk_io_channel *ch, 322 uint64_t offset_blocks, uint64_t num_blocks, 323 spdk_bdev_io_completion_cb cb, void *cb_arg) 324 { 325 return spdk_bdev_unmap_blocks(base_info->desc, ch, base_info->data_offset + offset_blocks, 326 num_blocks, cb, cb_arg); 327 } 328 329 /** 330 * Raid bdev I/O read/write wrapper for spdk_bdev_flush_blocks function. 331 */ 332 static inline int 333 raid_bdev_flush_blocks(struct raid_base_bdev_info *base_info, struct spdk_io_channel *ch, 334 uint64_t offset_blocks, uint64_t num_blocks, 335 spdk_bdev_io_completion_cb cb, void *cb_arg) 336 { 337 return spdk_bdev_flush_blocks(base_info->desc, ch, base_info->data_offset + offset_blocks, 338 num_blocks, cb, cb_arg); 339 } 340 341 #endif /* SPDK_BDEV_RAID_INTERNAL_H */ 342