1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright (C) 2018 Intel Corporation. 3 * All rights reserved. 4 */ 5 6 #ifndef SPDK_BDEV_RAID_INTERNAL_H 7 #define SPDK_BDEV_RAID_INTERNAL_H 8 9 #include "spdk/bdev_module.h" 10 #include "spdk/uuid.h" 11 12 #define RAID_BDEV_MIN_DATA_OFFSET_SIZE (1024*1024) /* 1 MiB */ 13 14 enum raid_level { 15 INVALID_RAID_LEVEL = -1, 16 RAID0 = 0, 17 RAID1 = 1, 18 RAID5F = 95, /* 0x5f */ 19 CONCAT = 99, 20 }; 21 22 /* 23 * Raid state describes the state of the raid. This raid bdev can be either in 24 * configured list or configuring list 25 */ 26 enum raid_bdev_state { 27 /* raid bdev is ready and is seen by upper layers */ 28 RAID_BDEV_STATE_ONLINE, 29 30 /* 31 * raid bdev is configuring, not all underlying bdevs are present. 32 * And can't be seen by upper layers. 33 */ 34 RAID_BDEV_STATE_CONFIGURING, 35 36 /* 37 * In offline state, raid bdev layer will complete all incoming commands without 38 * submitting to underlying base nvme bdevs 39 */ 40 RAID_BDEV_STATE_OFFLINE, 41 42 /* raid bdev state max, new states should be added before this */ 43 RAID_BDEV_STATE_MAX 44 }; 45 46 enum raid_process_type { 47 RAID_PROCESS_NONE, 48 RAID_PROCESS_REBUILD, 49 RAID_PROCESS_MAX 50 }; 51 52 typedef void (*raid_base_bdev_cb)(void *ctx, int status); 53 54 /* 55 * raid_base_bdev_info contains information for the base bdevs which are part of some 56 * raid. This structure contains the per base bdev information. Whatever is 57 * required per base device for raid bdev will be kept here 58 */ 59 struct raid_base_bdev_info { 60 /* The raid bdev that this base bdev belongs to */ 61 struct raid_bdev *raid_bdev; 62 63 /* name of the bdev */ 64 char *name; 65 66 /* uuid of the bdev */ 67 struct spdk_uuid uuid; 68 69 /* 70 * Pointer to base bdev descriptor opened by raid bdev. This is NULL when the bdev for 71 * this slot is missing. 72 */ 73 struct spdk_bdev_desc *desc; 74 75 /* offset in blocks from the start of the base bdev to the start of the data region */ 76 uint64_t data_offset; 77 78 /* size in blocks of the base bdev's data region */ 79 uint64_t data_size; 80 81 /* 82 * When underlying base device calls the hot plug function on drive removal, 83 * this flag will be set and later after doing some processing, base device 84 * descriptor will be closed 85 */ 86 bool remove_scheduled; 87 88 /* callback for base bdev removal */ 89 raid_base_bdev_cb remove_cb; 90 91 /* context of the callback */ 92 void *remove_cb_ctx; 93 94 /* Hold the number of blocks to know how large the base bdev is resized. */ 95 uint64_t blockcnt; 96 97 /* io channel for the app thread */ 98 struct spdk_io_channel *app_thread_ch; 99 100 /* Set to true when base bdev has completed the configuration process */ 101 bool is_configured; 102 103 /* callback for base bdev configuration */ 104 raid_base_bdev_cb configure_cb; 105 106 /* context of the callback */ 107 void *configure_cb_ctx; 108 }; 109 110 struct raid_bdev_io; 111 typedef void (*raid_bdev_io_completion_cb)(struct raid_bdev_io *raid_io, 112 enum spdk_bdev_io_status status); 113 114 /* 115 * raid_bdev_io is the context part of bdev_io. It contains the information 116 * related to bdev_io for a raid bdev 117 */ 118 struct raid_bdev_io { 119 /* The raid bdev associated with this IO */ 120 struct raid_bdev *raid_bdev; 121 122 uint64_t offset_blocks; 123 uint64_t num_blocks; 124 struct iovec *iovs; 125 int iovcnt; 126 enum spdk_bdev_io_type type; 127 struct spdk_memory_domain *memory_domain; 128 void *memory_domain_ctx; 129 void *md_buf; 130 131 /* WaitQ entry, used only in waitq logic */ 132 struct spdk_bdev_io_wait_entry waitq_entry; 133 134 /* Context of the original channel for this IO */ 135 struct raid_bdev_io_channel *raid_ch; 136 137 /* Used for tracking progress on io requests sent to member disks. */ 138 uint64_t base_bdev_io_remaining; 139 uint8_t base_bdev_io_submitted; 140 enum spdk_bdev_io_status base_bdev_io_status; 141 142 /* Private data for the raid module */ 143 void *module_private; 144 145 /* Custom completion callback. Overrides bdev_io completion if set. */ 146 raid_bdev_io_completion_cb completion_cb; 147 148 struct { 149 uint64_t offset; 150 struct iovec *iov; 151 struct iovec iov_copy; 152 } split; 153 }; 154 155 struct raid_bdev_process_request { 156 struct raid_bdev_process *process; 157 struct raid_base_bdev_info *target; 158 struct spdk_io_channel *target_ch; 159 uint64_t offset_blocks; 160 uint32_t num_blocks; 161 struct iovec iov; 162 void *md_buf; 163 /* bdev_io is raid_io's driver_ctx - don't reorder them! 164 * These are needed for re-using raid module I/O functions for process I/O. */ 165 struct spdk_bdev_io bdev_io; 166 struct raid_bdev_io raid_io; 167 TAILQ_ENTRY(raid_bdev_process_request) link; 168 }; 169 170 /* 171 * raid_bdev is the single entity structure which contains SPDK block device 172 * and the information related to any raid bdev either configured or 173 * in configuring list. io device is created on this. 174 */ 175 struct raid_bdev { 176 /* raid bdev device, this will get registered in bdev layer */ 177 struct spdk_bdev bdev; 178 179 /* link of raid bdev to link it to global raid bdev list */ 180 TAILQ_ENTRY(raid_bdev) global_link; 181 182 /* array of base bdev info */ 183 struct raid_base_bdev_info *base_bdev_info; 184 185 /* lock to protect the base bdev array */ 186 struct spdk_spinlock base_bdev_lock; 187 188 /* strip size of raid bdev in blocks */ 189 uint32_t strip_size; 190 191 /* strip size of raid bdev in KB */ 192 uint32_t strip_size_kb; 193 194 /* strip size bit shift for optimized calculation */ 195 uint32_t strip_size_shift; 196 197 /* block length bit shift for optimized calculation */ 198 uint32_t blocklen_shift; 199 200 /* state of raid bdev */ 201 enum raid_bdev_state state; 202 203 /* number of base bdevs comprising raid bdev */ 204 uint8_t num_base_bdevs; 205 206 /* number of base bdevs discovered */ 207 uint8_t num_base_bdevs_discovered; 208 209 /* 210 * Number of operational base bdevs, i.e. how many we know/expect to be working. This 211 * will be less than num_base_bdevs when starting a degraded array. 212 */ 213 uint8_t num_base_bdevs_operational; 214 215 /* minimum number of viable base bdevs that are required by array to operate */ 216 uint8_t min_base_bdevs_operational; 217 218 /* Raid Level of this raid bdev */ 219 enum raid_level level; 220 221 /* Set to true if destroy of this raid bdev is started. */ 222 bool destroy_started; 223 224 /* Module for RAID-level specific operations */ 225 struct raid_bdev_module *module; 226 227 /* Private data for the raid module */ 228 void *module_private; 229 230 /* Superblock */ 231 struct raid_bdev_superblock *sb; 232 233 /* Raid bdev background process, e.g. rebuild */ 234 struct raid_bdev_process *process; 235 }; 236 237 #define RAID_FOR_EACH_BASE_BDEV(r, i) \ 238 for (i = r->base_bdev_info; i < r->base_bdev_info + r->num_base_bdevs; i++) 239 240 struct raid_bdev_io_channel; 241 242 /* TAIL head for raid bdev list */ 243 TAILQ_HEAD(raid_all_tailq, raid_bdev); 244 245 extern struct raid_all_tailq g_raid_bdev_list; 246 247 typedef void (*raid_bdev_destruct_cb)(void *cb_ctx, int rc); 248 249 int raid_bdev_create(const char *name, uint32_t strip_size, uint8_t num_base_bdevs, 250 enum raid_level level, bool superblock, const struct spdk_uuid *uuid, 251 struct raid_bdev **raid_bdev_out); 252 void raid_bdev_delete(struct raid_bdev *raid_bdev, raid_bdev_destruct_cb cb_fn, void *cb_ctx); 253 int raid_bdev_add_base_device(struct raid_bdev *raid_bdev, const char *name, uint8_t slot); 254 struct raid_bdev *raid_bdev_find_by_name(const char *name); 255 enum raid_level raid_bdev_str_to_level(const char *str); 256 const char *raid_bdev_level_to_str(enum raid_level level); 257 enum raid_bdev_state raid_bdev_str_to_state(const char *str); 258 const char *raid_bdev_state_to_str(enum raid_bdev_state state); 259 const char *raid_bdev_process_to_str(enum raid_process_type value); 260 void raid_bdev_write_info_json(struct raid_bdev *raid_bdev, struct spdk_json_write_ctx *w); 261 int raid_bdev_remove_base_bdev(struct spdk_bdev *base_bdev, raid_base_bdev_cb cb_fn, void *cb_ctx); 262 int raid_bdev_attach_base_bdev(struct raid_bdev *raid_bdev, struct spdk_bdev *base_bdev, 263 raid_base_bdev_cb cb_fn, void *cb_ctx); 264 265 /* 266 * RAID module descriptor 267 */ 268 struct raid_bdev_module { 269 /* RAID level implemented by this module */ 270 enum raid_level level; 271 272 /* Minimum required number of base bdevs. Must be > 0. */ 273 uint8_t base_bdevs_min; 274 275 /* 276 * RAID constraint. Determines number of base bdevs that can be removed 277 * without failing the array. 278 */ 279 struct { 280 enum { 281 CONSTRAINT_UNSET = 0, 282 CONSTRAINT_MAX_BASE_BDEVS_REMOVED, 283 CONSTRAINT_MIN_BASE_BDEVS_OPERATIONAL, 284 } type; 285 uint8_t value; 286 } base_bdevs_constraint; 287 288 /* Set to true if this module supports memory domains. */ 289 bool memory_domains_supported; 290 291 /* 292 * Called when the raid is starting, right before changing the state to 293 * online and registering the bdev. Parameters of the bdev like blockcnt 294 * should be set here. 295 * 296 * Non-zero return value will abort the startup process. 297 */ 298 int (*start)(struct raid_bdev *raid_bdev); 299 300 /* 301 * Called when the raid is stopping, right before changing the state to 302 * offline and unregistering the bdev. Optional. 303 * 304 * The function should return false if it is asynchronous. Then, after 305 * the async operation has completed and the module is fully stopped 306 * raid_bdev_module_stop_done() must be called. 307 */ 308 bool (*stop)(struct raid_bdev *raid_bdev); 309 310 /* Handler for R/W requests */ 311 void (*submit_rw_request)(struct raid_bdev_io *raid_io); 312 313 /* Handler for requests without payload (flush, unmap). Optional. */ 314 void (*submit_null_payload_request)(struct raid_bdev_io *raid_io); 315 316 /* 317 * Called when the bdev's IO channel is created to get the module's private IO channel. 318 * Optional. 319 */ 320 struct spdk_io_channel *(*get_io_channel)(struct raid_bdev *raid_bdev); 321 322 /* 323 * Called when a base_bdev is resized to resize the raid if the condition 324 * is satisfied. 325 */ 326 void (*resize)(struct raid_bdev *raid_bdev); 327 328 /* Handler for raid process requests. Required for raid modules with redundancy. */ 329 int (*submit_process_request)(struct raid_bdev_process_request *process_req, 330 struct raid_bdev_io_channel *raid_ch); 331 332 TAILQ_ENTRY(raid_bdev_module) link; 333 }; 334 335 void raid_bdev_module_list_add(struct raid_bdev_module *raid_module); 336 337 #define __RAID_MODULE_REGISTER(line) __RAID_MODULE_REGISTER_(line) 338 #define __RAID_MODULE_REGISTER_(line) raid_module_register_##line 339 340 #define RAID_MODULE_REGISTER(_module) \ 341 __attribute__((constructor)) static void \ 342 __RAID_MODULE_REGISTER(__LINE__)(void) \ 343 { \ 344 raid_bdev_module_list_add(_module); \ 345 } 346 347 bool raid_bdev_io_complete_part(struct raid_bdev_io *raid_io, uint64_t completed, 348 enum spdk_bdev_io_status status); 349 void raid_bdev_queue_io_wait(struct raid_bdev_io *raid_io, struct spdk_bdev *bdev, 350 struct spdk_io_channel *ch, spdk_bdev_io_wait_cb cb_fn); 351 void raid_bdev_io_complete(struct raid_bdev_io *raid_io, enum spdk_bdev_io_status status); 352 void raid_bdev_module_stop_done(struct raid_bdev *raid_bdev); 353 struct spdk_io_channel *raid_bdev_channel_get_base_channel(struct raid_bdev_io_channel *raid_ch, 354 uint8_t idx); 355 void *raid_bdev_channel_get_module_ctx(struct raid_bdev_io_channel *raid_ch); 356 void raid_bdev_process_request_complete(struct raid_bdev_process_request *process_req, int status); 357 void raid_bdev_io_init(struct raid_bdev_io *raid_io, struct raid_bdev_io_channel *raid_ch, 358 enum spdk_bdev_io_type type, uint64_t offset_blocks, 359 uint64_t num_blocks, struct iovec *iovs, int iovcnt, void *md_buf, 360 struct spdk_memory_domain *memory_domain, void *memory_domain_ctx); 361 362 static inline uint8_t 363 raid_bdev_base_bdev_slot(struct raid_base_bdev_info *base_info) 364 { 365 return base_info - base_info->raid_bdev->base_bdev_info; 366 } 367 368 /** 369 * Raid bdev I/O read/write wrapper for spdk_bdev_readv_blocks_ext function. 370 */ 371 static inline int 372 raid_bdev_readv_blocks_ext(struct raid_base_bdev_info *base_info, struct spdk_io_channel *ch, 373 struct iovec *iov, int iovcnt, uint64_t offset_blocks, 374 uint64_t num_blocks, spdk_bdev_io_completion_cb cb, void *cb_arg, 375 struct spdk_bdev_ext_io_opts *opts) 376 { 377 return spdk_bdev_readv_blocks_ext(base_info->desc, ch, iov, iovcnt, 378 base_info->data_offset + offset_blocks, num_blocks, cb, cb_arg, opts); 379 } 380 381 /** 382 * Raid bdev I/O read/write wrapper for spdk_bdev_writev_blocks_ext function. 383 */ 384 static inline int 385 raid_bdev_writev_blocks_ext(struct raid_base_bdev_info *base_info, struct spdk_io_channel *ch, 386 struct iovec *iov, int iovcnt, uint64_t offset_blocks, 387 uint64_t num_blocks, spdk_bdev_io_completion_cb cb, void *cb_arg, 388 struct spdk_bdev_ext_io_opts *opts) 389 { 390 return spdk_bdev_writev_blocks_ext(base_info->desc, ch, iov, iovcnt, 391 base_info->data_offset + offset_blocks, num_blocks, cb, cb_arg, opts); 392 } 393 394 /** 395 * Raid bdev I/O read/write wrapper for spdk_bdev_unmap_blocks function. 396 */ 397 static inline int 398 raid_bdev_unmap_blocks(struct raid_base_bdev_info *base_info, struct spdk_io_channel *ch, 399 uint64_t offset_blocks, uint64_t num_blocks, 400 spdk_bdev_io_completion_cb cb, void *cb_arg) 401 { 402 return spdk_bdev_unmap_blocks(base_info->desc, ch, base_info->data_offset + offset_blocks, 403 num_blocks, cb, cb_arg); 404 } 405 406 /** 407 * Raid bdev I/O read/write wrapper for spdk_bdev_flush_blocks function. 408 */ 409 static inline int 410 raid_bdev_flush_blocks(struct raid_base_bdev_info *base_info, struct spdk_io_channel *ch, 411 uint64_t offset_blocks, uint64_t num_blocks, 412 spdk_bdev_io_completion_cb cb, void *cb_arg) 413 { 414 return spdk_bdev_flush_blocks(base_info->desc, ch, base_info->data_offset + offset_blocks, 415 num_blocks, cb, cb_arg); 416 } 417 418 /* 419 * Definitions related to raid bdev superblock 420 */ 421 422 #define RAID_BDEV_SB_VERSION_MAJOR 1 423 #define RAID_BDEV_SB_VERSION_MINOR 0 424 425 #define RAID_BDEV_SB_NAME_SIZE 64 426 427 enum raid_bdev_sb_base_bdev_state { 428 RAID_SB_BASE_BDEV_MISSING = 0, 429 RAID_SB_BASE_BDEV_CONFIGURED = 1, 430 RAID_SB_BASE_BDEV_FAILED = 2, 431 RAID_SB_BASE_BDEV_SPARE = 3, 432 }; 433 434 struct raid_bdev_sb_base_bdev { 435 /* uuid of the base bdev */ 436 struct spdk_uuid uuid; 437 /* offset in blocks from base device start to the start of raid data area */ 438 uint64_t data_offset; 439 /* size in blocks of the base device raid data area */ 440 uint64_t data_size; 441 /* state of the base bdev */ 442 uint32_t state; 443 /* feature/status flags */ 444 uint32_t flags; 445 /* slot number of this base bdev in the raid */ 446 uint8_t slot; 447 448 uint8_t reserved[23]; 449 }; 450 SPDK_STATIC_ASSERT(sizeof(struct raid_bdev_sb_base_bdev) == 64, "incorrect size"); 451 452 struct raid_bdev_superblock { 453 #define RAID_BDEV_SB_SIG "SPDKRAID" 454 uint8_t signature[8]; 455 struct { 456 /* incremented when a breaking change in the superblock structure is made */ 457 uint16_t major; 458 /* incremented for changes in the superblock that are backward compatible */ 459 uint16_t minor; 460 } version; 461 /* length in bytes of the entire superblock */ 462 uint32_t length; 463 /* crc32c checksum of the entire superblock */ 464 uint32_t crc; 465 /* feature/status flags */ 466 uint32_t flags; 467 /* unique id of the raid bdev */ 468 struct spdk_uuid uuid; 469 /* name of the raid bdev */ 470 uint8_t name[RAID_BDEV_SB_NAME_SIZE]; 471 /* size of the raid bdev in blocks */ 472 uint64_t raid_size; 473 /* the raid bdev block size - must be the same for all base bdevs */ 474 uint32_t block_size; 475 /* the raid level */ 476 uint32_t level; 477 /* strip (chunk) size in blocks */ 478 uint32_t strip_size; 479 /* state of the raid */ 480 uint32_t state; 481 /* sequence number, incremented on every superblock update */ 482 uint64_t seq_number; 483 /* number of raid base devices */ 484 uint8_t num_base_bdevs; 485 486 uint8_t reserved[118]; 487 488 /* size of the base bdevs array */ 489 uint8_t base_bdevs_size; 490 /* array of base bdev descriptors */ 491 struct raid_bdev_sb_base_bdev base_bdevs[]; 492 }; 493 SPDK_STATIC_ASSERT(sizeof(struct raid_bdev_superblock) == 256, "incorrect size"); 494 495 #define RAID_BDEV_SB_MAX_LENGTH \ 496 SPDK_ALIGN_CEIL((sizeof(struct raid_bdev_superblock) + UINT8_MAX * sizeof(struct raid_bdev_sb_base_bdev)), 0x1000) 497 498 SPDK_STATIC_ASSERT(RAID_BDEV_SB_MAX_LENGTH < RAID_BDEV_MIN_DATA_OFFSET_SIZE, 499 "Incorrect min data offset"); 500 501 typedef void (*raid_bdev_write_sb_cb)(int status, struct raid_bdev *raid_bdev, void *ctx); 502 typedef void (*raid_bdev_load_sb_cb)(const struct raid_bdev_superblock *sb, int status, void *ctx); 503 504 void raid_bdev_init_superblock(struct raid_bdev *raid_bdev); 505 void raid_bdev_write_superblock(struct raid_bdev *raid_bdev, raid_bdev_write_sb_cb cb, 506 void *cb_ctx); 507 int raid_bdev_load_base_bdev_superblock(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, 508 raid_bdev_load_sb_cb cb, void *cb_ctx); 509 510 #endif /* SPDK_BDEV_RAID_INTERNAL_H */ 511