1488570ebSJim Harris /* SPDX-License-Identifier: BSD-3-Clause 2a6dbe372Spaul luse * Copyright (C) 2018 Intel Corporation. 307fe6a43SSeth Howell * All rights reserved. 407fe6a43SSeth Howell */ 507fe6a43SSeth Howell 607fe6a43SSeth Howell #ifndef SPDK_BDEV_RAID_INTERNAL_H 707fe6a43SSeth Howell #define SPDK_BDEV_RAID_INTERNAL_H 807fe6a43SSeth Howell 907fe6a43SSeth Howell #include "spdk/bdev_module.h" 101db41324SKrzysztof Karas #include "spdk/uuid.h" 1107fe6a43SSeth Howell 12deed7d2fSArtur Paszkiewicz #define RAID_BDEV_MIN_DATA_OFFSET_SIZE (1024*1024) /* 1 MiB */ 13deed7d2fSArtur Paszkiewicz 14445e667fSArtur Paszkiewicz enum raid_level { 15445e667fSArtur Paszkiewicz INVALID_RAID_LEVEL = -1, 16445e667fSArtur Paszkiewicz RAID0 = 0, 17208f21cdSKrzysztof Smolinski RAID1 = 1, 1883a4b155SArtur Paszkiewicz RAID5F = 95, /* 0x5f */ 1964eebbd1Syupeng CONCAT = 99, 20445e667fSArtur Paszkiewicz }; 2175cb691cSpaul luse 2207fe6a43SSeth Howell /* 2307fe6a43SSeth Howell * Raid state describes the state of the raid. This raid bdev can be either in 2407fe6a43SSeth Howell * configured list or configuring list 2507fe6a43SSeth Howell */ 2607fe6a43SSeth Howell enum raid_bdev_state { 2707fe6a43SSeth Howell /* raid bdev is ready and is seen by upper layers */ 2807fe6a43SSeth Howell RAID_BDEV_STATE_ONLINE, 2907fe6a43SSeth Howell 3007fe6a43SSeth Howell /* 3107fe6a43SSeth Howell * raid bdev is configuring, not all underlying bdevs are present. 3207fe6a43SSeth Howell * And can't be seen by upper layers. 3307fe6a43SSeth Howell */ 3407fe6a43SSeth Howell RAID_BDEV_STATE_CONFIGURING, 3507fe6a43SSeth Howell 3607fe6a43SSeth Howell /* 3707fe6a43SSeth Howell * In offline state, raid bdev layer will complete all incoming commands without 3807fe6a43SSeth Howell * submitting to underlying base nvme bdevs 3907fe6a43SSeth Howell */ 4007fe6a43SSeth Howell RAID_BDEV_STATE_OFFLINE, 4107fe6a43SSeth Howell 4246ff15a6SArtur Paszkiewicz /* raid bdev state max, new states should be added before this */ 4346ff15a6SArtur Paszkiewicz RAID_BDEV_STATE_MAX 4407fe6a43SSeth Howell }; 4507fe6a43SSeth Howell 4650d58ff3SArtur Paszkiewicz enum raid_process_type { 4750d58ff3SArtur Paszkiewicz RAID_PROCESS_NONE, 4850d58ff3SArtur Paszkiewicz RAID_PROCESS_REBUILD, 4950d58ff3SArtur Paszkiewicz RAID_PROCESS_MAX 5050d58ff3SArtur Paszkiewicz }; 5150d58ff3SArtur Paszkiewicz 52887b8ec4SArtur Paszkiewicz typedef void (*raid_base_bdev_cb)(void *ctx, int status); 5323850b03SKrzysztof Smolinski 5407fe6a43SSeth Howell /* 5507fe6a43SSeth Howell * raid_base_bdev_info contains information for the base bdevs which are part of some 5607fe6a43SSeth Howell * raid. This structure contains the per base bdev information. Whatever is 5707fe6a43SSeth Howell * required per base device for raid bdev will be kept here 5807fe6a43SSeth Howell */ 5907fe6a43SSeth Howell struct raid_base_bdev_info { 6026861b70SArtur Paszkiewicz /* The raid bdev that this base bdev belongs to */ 6126861b70SArtur Paszkiewicz struct raid_bdev *raid_bdev; 6226861b70SArtur Paszkiewicz 63dccdd1e5SArtur Paszkiewicz /* name of the bdev */ 64dccdd1e5SArtur Paszkiewicz char *name; 65dccdd1e5SArtur Paszkiewicz 668a6bb6a8SArtur Paszkiewicz /* uuid of the bdev */ 678a6bb6a8SArtur Paszkiewicz struct spdk_uuid uuid; 688a6bb6a8SArtur Paszkiewicz 69255d0786SArtur Paszkiewicz /* 70255d0786SArtur Paszkiewicz * Pointer to base bdev descriptor opened by raid bdev. This is NULL when the bdev for 71255d0786SArtur Paszkiewicz * this slot is missing. 72255d0786SArtur Paszkiewicz */ 7307fe6a43SSeth Howell struct spdk_bdev_desc *desc; 7407fe6a43SSeth Howell 75deed7d2fSArtur Paszkiewicz /* offset in blocks from the start of the base bdev to the start of the data region */ 76deed7d2fSArtur Paszkiewicz uint64_t data_offset; 77deed7d2fSArtur Paszkiewicz 78deed7d2fSArtur Paszkiewicz /* size in blocks of the base bdev's data region */ 79deed7d2fSArtur Paszkiewicz uint64_t data_size; 80deed7d2fSArtur Paszkiewicz 8107fe6a43SSeth Howell /* 8207fe6a43SSeth Howell * When underlying base device calls the hot plug function on drive removal, 8307fe6a43SSeth Howell * this flag will be set and later after doing some processing, base device 8407fe6a43SSeth Howell * descriptor will be closed 8507fe6a43SSeth Howell */ 8607fe6a43SSeth Howell bool remove_scheduled; 87c9224e26SShuhei Matsumoto 8823850b03SKrzysztof Smolinski /* callback for base bdev removal */ 89887b8ec4SArtur Paszkiewicz raid_base_bdev_cb remove_cb; 9023850b03SKrzysztof Smolinski 9123850b03SKrzysztof Smolinski /* context of the callback */ 9223850b03SKrzysztof Smolinski void *remove_cb_ctx; 9323850b03SKrzysztof Smolinski 94c9224e26SShuhei Matsumoto /* Hold the number of blocks to know how large the base bdev is resized. */ 95c9224e26SShuhei Matsumoto uint64_t blockcnt; 9677b8f7b6SArtur Paszkiewicz 9777b8f7b6SArtur Paszkiewicz /* io channel for the app thread */ 9877b8f7b6SArtur Paszkiewicz struct spdk_io_channel *app_thread_ch; 999222ff97SArtur Paszkiewicz 1009222ff97SArtur Paszkiewicz /* Set to true when base bdev has completed the configuration process */ 1019222ff97SArtur Paszkiewicz bool is_configured; 102887b8ec4SArtur Paszkiewicz 103a6708553SArtur Paszkiewicz /* Set to true if this base bdev is the target of a background process */ 104a6708553SArtur Paszkiewicz bool is_process_target; 105a6708553SArtur Paszkiewicz 106f1a03e33SArtur Paszkiewicz /* Set to true to indicate that the base bdev is being removed because of a failure */ 107f1a03e33SArtur Paszkiewicz bool is_failed; 108f1a03e33SArtur Paszkiewicz 109887b8ec4SArtur Paszkiewicz /* callback for base bdev configuration */ 110887b8ec4SArtur Paszkiewicz raid_base_bdev_cb configure_cb; 111887b8ec4SArtur Paszkiewicz 112887b8ec4SArtur Paszkiewicz /* context of the callback */ 113887b8ec4SArtur Paszkiewicz void *configure_cb_ctx; 11407fe6a43SSeth Howell }; 11507fe6a43SSeth Howell 1162b867a50SArtur Paszkiewicz struct raid_bdev_io; 1172b867a50SArtur Paszkiewicz typedef void (*raid_bdev_io_completion_cb)(struct raid_bdev_io *raid_io, 1182b867a50SArtur Paszkiewicz enum spdk_bdev_io_status status); 1192b867a50SArtur Paszkiewicz 12007fe6a43SSeth Howell /* 12184bd6704Spaul luse * raid_bdev_io is the context part of bdev_io. It contains the information 12284bd6704Spaul luse * related to bdev_io for a raid bdev 12384bd6704Spaul luse */ 12484bd6704Spaul luse struct raid_bdev_io { 125cb8dbf17SArtur Paszkiewicz /* The raid bdev associated with this IO */ 126cb8dbf17SArtur Paszkiewicz struct raid_bdev *raid_bdev; 127cb8dbf17SArtur Paszkiewicz 128a4e1703eSArtur Paszkiewicz uint64_t offset_blocks; 129a4e1703eSArtur Paszkiewicz uint64_t num_blocks; 130a4e1703eSArtur Paszkiewicz struct iovec *iovs; 131a4e1703eSArtur Paszkiewicz int iovcnt; 13270401f58SArtur Paszkiewicz enum spdk_bdev_io_type type; 133a4e1703eSArtur Paszkiewicz struct spdk_memory_domain *memory_domain; 134a4e1703eSArtur Paszkiewicz void *memory_domain_ctx; 135a4e1703eSArtur Paszkiewicz void *md_buf; 136a4e1703eSArtur Paszkiewicz 13784bd6704Spaul luse /* WaitQ entry, used only in waitq logic */ 13884bd6704Spaul luse struct spdk_bdev_io_wait_entry waitq_entry; 13984bd6704Spaul luse 140152cce0dSArtur Paszkiewicz /* Context of the original channel for this IO */ 141152cce0dSArtur Paszkiewicz struct raid_bdev_io_channel *raid_ch; 14284bd6704Spaul luse 14384bd6704Spaul luse /* Used for tracking progress on io requests sent to member disks. */ 1442efe6d92SArtur Paszkiewicz uint64_t base_bdev_io_remaining; 14584bd6704Spaul luse uint8_t base_bdev_io_submitted; 146d8a22322SArtur Paszkiewicz enum spdk_bdev_io_status base_bdev_io_status; 1479820a949SArtur Paszkiewicz /* This will be the raid_io completion status unless any base io's status is different. */ 1489820a949SArtur Paszkiewicz enum spdk_bdev_io_status base_bdev_io_status_default; 1497131bec4SArtur Paszkiewicz 1507131bec4SArtur Paszkiewicz /* Private data for the raid module */ 1517131bec4SArtur Paszkiewicz void *module_private; 1522b867a50SArtur Paszkiewicz 1532b867a50SArtur Paszkiewicz /* Custom completion callback. Overrides bdev_io completion if set. */ 1542b867a50SArtur Paszkiewicz raid_bdev_io_completion_cb completion_cb; 155aae5e04fSArtur Paszkiewicz 156aae5e04fSArtur Paszkiewicz struct { 157aae5e04fSArtur Paszkiewicz uint64_t offset; 158aae5e04fSArtur Paszkiewicz struct iovec *iov; 159aae5e04fSArtur Paszkiewicz struct iovec iov_copy; 160aae5e04fSArtur Paszkiewicz } split; 16184bd6704Spaul luse }; 16284bd6704Spaul luse 16350d58ff3SArtur Paszkiewicz struct raid_bdev_process_request { 16450d58ff3SArtur Paszkiewicz struct raid_bdev_process *process; 16550d58ff3SArtur Paszkiewicz struct raid_base_bdev_info *target; 16650d58ff3SArtur Paszkiewicz struct spdk_io_channel *target_ch; 16750d58ff3SArtur Paszkiewicz uint64_t offset_blocks; 16850d58ff3SArtur Paszkiewicz uint32_t num_blocks; 16950d58ff3SArtur Paszkiewicz struct iovec iov; 17050d58ff3SArtur Paszkiewicz void *md_buf; 17150d58ff3SArtur Paszkiewicz /* bdev_io is raid_io's driver_ctx - don't reorder them! 17250d58ff3SArtur Paszkiewicz * These are needed for re-using raid module I/O functions for process I/O. */ 17350d58ff3SArtur Paszkiewicz struct spdk_bdev_io bdev_io; 17450d58ff3SArtur Paszkiewicz struct raid_bdev_io raid_io; 17550d58ff3SArtur Paszkiewicz TAILQ_ENTRY(raid_bdev_process_request) link; 17650d58ff3SArtur Paszkiewicz }; 17750d58ff3SArtur Paszkiewicz 178*5558f3f5SArtur Paszkiewicz typedef void (*raid_bdev_configure_cb)(void *cb_ctx, int rc); 179*5558f3f5SArtur Paszkiewicz 18084bd6704Spaul luse /* 18107fe6a43SSeth Howell * raid_bdev is the single entity structure which contains SPDK block device 18207fe6a43SSeth Howell * and the information related to any raid bdev either configured or 18307fe6a43SSeth Howell * in configuring list. io device is created on this. 18407fe6a43SSeth Howell */ 18507fe6a43SSeth Howell struct raid_bdev { 18607fe6a43SSeth Howell /* raid bdev device, this will get registered in bdev layer */ 18707fe6a43SSeth Howell struct spdk_bdev bdev; 18807fe6a43SSeth Howell 1899d2b7b41SArtur Paszkiewicz /* the raid bdev descriptor, opened for internal use */ 1909d2b7b41SArtur Paszkiewicz struct spdk_bdev_desc *self_desc; 1919d2b7b41SArtur Paszkiewicz 19207fe6a43SSeth Howell /* link of raid bdev to link it to global raid bdev list */ 19307fe6a43SSeth Howell TAILQ_ENTRY(raid_bdev) global_link; 19407fe6a43SSeth Howell 19507fe6a43SSeth Howell /* array of base bdev info */ 19607fe6a43SSeth Howell struct raid_base_bdev_info *base_bdev_info; 19707fe6a43SSeth Howell 19807fe6a43SSeth Howell /* strip size of raid bdev in blocks */ 19907fe6a43SSeth Howell uint32_t strip_size; 20007fe6a43SSeth Howell 20107fe6a43SSeth Howell /* strip size of raid bdev in KB */ 20207fe6a43SSeth Howell uint32_t strip_size_kb; 20307fe6a43SSeth Howell 20407fe6a43SSeth Howell /* strip size bit shift for optimized calculation */ 20507fe6a43SSeth Howell uint32_t strip_size_shift; 20607fe6a43SSeth Howell 20707fe6a43SSeth Howell /* state of raid bdev */ 20807fe6a43SSeth Howell enum raid_bdev_state state; 20907fe6a43SSeth Howell 21007fe6a43SSeth Howell /* number of base bdevs comprising raid bdev */ 21107fe6a43SSeth Howell uint8_t num_base_bdevs; 21207fe6a43SSeth Howell 21307fe6a43SSeth Howell /* number of base bdevs discovered */ 21407fe6a43SSeth Howell uint8_t num_base_bdevs_discovered; 21507fe6a43SSeth Howell 216255d0786SArtur Paszkiewicz /* 217255d0786SArtur Paszkiewicz * Number of operational base bdevs, i.e. how many we know/expect to be working. This 218255d0786SArtur Paszkiewicz * will be less than num_base_bdevs when starting a degraded array. 219255d0786SArtur Paszkiewicz */ 220255d0786SArtur Paszkiewicz uint8_t num_base_bdevs_operational; 221255d0786SArtur Paszkiewicz 222ad94094fSKrzysztof Smolinski /* minimum number of viable base bdevs that are required by array to operate */ 223ad94094fSKrzysztof Smolinski uint8_t min_base_bdevs_operational; 224ad94094fSKrzysztof Smolinski 22507fe6a43SSeth Howell /* Raid Level of this raid bdev */ 226445e667fSArtur Paszkiewicz enum raid_level level; 22707fe6a43SSeth Howell 22807fe6a43SSeth Howell /* Set to true if destroy of this raid bdev is started. */ 22907fe6a43SSeth Howell bool destroy_started; 230706029d5SArtur Paszkiewicz 231706029d5SArtur Paszkiewicz /* Module for RAID-level specific operations */ 232706029d5SArtur Paszkiewicz struct raid_bdev_module *module; 23373763d40SArtur Paszkiewicz 23473763d40SArtur Paszkiewicz /* Private data for the raid module */ 23573763d40SArtur Paszkiewicz void *module_private; 23677b8f7b6SArtur Paszkiewicz 23777b8f7b6SArtur Paszkiewicz /* Superblock */ 2383bb8256aSArtur Paszkiewicz bool superblock_enabled; 23977b8f7b6SArtur Paszkiewicz struct raid_bdev_superblock *sb; 24050d58ff3SArtur Paszkiewicz 241fa8b2fd7SArtur Paszkiewicz /* Superblock buffer used for I/O */ 242fa8b2fd7SArtur Paszkiewicz void *sb_io_buf; 243fa8b2fd7SArtur Paszkiewicz uint32_t sb_io_buf_size; 244fa8b2fd7SArtur Paszkiewicz 24550d58ff3SArtur Paszkiewicz /* Raid bdev background process, e.g. rebuild */ 24650d58ff3SArtur Paszkiewicz struct raid_bdev_process *process; 247*5558f3f5SArtur Paszkiewicz 248*5558f3f5SArtur Paszkiewicz /* Callback and context for raid_bdev configuration */ 249*5558f3f5SArtur Paszkiewicz raid_bdev_configure_cb configure_cb; 250*5558f3f5SArtur Paszkiewicz void *configure_cb_ctx; 25107fe6a43SSeth Howell }; 25207fe6a43SSeth Howell 253a193dcb8SArtur Paszkiewicz #define RAID_FOR_EACH_BASE_BDEV(r, i) \ 254a193dcb8SArtur Paszkiewicz for (i = r->base_bdev_info; i < r->base_bdev_info + r->num_base_bdevs; i++) 255a193dcb8SArtur Paszkiewicz 2566e03e49bSArtur Paszkiewicz struct raid_bdev_io_channel; 25707fe6a43SSeth Howell 25812ed89acSArtur Paszkiewicz /* TAIL head for raid bdev list */ 25907fe6a43SSeth Howell TAILQ_HEAD(raid_all_tailq, raid_bdev); 26007fe6a43SSeth Howell 26107fe6a43SSeth Howell extern struct raid_all_tailq g_raid_bdev_list; 26207fe6a43SSeth Howell 26307fe6a43SSeth Howell typedef void (*raid_bdev_destruct_cb)(void *cb_ctx, int rc); 26407fe6a43SSeth Howell 265dccdd1e5SArtur Paszkiewicz int raid_bdev_create(const char *name, uint32_t strip_size, uint8_t num_base_bdevs, 266deed7d2fSArtur Paszkiewicz enum raid_level level, bool superblock, const struct spdk_uuid *uuid, 267deed7d2fSArtur Paszkiewicz struct raid_bdev **raid_bdev_out); 268dccdd1e5SArtur Paszkiewicz void raid_bdev_delete(struct raid_bdev *raid_bdev, raid_bdev_destruct_cb cb_fn, void *cb_ctx); 269e9af23deSArtur Paszkiewicz int raid_bdev_add_base_bdev(struct raid_bdev *raid_bdev, const char *name, 2706eed119eSArtur Paszkiewicz raid_base_bdev_cb cb_fn, void *cb_ctx); 271dccdd1e5SArtur Paszkiewicz struct raid_bdev *raid_bdev_find_by_name(const char *name); 27246ff15a6SArtur Paszkiewicz enum raid_level raid_bdev_str_to_level(const char *str); 273445e667fSArtur Paszkiewicz const char *raid_bdev_level_to_str(enum raid_level level); 27446ff15a6SArtur Paszkiewicz enum raid_bdev_state raid_bdev_str_to_state(const char *str); 27546ff15a6SArtur Paszkiewicz const char *raid_bdev_state_to_str(enum raid_bdev_state state); 27650d58ff3SArtur Paszkiewicz const char *raid_bdev_process_to_str(enum raid_process_type value); 277ec6d94b6SArtur Paszkiewicz void raid_bdev_write_info_json(struct raid_bdev *raid_bdev, struct spdk_json_write_ctx *w); 278887b8ec4SArtur Paszkiewicz int raid_bdev_remove_base_bdev(struct spdk_bdev *base_bdev, raid_base_bdev_cb cb_fn, void *cb_ctx); 27907fe6a43SSeth Howell 280706029d5SArtur Paszkiewicz /* 281706029d5SArtur Paszkiewicz * RAID module descriptor 282706029d5SArtur Paszkiewicz */ 283706029d5SArtur Paszkiewicz struct raid_bdev_module { 284706029d5SArtur Paszkiewicz /* RAID level implemented by this module */ 285706029d5SArtur Paszkiewicz enum raid_level level; 286706029d5SArtur Paszkiewicz 2870a5194faSArtur Paszkiewicz /* Minimum required number of base bdevs. Must be > 0. */ 2880a5194faSArtur Paszkiewicz uint8_t base_bdevs_min; 2890a5194faSArtur Paszkiewicz 2900b4ca522SArtur Paszkiewicz /* 291ad94094fSKrzysztof Smolinski * RAID constraint. Determines number of base bdevs that can be removed 292ad94094fSKrzysztof Smolinski * without failing the array. 29373763d40SArtur Paszkiewicz */ 294ad94094fSKrzysztof Smolinski struct { 295ad94094fSKrzysztof Smolinski enum { 296ad94094fSKrzysztof Smolinski CONSTRAINT_UNSET = 0, 297ad94094fSKrzysztof Smolinski CONSTRAINT_MAX_BASE_BDEVS_REMOVED, 298ad94094fSKrzysztof Smolinski CONSTRAINT_MIN_BASE_BDEVS_OPERATIONAL, 299ad94094fSKrzysztof Smolinski } type; 300ad94094fSKrzysztof Smolinski uint8_t value; 301ad94094fSKrzysztof Smolinski } base_bdevs_constraint; 30273763d40SArtur Paszkiewicz 30372672d49SArtur Paszkiewicz /* Set to true if this module supports memory domains. */ 30472672d49SArtur Paszkiewicz bool memory_domains_supported; 30572672d49SArtur Paszkiewicz 306192db8deSSlawomir Ptak /* Set to true if this module supports DIF/DIX */ 307192db8deSSlawomir Ptak bool dif_supported; 308192db8deSSlawomir Ptak 30973763d40SArtur Paszkiewicz /* 3100b4ca522SArtur Paszkiewicz * Called when the raid is starting, right before changing the state to 3110b4ca522SArtur Paszkiewicz * online and registering the bdev. Parameters of the bdev like blockcnt 3120b4ca522SArtur Paszkiewicz * should be set here. 3130b4ca522SArtur Paszkiewicz * 3140b4ca522SArtur Paszkiewicz * Non-zero return value will abort the startup process. 3150b4ca522SArtur Paszkiewicz */ 3160b4ca522SArtur Paszkiewicz int (*start)(struct raid_bdev *raid_bdev); 3170b4ca522SArtur Paszkiewicz 3180b4ca522SArtur Paszkiewicz /* 3190b4ca522SArtur Paszkiewicz * Called when the raid is stopping, right before changing the state to 3200b4ca522SArtur Paszkiewicz * offline and unregistering the bdev. Optional. 3219cf1ab5bSArtur Paszkiewicz * 3229cf1ab5bSArtur Paszkiewicz * The function should return false if it is asynchronous. Then, after 3239cf1ab5bSArtur Paszkiewicz * the async operation has completed and the module is fully stopped 3249cf1ab5bSArtur Paszkiewicz * raid_bdev_module_stop_done() must be called. 3250b4ca522SArtur Paszkiewicz */ 3269cf1ab5bSArtur Paszkiewicz bool (*stop)(struct raid_bdev *raid_bdev); 3270b4ca522SArtur Paszkiewicz 328182278c0SArtur Paszkiewicz /* Handler for R/W requests */ 329182278c0SArtur Paszkiewicz void (*submit_rw_request)(struct raid_bdev_io *raid_io); 330182278c0SArtur Paszkiewicz 3311d3ca5c4SArtur Paszkiewicz /* Handler for requests without payload (flush, unmap). Optional. */ 332182278c0SArtur Paszkiewicz void (*submit_null_payload_request)(struct raid_bdev_io *raid_io); 333182278c0SArtur Paszkiewicz 334c89e2008SArtur Paszkiewicz /* 335c89e2008SArtur Paszkiewicz * Called when the bdev's IO channel is created to get the module's private IO channel. 336c89e2008SArtur Paszkiewicz * Optional. 337c89e2008SArtur Paszkiewicz */ 338c89e2008SArtur Paszkiewicz struct spdk_io_channel *(*get_io_channel)(struct raid_bdev *raid_bdev); 339c89e2008SArtur Paszkiewicz 340c9224e26SShuhei Matsumoto /* 341c9224e26SShuhei Matsumoto * Called when a base_bdev is resized to resize the raid if the condition 342ceb2ad0cSArtur Paszkiewicz * is satisfied. Optional. 343ceb2ad0cSArtur Paszkiewicz * 344ceb2ad0cSArtur Paszkiewicz * Returns true if the resize was performed. 345c9224e26SShuhei Matsumoto */ 346ceb2ad0cSArtur Paszkiewicz bool (*resize)(struct raid_bdev *raid_bdev); 347c9224e26SShuhei Matsumoto 34850d58ff3SArtur Paszkiewicz /* Handler for raid process requests. Required for raid modules with redundancy. */ 34950d58ff3SArtur Paszkiewicz int (*submit_process_request)(struct raid_bdev_process_request *process_req, 35050d58ff3SArtur Paszkiewicz struct raid_bdev_io_channel *raid_ch); 35150d58ff3SArtur Paszkiewicz 352706029d5SArtur Paszkiewicz TAILQ_ENTRY(raid_bdev_module) link; 353706029d5SArtur Paszkiewicz }; 354706029d5SArtur Paszkiewicz 355706029d5SArtur Paszkiewicz void raid_bdev_module_list_add(struct raid_bdev_module *raid_module); 356706029d5SArtur Paszkiewicz 357706029d5SArtur Paszkiewicz #define __RAID_MODULE_REGISTER(line) __RAID_MODULE_REGISTER_(line) 358706029d5SArtur Paszkiewicz #define __RAID_MODULE_REGISTER_(line) raid_module_register_##line 359706029d5SArtur Paszkiewicz 360706029d5SArtur Paszkiewicz #define RAID_MODULE_REGISTER(_module) \ 361706029d5SArtur Paszkiewicz __attribute__((constructor)) static void \ 362706029d5SArtur Paszkiewicz __RAID_MODULE_REGISTER(__LINE__)(void) \ 363706029d5SArtur Paszkiewicz { \ 364706029d5SArtur Paszkiewicz raid_bdev_module_list_add(_module); \ 365706029d5SArtur Paszkiewicz } 366706029d5SArtur Paszkiewicz 3678dd1cd21SBen Walker bool raid_bdev_io_complete_part(struct raid_bdev_io *raid_io, uint64_t completed, 3682efe6d92SArtur Paszkiewicz enum spdk_bdev_io_status status); 3698dd1cd21SBen Walker void raid_bdev_queue_io_wait(struct raid_bdev_io *raid_io, struct spdk_bdev *bdev, 370ae70d6a4SArtur Paszkiewicz struct spdk_io_channel *ch, spdk_bdev_io_wait_cb cb_fn); 3718dd1cd21SBen Walker void raid_bdev_io_complete(struct raid_bdev_io *raid_io, enum spdk_bdev_io_status status); 3729cf1ab5bSArtur Paszkiewicz void raid_bdev_module_stop_done(struct raid_bdev *raid_bdev); 37350d58ff3SArtur Paszkiewicz struct spdk_io_channel *raid_bdev_channel_get_base_channel(struct raid_bdev_io_channel *raid_ch, 37450d58ff3SArtur Paszkiewicz uint8_t idx); 37550d58ff3SArtur Paszkiewicz void *raid_bdev_channel_get_module_ctx(struct raid_bdev_io_channel *raid_ch); 37635a054dbSArtur Paszkiewicz struct raid_base_bdev_info *raid_bdev_channel_get_base_info(struct raid_bdev_io_channel *raid_ch, 37735a054dbSArtur Paszkiewicz struct spdk_bdev *base_bdev); 37850d58ff3SArtur Paszkiewicz void raid_bdev_process_request_complete(struct raid_bdev_process_request *process_req, int status); 379a4e1703eSArtur Paszkiewicz void raid_bdev_io_init(struct raid_bdev_io *raid_io, struct raid_bdev_io_channel *raid_ch, 380a4e1703eSArtur Paszkiewicz enum spdk_bdev_io_type type, uint64_t offset_blocks, 381a4e1703eSArtur Paszkiewicz uint64_t num_blocks, struct iovec *iovs, int iovcnt, void *md_buf, 382a4e1703eSArtur Paszkiewicz struct spdk_memory_domain *memory_domain, void *memory_domain_ctx); 383f1a03e33SArtur Paszkiewicz void raid_bdev_fail_base_bdev(struct raid_base_bdev_info *base_info); 384fc9ca1d0SArtur Paszkiewicz 385fd92e702SArtur Paszkiewicz static inline uint8_t 386fd92e702SArtur Paszkiewicz raid_bdev_base_bdev_slot(struct raid_base_bdev_info *base_info) 387fd92e702SArtur Paszkiewicz { 388fd92e702SArtur Paszkiewicz return base_info - base_info->raid_bdev->base_bdev_info; 389fd92e702SArtur Paszkiewicz } 390fd92e702SArtur Paszkiewicz 3919820a949SArtur Paszkiewicz static inline void 3929820a949SArtur Paszkiewicz raid_bdev_io_set_default_status(struct raid_bdev_io *raid_io, enum spdk_bdev_io_status status) 3939820a949SArtur Paszkiewicz { 3949820a949SArtur Paszkiewicz assert(raid_io->base_bdev_io_submitted == 0); 3959820a949SArtur Paszkiewicz raid_io->base_bdev_io_status = status; 3969820a949SArtur Paszkiewicz raid_io->base_bdev_io_status_default = status; 3979820a949SArtur Paszkiewicz } 3989820a949SArtur Paszkiewicz 3991b9c5629SArtur Paszkiewicz int raid_bdev_remap_dix_reftag(void *md_buf, uint64_t num_blocks, 4001b9c5629SArtur Paszkiewicz struct spdk_bdev *bdev, uint32_t remapped_offset); 401192db8deSSlawomir Ptak int raid_bdev_verify_dix_reftag(struct iovec *iovs, int iovcnt, void *md_buf, 402192db8deSSlawomir Ptak uint64_t num_blocks, struct spdk_bdev *bdev, uint32_t offset_blocks); 403192db8deSSlawomir Ptak 404614ca6d2SArtur Paszkiewicz /** 405614ca6d2SArtur Paszkiewicz * Raid bdev I/O read/write wrapper for spdk_bdev_readv_blocks_ext function. 406614ca6d2SArtur Paszkiewicz */ 4071b9c5629SArtur Paszkiewicz static inline int 4081b9c5629SArtur Paszkiewicz raid_bdev_readv_blocks_ext(struct raid_base_bdev_info *base_info, struct spdk_io_channel *ch, 409614ca6d2SArtur Paszkiewicz struct iovec *iov, int iovcnt, uint64_t offset_blocks, 410614ca6d2SArtur Paszkiewicz uint64_t num_blocks, spdk_bdev_io_completion_cb cb, void *cb_arg, 4111b9c5629SArtur Paszkiewicz struct spdk_bdev_ext_io_opts *opts) 4121b9c5629SArtur Paszkiewicz { 4131b9c5629SArtur Paszkiewicz return spdk_bdev_readv_blocks_ext(base_info->desc, ch, iov, iovcnt, 4141b9c5629SArtur Paszkiewicz base_info->data_offset + offset_blocks, num_blocks, cb, cb_arg, opts); 4151b9c5629SArtur Paszkiewicz } 416614ca6d2SArtur Paszkiewicz 417614ca6d2SArtur Paszkiewicz /** 418614ca6d2SArtur Paszkiewicz * Raid bdev I/O read/write wrapper for spdk_bdev_writev_blocks_ext function. 419614ca6d2SArtur Paszkiewicz */ 4201b9c5629SArtur Paszkiewicz static inline int 4211b9c5629SArtur Paszkiewicz raid_bdev_writev_blocks_ext(struct raid_base_bdev_info *base_info, struct spdk_io_channel *ch, 422614ca6d2SArtur Paszkiewicz struct iovec *iov, int iovcnt, uint64_t offset_blocks, 423614ca6d2SArtur Paszkiewicz uint64_t num_blocks, spdk_bdev_io_completion_cb cb, void *cb_arg, 4241b9c5629SArtur Paszkiewicz struct spdk_bdev_ext_io_opts *opts) 4251b9c5629SArtur Paszkiewicz { 4261b9c5629SArtur Paszkiewicz int rc; 4271b9c5629SArtur Paszkiewicz uint64_t remapped_offset_blocks = base_info->data_offset + offset_blocks; 4281b9c5629SArtur Paszkiewicz 4291b9c5629SArtur Paszkiewicz if (spdk_unlikely(spdk_bdev_get_dif_type(&base_info->raid_bdev->bdev) != SPDK_DIF_DISABLE && 4301b9c5629SArtur Paszkiewicz (base_info->raid_bdev->bdev.dif_check_flags & SPDK_DIF_FLAGS_REFTAG_CHECK))) { 4311b9c5629SArtur Paszkiewicz rc = raid_bdev_remap_dix_reftag(opts->metadata, num_blocks, &base_info->raid_bdev->bdev, 4321b9c5629SArtur Paszkiewicz remapped_offset_blocks); 4331b9c5629SArtur Paszkiewicz if (rc != 0) { 4341b9c5629SArtur Paszkiewicz return rc; 4351b9c5629SArtur Paszkiewicz } 4361b9c5629SArtur Paszkiewicz } 4371b9c5629SArtur Paszkiewicz 4381b9c5629SArtur Paszkiewicz return spdk_bdev_writev_blocks_ext(base_info->desc, ch, iov, iovcnt, 4391b9c5629SArtur Paszkiewicz remapped_offset_blocks, num_blocks, cb, cb_arg, opts); 4401b9c5629SArtur Paszkiewicz } 441614ca6d2SArtur Paszkiewicz 442614ca6d2SArtur Paszkiewicz /** 443614ca6d2SArtur Paszkiewicz * Raid bdev I/O read/write wrapper for spdk_bdev_unmap_blocks function. 444614ca6d2SArtur Paszkiewicz */ 445614ca6d2SArtur Paszkiewicz static inline int 446614ca6d2SArtur Paszkiewicz raid_bdev_unmap_blocks(struct raid_base_bdev_info *base_info, struct spdk_io_channel *ch, 447614ca6d2SArtur Paszkiewicz uint64_t offset_blocks, uint64_t num_blocks, 448614ca6d2SArtur Paszkiewicz spdk_bdev_io_completion_cb cb, void *cb_arg) 449614ca6d2SArtur Paszkiewicz { 450deed7d2fSArtur Paszkiewicz return spdk_bdev_unmap_blocks(base_info->desc, ch, base_info->data_offset + offset_blocks, 451deed7d2fSArtur Paszkiewicz num_blocks, cb, cb_arg); 452614ca6d2SArtur Paszkiewicz } 453614ca6d2SArtur Paszkiewicz 454614ca6d2SArtur Paszkiewicz /** 455614ca6d2SArtur Paszkiewicz * Raid bdev I/O read/write wrapper for spdk_bdev_flush_blocks function. 456614ca6d2SArtur Paszkiewicz */ 457614ca6d2SArtur Paszkiewicz static inline int 458614ca6d2SArtur Paszkiewicz raid_bdev_flush_blocks(struct raid_base_bdev_info *base_info, struct spdk_io_channel *ch, 459614ca6d2SArtur Paszkiewicz uint64_t offset_blocks, uint64_t num_blocks, 460614ca6d2SArtur Paszkiewicz spdk_bdev_io_completion_cb cb, void *cb_arg) 461614ca6d2SArtur Paszkiewicz { 462deed7d2fSArtur Paszkiewicz return spdk_bdev_flush_blocks(base_info->desc, ch, base_info->data_offset + offset_blocks, 463deed7d2fSArtur Paszkiewicz num_blocks, cb, cb_arg); 464614ca6d2SArtur Paszkiewicz } 465614ca6d2SArtur Paszkiewicz 46677b8f7b6SArtur Paszkiewicz /* 46777b8f7b6SArtur Paszkiewicz * Definitions related to raid bdev superblock 46877b8f7b6SArtur Paszkiewicz */ 46977b8f7b6SArtur Paszkiewicz 47077b8f7b6SArtur Paszkiewicz #define RAID_BDEV_SB_VERSION_MAJOR 1 47177b8f7b6SArtur Paszkiewicz #define RAID_BDEV_SB_VERSION_MINOR 0 47277b8f7b6SArtur Paszkiewicz 47377b8f7b6SArtur Paszkiewicz #define RAID_BDEV_SB_NAME_SIZE 64 47477b8f7b6SArtur Paszkiewicz 47577b8f7b6SArtur Paszkiewicz enum raid_bdev_sb_base_bdev_state { 47677b8f7b6SArtur Paszkiewicz RAID_SB_BASE_BDEV_MISSING = 0, 47777b8f7b6SArtur Paszkiewicz RAID_SB_BASE_BDEV_CONFIGURED = 1, 47877b8f7b6SArtur Paszkiewicz RAID_SB_BASE_BDEV_FAILED = 2, 47977b8f7b6SArtur Paszkiewicz RAID_SB_BASE_BDEV_SPARE = 3, 48077b8f7b6SArtur Paszkiewicz }; 48177b8f7b6SArtur Paszkiewicz 48277b8f7b6SArtur Paszkiewicz struct raid_bdev_sb_base_bdev { 48377b8f7b6SArtur Paszkiewicz /* uuid of the base bdev */ 48477b8f7b6SArtur Paszkiewicz struct spdk_uuid uuid; 48577b8f7b6SArtur Paszkiewicz /* offset in blocks from base device start to the start of raid data area */ 48677b8f7b6SArtur Paszkiewicz uint64_t data_offset; 48777b8f7b6SArtur Paszkiewicz /* size in blocks of the base device raid data area */ 48877b8f7b6SArtur Paszkiewicz uint64_t data_size; 48977b8f7b6SArtur Paszkiewicz /* state of the base bdev */ 49077b8f7b6SArtur Paszkiewicz uint32_t state; 49177b8f7b6SArtur Paszkiewicz /* feature/status flags */ 49277b8f7b6SArtur Paszkiewicz uint32_t flags; 49377b8f7b6SArtur Paszkiewicz /* slot number of this base bdev in the raid */ 49477b8f7b6SArtur Paszkiewicz uint8_t slot; 49577b8f7b6SArtur Paszkiewicz 49677b8f7b6SArtur Paszkiewicz uint8_t reserved[23]; 49777b8f7b6SArtur Paszkiewicz }; 49877b8f7b6SArtur Paszkiewicz SPDK_STATIC_ASSERT(sizeof(struct raid_bdev_sb_base_bdev) == 64, "incorrect size"); 49977b8f7b6SArtur Paszkiewicz 50077b8f7b6SArtur Paszkiewicz struct raid_bdev_superblock { 50177b8f7b6SArtur Paszkiewicz #define RAID_BDEV_SB_SIG "SPDKRAID" 50277b8f7b6SArtur Paszkiewicz uint8_t signature[8]; 50377b8f7b6SArtur Paszkiewicz struct { 50477b8f7b6SArtur Paszkiewicz /* incremented when a breaking change in the superblock structure is made */ 50577b8f7b6SArtur Paszkiewicz uint16_t major; 50677b8f7b6SArtur Paszkiewicz /* incremented for changes in the superblock that are backward compatible */ 50777b8f7b6SArtur Paszkiewicz uint16_t minor; 50877b8f7b6SArtur Paszkiewicz } version; 50977b8f7b6SArtur Paszkiewicz /* length in bytes of the entire superblock */ 51077b8f7b6SArtur Paszkiewicz uint32_t length; 51177b8f7b6SArtur Paszkiewicz /* crc32c checksum of the entire superblock */ 51277b8f7b6SArtur Paszkiewicz uint32_t crc; 51377b8f7b6SArtur Paszkiewicz /* feature/status flags */ 51477b8f7b6SArtur Paszkiewicz uint32_t flags; 51577b8f7b6SArtur Paszkiewicz /* unique id of the raid bdev */ 51677b8f7b6SArtur Paszkiewicz struct spdk_uuid uuid; 51777b8f7b6SArtur Paszkiewicz /* name of the raid bdev */ 51877b8f7b6SArtur Paszkiewicz uint8_t name[RAID_BDEV_SB_NAME_SIZE]; 51977b8f7b6SArtur Paszkiewicz /* size of the raid bdev in blocks */ 52077b8f7b6SArtur Paszkiewicz uint64_t raid_size; 52177b8f7b6SArtur Paszkiewicz /* the raid bdev block size - must be the same for all base bdevs */ 52277b8f7b6SArtur Paszkiewicz uint32_t block_size; 52377b8f7b6SArtur Paszkiewicz /* the raid level */ 52477b8f7b6SArtur Paszkiewicz uint32_t level; 52577b8f7b6SArtur Paszkiewicz /* strip (chunk) size in blocks */ 52677b8f7b6SArtur Paszkiewicz uint32_t strip_size; 52777b8f7b6SArtur Paszkiewicz /* state of the raid */ 52877b8f7b6SArtur Paszkiewicz uint32_t state; 52977b8f7b6SArtur Paszkiewicz /* sequence number, incremented on every superblock update */ 53077b8f7b6SArtur Paszkiewicz uint64_t seq_number; 53177b8f7b6SArtur Paszkiewicz /* number of raid base devices */ 53277b8f7b6SArtur Paszkiewicz uint8_t num_base_bdevs; 53377b8f7b6SArtur Paszkiewicz 53477b8f7b6SArtur Paszkiewicz uint8_t reserved[118]; 53577b8f7b6SArtur Paszkiewicz 53677b8f7b6SArtur Paszkiewicz /* size of the base bdevs array */ 53777b8f7b6SArtur Paszkiewicz uint8_t base_bdevs_size; 53877b8f7b6SArtur Paszkiewicz /* array of base bdev descriptors */ 53977b8f7b6SArtur Paszkiewicz struct raid_bdev_sb_base_bdev base_bdevs[]; 54077b8f7b6SArtur Paszkiewicz }; 54177b8f7b6SArtur Paszkiewicz SPDK_STATIC_ASSERT(sizeof(struct raid_bdev_superblock) == 256, "incorrect size"); 54277b8f7b6SArtur Paszkiewicz 54346adadcfSArtur Paszkiewicz #define RAID_BDEV_SB_MAX_LENGTH (sizeof(struct raid_bdev_superblock) + UINT8_MAX * sizeof(struct raid_bdev_sb_base_bdev)) 54477b8f7b6SArtur Paszkiewicz 54577b8f7b6SArtur Paszkiewicz SPDK_STATIC_ASSERT(RAID_BDEV_SB_MAX_LENGTH < RAID_BDEV_MIN_DATA_OFFSET_SIZE, 54677b8f7b6SArtur Paszkiewicz "Incorrect min data offset"); 54777b8f7b6SArtur Paszkiewicz 54877b8f7b6SArtur Paszkiewicz typedef void (*raid_bdev_write_sb_cb)(int status, struct raid_bdev *raid_bdev, void *ctx); 5498a6bb6a8SArtur Paszkiewicz typedef void (*raid_bdev_load_sb_cb)(const struct raid_bdev_superblock *sb, int status, void *ctx); 55077b8f7b6SArtur Paszkiewicz 551c25b80e2SArtur Paszkiewicz int raid_bdev_alloc_superblock(struct raid_bdev *raid_bdev, uint32_t block_size); 552c25b80e2SArtur Paszkiewicz void raid_bdev_free_superblock(struct raid_bdev *raid_bdev); 55377b8f7b6SArtur Paszkiewicz void raid_bdev_init_superblock(struct raid_bdev *raid_bdev); 55477b8f7b6SArtur Paszkiewicz void raid_bdev_write_superblock(struct raid_bdev *raid_bdev, raid_bdev_write_sb_cb cb, 55577b8f7b6SArtur Paszkiewicz void *cb_ctx); 5568a6bb6a8SArtur Paszkiewicz int raid_bdev_load_base_bdev_superblock(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, 5578a6bb6a8SArtur Paszkiewicz raid_bdev_load_sb_cb cb, void *cb_ctx); 55877b8f7b6SArtur Paszkiewicz 559f39350beSArtur Paszkiewicz struct spdk_raid_bdev_opts { 560f39350beSArtur Paszkiewicz /* Size of the background process window in KiB */ 561f39350beSArtur Paszkiewicz uint32_t process_window_size_kb; 56289fd1730Sxupeng9 /* Maximum bandwidth in MiB to process per second */ 56389fd1730Sxupeng9 uint32_t process_max_bandwidth_mb_sec; 564f39350beSArtur Paszkiewicz }; 565f39350beSArtur Paszkiewicz 566f39350beSArtur Paszkiewicz void raid_bdev_get_opts(struct spdk_raid_bdev_opts *opts); 567f39350beSArtur Paszkiewicz int raid_bdev_set_opts(const struct spdk_raid_bdev_opts *opts); 568f39350beSArtur Paszkiewicz 56907fe6a43SSeth Howell #endif /* SPDK_BDEV_RAID_INTERNAL_H */ 570