/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (C) 2016 Intel Corporation. All rights reserved.
 * Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved.
 * Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 */

#ifndef SPDK_BDEV_NVME_H
#define SPDK_BDEV_NVME_H

#include "spdk/stdinc.h"

#include "spdk/queue.h"
#include "spdk/nvme.h"
#include "spdk/bdev_module.h"

/* Global list of all multipath controller groups; protected by g_bdev_nvme_mutex. */
TAILQ_HEAD(nvme_bdev_ctrlrs, nvme_bdev_ctrlr);
extern struct nvme_bdev_ctrlrs g_nvme_bdev_ctrlrs;
extern pthread_mutex_t g_bdev_nvme_mutex;
/* Set when the bdev_nvme module is shutting down. */
extern bool g_bdev_nvme_module_finish;

#define NVME_MAX_CONTROLLERS 1024

/* I/O distribution policy across multiple paths of an NVMe bdev. */
enum bdev_nvme_multipath_policy {
	BDEV_NVME_MP_POLICY_ACTIVE_PASSIVE,
	BDEV_NVME_MP_POLICY_ACTIVE_ACTIVE,
};

/* Completion callback for bdev_nvme_create(); bdev_count is the number of
 * bdevs created and rc is 0 on success or a negated errno. */
typedef void (*spdk_bdev_create_nvme_fn)(void *ctx, size_t bdev_count, int rc);
/* Completion callback for bdev_nvme_start_discovery(). */
typedef void (*spdk_bdev_nvme_start_discovery_fn)(void *ctx, int status);
/* Completion callback for bdev_nvme_stop_discovery(). */
typedef void (*spdk_bdev_nvme_stop_discovery_fn)(void *ctx);

/* Per-controller options supplied at attach time (see bdev_nvme_create()). */
struct nvme_ctrlr_opts {
	uint32_t prchk_flags;
	int32_t ctrlr_loss_timeout_sec;
	uint32_t reconnect_delay_sec;
	uint32_t fast_io_fail_timeout_sec;
	/* True if this controller was attached via the discovery service. */
	bool from_discovery_service;
};

/* State tracked across an asynchronous controller probe/attach started by
 * bdev_nvme_create(); freed when the user callback has fired. */
struct nvme_async_probe_ctx {
	struct spdk_nvme_probe_ctx *probe_ctx;
	const char *base_name;
	/* Output array for created bdev names; capacity is count. */
	const char **names;
	uint32_t count;
	struct spdk_poller *poller;
	struct spdk_nvme_transport_id trid;
	struct nvme_ctrlr_opts bdev_opts;
	struct spdk_nvme_ctrlr_opts drv_opts;
	spdk_bdev_create_nvme_fn cb_fn;
	void *cb_ctx;
	/* Number of namespace populations still outstanding. */
	uint32_t populates_in_progress;
	bool ctrlr_attached;
	bool probe_done;
	bool namespaces_populated;
};

/* One namespace of one controller; an nvme_bdev aggregates one nvme_ns
 * per controller path in multipath mode. */
struct nvme_ns {
	uint32_t id;
	struct spdk_nvme_ns *ns;
	struct nvme_ctrlr *ctrlr;
	struct nvme_bdev *bdev;
	uint32_t ana_group_id;
	enum spdk_nvme_ana_state ana_state;
	bool ana_state_updating;
	bool ana_transition_timedout;
	/* Timer for the ANA transition time (ANATT) window. */
	struct spdk_poller *anatt_timer;
	struct nvme_async_probe_ctx *probe_ctx;
	TAILQ_ENTRY(nvme_ns) tailq;
	/* Entry in nvme_ctrlr::namespaces, keyed by namespace ID. */
	RB_ENTRY(nvme_ns) node;
};

struct nvme_bdev_io;
struct nvme_bdev_ctrlr;
struct nvme_bdev;
struct nvme_io_path;

/* One transport address (trid + hostid) of a controller; a controller keeps
 * a list of these for failover (see nvme_ctrlr::trids). */
struct nvme_path_id {
	struct spdk_nvme_transport_id trid;
	struct spdk_nvme_host_id hostid;
	TAILQ_ENTRY(nvme_path_id) link;
	bool is_failed;
};

typedef void (*bdev_nvme_reset_cb)(void *cb_arg, bool success);
typedef void (*nvme_ctrlr_disconnected_cb)(struct nvme_ctrlr *nvme_ctrlr);

/* Per-attachment state wrapping one spdk_nvme_ctrlr. */
struct nvme_ctrlr {
	/**
	 * points to pinned, physically contiguous memory region;
	 * contains 4KB IDENTIFY structure for controller which is
	 *  target for CONTROLLER IDENTIFY command during initialization
	 */
	struct spdk_nvme_ctrlr *ctrlr;
	struct nvme_path_id *active_path_id;
	int ref;

	/* State bits; NOTE(review): presumably protected by mutex below —
	 * confirm against the .c file. */
	uint32_t resetting : 1;
	uint32_t reconnect_is_delayed : 1;
	uint32_t fast_io_fail_timedout : 1;
	uint32_t destruct : 1;
	uint32_t ana_log_page_updating : 1;
	uint32_t io_path_cache_clearing : 1;

	struct nvme_ctrlr_opts opts;

	/* Active namespaces, keyed by namespace ID. */
	RB_HEAD(nvme_ns_tree, nvme_ns) namespaces;

	struct spdk_opal_dev *opal_dev;

	struct spdk_poller *adminq_timer_poller;
	struct spdk_thread *thread;

	bdev_nvme_reset_cb reset_cb_fn;
	void *reset_cb_arg;
	/* Poller used to check for reset/detach completion */
	struct spdk_poller *reset_detach_poller;
	struct spdk_nvme_detach_ctx *detach_ctx;

	uint64_t reset_start_tsc;
	struct spdk_poller *reconnect_delay_timer;

	nvme_ctrlr_disconnected_cb disconnected_cb;

	/** linked list pointer for device list */
	TAILQ_ENTRY(nvme_ctrlr) tailq;
	struct nvme_bdev_ctrlr *nbdev_ctrlr;

	/* Alternative transport addresses for failover. */
	TAILQ_HEAD(nvme_paths, nvme_path_id) trids;

	uint32_t max_ana_log_page_size;
	struct spdk_nvme_ana_page *ana_log_page;
	struct spdk_nvme_ana_group_descriptor *copied_ana_desc;

	struct nvme_async_probe_ctx *probe_ctx;

	pthread_mutex_t mutex;
};

/* Named group of controllers that form the paths of a multipath device,
 * together with the bdevs exposed on top of them. */
struct nvme_bdev_ctrlr {
	char *name;
	TAILQ_HEAD(, nvme_ctrlr) ctrlrs;
	TAILQ_HEAD(, nvme_bdev) bdevs;
	/* Entry in g_nvme_bdev_ctrlrs. */
	TAILQ_ENTRY(nvme_bdev_ctrlr) tailq;
};

/* The bdev exposed to the upper layer; aggregates one nvme_ns per path. */
struct nvme_bdev {
	struct spdk_bdev disk;
	uint32_t nsid;
	struct nvme_bdev_ctrlr *nbdev_ctrlr;
	pthread_mutex_t mutex;
	int ref;
	enum bdev_nvme_multipath_policy mp_policy;
	TAILQ_HEAD(, nvme_ns) nvme_ns_list;
	bool opal;
	TAILQ_ENTRY(nvme_bdev) tailq;
};

/* Per-thread I/O queue pair of one controller. */
struct nvme_qpair {
	struct nvme_ctrlr *ctrlr;
	struct spdk_nvme_qpair *qpair;
	struct nvme_poll_group *group;
	struct nvme_ctrlr_channel *ctrlr_ch;

	/* The following is used to update io_path cache of nvme_bdev_channels. */
	TAILQ_HEAD(, nvme_io_path) io_path_list;

	TAILQ_ENTRY(nvme_qpair) tailq;
};

/* Per-thread channel of an nvme_ctrlr. */
struct nvme_ctrlr_channel {
	struct nvme_qpair *qpair;
	/* Bdev I/Os queued while a reset is in progress. */
	TAILQ_HEAD(, spdk_bdev_io) pending_resets;

	struct spdk_io_channel_iter *reset_iter;
};

/* One (namespace, qpair) pair usable by an nvme_bdev_channel to submit I/O. */
struct nvme_io_path {
	struct nvme_ns *nvme_ns;
	struct nvme_qpair *qpair;
	uint64_t io_outstanding;
	STAILQ_ENTRY(nvme_io_path) stailq;

	/* The following are used to update io_path cache of the nvme_bdev_channel. */
	struct nvme_bdev_channel *nbdev_ch;
	TAILQ_ENTRY(nvme_io_path) tailq;
};

/* Per-thread channel of an nvme_bdev. */
struct nvme_bdev_channel {
	/* Cached preferred path; cleared when paths change. */
	struct nvme_io_path *current_io_path;
	enum bdev_nvme_multipath_policy mp_policy;
	STAILQ_HEAD(, nvme_io_path) io_path_list;
	TAILQ_HEAD(retry_io_head, spdk_bdev_io) retry_io_list;
	struct spdk_poller *retry_io_poller;
};

/* Per-thread poll group shared by the qpairs of all controllers. */
struct nvme_poll_group {
	struct spdk_nvme_poll_group *group;
	struct spdk_io_channel *accel_channel;
	struct spdk_poller *poller;
	/* Spin-time accounting, active only when collect_spin_stat is set. */
	bool collect_spin_stat;
	uint64_t spin_ticks;
	uint64_t start_ticks;
	uint64_t end_ticks;
	TAILQ_HEAD(, nvme_qpair) qpair_list;
};

/* Write a JSON description of io_path to w. */
void nvme_io_path_info_json(struct spdk_json_write_ctx *w, struct nvme_io_path *io_path);

/* Look up an attached controller by name; returns NULL if not found. */
struct nvme_ctrlr *nvme_ctrlr_get_by_name(const char *name);

/* Look up a multipath controller group by name; returns NULL if not found. */
struct nvme_bdev_ctrlr *nvme_bdev_ctrlr_get_by_name(const char *name);

typedef void (*nvme_bdev_ctrlr_for_each_fn)(struct nvme_bdev_ctrlr *nbdev_ctrlr, void *ctx);

/* Invoke fn(nbdev_ctrlr, ctx) for every registered nvme_bdev_ctrlr. */
void nvme_bdev_ctrlr_for_each(nvme_bdev_ctrlr_for_each_fn fn, void *ctx);

/* Write a JSON description of trid to w. */
void nvme_bdev_dump_trid_json(const struct spdk_nvme_transport_id *trid,
			      struct spdk_json_write_ctx *w);

/* Write a JSON description of nvme_ctrlr to w. */
void nvme_ctrlr_info_json(struct spdk_json_write_ctx *w, struct nvme_ctrlr *nvme_ctrlr);

/* Namespace lookup/iteration over nvme_ctrlr's active namespaces. */
struct nvme_ns *nvme_ctrlr_get_ns(struct nvme_ctrlr *nvme_ctrlr, uint32_t nsid);
struct nvme_ns *nvme_ctrlr_get_first_active_ns(struct nvme_ctrlr *nvme_ctrlr);
struct nvme_ns *nvme_ctrlr_get_next_active_ns(struct nvme_ctrlr *nvme_ctrlr, struct nvme_ns *ns);

/* Action taken when an I/O exceeds the configured timeout. */
enum spdk_bdev_timeout_action {
	SPDK_BDEV_NVME_TIMEOUT_ACTION_NONE = 0,
	SPDK_BDEV_NVME_TIMEOUT_ACTION_RESET,
	SPDK_BDEV_NVME_TIMEOUT_ACTION_ABORT,
};

/* Module-wide options (see bdev_nvme_get_opts()/bdev_nvme_set_opts()). */
struct spdk_bdev_nvme_opts {
	enum spdk_bdev_timeout_action action_on_timeout;
	uint64_t timeout_us;
	uint64_t timeout_admin_us;
	uint32_t keep_alive_timeout_ms;
	/* The number of attempts per I/O in the transport layer before an I/O fails. */
	uint32_t transport_retry_count;
	uint32_t arbitration_burst;
	uint32_t low_priority_weight;
	uint32_t medium_priority_weight;
	uint32_t high_priority_weight;
	uint64_t nvme_adminq_poll_period_us;
	uint64_t nvme_ioq_poll_period_us;
	uint32_t io_queue_requests;
	bool delay_cmd_submit;
	/* The number of attempts per I/O in the bdev layer before an I/O fails. */
	int32_t bdev_retry_count;
	uint8_t transport_ack_timeout;
	int32_t ctrlr_loss_timeout_sec;
	uint32_t reconnect_delay_sec;
	uint32_t fast_io_fail_timeout_sec;
	bool disable_auto_failback;
	bool generate_uuids;
};

/* Return the I/O qpair backing a controller I/O channel. */
struct spdk_nvme_qpair *bdev_nvme_get_io_qpair(struct spdk_io_channel *ctrlr_io_ch);
/* Copy the current module-wide options into *opts. */
void bdev_nvme_get_opts(struct spdk_bdev_nvme_opts *opts);
/* Apply new module-wide options; returns 0 on success or a negated errno. */
int bdev_nvme_set_opts(const struct spdk_bdev_nvme_opts *opts);
/* Enable or disable hotplug monitoring with the given poll period. */
int bdev_nvme_set_hotplug(bool enabled, uint64_t period_us, spdk_msg_fn cb, void *cb_ctx);

/* Fill *opts with the default per-controller options. */
void bdev_nvme_get_default_ctrlr_opts(struct nvme_ctrlr_opts *opts);

/* Attach the controller at trid and create bdevs named from base_name;
 * created names are returned in names (capacity count) via cb_fn. */
int bdev_nvme_create(struct spdk_nvme_transport_id *trid,
		     const char *base_name,
		     const char **names,
		     uint32_t count,
		     spdk_bdev_create_nvme_fn cb_fn,
		     void *cb_ctx,
		     struct spdk_nvme_ctrlr_opts *drv_opts,
		     struct nvme_ctrlr_opts *bdev_opts,
		     bool multipath);

/* Start/stop/inspect the NVMe-oF discovery service for trid. */
int bdev_nvme_start_discovery(struct spdk_nvme_transport_id *trid, const char *base_name,
			      struct spdk_nvme_ctrlr_opts *drv_opts, struct nvme_ctrlr_opts *bdev_opts,
			      uint64_t timeout, spdk_bdev_nvme_start_discovery_fn cb_fn, void *cb_ctx);
int bdev_nvme_stop_discovery(const char *name, spdk_bdev_nvme_stop_discovery_fn cb_fn,
			     void *cb_ctx);
void bdev_nvme_get_discovery_info(struct spdk_json_write_ctx *w);

/* Return the controller backing bdev, or NULL if bdev is not an NVMe bdev. */
struct spdk_nvme_ctrlr *bdev_nvme_get_ctrlr(struct spdk_bdev *bdev);

/**
 * Delete NVMe controller with all bdevs on top of it, or delete the specified path
 * if there is any alternative path. Requires to pass name of NVMe controller.
 *
 * \param name NVMe controller name
 * \param path_id The specified path to remove (optional)
 * \return zero on success, -EINVAL on wrong parameters or -ENODEV if controller is not found
 */
int bdev_nvme_delete(const char *name, const struct nvme_path_id *path_id);

/**
 * Reset NVMe controller.
 *
 * \param nvme_ctrlr The specified NVMe controller to reset
 * \param cb_fn Function to be called back after reset completes
 * \param cb_arg Argument for callback function
 * \return zero on success. Negated errno on the following error conditions:
 * -ENXIO: controller is being destroyed.
 * -EBUSY: controller is already being reset.
 */
int bdev_nvme_reset_rpc(struct nvme_ctrlr *nvme_ctrlr, bdev_nvme_reset_cb cb_fn, void *cb_arg);

typedef void (*bdev_nvme_set_preferred_path_cb)(void *cb_arg, int rc);

/**
 * Set the preferred I/O path for an NVMe bdev in multipath mode.
 *
 * NOTE: This function does not support NVMe bdevs in failover mode.
 *
 * \param name NVMe bdev name
 * \param cntlid NVMe-oF controller ID
 * \param cb_fn Function to be called back after completion.
 * \param cb_arg Argument for callback function.
 */
void bdev_nvme_set_preferred_path(const char *name, uint16_t cntlid,
				  bdev_nvme_set_preferred_path_cb cb_fn, void *cb_arg);

typedef void (*bdev_nvme_set_multipath_policy_cb)(void *cb_arg, int rc);

/**
 * Set multipath policy of the NVMe bdev.
 *
 * \param name NVMe bdev name
 * \param policy Multipath policy (active-passive or active-active)
 * \param cb_fn Function to be called back after completion.
 * \param cb_arg Argument for callback function.
 */
void bdev_nvme_set_multipath_policy(const char *name,
				    enum bdev_nvme_multipath_policy policy,
				    bdev_nvme_set_multipath_policy_cb cb_fn,
				    void *cb_arg);

#endif /* SPDK_BDEV_NVME_H */