/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (C) 2016 Intel Corporation. All rights reserved.
 * Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved.
 * Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * Copyright (c) 2022 Dell Inc, or its subsidiaries. All rights reserved.
 */

#ifndef SPDK_BDEV_NVME_H
#define SPDK_BDEV_NVME_H

#include "spdk/stdinc.h"

#include "spdk/queue.h"
#include "spdk/nvme.h"
#include "spdk/bdev_module.h"
#include "spdk/module/bdev/nvme.h"
#include "spdk/jsonrpc.h"

/* Global list of all NVMe bdev controllers known to this module. */
TAILQ_HEAD(nvme_bdev_ctrlrs, nvme_bdev_ctrlr);
extern struct nvme_bdev_ctrlrs g_nvme_bdev_ctrlrs;
/* NOTE(review): presumably serializes access to g_nvme_bdev_ctrlrs and the
 * other module-wide globals below — confirm against bdev_nvme.c. */
extern pthread_mutex_t g_bdev_nvme_mutex;
extern bool g_bdev_nvme_module_finish;
extern struct spdk_thread *g_bdev_nvme_init_thread;

#define NVME_MAX_CONTROLLERS 1024

typedef void (*spdk_bdev_nvme_start_discovery_fn)(void *ctx, int status);
typedef void (*spdk_bdev_nvme_stop_discovery_fn)(void *ctx);

/* Context tracking one asynchronous controller attach (probe) and the
 * subsequent namespace population; freed when the create callback fires. */
struct nvme_async_probe_ctx {
	struct spdk_nvme_probe_ctx *probe_ctx;
	const char *base_name;
	/* Caller-provided array (capacity max_bdevs) filled with the names of
	 * the bdevs created on top of the attached controller. */
	const char **names;
	uint32_t max_bdevs;
	uint32_t reported_bdevs;
	struct spdk_poller *poller;
	struct spdk_nvme_transport_id trid;
	struct spdk_bdev_nvme_ctrlr_opts bdev_opts;
	struct spdk_nvme_ctrlr_opts drv_opts;
	spdk_bdev_nvme_create_cb cb_fn;
	void *cb_ctx;
	uint32_t populates_in_progress;
	bool ctrlr_attached;
	bool probe_done;
	bool namespaces_populated;
};

/* Per-controller view of one NVMe namespace, including its ANA state. */
struct nvme_ns {
	uint32_t id;
	struct spdk_nvme_ns *ns;
	struct nvme_ctrlr *ctrlr;
	struct nvme_bdev *bdev;
	uint32_t ana_group_id;
	enum spdk_nvme_ana_state ana_state;
	bool ana_state_updating;
	bool ana_transition_timedout;
	/* Timer armed for the ANA transition time (ANATT); on expiry,
	 * ana_transition_timedout is expected to be set. */
	struct spdk_poller *anatt_timer;
	struct nvme_async_probe_ctx *probe_ctx;
	TAILQ_ENTRY(nvme_ns) tailq;
	/* Entry in nvme_ctrlr::namespaces, keyed by namespace ID. */
	RB_ENTRY(nvme_ns) node;

	/**
	 * record io path stat before destroyed. Allocation of stat is
	 * decided by option io_path_stat of RPC
	 * bdev_nvme_set_options
	 */
	struct spdk_bdev_io_stat *stat;
};

struct nvme_bdev_io;
struct nvme_bdev_ctrlr;
struct nvme_bdev;
struct nvme_io_path;
struct nvme_ctrlr_channel_iter;
struct nvme_bdev_channel_iter;

/* One (transport ID, host ID) pair identifying a path to a subsystem. */
struct nvme_path_id {
	struct spdk_nvme_transport_id trid;
	struct spdk_nvme_host_id hostid;
	/* Entry in nvme_ctrlr::trids. */
	TAILQ_ENTRY(nvme_path_id) link;
	/* Tick of the most recent failure on this path; 0 if never failed. */
	uint64_t last_failed_tsc;
};

typedef void (*bdev_nvme_ctrlr_op_cb)(void *cb_arg, int rc);
typedef void (*nvme_ctrlr_disconnected_cb)(struct nvme_ctrlr *nvme_ctrlr);

/* Module-side state for one attached spdk_nvme_ctrlr, including reset,
 * failover, and ANA log page tracking. */
struct nvme_ctrlr {
	/**
	 * points to pinned, physically contiguous memory region;
	 * contains 4KB IDENTIFY structure for controller which is
	 *  target for CONTROLLER IDENTIFY command during initialization
	 */
	struct spdk_nvme_ctrlr *ctrlr;
	struct nvme_path_id *active_path_id;
	int ref;

	/* State flags; NOTE(review): presumably protected by mutex below —
	 * confirm the locking discipline in bdev_nvme.c. */
	uint32_t resetting : 1;
	uint32_t reconnect_is_delayed : 1;
	uint32_t in_failover : 1;
	uint32_t pending_failover : 1;
	uint32_t fast_io_fail_timedout : 1;
	uint32_t destruct : 1;
	uint32_t ana_log_page_updating : 1;
	uint32_t io_path_cache_clearing : 1;
	uint32_t dont_retry : 1;
	uint32_t disabled : 1;

	struct spdk_bdev_nvme_ctrlr_opts opts;

	/* Namespaces of this controller, indexed by namespace ID. */
	RB_HEAD(nvme_ns_tree, nvme_ns) namespaces;

	struct spdk_opal_dev *opal_dev;

	struct spdk_poller *adminq_timer_poller;
	struct spdk_thread *thread;
	struct spdk_interrupt *intr;

	bdev_nvme_ctrlr_op_cb ctrlr_op_cb_fn;
	void *ctrlr_op_cb_arg;
	/* Poller used to check for reset/detach completion */
	struct spdk_poller *reset_detach_poller;
	struct spdk_nvme_detach_ctx *detach_ctx;

	uint64_t reset_start_tsc;
	struct spdk_poller *reconnect_delay_timer;

	nvme_ctrlr_disconnected_cb disconnected_cb;

	/* I/Os whose resets are deferred until the in-progress reset completes. */
	TAILQ_HEAD(, nvme_bdev_io) pending_resets;

	/** linked list pointer for device list */
	TAILQ_ENTRY(nvme_ctrlr) tailq;
	struct nvme_bdev_ctrlr *nbdev_ctrlr;

	/* All known path IDs for this controller; active_path_id points into
	 * this list. */
	TAILQ_HEAD(nvme_paths, nvme_path_id) trids;

	uint32_t max_ana_log_page_size;
	struct spdk_nvme_ana_page *ana_log_page;
	struct spdk_nvme_ana_group_descriptor *copied_ana_desc;

	struct nvme_async_probe_ctx *probe_ctx;
	struct spdk_key *psk;
	struct spdk_key *dhchap_key;
	struct spdk_key *dhchap_ctrlr_key;

	pthread_mutex_t mutex;
};

/* Aggregates the nvme_ctrlrs (multipath members) and nvme_bdevs that share
 * one logical controller name. */
struct nvme_bdev_ctrlr {
	char *name;
	TAILQ_HEAD(, nvme_ctrlr) ctrlrs;
	TAILQ_HEAD(, nvme_bdev) bdevs;
	/* Entry in g_nvme_bdev_ctrlrs. */
	TAILQ_ENTRY(nvme_bdev_ctrlr) tailq;
};

/* Per-bdev NVMe error counters: indexed by status code type (3 bits wide,
 * 8 buckets) and by [type][status code] (8-bit status code, 256 buckets);
 * only the first 4 types get per-code counters. */
struct nvme_error_stat {
	uint32_t status_type[8];
	uint32_t status[4][256];
};

/* The bdev exposed on top of one namespace (possibly reachable through
 * multiple controllers/paths in multipath mode). */
struct nvme_bdev {
	struct spdk_bdev disk;
	uint32_t nsid;
	struct nvme_bdev_ctrlr *nbdev_ctrlr;
	pthread_mutex_t mutex;
	int ref;
	enum spdk_bdev_nvme_multipath_policy mp_policy;
	enum spdk_bdev_nvme_multipath_selector mp_selector;
	/* Minimum I/Os routed to a path before round-robin moves to the next. */
	uint32_t rr_min_io;
	TAILQ_HEAD(, nvme_ns) nvme_ns_list;
	bool opal;
	TAILQ_ENTRY(nvme_bdev) tailq;
	/* Allocated only when the nvme_error_stat option is enabled. */
	struct nvme_error_stat *err_stat;
};

/* Wraps one spdk_nvme_qpair together with its poll group membership and the
 * I/O paths built on top of it. */
struct nvme_qpair {
	struct nvme_ctrlr *ctrlr;
	struct spdk_nvme_qpair *qpair;
	struct nvme_poll_group *group;
	struct nvme_ctrlr_channel *ctrlr_ch;

	/* The following is used to update io_path cache of nvme_bdev_channels. */
	TAILQ_HEAD(, nvme_io_path) io_path_list;

	/* Entry in nvme_poll_group::qpair_list. */
	TAILQ_ENTRY(nvme_qpair) tailq;
};

/* Per-thread channel for one nvme_ctrlr. */
struct nvme_ctrlr_channel {
	struct nvme_qpair *qpair;

	/* Non-NULL while a for-each-channel reset iteration is paused on this
	 * channel waiting for the qpair to reconnect. */
	struct nvme_ctrlr_channel_iter *reset_iter;
	struct spdk_poller *connect_poller;
};

/* One (namespace, qpair) pair a bdev channel can submit I/O through. */
struct nvme_io_path {
	struct nvme_ns *nvme_ns;
	struct nvme_qpair *qpair;
	/* Entry in nvme_bdev_channel::io_path_list. */
	STAILQ_ENTRY(nvme_io_path) stailq;

	/* The following are used to update io_path cache of the nvme_bdev_channel. */
	struct nvme_bdev_channel *nbdev_ch;
	TAILQ_ENTRY(nvme_io_path) tailq;

	/* allocation of stat is decided by option io_path_stat of RPC bdev_nvme_set_options */
	struct spdk_bdev_io_stat *stat;
};

/* Per-thread channel for one nvme_bdev; caches the preferred I/O path and
 * holds I/Os queued for retry. */
struct nvme_bdev_channel {
	/* Cached path selection; cleared/recomputed when paths change. */
	struct nvme_io_path *current_io_path;
	enum spdk_bdev_nvme_multipath_policy mp_policy;
	enum spdk_bdev_nvme_multipath_selector mp_selector;
	uint32_t rr_min_io;
	uint32_t rr_counter;
	STAILQ_HEAD(, nvme_io_path) io_path_list;
	TAILQ_HEAD(retry_io_head, nvme_bdev_io) retry_io_list;
	struct spdk_poller *retry_io_poller;
	bool resetting;
};

/* Per-thread poll group shared by the qpairs running on that thread. */
struct nvme_poll_group {
	struct spdk_nvme_poll_group *group;
	struct spdk_io_channel *accel_channel;
	struct spdk_poller *poller;
	bool collect_spin_stat;
	uint64_t spin_ticks;
	uint64_t start_ticks;
	uint64_t end_ticks;
	TAILQ_HEAD(, nvme_qpair) qpair_list;
	struct spdk_interrupt *intr;
};

void nvme_io_path_info_json(struct spdk_json_write_ctx *w, struct nvme_io_path *io_path);

struct nvme_ctrlr *nvme_ctrlr_get_by_name(const char *name);

typedef void (*nvme_ctrlr_for_each_channel_msg)(struct nvme_ctrlr_channel_iter *iter,
		struct nvme_ctrlr *nvme_ctrlr,
		struct nvme_ctrlr_channel *ctrlr_ch,
		void *ctx);

typedef void (*nvme_ctrlr_for_each_channel_done)(struct nvme_ctrlr *nvme_ctrlr,
		void *ctx, int status);

/* Iterate fn over every channel of nvme_ctrlr, then invoke cpl; each fn must
 * call nvme_ctrlr_for_each_channel_continue() to advance the iteration. */
void nvme_ctrlr_for_each_channel(struct nvme_ctrlr *nvme_ctrlr,
				 nvme_ctrlr_for_each_channel_msg fn, void *ctx,
				 nvme_ctrlr_for_each_channel_done cpl);

void nvme_ctrlr_for_each_channel_continue(struct nvme_ctrlr_channel_iter *iter,
		int status);


typedef void (*nvme_bdev_for_each_channel_msg)(struct nvme_bdev_channel_iter *iter,
		struct nvme_bdev *nbdev,
		struct nvme_bdev_channel *nbdev_ch,
		void *ctx);

typedef void (*nvme_bdev_for_each_channel_done)(struct nvme_bdev *nbdev,
		void *ctx, int status);

/* Iterate fn over every channel of nbdev, then invoke cpl; each fn must call
 * nvme_bdev_for_each_channel_continue() to advance the iteration. */
void nvme_bdev_for_each_channel(struct nvme_bdev *nbdev,
				nvme_bdev_for_each_channel_msg fn, void *ctx,
				nvme_bdev_for_each_channel_done cpl);

void nvme_bdev_for_each_channel_continue(struct nvme_bdev_channel_iter *iter,
		int status);

struct nvme_ctrlr *nvme_bdev_ctrlr_get_ctrlr_by_id(struct nvme_bdev_ctrlr *nbdev_ctrlr,
		uint16_t cntlid);

struct nvme_bdev_ctrlr *nvme_bdev_ctrlr_get_by_name(const char *name);

typedef void (*nvme_bdev_ctrlr_for_each_fn)(struct nvme_bdev_ctrlr *nbdev_ctrlr, void *ctx);

void nvme_bdev_ctrlr_for_each(nvme_bdev_ctrlr_for_each_fn fn, void *ctx);

void nvme_bdev_dump_trid_json(const struct spdk_nvme_transport_id *trid,
			      struct spdk_json_write_ctx *w);

void nvme_ctrlr_info_json(struct spdk_json_write_ctx *w, struct nvme_ctrlr *nvme_ctrlr);

struct nvme_ns *nvme_ctrlr_get_ns(struct nvme_ctrlr *nvme_ctrlr, uint32_t nsid);
struct nvme_ns *nvme_ctrlr_get_first_active_ns(struct nvme_ctrlr *nvme_ctrlr);
struct nvme_ns *nvme_ctrlr_get_next_active_ns(struct nvme_ctrlr *nvme_ctrlr, struct nvme_ns *ns);

/* Action taken when an I/O submitted to an NVMe controller times out. */
enum spdk_bdev_timeout_action {
	SPDK_BDEV_NVME_TIMEOUT_ACTION_NONE = 0,
	SPDK_BDEV_NVME_TIMEOUT_ACTION_RESET,
	SPDK_BDEV_NVME_TIMEOUT_ACTION_ABORT,
};

/* Module-wide options set via the bdev_nvme_set_options RPC. */
struct spdk_bdev_nvme_opts {
	enum spdk_bdev_timeout_action action_on_timeout;
	uint64_t timeout_us;
	uint64_t timeout_admin_us;
	uint32_t keep_alive_timeout_ms;
	/* The number of attempts per I/O in the transport layer before an I/O fails. */
	uint32_t transport_retry_count;
	uint32_t arbitration_burst;
	uint32_t low_priority_weight;
	uint32_t medium_priority_weight;
	uint32_t high_priority_weight;
	uint64_t nvme_adminq_poll_period_us;
	uint64_t nvme_ioq_poll_period_us;
	uint32_t io_queue_requests;
	bool delay_cmd_submit;
	/* The number of attempts per I/O in the bdev layer before an I/O fails. */
	int32_t bdev_retry_count;
	uint8_t transport_ack_timeout;
	int32_t ctrlr_loss_timeout_sec;
	uint32_t reconnect_delay_sec;
	uint32_t fast_io_fail_timeout_sec;
	bool disable_auto_failback;
	bool generate_uuids;
	/* Type of Service - RDMA only */
	uint8_t transport_tos;
	bool nvme_error_stat;
	uint32_t rdma_srq_size;
	bool io_path_stat;
	bool allow_accel_sequence;
	uint32_t rdma_max_cq_size;
	uint16_t rdma_cm_event_timeout_ms;
	uint32_t dhchap_digests;
	uint32_t dhchap_dhgroups;
};

struct spdk_nvme_qpair *bdev_nvme_get_io_qpair(struct spdk_io_channel *ctrlr_io_ch);
void bdev_nvme_get_opts(struct spdk_bdev_nvme_opts *opts);
int bdev_nvme_set_opts(const struct spdk_bdev_nvme_opts *opts);
int bdev_nvme_set_hotplug(bool enabled, uint64_t period_us, spdk_msg_fn cb, void *cb_ctx);

int bdev_nvme_start_discovery(struct spdk_nvme_transport_id *trid, const char *base_name,
			      struct spdk_nvme_ctrlr_opts *drv_opts, struct spdk_bdev_nvme_ctrlr_opts *bdev_opts,
			      uint64_t timeout, bool from_mdns,
			      spdk_bdev_nvme_start_discovery_fn cb_fn, void *cb_ctx);
int bdev_nvme_stop_discovery(const char *name, spdk_bdev_nvme_stop_discovery_fn cb_fn,
			     void *cb_ctx);
void bdev_nvme_get_discovery_info(struct spdk_json_write_ctx *w);

int bdev_nvme_start_mdns_discovery(const char *base_name,
				   const char *svcname,
				   struct spdk_nvme_ctrlr_opts *drv_opts,
				   struct spdk_bdev_nvme_ctrlr_opts *bdev_opts);
int bdev_nvme_stop_mdns_discovery(const char *name);
void bdev_nvme_get_mdns_discovery_info(struct spdk_jsonrpc_request *request);
void bdev_nvme_mdns_discovery_config_json(struct spdk_json_write_ctx *w);

typedef void (*bdev_nvme_set_keys_cb)(void *ctx, int status);

/* Replace the DH-HMAC-CHAP keys of the named controller; cb_fn is invoked
 * with the result. */
int bdev_nvme_set_keys(const char *name, const char *dhchap_key, const char *dhchap_ctrlr_key,
		       bdev_nvme_set_keys_cb cb_fn, void *cb_ctx);

struct spdk_nvme_ctrlr *bdev_nvme_get_ctrlr(struct spdk_bdev *bdev);

typedef void (*bdev_nvme_delete_done_fn)(void *ctx, int rc);

/**
 * Delete NVMe controller with all bdevs on top of it, or delete the specified path
 * if there is any alternative path. Requires to pass name of NVMe controller.
 *
 * \param name NVMe controller name
 * \param path_id The specified path to remove (optional)
 * \param delete_done Callback function on delete complete (optional)
 * \param delete_done_ctx Context passed to callback (optional)
 * \return zero on success,
 *	   -EINVAL on wrong parameters or
 *	   -ENODEV if controller is not found or
 *	   -ENOMEM on no memory
 */
int bdev_nvme_delete(const char *name, const struct nvme_path_id *path_id,
		     bdev_nvme_delete_done_fn delete_done, void *delete_done_ctx);

enum nvme_ctrlr_op {
	NVME_CTRLR_OP_RESET = 1,
	NVME_CTRLR_OP_ENABLE,
	NVME_CTRLR_OP_DISABLE,
};

/**
 * Perform specified operation on an NVMe controller.
 *
 * NOTE: The callback function is always called after this function returns except for
 * out of memory cases.
 *
 * \param nvme_ctrlr The specified NVMe controller to operate
 * \param op Operation code
 * \param cb_fn Function to be called back after operation completes
 * \param cb_arg Argument for callback function
 */
void nvme_ctrlr_op_rpc(struct nvme_ctrlr *nvme_ctrlr, enum nvme_ctrlr_op op,
		       bdev_nvme_ctrlr_op_cb cb_fn, void *cb_arg);

/**
 * Perform specified operation on all NVMe controllers in an NVMe bdev controller.
 *
 * NOTE: The callback function is always called after this function returns except for
 * out of memory cases.
 *
 * \param nbdev_ctrlr The specified NVMe bdev controller to operate
 * \param op Operation code
 * \param cb_fn Function to be called back after operation completes
 * \param cb_arg Argument for callback function
 */
void nvme_bdev_ctrlr_op_rpc(struct nvme_bdev_ctrlr *nbdev_ctrlr, enum nvme_ctrlr_op op,
			    bdev_nvme_ctrlr_op_cb cb_fn, void *cb_arg);

typedef void (*bdev_nvme_set_preferred_path_cb)(void *cb_arg, int rc);

/**
 * Set the preferred I/O path for an NVMe bdev in multipath mode.
 *
 * NOTE: This function does not support NVMe bdevs in failover mode.
 *
 * \param name NVMe bdev name
 * \param cntlid NVMe-oF controller ID
 * \param cb_fn Function to be called back after completion.
 * \param cb_arg Argument for callback function.
 */
void bdev_nvme_set_preferred_path(const char *name, uint16_t cntlid,
				  bdev_nvme_set_preferred_path_cb cb_fn, void *cb_arg);

#endif /* SPDK_BDEV_NVME_H */