/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (C) 2016 Intel Corporation. All rights reserved.
 * Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved.
 * Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 */

#ifndef __NVMF_INTERNAL_H__
#define __NVMF_INTERNAL_H__

#include "spdk/stdinc.h"

#include "spdk/likely.h"
#include "spdk/nvmf.h"
#include "spdk/nvmf_cmd.h"
#include "spdk/nvmf_transport.h"
#include "spdk/nvmf_spec.h"
#include "spdk/assert.h"
#include "spdk/bdev.h"
#include "spdk/queue.h"
#include "spdk/util.h"
#include "spdk/thread.h"

/* The spec reserves cntlid values in the range FFF0h to FFFFh. */
#define NVMF_MIN_CNTLID 1
#define NVMF_MAX_CNTLID 0xFFEF

enum spdk_nvmf_tgt_state {
	NVMF_TGT_IDLE = 0,
	NVMF_TGT_RUNNING,
	NVMF_TGT_PAUSING,
	NVMF_TGT_PAUSED,
	NVMF_TGT_RESUMING,
};

enum spdk_nvmf_subsystem_state {
	SPDK_NVMF_SUBSYSTEM_INACTIVE = 0,
	SPDK_NVMF_SUBSYSTEM_ACTIVATING,
	SPDK_NVMF_SUBSYSTEM_ACTIVE,
	SPDK_NVMF_SUBSYSTEM_PAUSING,
	SPDK_NVMF_SUBSYSTEM_PAUSED,
	SPDK_NVMF_SUBSYSTEM_RESUMING,
	SPDK_NVMF_SUBSYSTEM_DEACTIVATING,
	SPDK_NVMF_SUBSYSTEM_NUM_STATES,
};

struct spdk_nvmf_tgt {
	char name[NVMF_TGT_NAME_MAX_LENGTH];

	pthread_mutex_t mutex;

	uint64_t discovery_genctr;

	uint32_t max_subsystems;

	enum spdk_nvmf_tgt_discovery_filter discovery_filter;

	enum spdk_nvmf_tgt_state state;

	/* Array of subsystem pointers of size max_subsystems indexed by sid */
	struct spdk_nvmf_subsystem **subsystems;

	TAILQ_HEAD(, spdk_nvmf_transport) transports;
	TAILQ_HEAD(, spdk_nvmf_poll_group) poll_groups;

	/* Used for round-robin assignment of connections to poll groups */
	struct spdk_nvmf_poll_group *next_poll_group;

	spdk_nvmf_tgt_destroy_done_fn *destroy_cb_fn;
	void *destroy_cb_arg;

	uint16_t crdt[3];

	TAILQ_ENTRY(spdk_nvmf_tgt) link;
};

struct spdk_nvmf_host {
	char nqn[SPDK_NVMF_NQN_MAX_LEN + 1];
	TAILQ_ENTRY(spdk_nvmf_host) link;
};

struct spdk_nvmf_subsystem_listener {
	struct spdk_nvmf_subsystem *subsystem;
	spdk_nvmf_tgt_subsystem_listen_done_fn cb_fn;
	void *cb_arg;
	struct spdk_nvme_transport_id *trid;
	struct spdk_nvmf_transport *transport;
	enum spdk_nvme_ana_state *ana_state;
	uint64_t ana_state_change_count;
	uint16_t id;
	TAILQ_ENTRY(spdk_nvmf_subsystem_listener) link;
};

/* Maximum number of registrants supported per namespace */
#define SPDK_NVMF_MAX_NUM_REGISTRANTS 16

struct spdk_nvmf_registrant_info {
	uint64_t rkey;
	char host_uuid[SPDK_UUID_STRING_LEN];
};

struct spdk_nvmf_reservation_info {
	bool ptpl_activated;
	enum spdk_nvme_reservation_type rtype;
	uint64_t crkey;
	char bdev_uuid[SPDK_UUID_STRING_LEN];
	char holder_uuid[SPDK_UUID_STRING_LEN];
	uint32_t num_regs;
	struct spdk_nvmf_registrant_info registrants[SPDK_NVMF_MAX_NUM_REGISTRANTS];
};
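/*
 * Illustrative sketch (not part of this header): capturing a point-in-time
 * snapshot of a namespace's in-memory reservation state into the structure
 * above, e.g. before persisting it to the PTPL file. UUIDs are stored as
 * strings via spdk_uuid_fmt_lower(). Variable names are hypothetical.
 *
 *	struct spdk_nvmf_reservation_info info = {};
 *	struct spdk_nvmf_registrant *reg;
 *
 *	info.ptpl_activated = ns->ptpl_activated;
 *	info.rtype = ns->rtype;
 *	info.crkey = ns->crkey;
 *	spdk_uuid_fmt_lower(info.bdev_uuid, sizeof(info.bdev_uuid),
 *			    spdk_bdev_get_uuid(ns->bdev));
 *	TAILQ_FOREACH(reg, &ns->registrants, link) {
 *		info.registrants[info.num_regs].rkey = reg->rkey;
 *		spdk_uuid_fmt_lower(info.registrants[info.num_regs].host_uuid,
 *				    SPDK_UUID_STRING_LEN, &reg->hostid);
 *		info.num_regs++;
 *	}
 */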
struct spdk_nvmf_subsystem_pg_ns_info {
	struct spdk_io_channel *channel;
	struct spdk_uuid uuid;
	/* Current reservation key; no reservation if the value is 0 */
	uint64_t crkey;
	/* Reservation type */
	enum spdk_nvme_reservation_type rtype;
	/* Host ID which holds the reservation */
	struct spdk_uuid holder_id;
	/* Host IDs of the registrants of this namespace */
	struct spdk_uuid reg_hostid[SPDK_NVMF_MAX_NUM_REGISTRANTS];
	uint64_t num_blocks;

	/* Number of outstanding I/Os to this namespace */
	uint64_t io_outstanding;
	enum spdk_nvmf_subsystem_state state;
};

typedef void (*spdk_nvmf_poll_group_mod_done)(void *cb_arg, int status);

struct spdk_nvmf_subsystem_poll_group {
	/* Array of namespace information for each namespace indexed by nsid - 1 */
	struct spdk_nvmf_subsystem_pg_ns_info *ns_info;
	uint32_t num_ns;

	/* Number of outstanding ADMIN and FABRICS requests */
	uint64_t mgmt_io_outstanding;
	spdk_nvmf_poll_group_mod_done cb_fn;
	void *cb_arg;

	enum spdk_nvmf_subsystem_state state;

	TAILQ_HEAD(, spdk_nvmf_request) queued;
};
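/*
 * Illustrative sketch (not part of this header): the ns_info array above is
 * dense and 0-based while NVMe namespace IDs are 1-based, so per-namespace
 * state for a request is reached as shown below. This assumes the sgroups
 * array that hangs off struct spdk_nvmf_poll_group (declared in
 * spdk/nvmf_transport.h), indexed by subsystem->id.
 *
 *	struct spdk_nvmf_subsystem_poll_group *sgroup =
 *		&group->sgroups[subsystem->id];
 *	struct spdk_nvmf_subsystem_pg_ns_info *ns_info =
 *		&sgroup->ns_info[nsid - 1];
 */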
struct spdk_nvmf_registrant {
	TAILQ_ENTRY(spdk_nvmf_registrant) link;
	struct spdk_uuid hostid;
	/* Registration key */
	uint64_t rkey;
};

struct spdk_nvmf_ns {
	uint32_t nsid;
	uint32_t anagrpid;
	struct spdk_nvmf_subsystem *subsystem;
	struct spdk_bdev *bdev;
	struct spdk_bdev_desc *desc;
	struct spdk_nvmf_ns_opts opts;
	/* Reservation notification mask */
	uint32_t mask;
	/* Generation code */
	uint32_t gen;
	/* Registrants head */
	TAILQ_HEAD(, spdk_nvmf_registrant) registrants;
	/* Current reservation key */
	uint64_t crkey;
	/* Reservation type */
	enum spdk_nvme_reservation_type rtype;
	/* Current reservation holder; only valid when the reservation type allows a single holder */
	struct spdk_nvmf_registrant *holder;
	/* Persist Through Power Loss file which contains the persistent reservation */
	char *ptpl_file;
	/* Persist Through Power Loss feature is enabled */
	bool ptpl_activated;
	/* ZCOPY supported on bdev device */
	bool zcopy;
	/* Command Set Identifier */
	enum spdk_nvme_csi csi;
};

/*
 * NVMf reservation notification log page.
 */
struct spdk_nvmf_reservation_log {
	struct spdk_nvme_reservation_notification_log log;
	TAILQ_ENTRY(spdk_nvmf_reservation_log) link;
	struct spdk_nvmf_ctrlr *ctrlr;
};

/*
 * NVMf async event completion.
 */
struct spdk_nvmf_async_event_completion {
	union spdk_nvme_async_event_completion event;
	STAILQ_ENTRY(spdk_nvmf_async_event_completion) link;
};

/*
 * This structure represents an NVMe-oF controller,
 * which is like a "session" in networking terms.
 */
struct spdk_nvmf_ctrlr {
	uint16_t cntlid;
	char hostnqn[SPDK_NVMF_NQN_MAX_LEN + 1];
	struct spdk_nvmf_subsystem *subsys;

	struct spdk_nvmf_ctrlr_data cdata;

	struct spdk_nvmf_registers vcprop;

	struct spdk_nvmf_ctrlr_feat feat;

	struct spdk_nvmf_qpair *admin_qpair;
	struct spdk_thread *thread;
	struct spdk_bit_array *qpair_mask;

	const struct spdk_nvmf_subsystem_listener *listener;

	struct spdk_nvmf_request *aer_req[SPDK_NVMF_MAX_ASYNC_EVENTS];
	STAILQ_HEAD(, spdk_nvmf_async_event_completion) async_events;
	uint64_t notice_aen_mask;
	uint8_t nr_aer_reqs;
	struct spdk_uuid hostid;

	uint32_t association_timeout; /* in milliseconds */
	uint16_t changed_ns_list_count;
	struct spdk_nvme_ns_list changed_ns_list;
	uint64_t log_page_count;
	uint8_t num_avail_log_pages;
	TAILQ_HEAD(log_page_head, spdk_nvmf_reservation_log) log_head;

	/* Tick at which to trigger the keep-alive poller: poller_time = now_tick + period */
	uint64_t last_keep_alive_tick;
	struct spdk_poller *keep_alive_poller;

	struct spdk_poller *association_timer;

	struct spdk_poller *cc_timer;
	uint64_t cc_timeout_tsc;
	struct spdk_poller *cc_timeout_timer;

	bool dif_insert_or_strip;
	bool in_destruct;
	bool disconnect_in_progress;
	/* Valid only when disconnect_in_progress is true */
	bool disconnect_is_shn;
	bool acre_enabled;
	bool dynamic_ctrlr;

	TAILQ_ENTRY(spdk_nvmf_ctrlr) link;
};

#define NVMF_MAX_LISTENERS_PER_SUBSYSTEM 16

struct spdk_nvmf_subsystem {
	struct spdk_thread *thread;

	uint32_t id;

	enum spdk_nvmf_subsystem_state state;
	enum spdk_nvmf_subtype subtype;

	uint16_t next_cntlid;
	struct {
		uint8_t allow_any_host : 1;
		uint8_t allow_any_listener : 1;
		uint8_t ana_reporting : 1;
		uint8_t reserved : 5;
	} flags;

	/* Boolean for state change synchronization */
	bool changing_state;

	bool destroying;
	bool async_destroy;

	/* Zoned storage related fields */
	bool zone_append_supported;
	uint64_t max_zone_append_size_kib;

	struct spdk_nvmf_tgt *tgt;

	/* Array of pointers to namespaces of size max_nsid indexed by nsid - 1 */
	struct spdk_nvmf_ns **ns;
	uint32_t max_nsid;

	uint16_t min_cntlid;
	uint16_t max_cntlid;

	TAILQ_HEAD(, spdk_nvmf_ctrlr) ctrlrs;

	/* A mutex used to protect the hosts list and allow_any_host flag. Unlike the namespace
	 * array, this list is not used on the I/O path (it's needed for handling things like
	 * the CONNECT command), so use a mutex to protect it instead of requiring the subsystem
	 * state to be paused. This removes the requirement to pause the subsystem when hosts
	 * are added or removed dynamically. */
	pthread_mutex_t mutex;
	TAILQ_HEAD(, spdk_nvmf_host) hosts;
	TAILQ_HEAD(, spdk_nvmf_subsystem_listener) listeners;
	struct spdk_bit_array *used_listener_ids;

	TAILQ_ENTRY(spdk_nvmf_subsystem) entries;

	nvmf_subsystem_destroy_cb async_destroy_cb;
	void *async_destroy_cb_arg;

	char sn[SPDK_NVME_CTRLR_SN_LEN + 1];
	char mn[SPDK_NVME_CTRLR_MN_LEN + 1];
	char subnqn[SPDK_NVMF_NQN_MAX_LEN + 1];

	/* Array of per-ANA-group namespace counts, of size max_nsid, indexed by anagrpid - 1.
	 * Sizing this array the same as the namespace array is sufficient, since the number
	 * of ANA groups can never exceed the number of namespaces.
	 */
	uint32_t *ana_group;
};
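/*
 * Illustrative sketch (not part of this header): per the comment on
 * subsystem->mutex above, a CONNECT-path host check takes the mutex rather
 * than requiring the subsystem to be paused. Hypothetical code:
 *
 *	bool allowed = false;
 *	struct spdk_nvmf_host *host;
 *
 *	pthread_mutex_lock(&subsystem->mutex);
 *	if (subsystem->flags.allow_any_host) {
 *		allowed = true;
 *	} else {
 *		TAILQ_FOREACH(host, &subsystem->hosts, link) {
 *			if (strcmp(host->nqn, hostnqn) == 0) {
 *				allowed = true;
 *				break;
 *			}
 *		}
 *	}
 *	pthread_mutex_unlock(&subsystem->mutex);
 */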
int nvmf_poll_group_update_subsystem(struct spdk_nvmf_poll_group *group,
		struct spdk_nvmf_subsystem *subsystem);
int nvmf_poll_group_add_subsystem(struct spdk_nvmf_poll_group *group,
		struct spdk_nvmf_subsystem *subsystem,
		spdk_nvmf_poll_group_mod_done cb_fn, void *cb_arg);
void nvmf_poll_group_remove_subsystem(struct spdk_nvmf_poll_group *group,
		struct spdk_nvmf_subsystem *subsystem,
		spdk_nvmf_poll_group_mod_done cb_fn, void *cb_arg);
void nvmf_poll_group_pause_subsystem(struct spdk_nvmf_poll_group *group,
		struct spdk_nvmf_subsystem *subsystem,
		uint32_t nsid,
		spdk_nvmf_poll_group_mod_done cb_fn, void *cb_arg);
void nvmf_poll_group_resume_subsystem(struct spdk_nvmf_poll_group *group,
		struct spdk_nvmf_subsystem *subsystem,
		spdk_nvmf_poll_group_mod_done cb_fn, void *cb_arg);
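/*
 * Illustrative sketch (not part of this header): the poll group
 * add/remove/pause/resume functions above complete asynchronously on the
 * group's thread and report status through their spdk_nvmf_poll_group_mod_done
 * callback. A hypothetical caller:
 *
 *	static void
 *	add_subsystem_done(void *cb_arg, int status)
 *	{
 *		if (status != 0) {
 *			SPDK_ERRLOG("Failed to add subsystem to poll group\n");
 *		}
 *	}
 *
 *	nvmf_poll_group_add_subsystem(group, subsystem, add_subsystem_done, NULL);
 */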
void nvmf_update_discovery_log(struct spdk_nvmf_tgt *tgt, const char *hostnqn);
void nvmf_get_discovery_log_page(struct spdk_nvmf_tgt *tgt, const char *hostnqn, struct iovec *iov,
		uint32_t iovcnt, uint64_t offset, uint32_t length,
		struct spdk_nvme_transport_id *cmd_source_trid);

void nvmf_ctrlr_destruct(struct spdk_nvmf_ctrlr *ctrlr);
int nvmf_ctrlr_process_admin_cmd(struct spdk_nvmf_request *req);
int nvmf_ctrlr_process_io_cmd(struct spdk_nvmf_request *req);
bool nvmf_ctrlr_dsm_supported(struct spdk_nvmf_ctrlr *ctrlr);
bool nvmf_ctrlr_write_zeroes_supported(struct spdk_nvmf_ctrlr *ctrlr);
bool nvmf_ctrlr_copy_supported(struct spdk_nvmf_ctrlr *ctrlr);
void nvmf_ctrlr_ns_changed(struct spdk_nvmf_ctrlr *ctrlr, uint32_t nsid);
bool nvmf_ctrlr_use_zcopy(struct spdk_nvmf_request *req);

void nvmf_bdev_ctrlr_identify_ns(struct spdk_nvmf_ns *ns, struct spdk_nvme_ns_data *nsdata,
		bool dif_insert_or_strip);
int nvmf_bdev_ctrlr_read_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
		struct spdk_io_channel *ch, struct spdk_nvmf_request *req);
int nvmf_bdev_ctrlr_write_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
		struct spdk_io_channel *ch, struct spdk_nvmf_request *req);
int nvmf_bdev_ctrlr_compare_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
		struct spdk_io_channel *ch, struct spdk_nvmf_request *req);
int nvmf_bdev_ctrlr_compare_and_write_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
		struct spdk_io_channel *ch, struct spdk_nvmf_request *cmp_req,
		struct spdk_nvmf_request *write_req);
int nvmf_bdev_ctrlr_write_zeroes_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
		struct spdk_io_channel *ch, struct spdk_nvmf_request *req);
int nvmf_bdev_ctrlr_flush_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
		struct spdk_io_channel *ch, struct spdk_nvmf_request *req);
int nvmf_bdev_ctrlr_dsm_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
		struct spdk_io_channel *ch, struct spdk_nvmf_request *req);
int nvmf_bdev_ctrlr_copy_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
		struct spdk_io_channel *ch, struct spdk_nvmf_request *req);
int nvmf_bdev_ctrlr_nvme_passthru_io(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
		struct spdk_io_channel *ch, struct spdk_nvmf_request *req);
bool nvmf_bdev_ctrlr_get_dif_ctx(struct spdk_bdev *bdev, struct spdk_nvme_cmd *cmd,
		struct spdk_dif_ctx *dif_ctx);
bool nvmf_bdev_zcopy_enabled(struct spdk_bdev *bdev);

int nvmf_subsystem_add_ctrlr(struct spdk_nvmf_subsystem *subsystem,
		struct spdk_nvmf_ctrlr *ctrlr);
void nvmf_subsystem_remove_ctrlr(struct spdk_nvmf_subsystem *subsystem,
		struct spdk_nvmf_ctrlr *ctrlr);
void nvmf_subsystem_remove_all_listeners(struct spdk_nvmf_subsystem *subsystem,
		bool stop);
struct spdk_nvmf_ctrlr *nvmf_subsystem_get_ctrlr(struct spdk_nvmf_subsystem *subsystem,
		uint16_t cntlid);
struct spdk_nvmf_subsystem_listener *nvmf_subsystem_find_listener(
		struct spdk_nvmf_subsystem *subsystem,
		const struct spdk_nvme_transport_id *trid);
struct spdk_nvmf_listener *nvmf_transport_find_listener(
		struct spdk_nvmf_transport *transport,
		const struct spdk_nvme_transport_id *trid);
void nvmf_transport_dump_opts(struct spdk_nvmf_transport *transport, struct spdk_json_write_ctx *w,
		bool named);
void nvmf_transport_listen_dump_opts(struct spdk_nvmf_transport *transport,
		const struct spdk_nvme_transport_id *trid, struct spdk_json_write_ctx *w);
void nvmf_subsystem_set_ana_state(struct spdk_nvmf_subsystem *subsystem,
		const struct spdk_nvme_transport_id *trid,
		enum spdk_nvme_ana_state ana_state, uint32_t anagrpid,
		spdk_nvmf_tgt_subsystem_listen_done_fn cb_fn, void *cb_arg);
bool nvmf_subsystem_get_ana_reporting(struct spdk_nvmf_subsystem *subsystem);

/**
 * Sets the controller ID range for a subsystem.
 * The valid range is [1, 0xFFEF], i.e. [NVMF_MIN_CNTLID, NVMF_MAX_CNTLID].
 *
 * May only be performed on subsystems in the INACTIVE state.
 *
 * \param subsystem Subsystem to modify.
 * \param min_cntlid Minimum controller ID.
 * \param max_cntlid Maximum controller ID.
 *
 * \return 0 on success, or negated errno value on failure.
 */
int nvmf_subsystem_set_cntlid_range(struct spdk_nvmf_subsystem *subsystem,
		uint16_t min_cntlid, uint16_t max_cntlid);
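/*
 * Illustrative sketch (not part of this header): restricting a subsystem to
 * controller IDs 1-100 before it is started. The range must stay within
 * [NVMF_MIN_CNTLID, NVMF_MAX_CNTLID] and the subsystem must still be
 * INACTIVE; otherwise a negated errno is returned. Names are hypothetical.
 *
 *	int rc;
 *
 *	assert(subsystem->state == SPDK_NVMF_SUBSYSTEM_INACTIVE);
 *	rc = nvmf_subsystem_set_cntlid_range(subsystem, 1, 100);
 *	if (rc != 0) {
 *		SPDK_ERRLOG("Invalid cntlid range: %s\n", spdk_strerror(-rc));
 *	}
 */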
int nvmf_ctrlr_async_event_ns_notice(struct spdk_nvmf_ctrlr *ctrlr);
int nvmf_ctrlr_async_event_ana_change_notice(struct spdk_nvmf_ctrlr *ctrlr);
void nvmf_ctrlr_async_event_discovery_log_change_notice(void *ctx);
void nvmf_ctrlr_async_event_reservation_notification(struct spdk_nvmf_ctrlr *ctrlr);
int nvmf_ctrlr_async_event_error_event(struct spdk_nvmf_ctrlr *ctrlr,
		union spdk_nvme_async_event_completion event);
void nvmf_ns_reservation_request(void *ctx);
void nvmf_ctrlr_reservation_notice_log(struct spdk_nvmf_ctrlr *ctrlr,
		struct spdk_nvmf_ns *ns,
		enum spdk_nvme_reservation_notification_log_page_type type);

/*
 * Aborting AERs is done on a per-controller basis and sends a completion for each
 * aborted AER to the host. This function should be called when attempting to recover
 * in error paths where it is OK for the host to send a subsequent AER.
 */
void nvmf_ctrlr_abort_aer(struct spdk_nvmf_ctrlr *ctrlr);

/*
 * Abort zero-copy requests that already got the buffer (received the zcopy_start callback),
 * but haven't started zcopy_end. These requests are kept on the outstanding queue, but are
 * not waiting for a completion from the bdev layer, so, when a qpair is being disconnected,
 * we need to kick them to force their completion.
 */
void nvmf_qpair_abort_pending_zcopy_reqs(struct spdk_nvmf_qpair *qpair);

/*
 * Freeing an AER simply releases the RDMA resources for the AER without informing the host.
 * This function should be called when deleting a qpair when one wants to make sure
 * the qpair is completely empty before freeing the request. The reason we free the
 * AER without sending a completion is to prevent the host from sending another AER.
 */
void nvmf_qpair_free_aer(struct spdk_nvmf_qpair *qpair);

int nvmf_ctrlr_abort_request(struct spdk_nvmf_request *req);

void nvmf_ctrlr_set_fatal_status(struct spdk_nvmf_ctrlr *ctrlr);

static inline struct spdk_nvmf_ns *
_nvmf_subsystem_get_ns(struct spdk_nvmf_subsystem *subsystem, uint32_t nsid)
{
	/* NOTE: This implicitly also checks for 0, since 0 - 1 wraps around to UINT32_MAX. */
	if (spdk_unlikely(nsid - 1 >= subsystem->max_nsid)) {
		return NULL;
	}

	return subsystem->ns[nsid - 1];
}

static inline bool
nvmf_qpair_is_admin_queue(struct spdk_nvmf_qpair *qpair)
{
	return qpair->qid == 0;
}

/**
 * Initiates a zcopy start operation.
 *
 * \param bdev The \ref spdk_bdev
 * \param desc The \ref spdk_bdev_desc
 * \param ch The \ref spdk_io_channel
 * \param req The \ref spdk_nvmf_request passed to the bdev for processing
 *
 * \return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE if the command was completed immediately or
 *         SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS if the command was submitted and will be
 *         completed asynchronously. Asynchronous completions are notified through
 *         spdk_nvmf_request_complete().
 */
int nvmf_bdev_ctrlr_zcopy_start(struct spdk_bdev *bdev,
		struct spdk_bdev_desc *desc,
		struct spdk_io_channel *ch,
		struct spdk_nvmf_request *req);

/**
 * Ends a zcopy operation.
 *
 * \param req The NVMe-oF request
 * \param commit Flag indicating whether the buffers should be committed
 */
void nvmf_bdev_ctrlr_zcopy_end(struct spdk_nvmf_request *req, bool commit);

#endif /* __NVMF_INTERNAL_H__ */