/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (C) 2016 Intel Corporation. All rights reserved.
 * Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved.
 * Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 */

/*
 * Internal declarations shared between the NVMe-oF target library sources:
 * the core object definitions (target, subsystem, controller, namespace,
 * listener, reservation state) and the cross-file function prototypes.
 * Nothing in this header is part of the public SPDK API.
 */

#ifndef __NVMF_INTERNAL_H__
#define __NVMF_INTERNAL_H__

#include "spdk/stdinc.h"

#include "spdk/likely.h"
#include "spdk/nvmf.h"
#include "spdk/nvmf_cmd.h"
#include "spdk/nvmf_transport.h"
#include "spdk/nvmf_spec.h"
#include "spdk/assert.h"
#include "spdk/bdev.h"
#include "spdk/queue.h"
#include "spdk/util.h"
#include "spdk/thread.h"

/* The spec reserves cntlid values in the range FFF0h to FFFFh. */
#define NVMF_MIN_CNTLID 1
#define NVMF_MAX_CNTLID 0xFFEF

/*
 * Lifecycle states of a subsystem. The *_ING values are transient states
 * entered while an asynchronous state change is in flight.
 */
enum spdk_nvmf_subsystem_state {
	SPDK_NVMF_SUBSYSTEM_INACTIVE = 0,
	SPDK_NVMF_SUBSYSTEM_ACTIVATING,
	SPDK_NVMF_SUBSYSTEM_ACTIVE,
	SPDK_NVMF_SUBSYSTEM_PAUSING,
	SPDK_NVMF_SUBSYSTEM_PAUSED,
	SPDK_NVMF_SUBSYSTEM_RESUMING,
	SPDK_NVMF_SUBSYSTEM_DEACTIVATING,
	SPDK_NVMF_SUBSYSTEM_NUM_STATES,
};

/*
 * An NVMe-oF target: the top-level object that owns all subsystems,
 * transports, and poll groups.
 */
struct spdk_nvmf_tgt {
	char name[NVMF_TGT_NAME_MAX_LENGTH];

	pthread_mutex_t mutex;

	/* Discovery log generation counter; presumably bumped whenever the
	 * discovery log page contents change — TODO confirm against users. */
	uint64_t discovery_genctr;

	uint32_t max_subsystems;

	enum spdk_nvmf_tgt_discovery_filter discovery_filter;

	/* Array of subsystem pointers of size max_subsystems indexed by sid */
	struct spdk_nvmf_subsystem **subsystems;

	TAILQ_HEAD(, spdk_nvmf_transport) transports;
	TAILQ_HEAD(, spdk_nvmf_poll_group) poll_groups;

	/* Used for round-robin assignment of connections to poll groups */
	struct spdk_nvmf_poll_group *next_poll_group;

	/* Completion callback (and its argument) invoked when target
	 * destruction finishes. */
	spdk_nvmf_tgt_destroy_done_fn *destroy_cb_fn;
	void *destroy_cb_arg;

	/* NOTE(review): presumably the Command Retry Delay Times (CRDT1-3)
	 * advertised to hosts — confirm against the Identify Controller path. */
	uint16_t crdt[3];

	TAILQ_ENTRY(spdk_nvmf_tgt) link;
};

/* A host NQN entry on a subsystem's allowed-hosts list. */
struct spdk_nvmf_host {
	char nqn[SPDK_NVMF_NQN_MAX_LEN + 1];
	TAILQ_ENTRY(spdk_nvmf_host) link;
};

/* A (subsystem, transport address) pairing on which the subsystem accepts
 * connections. */
struct spdk_nvmf_subsystem_listener {
	struct spdk_nvmf_subsystem		*subsystem;
	spdk_nvmf_tgt_subsystem_listen_done_fn	cb_fn;
	void					*cb_arg;
	struct spdk_nvme_transport_id		*trid;
	struct spdk_nvmf_transport		*transport;
	/* ANA states for this listener; assumed to be an array indexed by
	 * anagrpid - 1 (parallel to subsystem->ana_group) — TODO confirm. */
	enum spdk_nvme_ana_state		*ana_state;
	uint64_t				ana_state_change_count;
	/* Listener ID, allocated from subsystem->used_listener_ids */
	uint16_t				id;
	TAILQ_ENTRY(spdk_nvmf_subsystem_listener)	link;
};

/* Maximum number of registrants supported per namespace */
#define SPDK_NVMF_MAX_NUM_REGISTRANTS		16

/* Persistent-reservation registrant info as serialized for the PTPL file. */
struct spdk_nvmf_registrant_info {
	uint64_t		rkey;
	char			host_uuid[SPDK_UUID_STRING_LEN];
};

/* Full persistent-reservation state for one namespace as serialized for the
 * Persist Through Power Loss (PTPL) file. */
struct spdk_nvmf_reservation_info {
	bool					ptpl_activated;
	enum spdk_nvme_reservation_type		rtype;
	uint64_t				crkey;
	char					bdev_uuid[SPDK_UUID_STRING_LEN];
	char					holder_uuid[SPDK_UUID_STRING_LEN];
	uint32_t				num_regs;
	struct spdk_nvmf_registrant_info	registrants[SPDK_NVMF_MAX_NUM_REGISTRANTS];
};

/* Per-poll-group, per-namespace state: the I/O channel plus a cached copy of
 * the reservation state so the I/O path can check it without locking. */
struct spdk_nvmf_subsystem_pg_ns_info {
	struct spdk_io_channel		*channel;
	struct spdk_uuid		uuid;
	/* current reservation key, no reservation if the value is 0 */
	uint64_t			crkey;
	/* reservation type */
	enum spdk_nvme_reservation_type	rtype;
	/* Host ID which holds the reservation */
	struct spdk_uuid		holder_id;
	/* Host ID for the registrants with the namespace */
	struct spdk_uuid		reg_hostid[SPDK_NVMF_MAX_NUM_REGISTRANTS];
	uint64_t			num_blocks;

	/* I/O outstanding to this namespace */
	uint64_t			io_outstanding;
	enum spdk_nvmf_subsystem_state	state;
};

/* Completion callback for asynchronous poll-group/subsystem modifications;
 * status is 0 on success, negated errno otherwise. */
typedef void(*spdk_nvmf_poll_group_mod_done)(void *cb_arg, int status);

/* Per-poll-group view of one subsystem. */
struct spdk_nvmf_subsystem_poll_group {
	/* Array of namespace information for each namespace indexed by nsid - 1 */
	struct spdk_nvmf_subsystem_pg_ns_info	*ns_info;
	uint32_t				num_ns;

	/* Number of ADMIN and FABRICS requests outstanding */
	uint64_t				mgmt_io_outstanding;
	spdk_nvmf_poll_group_mod_done		cb_fn;
	void					*cb_arg;

	enum spdk_nvmf_subsystem_state		state;

	/* Requests queued while the subsystem is paused on this group. */
	TAILQ_HEAD(, spdk_nvmf_request)		queued;
};

/* A host registered with a namespace for persistent reservations. */
struct spdk_nvmf_registrant {
	TAILQ_ENTRY(spdk_nvmf_registrant) link;
	struct spdk_uuid hostid;
	/* Registration key */
	uint64_t rkey;
};

/* A namespace exposed by a subsystem, backed by a bdev. */
struct spdk_nvmf_ns {
	uint32_t nsid;
	/* ANA group this namespace belongs to (1-based) */
	uint32_t anagrpid;
	struct spdk_nvmf_subsystem *subsystem;
	struct spdk_bdev *bdev;
	struct spdk_bdev_desc *desc;
	struct spdk_nvmf_ns_opts opts;
	/* reservation notification mask */
	uint32_t mask;
	/* generation code */
	uint32_t gen;
	/* registrants head */
	TAILQ_HEAD(, spdk_nvmf_registrant) registrants;
	/* current reservation key */
	uint64_t crkey;
	/* reservation type */
	enum spdk_nvme_reservation_type rtype;
	/* current reservation holder, only valid if reservation type can only have one holder */
	struct spdk_nvmf_registrant *holder;
	/* Persist Through Power Loss file which contains the persistent reservation */
	char *ptpl_file;
	/* Persist Through Power Loss feature is enabled */
	bool ptpl_activated;
	/* ZCOPY supported on bdev device */
	bool zcopy;
};

/*
 * NVMf reservation notification log page.
 */
struct spdk_nvmf_reservation_log {
	struct spdk_nvme_reservation_notification_log	log;
	TAILQ_ENTRY(spdk_nvmf_reservation_log)		link;
	struct spdk_nvmf_ctrlr				*ctrlr;
};

/*
 * NVMf async event completion.
 */
struct spdk_nvmf_async_event_completion {
	union spdk_nvme_async_event_completion		event;
	STAILQ_ENTRY(spdk_nvmf_async_event_completion)	link;
};

/*
 * This structure represents an NVMe-oF controller,
 * which is like a "session" in networking terms.
 */
struct spdk_nvmf_ctrlr {
	uint16_t			cntlid;
	char				hostnqn[SPDK_NVMF_NQN_MAX_LEN + 1];
	struct spdk_nvmf_subsystem	*subsys;

	struct spdk_nvmf_ctrlr_data	cdata;

	/* Virtual controller register state (CAP/CC/CSTS/...) */
	struct spdk_nvmf_registers	vcprop;

	struct spdk_nvmf_ctrlr_feat	feat;

	struct spdk_nvmf_qpair		*admin_qpair;
	/* Thread on which this controller's admin operations run */
	struct spdk_thread		*thread;
	/* Bitmap of qpair IDs in use by this controller */
	struct spdk_bit_array		*qpair_mask;

	const struct spdk_nvmf_subsystem_listener	*listener;

	/* Outstanding Asynchronous Event Request commands from the host */
	struct spdk_nvmf_request	*aer_req[SPDK_NVMF_MAX_ASYNC_EVENTS];
	/* Events that occurred while no AER was available to complete */
	STAILQ_HEAD(, spdk_nvmf_async_event_completion) async_events;
	uint64_t			notice_aen_mask;
	uint8_t				nr_aer_reqs;
	struct spdk_uuid		hostid;

	uint32_t	association_timeout; /* in milliseconds */
	uint16_t	changed_ns_list_count;
	struct spdk_nvme_ns_list changed_ns_list;
	uint64_t	log_page_count;
	uint8_t		num_avail_log_pages;
	TAILQ_HEAD(log_page_head, spdk_nvmf_reservation_log) log_head;

	/* Time to trigger keep-alive--poller_time = now_tick + period */
	uint64_t			last_keep_alive_tick;
	struct spdk_poller		*keep_alive_poller;

	struct spdk_poller		*association_timer;

	/* Pollers and deadline for controller-enable/disable (CC) handling */
	struct spdk_poller		*cc_timer;
	uint64_t			cc_timeout_tsc;
	struct spdk_poller		*cc_timeout_timer;

	bool				dif_insert_or_strip;
	bool				in_destruct;
	bool				disconnect_in_progress;
	/* valid only when disconnect_in_progress is true */
	bool				disconnect_is_shn;
	/* Advanced Command Retry Enable */
	bool				acre_enabled;
	bool				dynamic_ctrlr;

	TAILQ_ENTRY(spdk_nvmf_ctrlr)	link;
};

#define NVMF_MAX_LISTENERS_PER_SUBSYSTEM	16

/* An NVMe-oF subsystem: a collection of namespaces served to hosts, with
 * its controllers, listeners, and access-control lists. */
struct spdk_nvmf_subsystem {
	struct spdk_thread				*thread;

	uint32_t					id;

	enum spdk_nvmf_subsystem_state			state;
	enum spdk_nvmf_subtype				subtype;

	/* Hint for the next cntlid to try when allocating a controller ID */
	uint16_t					next_cntlid;
	struct {
		uint8_t					allow_any_host : 1;
		uint8_t					allow_any_listener : 1;
		uint8_t					ana_reporting : 1;
		uint8_t					reserved : 5;
	} flags;

	/* boolean for state change synchronization */
	bool						changing_state;

	bool						destroying;
	bool						async_destroy;

	struct spdk_nvmf_tgt				*tgt;

	/* Array of pointers to namespaces of size max_nsid indexed by nsid - 1 */
	struct spdk_nvmf_ns				**ns;
	uint32_t					max_nsid;

	/* Valid cntlid range for controllers of this subsystem,
	 * see nvmf_subsystem_set_cntlid_range(). */
	uint16_t					min_cntlid;
	uint16_t					max_cntlid;

	TAILQ_HEAD(, spdk_nvmf_ctrlr)			ctrlrs;

	/* A mutex used to protect the hosts list and allow_any_host flag. Unlike the namespace
	 * array, this list is not used on the I/O path (it's needed for handling things like
	 * the CONNECT command), so use a mutex to protect it instead of requiring the subsystem
	 * state to be paused. This removes the requirement to pause the subsystem when hosts
	 * are added or removed dynamically. */
	pthread_mutex_t					mutex;
	TAILQ_HEAD(, spdk_nvmf_host)			hosts;
	TAILQ_HEAD(, spdk_nvmf_subsystem_listener)	listeners;
	struct spdk_bit_array				*used_listener_ids;

	TAILQ_ENTRY(spdk_nvmf_subsystem)		entries;

	/* Callback (and argument) invoked when an asynchronous subsystem
	 * destroy completes. */
	nvmf_subsystem_destroy_cb			async_destroy_cb;
	void						*async_destroy_cb_arg;

	char						sn[SPDK_NVME_CTRLR_SN_LEN + 1];
	char						mn[SPDK_NVME_CTRLR_MN_LEN + 1];
	char						subnqn[SPDK_NVMF_NQN_MAX_LEN + 1];

	/* Array of namespace count per ANA group of size max_nsid indexed anagrpid - 1
	 * It will be enough for ANA group to use the same size as namespaces.
	 */
	uint32_t					*ana_group;
};

/* Poll group <-> subsystem association management. All are asynchronous;
 * completion is reported through the spdk_nvmf_poll_group_mod_done callback
 * where one is taken. */
int nvmf_poll_group_update_subsystem(struct spdk_nvmf_poll_group *group,
				     struct spdk_nvmf_subsystem *subsystem);
int nvmf_poll_group_add_subsystem(struct spdk_nvmf_poll_group *group,
				  struct spdk_nvmf_subsystem *subsystem,
				  spdk_nvmf_poll_group_mod_done cb_fn, void *cb_arg);
void nvmf_poll_group_remove_subsystem(struct spdk_nvmf_poll_group *group,
				      struct spdk_nvmf_subsystem *subsystem, spdk_nvmf_poll_group_mod_done cb_fn, void *cb_arg);
void nvmf_poll_group_pause_subsystem(struct spdk_nvmf_poll_group *group,
				     struct spdk_nvmf_subsystem *subsystem,
				     uint32_t nsid,
				     spdk_nvmf_poll_group_mod_done cb_fn, void *cb_arg);
void nvmf_poll_group_resume_subsystem(struct spdk_nvmf_poll_group *group,
				      struct spdk_nvmf_subsystem *subsystem, spdk_nvmf_poll_group_mod_done cb_fn, void *cb_arg);

/* Discovery service */
void nvmf_update_discovery_log(struct spdk_nvmf_tgt *tgt, const char *hostnqn);
void nvmf_get_discovery_log_page(struct spdk_nvmf_tgt *tgt, const char *hostnqn, struct iovec *iov,
				 uint32_t iovcnt, uint64_t offset, uint32_t length,
				 struct spdk_nvme_transport_id *cmd_source_trid);

/* Controller command processing and capability queries */
void nvmf_ctrlr_destruct(struct spdk_nvmf_ctrlr *ctrlr);
int nvmf_ctrlr_process_admin_cmd(struct spdk_nvmf_request *req);
int nvmf_ctrlr_process_io_cmd(struct spdk_nvmf_request *req);
bool nvmf_ctrlr_dsm_supported(struct spdk_nvmf_ctrlr *ctrlr);
bool nvmf_ctrlr_write_zeroes_supported(struct spdk_nvmf_ctrlr *ctrlr);
bool nvmf_ctrlr_copy_supported(struct spdk_nvmf_ctrlr *ctrlr);
void nvmf_ctrlr_ns_changed(struct spdk_nvmf_ctrlr *ctrlr, uint32_t nsid);
bool nvmf_ctrlr_use_zcopy(struct spdk_nvmf_request *req);

/* Bdev-backed implementations of the NVMe I/O command set */
void nvmf_bdev_ctrlr_identify_ns(struct spdk_nvmf_ns *ns, struct spdk_nvme_ns_data *nsdata,
				 bool dif_insert_or_strip);
int nvmf_bdev_ctrlr_read_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
			     struct spdk_io_channel *ch, struct spdk_nvmf_request *req);
int nvmf_bdev_ctrlr_write_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
			      struct spdk_io_channel *ch, struct spdk_nvmf_request *req);
int nvmf_bdev_ctrlr_compare_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
				struct spdk_io_channel *ch, struct spdk_nvmf_request *req);
int nvmf_bdev_ctrlr_compare_and_write_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
		struct spdk_io_channel *ch, struct spdk_nvmf_request *cmp_req, struct spdk_nvmf_request *write_req);
int nvmf_bdev_ctrlr_write_zeroes_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
				     struct spdk_io_channel *ch, struct spdk_nvmf_request *req);
int nvmf_bdev_ctrlr_flush_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
			      struct spdk_io_channel *ch, struct spdk_nvmf_request *req);
int nvmf_bdev_ctrlr_dsm_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
			    struct spdk_io_channel *ch, struct spdk_nvmf_request *req);
int nvmf_bdev_ctrlr_copy_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
			     struct spdk_io_channel *ch, struct spdk_nvmf_request *req);
int nvmf_bdev_ctrlr_nvme_passthru_io(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
				     struct spdk_io_channel *ch, struct spdk_nvmf_request *req);
bool nvmf_bdev_ctrlr_get_dif_ctx(struct spdk_bdev *bdev, struct spdk_nvme_cmd *cmd,
				 struct spdk_dif_ctx *dif_ctx);
bool nvmf_bdev_zcopy_enabled(struct spdk_bdev *bdev);

/* Subsystem membership and listener lookup */
int nvmf_subsystem_add_ctrlr(struct spdk_nvmf_subsystem *subsystem,
			     struct spdk_nvmf_ctrlr *ctrlr);
void nvmf_subsystem_remove_ctrlr(struct spdk_nvmf_subsystem *subsystem,
				 struct spdk_nvmf_ctrlr *ctrlr);
void nvmf_subsystem_remove_all_listeners(struct spdk_nvmf_subsystem *subsystem,
		bool stop);
struct spdk_nvmf_ctrlr *nvmf_subsystem_get_ctrlr(struct spdk_nvmf_subsystem *subsystem,
		uint16_t cntlid);
struct spdk_nvmf_subsystem_listener *nvmf_subsystem_find_listener(
	struct spdk_nvmf_subsystem *subsystem,
	const struct spdk_nvme_transport_id *trid);
struct spdk_nvmf_listener *nvmf_transport_find_listener(
	struct spdk_nvmf_transport *transport,
	const struct spdk_nvme_transport_id *trid);
void nvmf_transport_dump_opts(struct spdk_nvmf_transport *transport, struct spdk_json_write_ctx *w,
			      bool named);
void nvmf_transport_listen_dump_opts(struct spdk_nvmf_transport *transport,
				     const struct spdk_nvme_transport_id *trid, struct spdk_json_write_ctx *w);
void nvmf_subsystem_set_ana_state(struct spdk_nvmf_subsystem *subsystem,
				  const struct spdk_nvme_transport_id *trid,
				  enum spdk_nvme_ana_state ana_state, uint32_t anagrpid,
				  spdk_nvmf_tgt_subsystem_listen_done_fn cb_fn, void *cb_arg);
bool nvmf_subsystem_get_ana_reporting(struct spdk_nvmf_subsystem *subsystem);

/**
 * Sets the controller ID range for a subsystem.
 * Valid range is [1, 0xFFEF].
 *
 * May only be performed on subsystems in the INACTIVE state.
 *
 * \param subsystem Subsystem to modify.
 * \param min_cntlid Minimum controller ID.
 * \param max_cntlid Maximum controller ID.
 *
 * \return 0 on success, or negated errno value on failure.
 */
int nvmf_subsystem_set_cntlid_range(struct spdk_nvmf_subsystem *subsystem,
				    uint16_t min_cntlid, uint16_t max_cntlid);

/* Asynchronous event notification helpers */
int nvmf_ctrlr_async_event_ns_notice(struct spdk_nvmf_ctrlr *ctrlr);
int nvmf_ctrlr_async_event_ana_change_notice(struct spdk_nvmf_ctrlr *ctrlr);
void nvmf_ctrlr_async_event_discovery_log_change_notice(void *ctx);
void nvmf_ctrlr_async_event_reservation_notification(struct spdk_nvmf_ctrlr *ctrlr);
int nvmf_ctrlr_async_event_error_event(struct spdk_nvmf_ctrlr *ctrlr,
				       union spdk_nvme_async_event_completion event);
void nvmf_ns_reservation_request(void *ctx);
void nvmf_ctrlr_reservation_notice_log(struct spdk_nvmf_ctrlr *ctrlr,
				       struct spdk_nvmf_ns *ns,
				       enum spdk_nvme_reservation_notification_log_page_type type);

/*
 * Abort aer is sent on a per controller basis and sends a completion for the aer to the host.
 * This function should be called when attempting to recover in error paths when it is OK for
 * the host to send a subsequent AER.
 */
void nvmf_ctrlr_abort_aer(struct spdk_nvmf_ctrlr *ctrlr);

/*
 * Abort zero-copy requests that already got the buffer (received zcopy_start cb), but haven't
 * started zcopy_end. These requests are kept on the outstanding queue, but are not waiting for a
 * completion from the bdev layer, so, when a qpair is being disconnected, we need to kick them to
 * force their completion.
 */
void nvmf_qpair_abort_pending_zcopy_reqs(struct spdk_nvmf_qpair *qpair);

/*
 * Free aer simply frees the rdma resources for the aer without informing the host.
 * This function should be called when deleting a qpair when one wants to make sure
 * the qpair is completely empty before freeing the request. The reason we free the
 * AER without sending a completion is to prevent the host from sending another AER.
 */
void nvmf_qpair_free_aer(struct spdk_nvmf_qpair *qpair);

int nvmf_ctrlr_abort_request(struct spdk_nvmf_request *req);

void nvmf_ctrlr_set_fatal_status(struct spdk_nvmf_ctrlr *ctrlr);

/*
 * Look up namespace \c nsid in \c subsystem without range assertions.
 * Returns NULL if nsid is 0, out of range, or not currently populated.
 */
static inline struct spdk_nvmf_ns *
_nvmf_subsystem_get_ns(struct spdk_nvmf_subsystem *subsystem, uint32_t nsid)
{
	/* NOTE: This implicitly also checks for 0, since 0 - 1 wraps around to UINT32_MAX. */
	if (spdk_unlikely(nsid - 1 >= subsystem->max_nsid)) {
		return NULL;
	}

	return subsystem->ns[nsid - 1];
}

/* True if \c qpair is the admin queue (queue ID 0 per the NVMe spec). */
static inline bool
nvmf_qpair_is_admin_queue(struct spdk_nvmf_qpair *qpair)
{
	return qpair->qid == 0;
}

/**
 * Initiates a zcopy start operation
 *
 * \param bdev The \ref spdk_bdev
 * \param desc The \ref spdk_bdev_desc
 * \param ch The \ref spdk_io_channel
 * \param req The \ref spdk_nvmf_request passed to the bdev for processing
 *
 * \return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE if the command was completed immediately or
 *         SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS if the command was submitted and will be
 *         completed asynchronously.  Asynchronous completions are notified through
 *         spdk_nvmf_request_complete().
 */
int nvmf_bdev_ctrlr_zcopy_start(struct spdk_bdev *bdev,
				struct spdk_bdev_desc *desc,
				struct spdk_io_channel *ch,
				struct spdk_nvmf_request *req);

/**
 * Ends a zcopy operation
 *
 * \param req The NVMe-oF request
 * \param commit Flag indicating whether the buffers should be committed
 */
void nvmf_bdev_ctrlr_zcopy_end(struct spdk_nvmf_request *req, bool commit);

#endif /* __NVMF_INTERNAL_H__ */