1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright (C) 2016 Intel Corporation. All rights reserved. 3 * Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved. 4 * Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 5 */ 6 7 #ifndef __NVMF_INTERNAL_H__ 8 #define __NVMF_INTERNAL_H__ 9 10 #include "spdk/stdinc.h" 11 12 #include "spdk/likely.h" 13 #include "spdk/nvmf.h" 14 #include "spdk/nvmf_cmd.h" 15 #include "spdk/nvmf_transport.h" 16 #include "spdk/nvmf_spec.h" 17 #include "spdk/assert.h" 18 #include "spdk/bdev.h" 19 #include "spdk/queue.h" 20 #include "spdk/util.h" 21 #include "spdk/thread.h" 22 #include "spdk/tree.h" 23 24 /* The spec reserves cntlid values in the range FFF0h to FFFFh. */ 25 #define NVMF_MIN_CNTLID 1 26 #define NVMF_MAX_CNTLID 0xFFEF 27 28 enum spdk_nvmf_tgt_state { 29 NVMF_TGT_IDLE = 0, 30 NVMF_TGT_RUNNING, 31 NVMF_TGT_PAUSING, 32 NVMF_TGT_PAUSED, 33 NVMF_TGT_RESUMING, 34 }; 35 36 enum spdk_nvmf_subsystem_state { 37 SPDK_NVMF_SUBSYSTEM_INACTIVE = 0, 38 SPDK_NVMF_SUBSYSTEM_ACTIVATING, 39 SPDK_NVMF_SUBSYSTEM_ACTIVE, 40 SPDK_NVMF_SUBSYSTEM_PAUSING, 41 SPDK_NVMF_SUBSYSTEM_PAUSED, 42 SPDK_NVMF_SUBSYSTEM_RESUMING, 43 SPDK_NVMF_SUBSYSTEM_DEACTIVATING, 44 SPDK_NVMF_SUBSYSTEM_NUM_STATES, 45 }; 46 47 RB_HEAD(subsystem_tree, spdk_nvmf_subsystem); 48 49 struct spdk_nvmf_tgt { 50 char name[NVMF_TGT_NAME_MAX_LENGTH]; 51 52 pthread_mutex_t mutex; 53 54 uint64_t discovery_genctr; 55 56 uint32_t max_subsystems; 57 58 enum spdk_nvmf_tgt_discovery_filter discovery_filter; 59 60 enum spdk_nvmf_tgt_state state; 61 62 struct spdk_bit_array *subsystem_ids; 63 64 struct subsystem_tree subsystems; 65 66 TAILQ_HEAD(, spdk_nvmf_transport) transports; 67 TAILQ_HEAD(, spdk_nvmf_poll_group) poll_groups; 68 TAILQ_HEAD(, spdk_nvmf_referral) referrals; 69 70 /* Used for round-robin assignment of connections to poll groups */ 71 struct spdk_nvmf_poll_group *next_poll_group; 72 73 spdk_nvmf_tgt_destroy_done_fn *destroy_cb_fn; 74 void *destroy_cb_arg; 75 76 uint16_t crdt[3]; 77 uint16_t num_poll_groups; 78 79 TAILQ_ENTRY(spdk_nvmf_tgt) link; 80 }; 81 82 struct spdk_nvmf_host { 83 char nqn[SPDK_NVMF_NQN_MAX_LEN + 1]; 84 TAILQ_ENTRY(spdk_nvmf_host) link; 85 }; 86 87 struct spdk_nvmf_subsystem_listener { 88 struct spdk_nvmf_subsystem *subsystem; 89 spdk_nvmf_tgt_subsystem_listen_done_fn cb_fn; 90 void *cb_arg; 91 struct spdk_nvme_transport_id *trid; 92 struct spdk_nvmf_transport *transport; 93 enum spdk_nvme_ana_state *ana_state; 94 uint64_t ana_state_change_count; 95 uint16_t id; 96 struct spdk_nvmf_listener_opts opts; 97 TAILQ_ENTRY(spdk_nvmf_subsystem_listener) link; 98 }; 99 100 struct spdk_nvmf_referral { 101 /* Discovery Log Page Entry for this referral */ 102 struct spdk_nvmf_discovery_log_page_entry entry; 103 /* Transport ID */ 104 struct spdk_nvme_transport_id trid; 105 TAILQ_ENTRY(spdk_nvmf_referral) link; 106 }; 107 108 /* Maximum number of registrants supported per namespace */ 109 #define SPDK_NVMF_MAX_NUM_REGISTRANTS 16 110 111 struct spdk_nvmf_registrant_info { 112 uint64_t rkey; 113 char host_uuid[SPDK_UUID_STRING_LEN]; 114 }; 115 116 struct spdk_nvmf_reservation_info { 117 bool ptpl_activated; 118 enum spdk_nvme_reservation_type rtype; 119 uint64_t crkey; 120 char bdev_uuid[SPDK_UUID_STRING_LEN]; 121 char holder_uuid[SPDK_UUID_STRING_LEN]; 122 uint32_t num_regs; 123 struct spdk_nvmf_registrant_info registrants[SPDK_NVMF_MAX_NUM_REGISTRANTS]; 124 }; 125 126 struct spdk_nvmf_subsystem_pg_ns_info { 127 struct spdk_io_channel *channel; 128 struct spdk_uuid uuid; 129 /* current reservation key, no reservation if the value is 0 */ 130 uint64_t crkey; 131 /* reservation type */ 132 enum spdk_nvme_reservation_type rtype; 133 /* Host ID which holds the reservation */ 134 struct spdk_uuid holder_id; 135 /* Host ID for the registrants with the namespace */ 136 struct spdk_uuid reg_hostid[SPDK_NVMF_MAX_NUM_REGISTRANTS]; 137 uint64_t num_blocks; 138 139 /* I/O outstanding to this namespace */ 140 uint64_t io_outstanding; 141 enum spdk_nvmf_subsystem_state state; 142 }; 143 144 typedef void(*spdk_nvmf_poll_group_mod_done)(void *cb_arg, int status); 145 146 struct spdk_nvmf_subsystem_poll_group { 147 /* Array of namespace information for each namespace indexed by nsid - 1 */ 148 struct spdk_nvmf_subsystem_pg_ns_info *ns_info; 149 uint32_t num_ns; 150 enum spdk_nvmf_subsystem_state state; 151 152 /* Number of ADMIN and FABRICS requests outstanding */ 153 uint64_t mgmt_io_outstanding; 154 spdk_nvmf_poll_group_mod_done cb_fn; 155 void *cb_arg; 156 157 TAILQ_HEAD(, spdk_nvmf_request) queued; 158 }; 159 160 struct spdk_nvmf_registrant { 161 TAILQ_ENTRY(spdk_nvmf_registrant) link; 162 struct spdk_uuid hostid; 163 /* Registration key */ 164 uint64_t rkey; 165 }; 166 167 struct spdk_nvmf_ns { 168 uint32_t nsid; 169 uint32_t anagrpid; 170 struct spdk_nvmf_subsystem *subsystem; 171 struct spdk_bdev *bdev; 172 struct spdk_bdev_desc *desc; 173 struct spdk_nvmf_ns_opts opts; 174 /* reservation notification mask */ 175 uint32_t mask; 176 /* generation code */ 177 uint32_t gen; 178 /* registrants head */ 179 TAILQ_HEAD(, spdk_nvmf_registrant) registrants; 180 /* current reservation key */ 181 uint64_t crkey; 182 /* reservation type */ 183 enum spdk_nvme_reservation_type rtype; 184 /* current reservation holder, only valid if reservation type can only have one holder */ 185 struct spdk_nvmf_registrant *holder; 186 /* Persist Through Power Loss file which contains the persistent reservation */ 187 char *ptpl_file; 188 /* Persist Through Power Loss feature is enabled */ 189 bool ptpl_activated; 190 /* ZCOPY supported on bdev device */ 191 bool zcopy; 192 /* Command Set Identifier */ 193 enum spdk_nvme_csi csi; 194 }; 195 196 /* 197 * NVMf reservation notification log page. 198 */ 199 struct spdk_nvmf_reservation_log { 200 struct spdk_nvme_reservation_notification_log log; 201 TAILQ_ENTRY(spdk_nvmf_reservation_log) link; 202 struct spdk_nvmf_ctrlr *ctrlr; 203 }; 204 205 /* 206 * NVMf async event completion. 207 */ 208 struct spdk_nvmf_async_event_completion { 209 union spdk_nvme_async_event_completion event; 210 STAILQ_ENTRY(spdk_nvmf_async_event_completion) link; 211 }; 212 213 /* 214 * This structure represents an NVMe-oF controller, 215 * which is like a "session" in networking terms. 216 */ 217 struct spdk_nvmf_ctrlr { 218 uint16_t cntlid; 219 char hostnqn[SPDK_NVMF_NQN_MAX_LEN + 1]; 220 struct spdk_nvmf_subsystem *subsys; 221 222 struct spdk_nvmf_ctrlr_data cdata; 223 224 struct spdk_nvmf_registers vcprop; 225 226 struct spdk_nvmf_ctrlr_feat feat; 227 228 struct spdk_nvmf_qpair *admin_qpair; 229 struct spdk_thread *thread; 230 struct spdk_bit_array *qpair_mask; 231 232 const struct spdk_nvmf_subsystem_listener *listener; 233 234 struct spdk_nvmf_request *aer_req[SPDK_NVMF_MAX_ASYNC_EVENTS]; 235 STAILQ_HEAD(, spdk_nvmf_async_event_completion) async_events; 236 uint64_t notice_aen_mask; 237 uint8_t nr_aer_reqs; 238 struct spdk_uuid hostid; 239 240 uint32_t association_timeout; /* in milliseconds */ 241 uint16_t changed_ns_list_count; 242 struct spdk_nvme_ns_list changed_ns_list; 243 uint64_t log_page_count; 244 uint8_t num_avail_log_pages; 245 TAILQ_HEAD(log_page_head, spdk_nvmf_reservation_log) log_head; 246 247 /* Time to trigger keep-alive--poller_time = now_tick + period */ 248 uint64_t last_keep_alive_tick; 249 struct spdk_poller *keep_alive_poller; 250 251 struct spdk_poller *association_timer; 252 253 struct spdk_poller *cc_timer; 254 uint64_t cc_timeout_tsc; 255 struct spdk_poller *cc_timeout_timer; 256 257 bool dif_insert_or_strip; 258 bool in_destruct; 259 bool disconnect_in_progress; 260 /* valid only when disconnect_in_progress is true */ 261 bool disconnect_is_shn; 262 bool acre_enabled; 263 bool dynamic_ctrlr; 264 265 TAILQ_ENTRY(spdk_nvmf_ctrlr) link; 266 }; 267 268 #define NVMF_MAX_LISTENERS_PER_SUBSYSTEM 16 269 270 struct spdk_nvmf_subsystem { 271 struct spdk_thread *thread; 272 273 uint32_t id; 274 275 enum spdk_nvmf_subsystem_state state; 276 enum spdk_nvmf_subtype subtype; 277 278 uint16_t next_cntlid; 279 struct { 280 uint8_t allow_any_host : 1; 281 uint8_t allow_any_listener : 1; 282 uint8_t ana_reporting : 1; 283 uint8_t reserved : 5; 284 } flags; 285 286 /* boolean for state change synchronization */ 287 bool changing_state; 288 289 bool destroying; 290 bool async_destroy; 291 292 /* Zoned storage related fields */ 293 bool zone_append_supported; 294 uint64_t max_zone_append_size_kib; 295 296 struct spdk_nvmf_tgt *tgt; 297 RB_ENTRY(spdk_nvmf_subsystem) link; 298 299 /* Array of pointers to namespaces of size max_nsid indexed by nsid - 1 */ 300 struct spdk_nvmf_ns **ns; 301 uint32_t max_nsid; 302 303 uint16_t min_cntlid; 304 uint16_t max_cntlid; 305 306 TAILQ_HEAD(, spdk_nvmf_ctrlr) ctrlrs; 307 308 /* A mutex used to protect the hosts list and allow_any_host flag. Unlike the namespace 309 * array, this list is not used on the I/O path (it's needed for handling things like 310 * the CONNECT command), so use a mutex to protect it instead of requiring the subsystem 311 * state to be paused. This removes the requirement to pause the subsystem when hosts 312 * are added or removed dynamically. */ 313 pthread_mutex_t mutex; 314 TAILQ_HEAD(, spdk_nvmf_host) hosts; 315 TAILQ_HEAD(, spdk_nvmf_subsystem_listener) listeners; 316 struct spdk_bit_array *used_listener_ids; 317 318 TAILQ_ENTRY(spdk_nvmf_subsystem) entries; 319 320 nvmf_subsystem_destroy_cb async_destroy_cb; 321 void *async_destroy_cb_arg; 322 323 char sn[SPDK_NVME_CTRLR_SN_LEN + 1]; 324 char mn[SPDK_NVME_CTRLR_MN_LEN + 1]; 325 char subnqn[SPDK_NVMF_NQN_MAX_LEN + 1]; 326 327 /* Array of namespace count per ANA group of size max_nsid indexed anagrpid - 1 328 * It will be enough for ANA group to use the same size as namespaces. 329 */ 330 uint32_t *ana_group; 331 }; 332 333 static int 334 subsystem_cmp(struct spdk_nvmf_subsystem *subsystem1, struct spdk_nvmf_subsystem *subsystem2) 335 { 336 return strncmp(subsystem1->subnqn, subsystem2->subnqn, sizeof(subsystem1->subnqn)); 337 } 338 339 RB_GENERATE_STATIC(subsystem_tree, spdk_nvmf_subsystem, link, subsystem_cmp); 340 341 int nvmf_poll_group_update_subsystem(struct spdk_nvmf_poll_group *group, 342 struct spdk_nvmf_subsystem *subsystem); 343 int nvmf_poll_group_add_subsystem(struct spdk_nvmf_poll_group *group, 344 struct spdk_nvmf_subsystem *subsystem, 345 spdk_nvmf_poll_group_mod_done cb_fn, void *cb_arg); 346 void nvmf_poll_group_remove_subsystem(struct spdk_nvmf_poll_group *group, 347 struct spdk_nvmf_subsystem *subsystem, spdk_nvmf_poll_group_mod_done cb_fn, void *cb_arg); 348 void nvmf_poll_group_pause_subsystem(struct spdk_nvmf_poll_group *group, 349 struct spdk_nvmf_subsystem *subsystem, 350 uint32_t nsid, 351 spdk_nvmf_poll_group_mod_done cb_fn, void *cb_arg); 352 void nvmf_poll_group_resume_subsystem(struct spdk_nvmf_poll_group *group, 353 struct spdk_nvmf_subsystem *subsystem, spdk_nvmf_poll_group_mod_done cb_fn, void *cb_arg); 354 355 void nvmf_update_discovery_log(struct spdk_nvmf_tgt *tgt, const char *hostnqn); 356 void nvmf_get_discovery_log_page(struct spdk_nvmf_tgt *tgt, const char *hostnqn, struct iovec *iov, 357 uint32_t iovcnt, uint64_t offset, uint32_t length, 358 struct spdk_nvme_transport_id *cmd_source_trid); 359 360 void nvmf_ctrlr_destruct(struct spdk_nvmf_ctrlr *ctrlr); 361 int nvmf_ctrlr_process_admin_cmd(struct spdk_nvmf_request *req); 362 int nvmf_ctrlr_process_io_cmd(struct spdk_nvmf_request *req); 363 bool nvmf_ctrlr_dsm_supported(struct spdk_nvmf_ctrlr *ctrlr); 364 bool nvmf_ctrlr_write_zeroes_supported(struct spdk_nvmf_ctrlr *ctrlr); 365 bool nvmf_ctrlr_copy_supported(struct spdk_nvmf_ctrlr *ctrlr); 366 void nvmf_ctrlr_ns_changed(struct spdk_nvmf_ctrlr *ctrlr, uint32_t nsid); 367 bool nvmf_ctrlr_use_zcopy(struct spdk_nvmf_request *req); 368 369 void nvmf_bdev_ctrlr_identify_ns(struct spdk_nvmf_ns *ns, struct spdk_nvme_ns_data *nsdata, 370 bool dif_insert_or_strip); 371 int nvmf_bdev_ctrlr_read_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc, 372 struct spdk_io_channel *ch, struct spdk_nvmf_request *req); 373 int nvmf_bdev_ctrlr_write_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc, 374 struct spdk_io_channel *ch, struct spdk_nvmf_request *req); 375 int nvmf_bdev_ctrlr_compare_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc, 376 struct spdk_io_channel *ch, struct spdk_nvmf_request *req); 377 int nvmf_bdev_ctrlr_compare_and_write_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc, 378 struct spdk_io_channel *ch, struct spdk_nvmf_request *cmp_req, struct spdk_nvmf_request *write_req); 379 int nvmf_bdev_ctrlr_write_zeroes_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc, 380 struct spdk_io_channel *ch, struct spdk_nvmf_request *req); 381 int nvmf_bdev_ctrlr_flush_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc, 382 struct spdk_io_channel *ch, struct spdk_nvmf_request *req); 383 int nvmf_bdev_ctrlr_dsm_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc, 384 struct spdk_io_channel *ch, struct spdk_nvmf_request *req); 385 int nvmf_bdev_ctrlr_copy_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc, 386 struct spdk_io_channel *ch, struct spdk_nvmf_request *req); 387 int nvmf_bdev_ctrlr_nvme_passthru_io(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc, 388 struct spdk_io_channel *ch, struct spdk_nvmf_request *req); 389 bool nvmf_bdev_ctrlr_get_dif_ctx(struct spdk_bdev *bdev, struct spdk_nvme_cmd *cmd, 390 struct spdk_dif_ctx *dif_ctx); 391 bool nvmf_bdev_zcopy_enabled(struct spdk_bdev *bdev); 392 393 int nvmf_subsystem_add_ctrlr(struct spdk_nvmf_subsystem *subsystem, 394 struct spdk_nvmf_ctrlr *ctrlr); 395 void nvmf_subsystem_remove_ctrlr(struct spdk_nvmf_subsystem *subsystem, 396 struct spdk_nvmf_ctrlr *ctrlr); 397 void nvmf_subsystem_remove_all_listeners(struct spdk_nvmf_subsystem *subsystem, 398 bool stop); 399 struct spdk_nvmf_ctrlr *nvmf_subsystem_get_ctrlr(struct spdk_nvmf_subsystem *subsystem, 400 uint16_t cntlid); 401 struct spdk_nvmf_subsystem_listener *nvmf_subsystem_find_listener( 402 struct spdk_nvmf_subsystem *subsystem, 403 const struct spdk_nvme_transport_id *trid); 404 struct spdk_nvmf_listener *nvmf_transport_find_listener( 405 struct spdk_nvmf_transport *transport, 406 const struct spdk_nvme_transport_id *trid); 407 void nvmf_transport_dump_opts(struct spdk_nvmf_transport *transport, struct spdk_json_write_ctx *w, 408 bool named); 409 void nvmf_transport_listen_dump_trid(const struct spdk_nvme_transport_id *trid, 410 struct spdk_json_write_ctx *w); 411 void nvmf_subsystem_set_ana_state(struct spdk_nvmf_subsystem *subsystem, 412 const struct spdk_nvme_transport_id *trid, 413 enum spdk_nvme_ana_state ana_state, uint32_t anagrpid, 414 spdk_nvmf_tgt_subsystem_listen_done_fn cb_fn, void *cb_arg); 415 bool nvmf_subsystem_get_ana_reporting(struct spdk_nvmf_subsystem *subsystem); 416 417 /** 418 * Sets the controller ID range for a subsystem. 419 * Valid range is [1, 0xFFEF]. 420 * 421 * May only be performed on subsystems in the INACTIVE state. 422 * 423 * \param subsystem Subsystem to modify. 424 * \param min_cntlid Minimum controller ID. 425 * \param max_cntlid Maximum controller ID. 426 * 427 * \return 0 on success, or negated errno value on failure. 428 */ 429 int nvmf_subsystem_set_cntlid_range(struct spdk_nvmf_subsystem *subsystem, 430 uint16_t min_cntlid, uint16_t max_cntlid); 431 432 int nvmf_ctrlr_async_event_ns_notice(struct spdk_nvmf_ctrlr *ctrlr); 433 int nvmf_ctrlr_async_event_ana_change_notice(struct spdk_nvmf_ctrlr *ctrlr); 434 void nvmf_ctrlr_async_event_discovery_log_change_notice(void *ctx); 435 void nvmf_ctrlr_async_event_reservation_notification(struct spdk_nvmf_ctrlr *ctrlr); 436 437 void nvmf_ns_reservation_request(void *ctx); 438 void nvmf_ctrlr_reservation_notice_log(struct spdk_nvmf_ctrlr *ctrlr, 439 struct spdk_nvmf_ns *ns, 440 enum spdk_nvme_reservation_notification_log_page_type type); 441 442 443 /* 444 * Abort zero-copy requests that already got the buffer (received zcopy_start cb), but haven't 445 * started zcopy_end. These requests are kept on the outstanding queue, but are not waiting for a 446 * completion from the bdev layer, so, when a qpair is being disconnected, we need to kick them to 447 * force their completion. 448 */ 449 void nvmf_qpair_abort_pending_zcopy_reqs(struct spdk_nvmf_qpair *qpair); 450 451 /* 452 * Free aer simply frees the rdma resources for the aer without informing the host. 453 * This function should be called when deleting a qpair when one wants to make sure 454 * the qpair is completely empty before freeing the request. The reason we free the 455 * AER without sending a completion is to prevent the host from sending another AER. 456 */ 457 void nvmf_qpair_free_aer(struct spdk_nvmf_qpair *qpair); 458 459 int nvmf_ctrlr_abort_request(struct spdk_nvmf_request *req); 460 461 void nvmf_ctrlr_set_fatal_status(struct spdk_nvmf_ctrlr *ctrlr); 462 463 static inline struct spdk_nvmf_ns * 464 _nvmf_subsystem_get_ns(struct spdk_nvmf_subsystem *subsystem, uint32_t nsid) 465 { 466 /* NOTE: This implicitly also checks for 0, since 0 - 1 wraps around to UINT32_MAX. */ 467 if (spdk_unlikely(nsid - 1 >= subsystem->max_nsid)) { 468 return NULL; 469 } 470 471 return subsystem->ns[nsid - 1]; 472 } 473 474 static inline bool 475 nvmf_qpair_is_admin_queue(struct spdk_nvmf_qpair *qpair) 476 { 477 return qpair->qid == 0; 478 } 479 480 /* 481 * Tests whether a given string represents a valid NQN. 482 */ 483 bool nvmf_nqn_is_valid(const char *nqn); 484 485 /** 486 * Initiates a zcopy start operation 487 * 488 * \param bdev The \ref spdk_bdev 489 * \param desc The \ref spdk_bdev_desc 490 * \param ch The \ref spdk_io_channel 491 * \param req The \ref spdk_nvmf_request passed to the bdev for processing 492 * 493 * \return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE if the command was completed immediately or 494 * SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS if the command was submitted and will be 495 * completed asynchronously. Asynchronous completions are notified through 496 * spdk_nvmf_request_complete(). 497 */ 498 int nvmf_bdev_ctrlr_zcopy_start(struct spdk_bdev *bdev, 499 struct spdk_bdev_desc *desc, 500 struct spdk_io_channel *ch, 501 struct spdk_nvmf_request *req); 502 503 /** 504 * Ends a zcopy operation 505 * 506 * \param req The NVMe-oF request 507 * \param commit Flag indicating whether the buffers should be committed 508 */ 509 void nvmf_bdev_ctrlr_zcopy_end(struct spdk_nvmf_request *req, bool commit); 510 511 #endif /* __NVMF_INTERNAL_H__ */ 512