1 /*- 2 * BSD LICENSE 3 * 4 * Copyright (c) Intel Corporation. All rights reserved. 5 * Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of Intel Corporation nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #ifndef __NVMF_INTERNAL_H__ 35 #define __NVMF_INTERNAL_H__ 36 37 #include "spdk/stdinc.h" 38 39 #include "spdk/likely.h" 40 #include "spdk/nvmf.h" 41 #include "spdk/nvmf_cmd.h" 42 #include "spdk/nvmf_transport.h" 43 #include "spdk/nvmf_spec.h" 44 #include "spdk/assert.h" 45 #include "spdk/bdev.h" 46 #include "spdk/queue.h" 47 #include "spdk/util.h" 48 #include "spdk/thread.h" 49 50 #define NVMF_MAX_ASYNC_EVENTS (4) 51 52 /* The spec reserves cntlid values in the range FFF0h to FFFFh. */ 53 #define NVMF_MIN_CNTLID 1 54 #define NVMF_MAX_CNTLID 0xFFEF 55 56 enum spdk_nvmf_subsystem_state { 57 SPDK_NVMF_SUBSYSTEM_INACTIVE = 0, 58 SPDK_NVMF_SUBSYSTEM_ACTIVATING, 59 SPDK_NVMF_SUBSYSTEM_ACTIVE, 60 SPDK_NVMF_SUBSYSTEM_PAUSING, 61 SPDK_NVMF_SUBSYSTEM_PAUSED, 62 SPDK_NVMF_SUBSYSTEM_RESUMING, 63 SPDK_NVMF_SUBSYSTEM_DEACTIVATING, 64 SPDK_NVMF_SUBSYSTEM_NUM_STATES, 65 }; 66 67 struct spdk_nvmf_tgt { 68 char name[NVMF_TGT_NAME_MAX_LENGTH]; 69 70 pthread_mutex_t mutex; 71 72 uint64_t discovery_genctr; 73 74 struct spdk_poller *accept_poller; 75 76 uint32_t max_subsystems; 77 78 /* Array of subsystem pointers of size max_subsystems indexed by sid */ 79 struct spdk_nvmf_subsystem **subsystems; 80 81 TAILQ_HEAD(, spdk_nvmf_transport) transports; 82 TAILQ_HEAD(, spdk_nvmf_poll_group) poll_groups; 83 84 /* Used for round-robin assignment of connections to poll groups */ 85 struct spdk_nvmf_poll_group *next_poll_group; 86 87 spdk_nvmf_tgt_destroy_done_fn *destroy_cb_fn; 88 void *destroy_cb_arg; 89 90 uint16_t crdt[3]; 91 92 TAILQ_ENTRY(spdk_nvmf_tgt) link; 93 }; 94 95 struct spdk_nvmf_host { 96 char nqn[SPDK_NVMF_NQN_MAX_LEN + 1]; 97 TAILQ_ENTRY(spdk_nvmf_host) link; 98 }; 99 100 struct spdk_nvmf_subsystem_listener { 101 struct spdk_nvmf_subsystem *subsystem; 102 spdk_nvmf_tgt_subsystem_listen_done_fn cb_fn; 103 void *cb_arg; 104 struct spdk_nvme_transport_id *trid; 105 struct spdk_nvmf_transport *transport; 106 enum spdk_nvme_ana_state *ana_state; 107 uint64_t ana_state_change_count; 108 TAILQ_ENTRY(spdk_nvmf_subsystem_listener) link; 109 }; 110 111 /* Maximum number of registrants supported per namespace */ 112 #define SPDK_NVMF_MAX_NUM_REGISTRANTS 16 113 114 struct spdk_nvmf_registrant_info { 115 uint64_t rkey; 116 char host_uuid[SPDK_UUID_STRING_LEN]; 117 }; 118 119 struct spdk_nvmf_reservation_info { 120 bool ptpl_activated; 121 enum spdk_nvme_reservation_type rtype; 122 uint64_t crkey; 123 char bdev_uuid[SPDK_UUID_STRING_LEN]; 124 char holder_uuid[SPDK_UUID_STRING_LEN]; 125 uint32_t num_regs; 126 struct spdk_nvmf_registrant_info registrants[SPDK_NVMF_MAX_NUM_REGISTRANTS]; 127 }; 128 129 struct spdk_nvmf_subsystem_pg_ns_info { 130 struct spdk_io_channel *channel; 131 struct spdk_uuid uuid; 132 /* current reservation key, no reservation if the value is 0 */ 133 uint64_t crkey; 134 /* reservation type */ 135 enum spdk_nvme_reservation_type rtype; 136 /* Host ID which holds the reservation */ 137 struct spdk_uuid holder_id; 138 /* Host ID for the registrants with the namespace */ 139 struct spdk_uuid reg_hostid[SPDK_NVMF_MAX_NUM_REGISTRANTS]; 140 uint64_t num_blocks; 141 142 /* I/O outstanding to this namespace */ 143 uint64_t io_outstanding; 144 enum spdk_nvmf_subsystem_state state; 145 }; 146 147 typedef void(*spdk_nvmf_poll_group_mod_done)(void *cb_arg, int status); 148 149 struct spdk_nvmf_subsystem_poll_group { 150 /* Array of namespace information for each namespace indexed by nsid - 1 */ 151 struct spdk_nvmf_subsystem_pg_ns_info *ns_info; 152 uint32_t num_ns; 153 154 /* Number of ADMIN and FABRICS requests outstanding */ 155 uint64_t mgmt_io_outstanding; 156 spdk_nvmf_poll_group_mod_done cb_fn; 157 void *cb_arg; 158 159 enum spdk_nvmf_subsystem_state state; 160 161 TAILQ_HEAD(, spdk_nvmf_request) queued; 162 }; 163 164 struct spdk_nvmf_registrant { 165 TAILQ_ENTRY(spdk_nvmf_registrant) link; 166 struct spdk_uuid hostid; 167 /* Registration key */ 168 uint64_t rkey; 169 }; 170 171 struct spdk_nvmf_ns { 172 uint32_t nsid; 173 uint32_t anagrpid; 174 struct spdk_nvmf_subsystem *subsystem; 175 struct spdk_bdev *bdev; 176 struct spdk_bdev_desc *desc; 177 struct spdk_nvmf_ns_opts opts; 178 /* reservation notificaton mask */ 179 uint32_t mask; 180 /* generation code */ 181 uint32_t gen; 182 /* registrants head */ 183 TAILQ_HEAD(, spdk_nvmf_registrant) registrants; 184 /* current reservation key */ 185 uint64_t crkey; 186 /* reservation type */ 187 enum spdk_nvme_reservation_type rtype; 188 /* current reservation holder, only valid if reservation type can only have one holder */ 189 struct spdk_nvmf_registrant *holder; 190 /* Persist Through Power Loss file which contains the persistent reservation */ 191 char *ptpl_file; 192 /* Persist Through Power Loss feature is enabled */ 193 bool ptpl_activated; 194 /* ZCOPY supported on bdev device */ 195 bool zcopy; 196 }; 197 198 struct spdk_nvmf_ctrlr_feat { 199 union spdk_nvme_feat_arbitration arbitration; 200 union spdk_nvme_feat_power_management power_management; 201 union spdk_nvme_feat_error_recovery error_recovery; 202 union spdk_nvme_feat_volatile_write_cache volatile_write_cache; 203 union spdk_nvme_feat_number_of_queues number_of_queues; 204 union spdk_nvme_feat_interrupt_coalescing interrupt_coalescing; 205 union spdk_nvme_feat_interrupt_vector_configuration interrupt_vector_configuration; 206 union spdk_nvme_feat_write_atomicity write_atomicity; 207 union spdk_nvme_feat_async_event_configuration async_event_configuration; 208 union spdk_nvme_feat_keep_alive_timer keep_alive_timer; 209 }; 210 211 /* 212 * NVMf reservation notificaton log page. 213 */ 214 struct spdk_nvmf_reservation_log { 215 struct spdk_nvme_reservation_notification_log log; 216 TAILQ_ENTRY(spdk_nvmf_reservation_log) link; 217 struct spdk_nvmf_ctrlr *ctrlr; 218 }; 219 220 /* 221 * NVMf async event completion. 222 */ 223 struct spdk_nvmf_async_event_completion { 224 union spdk_nvme_async_event_completion event; 225 STAILQ_ENTRY(spdk_nvmf_async_event_completion) link; 226 }; 227 228 /* 229 * This structure represents an NVMe-oF controller, 230 * which is like a "session" in networking terms. 231 */ 232 struct spdk_nvmf_ctrlr { 233 uint16_t cntlid; 234 char hostnqn[SPDK_NVMF_NQN_MAX_LEN + 1]; 235 struct spdk_nvmf_subsystem *subsys; 236 237 struct spdk_nvmf_ctrlr_data cdata; 238 239 struct spdk_nvmf_registers vcprop; 240 241 struct spdk_nvmf_ctrlr_feat feat; 242 243 struct spdk_nvmf_qpair *admin_qpair; 244 struct spdk_thread *thread; 245 struct spdk_bit_array *qpair_mask; 246 247 const struct spdk_nvmf_subsystem_listener *listener; 248 249 struct spdk_nvmf_request *aer_req[NVMF_MAX_ASYNC_EVENTS]; 250 STAILQ_HEAD(, spdk_nvmf_async_event_completion) async_events; 251 uint64_t notice_aen_mask; 252 uint8_t nr_aer_reqs; 253 struct spdk_uuid hostid; 254 255 uint32_t association_timeout; /* in milliseconds */ 256 uint16_t changed_ns_list_count; 257 struct spdk_nvme_ns_list changed_ns_list; 258 uint64_t log_page_count; 259 uint8_t num_avail_log_pages; 260 TAILQ_HEAD(log_page_head, spdk_nvmf_reservation_log) log_head; 261 262 /* Time to trigger keep-alive--poller_time = now_tick + period */ 263 uint64_t last_keep_alive_tick; 264 struct spdk_poller *keep_alive_poller; 265 266 struct spdk_poller *association_timer; 267 268 bool dif_insert_or_strip; 269 bool in_destruct; 270 bool disconnect_in_progress; 271 bool acre_enabled; 272 273 TAILQ_ENTRY(spdk_nvmf_ctrlr) link; 274 }; 275 276 struct spdk_nvmf_subsystem { 277 struct spdk_thread *thread; 278 279 uint32_t id; 280 281 enum spdk_nvmf_subsystem_state state; 282 enum spdk_nvmf_subtype subtype; 283 284 uint16_t next_cntlid; 285 struct { 286 uint8_t allow_any_host : 1; 287 uint8_t allow_any_listener : 1; 288 uint8_t ana_reporting : 1; 289 uint8_t reserved : 5; 290 } flags; 291 292 /* boolean for state change synchronization */ 293 bool changing_state; 294 295 struct spdk_nvmf_tgt *tgt; 296 297 /* Array of pointers to namespaces of size max_nsid indexed by nsid - 1 */ 298 struct spdk_nvmf_ns **ns; 299 uint32_t max_nsid; 300 301 uint16_t min_cntlid; 302 uint16_t max_cntlid; 303 304 TAILQ_HEAD(, spdk_nvmf_ctrlr) ctrlrs; 305 306 /* A mutex used to protect the hosts list and allow_any_host flag. Unlike the namespace 307 * array, this list is not used on the I/O path (it's needed for handling things like 308 * the CONNECT command), so use a mutex to protect it instead of requiring the subsystem 309 * state to be paused. This removes the requirement to pause the subsystem when hosts 310 * are added or removed dynamically. */ 311 pthread_mutex_t mutex; 312 TAILQ_HEAD(, spdk_nvmf_host) hosts; 313 TAILQ_HEAD(, spdk_nvmf_subsystem_listener) listeners; 314 315 TAILQ_ENTRY(spdk_nvmf_subsystem) entries; 316 317 char sn[SPDK_NVME_CTRLR_SN_LEN + 1]; 318 char mn[SPDK_NVME_CTRLR_MN_LEN + 1]; 319 char subnqn[SPDK_NVMF_NQN_MAX_LEN + 1]; 320 321 /* Array of namespace count per ANA group of size max_nsid indexed anagrpid - 1 322 * It will be enough for ANA group to use the same size as namespaces. 323 */ 324 uint32_t *ana_group; 325 }; 326 327 int nvmf_poll_group_add_transport(struct spdk_nvmf_poll_group *group, 328 struct spdk_nvmf_transport *transport); 329 int nvmf_poll_group_update_subsystem(struct spdk_nvmf_poll_group *group, 330 struct spdk_nvmf_subsystem *subsystem); 331 int nvmf_poll_group_add_subsystem(struct spdk_nvmf_poll_group *group, 332 struct spdk_nvmf_subsystem *subsystem, 333 spdk_nvmf_poll_group_mod_done cb_fn, void *cb_arg); 334 void nvmf_poll_group_remove_subsystem(struct spdk_nvmf_poll_group *group, 335 struct spdk_nvmf_subsystem *subsystem, spdk_nvmf_poll_group_mod_done cb_fn, void *cb_arg); 336 void nvmf_poll_group_pause_subsystem(struct spdk_nvmf_poll_group *group, 337 struct spdk_nvmf_subsystem *subsystem, 338 uint32_t nsid, 339 spdk_nvmf_poll_group_mod_done cb_fn, void *cb_arg); 340 void nvmf_poll_group_resume_subsystem(struct spdk_nvmf_poll_group *group, 341 struct spdk_nvmf_subsystem *subsystem, spdk_nvmf_poll_group_mod_done cb_fn, void *cb_arg); 342 343 void nvmf_update_discovery_log(struct spdk_nvmf_tgt *tgt, const char *hostnqn); 344 void nvmf_get_discovery_log_page(struct spdk_nvmf_tgt *tgt, const char *hostnqn, 345 struct iovec *iov, 346 uint32_t iovcnt, uint64_t offset, uint32_t length); 347 348 void nvmf_ctrlr_destruct(struct spdk_nvmf_ctrlr *ctrlr); 349 int nvmf_ctrlr_process_admin_cmd(struct spdk_nvmf_request *req); 350 int nvmf_ctrlr_process_io_cmd(struct spdk_nvmf_request *req); 351 bool nvmf_ctrlr_dsm_supported(struct spdk_nvmf_ctrlr *ctrlr); 352 bool nvmf_ctrlr_write_zeroes_supported(struct spdk_nvmf_ctrlr *ctrlr); 353 void nvmf_ctrlr_ns_changed(struct spdk_nvmf_ctrlr *ctrlr, uint32_t nsid); 354 bool nvmf_ctrlr_use_zcopy(struct spdk_nvmf_request *req); 355 356 void nvmf_bdev_ctrlr_identify_ns(struct spdk_nvmf_ns *ns, struct spdk_nvme_ns_data *nsdata, 357 bool dif_insert_or_strip); 358 int nvmf_bdev_ctrlr_read_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc, 359 struct spdk_io_channel *ch, struct spdk_nvmf_request *req); 360 int nvmf_bdev_ctrlr_write_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc, 361 struct spdk_io_channel *ch, struct spdk_nvmf_request *req); 362 int nvmf_bdev_ctrlr_compare_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc, 363 struct spdk_io_channel *ch, struct spdk_nvmf_request *req); 364 int nvmf_bdev_ctrlr_compare_and_write_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc, 365 struct spdk_io_channel *ch, struct spdk_nvmf_request *cmp_req, struct spdk_nvmf_request *write_req); 366 int nvmf_bdev_ctrlr_write_zeroes_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc, 367 struct spdk_io_channel *ch, struct spdk_nvmf_request *req); 368 int nvmf_bdev_ctrlr_flush_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc, 369 struct spdk_io_channel *ch, struct spdk_nvmf_request *req); 370 int nvmf_bdev_ctrlr_dsm_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc, 371 struct spdk_io_channel *ch, struct spdk_nvmf_request *req); 372 int nvmf_bdev_ctrlr_nvme_passthru_io(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc, 373 struct spdk_io_channel *ch, struct spdk_nvmf_request *req); 374 bool nvmf_bdev_ctrlr_get_dif_ctx(struct spdk_bdev *bdev, struct spdk_nvme_cmd *cmd, 375 struct spdk_dif_ctx *dif_ctx); 376 bool nvmf_bdev_zcopy_enabled(struct spdk_bdev *bdev); 377 378 int nvmf_subsystem_add_ctrlr(struct spdk_nvmf_subsystem *subsystem, 379 struct spdk_nvmf_ctrlr *ctrlr); 380 void nvmf_subsystem_remove_ctrlr(struct spdk_nvmf_subsystem *subsystem, 381 struct spdk_nvmf_ctrlr *ctrlr); 382 void nvmf_subsystem_remove_all_listeners(struct spdk_nvmf_subsystem *subsystem, 383 bool stop); 384 struct spdk_nvmf_ctrlr *nvmf_subsystem_get_ctrlr(struct spdk_nvmf_subsystem *subsystem, 385 uint16_t cntlid); 386 struct spdk_nvmf_subsystem_listener *nvmf_subsystem_find_listener( 387 struct spdk_nvmf_subsystem *subsystem, 388 const struct spdk_nvme_transport_id *trid); 389 struct spdk_nvmf_listener *nvmf_transport_find_listener( 390 struct spdk_nvmf_transport *transport, 391 const struct spdk_nvme_transport_id *trid); 392 void nvmf_transport_dump_opts(struct spdk_nvmf_transport *transport, struct spdk_json_write_ctx *w, 393 bool named); 394 void nvmf_transport_listen_dump_opts(struct spdk_nvmf_transport *transport, 395 const struct spdk_nvme_transport_id *trid, struct spdk_json_write_ctx *w); 396 void nvmf_subsystem_set_ana_state(struct spdk_nvmf_subsystem *subsystem, 397 const struct spdk_nvme_transport_id *trid, 398 enum spdk_nvme_ana_state ana_state, uint32_t anagrpid, 399 spdk_nvmf_tgt_subsystem_listen_done_fn cb_fn, void *cb_arg); 400 bool nvmf_subsystem_get_ana_reporting(struct spdk_nvmf_subsystem *subsystem); 401 402 /** 403 * Sets the controller ID range for a subsystem. 404 * Valid range is [1, 0xFFEF]. 405 * 406 * May only be performed on subsystems in the INACTIVE state. 407 * 408 * \param subsystem Subsystem to modify. 409 * \param min_cntlid Minimum controller ID. 410 * \param max_cntlid Maximum controller ID. 411 * 412 * \return 0 on success, or negated errno value on failure. 413 */ 414 int nvmf_subsystem_set_cntlid_range(struct spdk_nvmf_subsystem *subsystem, 415 uint16_t min_cntlid, uint16_t max_cntlid); 416 417 int nvmf_ctrlr_async_event_ns_notice(struct spdk_nvmf_ctrlr *ctrlr); 418 int nvmf_ctrlr_async_event_ana_change_notice(struct spdk_nvmf_ctrlr *ctrlr); 419 int nvmf_ctrlr_async_event_discovery_log_change_notice(struct spdk_nvmf_ctrlr *ctrlr); 420 void nvmf_ctrlr_async_event_reservation_notification(struct spdk_nvmf_ctrlr *ctrlr); 421 void nvmf_ns_reservation_request(void *ctx); 422 void nvmf_ctrlr_reservation_notice_log(struct spdk_nvmf_ctrlr *ctrlr, 423 struct spdk_nvmf_ns *ns, 424 enum spdk_nvme_reservation_notification_log_page_type type); 425 426 /* 427 * Abort aer is sent on a per controller basis and sends a completion for the aer to the host. 428 * This function should be called when attempting to recover in error paths when it is OK for 429 * the host to send a subsequent AER. 430 */ 431 void nvmf_ctrlr_abort_aer(struct spdk_nvmf_ctrlr *ctrlr); 432 433 /* 434 * Free aer simply frees the rdma resources for the aer without informing the host. 435 * This function should be called when deleting a qpair when one wants to make sure 436 * the qpair is completely empty before freeing the request. The reason we free the 437 * AER without sending a completion is to prevent the host from sending another AER. 438 */ 439 void nvmf_qpair_free_aer(struct spdk_nvmf_qpair *qpair); 440 441 int nvmf_ctrlr_abort_request(struct spdk_nvmf_request *req); 442 443 static inline struct spdk_nvmf_ns * 444 _nvmf_subsystem_get_ns(struct spdk_nvmf_subsystem *subsystem, uint32_t nsid) 445 { 446 /* NOTE: This implicitly also checks for 0, since 0 - 1 wraps around to UINT32_MAX. */ 447 if (spdk_unlikely(nsid - 1 >= subsystem->max_nsid)) { 448 return NULL; 449 } 450 451 return subsystem->ns[nsid - 1]; 452 } 453 454 static inline bool 455 nvmf_qpair_is_admin_queue(struct spdk_nvmf_qpair *qpair) 456 { 457 return qpair->qid == 0; 458 } 459 460 /** 461 * Initiates a zcopy start operation 462 * 463 * \param bdev The \ref spdk_bdev 464 * \param desc The \ref spdk_bdev_desc 465 * \param ch The \ref spdk_io_channel 466 * \param req The \ref spdk_nvmf_request passed to the bdev for processing 467 * 468 * \return 0 upon success 469 * \return <0 if the zcopy operation could not be started 470 */ 471 int nvmf_bdev_ctrlr_start_zcopy(struct spdk_bdev *bdev, 472 struct spdk_bdev_desc *desc, 473 struct spdk_io_channel *ch, 474 struct spdk_nvmf_request *req); 475 476 /** 477 * Ends a zcopy operation 478 * 479 * \param req The NVMe-oF request 480 * \param commit Flag indicating whether the buffers shoule be committed 481 * 482 * \return 0 upon success 483 * \return <0 on error 484 */ 485 int nvmf_bdev_ctrlr_end_zcopy(struct spdk_nvmf_request *req, bool commit); 486 487 #endif /* __NVMF_INTERNAL_H__ */ 488