xref: /spdk/lib/nvmf/nvmf_internal.h (revision fecffda6ecf8853b82edccde429b68252f0a62c5)
1 /*   SPDX-License-Identifier: BSD-3-Clause
2  *   Copyright (C) 2016 Intel Corporation. All rights reserved.
3  *   Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved.
4  *   Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
5  */
6 
7 #ifndef __NVMF_INTERNAL_H__
8 #define __NVMF_INTERNAL_H__
9 
10 #include "spdk/stdinc.h"
11 
12 #include "spdk/likely.h"
13 #include "spdk/nvmf.h"
14 #include "spdk/nvmf_cmd.h"
15 #include "spdk/nvmf_transport.h"
16 #include "spdk/nvmf_spec.h"
17 #include "spdk/assert.h"
18 #include "spdk/bdev.h"
19 #include "spdk/queue.h"
20 #include "spdk/util.h"
21 #include "spdk/thread.h"
22 
23 /* The spec reserves cntlid values in the range FFF0h to FFFFh. */
24 #define NVMF_MIN_CNTLID 1
25 #define NVMF_MAX_CNTLID 0xFFEF
26 
/*
 * Lifecycle states of an NVMe-oF subsystem.  The *_ING values are the
 * transitional states while poll groups are updated asynchronously.
 */
enum spdk_nvmf_subsystem_state {
	SPDK_NVMF_SUBSYSTEM_INACTIVE = 0,
	SPDK_NVMF_SUBSYSTEM_ACTIVATING,
	SPDK_NVMF_SUBSYSTEM_ACTIVE,
	SPDK_NVMF_SUBSYSTEM_PAUSING,
	SPDK_NVMF_SUBSYSTEM_PAUSED,
	SPDK_NVMF_SUBSYSTEM_RESUMING,
	SPDK_NVMF_SUBSYSTEM_DEACTIVATING,
	/* Count of states above; not a valid state itself. */
	SPDK_NVMF_SUBSYSTEM_NUM_STATES,
};
37 
/*
 * An NVMe-oF target: the top-level container owning subsystems,
 * transports and poll groups.
 */
struct spdk_nvmf_tgt {
	char					name[NVMF_TGT_NAME_MAX_LENGTH];

	pthread_mutex_t				mutex;

	/* Discovery log generation counter (see NVMe GENCTR); changes when
	 * the discovery log content changes. */
	uint64_t				discovery_genctr;

	uint32_t				max_subsystems;

	/* Controls which subsystems/listeners are reported in the
	 * discovery log. */
	enum spdk_nvmf_tgt_discovery_filter	discovery_filter;

	/* Array of subsystem pointers of size max_subsystems indexed by sid */
	struct spdk_nvmf_subsystem		**subsystems;

	TAILQ_HEAD(, spdk_nvmf_transport)	transports;
	TAILQ_HEAD(, spdk_nvmf_poll_group)	poll_groups;

	/* Used for round-robin assignment of connections to poll groups */
	struct spdk_nvmf_poll_group		*next_poll_group;

	/* Completion callback for asynchronous target destruction. */
	spdk_nvmf_tgt_destroy_done_fn		*destroy_cb_fn;
	void					*destroy_cb_arg;

	/* Command Retry Delay Times advertised to hosts (NVMe CRDT1-3). */
	uint16_t				crdt[3];

	/* Linkage for a list of targets. */
	TAILQ_ENTRY(spdk_nvmf_tgt)		link;
};
65 
/* An entry in a subsystem's allowed-hosts list, identified by host NQN. */
struct spdk_nvmf_host {
	char				nqn[SPDK_NVMF_NQN_MAX_LEN + 1];
	TAILQ_ENTRY(spdk_nvmf_host)	link;
};
70 
/*
 * Association between a subsystem and a transport listen address (trid),
 * including per-ANA-group state reported for this listener.
 */
struct spdk_nvmf_subsystem_listener {
	struct spdk_nvmf_subsystem			*subsystem;
	/* Completion callback for the asynchronous listen-add operation. */
	spdk_nvmf_tgt_subsystem_listen_done_fn		cb_fn;
	void						*cb_arg;
	struct spdk_nvme_transport_id			*trid;
	struct spdk_nvmf_transport			*transport;
	/* Array of ANA states, presumably indexed by anagrpid - 1 —
	 * verify against users. */
	enum spdk_nvme_ana_state			*ana_state;
	uint64_t					ana_state_change_count;
	/* Listener ID; presumably allocated from the subsystem's
	 * used_listener_ids bit array. */
	uint16_t					id;
	TAILQ_ENTRY(spdk_nvmf_subsystem_listener)	link;
};
82 
83 /* Maximum number of registrants supported per namespace */
84 #define SPDK_NVMF_MAX_NUM_REGISTRANTS		16
85 
/* Serialized form of a single reservation registrant (used by the
 * reservation persistence snapshot below). */
struct spdk_nvmf_registrant_info {
	/* Registration key. */
	uint64_t		rkey;
	/* Registrant's host ID as a UUID string. */
	char			host_uuid[SPDK_UUID_STRING_LEN];
};
90 
/*
 * Snapshot of a namespace's persistent reservation state, used for the
 * Persist Through Power Loss (PTPL) feature.
 */
struct spdk_nvmf_reservation_info {
	bool					ptpl_activated;
	/* Reservation type. */
	enum spdk_nvme_reservation_type		rtype;
	/* Current reservation key. */
	uint64_t				crkey;
	/* UUIDs (string form) of the backing bdev and reservation holder. */
	char					bdev_uuid[SPDK_UUID_STRING_LEN];
	char					holder_uuid[SPDK_UUID_STRING_LEN];
	/* Number of valid entries in registrants[]. */
	uint32_t				num_regs;
	struct spdk_nvmf_registrant_info	registrants[SPDK_NVMF_MAX_NUM_REGISTRANTS];
};
100 
/*
 * Per-poll-group state for one namespace: the I/O channel, a copy of the
 * reservation state, and outstanding I/O accounting.
 */
struct spdk_nvmf_subsystem_pg_ns_info {
	struct spdk_io_channel		*channel;
	struct spdk_uuid		uuid;
	/* current reservation key, no reservation if the value is 0 */
	uint64_t			crkey;
	/* reservation type */
	enum spdk_nvme_reservation_type	rtype;
	/* Host ID which holds the reservation */
	struct spdk_uuid		holder_id;
	/* Host ID for the registrants with the namespace */
	struct spdk_uuid		reg_hostid[SPDK_NVMF_MAX_NUM_REGISTRANTS];
	uint64_t			num_blocks;

	/* I/O outstanding to this namespace */
	uint64_t			io_outstanding;
	/* This poll group's view of the namespace's subsystem state. */
	enum spdk_nvmf_subsystem_state	state;
};
118 
119 typedef void(*spdk_nvmf_poll_group_mod_done)(void *cb_arg, int status);
120 
/* Per-poll-group state for one subsystem. */
struct spdk_nvmf_subsystem_poll_group {
	/* Array of namespace information for each namespace indexed by nsid - 1 */
	struct spdk_nvmf_subsystem_pg_ns_info	*ns_info;
	uint32_t				num_ns;

	/* Number of ADMIN and FABRICS requests outstanding */
	uint64_t				mgmt_io_outstanding;
	/* Completion callback for an in-progress state change on this
	 * poll group. */
	spdk_nvmf_poll_group_mod_done		cb_fn;
	void					*cb_arg;

	/* This poll group's view of the subsystem state. */
	enum spdk_nvmf_subsystem_state		state;

	/* Requests held back pending a state change — presumably drained
	 * on resume; verify against the poll group code. */
	TAILQ_HEAD(, spdk_nvmf_request)		queued;
};
135 
/* A host registered with a namespace for persistent reservations. */
struct spdk_nvmf_registrant {
	TAILQ_ENTRY(spdk_nvmf_registrant) link;
	/* Host identifier of the registrant. */
	struct spdk_uuid hostid;
	/* Registration key */
	uint64_t rkey;
};
142 
/* A namespace within a subsystem, backed by a bdev. */
struct spdk_nvmf_ns {
	uint32_t nsid;
	/* ANA group this namespace belongs to */
	uint32_t anagrpid;
	struct spdk_nvmf_subsystem *subsystem;
	/* Backing bdev and the open descriptor for it */
	struct spdk_bdev *bdev;
	struct spdk_bdev_desc *desc;
	struct spdk_nvmf_ns_opts opts;
	/* reservation notification mask */
	uint32_t mask;
	/* generation code */
	uint32_t gen;
	/* registrants head */
	TAILQ_HEAD(, spdk_nvmf_registrant) registrants;
	/* current reservation key */
	uint64_t crkey;
	/* reservation type */
	enum spdk_nvme_reservation_type rtype;
	/* current reservation holder, only valid if reservation type can only have one holder */
	struct spdk_nvmf_registrant *holder;
	/* Persist Through Power Loss file which contains the persistent reservation */
	char *ptpl_file;
	/* Persist Through Power Loss feature is enabled */
	bool ptpl_activated;
	/* ZCOPY supported on bdev device */
	bool zcopy;
};
169 
/*
 * NVMf reservation notification log page entry, queued per controller
 * on ctrlr->log_head.
 */
struct spdk_nvmf_reservation_log {
	struct spdk_nvme_reservation_notification_log	log;
	TAILQ_ENTRY(spdk_nvmf_reservation_log)		link;
	/* Controller this log entry belongs to. */
	struct spdk_nvmf_ctrlr				*ctrlr;
};
178 
/*
 * NVMf async event completion, queued on ctrlr->async_events.
 */
struct spdk_nvmf_async_event_completion {
	union spdk_nvme_async_event_completion		event;
	STAILQ_ENTRY(spdk_nvmf_async_event_completion)	link;
};
186 
/*
 * This structure represents an NVMe-oF controller,
 * which is like a "session" in networking terms.
 */
struct spdk_nvmf_ctrlr {
	/* Controller ID, unique within the parent subsystem. */
	uint16_t			cntlid;
	/* NQN of the host that owns this controller. */
	char				hostnqn[SPDK_NVMF_NQN_MAX_LEN + 1];
	struct spdk_nvmf_subsystem	*subsys;

	/* Identify Controller data reported to the host. */
	struct spdk_nvmf_ctrlr_data	cdata;

	/* Virtual controller register set (CAP, CC, CSTS, ...). */
	struct spdk_nvmf_registers	vcprop;

	/* Current Set/Get Features values. */
	struct spdk_nvmf_ctrlr_feat feat;

	struct spdk_nvmf_qpair	*admin_qpair;
	/* Thread on which this controller is serviced. */
	struct spdk_thread	*thread;
	/* Bit array of qpair IDs — presumably marks IDs in use; verify
	 * against the connect path. */
	struct spdk_bit_array	*qpair_mask;

	const struct spdk_nvmf_subsystem_listener	*listener;

	/* Outstanding Asynchronous Event Request commands. */
	struct spdk_nvmf_request *aer_req[SPDK_NVMF_MAX_ASYNC_EVENTS];
	/* Events pending delivery (queued when no AER is outstanding). */
	STAILQ_HEAD(, spdk_nvmf_async_event_completion) async_events;
	uint64_t notice_aen_mask;
	/* Number of valid entries in aer_req[]. */
	uint8_t nr_aer_reqs;
	struct spdk_uuid  hostid;

	uint32_t association_timeout; /* in milliseconds */
	/* Namespaces changed since the host last read the Changed NS List
	 * log page. */
	uint16_t changed_ns_list_count;
	struct spdk_nvme_ns_list changed_ns_list;
	uint64_t log_page_count;
	/* Reservation notification log pages queued on log_head. */
	uint8_t num_avail_log_pages;
	TAILQ_HEAD(log_page_head, spdk_nvmf_reservation_log) log_head;

	/* Time to trigger keep-alive--poller_time = now_tick + period */
	uint64_t			last_keep_alive_tick;
	struct spdk_poller		*keep_alive_poller;

	struct spdk_poller		*association_timer;

	/* CC (controller configuration) change handling and its timeout. */
	struct spdk_poller		*cc_timer;
	uint64_t			cc_timeout_tsc;
	struct spdk_poller		*cc_timeout_timer;

	bool				dif_insert_or_strip;
	bool				in_destruct;
	bool				disconnect_in_progress;
	/* valid only when disconnect_in_progress is true */
	bool				disconnect_is_shn;
	/* Advanced Command Retry Enable feature state. */
	bool				acre_enabled;
	bool				dynamic_ctrlr;

	TAILQ_ENTRY(spdk_nvmf_ctrlr)	link;
};
241 
242 #define NVMF_MAX_LISTENERS_PER_SUBSYSTEM	16
243 
/* An NVMe-oF subsystem: a collection of namespaces, controllers,
 * listeners and allowed hosts, identified by its subnqn. */
struct spdk_nvmf_subsystem {
	/* Thread on which this subsystem's state changes are performed. */
	struct spdk_thread				*thread;

	/* Subsystem ID — presumably the sid used to index tgt->subsystems;
	 * verify against the target code. */
	uint32_t					id;

	enum spdk_nvmf_subsystem_state			state;
	/* NVMe vs. discovery subsystem. */
	enum spdk_nvmf_subtype				subtype;

	/* Next candidate controller ID for allocation. */
	uint16_t					next_cntlid;
	struct {
		uint8_t					allow_any_host : 1;
		uint8_t					allow_any_listener : 1;
		uint8_t					ana_reporting : 1;
		uint8_t					reserved : 5;
	} flags;

	/* boolean for state change synchronization */
	bool						changing_state;

	bool						destroying;
	bool						async_destroy;

	struct spdk_nvmf_tgt				*tgt;

	/* Array of pointers to namespaces of size max_nsid indexed by nsid - 1 */
	struct spdk_nvmf_ns				**ns;
	uint32_t					max_nsid;

	/* Valid controller ID range for this subsystem (inclusive). */
	uint16_t					min_cntlid;
	uint16_t					max_cntlid;

	TAILQ_HEAD(, spdk_nvmf_ctrlr)			ctrlrs;

	/* A mutex used to protect the hosts list and allow_any_host flag. Unlike the namespace
	 * array, this list is not used on the I/O path (it's needed for handling things like
	 * the CONNECT command), so use a mutex to protect it instead of requiring the subsystem
	 * state to be paused. This removes the requirement to pause the subsystem when hosts
	 * are added or removed dynamically. */
	pthread_mutex_t					mutex;
	TAILQ_HEAD(, spdk_nvmf_host)			hosts;
	TAILQ_HEAD(, spdk_nvmf_subsystem_listener)	listeners;
	/* Allocator for listener IDs. */
	struct spdk_bit_array				*used_listener_ids;

	TAILQ_ENTRY(spdk_nvmf_subsystem)		entries;

	/* Completion callback for asynchronous destruction. */
	nvmf_subsystem_destroy_cb			async_destroy_cb;
	void						*async_destroy_cb_arg;

	/* Serial number, model number and subsystem NQN reported to hosts. */
	char						sn[SPDK_NVME_CTRLR_SN_LEN + 1];
	char						mn[SPDK_NVME_CTRLR_MN_LEN + 1];
	char						subnqn[SPDK_NVMF_NQN_MAX_LEN + 1];

	/* Array of namespace count per ANA group of size max_nsid indexed anagrpid - 1
	 * It will be enough for ANA group to use the same size as namespaces.
	 */
	uint32_t					*ana_group;
};
301 
/*
 * Per-poll-group subsystem management.  The add/remove/pause/resume
 * variants complete asynchronously via cb_fn(cb_arg, status).
 */
int nvmf_poll_group_update_subsystem(struct spdk_nvmf_poll_group *group,
				     struct spdk_nvmf_subsystem *subsystem);
int nvmf_poll_group_add_subsystem(struct spdk_nvmf_poll_group *group,
				  struct spdk_nvmf_subsystem *subsystem,
				  spdk_nvmf_poll_group_mod_done cb_fn, void *cb_arg);
void nvmf_poll_group_remove_subsystem(struct spdk_nvmf_poll_group *group,
				      struct spdk_nvmf_subsystem *subsystem, spdk_nvmf_poll_group_mod_done cb_fn, void *cb_arg);
void nvmf_poll_group_pause_subsystem(struct spdk_nvmf_poll_group *group,
				     struct spdk_nvmf_subsystem *subsystem,
				     uint32_t nsid,
				     spdk_nvmf_poll_group_mod_done cb_fn, void *cb_arg);
void nvmf_poll_group_resume_subsystem(struct spdk_nvmf_poll_group *group,
				      struct spdk_nvmf_subsystem *subsystem, spdk_nvmf_poll_group_mod_done cb_fn, void *cb_arg);
315 
/* Discovery log maintenance and (partial) log page reads into an iovec. */
void nvmf_update_discovery_log(struct spdk_nvmf_tgt *tgt, const char *hostnqn);
void nvmf_get_discovery_log_page(struct spdk_nvmf_tgt *tgt, const char *hostnqn, struct iovec *iov,
				 uint32_t iovcnt, uint64_t offset, uint32_t length,
				 struct spdk_nvme_transport_id *cmd_source_trid);
320 
/* Controller lifetime and command-processing entry points, plus
 * capability queries for optional commands. */
void nvmf_ctrlr_destruct(struct spdk_nvmf_ctrlr *ctrlr);
int nvmf_ctrlr_process_admin_cmd(struct spdk_nvmf_request *req);
int nvmf_ctrlr_process_io_cmd(struct spdk_nvmf_request *req);
bool nvmf_ctrlr_dsm_supported(struct spdk_nvmf_ctrlr *ctrlr);
bool nvmf_ctrlr_write_zeroes_supported(struct spdk_nvmf_ctrlr *ctrlr);
bool nvmf_ctrlr_copy_supported(struct spdk_nvmf_ctrlr *ctrlr);
void nvmf_ctrlr_ns_changed(struct spdk_nvmf_ctrlr *ctrlr, uint32_t nsid);
bool nvmf_ctrlr_use_zcopy(struct spdk_nvmf_request *req);
329 
/*
 * Bdev-backed handlers for NVMe I/O commands.  Each submits the request
 * to the given bdev through desc/ch; the int-returning handlers return an
 * SPDK_NVMF_REQUEST_EXEC_STATUS_* value (see the zcopy_start documentation
 * below for the convention).
 */
void nvmf_bdev_ctrlr_identify_ns(struct spdk_nvmf_ns *ns, struct spdk_nvme_ns_data *nsdata,
				 bool dif_insert_or_strip);
int nvmf_bdev_ctrlr_read_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
			     struct spdk_io_channel *ch, struct spdk_nvmf_request *req);
int nvmf_bdev_ctrlr_write_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
			      struct spdk_io_channel *ch, struct spdk_nvmf_request *req);
int nvmf_bdev_ctrlr_compare_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
				struct spdk_io_channel *ch, struct spdk_nvmf_request *req);
int nvmf_bdev_ctrlr_compare_and_write_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
		struct spdk_io_channel *ch, struct spdk_nvmf_request *cmp_req, struct spdk_nvmf_request *write_req);
int nvmf_bdev_ctrlr_write_zeroes_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
				     struct spdk_io_channel *ch, struct spdk_nvmf_request *req);
int nvmf_bdev_ctrlr_flush_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
			      struct spdk_io_channel *ch, struct spdk_nvmf_request *req);
int nvmf_bdev_ctrlr_dsm_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
			    struct spdk_io_channel *ch, struct spdk_nvmf_request *req);
int nvmf_bdev_ctrlr_copy_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
			     struct spdk_io_channel *ch, struct spdk_nvmf_request *req);
int nvmf_bdev_ctrlr_nvme_passthru_io(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
				     struct spdk_io_channel *ch, struct spdk_nvmf_request *req);
bool nvmf_bdev_ctrlr_get_dif_ctx(struct spdk_bdev *bdev, struct spdk_nvme_cmd *cmd,
				 struct spdk_dif_ctx *dif_ctx);
bool nvmf_bdev_zcopy_enabled(struct spdk_bdev *bdev);
353 
/* Controller, listener and ANA management on a subsystem, plus
 * transport lookup/dump helpers. */
int nvmf_subsystem_add_ctrlr(struct spdk_nvmf_subsystem *subsystem,
			     struct spdk_nvmf_ctrlr *ctrlr);
void nvmf_subsystem_remove_ctrlr(struct spdk_nvmf_subsystem *subsystem,
				 struct spdk_nvmf_ctrlr *ctrlr);
void nvmf_subsystem_remove_all_listeners(struct spdk_nvmf_subsystem *subsystem,
		bool stop);
struct spdk_nvmf_ctrlr *nvmf_subsystem_get_ctrlr(struct spdk_nvmf_subsystem *subsystem,
		uint16_t cntlid);
struct spdk_nvmf_subsystem_listener *nvmf_subsystem_find_listener(
	struct spdk_nvmf_subsystem *subsystem,
	const struct spdk_nvme_transport_id *trid);
struct spdk_nvmf_listener *nvmf_transport_find_listener(
	struct spdk_nvmf_transport *transport,
	const struct spdk_nvme_transport_id *trid);
void nvmf_transport_dump_opts(struct spdk_nvmf_transport *transport, struct spdk_json_write_ctx *w,
			      bool named);
void nvmf_transport_listen_dump_opts(struct spdk_nvmf_transport *transport,
				     const struct spdk_nvme_transport_id *trid, struct spdk_json_write_ctx *w);
void nvmf_subsystem_set_ana_state(struct spdk_nvmf_subsystem *subsystem,
				  const struct spdk_nvme_transport_id *trid,
				  enum spdk_nvme_ana_state ana_state, uint32_t anagrpid,
				  spdk_nvmf_tgt_subsystem_listen_done_fn cb_fn, void *cb_arg);
bool nvmf_subsystem_get_ana_reporting(struct spdk_nvmf_subsystem *subsystem);
377 
378 /**
379  * Sets the controller ID range for a subsystem.
380  * Valid range is [1, 0xFFEF].
381  *
382  * May only be performed on subsystems in the INACTIVE state.
383  *
384  * \param subsystem Subsystem to modify.
385  * \param min_cntlid Minimum controller ID.
386  * \param max_cntlid Maximum controller ID.
387  *
388  * \return 0 on success, or negated errno value on failure.
389  */
390 int nvmf_subsystem_set_cntlid_range(struct spdk_nvmf_subsystem *subsystem,
391 				    uint16_t min_cntlid, uint16_t max_cntlid);
392 
/* Asynchronous event (AEN) generation and reservation notification
 * helpers. */
int nvmf_ctrlr_async_event_ns_notice(struct spdk_nvmf_ctrlr *ctrlr);
int nvmf_ctrlr_async_event_ana_change_notice(struct spdk_nvmf_ctrlr *ctrlr);
void nvmf_ctrlr_async_event_discovery_log_change_notice(void *ctx);
void nvmf_ctrlr_async_event_reservation_notification(struct spdk_nvmf_ctrlr *ctrlr);
int nvmf_ctrlr_async_event_error_event(struct spdk_nvmf_ctrlr *ctrlr,
				       union spdk_nvme_async_event_completion event);
void nvmf_ns_reservation_request(void *ctx);
void nvmf_ctrlr_reservation_notice_log(struct spdk_nvmf_ctrlr *ctrlr,
				       struct spdk_nvmf_ns *ns,
				       enum spdk_nvme_reservation_notification_log_page_type type);
403 
404 /*
405  * Abort aer is sent on a per controller basis and sends a completion for the aer to the host.
406  * This function should be called when attempting to recover in error paths when it is OK for
407  * the host to send a subsequent AER.
408  */
409 void nvmf_ctrlr_abort_aer(struct spdk_nvmf_ctrlr *ctrlr);
410 
411 /*
412  * Abort zero-copy requests that already got the buffer (received zcopy_start cb), but haven't
413  * started zcopy_end.  These requests are kept on the outstanding queue, but are not waiting for a
414  * completion from the bdev layer, so, when a qpair is being disconnected, we need to kick them to
415  * force their completion.
416  */
417 void nvmf_qpair_abort_pending_zcopy_reqs(struct spdk_nvmf_qpair *qpair);
418 
419 /*
420  * Free aer simply frees the rdma resources for the aer without informing the host.
421  * This function should be called when deleting a qpair when one wants to make sure
422  * the qpair is completely empty before freeing the request. The reason we free the
423  * AER without sending a completion is to prevent the host from sending another AER.
424  */
425 void nvmf_qpair_free_aer(struct spdk_nvmf_qpair *qpair);
426 
427 int nvmf_ctrlr_abort_request(struct spdk_nvmf_request *req);
428 
429 void nvmf_ctrlr_set_fatal_status(struct spdk_nvmf_ctrlr *ctrlr);
430 
431 static inline struct spdk_nvmf_ns *
432 _nvmf_subsystem_get_ns(struct spdk_nvmf_subsystem *subsystem, uint32_t nsid)
433 {
434 	/* NOTE: This implicitly also checks for 0, since 0 - 1 wraps around to UINT32_MAX. */
435 	if (spdk_unlikely(nsid - 1 >= subsystem->max_nsid)) {
436 		return NULL;
437 	}
438 
439 	return subsystem->ns[nsid - 1];
440 }
441 
442 static inline bool
443 nvmf_qpair_is_admin_queue(struct spdk_nvmf_qpair *qpair)
444 {
445 	return qpair->qid == 0;
446 }
447 
448 /**
449  * Initiates a zcopy start operation
450  *
451  * \param bdev The \ref spdk_bdev
452  * \param desc The \ref spdk_bdev_desc
453  * \param ch The \ref spdk_io_channel
454  * \param req The \ref spdk_nvmf_request passed to the bdev for processing
455  *
456  * \return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE if the command was completed immediately or
457  *         SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS if the command was submitted and will be
458  *         completed asynchronously.  Asynchronous completions are notified through
459  *         spdk_nvmf_request_complete().
460  */
461 int nvmf_bdev_ctrlr_zcopy_start(struct spdk_bdev *bdev,
462 				struct spdk_bdev_desc *desc,
463 				struct spdk_io_channel *ch,
464 				struct spdk_nvmf_request *req);
465 
466 /**
467  * Ends a zcopy operation
468  *
469  * \param req The NVMe-oF request
470  * \param commit Flag indicating whether the buffers should be committed
471  */
472 void nvmf_bdev_ctrlr_zcopy_end(struct spdk_nvmf_request *req, bool commit);
473 
474 #endif /* __NVMF_INTERNAL_H__ */
475