/*-
 *   BSD LICENSE
 *
 *   Copyright (c) Intel Corporation. All rights reserved.
 *   Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef __NVMF_INTERNAL_H__
#define __NVMF_INTERNAL_H__

#include "spdk/stdinc.h"

#include "spdk/likely.h"
#include "spdk/nvmf.h"
#include "spdk/nvmf_cmd.h"
#include "spdk/nvmf_transport.h"
#include "spdk/nvmf_spec.h"
#include "spdk/assert.h"
#include "spdk/bdev.h"
#include "spdk/queue.h"
#include "spdk/util.h"
#include "spdk/thread.h"

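/* Maximum number of outstanding Asynchronous Event Request (AER) commands tracked
 * per controller; this sizes the aer_req array in struct spdk_nvmf_ctrlr. */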
#define NVMF_MAX_ASYNC_EVENTS	(4)

/* The spec reserves cntlid values in the range FFF0h to FFFFh. */
#define NVMF_MIN_CNTLID 1
#define NVMF_MAX_CNTLID 0xFFEF

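/*
 * Subsystem lifecycle states. A subsystem moves INACTIVE -> ACTIVATING -> ACTIVE,
 * may be temporarily quiesced via PAUSING -> PAUSED -> RESUMING, and is torn down
 * through DEACTIVATING back to INACTIVE.
 */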
enum spdk_nvmf_subsystem_state {
	SPDK_NVMF_SUBSYSTEM_INACTIVE = 0,
	SPDK_NVMF_SUBSYSTEM_ACTIVATING,
	SPDK_NVMF_SUBSYSTEM_ACTIVE,
	SPDK_NVMF_SUBSYSTEM_PAUSING,
	SPDK_NVMF_SUBSYSTEM_PAUSED,
	SPDK_NVMF_SUBSYSTEM_RESUMING,
	SPDK_NVMF_SUBSYSTEM_DEACTIVATING,
	SPDK_NVMF_SUBSYSTEM_NUM_STATES,
};

struct spdk_nvmf_tgt {
	char					name[NVMF_TGT_NAME_MAX_LENGTH];

	pthread_mutex_t				mutex;

	uint64_t				discovery_genctr;

	struct spdk_poller			*accept_poller;

	uint32_t				max_subsystems;

	/* Array of subsystem pointers of size max_subsystems indexed by sid */
	struct spdk_nvmf_subsystem		**subsystems;

	TAILQ_HEAD(, spdk_nvmf_transport)	transports;
	TAILQ_HEAD(, spdk_nvmf_poll_group)	poll_groups;

	/* Used for round-robin assignment of connections to poll groups */
	struct spdk_nvmf_poll_group		*next_poll_group;

	spdk_nvmf_tgt_destroy_done_fn		*destroy_cb_fn;
	void					*destroy_cb_arg;

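	/* Command Retry Delay Times (CRDT1-3) advertised to hosts in the Identify Controller data */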
	uint16_t				crdt[3];

	TAILQ_ENTRY(spdk_nvmf_tgt)		link;
};

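/* A host NQN that is allowed to connect to a subsystem */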
struct spdk_nvmf_host {
	char				nqn[SPDK_NVMF_NQN_MAX_LEN + 1];
	TAILQ_ENTRY(spdk_nvmf_host)	link;
};

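/* A listen address (trid) on which a subsystem accepts connections, together with its ANA state */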
struct spdk_nvmf_subsystem_listener {
	struct spdk_nvmf_subsystem			*subsystem;
	spdk_nvmf_tgt_subsystem_listen_done_fn		cb_fn;
	void						*cb_arg;
	struct spdk_nvme_transport_id			*trid;
	struct spdk_nvmf_transport			*transport;
	enum spdk_nvme_ana_state			ana_state;
	uint64_t					ana_state_change_count;
	TAILQ_ENTRY(spdk_nvmf_subsystem_listener)	link;
};

/* Maximum number of registrants supported per namespace */
#define SPDK_NVMF_MAX_NUM_REGISTRANTS		16

struct spdk_nvmf_registrant_info {
	uint64_t		rkey;
	char			host_uuid[SPDK_UUID_STRING_LEN];
};

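/* Persistent reservation state for a namespace, saved to and restored from the
 * Persist Through Power Loss (PTPL) file */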
struct spdk_nvmf_reservation_info {
	bool					ptpl_activated;
	enum spdk_nvme_reservation_type		rtype;
	uint64_t				crkey;
	char					bdev_uuid[SPDK_UUID_STRING_LEN];
	char					holder_uuid[SPDK_UUID_STRING_LEN];
	uint32_t				num_regs;
	struct spdk_nvmf_registrant_info	registrants[SPDK_NVMF_MAX_NUM_REGISTRANTS];
};

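/* Per-poll-group namespace state: the I/O channel plus a copy of the namespace's
 * reservation state that is consulted on the I/O path */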
struct spdk_nvmf_subsystem_pg_ns_info {
	struct spdk_io_channel		*channel;
	struct spdk_uuid		uuid;
	/* current reservation key, no reservation if the value is 0 */
	uint64_t			crkey;
	/* reservation type */
	enum spdk_nvme_reservation_type	rtype;
	/* Host ID which holds the reservation */
	struct spdk_uuid		holder_id;
	/* Host IDs of the registrants of this namespace */
	struct spdk_uuid		reg_hostid[SPDK_NVMF_MAX_NUM_REGISTRANTS];
	uint64_t			num_blocks;

	/* I/O outstanding to this namespace */
	uint64_t			io_outstanding;
	enum spdk_nvmf_subsystem_state	state;
};

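/* Completion callback for asynchronous poll group add/update/pause/resume operations */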
typedef void(*spdk_nvmf_poll_group_mod_done)(void *cb_arg, int status);

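/* Per-poll-group state for a single subsystem */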
struct spdk_nvmf_subsystem_poll_group {
	/* Array of namespace information for each namespace indexed by nsid - 1 */
	struct spdk_nvmf_subsystem_pg_ns_info	*ns_info;
	uint32_t				num_ns;

	/* Number of ADMIN and FABRICS requests outstanding */
	uint64_t				mgmt_io_outstanding;
	spdk_nvmf_poll_group_mod_done		cb_fn;
	void					*cb_arg;

	enum spdk_nvmf_subsystem_state		state;

	TAILQ_HEAD(, spdk_nvmf_request)		queued;
};

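/* A host that has registered a reservation key with a namespace */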
struct spdk_nvmf_registrant {
	TAILQ_ENTRY(spdk_nvmf_registrant) link;
	struct spdk_uuid hostid;
	/* Registration key */
	uint64_t rkey;
};

struct spdk_nvmf_ns {
	uint32_t nsid;
	struct spdk_nvmf_subsystem *subsystem;
	struct spdk_bdev *bdev;
	struct spdk_bdev_desc *desc;
	struct spdk_nvmf_ns_opts opts;
	/* reservation notification mask */
	uint32_t mask;
	/* generation code */
	uint32_t gen;
	/* registrants head */
	TAILQ_HEAD(, spdk_nvmf_registrant) registrants;
	/* current reservation key */
	uint64_t crkey;
	/* reservation type */
	enum spdk_nvme_reservation_type rtype;
	/* current reservation holder, only valid if reservation type can only have one holder */
	struct spdk_nvmf_registrant *holder;
	/* Persist Through Power Loss file which contains the persistent reservation */
	char *ptpl_file;
	/* Persist Through Power Loss feature is enabled */
	bool ptpl_activated;
};

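/* Per-controller feature values, as set and queried via the Set Features and Get Features commands */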
struct spdk_nvmf_ctrlr_feat {
	union spdk_nvme_feat_arbitration arbitration;
	union spdk_nvme_feat_power_management power_management;
	union spdk_nvme_feat_error_recovery error_recovery;
	union spdk_nvme_feat_volatile_write_cache volatile_write_cache;
	union spdk_nvme_feat_number_of_queues number_of_queues;
	union spdk_nvme_feat_write_atomicity write_atomicity;
	union spdk_nvme_feat_async_event_configuration async_event_configuration;
	union spdk_nvme_feat_keep_alive_timer keep_alive_timer;
};

/*
 * NVMf reservation notification log page.
 */
struct spdk_nvmf_reservation_log {
	struct spdk_nvme_reservation_notification_log	log;
	TAILQ_ENTRY(spdk_nvmf_reservation_log)		link;
	struct spdk_nvmf_ctrlr				*ctrlr;
};

/*
 * NVMf async event completion.
 */
struct spdk_nvmf_async_event_completion {
	union spdk_nvme_async_event_completion		event;
	STAILQ_ENTRY(spdk_nvmf_async_event_completion)	link;
};

/*
 * This structure represents an NVMe-oF controller,
 * which is like a "session" in networking terms.
 */
struct spdk_nvmf_ctrlr {
	uint16_t			cntlid;
	char				hostnqn[SPDK_NVMF_NQN_MAX_LEN + 1];
	struct spdk_nvmf_subsystem	*subsys;

	struct spdk_nvmf_ctrlr_data	cdata;

	struct spdk_nvmf_registers	vcprop;

	struct spdk_nvmf_ctrlr_feat feat;

	struct spdk_nvmf_qpair	*admin_qpair;
	struct spdk_thread	*thread;
	struct spdk_bit_array	*qpair_mask;

	const struct spdk_nvmf_subsystem_listener	*listener;

	struct spdk_nvmf_request *aer_req[NVMF_MAX_ASYNC_EVENTS];
	STAILQ_HEAD(, spdk_nvmf_async_event_completion) async_events;
	uint64_t notice_aen_mask;
	uint8_t nr_aer_reqs;
	struct spdk_uuid  hostid;

	uint32_t association_timeout; /* in milliseconds */
	uint16_t changed_ns_list_count;
	struct spdk_nvme_ns_list changed_ns_list;
	uint64_t log_page_count;
	uint8_t num_avail_log_pages;
	TAILQ_HEAD(log_page_head, spdk_nvmf_reservation_log) log_head;

	/* Time to trigger keep-alive: poller_time = now_tick + period */
	uint64_t			last_keep_alive_tick;
	struct spdk_poller		*keep_alive_poller;

	struct spdk_poller		*association_timer;

	bool				dif_insert_or_strip;
	bool				in_destruct;
	bool				disconnect_in_progress;
	bool				acre_enabled;

	TAILQ_ENTRY(spdk_nvmf_ctrlr)	link;
};

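/*
 * An NVMe-oF subsystem: the entity that exposes namespaces to hosts. It owns the
 * namespace array, the controllers created for connected hosts, the allowed host
 * NQNs, and the listeners on which it is advertised.
 */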
struct spdk_nvmf_subsystem {
	struct spdk_thread				*thread;

	uint32_t					id;

	enum spdk_nvmf_subsystem_state			state;
	enum spdk_nvmf_subtype				subtype;

	uint16_t					next_cntlid;
	struct {
		uint8_t					allow_any_host : 1;
		uint8_t					allow_any_listener : 1;
		uint8_t					ana_reporting : 1;
		uint8_t					reserved : 5;
	} flags;

	/* boolean for state change synchronization */
	bool						changing_state;

	struct spdk_nvmf_tgt				*tgt;

	/* Array of pointers to namespaces of size max_nsid indexed by nsid - 1 */
	struct spdk_nvmf_ns				**ns;
	uint32_t					max_nsid;

	uint16_t					min_cntlid;
	uint16_t					max_cntlid;

	TAILQ_HEAD(, spdk_nvmf_ctrlr)			ctrlrs;

	/* A mutex used to protect the hosts list and allow_any_host flag. Unlike the namespace
	 * array, this list is not used on the I/O path (it's needed for handling things like
	 * the CONNECT command), so use a mutex to protect it instead of requiring the subsystem
	 * state to be paused. This removes the requirement to pause the subsystem when hosts
	 * are added or removed dynamically. */
	pthread_mutex_t					mutex;
	TAILQ_HEAD(, spdk_nvmf_host)			hosts;
	TAILQ_HEAD(, spdk_nvmf_subsystem_listener)	listeners;

	TAILQ_ENTRY(spdk_nvmf_subsystem)		entries;

	char						sn[SPDK_NVME_CTRLR_SN_LEN + 1];
	char						mn[SPDK_NVME_CTRLR_MN_LEN + 1];
	char						subnqn[SPDK_NVMF_NQN_MAX_LEN + 1];
};

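/*
 * Poll group management: attach transports and subsystems to a poll group and drive
 * asynchronous subsystem state transitions (add, remove, pause, resume) for that group.
 * cb_fn is invoked with the operation status when the transition completes.
 */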
int nvmf_poll_group_add_transport(struct spdk_nvmf_poll_group *group,
				  struct spdk_nvmf_transport *transport);
int nvmf_poll_group_update_subsystem(struct spdk_nvmf_poll_group *group,
				     struct spdk_nvmf_subsystem *subsystem);
int nvmf_poll_group_add_subsystem(struct spdk_nvmf_poll_group *group,
				  struct spdk_nvmf_subsystem *subsystem,
				  spdk_nvmf_poll_group_mod_done cb_fn, void *cb_arg);
void nvmf_poll_group_remove_subsystem(struct spdk_nvmf_poll_group *group,
				      struct spdk_nvmf_subsystem *subsystem, spdk_nvmf_poll_group_mod_done cb_fn, void *cb_arg);
void nvmf_poll_group_pause_subsystem(struct spdk_nvmf_poll_group *group,
				     struct spdk_nvmf_subsystem *subsystem,
				     uint32_t nsid,
				     spdk_nvmf_poll_group_mod_done cb_fn, void *cb_arg);
void nvmf_poll_group_resume_subsystem(struct spdk_nvmf_poll_group *group,
				      struct spdk_nvmf_subsystem *subsystem, spdk_nvmf_poll_group_mod_done cb_fn, void *cb_arg);

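/*
 * Discovery service helpers: record that the discovery log has changed and copy the
 * requested range of the discovery log page into the supplied iovecs.
 */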
void nvmf_update_discovery_log(struct spdk_nvmf_tgt *tgt, const char *hostnqn);
void nvmf_get_discovery_log_page(struct spdk_nvmf_tgt *tgt, const char *hostnqn,
				 struct iovec *iov,
				 uint32_t iovcnt, uint64_t offset, uint32_t length);

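/*
 * Controller command processing: dispatch admin and I/O commands, report optional
 * command support, and notify a controller of namespace changes.
 */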
void nvmf_ctrlr_destruct(struct spdk_nvmf_ctrlr *ctrlr);
int nvmf_ctrlr_process_admin_cmd(struct spdk_nvmf_request *req);
int nvmf_ctrlr_process_io_cmd(struct spdk_nvmf_request *req);
bool nvmf_ctrlr_dsm_supported(struct spdk_nvmf_ctrlr *ctrlr);
bool nvmf_ctrlr_write_zeroes_supported(struct spdk_nvmf_ctrlr *ctrlr);
void nvmf_ctrlr_ns_changed(struct spdk_nvmf_ctrlr *ctrlr, uint32_t nsid);

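/*
 * bdev layer handlers: build namespace identify data and translate NVMe I/O commands
 * into operations on the namespace's backing block device (bdev).
 */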
void nvmf_bdev_ctrlr_identify_ns(struct spdk_nvmf_ns *ns, struct spdk_nvme_ns_data *nsdata,
				 bool dif_insert_or_strip);
int nvmf_bdev_ctrlr_read_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
			     struct spdk_io_channel *ch, struct spdk_nvmf_request *req);
int nvmf_bdev_ctrlr_write_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
			      struct spdk_io_channel *ch, struct spdk_nvmf_request *req);
int nvmf_bdev_ctrlr_compare_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
				struct spdk_io_channel *ch, struct spdk_nvmf_request *req);
int nvmf_bdev_ctrlr_compare_and_write_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
		struct spdk_io_channel *ch, struct spdk_nvmf_request *cmp_req, struct spdk_nvmf_request *write_req);
int nvmf_bdev_ctrlr_write_zeroes_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
				     struct spdk_io_channel *ch, struct spdk_nvmf_request *req);
int nvmf_bdev_ctrlr_flush_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
			      struct spdk_io_channel *ch, struct spdk_nvmf_request *req);
int nvmf_bdev_ctrlr_dsm_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
			    struct spdk_io_channel *ch, struct spdk_nvmf_request *req);
int nvmf_bdev_ctrlr_nvme_passthru_io(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
				     struct spdk_io_channel *ch, struct spdk_nvmf_request *req);
bool nvmf_bdev_ctrlr_get_dif_ctx(struct spdk_bdev *bdev, struct spdk_nvme_cmd *cmd,
				 struct spdk_dif_ctx *dif_ctx);

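/*
 * Subsystem bookkeeping: add, remove, and look up controllers by cntlid, manage
 * listeners, and update per-listener ANA state.
 */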
int nvmf_subsystem_add_ctrlr(struct spdk_nvmf_subsystem *subsystem,
			     struct spdk_nvmf_ctrlr *ctrlr);
void nvmf_subsystem_remove_ctrlr(struct spdk_nvmf_subsystem *subsystem,
				 struct spdk_nvmf_ctrlr *ctrlr);
void nvmf_subsystem_remove_all_listeners(struct spdk_nvmf_subsystem *subsystem,
		bool stop);
struct spdk_nvmf_ctrlr *nvmf_subsystem_get_ctrlr(struct spdk_nvmf_subsystem *subsystem,
		uint16_t cntlid);
struct spdk_nvmf_subsystem_listener *nvmf_subsystem_find_listener(
	struct spdk_nvmf_subsystem *subsystem,
	const struct spdk_nvme_transport_id *trid);
struct spdk_nvmf_listener *nvmf_transport_find_listener(
	struct spdk_nvmf_transport *transport,
	const struct spdk_nvme_transport_id *trid);
void nvmf_subsystem_set_ana_state(struct spdk_nvmf_subsystem *subsystem,
				  const struct spdk_nvme_transport_id *trid,
				  enum spdk_nvme_ana_state ana_state,
				  spdk_nvmf_tgt_subsystem_listen_done_fn cb_fn, void *cb_arg);

/**
 * Sets the controller ID range for a subsystem.
 * Valid range is [1, 0xFFEF].
 *
 * May only be performed on subsystems in the INACTIVE state.
 *
 * \param subsystem Subsystem to modify.
 * \param min_cntlid Minimum controller ID.
 * \param max_cntlid Maximum controller ID.
 *
 * \return 0 on success, or negated errno value on failure.
 */
int nvmf_subsystem_set_cntlid_range(struct spdk_nvmf_subsystem *subsystem,
				    uint16_t min_cntlid, uint16_t max_cntlid);

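/*
 * Asynchronous Event Notification triggers: namespace attribute changes, ANA changes,
 * discovery log changes, and reservation notifications.
 */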
int nvmf_ctrlr_async_event_ns_notice(struct spdk_nvmf_ctrlr *ctrlr);
int nvmf_ctrlr_async_event_ana_change_notice(struct spdk_nvmf_ctrlr *ctrlr);
int nvmf_ctrlr_async_event_discovery_log_change_notice(struct spdk_nvmf_ctrlr *ctrlr);
void nvmf_ctrlr_async_event_reservation_notification(struct spdk_nvmf_ctrlr *ctrlr);
void nvmf_ns_reservation_request(void *ctx);
void nvmf_ctrlr_reservation_notice_log(struct spdk_nvmf_ctrlr *ctrlr,
				       struct spdk_nvmf_ns *ns,
				       enum spdk_nvme_reservation_notification_log_page_type type);

/*
 * AER abort is performed on a per-controller basis and sends a completion for the
 * AER back to the host. This function should be called when attempting to recover
 * in error paths where it is OK for the host to send a subsequent AER.
 */
void nvmf_ctrlr_abort_aer(struct spdk_nvmf_ctrlr *ctrlr);

/*
 * Freeing an AER simply frees the RDMA resources for the AER without informing
 * the host. This function should be called when deleting a qpair to make sure
 * the qpair is completely empty before freeing the request. The reason the AER
 * is freed without sending a completion is to prevent the host from sending
 * another AER.
 */
void nvmf_qpair_free_aer(struct spdk_nvmf_qpair *qpair);

int nvmf_ctrlr_abort_request(struct spdk_nvmf_request *req);

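/*
 * Look up a namespace by NSID. NSIDs are 1-based, so ns[nsid - 1] holds the entry.
 * Returns NULL if the NSID is 0 or out of range, or if no namespace exists at that NSID.
 */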
static inline struct spdk_nvmf_ns *
_nvmf_subsystem_get_ns(struct spdk_nvmf_subsystem *subsystem, uint32_t nsid)
{
	/* NOTE: This implicitly also checks for 0, since 0 - 1 wraps around to UINT32_MAX. */
	if (spdk_unlikely(nsid - 1 >= subsystem->max_nsid)) {
		return NULL;
	}

	return subsystem->ns[nsid - 1];
}

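/* The admin queue pair always has queue ID 0; all other queue IDs belong to I/O queues. */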
static inline bool
nvmf_qpair_is_admin_queue(struct spdk_nvmf_qpair *qpair)
{
	return qpair->qid == 0;
}

#endif /* __NVMF_INTERNAL_H__ */