xref: /spdk/lib/nvmf/nvmf_internal.h (revision 407e88fd2ab020d753e33014cf759353a9901b51)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright (c) Intel Corporation. All rights reserved.
5  *   Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #ifndef __NVMF_INTERNAL_H__
35 #define __NVMF_INTERNAL_H__
36 
37 #include "spdk/stdinc.h"
38 
39 #include "spdk/likely.h"
40 #include "spdk/nvmf.h"
41 #include "spdk/nvmf_spec.h"
42 #include "spdk/assert.h"
43 #include "spdk/bdev.h"
44 #include "spdk/queue.h"
45 #include "spdk/util.h"
46 #include "spdk/thread.h"
47 
/* Upper bound on SGL entries; struct spdk_nvmf_request sizes its iov[] from this. */
#define SPDK_NVMF_MAX_SGL_ENTRIES	16

/*
 * The AIO backend requires data buffers aligned to the block size;
 * an extra 4KiB-aligned data buffer should work for most devices.
 *
 * SHIFT_4KB is log2(4096); the alignment is derived from it, and the mask
 * is (alignment - 1).  The 1LL operand makes the mask a long long.
 */
#define SHIFT_4KB			12u
#define NVMF_DATA_BUFFER_ALIGNMENT	(1u << SHIFT_4KB)
#define NVMF_DATA_BUFFER_MASK		(NVMF_DATA_BUFFER_ALIGNMENT - 1LL)
56 
/*
 * States a subsystem can be in.  The numeric values are spelled out
 * explicitly; they match the implicit numbering (0..6 in declaration order).
 */
enum spdk_nvmf_subsystem_state {
	SPDK_NVMF_SUBSYSTEM_INACTIVE		= 0,
	SPDK_NVMF_SUBSYSTEM_ACTIVATING		= 1,
	SPDK_NVMF_SUBSYSTEM_ACTIVE		= 2,
	SPDK_NVMF_SUBSYSTEM_PAUSING		= 3,
	SPDK_NVMF_SUBSYSTEM_PAUSED		= 4,
	SPDK_NVMF_SUBSYSTEM_RESUMING		= 5,
	SPDK_NVMF_SUBSYSTEM_DEACTIVATING	= 6,
};
66 
/*
 * States a queue pair can be in.  Values are written out explicitly and
 * match the implicit numbering (0..3 in declaration order).
 */
enum spdk_nvmf_qpair_state {
	SPDK_NVMF_QPAIR_UNINITIALIZED	= 0,
	SPDK_NVMF_QPAIR_ACTIVE		= 1,
	SPDK_NVMF_QPAIR_DEACTIVATING	= 2,
	SPDK_NVMF_QPAIR_ERROR		= 3,
};
73 
/*
 * Callback type taking an opaque argument and an int status.
 * struct spdk_nvmf_qpair stores one of these in its state_cb member.
 */
typedef void (*spdk_nvmf_state_change_done)(void *cb_arg, int status);
75 
76 struct spdk_nvmf_tgt {
77 	char					name[NVMF_TGT_NAME_MAX_LENGTH];
78 
79 	uint64_t				discovery_genctr;
80 
81 	uint32_t				max_subsystems;
82 
83 	/* Array of subsystem pointers of size max_subsystems indexed by sid */
84 	struct spdk_nvmf_subsystem		**subsystems;
85 
86 	struct spdk_nvmf_discovery_log_page	*discovery_log_page;
87 	size_t					discovery_log_page_size;
88 	TAILQ_HEAD(, spdk_nvmf_transport)	transports;
89 
90 	spdk_nvmf_tgt_destroy_done_fn		*destroy_cb_fn;
91 	void					*destroy_cb_arg;
92 
93 	TAILQ_ENTRY(spdk_nvmf_tgt)		link;
94 };
95 
96 struct spdk_nvmf_host {
97 	char				nqn[SPDK_NVMF_NQN_MAX_LEN + 1];
98 	TAILQ_ENTRY(spdk_nvmf_host)	link;
99 };
100 
101 struct spdk_nvmf_listener {
102 	struct spdk_nvme_transport_id	trid;
103 	struct spdk_nvmf_transport	*transport;
104 	TAILQ_ENTRY(spdk_nvmf_listener)	link;
105 };
106 
/*
 * Element of the singly-linked tail queue used as buf_cache in
 * struct spdk_nvmf_transport_poll_group.  It holds only the queue linkage.
 */
struct spdk_nvmf_transport_pg_cache_buf {
	STAILQ_ENTRY(spdk_nvmf_transport_pg_cache_buf)	link;
};
110 
/* Per-transport poll group; struct spdk_nvmf_poll_group links these on its tgroups queue. */
struct spdk_nvmf_transport_poll_group {
	struct spdk_nvmf_transport *transport;

	/* Singly-linked tail queue of cached buffers, with two counters kept beside it. */
	STAILQ_HEAD(, spdk_nvmf_transport_pg_cache_buf) buf_cache;
	uint32_t buf_cache_count;
	uint32_t buf_cache_size;

	/* Poll group this transport poll group is associated with. */
	struct spdk_nvmf_poll_group *group;

	/* Tail queue linkage. */
	TAILQ_ENTRY(spdk_nvmf_transport_poll_group) link;
};
119 
/*
 * Maximum number of registrants supported per namespace.
 * Used as the length of the per-namespace registrant arrays in this header.
 */
#define SPDK_NVMF_MAX_NUM_REGISTRANTS		16
122 
123 struct spdk_nvmf_registrant_info {
124 	uint64_t		rkey;
125 	char			host_uuid[SPDK_UUID_STRING_LEN];
126 };
127 
128 struct spdk_nvmf_reservation_info {
129 	bool					ptpl_activated;
130 	enum spdk_nvme_reservation_type		rtype;
131 	uint64_t				crkey;
132 	char					bdev_uuid[SPDK_UUID_STRING_LEN];
133 	char					holder_uuid[SPDK_UUID_STRING_LEN];
134 	uint32_t				num_regs;
135 	struct spdk_nvmf_registrant_info	registrants[SPDK_NVMF_MAX_NUM_REGISTRANTS];
136 };
137 
138 struct spdk_nvmf_subsystem_pg_ns_info {
139 	struct spdk_io_channel		*channel;
140 	struct spdk_uuid		uuid;
141 	/* current reservation key, no reservation if the value is 0 */
142 	uint64_t			crkey;
143 	/* reservation type */
144 	enum spdk_nvme_reservation_type	rtype;
145 	/* Host ID which holds the reservation */
146 	struct spdk_uuid		holder_id;
147 	/* Host ID for the registrants with the namespace */
148 	struct spdk_uuid		reg_hostid[SPDK_NVMF_MAX_NUM_REGISTRANTS];
149 };
150 
/*
 * Callback type taking an opaque argument and an int status.  It is the
 * cb_fn parameter type of the poll-group add/remove/pause/resume subsystem
 * prototypes declared in this header.
 */
typedef void (*spdk_nvmf_poll_group_mod_done)(void *cb_arg, int status);
152 
153 struct spdk_nvmf_subsystem_poll_group {
154 	/* Array of namespace information for each namespace indexed by nsid - 1 */
155 	struct spdk_nvmf_subsystem_pg_ns_info	*ns_info;
156 	uint32_t				num_ns;
157 
158 	uint64_t				io_outstanding;
159 	spdk_nvmf_poll_group_mod_done		cb_fn;
160 	void					*cb_arg;
161 
162 	enum spdk_nvmf_subsystem_state		state;
163 
164 	TAILQ_HEAD(, spdk_nvmf_request)		queued;
165 };
166 
167 struct spdk_nvmf_poll_group {
168 	struct spdk_thread				*thread;
169 	struct spdk_poller				*poller;
170 
171 	TAILQ_HEAD(, spdk_nvmf_transport_poll_group)	tgroups;
172 
173 	/* Array of poll groups indexed by subsystem id (sid) */
174 	struct spdk_nvmf_subsystem_poll_group		*sgroups;
175 	uint32_t					num_sgroups;
176 
177 	/* All of the queue pairs that belong to this poll group */
178 	TAILQ_HEAD(, spdk_nvmf_qpair)			qpairs;
179 
180 	/* Statistics */
181 	struct spdk_nvmf_poll_group_stat		stat;
182 };
183 
/*
 * Two-valued execution status.  Values are written out explicitly and match
 * the implicit numbering (COMPLETE is 0, ASYNCHRONOUS is 1).
 */
typedef enum _spdk_nvmf_request_exec_status {
	SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE		= 0,
	SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS	= 1,
} spdk_nvmf_request_exec_status;
188 
189 union nvmf_h2c_msg {
190 	struct spdk_nvmf_capsule_cmd			nvmf_cmd;
191 	struct spdk_nvme_cmd				nvme_cmd;
192 	struct spdk_nvmf_fabric_prop_set_cmd		prop_set_cmd;
193 	struct spdk_nvmf_fabric_prop_get_cmd		prop_get_cmd;
194 	struct spdk_nvmf_fabric_connect_cmd		connect_cmd;
195 };
196 SPDK_STATIC_ASSERT(sizeof(union nvmf_h2c_msg) == 64, "Incorrect size");
197 
198 union nvmf_c2h_msg {
199 	struct spdk_nvme_cpl				nvme_cpl;
200 	struct spdk_nvmf_fabric_prop_get_rsp		prop_get_rsp;
201 	struct spdk_nvmf_fabric_connect_rsp		connect_rsp;
202 };
203 SPDK_STATIC_ASSERT(sizeof(union nvmf_c2h_msg) == 16, "Incorrect size");
204 
205 struct spdk_nvmf_request {
206 	struct spdk_nvmf_qpair		*qpair;
207 	uint32_t			length;
208 	enum spdk_nvme_data_transfer	xfer;
209 	void				*data;
210 	union nvmf_h2c_msg		*cmd;
211 	union nvmf_c2h_msg		*rsp;
212 	struct iovec			iov[SPDK_NVMF_MAX_SGL_ENTRIES * 2];
213 	uint32_t			iovcnt;
214 	struct spdk_bdev_io_wait_entry	bdev_io_wait;
215 
216 	TAILQ_ENTRY(spdk_nvmf_request)	link;
217 };
218 
219 struct spdk_nvmf_registrant {
220 	TAILQ_ENTRY(spdk_nvmf_registrant) link;
221 	struct spdk_uuid hostid;
222 	/* Registration key */
223 	uint64_t rkey;
224 };
225 
226 struct spdk_nvmf_ns {
227 	uint32_t nsid;
228 	struct spdk_nvmf_subsystem *subsystem;
229 	struct spdk_bdev *bdev;
230 	struct spdk_bdev_desc *desc;
231 	struct spdk_nvmf_ns_opts opts;
232 	/* reservation notificaton mask */
233 	uint32_t mask;
234 	/* generation code */
235 	uint32_t gen;
236 	/* registrants head */
237 	TAILQ_HEAD(, spdk_nvmf_registrant) registrants;
238 	/* current reservation key */
239 	uint64_t crkey;
240 	/* reservation type */
241 	enum spdk_nvme_reservation_type rtype;
242 	/* current reservation holder, only valid if reservation type can only have one holder */
243 	struct spdk_nvmf_registrant *holder;
244 	/* Persist Through Power Loss file which contains the persistent reservation */
245 	char *ptpl_file;
246 	/* Persist Through Power Loss feature is enabled */
247 	bool ptpl_activated;
248 };
249 
250 struct spdk_nvmf_qpair {
251 	enum spdk_nvmf_qpair_state		state;
252 	spdk_nvmf_state_change_done		state_cb;
253 	void					*state_cb_arg;
254 
255 	struct spdk_nvmf_transport		*transport;
256 	struct spdk_nvmf_ctrlr			*ctrlr;
257 	struct spdk_nvmf_poll_group		*group;
258 
259 	uint16_t				qid;
260 	uint16_t				sq_head;
261 	uint16_t				sq_head_max;
262 
263 	TAILQ_HEAD(, spdk_nvmf_request)		outstanding;
264 	TAILQ_ENTRY(spdk_nvmf_qpair)		link;
265 };
266 
267 struct spdk_nvmf_ctrlr_feat {
268 	union spdk_nvme_feat_arbitration arbitration;
269 	union spdk_nvme_feat_power_management power_management;
270 	union spdk_nvme_feat_error_recovery error_recovery;
271 	union spdk_nvme_feat_volatile_write_cache volatile_write_cache;
272 	union spdk_nvme_feat_number_of_queues number_of_queues;
273 	union spdk_nvme_feat_write_atomicity write_atomicity;
274 	union spdk_nvme_feat_async_event_configuration async_event_configuration;
275 	union spdk_nvme_feat_keep_alive_timer keep_alive_timer;
276 };
277 
278 /*
279  * NVMf reservation notificaton log page.
280  */
281 struct spdk_nvmf_reservation_log {
282 	struct spdk_nvme_reservation_notification_log	log;
283 	TAILQ_ENTRY(spdk_nvmf_reservation_log)		link;
284 	struct spdk_nvmf_ctrlr				*ctrlr;
285 };
286 
287 /*
288  * This structure represents an NVMe-oF controller,
289  * which is like a "session" in networking terms.
290  */
291 struct spdk_nvmf_ctrlr {
292 	uint16_t			cntlid;
293 	char				hostnqn[SPDK_NVMF_NQN_MAX_LEN + 1];
294 	struct spdk_nvmf_subsystem	*subsys;
295 
296 	struct {
297 		union spdk_nvme_cap_register	cap;
298 		union spdk_nvme_vs_register	vs;
299 		union spdk_nvme_cc_register	cc;
300 		union spdk_nvme_csts_register	csts;
301 	} vcprop; /* virtual controller properties */
302 
303 	struct spdk_nvmf_ctrlr_feat feat;
304 
305 	struct spdk_nvmf_qpair	*admin_qpair;
306 	struct spdk_thread	*thread;
307 	struct spdk_bit_array	*qpair_mask;
308 
309 	struct spdk_nvmf_request *aer_req;
310 	union spdk_nvme_async_event_completion notice_event;
311 	union spdk_nvme_async_event_completion reservation_event;
312 	struct spdk_uuid  hostid;
313 
314 	uint16_t changed_ns_list_count;
315 	struct spdk_nvme_ns_list changed_ns_list;
316 	uint64_t log_page_count;
317 	uint8_t num_avail_log_pages;
318 	TAILQ_HEAD(log_page_head, spdk_nvmf_reservation_log) log_head;
319 
320 	/* Time to trigger keep-alive--poller_time = now_tick + period */
321 	uint64_t			last_keep_alive_tick;
322 	struct spdk_poller		*keep_alive_poller;
323 
324 	bool				dif_insert_or_strip;
325 
326 	TAILQ_ENTRY(spdk_nvmf_ctrlr)	link;
327 };
328 
329 struct spdk_nvmf_subsystem {
330 	struct spdk_thread		*thread;
331 	uint32_t			id;
332 	enum spdk_nvmf_subsystem_state	state;
333 
334 	char subnqn[SPDK_NVMF_NQN_MAX_LEN + 1];
335 	enum spdk_nvmf_subtype subtype;
336 	uint16_t next_cntlid;
337 	bool allow_any_host;
338 
339 	struct spdk_nvmf_tgt			*tgt;
340 
341 	char sn[SPDK_NVME_CTRLR_SN_LEN + 1];
342 	char mn[SPDK_NVME_CTRLR_MN_LEN + 1];
343 
344 	/* Array of pointers to namespaces of size max_nsid indexed by nsid - 1 */
345 	struct spdk_nvmf_ns			**ns;
346 	uint32_t				max_nsid;
347 	/* This is the maximum allowed nsid to a subsystem */
348 	uint32_t				max_allowed_nsid;
349 
350 	TAILQ_HEAD(, spdk_nvmf_ctrlr)		ctrlrs;
351 
352 	TAILQ_HEAD(, spdk_nvmf_host)		hosts;
353 
354 	TAILQ_HEAD(, spdk_nvmf_listener)	listeners;
355 
356 	TAILQ_ENTRY(spdk_nvmf_subsystem)	entries;
357 };
358 
359 
/*
 * Declaration only; the definition is not in this header.
 *
 * Takes a target and a transport type and returns a transport pointer.
 * NOTE(review): presumably returns NULL when the target has no transport of
 * the requested type - confirm against the definition.
 *
 * The second parameter is now named, matching every other prototype here.
 */
struct spdk_nvmf_transport *spdk_nvmf_tgt_get_transport(struct spdk_nvmf_tgt *tgt,
		enum spdk_nvme_transport_type type);
362 
363 int spdk_nvmf_poll_group_add_transport(struct spdk_nvmf_poll_group *group,
364 				       struct spdk_nvmf_transport *transport);
365 int spdk_nvmf_poll_group_update_subsystem(struct spdk_nvmf_poll_group *group,
366 		struct spdk_nvmf_subsystem *subsystem);
367 int spdk_nvmf_poll_group_add_subsystem(struct spdk_nvmf_poll_group *group,
368 				       struct spdk_nvmf_subsystem *subsystem,
369 				       spdk_nvmf_poll_group_mod_done cb_fn, void *cb_arg);
370 void spdk_nvmf_poll_group_remove_subsystem(struct spdk_nvmf_poll_group *group,
371 		struct spdk_nvmf_subsystem *subsystem, spdk_nvmf_poll_group_mod_done cb_fn, void *cb_arg);
372 void spdk_nvmf_poll_group_pause_subsystem(struct spdk_nvmf_poll_group *group,
373 		struct spdk_nvmf_subsystem *subsystem, spdk_nvmf_poll_group_mod_done cb_fn, void *cb_arg);
374 void spdk_nvmf_poll_group_resume_subsystem(struct spdk_nvmf_poll_group *group,
375 		struct spdk_nvmf_subsystem *subsystem, spdk_nvmf_poll_group_mod_done cb_fn, void *cb_arg);
/*
 * Request operations (declarations only; definitions are not in this header).
 * Each takes the request it operates on; free and complete return an int.
 */
void spdk_nvmf_request_exec(struct spdk_nvmf_request *req);

int spdk_nvmf_request_free(struct spdk_nvmf_request *req);

int spdk_nvmf_request_complete(struct spdk_nvmf_request *req);
379 
/*
 * Declaration only.  Takes a request and a caller-supplied spdk_dif_ctx
 * pointer and returns a bool.
 * NOTE(review): presumably fills *dif_ctx and returns true on success - confirm.
 */
bool spdk_nvmf_request_get_dif_ctx(struct spdk_nvmf_request *req,
				   struct spdk_dif_ctx *dif_ctx);
381 
/*
 * Declaration only.  Takes a target, a host NQN string, an iovec array with
 * its entry count, and an offset/length pair.
 * NOTE(review): presumably copies the requested window of the discovery log
 * page into iov - confirm against the definition.
 */
void spdk_nvmf_get_discovery_log_page(struct spdk_nvmf_tgt *tgt, const char *hostnqn,
				      struct iovec *iov, uint32_t iovcnt,
				      uint64_t offset, uint32_t length);
385 
/*
 * Controller-level operations (declarations only; definitions are not in
 * this header).
 *
 * The three process_*_cmd functions take a request and return an int.
 * NOTE(review): the int is presumably a spdk_nvmf_request_exec_status value - confirm.
 */
void spdk_nvmf_ctrlr_destruct(struct spdk_nvmf_ctrlr *ctrlr);

int spdk_nvmf_ctrlr_process_fabrics_cmd(struct spdk_nvmf_request *req);
int spdk_nvmf_ctrlr_process_admin_cmd(struct spdk_nvmf_request *req);
int spdk_nvmf_ctrlr_process_io_cmd(struct spdk_nvmf_request *req);

bool spdk_nvmf_ctrlr_dsm_supported(struct spdk_nvmf_ctrlr *ctrlr);
bool spdk_nvmf_ctrlr_write_zeroes_supported(struct spdk_nvmf_ctrlr *ctrlr);

void spdk_nvmf_ctrlr_ns_changed(struct spdk_nvmf_ctrlr *ctrlr, uint32_t nsid);
393 
/*
 * bdev-backed command helpers (declarations only; definitions are not in
 * this header).
 *
 * The *_cmd functions and nvme_passthru_io share one signature shape:
 * (bdev, desc, ch, req) -> int.
 */
void spdk_nvmf_bdev_ctrlr_identify_ns(struct spdk_nvmf_ns *ns, struct spdk_nvme_ns_data *nsdata,
				      bool dif_insert_or_strip);

int spdk_nvmf_bdev_ctrlr_read_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
				  struct spdk_io_channel *ch, struct spdk_nvmf_request *req);

int spdk_nvmf_bdev_ctrlr_write_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
				   struct spdk_io_channel *ch, struct spdk_nvmf_request *req);

int spdk_nvmf_bdev_ctrlr_write_zeroes_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
		struct spdk_io_channel *ch, struct spdk_nvmf_request *req);

int spdk_nvmf_bdev_ctrlr_flush_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
				   struct spdk_io_channel *ch, struct spdk_nvmf_request *req);

int spdk_nvmf_bdev_ctrlr_dsm_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
				 struct spdk_io_channel *ch, struct spdk_nvmf_request *req);

int spdk_nvmf_bdev_ctrlr_nvme_passthru_io(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
		struct spdk_io_channel *ch, struct spdk_nvmf_request *req);

bool spdk_nvmf_bdev_ctrlr_get_dif_ctx(struct spdk_bdev *bdev, struct spdk_nvme_cmd *cmd,
				      struct spdk_dif_ctx *dif_ctx);
410 
/*
 * Subsystem/controller bookkeeping (declarations only; definitions are not
 * in this header): add a controller, remove a controller, and fetch a
 * controller pointer by its 16-bit cntlid.
 */
int spdk_nvmf_subsystem_add_ctrlr(struct spdk_nvmf_subsystem *subsystem,
				  struct spdk_nvmf_ctrlr *ctrlr);

void spdk_nvmf_subsystem_remove_ctrlr(struct spdk_nvmf_subsystem *subsystem,
				      struct spdk_nvmf_ctrlr *ctrlr);

struct spdk_nvmf_ctrlr *spdk_nvmf_subsystem_get_ctrlr(struct spdk_nvmf_subsystem *subsystem,
		uint16_t cntlid);
/*
 * Async event and reservation entry points (declarations only; definitions
 * are not in this header).
 *
 * spdk_nvmf_ns_reservation_request() takes an untyped context pointer.
 * NOTE(review): the concrete type behind ctx is not visible here - check the definition.
 */
int spdk_nvmf_ctrlr_async_event_ns_notice(struct spdk_nvmf_ctrlr *ctrlr);

void spdk_nvmf_ctrlr_async_event_reservation_notification(struct spdk_nvmf_ctrlr *ctrlr);

void spdk_nvmf_ns_reservation_request(void *ctx);

void spdk_nvmf_ctrlr_reservation_notice_log(struct spdk_nvmf_ctrlr *ctrlr,
		struct spdk_nvmf_ns *ns,
		enum spdk_nvme_reservation_notification_log_page_type type);
423 
/*
 * Abort the outstanding AER for a controller.
 *
 * The abort is issued per controller and sends a completion for the AER to
 * the host.  Call this when recovering in error paths where it is acceptable
 * for the host to send a subsequent AER.
 */
void spdk_nvmf_ctrlr_abort_aer(struct spdk_nvmf_ctrlr *ctrlr);
430 
/*
 * Free the AER held on a queue pair without informing the host.
 *
 * This only frees the rdma resources for the AER.  Call it when deleting a
 * qpair and the qpair must be completely empty before the request is freed.
 * No completion is sent, which prevents the host from sending another AER.
 */
void spdk_nvmf_qpair_free_aer(struct spdk_nvmf_qpair *qpair);
438 
439 static inline struct spdk_nvmf_ns *
440 _spdk_nvmf_subsystem_get_ns(struct spdk_nvmf_subsystem *subsystem, uint32_t nsid)
441 {
442 	/* NOTE: This implicitly also checks for 0, since 0 - 1 wraps around to UINT32_MAX. */
443 	if (spdk_unlikely(nsid - 1 >= subsystem->max_nsid)) {
444 		return NULL;
445 	}
446 
447 	return subsystem->ns[nsid - 1];
448 }
449 
450 static inline bool
451 spdk_nvmf_qpair_is_admin_queue(struct spdk_nvmf_qpair *qpair)
452 {
453 	return qpair->qid == 0;
454 }
455 
456 #endif /* __NVMF_INTERNAL_H__ */
457