xref: /spdk/lib/nvmf/subsystem.c (revision 282b8b70a7e61dd67150fdb49b10a53ded6080a9)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright (c) Intel Corporation. All rights reserved.
5  *   Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved.
6  *   Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
7  *
8  *   Redistribution and use in source and binary forms, with or without
9  *   modification, are permitted provided that the following conditions
10  *   are met:
11  *
12  *     * Redistributions of source code must retain the above copyright
13  *       notice, this list of conditions and the following disclaimer.
14  *     * Redistributions in binary form must reproduce the above copyright
15  *       notice, this list of conditions and the following disclaimer in
16  *       the documentation and/or other materials provided with the
17  *       distribution.
18  *     * Neither the name of Intel Corporation nor the names of its
19  *       contributors may be used to endorse or promote products derived
20  *       from this software without specific prior written permission.
21  *
22  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  */
34 
35 #include "spdk/stdinc.h"
36 
37 #include "nvmf_internal.h"
38 #include "transport.h"
39 
40 #include "spdk/assert.h"
41 #include "spdk/likely.h"
42 #include "spdk/string.h"
43 #include "spdk/trace.h"
44 #include "spdk/nvmf_spec.h"
45 #include "spdk/uuid.h"
46 #include "spdk/json.h"
47 #include "spdk/file.h"
48 
49 #define __SPDK_BDEV_MODULE_ONLY
50 #include "spdk/bdev_module.h"
51 #include "spdk/log.h"
52 #include "spdk_internal/utf.h"
53 #include "spdk_internal/usdt.h"
54 
55 #define MODEL_NUMBER_DEFAULT "SPDK bdev Controller"
56 #define NVMF_SUBSYSTEM_DEFAULT_NAMESPACES 32
57 
58 /*
59  * States for parsing valid domains in NQNs according to RFC 1034
60  */
61 enum spdk_nvmf_nqn_domain_states {
62 	/* First character of a domain must be a letter */
63 	SPDK_NVMF_DOMAIN_ACCEPT_LETTER = 0,
64 
65 	/* Subsequent characters can be any of letter, digit, or hyphen */
66 	SPDK_NVMF_DOMAIN_ACCEPT_LDH = 1,
67 
68 	/* A domain label must end with either a letter or digit */
69 	SPDK_NVMF_DOMAIN_ACCEPT_ANY = 2
70 };
71 
72 static int _nvmf_subsystem_destroy(struct spdk_nvmf_subsystem *subsystem);
73 
74 /* Returns true if is a valid ASCII string as defined by the NVMe spec */
/* Returns true if is a valid ASCII string as defined by the NVMe spec */
static bool
nvmf_valid_ascii_string(const void *buf, size_t size)
{
	const uint8_t *p = buf;
	const uint8_t *end = p + size;

	/* Every byte must be a printable ASCII character (0x20 through 0x7E). */
	while (p < end) {
		if (*p < 0x20 || *p > 0x7E) {
			return false;
		}
		p++;
	}

	return true;
}
89 
90 static bool
91 nvmf_valid_nqn(const char *nqn)
92 {
93 	size_t len;
94 	struct spdk_uuid uuid_value;
95 	uint32_t i;
96 	int bytes_consumed;
97 	uint32_t domain_label_length;
98 	char *reverse_domain_end;
99 	uint32_t reverse_domain_end_index;
100 	enum spdk_nvmf_nqn_domain_states domain_state = SPDK_NVMF_DOMAIN_ACCEPT_LETTER;
101 
102 	/* Check for length requirements */
103 	len = strlen(nqn);
104 	if (len > SPDK_NVMF_NQN_MAX_LEN) {
105 		SPDK_ERRLOG("Invalid NQN \"%s\": length %zu > max %d\n", nqn, len, SPDK_NVMF_NQN_MAX_LEN);
106 		return false;
107 	}
108 
109 	/* The nqn must be at least as long as SPDK_NVMF_NQN_MIN_LEN to contain the necessary prefix. */
110 	if (len < SPDK_NVMF_NQN_MIN_LEN) {
111 		SPDK_ERRLOG("Invalid NQN \"%s\": length %zu < min %d\n", nqn, len, SPDK_NVMF_NQN_MIN_LEN);
112 		return false;
113 	}
114 
115 	/* Check for discovery controller nqn */
116 	if (!strcmp(nqn, SPDK_NVMF_DISCOVERY_NQN)) {
117 		return true;
118 	}
119 
120 	/* Check for equality with the generic nqn structure of the form "nqn.2014-08.org.nvmexpress:uuid:11111111-2222-3333-4444-555555555555" */
121 	if (!strncmp(nqn, SPDK_NVMF_NQN_UUID_PRE, SPDK_NVMF_NQN_UUID_PRE_LEN)) {
122 		if (len != SPDK_NVMF_NQN_UUID_PRE_LEN + SPDK_NVMF_UUID_STRING_LEN) {
123 			SPDK_ERRLOG("Invalid NQN \"%s\": uuid is not the correct length\n", nqn);
124 			return false;
125 		}
126 
127 		if (spdk_uuid_parse(&uuid_value, &nqn[SPDK_NVMF_NQN_UUID_PRE_LEN])) {
128 			SPDK_ERRLOG("Invalid NQN \"%s\": uuid is not formatted correctly\n", nqn);
129 			return false;
130 		}
131 		return true;
132 	}
133 
134 	/* If the nqn does not match the uuid structure, the next several checks validate the form "nqn.yyyy-mm.reverse.domain:user-string" */
135 
136 	if (strncmp(nqn, "nqn.", 4) != 0) {
137 		SPDK_ERRLOG("Invalid NQN \"%s\": NQN must begin with \"nqn.\".\n", nqn);
138 		return false;
139 	}
140 
141 	/* Check for yyyy-mm. */
142 	if (!(isdigit(nqn[4]) && isdigit(nqn[5]) && isdigit(nqn[6]) && isdigit(nqn[7]) &&
143 	      nqn[8] == '-' && isdigit(nqn[9]) && isdigit(nqn[10]) && nqn[11] == '.')) {
144 		SPDK_ERRLOG("Invalid date code in NQN \"%s\"\n", nqn);
145 		return false;
146 	}
147 
148 	reverse_domain_end = strchr(nqn, ':');
149 	if (reverse_domain_end != NULL && (reverse_domain_end_index = reverse_domain_end - nqn) < len - 1) {
150 	} else {
151 		SPDK_ERRLOG("Invalid NQN \"%s\". NQN must contain user specified name with a ':' as a prefix.\n",
152 			    nqn);
153 		return false;
154 	}
155 
156 	/* Check for valid reverse domain */
157 	domain_label_length = 0;
158 	for (i = 12; i < reverse_domain_end_index; i++) {
159 		if (domain_label_length > SPDK_DOMAIN_LABEL_MAX_LEN) {
160 			SPDK_ERRLOG("Invalid domain name in NQN \"%s\". At least one Label is too long.\n", nqn);
161 			return false;
162 		}
163 
164 		switch (domain_state) {
165 
166 		case SPDK_NVMF_DOMAIN_ACCEPT_LETTER: {
167 			if (isalpha(nqn[i])) {
168 				domain_state = SPDK_NVMF_DOMAIN_ACCEPT_ANY;
169 				domain_label_length++;
170 				break;
171 			} else {
172 				SPDK_ERRLOG("Invalid domain name in NQN \"%s\". Label names must start with a letter.\n", nqn);
173 				return false;
174 			}
175 		}
176 
177 		case SPDK_NVMF_DOMAIN_ACCEPT_LDH: {
178 			if (isalpha(nqn[i]) || isdigit(nqn[i])) {
179 				domain_state = SPDK_NVMF_DOMAIN_ACCEPT_ANY;
180 				domain_label_length++;
181 				break;
182 			} else if (nqn[i] == '-') {
183 				if (i == reverse_domain_end_index - 1) {
184 					SPDK_ERRLOG("Invalid domain name in NQN \"%s\". Label names must end with an alphanumeric symbol.\n",
185 						    nqn);
186 					return false;
187 				}
188 				domain_state = SPDK_NVMF_DOMAIN_ACCEPT_LDH;
189 				domain_label_length++;
190 				break;
191 			} else if (nqn[i] == '.') {
192 				SPDK_ERRLOG("Invalid domain name in NQN \"%s\". Label names must end with an alphanumeric symbol.\n",
193 					    nqn);
194 				return false;
195 			} else {
196 				SPDK_ERRLOG("Invalid domain name in NQN \"%s\". Label names must contain only [a-z,A-Z,0-9,'-','.'].\n",
197 					    nqn);
198 				return false;
199 			}
200 		}
201 
202 		case SPDK_NVMF_DOMAIN_ACCEPT_ANY: {
203 			if (isalpha(nqn[i]) || isdigit(nqn[i])) {
204 				domain_state = SPDK_NVMF_DOMAIN_ACCEPT_ANY;
205 				domain_label_length++;
206 				break;
207 			} else if (nqn[i] == '-') {
208 				if (i == reverse_domain_end_index - 1) {
209 					SPDK_ERRLOG("Invalid domain name in NQN \"%s\". Label names must end with an alphanumeric symbol.\n",
210 						    nqn);
211 					return false;
212 				}
213 				domain_state = SPDK_NVMF_DOMAIN_ACCEPT_LDH;
214 				domain_label_length++;
215 				break;
216 			} else if (nqn[i] == '.') {
217 				domain_state = SPDK_NVMF_DOMAIN_ACCEPT_LETTER;
218 				domain_label_length = 0;
219 				break;
220 			} else {
221 				SPDK_ERRLOG("Invalid domain name in NQN \"%s\". Label names must contain only [a-z,A-Z,0-9,'-','.'].\n",
222 					    nqn);
223 				return false;
224 			}
225 		}
226 		}
227 	}
228 
229 	i = reverse_domain_end_index + 1;
230 	while (i < len) {
231 		bytes_consumed = utf8_valid(&nqn[i], &nqn[len]);
232 		if (bytes_consumed <= 0) {
233 			SPDK_ERRLOG("Invalid domain name in NQN \"%s\". Label names must contain only valid utf-8.\n", nqn);
234 			return false;
235 		}
236 
237 		i += bytes_consumed;
238 	}
239 	return true;
240 }
241 
242 static void subsystem_state_change_on_pg(struct spdk_io_channel_iter *i);
243 
244 struct spdk_nvmf_subsystem *
245 spdk_nvmf_subsystem_create(struct spdk_nvmf_tgt *tgt,
246 			   const char *nqn,
247 			   enum spdk_nvmf_subtype type,
248 			   uint32_t num_ns)
249 {
250 	struct spdk_nvmf_subsystem	*subsystem;
251 	uint32_t			sid;
252 
253 	if (spdk_nvmf_tgt_find_subsystem(tgt, nqn)) {
254 		SPDK_ERRLOG("Subsystem NQN '%s' already exists\n", nqn);
255 		return NULL;
256 	}
257 
258 	if (!nvmf_valid_nqn(nqn)) {
259 		return NULL;
260 	}
261 
262 	if (type == SPDK_NVMF_SUBTYPE_DISCOVERY) {
263 		if (num_ns != 0) {
264 			SPDK_ERRLOG("Discovery subsystem cannot have namespaces.\n");
265 			return NULL;
266 		}
267 	} else if (num_ns == 0) {
268 		num_ns = NVMF_SUBSYSTEM_DEFAULT_NAMESPACES;
269 	}
270 
271 	/* Find a free subsystem id (sid) */
272 	for (sid = 0; sid < tgt->max_subsystems; sid++) {
273 		if (tgt->subsystems[sid] == NULL) {
274 			break;
275 		}
276 	}
277 	if (sid >= tgt->max_subsystems) {
278 		return NULL;
279 	}
280 
281 	subsystem = calloc(1, sizeof(struct spdk_nvmf_subsystem));
282 	if (subsystem == NULL) {
283 		return NULL;
284 	}
285 
286 	subsystem->thread = spdk_get_thread();
287 	subsystem->state = SPDK_NVMF_SUBSYSTEM_INACTIVE;
288 	subsystem->tgt = tgt;
289 	subsystem->id = sid;
290 	subsystem->subtype = type;
291 	subsystem->max_nsid = num_ns;
292 	subsystem->next_cntlid = 0;
293 	subsystem->min_cntlid = NVMF_MIN_CNTLID;
294 	subsystem->max_cntlid = NVMF_MAX_CNTLID;
295 	snprintf(subsystem->subnqn, sizeof(subsystem->subnqn), "%s", nqn);
296 	pthread_mutex_init(&subsystem->mutex, NULL);
297 	TAILQ_INIT(&subsystem->listeners);
298 	TAILQ_INIT(&subsystem->hosts);
299 	TAILQ_INIT(&subsystem->ctrlrs);
300 
301 	if (num_ns != 0) {
302 		subsystem->ns = calloc(num_ns, sizeof(struct spdk_nvmf_ns *));
303 		if (subsystem->ns == NULL) {
304 			SPDK_ERRLOG("Namespace memory allocation failed\n");
305 			pthread_mutex_destroy(&subsystem->mutex);
306 			free(subsystem);
307 			return NULL;
308 		}
309 		subsystem->ana_group = calloc(num_ns, sizeof(uint32_t));
310 		if (subsystem->ana_group == NULL) {
311 			SPDK_ERRLOG("ANA group memory allocation failed\n");
312 			pthread_mutex_destroy(&subsystem->mutex);
313 			free(subsystem->ns);
314 			free(subsystem);
315 			return NULL;
316 		}
317 	}
318 
319 	memset(subsystem->sn, '0', sizeof(subsystem->sn) - 1);
320 	subsystem->sn[sizeof(subsystem->sn) - 1] = '\0';
321 
322 	snprintf(subsystem->mn, sizeof(subsystem->mn), "%s",
323 		 MODEL_NUMBER_DEFAULT);
324 
325 	tgt->subsystems[sid] = subsystem;
326 	nvmf_update_discovery_log(tgt, NULL);
327 
328 	return subsystem;
329 }
330 
331 /* Must hold subsystem->mutex while calling this function */
332 static void
333 nvmf_subsystem_remove_host(struct spdk_nvmf_subsystem *subsystem, struct spdk_nvmf_host *host)
334 {
335 	TAILQ_REMOVE(&subsystem->hosts, host, link);
336 	free(host);
337 }
338 
339 static void
340 _nvmf_subsystem_remove_listener(struct spdk_nvmf_subsystem *subsystem,
341 				struct spdk_nvmf_subsystem_listener *listener,
342 				bool stop)
343 {
344 	struct spdk_nvmf_transport *transport;
345 
346 	if (stop) {
347 		transport = spdk_nvmf_tgt_get_transport(subsystem->tgt, listener->trid->trstring);
348 		if (transport != NULL) {
349 			spdk_nvmf_transport_stop_listen(transport, listener->trid);
350 		}
351 	}
352 
353 	TAILQ_REMOVE(&subsystem->listeners, listener, link);
354 	free(listener->ana_state);
355 	free(listener);
356 }
357 
/* Thread-message trampoline: retry the destroy on the subsystem's thread. */
static void
_nvmf_subsystem_destroy_msg(void *cb_arg)
{
	_nvmf_subsystem_destroy((struct spdk_nvmf_subsystem *)cb_arg);
}
365 
/*
 * Tear down a subsystem once it is safe to do so.
 *
 * Returns 0 when the subsystem was freed synchronously, -EINPROGRESS when
 * controllers are still attached and the destroy has been deferred (the
 * completion is then signaled via subsystem->async_destroy_cb), or a
 * negative errno if the deferral message could not be sent.
 */
static int
_nvmf_subsystem_destroy(struct spdk_nvmf_subsystem *subsystem)
{
	struct spdk_nvmf_ns		*ns;
	nvmf_subsystem_destroy_cb	async_destroy_cb = NULL;
	void				*async_destroy_cb_arg = NULL;
	int				rc;

	/* Controllers still attached: mark the destroy as asynchronous and
	 * re-post this function to the subsystem's thread so it is retried
	 * until the controller list drains. */
	if (!TAILQ_EMPTY(&subsystem->ctrlrs)) {
		SPDK_DEBUGLOG(nvmf, "subsystem %p %s has active controllers\n", subsystem, subsystem->subnqn);
		subsystem->async_destroy = true;
		rc = spdk_thread_send_msg(subsystem->thread, _nvmf_subsystem_destroy_msg, subsystem);
		if (rc) {
			SPDK_ERRLOG("Failed to send thread msg, rc %d\n", rc);
			assert(0);
			return rc;
		}
		return -EINPROGRESS;
	}

	/* Remove every namespace. The next pointer is fetched before removal
	 * because removing invalidates the current entry. */
	ns = spdk_nvmf_subsystem_get_first_ns(subsystem);
	while (ns != NULL) {
		struct spdk_nvmf_ns *next_ns = spdk_nvmf_subsystem_get_next_ns(subsystem, ns);

		spdk_nvmf_subsystem_remove_ns(subsystem, ns->opts.nsid);
		ns = next_ns;
	}

	free(subsystem->ns);
	free(subsystem->ana_group);

	/* Release the subsystem id slot and refresh the discovery log. */
	subsystem->tgt->subsystems[subsystem->id] = NULL;
	nvmf_update_discovery_log(subsystem->tgt, NULL);

	pthread_mutex_destroy(&subsystem->mutex);

	/* Capture the async completion callback before freeing the subsystem;
	 * it must be invoked only after the memory is released. */
	if (subsystem->async_destroy) {
		async_destroy_cb = subsystem->async_destroy_cb;
		async_destroy_cb_arg = subsystem->async_destroy_cb_arg;
	}

	free(subsystem);

	if (async_destroy_cb) {
		async_destroy_cb(async_destroy_cb_arg);
	}

	return 0;
}
415 
/*
 * Begin destroying a subsystem. Must be called on the subsystem's thread
 * with the subsystem in the INACTIVE state. Returns 0 if the destroy
 * completed synchronously, -EINPROGRESS if it will complete later via
 * cpl_cb, or a negative errno on invalid usage (cpl_cb is not invoked then).
 */
int
spdk_nvmf_subsystem_destroy(struct spdk_nvmf_subsystem *subsystem, nvmf_subsystem_destroy_cb cpl_cb,
			    void *cpl_cb_arg)
{
	struct spdk_nvmf_host *host, *host_tmp;

	if (!subsystem) {
		return -EINVAL;
	}

	/* Destruction must run on the thread that created the subsystem. */
	assert(spdk_get_thread() == subsystem->thread);

	if (subsystem->state != SPDK_NVMF_SUBSYSTEM_INACTIVE) {
		SPDK_ERRLOG("Subsystem can only be destroyed in inactive state\n");
		assert(0);
		return -EAGAIN;
	}
	if (subsystem->destroying) {
		SPDK_ERRLOG("Subsystem destruction is already started\n");
		assert(0);
		return -EALREADY;
	}

	subsystem->destroying = true;

	SPDK_DEBUGLOG(nvmf, "subsystem is %p %s\n", subsystem, subsystem->subnqn);

	/* stop == false: the transport-level listeners keep running; only the
	 * subsystem-level listener entries are removed. */
	nvmf_subsystem_remove_all_listeners(subsystem, false);

	pthread_mutex_lock(&subsystem->mutex);

	TAILQ_FOREACH_SAFE(host, &subsystem->hosts, link, host_tmp) {
		nvmf_subsystem_remove_host(subsystem, host);
	}

	pthread_mutex_unlock(&subsystem->mutex);

	/* Stash the completion callback; _nvmf_subsystem_destroy() may finish
	 * synchronously or defer (-EINPROGRESS) if controllers remain. */
	subsystem->async_destroy_cb = cpl_cb;
	subsystem->async_destroy_cb_arg = cpl_cb_arg;

	return _nvmf_subsystem_destroy(subsystem);
}
458 
459 /* we have to use the typedef in the function declaration to appease astyle. */
460 typedef enum spdk_nvmf_subsystem_state spdk_nvmf_subsystem_state_t;
461 
462 static spdk_nvmf_subsystem_state_t
463 nvmf_subsystem_get_intermediate_state(enum spdk_nvmf_subsystem_state current_state,
464 				      enum spdk_nvmf_subsystem_state requested_state)
465 {
466 	switch (requested_state) {
467 	case SPDK_NVMF_SUBSYSTEM_INACTIVE:
468 		return SPDK_NVMF_SUBSYSTEM_DEACTIVATING;
469 	case SPDK_NVMF_SUBSYSTEM_ACTIVE:
470 		if (current_state == SPDK_NVMF_SUBSYSTEM_PAUSED) {
471 			return SPDK_NVMF_SUBSYSTEM_RESUMING;
472 		} else {
473 			return SPDK_NVMF_SUBSYSTEM_ACTIVATING;
474 		}
475 	case SPDK_NVMF_SUBSYSTEM_PAUSED:
476 		return SPDK_NVMF_SUBSYSTEM_PAUSING;
477 	default:
478 		assert(false);
479 		return SPDK_NVMF_SUBSYSTEM_NUM_STATES;
480 	}
481 }
482 
/*
 * Atomically move the subsystem into the given state.
 *
 * Each target state has exactly one expected predecessor; the transition is
 * attempted with a compare-and-swap so that concurrent observers never see a
 * torn state. Returns 0 on success, nonzero if the subsystem was not in a
 * legal predecessor state.
 */
static int
nvmf_subsystem_set_state(struct spdk_nvmf_subsystem *subsystem,
			 enum spdk_nvmf_subsystem_state state)
{
	enum spdk_nvmf_subsystem_state actual_old_state, expected_old_state;
	bool exchanged;

	/* Derive the only legal predecessor for the requested state. */
	switch (state) {
	case SPDK_NVMF_SUBSYSTEM_INACTIVE:
		expected_old_state = SPDK_NVMF_SUBSYSTEM_DEACTIVATING;
		break;
	case SPDK_NVMF_SUBSYSTEM_ACTIVATING:
		expected_old_state = SPDK_NVMF_SUBSYSTEM_INACTIVE;
		break;
	case SPDK_NVMF_SUBSYSTEM_ACTIVE:
		expected_old_state = SPDK_NVMF_SUBSYSTEM_ACTIVATING;
		break;
	case SPDK_NVMF_SUBSYSTEM_PAUSING:
		expected_old_state = SPDK_NVMF_SUBSYSTEM_ACTIVE;
		break;
	case SPDK_NVMF_SUBSYSTEM_PAUSED:
		expected_old_state = SPDK_NVMF_SUBSYSTEM_PAUSING;
		break;
	case SPDK_NVMF_SUBSYSTEM_RESUMING:
		expected_old_state = SPDK_NVMF_SUBSYSTEM_PAUSED;
		break;
	case SPDK_NVMF_SUBSYSTEM_DEACTIVATING:
		expected_old_state = SPDK_NVMF_SUBSYSTEM_ACTIVE;
		break;
	default:
		assert(false);
		return -1;
	}

	/* First attempt: succeed only if the subsystem is still in the expected
	 * predecessor state. On failure actual_old_state holds what it really was. */
	actual_old_state = expected_old_state;
	exchanged = __atomic_compare_exchange_n(&subsystem->state, &actual_old_state, state, false,
						__ATOMIC_RELAXED, __ATOMIC_RELAXED);
	if (spdk_unlikely(exchanged == false)) {
		/* A small set of alternate predecessors is also legal; adjust the
		 * expectation and retry once. */
		if (actual_old_state == SPDK_NVMF_SUBSYSTEM_RESUMING &&
		    state == SPDK_NVMF_SUBSYSTEM_ACTIVE) {
			expected_old_state = SPDK_NVMF_SUBSYSTEM_RESUMING;
		}
		/* This is for the case when activating the subsystem fails. */
		if (actual_old_state == SPDK_NVMF_SUBSYSTEM_ACTIVATING &&
		    state == SPDK_NVMF_SUBSYSTEM_DEACTIVATING) {
			expected_old_state = SPDK_NVMF_SUBSYSTEM_ACTIVATING;
		}
		/* This is for the case when resuming the subsystem fails. */
		if (actual_old_state == SPDK_NVMF_SUBSYSTEM_RESUMING &&
		    state == SPDK_NVMF_SUBSYSTEM_PAUSING) {
			expected_old_state = SPDK_NVMF_SUBSYSTEM_RESUMING;
		}
		actual_old_state = expected_old_state;
		__atomic_compare_exchange_n(&subsystem->state, &actual_old_state, state, false,
					    __ATOMIC_RELAXED, __ATOMIC_RELAXED);
	}
	/* 0 when the final CAS matched; the difference is nonzero otherwise. */
	assert(actual_old_state == expected_old_state);
	return actual_old_state - expected_old_state;
}
542 
543 struct subsystem_state_change_ctx {
544 	struct spdk_nvmf_subsystem		*subsystem;
545 	uint16_t				nsid;
546 
547 	enum spdk_nvmf_subsystem_state		original_state;
548 	enum spdk_nvmf_subsystem_state		requested_state;
549 
550 	spdk_nvmf_subsystem_state_change_done	cb_fn;
551 	void					*cb_arg;
552 };
553 
554 static void
555 subsystem_state_change_revert_done(struct spdk_io_channel_iter *i, int status)
556 {
557 	struct subsystem_state_change_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
558 
559 	/* Nothing to be done here if the state setting fails, we are just screwed. */
560 	if (nvmf_subsystem_set_state(ctx->subsystem, ctx->requested_state)) {
561 		SPDK_ERRLOG("Unable to revert the subsystem state after operation failure.\n");
562 	}
563 
564 	ctx->subsystem->changing_state = false;
565 	if (ctx->cb_fn) {
566 		/* return a failure here. This function only exists in an error path. */
567 		ctx->cb_fn(ctx->subsystem, ctx->cb_arg, -1);
568 	}
569 	free(ctx);
570 }
571 
/*
 * Final step of a state change after every poll group has been visited.
 * On success, commit the requested state. On any failure, walk the state
 * machine back toward the original state and re-run the per-poll-group
 * step in reverse before reporting the error.
 */
static void
subsystem_state_change_done(struct spdk_io_channel_iter *i, int status)
{
	struct subsystem_state_change_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
	enum spdk_nvmf_subsystem_state intermediate_state;

	SPDK_DTRACE_PROBE4(nvmf_subsystem_change_state_done, ctx->subsystem->subnqn,
			   ctx->requested_state, ctx->original_state, status);

	/* A failed commit is treated the same as a poll-group failure. */
	if (status == 0) {
		status = nvmf_subsystem_set_state(ctx->subsystem, ctx->requested_state);
		if (status) {
			status = -1;
		}
	}

	if (status) {
		/* Derive the transient state for the reverse transition (note the
		 * swapped argument order: we are now heading back to original_state). */
		intermediate_state = nvmf_subsystem_get_intermediate_state(ctx->requested_state,
				     ctx->original_state);
		assert(intermediate_state != SPDK_NVMF_SUBSYSTEM_NUM_STATES);

		if (nvmf_subsystem_set_state(ctx->subsystem, intermediate_state)) {
			goto out;
		}
		ctx->requested_state = ctx->original_state;
		spdk_for_each_channel(ctx->subsystem->tgt,
				      subsystem_state_change_on_pg,
				      ctx,
				      subsystem_state_change_revert_done);
		return;
	}

out:
	/* Completion path: success, or a revert that could not even be started. */
	ctx->subsystem->changing_state = false;
	if (ctx->cb_fn) {
		ctx->cb_fn(ctx->subsystem, ctx->cb_arg, status);
	}
	free(ctx);
}
611 
612 static void
613 subsystem_state_change_continue(void *ctx, int status)
614 {
615 	struct spdk_io_channel_iter *i = ctx;
616 	struct subsystem_state_change_ctx *_ctx __attribute__((unused));
617 
618 	_ctx = spdk_io_channel_iter_get_ctx(i);
619 	SPDK_DTRACE_PROBE3(nvmf_pg_change_state_done, _ctx->subsystem->subnqn,
620 			   _ctx->requested_state, spdk_thread_get_id(spdk_get_thread()));
621 
622 	spdk_for_each_channel_continue(i, status);
623 }
624 
/*
 * Apply the in-flight state change to one poll group. Each helper invokes
 * subsystem_state_change_continue() when it finishes, which advances the
 * channel iteration to the next poll group.
 */
static void
subsystem_state_change_on_pg(struct spdk_io_channel_iter *i)
{
	struct subsystem_state_change_ctx *ctx;
	struct spdk_io_channel *ch;
	struct spdk_nvmf_poll_group *group;

	ctx = spdk_io_channel_iter_get_ctx(i);
	ch = spdk_io_channel_iter_get_channel(i);
	group = spdk_io_channel_get_ctx(ch);

	SPDK_DTRACE_PROBE3(nvmf_pg_change_state, ctx->subsystem->subnqn,
			   ctx->requested_state, spdk_thread_get_id(spdk_get_thread()));
	switch (ctx->requested_state) {
	case SPDK_NVMF_SUBSYSTEM_INACTIVE:
		nvmf_poll_group_remove_subsystem(group, ctx->subsystem, subsystem_state_change_continue, i);
		break;
	case SPDK_NVMF_SUBSYSTEM_ACTIVE:
		/* The current intermediate state distinguishes a fresh activation
		 * (ACTIVATING) from a resume out of PAUSED (RESUMING). */
		if (ctx->subsystem->state == SPDK_NVMF_SUBSYSTEM_ACTIVATING) {
			nvmf_poll_group_add_subsystem(group, ctx->subsystem, subsystem_state_change_continue, i);
		} else if (ctx->subsystem->state == SPDK_NVMF_SUBSYSTEM_RESUMING) {
			nvmf_poll_group_resume_subsystem(group, ctx->subsystem, subsystem_state_change_continue, i);
		}
		break;
	case SPDK_NVMF_SUBSYSTEM_PAUSED:
		nvmf_poll_group_pause_subsystem(group, ctx->subsystem, ctx->nsid, subsystem_state_change_continue,
						i);
		break;
	default:
		assert(false);
		break;
	}
}
658 
/*
 * Kick off an asynchronous subsystem state change fanned out across all
 * poll groups.
 *
 * Returns 0 if the change was started (or was a no-op; cb_fn is then called,
 * possibly immediately), -EBUSY if another change is already in flight
 * (cb_fn is NOT called), -ENOMEM on allocation failure, or the error from
 * entering the intermediate state.
 */
static int
nvmf_subsystem_state_change(struct spdk_nvmf_subsystem *subsystem,
			    uint32_t nsid,
			    enum spdk_nvmf_subsystem_state requested_state,
			    spdk_nvmf_subsystem_state_change_done cb_fn,
			    void *cb_arg)
{
	struct subsystem_state_change_ctx *ctx;
	enum spdk_nvmf_subsystem_state intermediate_state;
	int rc;

	/* Only one state change may be in flight at a time: atomically claim
	 * the changing_state flag or bail out. */
	if (__sync_val_compare_and_swap(&subsystem->changing_state, false, true)) {
		return -EBUSY;
	}

	SPDK_DTRACE_PROBE3(nvmf_subsystem_change_state, subsystem->subnqn,
			   requested_state, subsystem->state);
	/* If we are already in the requested state, just call the callback immediately. */
	if (subsystem->state == requested_state) {
		subsystem->changing_state = false;
		if (cb_fn) {
			cb_fn(subsystem, cb_arg, 0);
		}
		return 0;
	}

	intermediate_state = nvmf_subsystem_get_intermediate_state(subsystem->state, requested_state);
	assert(intermediate_state != SPDK_NVMF_SUBSYSTEM_NUM_STATES);

	ctx = calloc(1, sizeof(*ctx));
	if (!ctx) {
		subsystem->changing_state = false;
		return -ENOMEM;
	}

	/* Record the starting state before entering the intermediate state so a
	 * failure later can be reverted. */
	ctx->original_state = subsystem->state;
	rc = nvmf_subsystem_set_state(subsystem, intermediate_state);
	if (rc) {
		free(ctx);
		subsystem->changing_state = false;
		return rc;
	}

	ctx->subsystem = subsystem;
	ctx->nsid = nsid;
	ctx->requested_state = requested_state;
	ctx->cb_fn = cb_fn;
	ctx->cb_arg = cb_arg;

	/* Visit every poll group, then finish (commit or revert) in
	 * subsystem_state_change_done(). */
	spdk_for_each_channel(subsystem->tgt,
			      subsystem_state_change_on_pg,
			      ctx,
			      subsystem_state_change_done);

	return 0;
}
715 
int
spdk_nvmf_subsystem_start(struct spdk_nvmf_subsystem *subsystem,
			  spdk_nvmf_subsystem_state_change_done cb_fn,
			  void *cb_arg)
{
	/* Asynchronously transition the subsystem to ACTIVE. Returns negative
	 * errno if the change could not be started (cb_fn not invoked then). */
	return nvmf_subsystem_state_change(subsystem, 0, SPDK_NVMF_SUBSYSTEM_ACTIVE, cb_fn, cb_arg);
}
723 
int
spdk_nvmf_subsystem_stop(struct spdk_nvmf_subsystem *subsystem,
			 spdk_nvmf_subsystem_state_change_done cb_fn,
			 void *cb_arg)
{
	/* Asynchronously transition the subsystem to INACTIVE. Returns negative
	 * errno if the change could not be started (cb_fn not invoked then). */
	return nvmf_subsystem_state_change(subsystem, 0, SPDK_NVMF_SUBSYSTEM_INACTIVE, cb_fn, cb_arg);
}
731 
int
spdk_nvmf_subsystem_pause(struct spdk_nvmf_subsystem *subsystem,
			  uint32_t nsid,
			  spdk_nvmf_subsystem_state_change_done cb_fn,
			  void *cb_arg)
{
	/* Asynchronously transition the subsystem to PAUSED; nsid is forwarded
	 * to the per-poll-group pause path. Returns negative errno if the
	 * change could not be started (cb_fn not invoked then). */
	return nvmf_subsystem_state_change(subsystem, nsid, SPDK_NVMF_SUBSYSTEM_PAUSED, cb_fn, cb_arg);
}
740 
int
spdk_nvmf_subsystem_resume(struct spdk_nvmf_subsystem *subsystem,
			   spdk_nvmf_subsystem_state_change_done cb_fn,
			   void *cb_arg)
{
	/* Asynchronously transition the subsystem back to ACTIVE (a resume when
	 * currently PAUSED). Returns negative errno if the change could not be
	 * started (cb_fn not invoked then). */
	return nvmf_subsystem_state_change(subsystem, 0, SPDK_NVMF_SUBSYSTEM_ACTIVE, cb_fn, cb_arg);
}
748 
749 struct spdk_nvmf_subsystem *
750 spdk_nvmf_subsystem_get_first(struct spdk_nvmf_tgt *tgt)
751 {
752 	struct spdk_nvmf_subsystem	*subsystem;
753 	uint32_t sid;
754 
755 	for (sid = 0; sid < tgt->max_subsystems; sid++) {
756 		subsystem = tgt->subsystems[sid];
757 		if (subsystem) {
758 			return subsystem;
759 		}
760 	}
761 
762 	return NULL;
763 }
764 
765 struct spdk_nvmf_subsystem *
766 spdk_nvmf_subsystem_get_next(struct spdk_nvmf_subsystem *subsystem)
767 {
768 	uint32_t sid;
769 	struct spdk_nvmf_tgt *tgt;
770 
771 	if (!subsystem) {
772 		return NULL;
773 	}
774 
775 	tgt = subsystem->tgt;
776 
777 	for (sid = subsystem->id + 1; sid < tgt->max_subsystems; sid++) {
778 		subsystem = tgt->subsystems[sid];
779 		if (subsystem) {
780 			return subsystem;
781 		}
782 	}
783 
784 	return NULL;
785 }
786 
787 /* Must hold subsystem->mutex while calling this function */
788 static struct spdk_nvmf_host *
789 nvmf_subsystem_find_host(struct spdk_nvmf_subsystem *subsystem, const char *hostnqn)
790 {
791 	struct spdk_nvmf_host *host = NULL;
792 
793 	TAILQ_FOREACH(host, &subsystem->hosts, link) {
794 		if (strcmp(hostnqn, host->nqn) == 0) {
795 			return host;
796 		}
797 	}
798 
799 	return NULL;
800 }
801 
802 int
803 spdk_nvmf_subsystem_add_host(struct spdk_nvmf_subsystem *subsystem, const char *hostnqn)
804 {
805 	struct spdk_nvmf_host *host;
806 
807 	if (!nvmf_valid_nqn(hostnqn)) {
808 		return -EINVAL;
809 	}
810 
811 	pthread_mutex_lock(&subsystem->mutex);
812 
813 	if (nvmf_subsystem_find_host(subsystem, hostnqn)) {
814 		/* This subsystem already allows the specified host. */
815 		pthread_mutex_unlock(&subsystem->mutex);
816 		return 0;
817 	}
818 
819 	host = calloc(1, sizeof(*host));
820 	if (!host) {
821 		pthread_mutex_unlock(&subsystem->mutex);
822 		return -ENOMEM;
823 	}
824 
825 	snprintf(host->nqn, sizeof(host->nqn), "%s", hostnqn);
826 
827 	TAILQ_INSERT_HEAD(&subsystem->hosts, host, link);
828 
829 	nvmf_update_discovery_log(subsystem->tgt, hostnqn);
830 
831 	pthread_mutex_unlock(&subsystem->mutex);
832 
833 	return 0;
834 }
835 
836 int
837 spdk_nvmf_subsystem_remove_host(struct spdk_nvmf_subsystem *subsystem, const char *hostnqn)
838 {
839 	struct spdk_nvmf_host *host;
840 
841 	pthread_mutex_lock(&subsystem->mutex);
842 
843 	host = nvmf_subsystem_find_host(subsystem, hostnqn);
844 	if (host == NULL) {
845 		pthread_mutex_unlock(&subsystem->mutex);
846 		return -ENOENT;
847 	}
848 
849 	nvmf_subsystem_remove_host(subsystem, host);
850 	pthread_mutex_unlock(&subsystem->mutex);
851 
852 	return 0;
853 }
854 
/* Context for disconnecting all of one host's qpairs across poll groups. */
struct nvmf_subsystem_disconnect_host_ctx {
	struct spdk_nvmf_subsystem		*subsystem;
	/* Heap copy of the host NQN (strdup'd); freed in the fini callback. */
	char					*hostnqn;
	/* Completion callback and argument, invoked after all poll groups
	 * have been walked. */
	spdk_nvmf_tgt_subsystem_listen_done_fn	cb_fn;
	void					*cb_arg;
};
861 
862 static void
863 nvmf_subsystem_disconnect_host_fini(struct spdk_io_channel_iter *i, int status)
864 {
865 	struct nvmf_subsystem_disconnect_host_ctx *ctx;
866 
867 	ctx = spdk_io_channel_iter_get_ctx(i);
868 
869 	if (ctx->cb_fn) {
870 		ctx->cb_fn(ctx->cb_arg, status);
871 	}
872 	free(ctx->hostnqn);
873 	free(ctx);
874 }
875 
/*
 * Per-poll-group step: disconnect every qpair whose controller belongs to
 * the target subsystem and whose host NQN matches the one in the context.
 */
static void
nvmf_subsystem_disconnect_qpairs_by_host(struct spdk_io_channel_iter *i)
{
	struct nvmf_subsystem_disconnect_host_ctx *ctx;
	struct spdk_nvmf_poll_group *group;
	struct spdk_io_channel *ch;
	struct spdk_nvmf_qpair *qpair, *tmp_qpair;
	struct spdk_nvmf_ctrlr *ctrlr;

	ctx = spdk_io_channel_iter_get_ctx(i);
	ch = spdk_io_channel_iter_get_channel(i);
	group = spdk_io_channel_get_ctx(ch);

	/* Safe iteration: disconnecting may unlink the current qpair. */
	TAILQ_FOREACH_SAFE(qpair, &group->qpairs, link, tmp_qpair) {
		ctrlr = qpair->ctrlr;

		/* Skip qpairs with no controller yet, or on other subsystems. */
		if (ctrlr == NULL || ctrlr->subsys != ctx->subsystem) {
			continue;
		}

		if (strncmp(ctrlr->hostnqn, ctx->hostnqn, sizeof(ctrlr->hostnqn)) == 0) {
			/* Right now this does not wait for the queue pairs to actually disconnect. */
			spdk_nvmf_qpair_disconnect(qpair, NULL, NULL);
		}
	}
	spdk_for_each_channel_continue(i, 0);
}
903 
904 int
905 spdk_nvmf_subsystem_disconnect_host(struct spdk_nvmf_subsystem *subsystem,
906 				    const char *hostnqn,
907 				    spdk_nvmf_tgt_subsystem_listen_done_fn cb_fn,
908 				    void *cb_arg)
909 {
910 	struct nvmf_subsystem_disconnect_host_ctx *ctx;
911 
912 	ctx = calloc(1, sizeof(struct nvmf_subsystem_disconnect_host_ctx));
913 	if (ctx == NULL) {
914 		return -ENOMEM;
915 	}
916 
917 	ctx->hostnqn = strdup(hostnqn);
918 	if (ctx->hostnqn == NULL) {
919 		free(ctx);
920 		return -ENOMEM;
921 	}
922 
923 	ctx->subsystem = subsystem;
924 	ctx->cb_fn = cb_fn;
925 	ctx->cb_arg = cb_arg;
926 
927 	spdk_for_each_channel(subsystem->tgt, nvmf_subsystem_disconnect_qpairs_by_host, ctx,
928 			      nvmf_subsystem_disconnect_host_fini);
929 
930 	return 0;
931 }
932 
int
spdk_nvmf_subsystem_set_allow_any_host(struct spdk_nvmf_subsystem *subsystem, bool allow_any_host)
{
	/* Toggle the allow-any-host policy under the subsystem mutex and refresh
	 * the discovery log so the change is visible to discovery controllers. */
	pthread_mutex_lock(&subsystem->mutex);
	subsystem->flags.allow_any_host = allow_any_host;
	nvmf_update_discovery_log(subsystem->tgt, NULL);
	pthread_mutex_unlock(&subsystem->mutex);

	/* Always succeeds. */
	return 0;
}
943 
944 bool
945 spdk_nvmf_subsystem_get_allow_any_host(const struct spdk_nvmf_subsystem *subsystem)
946 {
947 	bool allow_any_host;
948 	struct spdk_nvmf_subsystem *sub;
949 
950 	/* Technically, taking the mutex modifies data in the subsystem. But the const
951 	 * is still important to convey that this doesn't mutate any other data. Cast
952 	 * it away to work around this. */
953 	sub = (struct spdk_nvmf_subsystem *)subsystem;
954 
955 	pthread_mutex_lock(&sub->mutex);
956 	allow_any_host = sub->flags.allow_any_host;
957 	pthread_mutex_unlock(&sub->mutex);
958 
959 	return allow_any_host;
960 }
961 
962 bool
963 spdk_nvmf_subsystem_host_allowed(struct spdk_nvmf_subsystem *subsystem, const char *hostnqn)
964 {
965 	bool allowed;
966 
967 	if (!hostnqn) {
968 		return false;
969 	}
970 
971 	pthread_mutex_lock(&subsystem->mutex);
972 
973 	if (subsystem->flags.allow_any_host) {
974 		pthread_mutex_unlock(&subsystem->mutex);
975 		return true;
976 	}
977 
978 	allowed =  nvmf_subsystem_find_host(subsystem, hostnqn) != NULL;
979 	pthread_mutex_unlock(&subsystem->mutex);
980 
981 	return allowed;
982 }
983 
struct spdk_nvmf_host *
spdk_nvmf_subsystem_get_first_host(struct spdk_nvmf_subsystem *subsystem)
{
	/* First entry of the allowed-host list, or NULL if the list is empty.
	 * NOTE(review): no lock is taken here; presumably callers serialize
	 * against host add/remove — confirm against call sites. */
	return TAILQ_FIRST(&subsystem->hosts);
}
989 
990 
struct spdk_nvmf_host *
spdk_nvmf_subsystem_get_next_host(struct spdk_nvmf_subsystem *subsystem,
				  struct spdk_nvmf_host *prev_host)
{
	/* Entry following prev_host in the allowed-host list, or NULL at the end.
	 * The subsystem argument is unused; kept for API symmetry with
	 * spdk_nvmf_subsystem_get_first_host(). */
	return TAILQ_NEXT(prev_host, link);
}
997 
const char *
spdk_nvmf_host_get_nqn(const struct spdk_nvmf_host *host)
{
	/* Borrowed pointer into the host entry; valid only while the host
	 * remains in the subsystem's list. */
	return host->nqn;
}
1003 
1004 struct spdk_nvmf_subsystem_listener *
1005 nvmf_subsystem_find_listener(struct spdk_nvmf_subsystem *subsystem,
1006 			     const struct spdk_nvme_transport_id *trid)
1007 {
1008 	struct spdk_nvmf_subsystem_listener *listener;
1009 
1010 	TAILQ_FOREACH(listener, &subsystem->listeners, link) {
1011 		if (spdk_nvme_transport_id_compare(listener->trid, trid) == 0) {
1012 			return listener;
1013 		}
1014 	}
1015 
1016 	return NULL;
1017 }
1018 
1019 /**
1020  * Function to be called once the target is listening.
1021  *
1022  * \param ctx Context argument passed to this function.
1023  * \param status 0 if it completed successfully, or negative errno if it failed.
1024  */
1025 static void
1026 _nvmf_subsystem_add_listener_done(void *ctx, int status)
1027 {
1028 	struct spdk_nvmf_subsystem_listener *listener = ctx;
1029 
1030 	if (status) {
1031 		listener->cb_fn(listener->cb_arg, status);
1032 		free(listener);
1033 		return;
1034 	}
1035 
1036 	TAILQ_INSERT_HEAD(&listener->subsystem->listeners, listener, link);
1037 	nvmf_update_discovery_log(listener->subsystem->tgt, NULL);
1038 	listener->cb_fn(listener->cb_arg, status);
1039 }
1040 
/*
 * Attach a transport listener to the subsystem. Asynchronous: the result is
 * always delivered through cb_fn (possibly synchronously from this call).
 * Requires the subsystem to be INACTIVE or PAUSED. Ownership of the listener
 * object passes to _nvmf_subsystem_add_listener_done(), which frees it on
 * failure or links it into subsystem->listeners on success.
 */
void
spdk_nvmf_subsystem_add_listener(struct spdk_nvmf_subsystem *subsystem,
				 struct spdk_nvme_transport_id *trid,
				 spdk_nvmf_tgt_subsystem_listen_done_fn cb_fn,
				 void *cb_arg)
{
	struct spdk_nvmf_transport *transport;
	struct spdk_nvmf_subsystem_listener *listener;
	struct spdk_nvmf_listener *tr_listener;
	uint32_t i;
	int rc = 0;

	assert(cb_fn != NULL);

	/* Listener changes are only safe while the subsystem is quiesced. */
	if (!(subsystem->state == SPDK_NVMF_SUBSYSTEM_INACTIVE ||
	      subsystem->state == SPDK_NVMF_SUBSYSTEM_PAUSED)) {
		cb_fn(cb_arg, -EAGAIN);
		return;
	}

	if (nvmf_subsystem_find_listener(subsystem, trid)) {
		/* Listener already exists in this subsystem */
		cb_fn(cb_arg, 0);
		return;
	}

	transport = spdk_nvmf_tgt_get_transport(subsystem->tgt, trid->trstring);
	if (!transport) {
		SPDK_ERRLOG("Unable to find %s transport. The transport must be created first also make sure it is properly registered.\n",
			    trid->trstring);
		cb_fn(cb_arg, -EINVAL);
		return;
	}

	/* The target-level listener for this trid must already exist
	 * (created via spdk_nvmf_tgt_listen or equivalent). */
	tr_listener = nvmf_transport_find_listener(transport, trid);
	if (!tr_listener) {
		SPDK_ERRLOG("Cannot find transport listener for %s\n", trid->traddr);
		cb_fn(cb_arg, -EINVAL);
		return;
	}

	listener = calloc(1, sizeof(*listener));
	if (!listener) {
		cb_fn(cb_arg, -ENOMEM);
		return;
	}

	/* Point at the transport listener's trid so the stored address stays
	 * in sync with the transport's canonical copy. */
	listener->trid = &tr_listener->trid;
	listener->transport = transport;
	listener->cb_fn = cb_fn;
	listener->cb_arg = cb_arg;
	listener->subsystem = subsystem;
	/* One ANA state slot per possible namespace. */
	listener->ana_state = calloc(subsystem->max_nsid, sizeof(enum spdk_nvme_ana_state));
	if (!listener->ana_state) {
		free(listener);
		cb_fn(cb_arg, -ENOMEM);
		return;
	}

	for (i = 0; i < subsystem->max_nsid; i++) {
		listener->ana_state[i] = SPDK_NVME_ANA_OPTIMIZED_STATE;
	}

	/* Give the transport a chance to reject or prepare the association. */
	if (transport->ops->listen_associate != NULL) {
		rc = transport->ops->listen_associate(transport, subsystem, trid);
	}

	/* Completion path handles both success (rc == 0) and failure. */
	_nvmf_subsystem_add_listener_done(listener, rc);
}
1110 
1111 int
1112 spdk_nvmf_subsystem_remove_listener(struct spdk_nvmf_subsystem *subsystem,
1113 				    const struct spdk_nvme_transport_id *trid)
1114 {
1115 	struct spdk_nvmf_subsystem_listener *listener;
1116 
1117 	if (!(subsystem->state == SPDK_NVMF_SUBSYSTEM_INACTIVE ||
1118 	      subsystem->state == SPDK_NVMF_SUBSYSTEM_PAUSED)) {
1119 		return -EAGAIN;
1120 	}
1121 
1122 	listener = nvmf_subsystem_find_listener(subsystem, trid);
1123 	if (listener == NULL) {
1124 		return -ENOENT;
1125 	}
1126 
1127 	_nvmf_subsystem_remove_listener(subsystem, listener, false);
1128 
1129 	return 0;
1130 }
1131 
1132 void
1133 nvmf_subsystem_remove_all_listeners(struct spdk_nvmf_subsystem *subsystem,
1134 				    bool stop)
1135 {
1136 	struct spdk_nvmf_subsystem_listener *listener, *listener_tmp;
1137 
1138 	TAILQ_FOREACH_SAFE(listener, &subsystem->listeners, link, listener_tmp) {
1139 		_nvmf_subsystem_remove_listener(subsystem, listener, stop);
1140 	}
1141 }
1142 
1143 bool
1144 spdk_nvmf_subsystem_listener_allowed(struct spdk_nvmf_subsystem *subsystem,
1145 				     const struct spdk_nvme_transport_id *trid)
1146 {
1147 	struct spdk_nvmf_subsystem_listener *listener;
1148 
1149 	if (!strcmp(subsystem->subnqn, SPDK_NVMF_DISCOVERY_NQN)) {
1150 		return true;
1151 	}
1152 
1153 	TAILQ_FOREACH(listener, &subsystem->listeners, link) {
1154 		if (spdk_nvme_transport_id_compare(listener->trid, trid) == 0) {
1155 			return true;
1156 		}
1157 	}
1158 
1159 	return false;
1160 }
1161 
/* Listener iteration and accessor helpers. */
struct spdk_nvmf_subsystem_listener *
spdk_nvmf_subsystem_get_first_listener(struct spdk_nvmf_subsystem *subsystem)
{
	return TAILQ_FIRST(&subsystem->listeners);
}

struct spdk_nvmf_subsystem_listener *
spdk_nvmf_subsystem_get_next_listener(struct spdk_nvmf_subsystem *subsystem,
				      struct spdk_nvmf_subsystem_listener *prev_listener)
{
	/* prev_listener must be non-NULL and still linked. */
	return TAILQ_NEXT(prev_listener, link);
}

const struct spdk_nvme_transport_id *
spdk_nvmf_subsystem_listener_get_trid(struct spdk_nvmf_subsystem_listener *listener)
{
	return listener->trid;
}

void
spdk_nvmf_subsystem_allow_any_listener(struct spdk_nvmf_subsystem *subsystem,
				       bool allow_any_listener)
{
	/* NOTE(review): unlike allow_any_host this is not protected by
	 * subsystem->mutex — confirm callers serialize access. */
	subsystem->flags.allow_any_listener = allow_any_listener;
}

/* NOTE: "subsytem" typo is part of the public API name and cannot be
 * changed without breaking callers. */
bool
spdk_nvmf_subsytem_any_listener_allowed(struct spdk_nvmf_subsystem *subsystem)
{
	return subsystem->flags.allow_any_listener;
}
1193 
1194 
/* Context carried across the per-poll-group namespace-update iteration;
 * allocated by the caller of nvmf_subsystem_update_ns() and freed in
 * subsystem_update_ns_done(). */
struct subsystem_update_ns_ctx {
	struct spdk_nvmf_subsystem *subsystem;

	/* Optional completion callback; may be NULL. */
	spdk_nvmf_subsystem_state_change_done cb_fn;
	void *cb_arg;
};
1201 
1202 static void
1203 subsystem_update_ns_done(struct spdk_io_channel_iter *i, int status)
1204 {
1205 	struct subsystem_update_ns_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
1206 
1207 	if (ctx->cb_fn) {
1208 		ctx->cb_fn(ctx->subsystem, ctx->cb_arg, status);
1209 	}
1210 	free(ctx);
1211 }
1212 
1213 static void
1214 subsystem_update_ns_on_pg(struct spdk_io_channel_iter *i)
1215 {
1216 	int rc;
1217 	struct subsystem_update_ns_ctx *ctx;
1218 	struct spdk_nvmf_poll_group *group;
1219 	struct spdk_nvmf_subsystem *subsystem;
1220 
1221 	ctx = spdk_io_channel_iter_get_ctx(i);
1222 	group = spdk_io_channel_get_ctx(spdk_io_channel_iter_get_channel(i));
1223 	subsystem = ctx->subsystem;
1224 
1225 	rc = nvmf_poll_group_update_subsystem(group, subsystem);
1226 	spdk_for_each_channel_continue(i, rc);
1227 }
1228 
/* Kick off an update of the subsystem's namespace state on every poll group.
 * ctx must be a struct subsystem_update_ns_ctx *; it is freed by cpl
 * (subsystem_update_ns_done). Always returns 0. */
static int
nvmf_subsystem_update_ns(struct spdk_nvmf_subsystem *subsystem, spdk_channel_for_each_cpl cpl,
			 void *ctx)
{
	spdk_for_each_channel(subsystem->tgt,
			      subsystem_update_ns_on_pg,
			      ctx,
			      cpl);

	return 0;
}
1240 
/* Notify every controller attached to the subsystem that the namespace
 * identified by nsid changed (added, removed, or resized). */
static void
nvmf_subsystem_ns_changed(struct spdk_nvmf_subsystem *subsystem, uint32_t nsid)
{
	struct spdk_nvmf_ctrlr *ctrlr;

	TAILQ_FOREACH(ctrlr, &subsystem->ctrlrs, link) {
		nvmf_ctrlr_ns_changed(ctrlr, nsid);
	}
}

/* Forward declaration; defined with the reservation code below. */
static uint32_t
nvmf_ns_reservation_clear_all_registrants(struct spdk_nvmf_ns *ns);
1253 
/*
 * Remove namespace nsid from the subsystem and release all of its resources.
 * The subsystem must be INACTIVE or PAUSED. Returns 0 on success, -1 on
 * invalid nsid or if the slot is empty.
 */
int
spdk_nvmf_subsystem_remove_ns(struct spdk_nvmf_subsystem *subsystem, uint32_t nsid)
{
	struct spdk_nvmf_transport *transport;
	struct spdk_nvmf_ns *ns;

	if (!(subsystem->state == SPDK_NVMF_SUBSYSTEM_INACTIVE ||
	      subsystem->state == SPDK_NVMF_SUBSYSTEM_PAUSED)) {
		assert(false);
		return -1;
	}

	/* NSIDs are 1-based; ns[] is indexed by nsid - 1. */
	if (nsid == 0 || nsid > subsystem->max_nsid) {
		return -1;
	}

	ns = subsystem->ns[nsid - 1];
	if (!ns) {
		return -1;
	}

	subsystem->ns[nsid - 1] = NULL;

	/* Drop this namespace's membership from its ANA group. */
	assert(ns->anagrpid - 1 < subsystem->max_nsid);
	assert(subsystem->ana_group[ns->anagrpid - 1] > 0);

	subsystem->ana_group[ns->anagrpid - 1]--;

	/* Tear down in reverse order of spdk_nvmf_subsystem_add_ns_ext():
	 * reservations, then the bdev claim and descriptor, then the ns. */
	free(ns->ptpl_file);
	nvmf_ns_reservation_clear_all_registrants(ns);
	spdk_bdev_module_release_bdev(ns->bdev);
	spdk_bdev_close(ns->desc);
	free(ns);

	/* Let each transport undo any per-namespace setup it performed. */
	for (transport = spdk_nvmf_transport_get_first(subsystem->tgt); transport;
	     transport = spdk_nvmf_transport_get_next(transport)) {
		if (transport->ops->subsystem_remove_ns) {
			transport->ops->subsystem_remove_ns(transport, subsystem, nsid);
		}
	}

	nvmf_subsystem_ns_changed(subsystem, nsid);

	return 0;
}
1299 
/* Context for asynchronous namespace hot-remove / resize processing; owns
 * its own copy of the nsid since the ns object may vanish mid-operation. */
struct subsystem_ns_change_ctx {
	struct spdk_nvmf_subsystem		*subsystem;
	spdk_nvmf_subsystem_state_change_done	cb_fn;
	uint32_t				nsid;
};
1305 
1306 static void
1307 _nvmf_ns_hot_remove(struct spdk_nvmf_subsystem *subsystem,
1308 		    void *cb_arg, int status)
1309 {
1310 	struct subsystem_ns_change_ctx *ctx = cb_arg;
1311 	int rc;
1312 
1313 	rc = spdk_nvmf_subsystem_remove_ns(subsystem, ctx->nsid);
1314 	if (rc != 0) {
1315 		SPDK_ERRLOG("Failed to make changes to NVME-oF subsystem with id: %u\n", subsystem->id);
1316 	}
1317 
1318 	spdk_nvmf_subsystem_resume(subsystem, NULL, NULL);
1319 
1320 	free(ctx);
1321 }
1322 
1323 static void
1324 nvmf_ns_change_msg(void *ns_ctx)
1325 {
1326 	struct subsystem_ns_change_ctx *ctx = ns_ctx;
1327 	int rc;
1328 
1329 	rc = spdk_nvmf_subsystem_pause(ctx->subsystem, ctx->nsid, ctx->cb_fn, ctx);
1330 	if (rc) {
1331 		if (rc == -EBUSY) {
1332 			/* Try again, this is not a permanent situation. */
1333 			spdk_thread_send_msg(spdk_get_thread(), nvmf_ns_change_msg, ctx);
1334 		} else {
1335 			free(ctx);
1336 			SPDK_ERRLOG("Unable to pause subsystem to process namespace removal!\n");
1337 		}
1338 	}
1339 }
1340 
1341 static void
1342 nvmf_ns_hot_remove(void *remove_ctx)
1343 {
1344 	struct spdk_nvmf_ns *ns = remove_ctx;
1345 	struct subsystem_ns_change_ctx *ns_ctx;
1346 	int rc;
1347 
1348 	/* We have to allocate a new context because this op
1349 	 * is asynchronous and we could lose the ns in the middle.
1350 	 */
1351 	ns_ctx = calloc(1, sizeof(struct subsystem_ns_change_ctx));
1352 	if (!ns_ctx) {
1353 		SPDK_ERRLOG("Unable to allocate context to process namespace removal!\n");
1354 		return;
1355 	}
1356 
1357 	ns_ctx->subsystem = ns->subsystem;
1358 	ns_ctx->nsid = ns->opts.nsid;
1359 	ns_ctx->cb_fn = _nvmf_ns_hot_remove;
1360 
1361 	rc = spdk_nvmf_subsystem_pause(ns->subsystem, ns_ctx->nsid, _nvmf_ns_hot_remove, ns_ctx);
1362 	if (rc) {
1363 		if (rc == -EBUSY) {
1364 			/* Try again, this is not a permanent situation. */
1365 			spdk_thread_send_msg(spdk_get_thread(), nvmf_ns_change_msg, ns_ctx);
1366 		} else {
1367 			SPDK_ERRLOG("Unable to pause subsystem to process namespace removal!\n");
1368 			free(ns_ctx);
1369 		}
1370 	}
1371 }
1372 
/* Pause-completion callback for resize: broadcast the namespace change to
 * all controllers, resume the subsystem, and drop the context. */
static void
_nvmf_ns_resize(struct spdk_nvmf_subsystem *subsystem, void *cb_arg, int status)
{
	struct subsystem_ns_change_ctx *ctx = cb_arg;

	nvmf_subsystem_ns_changed(subsystem, ctx->nsid);
	spdk_nvmf_subsystem_resume(subsystem, NULL, NULL);

	free(ctx);
}
1383 
1384 static void
1385 nvmf_ns_resize(void *event_ctx)
1386 {
1387 	struct spdk_nvmf_ns *ns = event_ctx;
1388 	struct subsystem_ns_change_ctx *ns_ctx;
1389 	int rc;
1390 
1391 	/* We have to allocate a new context because this op
1392 	 * is asynchronous and we could lose the ns in the middle.
1393 	 */
1394 	ns_ctx = calloc(1, sizeof(struct subsystem_ns_change_ctx));
1395 	if (!ns_ctx) {
1396 		SPDK_ERRLOG("Unable to allocate context to process namespace removal!\n");
1397 		return;
1398 	}
1399 
1400 	ns_ctx->subsystem = ns->subsystem;
1401 	ns_ctx->nsid = ns->opts.nsid;
1402 	ns_ctx->cb_fn = _nvmf_ns_resize;
1403 
1404 	/* Specify 0 for the nsid here, because we do not need to pause the namespace.
1405 	 * Namespaces can only be resized bigger, so there is no need to quiesce I/O.
1406 	 */
1407 	rc = spdk_nvmf_subsystem_pause(ns->subsystem, 0, _nvmf_ns_resize, ns_ctx);
1408 	if (rc) {
1409 		if (rc == -EBUSY) {
1410 			/* Try again, this is not a permanent situation. */
1411 			spdk_thread_send_msg(spdk_get_thread(), nvmf_ns_change_msg, ns_ctx);
1412 		} else {
1413 			SPDK_ERRLOG("Unable to pause subsystem to process namespace resize!\n");
1414 			free(ns_ctx);
1415 		}
1416 	}
1417 }
1418 
1419 static void
1420 nvmf_ns_event(enum spdk_bdev_event_type type,
1421 	      struct spdk_bdev *bdev,
1422 	      void *event_ctx)
1423 {
1424 	SPDK_DEBUGLOG(nvmf, "Bdev event: type %d, name %s, subsystem_id %d, ns_id %d\n",
1425 		      type,
1426 		      spdk_bdev_get_name(bdev),
1427 		      ((struct spdk_nvmf_ns *)event_ctx)->subsystem->id,
1428 		      ((struct spdk_nvmf_ns *)event_ctx)->nsid);
1429 
1430 	switch (type) {
1431 	case SPDK_BDEV_EVENT_REMOVE:
1432 		nvmf_ns_hot_remove(event_ctx);
1433 		break;
1434 	case SPDK_BDEV_EVENT_RESIZE:
1435 		nvmf_ns_resize(event_ctx);
1436 		break;
1437 	default:
1438 		SPDK_NOTICELOG("Unsupported bdev event: type %d\n", type);
1439 		break;
1440 	}
1441 }
1442 
/*
 * Initialize a namespace options structure to defaults. opts_size is the
 * caller's view of sizeof(*opts); only fields that fit within it are set,
 * which keeps old callers compatible when new fields are appended.
 */
void
spdk_nvmf_ns_opts_get_defaults(struct spdk_nvmf_ns_opts *opts, size_t opts_size)
{
	if (!opts) {
		SPDK_ERRLOG("opts should not be NULL.\n");
		return;
	}

	if (!opts_size) {
		SPDK_ERRLOG("opts_size should not be zero.\n");
		return;
	}

	memset(opts, 0, opts_size);
	opts->opts_size = opts_size;

/* FIELD_OK: true when the caller's struct is large enough to hold 'field'. */
#define FIELD_OK(field) \
	offsetof(struct spdk_nvmf_ns_opts, field) + sizeof(opts->field) <= opts_size

#define SET_FIELD(field, value) \
	if (FIELD_OK(field)) { \
		opts->field = value; \
	} \

	/* All current fields are set to 0 by default. */
	SET_FIELD(nsid, 0);
	if (FIELD_OK(nguid)) {
		memset(opts->nguid, 0, sizeof(opts->nguid));
	}
	if (FIELD_OK(eui64)) {
		memset(opts->eui64, 0, sizeof(opts->eui64));
	}
	if (FIELD_OK(uuid)) {
		memset(&opts->uuid, 0, sizeof(opts->uuid));
	}
	SET_FIELD(anagrpid, 0);

#undef FIELD_OK
#undef SET_FIELD
}
1483 
/*
 * Copy user-supplied namespace options into an internal, fully-defaulted
 * opts structure. Only fields that fit within user_opts->opts_size are
 * copied, so callers built against older (smaller) struct layouts work.
 */
static void
nvmf_ns_opts_copy(struct spdk_nvmf_ns_opts *opts,
		  const struct spdk_nvmf_ns_opts *user_opts,
		  size_t opts_size)
{
/* FIELD_OK: true when the user's struct is large enough to contain 'field'. */
#define FIELD_OK(field)	\
	offsetof(struct spdk_nvmf_ns_opts, field) + sizeof(opts->field) <= user_opts->opts_size

#define SET_FIELD(field) \
	if (FIELD_OK(field)) { \
		opts->field = user_opts->field;	\
	} \

	SET_FIELD(nsid);
	if (FIELD_OK(nguid)) {
		memcpy(opts->nguid, user_opts->nguid, sizeof(opts->nguid));
	}
	if (FIELD_OK(eui64)) {
		memcpy(opts->eui64, user_opts->eui64, sizeof(opts->eui64));
	}
	if (FIELD_OK(uuid)) {
		memcpy(&opts->uuid, &user_opts->uuid, sizeof(opts->uuid));
	}
	SET_FIELD(anagrpid);

	opts->opts_size = user_opts->opts_size;

	/* We should not remove this statement, but need to update the assert statement
	 * if we add a new field, and also add a corresponding SET_FIELD statement.
	 */
	SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_ns_opts) == 64, "Incorrect size");

#undef FIELD_OK
#undef SET_FIELD
}
1519 
/* Dummy bdev module used to claim bdevs on behalf of the NVMe-oF target. */
static struct spdk_bdev_module ns_bdev_module = {
	.name	= "NVMe-oF Target",
};

/* Forward declarations; defined with the reservation code below. */
static int
nvmf_ns_load_reservation(const char *file, struct spdk_nvmf_reservation_info *info);
static int
nvmf_ns_reservation_restore(struct spdk_nvmf_ns *ns, struct spdk_nvmf_reservation_info *info);
1529 
/*
 * Add a namespace backed by bdev_name to the subsystem.
 * Returns the assigned NSID (1-based) on success, 0 on any failure.
 * The subsystem must be INACTIVE or PAUSED. If ptpl_file is non-NULL,
 * persisted reservation state is restored from it.
 */
uint32_t
spdk_nvmf_subsystem_add_ns_ext(struct spdk_nvmf_subsystem *subsystem, const char *bdev_name,
			       const struct spdk_nvmf_ns_opts *user_opts, size_t opts_size,
			       const char *ptpl_file)
{
	struct spdk_nvmf_transport *transport;
	struct spdk_nvmf_ns_opts opts;
	struct spdk_nvmf_ns *ns;
	struct spdk_nvmf_reservation_info info = {0};
	int rc;

	if (!(subsystem->state == SPDK_NVMF_SUBSYSTEM_INACTIVE ||
	      subsystem->state == SPDK_NVMF_SUBSYSTEM_PAUSED)) {
		return 0;
	}

	/* Start from defaults, then overlay whatever the caller provided. */
	spdk_nvmf_ns_opts_get_defaults(&opts, sizeof(opts));
	if (user_opts) {
		nvmf_ns_opts_copy(&opts, user_opts, opts_size);
	}

	if (opts.nsid == SPDK_NVME_GLOBAL_NS_TAG) {
		SPDK_ERRLOG("Invalid NSID %" PRIu32 "\n", opts.nsid);
		return 0;
	}

	if (opts.nsid == 0) {
		/*
		 * NSID not specified - find a free index.
		 *
		 * If no free slots are found, opts.nsid will be subsystem->max_nsid + 1, which will
		 * expand max_nsid if possible.
		 */
		for (opts.nsid = 1; opts.nsid <= subsystem->max_nsid; opts.nsid++) {
			if (_nvmf_subsystem_get_ns(subsystem, opts.nsid) == NULL) {
				break;
			}
		}
	}

	if (_nvmf_subsystem_get_ns(subsystem, opts.nsid)) {
		SPDK_ERRLOG("Requested NSID %" PRIu32 " already in use\n", opts.nsid);
		return 0;
	}

	if (opts.nsid > subsystem->max_nsid) {
		SPDK_ERRLOG("NSID greater than maximum not allowed\n");
		return 0;
	}

	/* An ANA group ID of 0 means "use a private group matching the NSID". */
	if (opts.anagrpid == 0) {
		opts.anagrpid = opts.nsid;
	}

	if (opts.anagrpid > subsystem->max_nsid) {
		SPDK_ERRLOG("ANAGRPID greater than maximum NSID not allowed\n");
		return 0;
	}

	ns = calloc(1, sizeof(*ns));
	if (ns == NULL) {
		SPDK_ERRLOG("Namespace allocation failed\n");
		return 0;
	}

	/* Open the bdev write-enabled; nvmf_ns_event handles remove/resize. */
	rc = spdk_bdev_open_ext(bdev_name, true, nvmf_ns_event, ns, &ns->desc);
	if (rc != 0) {
		SPDK_ERRLOG("Subsystem %s: bdev %s cannot be opened, error=%d\n",
			    subsystem->subnqn, bdev_name, rc);
		free(ns);
		return 0;
	}

	ns->bdev = spdk_bdev_desc_get_bdev(ns->desc);

	/* Only interleaved metadata layouts are supported. */
	if (spdk_bdev_get_md_size(ns->bdev) != 0 && !spdk_bdev_is_md_interleaved(ns->bdev)) {
		SPDK_ERRLOG("Can't attach bdev with separate metadata.\n");
		spdk_bdev_close(ns->desc);
		free(ns);
		return 0;
	}

	/* Claim the bdev so no other module can take exclusive ownership. */
	rc = spdk_bdev_module_claim_bdev(ns->bdev, ns->desc, &ns_bdev_module);
	if (rc != 0) {
		spdk_bdev_close(ns->desc);
		free(ns);
		return 0;
	}

	/* Cache the zcopy capability of the bdev device */
	ns->zcopy = spdk_bdev_io_type_supported(ns->bdev, SPDK_BDEV_IO_TYPE_ZCOPY);

	if (spdk_mem_all_zero(&opts.uuid, sizeof(opts.uuid))) {
		opts.uuid = *spdk_bdev_get_uuid(ns->bdev);
	}

	/* if nguid descriptor is supported by bdev module (nvme) then uuid = nguid */
	if (spdk_mem_all_zero(opts.nguid, sizeof(opts.nguid))) {
		SPDK_STATIC_ASSERT(sizeof(opts.nguid) == sizeof(opts.uuid), "size mismatch");
		memcpy(opts.nguid, spdk_bdev_get_uuid(ns->bdev), sizeof(opts.nguid));
	}

	ns->opts = opts;
	ns->subsystem = subsystem;
	subsystem->ns[opts.nsid - 1] = ns;
	ns->nsid = opts.nsid;
	ns->anagrpid = opts.anagrpid;
	subsystem->ana_group[ns->anagrpid - 1]++;
	TAILQ_INIT(&ns->registrants);
	if (ptpl_file) {
		/* A missing file is not an error; only a failed restore is. */
		rc = nvmf_ns_load_reservation(ptpl_file, &info);
		if (!rc) {
			rc = nvmf_ns_reservation_restore(ns, &info);
			if (rc) {
				SPDK_ERRLOG("Subsystem restore reservation failed\n");
				goto err_ns_reservation_restore;
			}
		}
		ns->ptpl_file = strdup(ptpl_file);
		if (!ns->ptpl_file) {
			SPDK_ERRLOG("Namespace ns->ptpl_file allocation failed\n");
			goto err_strdup;
		}
	}

	/* Give each transport a chance to set up (or veto) the namespace. */
	for (transport = spdk_nvmf_transport_get_first(subsystem->tgt); transport;
	     transport = spdk_nvmf_transport_get_next(transport)) {
		if (transport->ops->subsystem_add_ns) {
			rc = transport->ops->subsystem_add_ns(transport, subsystem, ns);
			if (rc) {
				SPDK_ERRLOG("Namespace attachment is not allowed by %s transport\n", transport->ops->name);
				goto err_subsystem_add_ns;
			}
		}
	}

	SPDK_DEBUGLOG(nvmf, "Subsystem %s: bdev %s assigned nsid %" PRIu32 "\n",
		      spdk_nvmf_subsystem_get_nqn(subsystem),
		      bdev_name,
		      opts.nsid);

	nvmf_subsystem_ns_changed(subsystem, opts.nsid);

	return opts.nsid;

	/* Unwind in reverse order of construction. */
err_subsystem_add_ns:
	free(ns->ptpl_file);
err_strdup:
	nvmf_ns_reservation_clear_all_registrants(ns);
err_ns_reservation_restore:
	subsystem->ns[opts.nsid - 1] = NULL;
	spdk_bdev_module_release_bdev(ns->bdev);
	spdk_bdev_close(ns->desc);
	free(ns);
	return 0;

}
1687 
1688 static uint32_t
1689 nvmf_subsystem_get_next_allocated_nsid(struct spdk_nvmf_subsystem *subsystem,
1690 				       uint32_t prev_nsid)
1691 {
1692 	uint32_t nsid;
1693 
1694 	if (prev_nsid >= subsystem->max_nsid) {
1695 		return 0;
1696 	}
1697 
1698 	for (nsid = prev_nsid + 1; nsid <= subsystem->max_nsid; nsid++) {
1699 		if (subsystem->ns[nsid - 1]) {
1700 			return nsid;
1701 		}
1702 	}
1703 
1704 	return 0;
1705 }
1706 
/* Return the namespace with the lowest allocated NSID, or NULL if none. */
struct spdk_nvmf_ns *
spdk_nvmf_subsystem_get_first_ns(struct spdk_nvmf_subsystem *subsystem)
{
	return _nvmf_subsystem_get_ns(subsystem,
				      nvmf_subsystem_get_next_allocated_nsid(subsystem, 0));
}
1715 
1716 struct spdk_nvmf_ns *
1717 spdk_nvmf_subsystem_get_next_ns(struct spdk_nvmf_subsystem *subsystem,
1718 				struct spdk_nvmf_ns *prev_ns)
1719 {
1720 	uint32_t next_nsid;
1721 
1722 	next_nsid = nvmf_subsystem_get_next_allocated_nsid(subsystem, prev_ns->opts.nsid);
1723 	return _nvmf_subsystem_get_ns(subsystem, next_nsid);
1724 }
1725 
/* Public lookup wrapper around the internal NSID-indexed table. */
struct spdk_nvmf_ns *
spdk_nvmf_subsystem_get_ns(struct spdk_nvmf_subsystem *subsystem, uint32_t nsid)
{
	return _nvmf_subsystem_get_ns(subsystem, nsid);
}

uint32_t
spdk_nvmf_ns_get_id(const struct spdk_nvmf_ns *ns)
{
	return ns->opts.nsid;
}

struct spdk_bdev *
spdk_nvmf_ns_get_bdev(struct spdk_nvmf_ns *ns)
{
	return ns->bdev;
}

/* Copy the namespace's options into a caller-provided buffer. Copies at
 * most opts_size bytes and zero-fills the rest, so callers with older
 * (smaller) or newer (larger) struct layouts both work. */
void
spdk_nvmf_ns_get_opts(const struct spdk_nvmf_ns *ns, struct spdk_nvmf_ns_opts *opts,
		      size_t opts_size)
{
	memset(opts, 0, opts_size);
	memcpy(opts, &ns->opts, spdk_min(sizeof(ns->opts), opts_size));
}

const char *
spdk_nvmf_subsystem_get_sn(const struct spdk_nvmf_subsystem *subsystem)
{
	return subsystem->sn;
}
1757 
1758 int
1759 spdk_nvmf_subsystem_set_sn(struct spdk_nvmf_subsystem *subsystem, const char *sn)
1760 {
1761 	size_t len, max_len;
1762 
1763 	max_len = sizeof(subsystem->sn) - 1;
1764 	len = strlen(sn);
1765 	if (len > max_len) {
1766 		SPDK_DEBUGLOG(nvmf, "Invalid sn \"%s\": length %zu > max %zu\n",
1767 			      sn, len, max_len);
1768 		return -1;
1769 	}
1770 
1771 	if (!nvmf_valid_ascii_string(sn, len)) {
1772 		SPDK_DEBUGLOG(nvmf, "Non-ASCII sn\n");
1773 		SPDK_LOGDUMP(nvmf, "sn", sn, len);
1774 		return -1;
1775 	}
1776 
1777 	snprintf(subsystem->sn, sizeof(subsystem->sn), "%s", sn);
1778 
1779 	return 0;
1780 }
1781 
/* Return the subsystem's model number string (owned by the subsystem). */
const char *
spdk_nvmf_subsystem_get_mn(const struct spdk_nvmf_subsystem *subsystem)
{
	return subsystem->mn;
}
1787 
1788 int
1789 spdk_nvmf_subsystem_set_mn(struct spdk_nvmf_subsystem *subsystem, const char *mn)
1790 {
1791 	size_t len, max_len;
1792 
1793 	if (mn == NULL) {
1794 		mn = MODEL_NUMBER_DEFAULT;
1795 	}
1796 	max_len = sizeof(subsystem->mn) - 1;
1797 	len = strlen(mn);
1798 	if (len > max_len) {
1799 		SPDK_DEBUGLOG(nvmf, "Invalid mn \"%s\": length %zu > max %zu\n",
1800 			      mn, len, max_len);
1801 		return -1;
1802 	}
1803 
1804 	if (!nvmf_valid_ascii_string(mn, len)) {
1805 		SPDK_DEBUGLOG(nvmf, "Non-ASCII mn\n");
1806 		SPDK_LOGDUMP(nvmf, "mn", mn, len);
1807 		return -1;
1808 	}
1809 
1810 	snprintf(subsystem->mn, sizeof(subsystem->mn), "%s", mn);
1811 
1812 	return 0;
1813 }
1814 
/* Simple subsystem accessors. */
const char *
spdk_nvmf_subsystem_get_nqn(const struct spdk_nvmf_subsystem *subsystem)
{
	return subsystem->subnqn;
}

enum spdk_nvmf_subtype spdk_nvmf_subsystem_get_type(struct spdk_nvmf_subsystem *subsystem)
{
	return subsystem->subtype;
}

uint32_t
spdk_nvmf_subsystem_get_max_nsid(struct spdk_nvmf_subsystem *subsystem)
{
	return subsystem->max_nsid;
}
1831 
/*
 * Set the controller ID range allocated by this subsystem. Only allowed
 * while the subsystem is inactive. Returns -EAGAIN when active,
 * -EINVAL on an empty or out-of-spec range, 0 on success.
 */
int
nvmf_subsystem_set_cntlid_range(struct spdk_nvmf_subsystem *subsystem,
				uint16_t min_cntlid, uint16_t max_cntlid)
{
	if (subsystem->state != SPDK_NVMF_SUBSYSTEM_INACTIVE) {
		return -EAGAIN;
	}

	if (min_cntlid > max_cntlid) {
		return -EINVAL;
	}
	/* The spec reserves cntlid values in the range FFF0h to FFFFh. */
	if (min_cntlid < NVMF_MIN_CNTLID || min_cntlid > NVMF_MAX_CNTLID ||
	    max_cntlid < NVMF_MIN_CNTLID || max_cntlid > NVMF_MAX_CNTLID) {
		return -EINVAL;
	}
	subsystem->min_cntlid = min_cntlid;
	subsystem->max_cntlid = max_cntlid;
	/* Reset the allocation cursor if it now falls outside the range;
	 * min_cntlid - 1 makes the next generated cntlid equal min_cntlid. */
	if (subsystem->next_cntlid < min_cntlid || subsystem->next_cntlid > max_cntlid - 1) {
		subsystem->next_cntlid = min_cntlid - 1;
	}

	return 0;
}
1856 
1857 static uint16_t
1858 nvmf_subsystem_gen_cntlid(struct spdk_nvmf_subsystem *subsystem)
1859 {
1860 	int count;
1861 
1862 	/*
1863 	 * In the worst case, we might have to try all CNTLID values between min_cntlid and max_cntlid
1864 	 * before we find one that is unused (or find that all values are in use).
1865 	 */
1866 	for (count = 0; count < subsystem->max_cntlid - subsystem->min_cntlid + 1; count++) {
1867 		subsystem->next_cntlid++;
1868 		if (subsystem->next_cntlid > subsystem->max_cntlid) {
1869 			subsystem->next_cntlid = subsystem->min_cntlid;
1870 		}
1871 
1872 		/* Check if a controller with this cntlid currently exists. */
1873 		if (nvmf_subsystem_get_ctrlr(subsystem, subsystem->next_cntlid) == NULL) {
1874 			/* Found unused cntlid */
1875 			return subsystem->next_cntlid;
1876 		}
1877 	}
1878 
1879 	/* All valid cntlid values are in use. */
1880 	return 0xFFFF;
1881 }
1882 
/* Assign a controller ID to ctrlr and link it into the subsystem.
 * Returns -EBUSY when no cntlid is available (ctrlr->cntlid is then
 * left set to the 0xFFFF sentinel). */
int
nvmf_subsystem_add_ctrlr(struct spdk_nvmf_subsystem *subsystem, struct spdk_nvmf_ctrlr *ctrlr)
{
	ctrlr->cntlid = nvmf_subsystem_gen_cntlid(subsystem);
	if (ctrlr->cntlid == 0xFFFF) {
		/* Unable to get a cntlid */
		SPDK_ERRLOG("Reached max simultaneous ctrlrs\n");
		return -EBUSY;
	}

	TAILQ_INSERT_TAIL(&subsystem->ctrlrs, ctrlr, link);

	return 0;
}
1897 
/* Unlink a controller from its subsystem. Must run on the subsystem's
 * thread; the controller is not freed here. */
void
nvmf_subsystem_remove_ctrlr(struct spdk_nvmf_subsystem *subsystem,
			    struct spdk_nvmf_ctrlr *ctrlr)
{
	assert(spdk_get_thread() == subsystem->thread);
	assert(subsystem == ctrlr->subsys);
	SPDK_DEBUGLOG(nvmf, "remove ctrlr %p from subsys %p %s\n", ctrlr, subsystem, subsystem->subnqn);
	TAILQ_REMOVE(&subsystem->ctrlrs, ctrlr, link);
}
1907 
1908 struct spdk_nvmf_ctrlr *
1909 nvmf_subsystem_get_ctrlr(struct spdk_nvmf_subsystem *subsystem, uint16_t cntlid)
1910 {
1911 	struct spdk_nvmf_ctrlr *ctrlr;
1912 
1913 	TAILQ_FOREACH(ctrlr, &subsystem->ctrlrs, link) {
1914 		if (ctrlr->cntlid == cntlid) {
1915 			return ctrlr;
1916 		}
1917 	}
1918 
1919 	return NULL;
1920 }
1921 
/* Simple accessors for subsystem limits. */
uint32_t
spdk_nvmf_subsystem_get_max_namespaces(const struct spdk_nvmf_subsystem *subsystem)
{
	/* The namespace table is sized by max_nsid, so it doubles as the
	 * maximum namespace count. */
	return subsystem->max_nsid;
}

uint16_t
spdk_nvmf_subsystem_get_min_cntlid(const struct spdk_nvmf_subsystem *subsystem)
{
	return subsystem->min_cntlid;
}

uint16_t
spdk_nvmf_subsystem_get_max_cntlid(const struct spdk_nvmf_subsystem *subsystem)
{
	return subsystem->max_cntlid;
}
1939 
/* Intermediate structures used only while decoding the JSON persist-through-
 * power-loss (PTPL) reservation file; string fields are heap-allocated by
 * spdk_json_decode_string and freed by the loader. */
struct _nvmf_ns_registrant {
	uint64_t		rkey;
	char			*host_uuid;
};

struct _nvmf_ns_registrants {
	size_t				num_regs;
	struct _nvmf_ns_registrant	reg[SPDK_NVMF_MAX_NUM_REGISTRANTS];
};

struct _nvmf_ns_reservation {
	bool					ptpl_activated;
	enum spdk_nvme_reservation_type		rtype;
	uint64_t				crkey;
	char					*bdev_uuid;
	char					*holder_uuid;
	struct _nvmf_ns_registrants		regs;
};

/* JSON schema for a single registrant entry. */
static const struct spdk_json_object_decoder nvmf_ns_pr_reg_decoders[] = {
	{"rkey", offsetof(struct _nvmf_ns_registrant, rkey), spdk_json_decode_uint64},
	{"host_uuid", offsetof(struct _nvmf_ns_registrant, host_uuid), spdk_json_decode_string},
};
1963 
/* Decode one registrant object from the PTPL JSON file. */
static int
nvmf_decode_ns_pr_reg(const struct spdk_json_val *val, void *out)
{
	struct _nvmf_ns_registrant *reg = out;

	return spdk_json_decode_object(val, nvmf_ns_pr_reg_decoders,
				       SPDK_COUNTOF(nvmf_ns_pr_reg_decoders), reg);
}

/* Decode the registrant array, bounded by SPDK_NVMF_MAX_NUM_REGISTRANTS. */
static int
nvmf_decode_ns_pr_regs(const struct spdk_json_val *val, void *out)
{
	struct _nvmf_ns_registrants *regs = out;

	return spdk_json_decode_array(val, nvmf_decode_ns_pr_reg, regs->reg,
				      SPDK_NVMF_MAX_NUM_REGISTRANTS, &regs->num_regs,
				      sizeof(struct _nvmf_ns_registrant));
}

/* Top-level JSON schema of the reservation persist file; fields marked
 * optional (trailing true) may be absent. */
static const struct spdk_json_object_decoder nvmf_ns_pr_decoders[] = {
	{"ptpl", offsetof(struct _nvmf_ns_reservation, ptpl_activated), spdk_json_decode_bool, true},
	{"rtype", offsetof(struct _nvmf_ns_reservation, rtype), spdk_json_decode_uint32, true},
	{"crkey", offsetof(struct _nvmf_ns_reservation, crkey), spdk_json_decode_uint64, true},
	{"bdev_uuid", offsetof(struct _nvmf_ns_reservation, bdev_uuid), spdk_json_decode_string},
	{"holder_uuid", offsetof(struct _nvmf_ns_reservation, holder_uuid), spdk_json_decode_string, true},
	{"registrants", offsetof(struct _nvmf_ns_reservation, regs), nvmf_decode_ns_pr_regs},
};
1991 
1992 static int
1993 nvmf_ns_load_reservation(const char *file, struct spdk_nvmf_reservation_info *info)
1994 {
1995 	FILE *fd;
1996 	size_t json_size;
1997 	ssize_t values_cnt, rc;
1998 	void *json = NULL, *end;
1999 	struct spdk_json_val *values = NULL;
2000 	struct _nvmf_ns_reservation res = {};
2001 	uint32_t i;
2002 
2003 	fd = fopen(file, "r");
2004 	/* It's not an error if the file does not exist */
2005 	if (!fd) {
2006 		SPDK_NOTICELOG("File %s does not exist\n", file);
2007 		return -ENOENT;
2008 	}
2009 
2010 	/* Load all persist file contents into a local buffer */
2011 	json = spdk_posix_file_load(fd, &json_size);
2012 	fclose(fd);
2013 	if (!json) {
2014 		SPDK_ERRLOG("Load persit file %s failed\n", file);
2015 		return -ENOMEM;
2016 	}
2017 
2018 	rc = spdk_json_parse(json, json_size, NULL, 0, &end, 0);
2019 	if (rc < 0) {
2020 		SPDK_NOTICELOG("Parsing JSON configuration failed (%zd)\n", rc);
2021 		goto exit;
2022 	}
2023 
2024 	values_cnt = rc;
2025 	values = calloc(values_cnt, sizeof(struct spdk_json_val));
2026 	if (values == NULL) {
2027 		goto exit;
2028 	}
2029 
2030 	rc = spdk_json_parse(json, json_size, values, values_cnt, &end, 0);
2031 	if (rc != values_cnt) {
2032 		SPDK_ERRLOG("Parsing JSON configuration failed (%zd)\n", rc);
2033 		goto exit;
2034 	}
2035 
2036 	/* Decode json */
2037 	if (spdk_json_decode_object(values, nvmf_ns_pr_decoders,
2038 				    SPDK_COUNTOF(nvmf_ns_pr_decoders),
2039 				    &res)) {
2040 		SPDK_ERRLOG("Invalid objects in the persist file %s\n", file);
2041 		rc = -EINVAL;
2042 		goto exit;
2043 	}
2044 
2045 	if (res.regs.num_regs > SPDK_NVMF_MAX_NUM_REGISTRANTS) {
2046 		SPDK_ERRLOG("Can only support up to %u registrants\n", SPDK_NVMF_MAX_NUM_REGISTRANTS);
2047 		rc = -ERANGE;
2048 		goto exit;
2049 	}
2050 
2051 	rc = 0;
2052 	info->ptpl_activated = res.ptpl_activated;
2053 	info->rtype = res.rtype;
2054 	info->crkey = res.crkey;
2055 	snprintf(info->bdev_uuid, sizeof(info->bdev_uuid), "%s", res.bdev_uuid);
2056 	snprintf(info->holder_uuid, sizeof(info->holder_uuid), "%s", res.holder_uuid);
2057 	info->num_regs = res.regs.num_regs;
2058 	for (i = 0; i < res.regs.num_regs; i++) {
2059 		info->registrants[i].rkey = res.regs.reg[i].rkey;
2060 		snprintf(info->registrants[i].host_uuid, sizeof(info->registrants[i].host_uuid), "%s",
2061 			 res.regs.reg[i].host_uuid);
2062 	}
2063 
2064 exit:
2065 	free(json);
2066 	free(values);
2067 	free(res.bdev_uuid);
2068 	free(res.holder_uuid);
2069 	for (i = 0; i < res.regs.num_regs; i++) {
2070 		free(res.regs.reg[i].host_uuid);
2071 	}
2072 
2073 	return rc;
2074 }
2075 
2076 static bool
2077 nvmf_ns_reservation_all_registrants_type(struct spdk_nvmf_ns *ns);
2078 
/* Restore in-memory reservation state for a namespace from previously loaded
 * persist info: validates the bdev UUID, then rebuilds the registrant list
 * and re-establishes the reservation holder.  Returns 0 on success or a
 * negative errno on failure.
 */
static int
nvmf_ns_reservation_restore(struct spdk_nvmf_ns *ns, struct spdk_nvmf_reservation_info *info)
{
	uint32_t i;
	struct spdk_nvmf_registrant *reg, *holder = NULL;
	struct spdk_uuid bdev_uuid, holder_uuid;

	SPDK_DEBUGLOG(nvmf, "NSID %u, PTPL %u, Number of registrants %u\n",
		      ns->nsid, info->ptpl_activated, info->num_regs);

	/* it's not an error */
	if (!info->ptpl_activated || !info->num_regs) {
		return 0;
	}

	/* NOTE(review): spdk_uuid_parse() return values are ignored here and
	 * below — a malformed UUID string in the persist file would be compared
	 * without having been fully parsed; confirm the inputs are validated
	 * upstream.
	 */
	spdk_uuid_parse(&bdev_uuid, info->bdev_uuid);
	if (spdk_uuid_compare(&bdev_uuid, spdk_bdev_get_uuid(ns->bdev))) {
		SPDK_ERRLOG("Existing bdev UUID is not same with configuration file\n");
		return -EINVAL;
	}

	ns->crkey = info->crkey;
	ns->rtype = info->rtype;
	ns->ptpl_activated = info->ptpl_activated;
	spdk_uuid_parse(&holder_uuid, info->holder_uuid);

	SPDK_DEBUGLOG(nvmf, "Bdev UUID %s\n", info->bdev_uuid);
	if (info->rtype) {
		SPDK_DEBUGLOG(nvmf, "Holder UUID %s, RTYPE %u, RKEY 0x%"PRIx64"\n",
			      info->holder_uuid, info->rtype, info->crkey);
	}

	for (i = 0; i < info->num_regs; i++) {
		reg = calloc(1, sizeof(*reg));
		if (!reg) {
			/* NOTE(review): registrants already inserted stay on the
			 * list on this error path — presumably the caller tears
			 * the namespace down; confirm against callers.
			 */
			return -ENOMEM;
		}
		spdk_uuid_parse(&reg->hostid, info->registrants[i].host_uuid);
		reg->rkey = info->registrants[i].rkey;
		TAILQ_INSERT_TAIL(&ns->registrants, reg, link);
		/* Remember the registrant whose hostid matches the persisted holder */
		if (!spdk_uuid_compare(&holder_uuid, &reg->hostid)) {
			holder = reg;
		}
		SPDK_DEBUGLOG(nvmf, "Registrant RKEY 0x%"PRIx64", Host UUID %s\n",
			      info->registrants[i].rkey, info->registrants[i].host_uuid);
	}

	/* For all-registrants reservation types any registrant acts as holder,
	 * so pick the first one; otherwise use the matched holder (may be NULL).
	 */
	if (nvmf_ns_reservation_all_registrants_type(ns)) {
		ns->holder = TAILQ_FIRST(&ns->registrants);
	} else {
		ns->holder = holder;
	}

	return 0;
}
2134 
/* JSON write callback: persist the serialized reservation data to the file
 * named by cb_ctx.  Returns 0 on success, -ENOENT if the file cannot be
 * opened, or -1 on a short or failed write.
 */
static int
nvmf_ns_json_write_cb(void *cb_ctx, const void *data, size_t size)
{
	char *file = cb_ctx;
	size_t rc;
	FILE *fd;

	fd = fopen(file, "w");
	if (!fd) {
		SPDK_ERRLOG("Can't open file %s for write\n", file);
		return -ENOENT;
	}
	rc = fwrite(data, 1, size, fd);
	/* fclose() flushes buffered data; a write error may only surface here,
	 * so its return value must be checked as well.
	 */
	if (fclose(fd) != 0) {
		return -1;
	}

	return rc == size ? 0 : -1;
}
2152 
2153 static int
2154 nvmf_ns_reservation_update(const char *file, struct spdk_nvmf_reservation_info *info)
2155 {
2156 	struct spdk_json_write_ctx *w;
2157 	uint32_t i;
2158 	int rc = 0;
2159 
2160 	w = spdk_json_write_begin(nvmf_ns_json_write_cb, (void *)file, 0);
2161 	if (w == NULL) {
2162 		return -ENOMEM;
2163 	}
2164 	/* clear the configuration file */
2165 	if (!info->ptpl_activated) {
2166 		goto exit;
2167 	}
2168 
2169 	spdk_json_write_object_begin(w);
2170 	spdk_json_write_named_bool(w, "ptpl", info->ptpl_activated);
2171 	spdk_json_write_named_uint32(w, "rtype", info->rtype);
2172 	spdk_json_write_named_uint64(w, "crkey", info->crkey);
2173 	spdk_json_write_named_string(w, "bdev_uuid", info->bdev_uuid);
2174 	spdk_json_write_named_string(w, "holder_uuid", info->holder_uuid);
2175 
2176 	spdk_json_write_named_array_begin(w, "registrants");
2177 	for (i = 0; i < info->num_regs; i++) {
2178 		spdk_json_write_object_begin(w);
2179 		spdk_json_write_named_uint64(w, "rkey", info->registrants[i].rkey);
2180 		spdk_json_write_named_string(w, "host_uuid", info->registrants[i].host_uuid);
2181 		spdk_json_write_object_end(w);
2182 	}
2183 	spdk_json_write_array_end(w);
2184 	spdk_json_write_object_end(w);
2185 
2186 exit:
2187 	rc = spdk_json_write_end(w);
2188 	return rc;
2189 }
2190 
2191 static int
2192 nvmf_ns_update_reservation_info(struct spdk_nvmf_ns *ns)
2193 {
2194 	struct spdk_nvmf_reservation_info info;
2195 	struct spdk_nvmf_registrant *reg, *tmp;
2196 	uint32_t i = 0;
2197 
2198 	assert(ns != NULL);
2199 
2200 	if (!ns->bdev || !ns->ptpl_file) {
2201 		return 0;
2202 	}
2203 
2204 	memset(&info, 0, sizeof(info));
2205 	spdk_uuid_fmt_lower(info.bdev_uuid, sizeof(info.bdev_uuid), spdk_bdev_get_uuid(ns->bdev));
2206 
2207 	if (ns->rtype) {
2208 		info.rtype = ns->rtype;
2209 		info.crkey = ns->crkey;
2210 		if (!nvmf_ns_reservation_all_registrants_type(ns)) {
2211 			assert(ns->holder != NULL);
2212 			spdk_uuid_fmt_lower(info.holder_uuid, sizeof(info.holder_uuid), &ns->holder->hostid);
2213 		}
2214 	}
2215 
2216 	TAILQ_FOREACH_SAFE(reg, &ns->registrants, link, tmp) {
2217 		spdk_uuid_fmt_lower(info.registrants[i].host_uuid, sizeof(info.registrants[i].host_uuid),
2218 				    &reg->hostid);
2219 		info.registrants[i++].rkey = reg->rkey;
2220 	}
2221 
2222 	info.num_regs = i;
2223 	info.ptpl_activated = ns->ptpl_activated;
2224 
2225 	return nvmf_ns_reservation_update(ns->ptpl_file, &info);
2226 }
2227 
2228 static struct spdk_nvmf_registrant *
2229 nvmf_ns_reservation_get_registrant(struct spdk_nvmf_ns *ns,
2230 				   struct spdk_uuid *uuid)
2231 {
2232 	struct spdk_nvmf_registrant *reg, *tmp;
2233 
2234 	TAILQ_FOREACH_SAFE(reg, &ns->registrants, link, tmp) {
2235 		if (!spdk_uuid_compare(&reg->hostid, uuid)) {
2236 			return reg;
2237 		}
2238 	}
2239 
2240 	return NULL;
2241 }
2242 
2243 /* Generate reservation notice log to registered HostID controllers */
2244 static void
2245 nvmf_subsystem_gen_ctrlr_notification(struct spdk_nvmf_subsystem *subsystem,
2246 				      struct spdk_nvmf_ns *ns,
2247 				      struct spdk_uuid *hostid_list,
2248 				      uint32_t num_hostid,
2249 				      enum spdk_nvme_reservation_notification_log_page_type type)
2250 {
2251 	struct spdk_nvmf_ctrlr *ctrlr;
2252 	uint32_t i;
2253 
2254 	for (i = 0; i < num_hostid; i++) {
2255 		TAILQ_FOREACH(ctrlr, &subsystem->ctrlrs, link) {
2256 			if (!spdk_uuid_compare(&ctrlr->hostid, &hostid_list[i])) {
2257 				nvmf_ctrlr_reservation_notice_log(ctrlr, ns, type);
2258 			}
2259 		}
2260 	}
2261 }
2262 
2263 /* Get all registrants' hostid other than the controller who issued the command */
2264 static uint32_t
2265 nvmf_ns_reservation_get_all_other_hostid(struct spdk_nvmf_ns *ns,
2266 		struct spdk_uuid *hostid_list,
2267 		uint32_t max_num_hostid,
2268 		struct spdk_uuid *current_hostid)
2269 {
2270 	struct spdk_nvmf_registrant *reg, *tmp;
2271 	uint32_t num_hostid = 0;
2272 
2273 	TAILQ_FOREACH_SAFE(reg, &ns->registrants, link, tmp) {
2274 		if (spdk_uuid_compare(&reg->hostid, current_hostid)) {
2275 			if (num_hostid == max_num_hostid) {
2276 				assert(false);
2277 				return max_num_hostid;
2278 			}
2279 			hostid_list[num_hostid++] = reg->hostid;
2280 		}
2281 	}
2282 
2283 	return num_hostid;
2284 }
2285 
2286 /* Calculate the unregistered HostID list according to list
2287  * prior to execute preempt command and list after executing
2288  * preempt command.
2289  */
2290 static uint32_t
2291 nvmf_ns_reservation_get_unregistered_hostid(struct spdk_uuid *old_hostid_list,
2292 		uint32_t old_num_hostid,
2293 		struct spdk_uuid *remaining_hostid_list,
2294 		uint32_t remaining_num_hostid)
2295 {
2296 	struct spdk_uuid temp_hostid_list[SPDK_NVMF_MAX_NUM_REGISTRANTS];
2297 	uint32_t i, j, num_hostid = 0;
2298 	bool found;
2299 
2300 	if (!remaining_num_hostid) {
2301 		return old_num_hostid;
2302 	}
2303 
2304 	for (i = 0; i < old_num_hostid; i++) {
2305 		found = false;
2306 		for (j = 0; j < remaining_num_hostid; j++) {
2307 			if (!spdk_uuid_compare(&old_hostid_list[i], &remaining_hostid_list[j])) {
2308 				found = true;
2309 				break;
2310 			}
2311 		}
2312 		if (!found) {
2313 			spdk_uuid_copy(&temp_hostid_list[num_hostid++], &old_hostid_list[i]);
2314 		}
2315 	}
2316 
2317 	if (num_hostid) {
2318 		memcpy(old_hostid_list, temp_hostid_list, sizeof(struct spdk_uuid) * num_hostid);
2319 	}
2320 
2321 	return num_hostid;
2322 }
2323 
2324 /* current reservation type is all registrants or not */
2325 static bool
2326 nvmf_ns_reservation_all_registrants_type(struct spdk_nvmf_ns *ns)
2327 {
2328 	return (ns->rtype == SPDK_NVME_RESERVE_WRITE_EXCLUSIVE_ALL_REGS ||
2329 		ns->rtype == SPDK_NVME_RESERVE_EXCLUSIVE_ACCESS_ALL_REGS);
2330 }
2331 
2332 /* current registrant is reservation holder or not */
2333 static bool
2334 nvmf_ns_reservation_registrant_is_holder(struct spdk_nvmf_ns *ns,
2335 		struct spdk_nvmf_registrant *reg)
2336 {
2337 	if (!reg) {
2338 		return false;
2339 	}
2340 
2341 	if (nvmf_ns_reservation_all_registrants_type(ns)) {
2342 		return true;
2343 	}
2344 
2345 	return (ns->holder == reg);
2346 }
2347 
2348 static int
2349 nvmf_ns_reservation_add_registrant(struct spdk_nvmf_ns *ns,
2350 				   struct spdk_nvmf_ctrlr *ctrlr,
2351 				   uint64_t nrkey)
2352 {
2353 	struct spdk_nvmf_registrant *reg;
2354 
2355 	reg = calloc(1, sizeof(*reg));
2356 	if (!reg) {
2357 		return -ENOMEM;
2358 	}
2359 
2360 	reg->rkey = nrkey;
2361 	/* set hostid for the registrant */
2362 	spdk_uuid_copy(&reg->hostid, &ctrlr->hostid);
2363 	TAILQ_INSERT_TAIL(&ns->registrants, reg, link);
2364 	ns->gen++;
2365 
2366 	return 0;
2367 }
2368 
2369 static void
2370 nvmf_ns_reservation_release_reservation(struct spdk_nvmf_ns *ns)
2371 {
2372 	ns->rtype = 0;
2373 	ns->crkey = 0;
2374 	ns->holder = NULL;
2375 }
2376 
/* release the reservation if the last registrant was removed */
static void
nvmf_ns_reservation_check_release_on_remove_registrant(struct spdk_nvmf_ns *ns,
		struct spdk_nvmf_registrant *reg)
{
	struct spdk_nvmf_registrant *next_reg;

	/* no reservation holder */
	if (!ns->holder) {
		assert(ns->rtype == 0);
		return;
	}

	/* reg has already been unlinked by the caller, so TAILQ_FIRST yields
	 * the next surviving registrant (or NULL if the list is now empty).
	 */
	next_reg = TAILQ_FIRST(&ns->registrants);
	if (next_reg && nvmf_ns_reservation_all_registrants_type(ns)) {
		/* the next valid registrant is the new holder now */
		ns->holder = next_reg;
	} else if (nvmf_ns_reservation_registrant_is_holder(ns, reg)) {
		/* release the reservation */
		nvmf_ns_reservation_release_reservation(ns);
	}
}
2399 
2400 static void
2401 nvmf_ns_reservation_remove_registrant(struct spdk_nvmf_ns *ns,
2402 				      struct spdk_nvmf_registrant *reg)
2403 {
2404 	TAILQ_REMOVE(&ns->registrants, reg, link);
2405 	nvmf_ns_reservation_check_release_on_remove_registrant(ns, reg);
2406 	free(reg);
2407 	ns->gen++;
2408 	return;
2409 }
2410 
2411 static uint32_t
2412 nvmf_ns_reservation_remove_registrants_by_key(struct spdk_nvmf_ns *ns,
2413 		uint64_t rkey)
2414 {
2415 	struct spdk_nvmf_registrant *reg, *tmp;
2416 	uint32_t count = 0;
2417 
2418 	TAILQ_FOREACH_SAFE(reg, &ns->registrants, link, tmp) {
2419 		if (reg->rkey == rkey) {
2420 			nvmf_ns_reservation_remove_registrant(ns, reg);
2421 			count++;
2422 		}
2423 	}
2424 	return count;
2425 }
2426 
2427 static uint32_t
2428 nvmf_ns_reservation_remove_all_other_registrants(struct spdk_nvmf_ns *ns,
2429 		struct spdk_nvmf_registrant *reg)
2430 {
2431 	struct spdk_nvmf_registrant *reg_tmp, *reg_tmp2;
2432 	uint32_t count = 0;
2433 
2434 	TAILQ_FOREACH_SAFE(reg_tmp, &ns->registrants, link, reg_tmp2) {
2435 		if (reg_tmp != reg) {
2436 			nvmf_ns_reservation_remove_registrant(ns, reg_tmp);
2437 			count++;
2438 		}
2439 	}
2440 	return count;
2441 }
2442 
2443 static uint32_t
2444 nvmf_ns_reservation_clear_all_registrants(struct spdk_nvmf_ns *ns)
2445 {
2446 	struct spdk_nvmf_registrant *reg, *reg_tmp;
2447 	uint32_t count = 0;
2448 
2449 	TAILQ_FOREACH_SAFE(reg, &ns->registrants, link, reg_tmp) {
2450 		nvmf_ns_reservation_remove_registrant(ns, reg);
2451 		count++;
2452 	}
2453 	return count;
2454 }
2455 
2456 static void
2457 nvmf_ns_reservation_acquire_reservation(struct spdk_nvmf_ns *ns, uint64_t rkey,
2458 					enum spdk_nvme_reservation_type rtype,
2459 					struct spdk_nvmf_registrant *holder)
2460 {
2461 	ns->rtype = rtype;
2462 	ns->crkey = rkey;
2463 	assert(ns->holder == NULL);
2464 	ns->holder = holder;
2465 }
2466 
2467 static bool
2468 nvmf_ns_reservation_register(struct spdk_nvmf_ns *ns,
2469 			     struct spdk_nvmf_ctrlr *ctrlr,
2470 			     struct spdk_nvmf_request *req)
2471 {
2472 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
2473 	uint8_t rrega, iekey, cptpl, rtype;
2474 	struct spdk_nvme_reservation_register_data key;
2475 	struct spdk_nvmf_registrant *reg;
2476 	uint8_t status = SPDK_NVME_SC_SUCCESS;
2477 	bool update_sgroup = false;
2478 	struct spdk_uuid hostid_list[SPDK_NVMF_MAX_NUM_REGISTRANTS];
2479 	uint32_t num_hostid = 0;
2480 	int rc;
2481 
2482 	rrega = cmd->cdw10_bits.resv_register.rrega;
2483 	iekey = cmd->cdw10_bits.resv_register.iekey;
2484 	cptpl = cmd->cdw10_bits.resv_register.cptpl;
2485 
2486 	if (req->data && req->length >= sizeof(key)) {
2487 		memcpy(&key, req->data, sizeof(key));
2488 	} else {
2489 		SPDK_ERRLOG("No key provided. Failing request.\n");
2490 		status = SPDK_NVME_SC_INVALID_FIELD;
2491 		goto exit;
2492 	}
2493 
2494 	SPDK_DEBUGLOG(nvmf, "REGISTER: RREGA %u, IEKEY %u, CPTPL %u, "
2495 		      "NRKEY 0x%"PRIx64", NRKEY 0x%"PRIx64"\n",
2496 		      rrega, iekey, cptpl, key.crkey, key.nrkey);
2497 
2498 	if (cptpl == SPDK_NVME_RESERVE_PTPL_CLEAR_POWER_ON) {
2499 		/* Ture to OFF state, and need to be updated in the configuration file */
2500 		if (ns->ptpl_activated) {
2501 			ns->ptpl_activated = 0;
2502 			update_sgroup = true;
2503 		}
2504 	} else if (cptpl == SPDK_NVME_RESERVE_PTPL_PERSIST_POWER_LOSS) {
2505 		if (ns->ptpl_file == NULL) {
2506 			status = SPDK_NVME_SC_INVALID_FIELD;
2507 			goto exit;
2508 		} else if (ns->ptpl_activated == 0) {
2509 			ns->ptpl_activated = 1;
2510 			update_sgroup = true;
2511 		}
2512 	}
2513 
2514 	/* current Host Identifier has registrant or not */
2515 	reg = nvmf_ns_reservation_get_registrant(ns, &ctrlr->hostid);
2516 
2517 	switch (rrega) {
2518 	case SPDK_NVME_RESERVE_REGISTER_KEY:
2519 		if (!reg) {
2520 			/* register new controller */
2521 			if (key.nrkey == 0) {
2522 				SPDK_ERRLOG("Can't register zeroed new key\n");
2523 				status = SPDK_NVME_SC_INVALID_FIELD;
2524 				goto exit;
2525 			}
2526 			rc = nvmf_ns_reservation_add_registrant(ns, ctrlr, key.nrkey);
2527 			if (rc < 0) {
2528 				status = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
2529 				goto exit;
2530 			}
2531 			update_sgroup = true;
2532 		} else {
2533 			/* register with same key is not an error */
2534 			if (reg->rkey != key.nrkey) {
2535 				SPDK_ERRLOG("The same host already register a "
2536 					    "key with 0x%"PRIx64"\n",
2537 					    reg->rkey);
2538 				status = SPDK_NVME_SC_RESERVATION_CONFLICT;
2539 				goto exit;
2540 			}
2541 		}
2542 		break;
2543 	case SPDK_NVME_RESERVE_UNREGISTER_KEY:
2544 		if (!reg || (!iekey && reg->rkey != key.crkey)) {
2545 			SPDK_ERRLOG("No registrant or current key doesn't match "
2546 				    "with existing registrant key\n");
2547 			status = SPDK_NVME_SC_RESERVATION_CONFLICT;
2548 			goto exit;
2549 		}
2550 
2551 		rtype = ns->rtype;
2552 		num_hostid = nvmf_ns_reservation_get_all_other_hostid(ns, hostid_list,
2553 				SPDK_NVMF_MAX_NUM_REGISTRANTS,
2554 				&ctrlr->hostid);
2555 
2556 		nvmf_ns_reservation_remove_registrant(ns, reg);
2557 
2558 		if (!ns->rtype && num_hostid && (rtype == SPDK_NVME_RESERVE_WRITE_EXCLUSIVE_REG_ONLY ||
2559 						 rtype == SPDK_NVME_RESERVE_EXCLUSIVE_ACCESS_REG_ONLY)) {
2560 			nvmf_subsystem_gen_ctrlr_notification(ns->subsystem, ns,
2561 							      hostid_list,
2562 							      num_hostid,
2563 							      SPDK_NVME_RESERVATION_RELEASED);
2564 		}
2565 		update_sgroup = true;
2566 		break;
2567 	case SPDK_NVME_RESERVE_REPLACE_KEY:
2568 		if (key.nrkey == 0) {
2569 			SPDK_ERRLOG("Can't register zeroed new key\n");
2570 			status = SPDK_NVME_SC_INVALID_FIELD;
2571 			goto exit;
2572 		}
2573 		/* Registrant exists */
2574 		if (reg) {
2575 			if (!iekey && reg->rkey != key.crkey) {
2576 				SPDK_ERRLOG("Current key doesn't match "
2577 					    "existing registrant key\n");
2578 				status = SPDK_NVME_SC_RESERVATION_CONFLICT;
2579 				goto exit;
2580 			}
2581 			if (reg->rkey == key.nrkey) {
2582 				goto exit;
2583 			}
2584 			reg->rkey = key.nrkey;
2585 		} else if (iekey) { /* No registrant but IEKEY is set */
2586 			/* new registrant */
2587 			rc = nvmf_ns_reservation_add_registrant(ns, ctrlr, key.nrkey);
2588 			if (rc < 0) {
2589 				status = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
2590 				goto exit;
2591 			}
2592 		} else { /* No registrant */
2593 			SPDK_ERRLOG("No registrant\n");
2594 			status = SPDK_NVME_SC_RESERVATION_CONFLICT;
2595 			goto exit;
2596 
2597 		}
2598 		update_sgroup = true;
2599 		break;
2600 	default:
2601 		status = SPDK_NVME_SC_INVALID_FIELD;
2602 		goto exit;
2603 	}
2604 
2605 exit:
2606 	if (update_sgroup) {
2607 		rc = nvmf_ns_update_reservation_info(ns);
2608 		if (rc != 0) {
2609 			status = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
2610 		}
2611 	}
2612 	req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
2613 	req->rsp->nvme_cpl.status.sc = status;
2614 	return update_sgroup;
2615 }
2616 
/* Handle a Reservation Acquire command (acquire or preempt) for the issuing
 * controller's host.  Completion status is written to the request; returns
 * true when the subsystem's poll groups need to be updated.
 */
static bool
nvmf_ns_reservation_acquire(struct spdk_nvmf_ns *ns,
			    struct spdk_nvmf_ctrlr *ctrlr,
			    struct spdk_nvmf_request *req)
{
	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
	uint8_t racqa, iekey, rtype;
	struct spdk_nvme_reservation_acquire_data key;
	struct spdk_nvmf_registrant *reg;
	bool all_regs = false;
	uint32_t count = 0;
	bool update_sgroup = true;
	struct spdk_uuid hostid_list[SPDK_NVMF_MAX_NUM_REGISTRANTS];
	uint32_t num_hostid = 0;
	struct spdk_uuid new_hostid_list[SPDK_NVMF_MAX_NUM_REGISTRANTS];
	uint32_t new_num_hostid = 0;
	bool reservation_released = false;
	uint8_t status = SPDK_NVME_SC_SUCCESS;

	racqa = cmd->cdw10_bits.resv_acquire.racqa;
	iekey = cmd->cdw10_bits.resv_acquire.iekey;
	rtype = cmd->cdw10_bits.resv_acquire.rtype;

	if (req->data && req->length >= sizeof(key)) {
		memcpy(&key, req->data, sizeof(key));
	} else {
		SPDK_ERRLOG("No key provided. Failing request.\n");
		status = SPDK_NVME_SC_INVALID_FIELD;
		goto exit;
	}

	SPDK_DEBUGLOG(nvmf, "ACQUIRE: RACQA %u, IEKEY %u, RTYPE %u, "
		      "NRKEY 0x%"PRIx64", PRKEY 0x%"PRIx64"\n",
		      racqa, iekey, rtype, key.crkey, key.prkey);

	/* NOTE(review): this condition also rejects an out-of-range RTYPE,
	 * though the log message only mentions the IEKEY case.
	 */
	if (iekey || rtype > SPDK_NVME_RESERVE_EXCLUSIVE_ACCESS_ALL_REGS) {
		SPDK_ERRLOG("Ignore existing key field set to 1\n");
		status = SPDK_NVME_SC_INVALID_FIELD;
		update_sgroup = false;
		goto exit;
	}

	reg = nvmf_ns_reservation_get_registrant(ns, &ctrlr->hostid);
	/* must be registrant and CRKEY must match */
	if (!reg || reg->rkey != key.crkey) {
		SPDK_ERRLOG("No registrant or current key doesn't match "
			    "with existing registrant key\n");
		status = SPDK_NVME_SC_RESERVATION_CONFLICT;
		update_sgroup = false;
		goto exit;
	}

	all_regs = nvmf_ns_reservation_all_registrants_type(ns);

	switch (racqa) {
	case SPDK_NVME_RESERVE_ACQUIRE:
		/* it's not an error for the holder to acquire same reservation type again */
		if (nvmf_ns_reservation_registrant_is_holder(ns, reg) && ns->rtype == rtype) {
			/* do nothing */
			update_sgroup = false;
		} else if (ns->holder == NULL) {
			/* first time to acquire the reservation */
			nvmf_ns_reservation_acquire_reservation(ns, key.crkey, rtype, reg);
		} else {
			SPDK_ERRLOG("Invalid rtype or current registrant is not holder\n");
			status = SPDK_NVME_SC_RESERVATION_CONFLICT;
			update_sgroup = false;
			goto exit;
		}
		break;
	case SPDK_NVME_RESERVE_PREEMPT:
		/* no reservation holder */
		if (!ns->holder) {
			/* unregister with PRKEY */
			nvmf_ns_reservation_remove_registrants_by_key(ns, key.prkey);
			break;
		}
		/* Snapshot the other registrants before any removal so the
		 * preempt notifications at exit can be computed from the diff.
		 */
		num_hostid = nvmf_ns_reservation_get_all_other_hostid(ns, hostid_list,
				SPDK_NVMF_MAX_NUM_REGISTRANTS,
				&ctrlr->hostid);

		/* only 1 reservation holder and reservation key is valid */
		if (!all_regs) {
			/* preempt itself */
			if (nvmf_ns_reservation_registrant_is_holder(ns, reg) &&
			    ns->crkey == key.prkey) {
				ns->rtype = rtype;
				reservation_released = true;
				break;
			}

			if (ns->crkey == key.prkey) {
				nvmf_ns_reservation_remove_registrant(ns, ns->holder);
				nvmf_ns_reservation_acquire_reservation(ns, key.crkey, rtype, reg);
				reservation_released = true;
			} else if (key.prkey != 0) {
				nvmf_ns_reservation_remove_registrants_by_key(ns, key.prkey);
			} else {
				/* PRKEY is zero */
				SPDK_ERRLOG("Current PRKEY is zero\n");
				status = SPDK_NVME_SC_RESERVATION_CONFLICT;
				update_sgroup = false;
				goto exit;
			}
		} else {
			/* release all other registrants except for the current one */
			if (key.prkey == 0) {
				nvmf_ns_reservation_remove_all_other_registrants(ns, reg);
				assert(ns->holder == reg);
			} else {
				count = nvmf_ns_reservation_remove_registrants_by_key(ns, key.prkey);
				if (count == 0) {
					SPDK_ERRLOG("PRKEY doesn't match any registrant\n");
					status = SPDK_NVME_SC_RESERVATION_CONFLICT;
					update_sgroup = false;
					goto exit;
				}
			}
		}
		break;
	default:
		status = SPDK_NVME_SC_INVALID_FIELD;
		update_sgroup = false;
		break;
	}

exit:
	if (update_sgroup && racqa == SPDK_NVME_RESERVE_PREEMPT) {
		new_num_hostid = nvmf_ns_reservation_get_all_other_hostid(ns, new_hostid_list,
				 SPDK_NVMF_MAX_NUM_REGISTRANTS,
				 &ctrlr->hostid);
		/* Preempt notification occurs on the unregistered controllers
		 * other than the controller who issued the command.
		 */
		num_hostid = nvmf_ns_reservation_get_unregistered_hostid(hostid_list,
				num_hostid,
				new_hostid_list,
				new_num_hostid);
		if (num_hostid) {
			nvmf_subsystem_gen_ctrlr_notification(ns->subsystem, ns,
							      hostid_list,
							      num_hostid,
							      SPDK_NVME_REGISTRATION_PREEMPTED);

		}
		/* Reservation released notification occurs on the
		 * controllers which are the remaining registrants other than
		 * the controller who issued the command.
		 */
		if (reservation_released && new_num_hostid) {
			nvmf_subsystem_gen_ctrlr_notification(ns->subsystem, ns,
							      new_hostid_list,
							      new_num_hostid,
							      SPDK_NVME_RESERVATION_RELEASED);

		}
	}
	/* Persist the new state when PTPL is active */
	if (update_sgroup && ns->ptpl_activated) {
		if (nvmf_ns_update_reservation_info(ns)) {
			status = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
		}
	}
	req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
	req->rsp->nvme_cpl.status.sc = status;
	return update_sgroup;
}
2783 
/* Handle a Reservation Release command (release or clear) for the issuing
 * controller's host.  Completion status is written to the request; returns
 * true when the subsystem's poll groups need to be updated.
 */
static bool
nvmf_ns_reservation_release(struct spdk_nvmf_ns *ns,
			    struct spdk_nvmf_ctrlr *ctrlr,
			    struct spdk_nvmf_request *req)
{
	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
	uint8_t rrela, iekey, rtype;
	struct spdk_nvmf_registrant *reg;
	uint64_t crkey;
	uint8_t status = SPDK_NVME_SC_SUCCESS;
	bool update_sgroup = true;
	struct spdk_uuid hostid_list[SPDK_NVMF_MAX_NUM_REGISTRANTS];
	uint32_t num_hostid = 0;

	rrela = cmd->cdw10_bits.resv_release.rrela;
	iekey = cmd->cdw10_bits.resv_release.iekey;
	rtype = cmd->cdw10_bits.resv_release.rtype;

	if (req->data && req->length >= sizeof(crkey)) {
		memcpy(&crkey, req->data, sizeof(crkey));
	} else {
		SPDK_ERRLOG("No key provided. Failing request.\n");
		status = SPDK_NVME_SC_INVALID_FIELD;
		goto exit;
	}

	SPDK_DEBUGLOG(nvmf, "RELEASE: RRELA %u, IEKEY %u, RTYPE %u, "
		      "CRKEY 0x%"PRIx64"\n",  rrela, iekey, rtype, crkey);

	/* IEKEY is reserved for Release and must be cleared */
	if (iekey) {
		SPDK_ERRLOG("Ignore existing key field set to 1\n");
		status = SPDK_NVME_SC_INVALID_FIELD;
		update_sgroup = false;
		goto exit;
	}

	/* must be a registrant and the supplied current key must match */
	reg = nvmf_ns_reservation_get_registrant(ns, &ctrlr->hostid);
	if (!reg || reg->rkey != crkey) {
		SPDK_ERRLOG("No registrant or current key doesn't match "
			    "with existing registrant key\n");
		status = SPDK_NVME_SC_RESERVATION_CONFLICT;
		update_sgroup = false;
		goto exit;
	}

	/* Snapshot the other registered hosts before mutating state so the
	 * notifications below reach the right controllers.
	 */
	num_hostid = nvmf_ns_reservation_get_all_other_hostid(ns, hostid_list,
			SPDK_NVMF_MAX_NUM_REGISTRANTS,
			&ctrlr->hostid);

	switch (rrela) {
	case SPDK_NVME_RESERVE_RELEASE:
		if (!ns->holder) {
			SPDK_DEBUGLOG(nvmf, "RELEASE: no holder\n");
			update_sgroup = false;
			goto exit;
		}
		if (ns->rtype != rtype) {
			SPDK_ERRLOG("Type doesn't match\n");
			status = SPDK_NVME_SC_INVALID_FIELD;
			update_sgroup = false;
			goto exit;
		}
		if (!nvmf_ns_reservation_registrant_is_holder(ns, reg)) {
			/* not the reservation holder, this isn't an error */
			update_sgroup = false;
			goto exit;
		}

		/* Capture the type before it is cleared by the release */
		rtype = ns->rtype;
		nvmf_ns_reservation_release_reservation(ns);

		/* Write/access-exclusive types have no release notification */
		if (num_hostid && rtype != SPDK_NVME_RESERVE_WRITE_EXCLUSIVE &&
		    rtype != SPDK_NVME_RESERVE_EXCLUSIVE_ACCESS) {
			nvmf_subsystem_gen_ctrlr_notification(ns->subsystem, ns,
							      hostid_list,
							      num_hostid,
							      SPDK_NVME_RESERVATION_RELEASED);
		}
		break;
	case SPDK_NVME_RESERVE_CLEAR:
		nvmf_ns_reservation_clear_all_registrants(ns);
		if (num_hostid) {
			nvmf_subsystem_gen_ctrlr_notification(ns->subsystem, ns,
							      hostid_list,
							      num_hostid,
							      SPDK_NVME_RESERVATION_PREEMPTED);
		}
		break;
	default:
		status = SPDK_NVME_SC_INVALID_FIELD;
		update_sgroup = false;
		goto exit;
	}

exit:
	/* Persist the new state when PTPL is active */
	if (update_sgroup && ns->ptpl_activated) {
		if (nvmf_ns_update_reservation_info(ns)) {
			status = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
		}
	}
	req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
	req->rsp->nvme_cpl.status.sc = status;
	return update_sgroup;
}
2888 
2889 static void
2890 nvmf_ns_reservation_report(struct spdk_nvmf_ns *ns,
2891 			   struct spdk_nvmf_ctrlr *ctrlr,
2892 			   struct spdk_nvmf_request *req)
2893 {
2894 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
2895 	struct spdk_nvmf_registrant *reg, *tmp;
2896 	struct spdk_nvme_reservation_status_extended_data *status_data;
2897 	struct spdk_nvme_registered_ctrlr_extended_data *ctrlr_data;
2898 	uint8_t *payload;
2899 	uint32_t transfer_len, payload_len = 0;
2900 	uint32_t regctl = 0;
2901 	uint8_t status = SPDK_NVME_SC_SUCCESS;
2902 
2903 	if (req->data == NULL) {
2904 		SPDK_ERRLOG("No data transfer specified for request. "
2905 			    " Unable to transfer back response.\n");
2906 		status = SPDK_NVME_SC_INVALID_FIELD;
2907 		goto exit;
2908 	}
2909 
2910 	if (!cmd->cdw11_bits.resv_report.eds) {
2911 		SPDK_ERRLOG("NVMeoF uses extended controller data structure, "
2912 			    "please set EDS bit in cdw11 and try again\n");
2913 		status = SPDK_NVME_SC_HOSTID_INCONSISTENT_FORMAT;
2914 		goto exit;
2915 	}
2916 
2917 	/* Number of Dwords of the Reservation Status data structure to transfer */
2918 	transfer_len = (cmd->cdw10 + 1) * sizeof(uint32_t);
2919 	payload = req->data;
2920 
2921 	if (transfer_len < sizeof(struct spdk_nvme_reservation_status_extended_data)) {
2922 		status = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
2923 		goto exit;
2924 	}
2925 
2926 	status_data = (struct spdk_nvme_reservation_status_extended_data *)payload;
2927 	status_data->data.gen = ns->gen;
2928 	status_data->data.rtype = ns->rtype;
2929 	status_data->data.ptpls = ns->ptpl_activated;
2930 	payload_len += sizeof(struct spdk_nvme_reservation_status_extended_data);
2931 
2932 	TAILQ_FOREACH_SAFE(reg, &ns->registrants, link, tmp) {
2933 		payload_len += sizeof(struct spdk_nvme_registered_ctrlr_extended_data);
2934 		if (payload_len > transfer_len) {
2935 			break;
2936 		}
2937 
2938 		ctrlr_data = (struct spdk_nvme_registered_ctrlr_extended_data *)
2939 			     (payload + sizeof(*status_data) + sizeof(*ctrlr_data) * regctl);
2940 		/* Set to 0xffffh for dynamic controller */
2941 		ctrlr_data->cntlid = 0xffff;
2942 		ctrlr_data->rcsts.status = (ns->holder == reg) ? true : false;
2943 		ctrlr_data->rkey = reg->rkey;
2944 		spdk_uuid_copy((struct spdk_uuid *)ctrlr_data->hostid, &reg->hostid);
2945 		regctl++;
2946 	}
2947 	status_data->data.regctl = regctl;
2948 
2949 exit:
2950 	req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
2951 	req->rsp->nvme_cpl.status.sc = status;
2952 	return;
2953 }
2954 
/* Thread-message callback: complete the reservation request on the poll
 * group's thread.
 */
static void
nvmf_ns_reservation_complete(void *ctx)
{
	struct spdk_nvmf_request *req = ctx;

	spdk_nvmf_request_complete(req);
}
2962 
/* Called when the subsystem-wide namespace update finishes: bounce request
 * completion back to the request's own poll group thread.  The completion
 * status was already stored in the request's CQE, so the status argument is
 * intentionally ignored here.
 */
static void
_nvmf_ns_reservation_update_done(struct spdk_nvmf_subsystem *subsystem,
				 void *cb_arg, int status)
{
	struct spdk_nvmf_request *req = (struct spdk_nvmf_request *)cb_arg;
	struct spdk_nvmf_poll_group *group = req->qpair->group;

	spdk_thread_send_msg(group->thread, nvmf_ns_reservation_complete, req);
}
2972 
/* Entry point for reservation commands (register/acquire/release/report).
 * Dispatches to the per-opcode handler; if the handler changed reservation
 * state, propagates it to all poll groups before completing the request.
 */
void
nvmf_ns_reservation_request(void *ctx)
{
	struct spdk_nvmf_request *req = (struct spdk_nvmf_request *)ctx;
	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
	struct subsystem_update_ns_ctx *update_ctx;
	uint32_t nsid;
	struct spdk_nvmf_ns *ns;
	bool update_sgroup = false;

	nsid = cmd->nsid;
	ns = _nvmf_subsystem_get_ns(ctrlr->subsys, nsid);
	assert(ns != NULL);

	switch (cmd->opc) {
	case SPDK_NVME_OPC_RESERVATION_REGISTER:
		update_sgroup = nvmf_ns_reservation_register(ns, ctrlr, req);
		break;
	case SPDK_NVME_OPC_RESERVATION_ACQUIRE:
		update_sgroup = nvmf_ns_reservation_acquire(ns, ctrlr, req);
		break;
	case SPDK_NVME_OPC_RESERVATION_RELEASE:
		update_sgroup = nvmf_ns_reservation_release(ns, ctrlr, req);
		break;
	case SPDK_NVME_OPC_RESERVATION_REPORT:
		/* Report never changes state, so no poll group update needed */
		nvmf_ns_reservation_report(ns, ctrlr, req);
		break;
	default:
		break;
	}

	/* update reservation information to subsystem's poll group */
	if (update_sgroup) {
		update_ctx = calloc(1, sizeof(*update_ctx));
		if (update_ctx == NULL) {
			/* On allocation failure the request is completed right
			 * away without propagating the update; the CQE status
			 * set by the handler is preserved.
			 */
			SPDK_ERRLOG("Can't alloc subsystem poll group update context\n");
			goto update_done;
		}
		update_ctx->subsystem = ctrlr->subsys;
		update_ctx->cb_fn = _nvmf_ns_reservation_update_done;
		update_ctx->cb_arg = req;

		nvmf_subsystem_update_ns(ctrlr->subsys, subsystem_update_ns_done, update_ctx);
		return;
	}

update_done:
	_nvmf_ns_reservation_update_done(ctrlr->subsys, (void *)req, 0);
}
3023 
3024 int
3025 spdk_nvmf_subsystem_set_ana_reporting(struct spdk_nvmf_subsystem *subsystem,
3026 				      bool ana_reporting)
3027 {
3028 	if (subsystem->state != SPDK_NVMF_SUBSYSTEM_INACTIVE) {
3029 		return -EAGAIN;
3030 	}
3031 
3032 	subsystem->flags.ana_reporting = ana_reporting;
3033 
3034 	return 0;
3035 }
3036 
3037 bool
3038 nvmf_subsystem_get_ana_reporting(struct spdk_nvmf_subsystem *subsystem)
3039 {
3040 	return subsystem->flags.ana_reporting;
3041 }
3042 
/*
 * Context carried through the spdk_for_each_channel() walk that notifies
 * every poll group of a listener ANA state change
 * (see nvmf_subsystem_set_ana_state()).
 */
struct subsystem_listener_update_ctx {
	/* Listener whose ANA state was updated. */
	struct spdk_nvmf_subsystem_listener *listener;

	/* Caller completion callback, invoked once all poll groups have
	 * been visited; cb_arg is passed through unchanged.
	 */
	spdk_nvmf_tgt_subsystem_listen_done_fn cb_fn;
	void *cb_arg;
};
3049 
3050 static void
3051 subsystem_listener_update_done(struct spdk_io_channel_iter *i, int status)
3052 {
3053 	struct subsystem_listener_update_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
3054 
3055 	if (ctx->cb_fn) {
3056 		ctx->cb_fn(ctx->cb_arg, status);
3057 	}
3058 	free(ctx);
3059 }
3060 
3061 static void
3062 subsystem_listener_update_on_pg(struct spdk_io_channel_iter *i)
3063 {
3064 	struct subsystem_listener_update_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
3065 	struct spdk_nvmf_subsystem_listener *listener;
3066 	struct spdk_nvmf_poll_group *group;
3067 	struct spdk_nvmf_ctrlr *ctrlr;
3068 
3069 	listener = ctx->listener;
3070 	group = spdk_io_channel_get_ctx(spdk_io_channel_iter_get_channel(i));
3071 
3072 	TAILQ_FOREACH(ctrlr, &listener->subsystem->ctrlrs, link) {
3073 		if (ctrlr->admin_qpair->group == group && ctrlr->listener == listener) {
3074 			nvmf_ctrlr_async_event_ana_change_notice(ctrlr);
3075 		}
3076 	}
3077 
3078 	spdk_for_each_channel_continue(i, 0);
3079 }
3080 
/*
 * Set the ANA state of one listener for a single ANA group (anagrpid != 0)
 * or for all groups (anagrpid == 0), then asynchronously notify every
 * controller connected through that listener via an ANA change async event.
 *
 * The subsystem must be INACTIVE or PAUSED.  cb_fn is always invoked
 * exactly once: synchronously with a negative errno on validation or
 * allocation failure, synchronously with 0 when the requested state is
 * already in effect, or from subsystem_listener_update_done() after all
 * poll groups have been visited.
 */
void
nvmf_subsystem_set_ana_state(struct spdk_nvmf_subsystem *subsystem,
			     const struct spdk_nvme_transport_id *trid,
			     enum spdk_nvme_ana_state ana_state, uint32_t anagrpid,
			     spdk_nvmf_tgt_subsystem_listen_done_fn cb_fn, void *cb_arg)
{
	struct spdk_nvmf_subsystem_listener *listener;
	struct subsystem_listener_update_ctx *ctx;
	uint32_t i;

	assert(cb_fn != NULL);
	assert(subsystem->state == SPDK_NVMF_SUBSYSTEM_INACTIVE ||
	       subsystem->state == SPDK_NVMF_SUBSYSTEM_PAUSED);

	if (!subsystem->flags.ana_reporting) {
		SPDK_ERRLOG("ANA reporting is disabled\n");
		cb_fn(cb_arg, -EINVAL);
		return;
	}

	/* ANA Change state is not used, ANA Persistent Loss state
	 * is not supported yet.
	 */
	if (!(ana_state == SPDK_NVME_ANA_OPTIMIZED_STATE ||
	      ana_state == SPDK_NVME_ANA_NON_OPTIMIZED_STATE ||
	      ana_state == SPDK_NVME_ANA_INACCESSIBLE_STATE)) {
		SPDK_ERRLOG("ANA state %d is not supported\n", ana_state);
		cb_fn(cb_arg, -ENOTSUP);
		return;
	}

	/* NOTE(review): ana_state[] is indexed by (group ID - 1) below, and
	 * group IDs are bounded by max_nsid here — presumably the per-listener
	 * array is sized to max_nsid entries; confirm against its allocation.
	 */
	if (anagrpid > subsystem->max_nsid) {
		SPDK_ERRLOG("ANA group ID %" PRIu32 " is more than maximum\n", anagrpid);
		cb_fn(cb_arg, -EINVAL);
		return;
	}

	listener = nvmf_subsystem_find_listener(subsystem, trid);
	if (!listener) {
		SPDK_ERRLOG("Unable to find listener.\n");
		cb_fn(cb_arg, -EINVAL);
		return;
	}

	/* Single-group update that is already in effect: nothing to do, and
	 * no controller notification is generated.
	 */
	if (anagrpid != 0 && listener->ana_state[anagrpid - 1] == ana_state) {
		cb_fn(cb_arg, 0);
		return;
	}

	ctx = calloc(1, sizeof(*ctx));
	if (!ctx) {
		SPDK_ERRLOG("Unable to allocate context\n");
		cb_fn(cb_arg, -ENOMEM);
		return;
	}

	/* Apply the new state to the selected group, or to every group when
	 * anagrpid == 0.
	 */
	for (i = 1; i <= subsystem->max_nsid; i++) {
		if (anagrpid == 0 || i == anagrpid) {
			listener->ana_state[i - 1] = ana_state;
		}
	}
	listener->ana_state_change_count++;

	ctx->listener = listener;
	ctx->cb_fn = cb_fn;
	ctx->cb_arg = cb_arg;

	/* Visit every poll group; subsystem_listener_update_on_pg() raises the
	 * async events and subsystem_listener_update_done() fires cb_fn and
	 * frees ctx.
	 */
	spdk_for_each_channel(subsystem->tgt,
			      subsystem_listener_update_on_pg,
			      ctx,
			      subsystem_listener_update_done);
}
3153