xref: /spdk/lib/nvmf/subsystem.c (revision b68f2eeb0b5244d09648487a40584ccb480bfb6d)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright (c) Intel Corporation. All rights reserved.
5  *   Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved.
6  *   Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
7  *
8  *   Redistribution and use in source and binary forms, with or without
9  *   modification, are permitted provided that the following conditions
10  *   are met:
11  *
12  *     * Redistributions of source code must retain the above copyright
13  *       notice, this list of conditions and the following disclaimer.
14  *     * Redistributions in binary form must reproduce the above copyright
15  *       notice, this list of conditions and the following disclaimer in
16  *       the documentation and/or other materials provided with the
17  *       distribution.
18  *     * Neither the name of Intel Corporation nor the names of its
19  *       contributors may be used to endorse or promote products derived
20  *       from this software without specific prior written permission.
21  *
22  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  */
34 
35 #include "spdk/stdinc.h"
36 
37 #include "nvmf_internal.h"
38 #include "transport.h"
39 
40 #include "spdk/assert.h"
41 #include "spdk/likely.h"
42 #include "spdk/string.h"
43 #include "spdk/trace.h"
44 #include "spdk/nvmf_spec.h"
45 #include "spdk/uuid.h"
46 #include "spdk/json.h"
47 #include "spdk/file.h"
48 #include "spdk/bit_array.h"
49 
50 #define __SPDK_BDEV_MODULE_ONLY
51 #include "spdk/bdev_module.h"
52 #include "spdk/log.h"
53 #include "spdk_internal/utf.h"
54 #include "spdk_internal/usdt.h"
55 
56 #define MODEL_NUMBER_DEFAULT "SPDK bdev Controller"
57 #define NVMF_SUBSYSTEM_DEFAULT_NAMESPACES 32
58 
59 /*
60  * States for parsing valid domains in NQNs according to RFC 1034
61  */
62 enum spdk_nvmf_nqn_domain_states {
63 	/* First character of a domain must be a letter */
64 	SPDK_NVMF_DOMAIN_ACCEPT_LETTER = 0,
65 
66 	/* Subsequent characters can be any of letter, digit, or hyphen */
67 	SPDK_NVMF_DOMAIN_ACCEPT_LDH = 1,
68 
69 	/* A domain label must end with either a letter or digit */
70 	SPDK_NVMF_DOMAIN_ACCEPT_ANY = 2
71 };
72 
73 static int _nvmf_subsystem_destroy(struct spdk_nvmf_subsystem *subsystem);
74 
/* Returns true if is a valid ASCII string as defined by the NVMe spec */
static bool
nvmf_valid_ascii_string(const void *buf, size_t size)
{
	const uint8_t *p = buf;
	const uint8_t *end = p + size;

	/* Every byte must be a printable ASCII character (0x20..0x7E). */
	while (p < end) {
		if (*p < 0x20 || *p > 0x7E) {
			return false;
		}
		p++;
	}

	return true;
}
90 
91 static bool
92 nvmf_valid_nqn(const char *nqn)
93 {
94 	size_t len;
95 	struct spdk_uuid uuid_value;
96 	uint32_t i;
97 	int bytes_consumed;
98 	uint32_t domain_label_length;
99 	char *reverse_domain_end;
100 	uint32_t reverse_domain_end_index;
101 	enum spdk_nvmf_nqn_domain_states domain_state = SPDK_NVMF_DOMAIN_ACCEPT_LETTER;
102 
103 	/* Check for length requirements */
104 	len = strlen(nqn);
105 	if (len > SPDK_NVMF_NQN_MAX_LEN) {
106 		SPDK_ERRLOG("Invalid NQN \"%s\": length %zu > max %d\n", nqn, len, SPDK_NVMF_NQN_MAX_LEN);
107 		return false;
108 	}
109 
110 	/* The nqn must be at least as long as SPDK_NVMF_NQN_MIN_LEN to contain the necessary prefix. */
111 	if (len < SPDK_NVMF_NQN_MIN_LEN) {
112 		SPDK_ERRLOG("Invalid NQN \"%s\": length %zu < min %d\n", nqn, len, SPDK_NVMF_NQN_MIN_LEN);
113 		return false;
114 	}
115 
116 	/* Check for discovery controller nqn */
117 	if (!strcmp(nqn, SPDK_NVMF_DISCOVERY_NQN)) {
118 		return true;
119 	}
120 
121 	/* Check for equality with the generic nqn structure of the form "nqn.2014-08.org.nvmexpress:uuid:11111111-2222-3333-4444-555555555555" */
122 	if (!strncmp(nqn, SPDK_NVMF_NQN_UUID_PRE, SPDK_NVMF_NQN_UUID_PRE_LEN)) {
123 		if (len != SPDK_NVMF_NQN_UUID_PRE_LEN + SPDK_NVMF_UUID_STRING_LEN) {
124 			SPDK_ERRLOG("Invalid NQN \"%s\": uuid is not the correct length\n", nqn);
125 			return false;
126 		}
127 
128 		if (spdk_uuid_parse(&uuid_value, &nqn[SPDK_NVMF_NQN_UUID_PRE_LEN])) {
129 			SPDK_ERRLOG("Invalid NQN \"%s\": uuid is not formatted correctly\n", nqn);
130 			return false;
131 		}
132 		return true;
133 	}
134 
135 	/* If the nqn does not match the uuid structure, the next several checks validate the form "nqn.yyyy-mm.reverse.domain:user-string" */
136 
137 	if (strncmp(nqn, "nqn.", 4) != 0) {
138 		SPDK_ERRLOG("Invalid NQN \"%s\": NQN must begin with \"nqn.\".\n", nqn);
139 		return false;
140 	}
141 
142 	/* Check for yyyy-mm. */
143 	if (!(isdigit(nqn[4]) && isdigit(nqn[5]) && isdigit(nqn[6]) && isdigit(nqn[7]) &&
144 	      nqn[8] == '-' && isdigit(nqn[9]) && isdigit(nqn[10]) && nqn[11] == '.')) {
145 		SPDK_ERRLOG("Invalid date code in NQN \"%s\"\n", nqn);
146 		return false;
147 	}
148 
149 	reverse_domain_end = strchr(nqn, ':');
150 	if (reverse_domain_end != NULL && (reverse_domain_end_index = reverse_domain_end - nqn) < len - 1) {
151 	} else {
152 		SPDK_ERRLOG("Invalid NQN \"%s\". NQN must contain user specified name with a ':' as a prefix.\n",
153 			    nqn);
154 		return false;
155 	}
156 
157 	/* Check for valid reverse domain */
158 	domain_label_length = 0;
159 	for (i = 12; i < reverse_domain_end_index; i++) {
160 		if (domain_label_length > SPDK_DOMAIN_LABEL_MAX_LEN) {
161 			SPDK_ERRLOG("Invalid domain name in NQN \"%s\". At least one Label is too long.\n", nqn);
162 			return false;
163 		}
164 
165 		switch (domain_state) {
166 
167 		case SPDK_NVMF_DOMAIN_ACCEPT_LETTER: {
168 			if (isalpha(nqn[i])) {
169 				domain_state = SPDK_NVMF_DOMAIN_ACCEPT_ANY;
170 				domain_label_length++;
171 				break;
172 			} else {
173 				SPDK_ERRLOG("Invalid domain name in NQN \"%s\". Label names must start with a letter.\n", nqn);
174 				return false;
175 			}
176 		}
177 
178 		case SPDK_NVMF_DOMAIN_ACCEPT_LDH: {
179 			if (isalpha(nqn[i]) || isdigit(nqn[i])) {
180 				domain_state = SPDK_NVMF_DOMAIN_ACCEPT_ANY;
181 				domain_label_length++;
182 				break;
183 			} else if (nqn[i] == '-') {
184 				if (i == reverse_domain_end_index - 1) {
185 					SPDK_ERRLOG("Invalid domain name in NQN \"%s\". Label names must end with an alphanumeric symbol.\n",
186 						    nqn);
187 					return false;
188 				}
189 				domain_state = SPDK_NVMF_DOMAIN_ACCEPT_LDH;
190 				domain_label_length++;
191 				break;
192 			} else if (nqn[i] == '.') {
193 				SPDK_ERRLOG("Invalid domain name in NQN \"%s\". Label names must end with an alphanumeric symbol.\n",
194 					    nqn);
195 				return false;
196 			} else {
197 				SPDK_ERRLOG("Invalid domain name in NQN \"%s\". Label names must contain only [a-z,A-Z,0-9,'-','.'].\n",
198 					    nqn);
199 				return false;
200 			}
201 		}
202 
203 		case SPDK_NVMF_DOMAIN_ACCEPT_ANY: {
204 			if (isalpha(nqn[i]) || isdigit(nqn[i])) {
205 				domain_state = SPDK_NVMF_DOMAIN_ACCEPT_ANY;
206 				domain_label_length++;
207 				break;
208 			} else if (nqn[i] == '-') {
209 				if (i == reverse_domain_end_index - 1) {
210 					SPDK_ERRLOG("Invalid domain name in NQN \"%s\". Label names must end with an alphanumeric symbol.\n",
211 						    nqn);
212 					return false;
213 				}
214 				domain_state = SPDK_NVMF_DOMAIN_ACCEPT_LDH;
215 				domain_label_length++;
216 				break;
217 			} else if (nqn[i] == '.') {
218 				domain_state = SPDK_NVMF_DOMAIN_ACCEPT_LETTER;
219 				domain_label_length = 0;
220 				break;
221 			} else {
222 				SPDK_ERRLOG("Invalid domain name in NQN \"%s\". Label names must contain only [a-z,A-Z,0-9,'-','.'].\n",
223 					    nqn);
224 				return false;
225 			}
226 		}
227 		}
228 	}
229 
230 	i = reverse_domain_end_index + 1;
231 	while (i < len) {
232 		bytes_consumed = utf8_valid(&nqn[i], &nqn[len]);
233 		if (bytes_consumed <= 0) {
234 			SPDK_ERRLOG("Invalid domain name in NQN \"%s\". Label names must contain only valid utf-8.\n", nqn);
235 			return false;
236 		}
237 
238 		i += bytes_consumed;
239 	}
240 	return true;
241 }
242 
243 static void subsystem_state_change_on_pg(struct spdk_io_channel_iter *i);
244 
245 struct spdk_nvmf_subsystem *
246 spdk_nvmf_subsystem_create(struct spdk_nvmf_tgt *tgt,
247 			   const char *nqn,
248 			   enum spdk_nvmf_subtype type,
249 			   uint32_t num_ns)
250 {
251 	struct spdk_nvmf_subsystem	*subsystem;
252 	uint32_t			sid;
253 
254 	if (spdk_nvmf_tgt_find_subsystem(tgt, nqn)) {
255 		SPDK_ERRLOG("Subsystem NQN '%s' already exists\n", nqn);
256 		return NULL;
257 	}
258 
259 	if (!nvmf_valid_nqn(nqn)) {
260 		return NULL;
261 	}
262 
263 	if (type == SPDK_NVMF_SUBTYPE_DISCOVERY) {
264 		if (num_ns != 0) {
265 			SPDK_ERRLOG("Discovery subsystem cannot have namespaces.\n");
266 			return NULL;
267 		}
268 	} else if (num_ns == 0) {
269 		num_ns = NVMF_SUBSYSTEM_DEFAULT_NAMESPACES;
270 	}
271 
272 	/* Find a free subsystem id (sid) */
273 	for (sid = 0; sid < tgt->max_subsystems; sid++) {
274 		if (tgt->subsystems[sid] == NULL) {
275 			break;
276 		}
277 	}
278 	if (sid >= tgt->max_subsystems) {
279 		return NULL;
280 	}
281 
282 	subsystem = calloc(1, sizeof(struct spdk_nvmf_subsystem));
283 	if (subsystem == NULL) {
284 		return NULL;
285 	}
286 
287 	subsystem->thread = spdk_get_thread();
288 	subsystem->state = SPDK_NVMF_SUBSYSTEM_INACTIVE;
289 	subsystem->tgt = tgt;
290 	subsystem->id = sid;
291 	subsystem->subtype = type;
292 	subsystem->max_nsid = num_ns;
293 	subsystem->next_cntlid = 0;
294 	subsystem->min_cntlid = NVMF_MIN_CNTLID;
295 	subsystem->max_cntlid = NVMF_MAX_CNTLID;
296 	snprintf(subsystem->subnqn, sizeof(subsystem->subnqn), "%s", nqn);
297 	pthread_mutex_init(&subsystem->mutex, NULL);
298 	TAILQ_INIT(&subsystem->listeners);
299 	TAILQ_INIT(&subsystem->hosts);
300 	TAILQ_INIT(&subsystem->ctrlrs);
301 	subsystem->used_listener_ids = spdk_bit_array_create(NVMF_MAX_LISTENERS_PER_SUBSYSTEM);
302 	if (subsystem->used_listener_ids == NULL) {
303 		pthread_mutex_destroy(&subsystem->mutex);
304 		free(subsystem);
305 		return NULL;
306 	}
307 
308 	if (num_ns != 0) {
309 		subsystem->ns = calloc(num_ns, sizeof(struct spdk_nvmf_ns *));
310 		if (subsystem->ns == NULL) {
311 			SPDK_ERRLOG("Namespace memory allocation failed\n");
312 			pthread_mutex_destroy(&subsystem->mutex);
313 			spdk_bit_array_free(&subsystem->used_listener_ids);
314 			free(subsystem);
315 			return NULL;
316 		}
317 		subsystem->ana_group = calloc(num_ns, sizeof(uint32_t));
318 		if (subsystem->ana_group == NULL) {
319 			SPDK_ERRLOG("ANA group memory allocation failed\n");
320 			pthread_mutex_destroy(&subsystem->mutex);
321 			free(subsystem->ns);
322 			spdk_bit_array_free(&subsystem->used_listener_ids);
323 			free(subsystem);
324 			return NULL;
325 		}
326 	}
327 
328 	memset(subsystem->sn, '0', sizeof(subsystem->sn) - 1);
329 	subsystem->sn[sizeof(subsystem->sn) - 1] = '\0';
330 
331 	snprintf(subsystem->mn, sizeof(subsystem->mn), "%s",
332 		 MODEL_NUMBER_DEFAULT);
333 
334 	tgt->subsystems[sid] = subsystem;
335 
336 	return subsystem;
337 }
338 
/* Must hold subsystem->mutex while calling this function */
static void
nvmf_subsystem_remove_host(struct spdk_nvmf_subsystem *subsystem, struct spdk_nvmf_host *host)
{
	/* Unlink the host from the subsystem's allowed-host list and release it. */
	TAILQ_REMOVE(&subsystem->hosts, host, link);
	free(host);
}
346 
/*
 * Detach a listener from the subsystem: optionally stop the transport
 * listen, clear any controller references to it, unlink it, refresh the
 * discovery log, and release its resources.
 */
static void
_nvmf_subsystem_remove_listener(struct spdk_nvmf_subsystem *subsystem,
				struct spdk_nvmf_subsystem_listener *listener,
				bool stop)
{
	struct spdk_nvmf_transport *transport;
	struct spdk_nvmf_ctrlr *ctrlr;

	if (stop) {
		/* Stop accepting new connections on this trid if the transport still exists. */
		transport = spdk_nvmf_tgt_get_transport(subsystem->tgt, listener->trid->trstring);
		if (transport != NULL) {
			spdk_nvmf_transport_stop_listen(transport, listener->trid);
		}
	}

	/* Controllers may still reference this listener; null those out first. */
	TAILQ_FOREACH(ctrlr, &subsystem->ctrlrs, link) {
		if (ctrlr->listener == listener) {
			ctrlr->listener = NULL;
		}
	}

	TAILQ_REMOVE(&subsystem->listeners, listener, link);
	nvmf_update_discovery_log(listener->subsystem->tgt, NULL);
	free(listener->ana_state);
	/* Return the listener id to the pool before freeing the listener itself. */
	spdk_bit_array_clear(subsystem->used_listener_ids, listener->id);
	free(listener);
}
374 
/* Thread-message trampoline: retry the subsystem destruction on its own thread. */
static void
_nvmf_subsystem_destroy_msg(void *cb_arg)
{
	struct spdk_nvmf_subsystem *subsystem = cb_arg;

	_nvmf_subsystem_destroy(subsystem);
}
382 
/*
 * Tear down the subsystem. If controllers are still attached, destruction
 * is deferred: a message is re-sent to the subsystem's thread and
 * -EINPROGRESS is returned; the async completion callback (if set via
 * spdk_nvmf_subsystem_destroy) fires once the final teardown succeeds.
 */
static int
_nvmf_subsystem_destroy(struct spdk_nvmf_subsystem *subsystem)
{
	struct spdk_nvmf_ns		*ns;
	nvmf_subsystem_destroy_cb	async_destroy_cb = NULL;
	void				*async_destroy_cb_arg = NULL;
	int				rc;

	if (!TAILQ_EMPTY(&subsystem->ctrlrs)) {
		SPDK_DEBUGLOG(nvmf, "subsystem %p %s has active controllers\n", subsystem, subsystem->subnqn);
		subsystem->async_destroy = true;
		/* Re-queue ourselves; this repeats until the controllers are gone. */
		rc = spdk_thread_send_msg(subsystem->thread, _nvmf_subsystem_destroy_msg, subsystem);
		if (rc) {
			SPDK_ERRLOG("Failed to send thread msg, rc %d\n", rc);
			assert(0);
			return rc;
		}
		return -EINPROGRESS;
	}

	/* Remove every namespace; fetch the next pointer before removal invalidates ns. */
	ns = spdk_nvmf_subsystem_get_first_ns(subsystem);
	while (ns != NULL) {
		struct spdk_nvmf_ns *next_ns = spdk_nvmf_subsystem_get_next_ns(subsystem, ns);

		spdk_nvmf_subsystem_remove_ns(subsystem, ns->opts.nsid);
		ns = next_ns;
	}

	free(subsystem->ns);
	free(subsystem->ana_group);

	/* Release our slot in the target's subsystem array. */
	subsystem->tgt->subsystems[subsystem->id] = NULL;

	pthread_mutex_destroy(&subsystem->mutex);

	spdk_bit_array_free(&subsystem->used_listener_ids);

	/* Capture the async callback before freeing the object that holds it. */
	if (subsystem->async_destroy) {
		async_destroy_cb = subsystem->async_destroy_cb;
		async_destroy_cb_arg = subsystem->async_destroy_cb_arg;
	}

	free(subsystem);

	if (async_destroy_cb) {
		async_destroy_cb(async_destroy_cb_arg);
	}

	return 0;
}
433 
/*
 * Public entry point for destroying a subsystem. Must be called on the
 * subsystem's thread while the subsystem is INACTIVE. Hosts and listeners
 * are removed synchronously; if controllers remain, _nvmf_subsystem_destroy
 * returns -EINPROGRESS and cpl_cb is invoked when teardown completes.
 */
int
spdk_nvmf_subsystem_destroy(struct spdk_nvmf_subsystem *subsystem, nvmf_subsystem_destroy_cb cpl_cb,
			    void *cpl_cb_arg)
{
	struct spdk_nvmf_host *host, *host_tmp;

	if (!subsystem) {
		return -EINVAL;
	}

	assert(spdk_get_thread() == subsystem->thread);

	if (subsystem->state != SPDK_NVMF_SUBSYSTEM_INACTIVE) {
		SPDK_ERRLOG("Subsystem can only be destroyed in inactive state\n");
		assert(0);
		return -EAGAIN;
	}
	if (subsystem->destroying) {
		SPDK_ERRLOG("Subsystem destruction is already started\n");
		assert(0);
		return -EALREADY;
	}

	subsystem->destroying = true;

	SPDK_DEBUGLOG(nvmf, "subsystem is %p %s\n", subsystem, subsystem->subnqn);

	/* stop == false: listeners are detached but transports keep listening. */
	nvmf_subsystem_remove_all_listeners(subsystem, false);

	pthread_mutex_lock(&subsystem->mutex);

	TAILQ_FOREACH_SAFE(host, &subsystem->hosts, link, host_tmp) {
		nvmf_subsystem_remove_host(subsystem, host);
	}

	pthread_mutex_unlock(&subsystem->mutex);

	/* Stash the completion for the (possibly deferred) final teardown. */
	subsystem->async_destroy_cb = cpl_cb;
	subsystem->async_destroy_cb_arg = cpl_cb_arg;

	return _nvmf_subsystem_destroy(subsystem);
}
476 
477 /* we have to use the typedef in the function declaration to appease astyle. */
478 typedef enum spdk_nvmf_subsystem_state spdk_nvmf_subsystem_state_t;
479 
480 static spdk_nvmf_subsystem_state_t
481 nvmf_subsystem_get_intermediate_state(enum spdk_nvmf_subsystem_state current_state,
482 				      enum spdk_nvmf_subsystem_state requested_state)
483 {
484 	switch (requested_state) {
485 	case SPDK_NVMF_SUBSYSTEM_INACTIVE:
486 		return SPDK_NVMF_SUBSYSTEM_DEACTIVATING;
487 	case SPDK_NVMF_SUBSYSTEM_ACTIVE:
488 		if (current_state == SPDK_NVMF_SUBSYSTEM_PAUSED) {
489 			return SPDK_NVMF_SUBSYSTEM_RESUMING;
490 		} else {
491 			return SPDK_NVMF_SUBSYSTEM_ACTIVATING;
492 		}
493 	case SPDK_NVMF_SUBSYSTEM_PAUSED:
494 		return SPDK_NVMF_SUBSYSTEM_PAUSING;
495 	default:
496 		assert(false);
497 		return SPDK_NVMF_SUBSYSTEM_NUM_STATES;
498 	}
499 }
500 
/*
 * Atomically transition subsystem->state to the given state. The expected
 * previous state is derived from the normal state machine; a small set of
 * alternate previous states is tolerated for failure/abort paths.
 * Returns 0 when the transition succeeded, non-zero otherwise.
 */
static int
nvmf_subsystem_set_state(struct spdk_nvmf_subsystem *subsystem,
			 enum spdk_nvmf_subsystem_state state)
{
	enum spdk_nvmf_subsystem_state actual_old_state, expected_old_state;
	bool exchanged;

	/* Map the target state to the state we expect to be coming from. */
	switch (state) {
	case SPDK_NVMF_SUBSYSTEM_INACTIVE:
		expected_old_state = SPDK_NVMF_SUBSYSTEM_DEACTIVATING;
		break;
	case SPDK_NVMF_SUBSYSTEM_ACTIVATING:
		expected_old_state = SPDK_NVMF_SUBSYSTEM_INACTIVE;
		break;
	case SPDK_NVMF_SUBSYSTEM_ACTIVE:
		expected_old_state = SPDK_NVMF_SUBSYSTEM_ACTIVATING;
		break;
	case SPDK_NVMF_SUBSYSTEM_PAUSING:
		expected_old_state = SPDK_NVMF_SUBSYSTEM_ACTIVE;
		break;
	case SPDK_NVMF_SUBSYSTEM_PAUSED:
		expected_old_state = SPDK_NVMF_SUBSYSTEM_PAUSING;
		break;
	case SPDK_NVMF_SUBSYSTEM_RESUMING:
		expected_old_state = SPDK_NVMF_SUBSYSTEM_PAUSED;
		break;
	case SPDK_NVMF_SUBSYSTEM_DEACTIVATING:
		expected_old_state = SPDK_NVMF_SUBSYSTEM_ACTIVE;
		break;
	default:
		assert(false);
		return -1;
	}

	actual_old_state = expected_old_state;
	exchanged = __atomic_compare_exchange_n(&subsystem->state, &actual_old_state, state, false,
						__ATOMIC_RELAXED, __ATOMIC_RELAXED);
	if (spdk_unlikely(exchanged == false)) {
		/* The CAS failed; actual_old_state now holds the state that was
		 * actually observed. Accept a few legitimate alternate transitions
		 * and retry the exchange once with the adjusted expectation. */
		if (actual_old_state == SPDK_NVMF_SUBSYSTEM_RESUMING &&
		    state == SPDK_NVMF_SUBSYSTEM_ACTIVE) {
			expected_old_state = SPDK_NVMF_SUBSYSTEM_RESUMING;
		}
		/* This is for the case when activating the subsystem fails. */
		if (actual_old_state == SPDK_NVMF_SUBSYSTEM_ACTIVATING &&
		    state == SPDK_NVMF_SUBSYSTEM_DEACTIVATING) {
			expected_old_state = SPDK_NVMF_SUBSYSTEM_ACTIVATING;
		}
		/* This is for the case when resuming the subsystem fails. */
		if (actual_old_state == SPDK_NVMF_SUBSYSTEM_RESUMING &&
		    state == SPDK_NVMF_SUBSYSTEM_PAUSING) {
			expected_old_state = SPDK_NVMF_SUBSYSTEM_RESUMING;
		}
		/* This is for the case when stopping paused subsystem */
		if (actual_old_state == SPDK_NVMF_SUBSYSTEM_PAUSED &&
		    state == SPDK_NVMF_SUBSYSTEM_DEACTIVATING) {
			expected_old_state = SPDK_NVMF_SUBSYSTEM_PAUSED;
		}
		actual_old_state = expected_old_state;
		__atomic_compare_exchange_n(&subsystem->state, &actual_old_state, state, false,
					    __ATOMIC_RELAXED, __ATOMIC_RELAXED);
	}
	/* Zero iff the (possibly retried) exchange saw the expected old state. */
	assert(actual_old_state == expected_old_state);
	return actual_old_state - expected_old_state;
}
565 
566 struct subsystem_state_change_ctx {
567 	struct spdk_nvmf_subsystem		*subsystem;
568 	uint16_t				nsid;
569 
570 	enum spdk_nvmf_subsystem_state		original_state;
571 	enum spdk_nvmf_subsystem_state		requested_state;
572 
573 	spdk_nvmf_subsystem_state_change_done	cb_fn;
574 	void					*cb_arg;
575 };
576 
/*
 * Completion for the revert pass after a failed state change: restore the
 * previous state and report failure to the original caller. The per-group
 * status is intentionally ignored; this path always reports -1.
 */
static void
subsystem_state_change_revert_done(struct spdk_io_channel_iter *i, int status)
{
	struct subsystem_state_change_ctx *ctx = spdk_io_channel_iter_get_ctx(i);

	/* Nothing to be done here if the state setting fails, we are just screwed. */
	if (nvmf_subsystem_set_state(ctx->subsystem, ctx->requested_state)) {
		SPDK_ERRLOG("Unable to revert the subsystem state after operation failure.\n");
	}

	ctx->subsystem->changing_state = false;
	if (ctx->cb_fn) {
		/* return a failure here. This function only exists in an error path. */
		ctx->cb_fn(ctx->subsystem, ctx->cb_arg, -1);
	}
	free(ctx);
}
594 
/*
 * Completion after every poll group processed the state change. On success,
 * commit the final state; on failure, move back through the opposite
 * intermediate state and run a revert pass across the poll groups.
 */
static void
subsystem_state_change_done(struct spdk_io_channel_iter *i, int status)
{
	struct subsystem_state_change_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
	enum spdk_nvmf_subsystem_state intermediate_state;

	SPDK_DTRACE_PROBE4(nvmf_subsystem_change_state_done, ctx->subsystem->subnqn,
			   ctx->requested_state, ctx->original_state, status);

	if (status == 0) {
		status = nvmf_subsystem_set_state(ctx->subsystem, ctx->requested_state);
		if (status) {
			status = -1;
		}
	}

	if (status) {
		/* Arguments are deliberately swapped: we are travelling from the
		 * requested state back toward the original state. */
		intermediate_state = nvmf_subsystem_get_intermediate_state(ctx->requested_state,
				     ctx->original_state);
		assert(intermediate_state != SPDK_NVMF_SUBSYSTEM_NUM_STATES);

		/* If we cannot even enter the revert intermediate state, give up
		 * and report the failure as-is. */
		if (nvmf_subsystem_set_state(ctx->subsystem, intermediate_state)) {
			goto out;
		}
		ctx->requested_state = ctx->original_state;
		spdk_for_each_channel(ctx->subsystem->tgt,
				      subsystem_state_change_on_pg,
				      ctx,
				      subsystem_state_change_revert_done);
		return;
	}

out:
	ctx->subsystem->changing_state = false;
	if (ctx->cb_fn) {
		ctx->cb_fn(ctx->subsystem, ctx->cb_arg, status);
	}
	free(ctx);
}
634 
/* Per-poll-group completion: advance the channel iteration with the result. */
static void
subsystem_state_change_continue(void *ctx, int status)
{
	struct spdk_io_channel_iter *i = ctx;
	/* _ctx is only needed for the tracepoint below. */
	struct subsystem_state_change_ctx *_ctx __attribute__((unused));

	_ctx = spdk_io_channel_iter_get_ctx(i);
	SPDK_DTRACE_PROBE3(nvmf_pg_change_state_done, _ctx->subsystem->subnqn,
			   _ctx->requested_state, spdk_thread_get_id(spdk_get_thread()));

	spdk_for_each_channel_continue(i, status);
}
647 
/*
 * Runs on each poll group's thread: apply the requested subsystem state
 * transition to this poll group, then continue the channel iteration via
 * subsystem_state_change_continue.
 */
static void
subsystem_state_change_on_pg(struct spdk_io_channel_iter *i)
{
	struct subsystem_state_change_ctx *ctx;
	struct spdk_io_channel *ch;
	struct spdk_nvmf_poll_group *group;

	ctx = spdk_io_channel_iter_get_ctx(i);
	ch = spdk_io_channel_iter_get_channel(i);
	group = spdk_io_channel_get_ctx(ch);

	SPDK_DTRACE_PROBE3(nvmf_pg_change_state, ctx->subsystem->subnqn,
			   ctx->requested_state, spdk_thread_get_id(spdk_get_thread()));
	switch (ctx->requested_state) {
	case SPDK_NVMF_SUBSYSTEM_INACTIVE:
		nvmf_poll_group_remove_subsystem(group, ctx->subsystem, subsystem_state_change_continue, i);
		break;
	case SPDK_NVMF_SUBSYSTEM_ACTIVE:
		/* The current intermediate state distinguishes a fresh activation
		 * from a resume out of the paused state. */
		if (ctx->subsystem->state == SPDK_NVMF_SUBSYSTEM_ACTIVATING) {
			nvmf_poll_group_add_subsystem(group, ctx->subsystem, subsystem_state_change_continue, i);
		} else if (ctx->subsystem->state == SPDK_NVMF_SUBSYSTEM_RESUMING) {
			nvmf_poll_group_resume_subsystem(group, ctx->subsystem, subsystem_state_change_continue, i);
		}
		break;
	case SPDK_NVMF_SUBSYSTEM_PAUSED:
		nvmf_poll_group_pause_subsystem(group, ctx->subsystem, ctx->nsid, subsystem_state_change_continue,
						i);
		break;
	default:
		assert(false);
		break;
	}
}
681 
/*
 * Drive a subsystem state transition: claim the changing_state flag, enter
 * the intermediate state, then apply the change on every poll group.
 * cb_fn is invoked (with 0 or a negative error) once the transition settles.
 * Returns -EBUSY if another state change is already in flight.
 */
static int
nvmf_subsystem_state_change(struct spdk_nvmf_subsystem *subsystem,
			    uint32_t nsid,
			    enum spdk_nvmf_subsystem_state requested_state,
			    spdk_nvmf_subsystem_state_change_done cb_fn,
			    void *cb_arg)
{
	struct subsystem_state_change_ctx *ctx;
	enum spdk_nvmf_subsystem_state intermediate_state;
	int rc;

	/* Atomically claim exclusive right to change the state. */
	if (__sync_val_compare_and_swap(&subsystem->changing_state, false, true)) {
		return -EBUSY;
	}

	SPDK_DTRACE_PROBE3(nvmf_subsystem_change_state, subsystem->subnqn,
			   requested_state, subsystem->state);
	/* If we are already in the requested state, just call the callback immediately. */
	if (subsystem->state == requested_state) {
		subsystem->changing_state = false;
		if (cb_fn) {
			cb_fn(subsystem, cb_arg, 0);
		}
		return 0;
	}

	intermediate_state = nvmf_subsystem_get_intermediate_state(subsystem->state, requested_state);
	assert(intermediate_state != SPDK_NVMF_SUBSYSTEM_NUM_STATES);

	ctx = calloc(1, sizeof(*ctx));
	if (!ctx) {
		subsystem->changing_state = false;
		return -ENOMEM;
	}

	/* Remember where we came from so a failed change can be reverted. */
	ctx->original_state = subsystem->state;
	rc = nvmf_subsystem_set_state(subsystem, intermediate_state);
	if (rc) {
		free(ctx);
		subsystem->changing_state = false;
		return rc;
	}

	ctx->subsystem = subsystem;
	ctx->nsid = nsid;
	ctx->requested_state = requested_state;
	ctx->cb_fn = cb_fn;
	ctx->cb_arg = cb_arg;

	/* subsystem_state_change_done runs after every poll group has processed
	 * subsystem_state_change_on_pg. */
	spdk_for_each_channel(subsystem->tgt,
			      subsystem_state_change_on_pg,
			      ctx,
			      subsystem_state_change_done);

	return 0;
}
738 
/* Transition the subsystem to ACTIVE; cb_fn fires when all poll groups are done. */
int
spdk_nvmf_subsystem_start(struct spdk_nvmf_subsystem *subsystem,
			  spdk_nvmf_subsystem_state_change_done cb_fn,
			  void *cb_arg)
{
	return nvmf_subsystem_state_change(subsystem, 0, SPDK_NVMF_SUBSYSTEM_ACTIVE, cb_fn, cb_arg);
}
746 
/* Transition the subsystem to INACTIVE; cb_fn fires when all poll groups are done. */
int
spdk_nvmf_subsystem_stop(struct spdk_nvmf_subsystem *subsystem,
			 spdk_nvmf_subsystem_state_change_done cb_fn,
			 void *cb_arg)
{
	return nvmf_subsystem_state_change(subsystem, 0, SPDK_NVMF_SUBSYSTEM_INACTIVE, cb_fn, cb_arg);
}
754 
/*
 * Transition the subsystem to PAUSED. nsid is forwarded to each poll
 * group's pause handler (presumably selecting a namespace to quiesce —
 * semantics live in nvmf_poll_group_pause_subsystem).
 */
int
spdk_nvmf_subsystem_pause(struct spdk_nvmf_subsystem *subsystem,
			  uint32_t nsid,
			  spdk_nvmf_subsystem_state_change_done cb_fn,
			  void *cb_arg)
{
	return nvmf_subsystem_state_change(subsystem, nsid, SPDK_NVMF_SUBSYSTEM_PAUSED, cb_fn, cb_arg);
}
763 
/* Transition a paused subsystem back to ACTIVE (same target state as start). */
int
spdk_nvmf_subsystem_resume(struct spdk_nvmf_subsystem *subsystem,
			   spdk_nvmf_subsystem_state_change_done cb_fn,
			   void *cb_arg)
{
	return nvmf_subsystem_state_change(subsystem, 0, SPDK_NVMF_SUBSYSTEM_ACTIVE, cb_fn, cb_arg);
}
771 
772 struct spdk_nvmf_subsystem *
773 spdk_nvmf_subsystem_get_first(struct spdk_nvmf_tgt *tgt)
774 {
775 	struct spdk_nvmf_subsystem	*subsystem;
776 	uint32_t sid;
777 
778 	for (sid = 0; sid < tgt->max_subsystems; sid++) {
779 		subsystem = tgt->subsystems[sid];
780 		if (subsystem) {
781 			return subsystem;
782 		}
783 	}
784 
785 	return NULL;
786 }
787 
788 struct spdk_nvmf_subsystem *
789 spdk_nvmf_subsystem_get_next(struct spdk_nvmf_subsystem *subsystem)
790 {
791 	uint32_t sid;
792 	struct spdk_nvmf_tgt *tgt;
793 
794 	if (!subsystem) {
795 		return NULL;
796 	}
797 
798 	tgt = subsystem->tgt;
799 
800 	for (sid = subsystem->id + 1; sid < tgt->max_subsystems; sid++) {
801 		subsystem = tgt->subsystems[sid];
802 		if (subsystem) {
803 			return subsystem;
804 		}
805 	}
806 
807 	return NULL;
808 }
809 
810 /* Must hold subsystem->mutex while calling this function */
811 static struct spdk_nvmf_host *
812 nvmf_subsystem_find_host(struct spdk_nvmf_subsystem *subsystem, const char *hostnqn)
813 {
814 	struct spdk_nvmf_host *host = NULL;
815 
816 	TAILQ_FOREACH(host, &subsystem->hosts, link) {
817 		if (strcmp(hostnqn, host->nqn) == 0) {
818 			return host;
819 		}
820 	}
821 
822 	return NULL;
823 }
824 
825 int
826 spdk_nvmf_subsystem_add_host(struct spdk_nvmf_subsystem *subsystem, const char *hostnqn)
827 {
828 	struct spdk_nvmf_host *host;
829 
830 	if (!nvmf_valid_nqn(hostnqn)) {
831 		return -EINVAL;
832 	}
833 
834 	pthread_mutex_lock(&subsystem->mutex);
835 
836 	if (nvmf_subsystem_find_host(subsystem, hostnqn)) {
837 		/* This subsystem already allows the specified host. */
838 		pthread_mutex_unlock(&subsystem->mutex);
839 		return 0;
840 	}
841 
842 	host = calloc(1, sizeof(*host));
843 	if (!host) {
844 		pthread_mutex_unlock(&subsystem->mutex);
845 		return -ENOMEM;
846 	}
847 
848 	snprintf(host->nqn, sizeof(host->nqn), "%s", hostnqn);
849 
850 	TAILQ_INSERT_HEAD(&subsystem->hosts, host, link);
851 
852 	if (!TAILQ_EMPTY(&subsystem->listeners)) {
853 		nvmf_update_discovery_log(subsystem->tgt, hostnqn);
854 	}
855 
856 	pthread_mutex_unlock(&subsystem->mutex);
857 
858 	return 0;
859 }
860 
861 int
862 spdk_nvmf_subsystem_remove_host(struct spdk_nvmf_subsystem *subsystem, const char *hostnqn)
863 {
864 	struct spdk_nvmf_host *host;
865 
866 	pthread_mutex_lock(&subsystem->mutex);
867 
868 	host = nvmf_subsystem_find_host(subsystem, hostnqn);
869 	if (host == NULL) {
870 		pthread_mutex_unlock(&subsystem->mutex);
871 		return -ENOENT;
872 	}
873 
874 	nvmf_subsystem_remove_host(subsystem, host);
875 
876 	if (!TAILQ_EMPTY(&subsystem->listeners)) {
877 		nvmf_update_discovery_log(subsystem->tgt, hostnqn);
878 	}
879 
880 	pthread_mutex_unlock(&subsystem->mutex);
881 
882 	return 0;
883 }
884 
/* Context for disconnecting every qpair of one host across all poll groups. */
struct nvmf_subsystem_disconnect_host_ctx {
	struct spdk_nvmf_subsystem		*subsystem;
	/* Heap copy of the caller's host NQN; freed in the fini callback. */
	char					*hostnqn;
	spdk_nvmf_tgt_subsystem_listen_done_fn	cb_fn;
	void					*cb_arg;
};
891 
/* Final callback once every poll group has processed the disconnect. */
static void
nvmf_subsystem_disconnect_host_fini(struct spdk_io_channel_iter *i, int status)
{
	struct nvmf_subsystem_disconnect_host_ctx *ctx;

	ctx = spdk_io_channel_iter_get_ctx(i);

	if (ctx->cb_fn) {
		ctx->cb_fn(ctx->cb_arg, status);
	}
	free(ctx->hostnqn);
	free(ctx);
}
905 
/*
 * Runs on each poll group's thread: disconnect every qpair belonging to the
 * target host on this subsystem, then continue the channel iteration.
 */
static void
nvmf_subsystem_disconnect_qpairs_by_host(struct spdk_io_channel_iter *i)
{
	struct nvmf_subsystem_disconnect_host_ctx *ctx;
	struct spdk_nvmf_poll_group *group;
	struct spdk_io_channel *ch;
	struct spdk_nvmf_qpair *qpair, *tmp_qpair;
	struct spdk_nvmf_ctrlr *ctrlr;

	ctx = spdk_io_channel_iter_get_ctx(i);
	ch = spdk_io_channel_iter_get_channel(i);
	group = spdk_io_channel_get_ctx(ch);

	/* SAFE variant: disconnecting may remove the qpair from the list. */
	TAILQ_FOREACH_SAFE(qpair, &group->qpairs, link, tmp_qpair) {
		ctrlr = qpair->ctrlr;

		/* Skip qpairs without a controller or on other subsystems. */
		if (ctrlr == NULL || ctrlr->subsys != ctx->subsystem) {
			continue;
		}

		if (strncmp(ctrlr->hostnqn, ctx->hostnqn, sizeof(ctrlr->hostnqn)) == 0) {
			/* Right now this does not wait for the queue pairs to actually disconnect. */
			spdk_nvmf_qpair_disconnect(qpair, NULL, NULL);
		}
	}
	spdk_for_each_channel_continue(i, 0);
}
933 
934 int
935 spdk_nvmf_subsystem_disconnect_host(struct spdk_nvmf_subsystem *subsystem,
936 				    const char *hostnqn,
937 				    spdk_nvmf_tgt_subsystem_listen_done_fn cb_fn,
938 				    void *cb_arg)
939 {
940 	struct nvmf_subsystem_disconnect_host_ctx *ctx;
941 
942 	ctx = calloc(1, sizeof(struct nvmf_subsystem_disconnect_host_ctx));
943 	if (ctx == NULL) {
944 		return -ENOMEM;
945 	}
946 
947 	ctx->hostnqn = strdup(hostnqn);
948 	if (ctx->hostnqn == NULL) {
949 		free(ctx);
950 		return -ENOMEM;
951 	}
952 
953 	ctx->subsystem = subsystem;
954 	ctx->cb_fn = cb_fn;
955 	ctx->cb_arg = cb_arg;
956 
957 	spdk_for_each_channel(subsystem->tgt, nvmf_subsystem_disconnect_qpairs_by_host, ctx,
958 			      nvmf_subsystem_disconnect_host_fini);
959 
960 	return 0;
961 }
962 
/*
 * Set whether any host may connect without being in the allowed-host list.
 * Always returns 0.
 */
int
spdk_nvmf_subsystem_set_allow_any_host(struct spdk_nvmf_subsystem *subsystem, bool allow_any_host)
{
	pthread_mutex_lock(&subsystem->mutex);
	subsystem->flags.allow_any_host = allow_any_host;
	/* Only advertise the policy change when the subsystem is reachable. */
	if (!TAILQ_EMPTY(&subsystem->listeners)) {
		nvmf_update_discovery_log(subsystem->tgt, NULL);
	}
	pthread_mutex_unlock(&subsystem->mutex);

	return 0;
}
975 
976 bool
977 spdk_nvmf_subsystem_get_allow_any_host(const struct spdk_nvmf_subsystem *subsystem)
978 {
979 	bool allow_any_host;
980 	struct spdk_nvmf_subsystem *sub;
981 
982 	/* Technically, taking the mutex modifies data in the subsystem. But the const
983 	 * is still important to convey that this doesn't mutate any other data. Cast
984 	 * it away to work around this. */
985 	sub = (struct spdk_nvmf_subsystem *)subsystem;
986 
987 	pthread_mutex_lock(&sub->mutex);
988 	allow_any_host = sub->flags.allow_any_host;
989 	pthread_mutex_unlock(&sub->mutex);
990 
991 	return allow_any_host;
992 }
993 
994 bool
995 spdk_nvmf_subsystem_host_allowed(struct spdk_nvmf_subsystem *subsystem, const char *hostnqn)
996 {
997 	bool allowed;
998 
999 	if (!hostnqn) {
1000 		return false;
1001 	}
1002 
1003 	pthread_mutex_lock(&subsystem->mutex);
1004 
1005 	if (subsystem->flags.allow_any_host) {
1006 		pthread_mutex_unlock(&subsystem->mutex);
1007 		return true;
1008 	}
1009 
1010 	allowed =  nvmf_subsystem_find_host(subsystem, hostnqn) != NULL;
1011 	pthread_mutex_unlock(&subsystem->mutex);
1012 
1013 	return allowed;
1014 }
1015 
1016 struct spdk_nvmf_host *
1017 spdk_nvmf_subsystem_get_first_host(struct spdk_nvmf_subsystem *subsystem)
1018 {
1019 	return TAILQ_FIRST(&subsystem->hosts);
1020 }
1021 
1022 
1023 struct spdk_nvmf_host *
1024 spdk_nvmf_subsystem_get_next_host(struct spdk_nvmf_subsystem *subsystem,
1025 				  struct spdk_nvmf_host *prev_host)
1026 {
1027 	return TAILQ_NEXT(prev_host, link);
1028 }
1029 
1030 const char *
1031 spdk_nvmf_host_get_nqn(const struct spdk_nvmf_host *host)
1032 {
1033 	return host->nqn;
1034 }
1035 
1036 struct spdk_nvmf_subsystem_listener *
1037 nvmf_subsystem_find_listener(struct spdk_nvmf_subsystem *subsystem,
1038 			     const struct spdk_nvme_transport_id *trid)
1039 {
1040 	struct spdk_nvmf_subsystem_listener *listener;
1041 
1042 	TAILQ_FOREACH(listener, &subsystem->listeners, link) {
1043 		if (spdk_nvme_transport_id_compare(listener->trid, trid) == 0) {
1044 			return listener;
1045 		}
1046 	}
1047 
1048 	return NULL;
1049 }
1050 
1051 /**
1052  * Function to be called once the target is listening.
1053  *
1054  * \param ctx Context argument passed to this function.
1055  * \param status 0 if it completed successfully, or negative errno if it failed.
1056  */
1057 static void
1058 _nvmf_subsystem_add_listener_done(void *ctx, int status)
1059 {
1060 	struct spdk_nvmf_subsystem_listener *listener = ctx;
1061 
1062 	if (status) {
1063 		listener->cb_fn(listener->cb_arg, status);
1064 		free(listener);
1065 		return;
1066 	}
1067 
1068 	TAILQ_INSERT_HEAD(&listener->subsystem->listeners, listener, link);
1069 	nvmf_update_discovery_log(listener->subsystem->tgt, NULL);
1070 	listener->cb_fn(listener->cb_arg, status);
1071 }
1072 
/* Attach an existing transport listener (identified by trid) to this
 * subsystem. The subsystem must be INACTIVE or PAUSED. The result is
 * delivered through cb_fn (0 on success, negative errno on failure);
 * adding an already-attached trid succeeds immediately. */
void
spdk_nvmf_subsystem_add_listener(struct spdk_nvmf_subsystem *subsystem,
				 struct spdk_nvme_transport_id *trid,
				 spdk_nvmf_tgt_subsystem_listen_done_fn cb_fn,
				 void *cb_arg)
{
	struct spdk_nvmf_transport *transport;
	struct spdk_nvmf_subsystem_listener *listener;
	struct spdk_nvmf_listener *tr_listener;
	uint32_t i;
	uint32_t id;
	int rc = 0;

	assert(cb_fn != NULL);

	/* Listeners may only change while no controllers are processing I/O. */
	if (!(subsystem->state == SPDK_NVMF_SUBSYSTEM_INACTIVE ||
	      subsystem->state == SPDK_NVMF_SUBSYSTEM_PAUSED)) {
		cb_fn(cb_arg, -EAGAIN);
		return;
	}

	if (nvmf_subsystem_find_listener(subsystem, trid)) {
		/* Listener already exists in this subsystem */
		cb_fn(cb_arg, 0);
		return;
	}

	/* The transport-level listener must already exist; this call only
	 * associates it with the subsystem. */
	transport = spdk_nvmf_tgt_get_transport(subsystem->tgt, trid->trstring);
	if (!transport) {
		SPDK_ERRLOG("Unable to find %s transport. The transport must be created first also make sure it is properly registered.\n",
			    trid->trstring);
		cb_fn(cb_arg, -EINVAL);
		return;
	}

	tr_listener = nvmf_transport_find_listener(transport, trid);
	if (!tr_listener) {
		SPDK_ERRLOG("Cannot find transport listener for %s\n", trid->traddr);
		cb_fn(cb_arg, -EINVAL);
		return;
	}

	listener = calloc(1, sizeof(*listener));
	if (!listener) {
		cb_fn(cb_arg, -ENOMEM);
		return;
	}

	/* Point at the transport listener's trid rather than copying it. */
	listener->trid = &tr_listener->trid;
	listener->transport = transport;
	listener->cb_fn = cb_fn;
	listener->cb_arg = cb_arg;
	listener->subsystem = subsystem;
	/* One ANA state slot per possible namespace. */
	listener->ana_state = calloc(subsystem->max_nsid, sizeof(enum spdk_nvme_ana_state));
	if (!listener->ana_state) {
		free(listener);
		cb_fn(cb_arg, -ENOMEM);
		return;
	}

	/* Reserve a unique id for this listener from the subsystem-wide bitmap. */
	id = spdk_bit_array_find_first_clear(subsystem->used_listener_ids, 0);
	if (id == UINT32_MAX) {
		SPDK_ERRLOG("Cannot add any more listeners\n");
		free(listener->ana_state);
		free(listener);
		cb_fn(cb_arg, -EINVAL);
		return;
	}

	spdk_bit_array_set(subsystem->used_listener_ids, id);
	listener->id = id;

	for (i = 0; i < subsystem->max_nsid; i++) {
		listener->ana_state[i] = SPDK_NVME_ANA_OPTIMIZED_STATE;
	}

	/* Give the transport a chance to veto or prepare the association. */
	if (transport->ops->listen_associate != NULL) {
		rc = transport->ops->listen_associate(transport, subsystem, trid);
	}

	/* Completes the add (or tears the listener down again if rc != 0). */
	_nvmf_subsystem_add_listener_done(listener, rc);
}
1155 
1156 int
1157 spdk_nvmf_subsystem_remove_listener(struct spdk_nvmf_subsystem *subsystem,
1158 				    const struct spdk_nvme_transport_id *trid)
1159 {
1160 	struct spdk_nvmf_subsystem_listener *listener;
1161 
1162 	if (!(subsystem->state == SPDK_NVMF_SUBSYSTEM_INACTIVE ||
1163 	      subsystem->state == SPDK_NVMF_SUBSYSTEM_PAUSED)) {
1164 		return -EAGAIN;
1165 	}
1166 
1167 	listener = nvmf_subsystem_find_listener(subsystem, trid);
1168 	if (listener == NULL) {
1169 		return -ENOENT;
1170 	}
1171 
1172 	_nvmf_subsystem_remove_listener(subsystem, listener, false);
1173 
1174 	return 0;
1175 }
1176 
1177 void
1178 nvmf_subsystem_remove_all_listeners(struct spdk_nvmf_subsystem *subsystem,
1179 				    bool stop)
1180 {
1181 	struct spdk_nvmf_subsystem_listener *listener, *listener_tmp;
1182 
1183 	TAILQ_FOREACH_SAFE(listener, &subsystem->listeners, link, listener_tmp) {
1184 		_nvmf_subsystem_remove_listener(subsystem, listener, stop);
1185 	}
1186 }
1187 
1188 bool
1189 spdk_nvmf_subsystem_listener_allowed(struct spdk_nvmf_subsystem *subsystem,
1190 				     const struct spdk_nvme_transport_id *trid)
1191 {
1192 	struct spdk_nvmf_subsystem_listener *listener;
1193 
1194 	if (!strcmp(subsystem->subnqn, SPDK_NVMF_DISCOVERY_NQN)) {
1195 		return true;
1196 	}
1197 
1198 	TAILQ_FOREACH(listener, &subsystem->listeners, link) {
1199 		if (spdk_nvme_transport_id_compare(listener->trid, trid) == 0) {
1200 			return true;
1201 		}
1202 	}
1203 
1204 	return false;
1205 }
1206 
1207 struct spdk_nvmf_subsystem_listener *
1208 spdk_nvmf_subsystem_get_first_listener(struct spdk_nvmf_subsystem *subsystem)
1209 {
1210 	return TAILQ_FIRST(&subsystem->listeners);
1211 }
1212 
1213 struct spdk_nvmf_subsystem_listener *
1214 spdk_nvmf_subsystem_get_next_listener(struct spdk_nvmf_subsystem *subsystem,
1215 				      struct spdk_nvmf_subsystem_listener *prev_listener)
1216 {
1217 	return TAILQ_NEXT(prev_listener, link);
1218 }
1219 
1220 const struct spdk_nvme_transport_id *
1221 spdk_nvmf_subsystem_listener_get_trid(struct spdk_nvmf_subsystem_listener *listener)
1222 {
1223 	return listener->trid;
1224 }
1225 
1226 void
1227 spdk_nvmf_subsystem_allow_any_listener(struct spdk_nvmf_subsystem *subsystem,
1228 				       bool allow_any_listener)
1229 {
1230 	subsystem->flags.allow_any_listener = allow_any_listener;
1231 }
1232 
1233 bool
1234 spdk_nvmf_subsytem_any_listener_allowed(struct spdk_nvmf_subsystem *subsystem)
1235 {
1236 	return subsystem->flags.allow_any_listener;
1237 }
1238 
1239 
/* Context carried through spdk_for_each_channel() while propagating a
 * namespace change to every poll group. */
struct subsystem_update_ns_ctx {
	struct spdk_nvmf_subsystem *subsystem;

	/* Invoked from subsystem_update_ns_done() with the final status;
	 * may be NULL when the caller does not need a completion. */
	spdk_nvmf_subsystem_state_change_done cb_fn;
	void *cb_arg;
};
1246 
1247 static void
1248 subsystem_update_ns_done(struct spdk_io_channel_iter *i, int status)
1249 {
1250 	struct subsystem_update_ns_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
1251 
1252 	if (ctx->cb_fn) {
1253 		ctx->cb_fn(ctx->subsystem, ctx->cb_arg, status);
1254 	}
1255 	free(ctx);
1256 }
1257 
1258 static void
1259 subsystem_update_ns_on_pg(struct spdk_io_channel_iter *i)
1260 {
1261 	int rc;
1262 	struct subsystem_update_ns_ctx *ctx;
1263 	struct spdk_nvmf_poll_group *group;
1264 	struct spdk_nvmf_subsystem *subsystem;
1265 
1266 	ctx = spdk_io_channel_iter_get_ctx(i);
1267 	group = spdk_io_channel_get_ctx(spdk_io_channel_iter_get_channel(i));
1268 	subsystem = ctx->subsystem;
1269 
1270 	rc = nvmf_poll_group_update_subsystem(group, subsystem);
1271 	spdk_for_each_channel_continue(i, rc);
1272 }
1273 
1274 static int
1275 nvmf_subsystem_update_ns(struct spdk_nvmf_subsystem *subsystem, spdk_channel_for_each_cpl cpl,
1276 			 void *ctx)
1277 {
1278 	spdk_for_each_channel(subsystem->tgt,
1279 			      subsystem_update_ns_on_pg,
1280 			      ctx,
1281 			      cpl);
1282 
1283 	return 0;
1284 }
1285 
1286 static void
1287 nvmf_subsystem_ns_changed(struct spdk_nvmf_subsystem *subsystem, uint32_t nsid)
1288 {
1289 	struct spdk_nvmf_ctrlr *ctrlr;
1290 
1291 	TAILQ_FOREACH(ctrlr, &subsystem->ctrlrs, link) {
1292 		nvmf_ctrlr_ns_changed(ctrlr, nsid);
1293 	}
1294 }
1295 
1296 static uint32_t
1297 nvmf_ns_reservation_clear_all_registrants(struct spdk_nvmf_ns *ns);
1298 
/* Remove namespace nsid from the subsystem and release its bdev. The
 * subsystem must be INACTIVE or PAUSED. Returns 0 on success, -1 on an
 * invalid/unused nsid or bad subsystem state. */
int
spdk_nvmf_subsystem_remove_ns(struct spdk_nvmf_subsystem *subsystem, uint32_t nsid)
{
	struct spdk_nvmf_transport *transport;
	struct spdk_nvmf_ns *ns;

	if (!(subsystem->state == SPDK_NVMF_SUBSYSTEM_INACTIVE ||
	      subsystem->state == SPDK_NVMF_SUBSYSTEM_PAUSED)) {
		assert(false);
		return -1;
	}

	/* NSIDs are 1-based; 0 is invalid. */
	if (nsid == 0 || nsid > subsystem->max_nsid) {
		return -1;
	}

	ns = subsystem->ns[nsid - 1];
	if (!ns) {
		return -1;
	}

	/* Unlink from the subsystem before tearing the namespace down. */
	subsystem->ns[nsid - 1] = NULL;

	assert(ns->anagrpid - 1 < subsystem->max_nsid);
	assert(subsystem->ana_group[ns->anagrpid - 1] > 0);

	/* Drop this namespace's reference on its ANA group. */
	subsystem->ana_group[ns->anagrpid - 1]--;

	free(ns->ptpl_file);
	nvmf_ns_reservation_clear_all_registrants(ns);
	spdk_bdev_module_release_bdev(ns->bdev);
	spdk_bdev_close(ns->desc);
	free(ns);

	/* Let each transport drop any per-namespace resources it holds. */
	for (transport = spdk_nvmf_transport_get_first(subsystem->tgt); transport;
	     transport = spdk_nvmf_transport_get_next(transport)) {
		if (transport->ops->subsystem_remove_ns) {
			transport->ops->subsystem_remove_ns(transport, subsystem, nsid);
		}
	}

	/* Raise the namespace-changed async event on all controllers. */
	nvmf_subsystem_ns_changed(subsystem, nsid);

	return 0;
}
1344 
/* Context for asynchronous namespace hot-remove/resize: carries the target
 * subsystem, the pause-completion callback and the affected nsid across the
 * pause/resume cycle. */
struct subsystem_ns_change_ctx {
	struct spdk_nvmf_subsystem		*subsystem;
	spdk_nvmf_subsystem_state_change_done	cb_fn;
	uint32_t				nsid;
};
1350 
/* Pause-completion callback for namespace hot-remove: the subsystem is now
 * paused, so the namespace can be removed, after which the subsystem is
 * resumed and the context freed.
 * NOTE(review): the pause status parameter is ignored here — the removal is
 * attempted even if the pause reported failure; confirm this is intended. */
static void
_nvmf_ns_hot_remove(struct spdk_nvmf_subsystem *subsystem,
		    void *cb_arg, int status)
{
	struct subsystem_ns_change_ctx *ctx = cb_arg;
	int rc;

	rc = spdk_nvmf_subsystem_remove_ns(subsystem, ctx->nsid);
	if (rc != 0) {
		SPDK_ERRLOG("Failed to make changes to NVME-oF subsystem with id: %u\n", subsystem->id);
	}

	spdk_nvmf_subsystem_resume(subsystem, NULL, NULL);

	free(ctx);
}
1367 
1368 static void
1369 nvmf_ns_change_msg(void *ns_ctx)
1370 {
1371 	struct subsystem_ns_change_ctx *ctx = ns_ctx;
1372 	int rc;
1373 
1374 	rc = spdk_nvmf_subsystem_pause(ctx->subsystem, ctx->nsid, ctx->cb_fn, ctx);
1375 	if (rc) {
1376 		if (rc == -EBUSY) {
1377 			/* Try again, this is not a permanent situation. */
1378 			spdk_thread_send_msg(spdk_get_thread(), nvmf_ns_change_msg, ctx);
1379 		} else {
1380 			free(ctx);
1381 			SPDK_ERRLOG("Unable to pause subsystem to process namespace removal!\n");
1382 		}
1383 	}
1384 }
1385 
1386 static void
1387 nvmf_ns_hot_remove(void *remove_ctx)
1388 {
1389 	struct spdk_nvmf_ns *ns = remove_ctx;
1390 	struct subsystem_ns_change_ctx *ns_ctx;
1391 	int rc;
1392 
1393 	/* We have to allocate a new context because this op
1394 	 * is asynchronous and we could lose the ns in the middle.
1395 	 */
1396 	ns_ctx = calloc(1, sizeof(struct subsystem_ns_change_ctx));
1397 	if (!ns_ctx) {
1398 		SPDK_ERRLOG("Unable to allocate context to process namespace removal!\n");
1399 		return;
1400 	}
1401 
1402 	ns_ctx->subsystem = ns->subsystem;
1403 	ns_ctx->nsid = ns->opts.nsid;
1404 	ns_ctx->cb_fn = _nvmf_ns_hot_remove;
1405 
1406 	rc = spdk_nvmf_subsystem_pause(ns->subsystem, ns_ctx->nsid, _nvmf_ns_hot_remove, ns_ctx);
1407 	if (rc) {
1408 		if (rc == -EBUSY) {
1409 			/* Try again, this is not a permanent situation. */
1410 			spdk_thread_send_msg(spdk_get_thread(), nvmf_ns_change_msg, ns_ctx);
1411 		} else {
1412 			SPDK_ERRLOG("Unable to pause subsystem to process namespace removal!\n");
1413 			free(ns_ctx);
1414 		}
1415 	}
1416 }
1417 
1418 static void
1419 _nvmf_ns_resize(struct spdk_nvmf_subsystem *subsystem, void *cb_arg, int status)
1420 {
1421 	struct subsystem_ns_change_ctx *ctx = cb_arg;
1422 
1423 	nvmf_subsystem_ns_changed(subsystem, ctx->nsid);
1424 	spdk_nvmf_subsystem_resume(subsystem, NULL, NULL);
1425 
1426 	free(ctx);
1427 }
1428 
/* Bdev resize handler: briefly pause the subsystem (without quiescing the
 * namespace itself) so the size change can be announced consistently. */
static void
nvmf_ns_resize(void *event_ctx)
{
	struct spdk_nvmf_ns *ns = event_ctx;
	struct subsystem_ns_change_ctx *ns_ctx;
	int rc;

	/* We have to allocate a new context because this op
	 * is asynchronous and we could lose the ns in the middle.
	 */
	ns_ctx = calloc(1, sizeof(struct subsystem_ns_change_ctx));
	if (!ns_ctx) {
		SPDK_ERRLOG("Unable to allocate context to process namespace removal!\n");
		return;
	}

	ns_ctx->subsystem = ns->subsystem;
	ns_ctx->nsid = ns->opts.nsid;
	ns_ctx->cb_fn = _nvmf_ns_resize;

	/* Specify 0 for the nsid here, because we do not need to pause the namespace.
	 * Namespaces can only be resized bigger, so there is no need to quiesce I/O.
	 * NOTE(review): the -EBUSY retry path (nvmf_ns_change_msg) re-issues the
	 * pause with ns_ctx->nsid rather than 0, which quiesces the namespace
	 * unnecessarily — harmless but inconsistent; confirm intent.
	 */
	rc = spdk_nvmf_subsystem_pause(ns->subsystem, 0, _nvmf_ns_resize, ns_ctx);
	if (rc) {
		if (rc == -EBUSY) {
			/* Try again, this is not a permanent situation. */
			spdk_thread_send_msg(spdk_get_thread(), nvmf_ns_change_msg, ns_ctx);
		} else {
			SPDK_ERRLOG("Unable to pause subsystem to process namespace resize!\n");
			free(ns_ctx);
		}
	}
}
1463 
1464 static void
1465 nvmf_ns_event(enum spdk_bdev_event_type type,
1466 	      struct spdk_bdev *bdev,
1467 	      void *event_ctx)
1468 {
1469 	SPDK_DEBUGLOG(nvmf, "Bdev event: type %d, name %s, subsystem_id %d, ns_id %d\n",
1470 		      type,
1471 		      spdk_bdev_get_name(bdev),
1472 		      ((struct spdk_nvmf_ns *)event_ctx)->subsystem->id,
1473 		      ((struct spdk_nvmf_ns *)event_ctx)->nsid);
1474 
1475 	switch (type) {
1476 	case SPDK_BDEV_EVENT_REMOVE:
1477 		nvmf_ns_hot_remove(event_ctx);
1478 		break;
1479 	case SPDK_BDEV_EVENT_RESIZE:
1480 		nvmf_ns_resize(event_ctx);
1481 		break;
1482 	default:
1483 		SPDK_NOTICELOG("Unsupported bdev event: type %d\n", type);
1484 		break;
1485 	}
1486 }
1487 
/* Initialize an spdk_nvmf_ns_opts structure to its defaults. opts_size is
 * the size the CALLER compiled against; only fields that fit inside it are
 * touched, which keeps old callers compatible when new fields are added. */
void
spdk_nvmf_ns_opts_get_defaults(struct spdk_nvmf_ns_opts *opts, size_t opts_size)
{
	if (!opts) {
		SPDK_ERRLOG("opts should not be NULL.\n");
		return;
	}

	if (!opts_size) {
		SPDK_ERRLOG("opts_size should not be zero.\n");
		return;
	}

	memset(opts, 0, opts_size);
	opts->opts_size = opts_size;

/* True when `field` lies entirely within the caller's struct size. */
#define FIELD_OK(field) \
	offsetof(struct spdk_nvmf_ns_opts, field) + sizeof(opts->field) <= opts_size

/* Assign only when the field fits in the caller's struct. */
#define SET_FIELD(field, value) \
	if (FIELD_OK(field)) { \
		opts->field = value; \
	} \

	/* All current fields are set to 0 by default. */
	SET_FIELD(nsid, 0);
	if (FIELD_OK(nguid)) {
		memset(opts->nguid, 0, sizeof(opts->nguid));
	}
	if (FIELD_OK(eui64)) {
		memset(opts->eui64, 0, sizeof(opts->eui64));
	}
	if (FIELD_OK(uuid)) {
		memset(&opts->uuid, 0, sizeof(opts->uuid));
	}
	SET_FIELD(anagrpid, 0);

#undef FIELD_OK
#undef SET_FIELD
}
1528 
/* Copy namespace options from a caller-supplied struct, honoring the
 * caller's declared opts_size so that options compiled against an older
 * (smaller) struct layout are copied safely. */
static void
nvmf_ns_opts_copy(struct spdk_nvmf_ns_opts *opts,
		  const struct spdk_nvmf_ns_opts *user_opts,
		  size_t opts_size)
{
/* True when `field` lies entirely within the user's declared struct size. */
#define FIELD_OK(field)	\
	offsetof(struct spdk_nvmf_ns_opts, field) + sizeof(opts->field) <= user_opts->opts_size

/* Copy only when the field fits in the user's struct. */
#define SET_FIELD(field) \
	if (FIELD_OK(field)) { \
		opts->field = user_opts->field;	\
	} \

	SET_FIELD(nsid);
	if (FIELD_OK(nguid)) {
		memcpy(opts->nguid, user_opts->nguid, sizeof(opts->nguid));
	}
	if (FIELD_OK(eui64)) {
		memcpy(opts->eui64, user_opts->eui64, sizeof(opts->eui64));
	}
	if (FIELD_OK(uuid)) {
		memcpy(&opts->uuid, &user_opts->uuid, sizeof(opts->uuid));
	}
	SET_FIELD(anagrpid);

	opts->opts_size = user_opts->opts_size;

	/* We should not remove this statement, but need to update the assert statement
	 * if we add a new field, and also add a corresponding SET_FIELD statement.
	 */
	SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_ns_opts) == 64, "Incorrect size");

#undef FIELD_OK
#undef SET_FIELD
}
1564 
/* Dummy bdev module used to claim bdevs, so that no other module can open a
 * bdev while it is exported as an NVMe-oF namespace. */
static struct spdk_bdev_module ns_bdev_module = {
	.name	= "NVMe-oF Target",
};
1569 
1570 static int
1571 nvmf_ns_load_reservation(const char *file, struct spdk_nvmf_reservation_info *info);
1572 static int
1573 nvmf_ns_reservation_restore(struct spdk_nvmf_ns *ns, struct spdk_nvmf_reservation_info *info);
1574 
1575 uint32_t
1576 spdk_nvmf_subsystem_add_ns_ext(struct spdk_nvmf_subsystem *subsystem, const char *bdev_name,
1577 			       const struct spdk_nvmf_ns_opts *user_opts, size_t opts_size,
1578 			       const char *ptpl_file)
1579 {
1580 	struct spdk_nvmf_transport *transport;
1581 	struct spdk_nvmf_ns_opts opts;
1582 	struct spdk_nvmf_ns *ns;
1583 	struct spdk_nvmf_reservation_info info = {0};
1584 	int rc;
1585 
1586 	if (!(subsystem->state == SPDK_NVMF_SUBSYSTEM_INACTIVE ||
1587 	      subsystem->state == SPDK_NVMF_SUBSYSTEM_PAUSED)) {
1588 		return 0;
1589 	}
1590 
1591 	spdk_nvmf_ns_opts_get_defaults(&opts, sizeof(opts));
1592 	if (user_opts) {
1593 		nvmf_ns_opts_copy(&opts, user_opts, opts_size);
1594 	}
1595 
1596 	if (opts.nsid == SPDK_NVME_GLOBAL_NS_TAG) {
1597 		SPDK_ERRLOG("Invalid NSID %" PRIu32 "\n", opts.nsid);
1598 		return 0;
1599 	}
1600 
1601 	if (opts.nsid == 0) {
1602 		/*
1603 		 * NSID not specified - find a free index.
1604 		 *
1605 		 * If no free slots are found, opts.nsid will be subsystem->max_nsid + 1, which will
1606 		 * expand max_nsid if possible.
1607 		 */
1608 		for (opts.nsid = 1; opts.nsid <= subsystem->max_nsid; opts.nsid++) {
1609 			if (_nvmf_subsystem_get_ns(subsystem, opts.nsid) == NULL) {
1610 				break;
1611 			}
1612 		}
1613 	}
1614 
1615 	if (_nvmf_subsystem_get_ns(subsystem, opts.nsid)) {
1616 		SPDK_ERRLOG("Requested NSID %" PRIu32 " already in use\n", opts.nsid);
1617 		return 0;
1618 	}
1619 
1620 	if (opts.nsid > subsystem->max_nsid) {
1621 		SPDK_ERRLOG("NSID greater than maximum not allowed\n");
1622 		return 0;
1623 	}
1624 
1625 	if (opts.anagrpid == 0) {
1626 		opts.anagrpid = opts.nsid;
1627 	}
1628 
1629 	if (opts.anagrpid > subsystem->max_nsid) {
1630 		SPDK_ERRLOG("ANAGRPID greater than maximum NSID not allowed\n");
1631 		return 0;
1632 	}
1633 
1634 	ns = calloc(1, sizeof(*ns));
1635 	if (ns == NULL) {
1636 		SPDK_ERRLOG("Namespace allocation failed\n");
1637 		return 0;
1638 	}
1639 
1640 	rc = spdk_bdev_open_ext(bdev_name, true, nvmf_ns_event, ns, &ns->desc);
1641 	if (rc != 0) {
1642 		SPDK_ERRLOG("Subsystem %s: bdev %s cannot be opened, error=%d\n",
1643 			    subsystem->subnqn, bdev_name, rc);
1644 		free(ns);
1645 		return 0;
1646 	}
1647 
1648 	ns->bdev = spdk_bdev_desc_get_bdev(ns->desc);
1649 
1650 	if (spdk_bdev_get_md_size(ns->bdev) != 0 && !spdk_bdev_is_md_interleaved(ns->bdev)) {
1651 		SPDK_ERRLOG("Can't attach bdev with separate metadata.\n");
1652 		spdk_bdev_close(ns->desc);
1653 		free(ns);
1654 		return 0;
1655 	}
1656 
1657 	rc = spdk_bdev_module_claim_bdev(ns->bdev, ns->desc, &ns_bdev_module);
1658 	if (rc != 0) {
1659 		spdk_bdev_close(ns->desc);
1660 		free(ns);
1661 		return 0;
1662 	}
1663 
1664 	/* Cache the zcopy capability of the bdev device */
1665 	ns->zcopy = spdk_bdev_io_type_supported(ns->bdev, SPDK_BDEV_IO_TYPE_ZCOPY);
1666 
1667 	if (spdk_mem_all_zero(&opts.uuid, sizeof(opts.uuid))) {
1668 		opts.uuid = *spdk_bdev_get_uuid(ns->bdev);
1669 	}
1670 
1671 	/* if nguid descriptor is supported by bdev module (nvme) then uuid = nguid */
1672 	if (spdk_mem_all_zero(opts.nguid, sizeof(opts.nguid))) {
1673 		SPDK_STATIC_ASSERT(sizeof(opts.nguid) == sizeof(opts.uuid), "size mismatch");
1674 		memcpy(opts.nguid, spdk_bdev_get_uuid(ns->bdev), sizeof(opts.nguid));
1675 	}
1676 
1677 	ns->opts = opts;
1678 	ns->subsystem = subsystem;
1679 	subsystem->ns[opts.nsid - 1] = ns;
1680 	ns->nsid = opts.nsid;
1681 	ns->anagrpid = opts.anagrpid;
1682 	subsystem->ana_group[ns->anagrpid - 1]++;
1683 	TAILQ_INIT(&ns->registrants);
1684 	if (ptpl_file) {
1685 		rc = nvmf_ns_load_reservation(ptpl_file, &info);
1686 		if (!rc) {
1687 			rc = nvmf_ns_reservation_restore(ns, &info);
1688 			if (rc) {
1689 				SPDK_ERRLOG("Subsystem restore reservation failed\n");
1690 				goto err_ns_reservation_restore;
1691 			}
1692 		}
1693 		ns->ptpl_file = strdup(ptpl_file);
1694 		if (!ns->ptpl_file) {
1695 			SPDK_ERRLOG("Namespace ns->ptpl_file allocation failed\n");
1696 			goto err_strdup;
1697 		}
1698 	}
1699 
1700 	for (transport = spdk_nvmf_transport_get_first(subsystem->tgt); transport;
1701 	     transport = spdk_nvmf_transport_get_next(transport)) {
1702 		if (transport->ops->subsystem_add_ns) {
1703 			rc = transport->ops->subsystem_add_ns(transport, subsystem, ns);
1704 			if (rc) {
1705 				SPDK_ERRLOG("Namespace attachment is not allowed by %s transport\n", transport->ops->name);
1706 				goto err_subsystem_add_ns;
1707 			}
1708 		}
1709 	}
1710 
1711 	SPDK_DEBUGLOG(nvmf, "Subsystem %s: bdev %s assigned nsid %" PRIu32 "\n",
1712 		      spdk_nvmf_subsystem_get_nqn(subsystem),
1713 		      bdev_name,
1714 		      opts.nsid);
1715 
1716 	nvmf_subsystem_ns_changed(subsystem, opts.nsid);
1717 
1718 	return opts.nsid;
1719 
1720 err_subsystem_add_ns:
1721 	free(ns->ptpl_file);
1722 err_strdup:
1723 	nvmf_ns_reservation_clear_all_registrants(ns);
1724 err_ns_reservation_restore:
1725 	subsystem->ns[opts.nsid - 1] = NULL;
1726 	spdk_bdev_module_release_bdev(ns->bdev);
1727 	spdk_bdev_close(ns->desc);
1728 	free(ns);
1729 	return 0;
1730 
1731 }
1732 
1733 static uint32_t
1734 nvmf_subsystem_get_next_allocated_nsid(struct spdk_nvmf_subsystem *subsystem,
1735 				       uint32_t prev_nsid)
1736 {
1737 	uint32_t nsid;
1738 
1739 	if (prev_nsid >= subsystem->max_nsid) {
1740 		return 0;
1741 	}
1742 
1743 	for (nsid = prev_nsid + 1; nsid <= subsystem->max_nsid; nsid++) {
1744 		if (subsystem->ns[nsid - 1]) {
1745 			return nsid;
1746 		}
1747 	}
1748 
1749 	return 0;
1750 }
1751 
/* First allocated namespace of the subsystem, or NULL when it has none. */
struct spdk_nvmf_ns *
spdk_nvmf_subsystem_get_first_ns(struct spdk_nvmf_subsystem *subsystem)
{
	return _nvmf_subsystem_get_ns(subsystem,
				      nvmf_subsystem_get_next_allocated_nsid(subsystem, 0));
}
1760 
1761 struct spdk_nvmf_ns *
1762 spdk_nvmf_subsystem_get_next_ns(struct spdk_nvmf_subsystem *subsystem,
1763 				struct spdk_nvmf_ns *prev_ns)
1764 {
1765 	uint32_t next_nsid;
1766 
1767 	next_nsid = nvmf_subsystem_get_next_allocated_nsid(subsystem, prev_ns->opts.nsid);
1768 	return _nvmf_subsystem_get_ns(subsystem, next_nsid);
1769 }
1770 
/* Public wrapper around the internal nsid lookup. */
struct spdk_nvmf_ns *
spdk_nvmf_subsystem_get_ns(struct spdk_nvmf_subsystem *subsystem, uint32_t nsid)
{
	return _nvmf_subsystem_get_ns(subsystem, nsid);
}
1776 
1777 uint32_t
1778 spdk_nvmf_ns_get_id(const struct spdk_nvmf_ns *ns)
1779 {
1780 	return ns->opts.nsid;
1781 }
1782 
1783 struct spdk_bdev *
1784 spdk_nvmf_ns_get_bdev(struct spdk_nvmf_ns *ns)
1785 {
1786 	return ns->bdev;
1787 }
1788 
1789 void
1790 spdk_nvmf_ns_get_opts(const struct spdk_nvmf_ns *ns, struct spdk_nvmf_ns_opts *opts,
1791 		      size_t opts_size)
1792 {
1793 	memset(opts, 0, opts_size);
1794 	memcpy(opts, &ns->opts, spdk_min(sizeof(ns->opts), opts_size));
1795 }
1796 
1797 const char *
1798 spdk_nvmf_subsystem_get_sn(const struct spdk_nvmf_subsystem *subsystem)
1799 {
1800 	return subsystem->sn;
1801 }
1802 
1803 int
1804 spdk_nvmf_subsystem_set_sn(struct spdk_nvmf_subsystem *subsystem, const char *sn)
1805 {
1806 	size_t len, max_len;
1807 
1808 	max_len = sizeof(subsystem->sn) - 1;
1809 	len = strlen(sn);
1810 	if (len > max_len) {
1811 		SPDK_DEBUGLOG(nvmf, "Invalid sn \"%s\": length %zu > max %zu\n",
1812 			      sn, len, max_len);
1813 		return -1;
1814 	}
1815 
1816 	if (!nvmf_valid_ascii_string(sn, len)) {
1817 		SPDK_DEBUGLOG(nvmf, "Non-ASCII sn\n");
1818 		SPDK_LOGDUMP(nvmf, "sn", sn, len);
1819 		return -1;
1820 	}
1821 
1822 	snprintf(subsystem->sn, sizeof(subsystem->sn), "%s", sn);
1823 
1824 	return 0;
1825 }
1826 
1827 const char *
1828 spdk_nvmf_subsystem_get_mn(const struct spdk_nvmf_subsystem *subsystem)
1829 {
1830 	return subsystem->mn;
1831 }
1832 
1833 int
1834 spdk_nvmf_subsystem_set_mn(struct spdk_nvmf_subsystem *subsystem, const char *mn)
1835 {
1836 	size_t len, max_len;
1837 
1838 	if (mn == NULL) {
1839 		mn = MODEL_NUMBER_DEFAULT;
1840 	}
1841 	max_len = sizeof(subsystem->mn) - 1;
1842 	len = strlen(mn);
1843 	if (len > max_len) {
1844 		SPDK_DEBUGLOG(nvmf, "Invalid mn \"%s\": length %zu > max %zu\n",
1845 			      mn, len, max_len);
1846 		return -1;
1847 	}
1848 
1849 	if (!nvmf_valid_ascii_string(mn, len)) {
1850 		SPDK_DEBUGLOG(nvmf, "Non-ASCII mn\n");
1851 		SPDK_LOGDUMP(nvmf, "mn", mn, len);
1852 		return -1;
1853 	}
1854 
1855 	snprintf(subsystem->mn, sizeof(subsystem->mn), "%s", mn);
1856 
1857 	return 0;
1858 }
1859 
1860 const char *
1861 spdk_nvmf_subsystem_get_nqn(const struct spdk_nvmf_subsystem *subsystem)
1862 {
1863 	return subsystem->subnqn;
1864 }
1865 
1866 enum spdk_nvmf_subtype spdk_nvmf_subsystem_get_type(struct spdk_nvmf_subsystem *subsystem)
1867 {
1868 	return subsystem->subtype;
1869 }
1870 
1871 uint32_t
1872 spdk_nvmf_subsystem_get_max_nsid(struct spdk_nvmf_subsystem *subsystem)
1873 {
1874 	return subsystem->max_nsid;
1875 }
1876 
/* Configure the inclusive CNTLID range [min_cntlid, max_cntlid] used when
 * assigning controller IDs. Only allowed while the subsystem is INACTIVE.
 * Returns 0 on success, -EAGAIN on bad state, -EINVAL on a bad range. */
int
nvmf_subsystem_set_cntlid_range(struct spdk_nvmf_subsystem *subsystem,
				uint16_t min_cntlid, uint16_t max_cntlid)
{
	if (subsystem->state != SPDK_NVMF_SUBSYSTEM_INACTIVE) {
		return -EAGAIN;
	}

	if (min_cntlid > max_cntlid) {
		return -EINVAL;
	}
	/* The spec reserves cntlid values in the range FFF0h to FFFFh. */
	if (min_cntlid < NVMF_MIN_CNTLID || min_cntlid > NVMF_MAX_CNTLID ||
	    max_cntlid < NVMF_MIN_CNTLID || max_cntlid > NVMF_MAX_CNTLID) {
		return -EINVAL;
	}
	subsystem->min_cntlid = min_cntlid;
	subsystem->max_cntlid = max_cntlid;
	/* next_cntlid is pre-incremented by nvmf_subsystem_gen_cntlid(), so
	 * min_cntlid - 1 makes the first generated id equal to min_cntlid;
	 * reset it whenever the old cursor falls outside the new range. */
	if (subsystem->next_cntlid < min_cntlid || subsystem->next_cntlid > max_cntlid - 1) {
		subsystem->next_cntlid = min_cntlid - 1;
	}

	return 0;
}
1901 
1902 static uint16_t
1903 nvmf_subsystem_gen_cntlid(struct spdk_nvmf_subsystem *subsystem)
1904 {
1905 	int count;
1906 
1907 	/*
1908 	 * In the worst case, we might have to try all CNTLID values between min_cntlid and max_cntlid
1909 	 * before we find one that is unused (or find that all values are in use).
1910 	 */
1911 	for (count = 0; count < subsystem->max_cntlid - subsystem->min_cntlid + 1; count++) {
1912 		subsystem->next_cntlid++;
1913 		if (subsystem->next_cntlid > subsystem->max_cntlid) {
1914 			subsystem->next_cntlid = subsystem->min_cntlid;
1915 		}
1916 
1917 		/* Check if a controller with this cntlid currently exists. */
1918 		if (nvmf_subsystem_get_ctrlr(subsystem, subsystem->next_cntlid) == NULL) {
1919 			/* Found unused cntlid */
1920 			return subsystem->next_cntlid;
1921 		}
1922 	}
1923 
1924 	/* All valid cntlid values are in use. */
1925 	return 0xFFFF;
1926 }
1927 
1928 int
1929 nvmf_subsystem_add_ctrlr(struct spdk_nvmf_subsystem *subsystem, struct spdk_nvmf_ctrlr *ctrlr)
1930 {
1931 
1932 	if (ctrlr->dynamic_ctrlr) {
1933 		ctrlr->cntlid = nvmf_subsystem_gen_cntlid(subsystem);
1934 		if (ctrlr->cntlid == 0xFFFF) {
1935 			/* Unable to get a cntlid */
1936 			SPDK_ERRLOG("Reached max simultaneous ctrlrs\n");
1937 			return -EBUSY;
1938 		}
1939 	} else if (nvmf_subsystem_get_ctrlr(subsystem, ctrlr->cntlid) != NULL) {
1940 		SPDK_ERRLOG("Ctrlr with cntlid %u already exist\n", ctrlr->cntlid);
1941 		return -EEXIST;
1942 	}
1943 
1944 	TAILQ_INSERT_TAIL(&subsystem->ctrlrs, ctrlr, link);
1945 
1946 	return 0;
1947 }
1948 
1949 void
1950 nvmf_subsystem_remove_ctrlr(struct spdk_nvmf_subsystem *subsystem,
1951 			    struct spdk_nvmf_ctrlr *ctrlr)
1952 {
1953 	assert(spdk_get_thread() == subsystem->thread);
1954 	assert(subsystem == ctrlr->subsys);
1955 	SPDK_DEBUGLOG(nvmf, "remove ctrlr %p from subsys %p %s\n", ctrlr, subsystem, subsystem->subnqn);
1956 	TAILQ_REMOVE(&subsystem->ctrlrs, ctrlr, link);
1957 }
1958 
1959 struct spdk_nvmf_ctrlr *
1960 nvmf_subsystem_get_ctrlr(struct spdk_nvmf_subsystem *subsystem, uint16_t cntlid)
1961 {
1962 	struct spdk_nvmf_ctrlr *ctrlr;
1963 
1964 	TAILQ_FOREACH(ctrlr, &subsystem->ctrlrs, link) {
1965 		if (ctrlr->cntlid == cntlid) {
1966 			return ctrlr;
1967 		}
1968 	}
1969 
1970 	return NULL;
1971 }
1972 
1973 uint32_t
1974 spdk_nvmf_subsystem_get_max_namespaces(const struct spdk_nvmf_subsystem *subsystem)
1975 {
1976 	return subsystem->max_nsid;
1977 }
1978 
1979 uint16_t
1980 spdk_nvmf_subsystem_get_min_cntlid(const struct spdk_nvmf_subsystem *subsystem)
1981 {
1982 	return subsystem->min_cntlid;
1983 }
1984 
1985 uint16_t
1986 spdk_nvmf_subsystem_get_max_cntlid(const struct spdk_nvmf_subsystem *subsystem)
1987 {
1988 	return subsystem->max_cntlid;
1989 }
1990 
/* Decode targets for the persisted (PTPL) reservation JSON file. A single
 * registrant: its reservation key and the registering host's UUID string. */
struct _nvmf_ns_registrant {
	uint64_t		rkey;
	char			*host_uuid;	/* heap string from spdk_json_decode_string */
};

/* Fixed-capacity registrant array plus the decoded element count. */
struct _nvmf_ns_registrants {
	size_t				num_regs;
	struct _nvmf_ns_registrant	reg[SPDK_NVMF_MAX_NUM_REGISTRANTS];
};

/* Whole-file reservation state: persist-through-power-loss flag, the active
 * reservation (type, key, holder) and all registrants. */
struct _nvmf_ns_reservation {
	bool					ptpl_activated;
	enum spdk_nvme_reservation_type		rtype;
	uint64_t				crkey;
	char					*bdev_uuid;	/* heap string */
	char					*holder_uuid;	/* heap string */
	struct _nvmf_ns_registrants		regs;
};
2009 
/* JSON field decoders for one registrant object in the PTPL file. */
static const struct spdk_json_object_decoder nvmf_ns_pr_reg_decoders[] = {
	{"rkey", offsetof(struct _nvmf_ns_registrant, rkey), spdk_json_decode_uint64},
	{"host_uuid", offsetof(struct _nvmf_ns_registrant, host_uuid), spdk_json_decode_string},
};
2014 
2015 static int
2016 nvmf_decode_ns_pr_reg(const struct spdk_json_val *val, void *out)
2017 {
2018 	struct _nvmf_ns_registrant *reg = out;
2019 
2020 	return spdk_json_decode_object(val, nvmf_ns_pr_reg_decoders,
2021 				       SPDK_COUNTOF(nvmf_ns_pr_reg_decoders), reg);
2022 }
2023 
2024 static int
2025 nvmf_decode_ns_pr_regs(const struct spdk_json_val *val, void *out)
2026 {
2027 	struct _nvmf_ns_registrants *regs = out;
2028 
2029 	return spdk_json_decode_array(val, nvmf_decode_ns_pr_reg, regs->reg,
2030 				      SPDK_NVMF_MAX_NUM_REGISTRANTS, &regs->num_regs,
2031 				      sizeof(struct _nvmf_ns_registrant));
2032 }
2033 
/* JSON field decoders for the top-level PTPL reservation object; fields
 * marked `true` are optional in the file. */
static const struct spdk_json_object_decoder nvmf_ns_pr_decoders[] = {
	{"ptpl", offsetof(struct _nvmf_ns_reservation, ptpl_activated), spdk_json_decode_bool, true},
	{"rtype", offsetof(struct _nvmf_ns_reservation, rtype), spdk_json_decode_uint32, true},
	{"crkey", offsetof(struct _nvmf_ns_reservation, crkey), spdk_json_decode_uint64, true},
	{"bdev_uuid", offsetof(struct _nvmf_ns_reservation, bdev_uuid), spdk_json_decode_string},
	{"holder_uuid", offsetof(struct _nvmf_ns_reservation, holder_uuid), spdk_json_decode_string, true},
	{"registrants", offsetof(struct _nvmf_ns_reservation, regs), nvmf_decode_ns_pr_regs},
};
2042 
2043 static int
2044 nvmf_ns_load_reservation(const char *file, struct spdk_nvmf_reservation_info *info)
2045 {
2046 	FILE *fd;
2047 	size_t json_size;
2048 	ssize_t values_cnt, rc;
2049 	void *json = NULL, *end;
2050 	struct spdk_json_val *values = NULL;
2051 	struct _nvmf_ns_reservation res = {};
2052 	uint32_t i;
2053 
2054 	fd = fopen(file, "r");
2055 	/* It's not an error if the file does not exist */
2056 	if (!fd) {
2057 		SPDK_NOTICELOG("File %s does not exist\n", file);
2058 		return -ENOENT;
2059 	}
2060 
2061 	/* Load all persist file contents into a local buffer */
2062 	json = spdk_posix_file_load(fd, &json_size);
2063 	fclose(fd);
2064 	if (!json) {
2065 		SPDK_ERRLOG("Load persit file %s failed\n", file);
2066 		return -ENOMEM;
2067 	}
2068 
2069 	rc = spdk_json_parse(json, json_size, NULL, 0, &end, 0);
2070 	if (rc < 0) {
2071 		SPDK_NOTICELOG("Parsing JSON configuration failed (%zd)\n", rc);
2072 		goto exit;
2073 	}
2074 
2075 	values_cnt = rc;
2076 	values = calloc(values_cnt, sizeof(struct spdk_json_val));
2077 	if (values == NULL) {
2078 		goto exit;
2079 	}
2080 
2081 	rc = spdk_json_parse(json, json_size, values, values_cnt, &end, 0);
2082 	if (rc != values_cnt) {
2083 		SPDK_ERRLOG("Parsing JSON configuration failed (%zd)\n", rc);
2084 		goto exit;
2085 	}
2086 
2087 	/* Decode json */
2088 	if (spdk_json_decode_object(values, nvmf_ns_pr_decoders,
2089 				    SPDK_COUNTOF(nvmf_ns_pr_decoders),
2090 				    &res)) {
2091 		SPDK_ERRLOG("Invalid objects in the persist file %s\n", file);
2092 		rc = -EINVAL;
2093 		goto exit;
2094 	}
2095 
2096 	if (res.regs.num_regs > SPDK_NVMF_MAX_NUM_REGISTRANTS) {
2097 		SPDK_ERRLOG("Can only support up to %u registrants\n", SPDK_NVMF_MAX_NUM_REGISTRANTS);
2098 		rc = -ERANGE;
2099 		goto exit;
2100 	}
2101 
2102 	rc = 0;
2103 	info->ptpl_activated = res.ptpl_activated;
2104 	info->rtype = res.rtype;
2105 	info->crkey = res.crkey;
2106 	snprintf(info->bdev_uuid, sizeof(info->bdev_uuid), "%s", res.bdev_uuid);
2107 	snprintf(info->holder_uuid, sizeof(info->holder_uuid), "%s", res.holder_uuid);
2108 	info->num_regs = res.regs.num_regs;
2109 	for (i = 0; i < res.regs.num_regs; i++) {
2110 		info->registrants[i].rkey = res.regs.reg[i].rkey;
2111 		snprintf(info->registrants[i].host_uuid, sizeof(info->registrants[i].host_uuid), "%s",
2112 			 res.regs.reg[i].host_uuid);
2113 	}
2114 
2115 exit:
2116 	free(json);
2117 	free(values);
2118 	free(res.bdev_uuid);
2119 	free(res.holder_uuid);
2120 	for (i = 0; i < res.regs.num_regs; i++) {
2121 		free(res.regs.reg[i].host_uuid);
2122 	}
2123 
2124 	return rc;
2125 }
2126 
2127 static bool
2128 nvmf_ns_reservation_all_registrants_type(struct spdk_nvmf_ns *ns);
2129 
2130 static int
2131 nvmf_ns_reservation_restore(struct spdk_nvmf_ns *ns, struct spdk_nvmf_reservation_info *info)
2132 {
2133 	uint32_t i;
2134 	struct spdk_nvmf_registrant *reg, *holder = NULL;
2135 	struct spdk_uuid bdev_uuid, holder_uuid;
2136 	bool rkey_flag = false;
2137 
2138 	SPDK_DEBUGLOG(nvmf, "NSID %u, PTPL %u, Number of registrants %u\n",
2139 		      ns->nsid, info->ptpl_activated, info->num_regs);
2140 
2141 	/* it's not an error */
2142 	if (!info->ptpl_activated || !info->num_regs) {
2143 		return 0;
2144 	}
2145 
2146 	/* Check info->crkey exist or not in info->registrants[i].rkey */
2147 	for (i = 0; i < info->num_regs; i++) {
2148 		if (info->crkey == info->registrants[i].rkey) {
2149 			rkey_flag = true;
2150 		}
2151 	}
2152 	if (!rkey_flag) {
2153 		return -EINVAL;
2154 	}
2155 
2156 	spdk_uuid_parse(&bdev_uuid, info->bdev_uuid);
2157 	if (spdk_uuid_compare(&bdev_uuid, spdk_bdev_get_uuid(ns->bdev))) {
2158 		SPDK_ERRLOG("Existing bdev UUID is not same with configuration file\n");
2159 		return -EINVAL;
2160 	}
2161 
2162 	ns->crkey = info->crkey;
2163 	ns->rtype = info->rtype;
2164 	ns->ptpl_activated = info->ptpl_activated;
2165 	spdk_uuid_parse(&holder_uuid, info->holder_uuid);
2166 
2167 	SPDK_DEBUGLOG(nvmf, "Bdev UUID %s\n", info->bdev_uuid);
2168 	if (info->rtype) {
2169 		SPDK_DEBUGLOG(nvmf, "Holder UUID %s, RTYPE %u, RKEY 0x%"PRIx64"\n",
2170 			      info->holder_uuid, info->rtype, info->crkey);
2171 	}
2172 
2173 	for (i = 0; i < info->num_regs; i++) {
2174 		reg = calloc(1, sizeof(*reg));
2175 		if (!reg) {
2176 			return -ENOMEM;
2177 		}
2178 		spdk_uuid_parse(&reg->hostid, info->registrants[i].host_uuid);
2179 		reg->rkey = info->registrants[i].rkey;
2180 		TAILQ_INSERT_TAIL(&ns->registrants, reg, link);
2181 		if (!spdk_uuid_compare(&holder_uuid, &reg->hostid)) {
2182 			holder = reg;
2183 		}
2184 		SPDK_DEBUGLOG(nvmf, "Registrant RKEY 0x%"PRIx64", Host UUID %s\n",
2185 			      info->registrants[i].rkey, info->registrants[i].host_uuid);
2186 	}
2187 
2188 	if (nvmf_ns_reservation_all_registrants_type(ns)) {
2189 		ns->holder = TAILQ_FIRST(&ns->registrants);
2190 	} else {
2191 		ns->holder = holder;
2192 	}
2193 
2194 	return 0;
2195 }
2196 
/*
 * spdk_json_write_begin() flush callback: write the serialized JSON chunk
 * to the persist file whose path is passed as cb_ctx.
 *
 * \return 0 on success, -ENOENT if the file can't be opened, -1 on a
 * short or failed write.
 */
static int
nvmf_ns_json_write_cb(void *cb_ctx, const void *data, size_t size)
{
	char *file = cb_ctx;
	size_t rc;
	FILE *fd;

	fd = fopen(file, "w");
	if (!fd) {
		SPDK_ERRLOG("Can't open file %s for write\n", file);
		return -ENOENT;
	}
	rc = fwrite(data, 1, size, fd);
	/* fclose() flushes buffered data; a failure here means the contents
	 * were not fully persisted even if fwrite() reported success. */
	if (fclose(fd) != 0) {
		SPDK_ERRLOG("Can't close file %s\n", file);
		return -1;
	}

	return rc == size ? 0 : -1;
}
2214 
/*
 * Serialize @info as JSON to the persist (PTPL) file at @file.
 *
 * When PTPL is not activated, nothing is written into the JSON writer, so
 * flushing it clears the configuration file.
 *
 * \return -ENOMEM if the writer could not be created, otherwise the
 * result of spdk_json_write_end().
 */
static int
nvmf_ns_reservation_update(const char *file, struct spdk_nvmf_reservation_info *info)
{
	struct spdk_json_write_ctx *w;
	uint32_t i;
	int rc = 0;

	w = spdk_json_write_begin(nvmf_ns_json_write_cb, (void *)file, 0);
	if (w == NULL) {
		return -ENOMEM;
	}
	/* clear the configuration file */
	if (!info->ptpl_activated) {
		goto exit;
	}

	spdk_json_write_object_begin(w);
	spdk_json_write_named_bool(w, "ptpl", info->ptpl_activated);
	spdk_json_write_named_uint32(w, "rtype", info->rtype);
	spdk_json_write_named_uint64(w, "crkey", info->crkey);
	spdk_json_write_named_string(w, "bdev_uuid", info->bdev_uuid);
	spdk_json_write_named_string(w, "holder_uuid", info->holder_uuid);

	/* one { rkey, host_uuid } object per registrant */
	spdk_json_write_named_array_begin(w, "registrants");
	for (i = 0; i < info->num_regs; i++) {
		spdk_json_write_object_begin(w);
		spdk_json_write_named_uint64(w, "rkey", info->registrants[i].rkey);
		spdk_json_write_named_string(w, "host_uuid", info->registrants[i].host_uuid);
		spdk_json_write_object_end(w);
	}
	spdk_json_write_array_end(w);
	spdk_json_write_object_end(w);

exit:
	/* flushes via nvmf_ns_json_write_cb() and frees the writer */
	rc = spdk_json_write_end(w);
	return rc;
}
2252 
/*
 * Snapshot the namespace's in-memory reservation state into a
 * spdk_nvmf_reservation_info and persist it to ns->ptpl_file.
 *
 * \return 0 when there is nothing to persist (no bdev or no persist file
 * configured), otherwise the result of nvmf_ns_reservation_update().
 */
static int
nvmf_ns_update_reservation_info(struct spdk_nvmf_ns *ns)
{
	struct spdk_nvmf_reservation_info info;
	struct spdk_nvmf_registrant *reg, *tmp;
	uint32_t i = 0;

	assert(ns != NULL);

	/* nothing to persist without a bdev or a configured persist file */
	if (!ns->bdev || !ns->ptpl_file) {
		return 0;
	}

	memset(&info, 0, sizeof(info));
	spdk_uuid_fmt_lower(info.bdev_uuid, sizeof(info.bdev_uuid), spdk_bdev_get_uuid(ns->bdev));

	if (ns->rtype) {
		info.rtype = ns->rtype;
		info.crkey = ns->crkey;
		/* all-registrants types have no single holder, so holder_uuid
		 * is left empty for them */
		if (!nvmf_ns_reservation_all_registrants_type(ns)) {
			assert(ns->holder != NULL);
			spdk_uuid_fmt_lower(info.holder_uuid, sizeof(info.holder_uuid), &ns->holder->hostid);
		}
	}

	/* NOTE(review): assumes the registrant list never exceeds
	 * SPDK_NVMF_MAX_NUM_REGISTRANTS entries -- confirm registration caps
	 * the list size, otherwise info.registrants could overflow. */
	TAILQ_FOREACH_SAFE(reg, &ns->registrants, link, tmp) {
		spdk_uuid_fmt_lower(info.registrants[i].host_uuid, sizeof(info.registrants[i].host_uuid),
				    &reg->hostid);
		info.registrants[i++].rkey = reg->rkey;
	}

	info.num_regs = i;
	info.ptpl_activated = ns->ptpl_activated;

	return nvmf_ns_reservation_update(ns->ptpl_file, &info);
}
2289 
2290 static struct spdk_nvmf_registrant *
2291 nvmf_ns_reservation_get_registrant(struct spdk_nvmf_ns *ns,
2292 				   struct spdk_uuid *uuid)
2293 {
2294 	struct spdk_nvmf_registrant *reg, *tmp;
2295 
2296 	TAILQ_FOREACH_SAFE(reg, &ns->registrants, link, tmp) {
2297 		if (!spdk_uuid_compare(&reg->hostid, uuid)) {
2298 			return reg;
2299 		}
2300 	}
2301 
2302 	return NULL;
2303 }
2304 
2305 /* Generate reservation notice log to registered HostID controllers */
2306 static void
2307 nvmf_subsystem_gen_ctrlr_notification(struct spdk_nvmf_subsystem *subsystem,
2308 				      struct spdk_nvmf_ns *ns,
2309 				      struct spdk_uuid *hostid_list,
2310 				      uint32_t num_hostid,
2311 				      enum spdk_nvme_reservation_notification_log_page_type type)
2312 {
2313 	struct spdk_nvmf_ctrlr *ctrlr;
2314 	uint32_t i;
2315 
2316 	for (i = 0; i < num_hostid; i++) {
2317 		TAILQ_FOREACH(ctrlr, &subsystem->ctrlrs, link) {
2318 			if (!spdk_uuid_compare(&ctrlr->hostid, &hostid_list[i])) {
2319 				nvmf_ctrlr_reservation_notice_log(ctrlr, ns, type);
2320 			}
2321 		}
2322 	}
2323 }
2324 
2325 /* Get all registrants' hostid other than the controller who issued the command */
2326 static uint32_t
2327 nvmf_ns_reservation_get_all_other_hostid(struct spdk_nvmf_ns *ns,
2328 		struct spdk_uuid *hostid_list,
2329 		uint32_t max_num_hostid,
2330 		struct spdk_uuid *current_hostid)
2331 {
2332 	struct spdk_nvmf_registrant *reg, *tmp;
2333 	uint32_t num_hostid = 0;
2334 
2335 	TAILQ_FOREACH_SAFE(reg, &ns->registrants, link, tmp) {
2336 		if (spdk_uuid_compare(&reg->hostid, current_hostid)) {
2337 			if (num_hostid == max_num_hostid) {
2338 				assert(false);
2339 				return max_num_hostid;
2340 			}
2341 			hostid_list[num_hostid++] = reg->hostid;
2342 		}
2343 	}
2344 
2345 	return num_hostid;
2346 }
2347 
2348 /* Calculate the unregistered HostID list according to list
2349  * prior to execute preempt command and list after executing
2350  * preempt command.
2351  */
2352 static uint32_t
2353 nvmf_ns_reservation_get_unregistered_hostid(struct spdk_uuid *old_hostid_list,
2354 		uint32_t old_num_hostid,
2355 		struct spdk_uuid *remaining_hostid_list,
2356 		uint32_t remaining_num_hostid)
2357 {
2358 	struct spdk_uuid temp_hostid_list[SPDK_NVMF_MAX_NUM_REGISTRANTS];
2359 	uint32_t i, j, num_hostid = 0;
2360 	bool found;
2361 
2362 	if (!remaining_num_hostid) {
2363 		return old_num_hostid;
2364 	}
2365 
2366 	for (i = 0; i < old_num_hostid; i++) {
2367 		found = false;
2368 		for (j = 0; j < remaining_num_hostid; j++) {
2369 			if (!spdk_uuid_compare(&old_hostid_list[i], &remaining_hostid_list[j])) {
2370 				found = true;
2371 				break;
2372 			}
2373 		}
2374 		if (!found) {
2375 			spdk_uuid_copy(&temp_hostid_list[num_hostid++], &old_hostid_list[i]);
2376 		}
2377 	}
2378 
2379 	if (num_hostid) {
2380 		memcpy(old_hostid_list, temp_hostid_list, sizeof(struct spdk_uuid) * num_hostid);
2381 	}
2382 
2383 	return num_hostid;
2384 }
2385 
2386 /* current reservation type is all registrants or not */
2387 static bool
2388 nvmf_ns_reservation_all_registrants_type(struct spdk_nvmf_ns *ns)
2389 {
2390 	return (ns->rtype == SPDK_NVME_RESERVE_WRITE_EXCLUSIVE_ALL_REGS ||
2391 		ns->rtype == SPDK_NVME_RESERVE_EXCLUSIVE_ACCESS_ALL_REGS);
2392 }
2393 
2394 /* current registrant is reservation holder or not */
2395 static bool
2396 nvmf_ns_reservation_registrant_is_holder(struct spdk_nvmf_ns *ns,
2397 		struct spdk_nvmf_registrant *reg)
2398 {
2399 	if (!reg) {
2400 		return false;
2401 	}
2402 
2403 	if (nvmf_ns_reservation_all_registrants_type(ns)) {
2404 		return true;
2405 	}
2406 
2407 	return (ns->holder == reg);
2408 }
2409 
2410 static int
2411 nvmf_ns_reservation_add_registrant(struct spdk_nvmf_ns *ns,
2412 				   struct spdk_nvmf_ctrlr *ctrlr,
2413 				   uint64_t nrkey)
2414 {
2415 	struct spdk_nvmf_registrant *reg;
2416 
2417 	reg = calloc(1, sizeof(*reg));
2418 	if (!reg) {
2419 		return -ENOMEM;
2420 	}
2421 
2422 	reg->rkey = nrkey;
2423 	/* set hostid for the registrant */
2424 	spdk_uuid_copy(&reg->hostid, &ctrlr->hostid);
2425 	TAILQ_INSERT_TAIL(&ns->registrants, reg, link);
2426 	ns->gen++;
2427 
2428 	return 0;
2429 }
2430 
2431 static void
2432 nvmf_ns_reservation_release_reservation(struct spdk_nvmf_ns *ns)
2433 {
2434 	ns->rtype = 0;
2435 	ns->crkey = 0;
2436 	ns->holder = NULL;
2437 }
2438 
2439 /* release the reservation if the last registrant was removed */
2440 static void
2441 nvmf_ns_reservation_check_release_on_remove_registrant(struct spdk_nvmf_ns *ns,
2442 		struct spdk_nvmf_registrant *reg)
2443 {
2444 	struct spdk_nvmf_registrant *next_reg;
2445 
2446 	/* no reservation holder */
2447 	if (!ns->holder) {
2448 		assert(ns->rtype == 0);
2449 		return;
2450 	}
2451 
2452 	next_reg = TAILQ_FIRST(&ns->registrants);
2453 	if (next_reg && nvmf_ns_reservation_all_registrants_type(ns)) {
2454 		/* the next valid registrant is the new holder now */
2455 		ns->holder = next_reg;
2456 	} else if (nvmf_ns_reservation_registrant_is_holder(ns, reg)) {
2457 		/* release the reservation */
2458 		nvmf_ns_reservation_release_reservation(ns);
2459 	}
2460 }
2461 
2462 static void
2463 nvmf_ns_reservation_remove_registrant(struct spdk_nvmf_ns *ns,
2464 				      struct spdk_nvmf_registrant *reg)
2465 {
2466 	TAILQ_REMOVE(&ns->registrants, reg, link);
2467 	nvmf_ns_reservation_check_release_on_remove_registrant(ns, reg);
2468 	free(reg);
2469 	ns->gen++;
2470 	return;
2471 }
2472 
2473 static uint32_t
2474 nvmf_ns_reservation_remove_registrants_by_key(struct spdk_nvmf_ns *ns,
2475 		uint64_t rkey)
2476 {
2477 	struct spdk_nvmf_registrant *reg, *tmp;
2478 	uint32_t count = 0;
2479 
2480 	TAILQ_FOREACH_SAFE(reg, &ns->registrants, link, tmp) {
2481 		if (reg->rkey == rkey) {
2482 			nvmf_ns_reservation_remove_registrant(ns, reg);
2483 			count++;
2484 		}
2485 	}
2486 	return count;
2487 }
2488 
2489 static uint32_t
2490 nvmf_ns_reservation_remove_all_other_registrants(struct spdk_nvmf_ns *ns,
2491 		struct spdk_nvmf_registrant *reg)
2492 {
2493 	struct spdk_nvmf_registrant *reg_tmp, *reg_tmp2;
2494 	uint32_t count = 0;
2495 
2496 	TAILQ_FOREACH_SAFE(reg_tmp, &ns->registrants, link, reg_tmp2) {
2497 		if (reg_tmp != reg) {
2498 			nvmf_ns_reservation_remove_registrant(ns, reg_tmp);
2499 			count++;
2500 		}
2501 	}
2502 	return count;
2503 }
2504 
2505 static uint32_t
2506 nvmf_ns_reservation_clear_all_registrants(struct spdk_nvmf_ns *ns)
2507 {
2508 	struct spdk_nvmf_registrant *reg, *reg_tmp;
2509 	uint32_t count = 0;
2510 
2511 	TAILQ_FOREACH_SAFE(reg, &ns->registrants, link, reg_tmp) {
2512 		nvmf_ns_reservation_remove_registrant(ns, reg);
2513 		count++;
2514 	}
2515 	return count;
2516 }
2517 
2518 static void
2519 nvmf_ns_reservation_acquire_reservation(struct spdk_nvmf_ns *ns, uint64_t rkey,
2520 					enum spdk_nvme_reservation_type rtype,
2521 					struct spdk_nvmf_registrant *holder)
2522 {
2523 	ns->rtype = rtype;
2524 	ns->crkey = rkey;
2525 	assert(ns->holder == NULL);
2526 	ns->holder = holder;
2527 }
2528 
2529 static bool
2530 nvmf_ns_reservation_register(struct spdk_nvmf_ns *ns,
2531 			     struct spdk_nvmf_ctrlr *ctrlr,
2532 			     struct spdk_nvmf_request *req)
2533 {
2534 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
2535 	uint8_t rrega, iekey, cptpl, rtype;
2536 	struct spdk_nvme_reservation_register_data key;
2537 	struct spdk_nvmf_registrant *reg;
2538 	uint8_t status = SPDK_NVME_SC_SUCCESS;
2539 	bool update_sgroup = false;
2540 	struct spdk_uuid hostid_list[SPDK_NVMF_MAX_NUM_REGISTRANTS];
2541 	uint32_t num_hostid = 0;
2542 	int rc;
2543 
2544 	rrega = cmd->cdw10_bits.resv_register.rrega;
2545 	iekey = cmd->cdw10_bits.resv_register.iekey;
2546 	cptpl = cmd->cdw10_bits.resv_register.cptpl;
2547 
2548 	if (req->data && req->length >= sizeof(key)) {
2549 		memcpy(&key, req->data, sizeof(key));
2550 	} else {
2551 		SPDK_ERRLOG("No key provided. Failing request.\n");
2552 		status = SPDK_NVME_SC_INVALID_FIELD;
2553 		goto exit;
2554 	}
2555 
2556 	SPDK_DEBUGLOG(nvmf, "REGISTER: RREGA %u, IEKEY %u, CPTPL %u, "
2557 		      "NRKEY 0x%"PRIx64", NRKEY 0x%"PRIx64"\n",
2558 		      rrega, iekey, cptpl, key.crkey, key.nrkey);
2559 
2560 	if (cptpl == SPDK_NVME_RESERVE_PTPL_CLEAR_POWER_ON) {
2561 		/* Ture to OFF state, and need to be updated in the configuration file */
2562 		if (ns->ptpl_activated) {
2563 			ns->ptpl_activated = 0;
2564 			update_sgroup = true;
2565 		}
2566 	} else if (cptpl == SPDK_NVME_RESERVE_PTPL_PERSIST_POWER_LOSS) {
2567 		if (ns->ptpl_file == NULL) {
2568 			status = SPDK_NVME_SC_INVALID_FIELD;
2569 			goto exit;
2570 		} else if (ns->ptpl_activated == 0) {
2571 			ns->ptpl_activated = 1;
2572 			update_sgroup = true;
2573 		}
2574 	}
2575 
2576 	/* current Host Identifier has registrant or not */
2577 	reg = nvmf_ns_reservation_get_registrant(ns, &ctrlr->hostid);
2578 
2579 	switch (rrega) {
2580 	case SPDK_NVME_RESERVE_REGISTER_KEY:
2581 		if (!reg) {
2582 			/* register new controller */
2583 			if (key.nrkey == 0) {
2584 				SPDK_ERRLOG("Can't register zeroed new key\n");
2585 				status = SPDK_NVME_SC_INVALID_FIELD;
2586 				goto exit;
2587 			}
2588 			rc = nvmf_ns_reservation_add_registrant(ns, ctrlr, key.nrkey);
2589 			if (rc < 0) {
2590 				status = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
2591 				goto exit;
2592 			}
2593 			update_sgroup = true;
2594 		} else {
2595 			/* register with same key is not an error */
2596 			if (reg->rkey != key.nrkey) {
2597 				SPDK_ERRLOG("The same host already register a "
2598 					    "key with 0x%"PRIx64"\n",
2599 					    reg->rkey);
2600 				status = SPDK_NVME_SC_RESERVATION_CONFLICT;
2601 				goto exit;
2602 			}
2603 		}
2604 		break;
2605 	case SPDK_NVME_RESERVE_UNREGISTER_KEY:
2606 		if (!reg || (!iekey && reg->rkey != key.crkey)) {
2607 			SPDK_ERRLOG("No registrant or current key doesn't match "
2608 				    "with existing registrant key\n");
2609 			status = SPDK_NVME_SC_RESERVATION_CONFLICT;
2610 			goto exit;
2611 		}
2612 
2613 		rtype = ns->rtype;
2614 		num_hostid = nvmf_ns_reservation_get_all_other_hostid(ns, hostid_list,
2615 				SPDK_NVMF_MAX_NUM_REGISTRANTS,
2616 				&ctrlr->hostid);
2617 
2618 		nvmf_ns_reservation_remove_registrant(ns, reg);
2619 
2620 		if (!ns->rtype && num_hostid && (rtype == SPDK_NVME_RESERVE_WRITE_EXCLUSIVE_REG_ONLY ||
2621 						 rtype == SPDK_NVME_RESERVE_EXCLUSIVE_ACCESS_REG_ONLY)) {
2622 			nvmf_subsystem_gen_ctrlr_notification(ns->subsystem, ns,
2623 							      hostid_list,
2624 							      num_hostid,
2625 							      SPDK_NVME_RESERVATION_RELEASED);
2626 		}
2627 		update_sgroup = true;
2628 		break;
2629 	case SPDK_NVME_RESERVE_REPLACE_KEY:
2630 		if (key.nrkey == 0) {
2631 			SPDK_ERRLOG("Can't register zeroed new key\n");
2632 			status = SPDK_NVME_SC_INVALID_FIELD;
2633 			goto exit;
2634 		}
2635 		/* Registrant exists */
2636 		if (reg) {
2637 			if (!iekey && reg->rkey != key.crkey) {
2638 				SPDK_ERRLOG("Current key doesn't match "
2639 					    "existing registrant key\n");
2640 				status = SPDK_NVME_SC_RESERVATION_CONFLICT;
2641 				goto exit;
2642 			}
2643 			if (reg->rkey == key.nrkey) {
2644 				goto exit;
2645 			}
2646 			reg->rkey = key.nrkey;
2647 		} else if (iekey) { /* No registrant but IEKEY is set */
2648 			/* new registrant */
2649 			rc = nvmf_ns_reservation_add_registrant(ns, ctrlr, key.nrkey);
2650 			if (rc < 0) {
2651 				status = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
2652 				goto exit;
2653 			}
2654 		} else { /* No registrant */
2655 			SPDK_ERRLOG("No registrant\n");
2656 			status = SPDK_NVME_SC_RESERVATION_CONFLICT;
2657 			goto exit;
2658 
2659 		}
2660 		update_sgroup = true;
2661 		break;
2662 	default:
2663 		status = SPDK_NVME_SC_INVALID_FIELD;
2664 		goto exit;
2665 	}
2666 
2667 exit:
2668 	if (update_sgroup) {
2669 		rc = nvmf_ns_update_reservation_info(ns);
2670 		if (rc != 0) {
2671 			status = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
2672 		}
2673 	}
2674 	req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
2675 	req->rsp->nvme_cpl.status.sc = status;
2676 	return update_sgroup;
2677 }
2678 
/*
 * Process a Reservation Acquire command (RACQA: acquire / preempt) on @ns
 * for the issuing controller. Other RACQA values are rejected with
 * Invalid Field.
 *
 * Sets the NVMe completion status on @req and returns true when the
 * reservation state changed and the subsystem poll groups must be updated.
 */
static bool
nvmf_ns_reservation_acquire(struct spdk_nvmf_ns *ns,
			    struct spdk_nvmf_ctrlr *ctrlr,
			    struct spdk_nvmf_request *req)
{
	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
	uint8_t racqa, iekey, rtype;
	struct spdk_nvme_reservation_acquire_data key;
	struct spdk_nvmf_registrant *reg;
	bool all_regs = false;
	uint32_t count = 0;
	bool update_sgroup = true;
	struct spdk_uuid hostid_list[SPDK_NVMF_MAX_NUM_REGISTRANTS];
	uint32_t num_hostid = 0;
	struct spdk_uuid new_hostid_list[SPDK_NVMF_MAX_NUM_REGISTRANTS];
	uint32_t new_num_hostid = 0;
	bool reservation_released = false;
	uint8_t status = SPDK_NVME_SC_SUCCESS;

	racqa = cmd->cdw10_bits.resv_acquire.racqa;
	iekey = cmd->cdw10_bits.resv_acquire.iekey;
	rtype = cmd->cdw10_bits.resv_acquire.rtype;

	if (req->data && req->length >= sizeof(key)) {
		memcpy(&key, req->data, sizeof(key));
	} else {
		SPDK_ERRLOG("No key provided. Failing request.\n");
		status = SPDK_NVME_SC_INVALID_FIELD;
		goto exit;
	}

	SPDK_DEBUGLOG(nvmf, "ACQUIRE: RACQA %u, IEKEY %u, RTYPE %u, "
		      "NRKEY 0x%"PRIx64", PRKEY 0x%"PRIx64"\n",
		      racqa, iekey, rtype, key.crkey, key.prkey);

	/* IEKEY is not supported and the reservation type must be valid.
	 * NOTE(review): the log message only mentions IEKEY even though an
	 * out-of-range RTYPE also lands here. */
	if (iekey || rtype > SPDK_NVME_RESERVE_EXCLUSIVE_ACCESS_ALL_REGS) {
		SPDK_ERRLOG("Ignore existing key field set to 1\n");
		status = SPDK_NVME_SC_INVALID_FIELD;
		update_sgroup = false;
		goto exit;
	}

	reg = nvmf_ns_reservation_get_registrant(ns, &ctrlr->hostid);
	/* must be registrant and CRKEY must match */
	if (!reg || reg->rkey != key.crkey) {
		SPDK_ERRLOG("No registrant or current key doesn't match "
			    "with existing registrant key\n");
		status = SPDK_NVME_SC_RESERVATION_CONFLICT;
		update_sgroup = false;
		goto exit;
	}

	all_regs = nvmf_ns_reservation_all_registrants_type(ns);

	switch (racqa) {
	case SPDK_NVME_RESERVE_ACQUIRE:
		/* it's not an error for the holder to acquire same reservation type again */
		if (nvmf_ns_reservation_registrant_is_holder(ns, reg) && ns->rtype == rtype) {
			/* do nothing */
			update_sgroup = false;
		} else if (ns->holder == NULL) {
			/* first time to acquire the reservation */
			nvmf_ns_reservation_acquire_reservation(ns, key.crkey, rtype, reg);
		} else {
			SPDK_ERRLOG("Invalid rtype or current registrant is not holder\n");
			status = SPDK_NVME_SC_RESERVATION_CONFLICT;
			update_sgroup = false;
			goto exit;
		}
		break;
	case SPDK_NVME_RESERVE_PREEMPT:
		/* no reservation holder */
		if (!ns->holder) {
			/* unregister with PRKEY */
			nvmf_ns_reservation_remove_registrants_by_key(ns, key.prkey);
			break;
		}
		/* snapshot other registrants before any removal, to compute
		 * the unregistered set for notifications at exit */
		num_hostid = nvmf_ns_reservation_get_all_other_hostid(ns, hostid_list,
				SPDK_NVMF_MAX_NUM_REGISTRANTS,
				&ctrlr->hostid);

		/* only 1 reservation holder and reservation key is valid */
		if (!all_regs) {
			/* preempt itself */
			if (nvmf_ns_reservation_registrant_is_holder(ns, reg) &&
			    ns->crkey == key.prkey) {
				/* same holder, just change the reservation type */
				ns->rtype = rtype;
				reservation_released = true;
				break;
			}

			if (ns->crkey == key.prkey) {
				/* preempt the current holder and take over */
				nvmf_ns_reservation_remove_registrant(ns, ns->holder);
				nvmf_ns_reservation_acquire_reservation(ns, key.crkey, rtype, reg);
				reservation_released = true;
			} else if (key.prkey != 0) {
				/* PRKEY names other registrants: unregister them */
				nvmf_ns_reservation_remove_registrants_by_key(ns, key.prkey);
			} else {
				/* PRKEY is zero */
				SPDK_ERRLOG("Current PRKEY is zero\n");
				status = SPDK_NVME_SC_RESERVATION_CONFLICT;
				update_sgroup = false;
				goto exit;
			}
		} else {
			/* release all other registrants except for the current one */
			if (key.prkey == 0) {
				nvmf_ns_reservation_remove_all_other_registrants(ns, reg);
				assert(ns->holder == reg);
			} else {
				count = nvmf_ns_reservation_remove_registrants_by_key(ns, key.prkey);
				if (count == 0) {
					SPDK_ERRLOG("PRKEY doesn't match any registrant\n");
					status = SPDK_NVME_SC_RESERVATION_CONFLICT;
					update_sgroup = false;
					goto exit;
				}
			}
		}
		break;
	default:
		status = SPDK_NVME_SC_INVALID_FIELD;
		update_sgroup = false;
		break;
	}

exit:
	if (update_sgroup && racqa == SPDK_NVME_RESERVE_PREEMPT) {
		new_num_hostid = nvmf_ns_reservation_get_all_other_hostid(ns, new_hostid_list,
				 SPDK_NVMF_MAX_NUM_REGISTRANTS,
				 &ctrlr->hostid);
		/* Preempt notification occurs on the unregistered controllers
		 * other than the controller who issued the command.
		 */
		num_hostid = nvmf_ns_reservation_get_unregistered_hostid(hostid_list,
				num_hostid,
				new_hostid_list,
				new_num_hostid);
		if (num_hostid) {
			nvmf_subsystem_gen_ctrlr_notification(ns->subsystem, ns,
							      hostid_list,
							      num_hostid,
							      SPDK_NVME_REGISTRATION_PREEMPTED);

		}
		/* Reservation released notification occurs on the
		 * controllers which are the remaining registrants other than
		 * the controller who issued the command.
		 */
		if (reservation_released && new_num_hostid) {
			nvmf_subsystem_gen_ctrlr_notification(ns->subsystem, ns,
							      new_hostid_list,
							      new_num_hostid,
							      SPDK_NVME_RESERVATION_RELEASED);

		}
	}
	/* persist the new state when PTPL is active */
	if (update_sgroup && ns->ptpl_activated) {
		if (nvmf_ns_update_reservation_info(ns)) {
			status = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
		}
	}
	req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
	req->rsp->nvme_cpl.status.sc = status;
	return update_sgroup;
}
2845 
/*
 * Process a Reservation Release command (RRELA: release / clear) on @ns
 * for the issuing controller. Other RRELA values are rejected with
 * Invalid Field.
 *
 * Sets the NVMe completion status on @req and returns true when the
 * reservation state changed and the subsystem poll groups must be updated.
 */
static bool
nvmf_ns_reservation_release(struct spdk_nvmf_ns *ns,
			    struct spdk_nvmf_ctrlr *ctrlr,
			    struct spdk_nvmf_request *req)
{
	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
	uint8_t rrela, iekey, rtype;
	struct spdk_nvmf_registrant *reg;
	uint64_t crkey;
	uint8_t status = SPDK_NVME_SC_SUCCESS;
	bool update_sgroup = true;
	struct spdk_uuid hostid_list[SPDK_NVMF_MAX_NUM_REGISTRANTS];
	uint32_t num_hostid = 0;

	rrela = cmd->cdw10_bits.resv_release.rrela;
	iekey = cmd->cdw10_bits.resv_release.iekey;
	rtype = cmd->cdw10_bits.resv_release.rtype;

	/* the payload carries only the Current Reservation Key */
	if (req->data && req->length >= sizeof(crkey)) {
		memcpy(&crkey, req->data, sizeof(crkey));
	} else {
		SPDK_ERRLOG("No key provided. Failing request.\n");
		status = SPDK_NVME_SC_INVALID_FIELD;
		goto exit;
	}

	SPDK_DEBUGLOG(nvmf, "RELEASE: RRELA %u, IEKEY %u, RTYPE %u, "
		      "CRKEY 0x%"PRIx64"\n",  rrela, iekey, rtype, crkey);

	/* IEKEY is not supported */
	if (iekey) {
		SPDK_ERRLOG("Ignore existing key field set to 1\n");
		status = SPDK_NVME_SC_INVALID_FIELD;
		update_sgroup = false;
		goto exit;
	}

	/* the issuer must be a registrant and CRKEY must match its key */
	reg = nvmf_ns_reservation_get_registrant(ns, &ctrlr->hostid);
	if (!reg || reg->rkey != crkey) {
		SPDK_ERRLOG("No registrant or current key doesn't match "
			    "with existing registrant key\n");
		status = SPDK_NVME_SC_RESERVATION_CONFLICT;
		update_sgroup = false;
		goto exit;
	}

	/* snapshot the other registrants' HostIDs for the notifications below */
	num_hostid = nvmf_ns_reservation_get_all_other_hostid(ns, hostid_list,
			SPDK_NVMF_MAX_NUM_REGISTRANTS,
			&ctrlr->hostid);

	switch (rrela) {
	case SPDK_NVME_RESERVE_RELEASE:
		if (!ns->holder) {
			/* no reservation held: nothing to release, not an error */
			SPDK_DEBUGLOG(nvmf, "RELEASE: no holder\n");
			update_sgroup = false;
			goto exit;
		}
		if (ns->rtype != rtype) {
			SPDK_ERRLOG("Type doesn't match\n");
			status = SPDK_NVME_SC_INVALID_FIELD;
			update_sgroup = false;
			goto exit;
		}
		if (!nvmf_ns_reservation_registrant_is_holder(ns, reg)) {
			/* not the reservation holder, this isn't an error */
			update_sgroup = false;
			goto exit;
		}

		rtype = ns->rtype;
		nvmf_ns_reservation_release_reservation(ns);

		/* write-exclusive and exclusive-access types generate no
		 * release notification */
		if (num_hostid && rtype != SPDK_NVME_RESERVE_WRITE_EXCLUSIVE &&
		    rtype != SPDK_NVME_RESERVE_EXCLUSIVE_ACCESS) {
			nvmf_subsystem_gen_ctrlr_notification(ns->subsystem, ns,
							      hostid_list,
							      num_hostid,
							      SPDK_NVME_RESERVATION_RELEASED);
		}
		break;
	case SPDK_NVME_RESERVE_CLEAR:
		/* clear removes all registrants (which also drops the reservation) */
		nvmf_ns_reservation_clear_all_registrants(ns);
		if (num_hostid) {
			nvmf_subsystem_gen_ctrlr_notification(ns->subsystem, ns,
							      hostid_list,
							      num_hostid,
							      SPDK_NVME_RESERVATION_PREEMPTED);
		}
		break;
	default:
		status = SPDK_NVME_SC_INVALID_FIELD;
		update_sgroup = false;
		goto exit;
	}

exit:
	/* persist the new state when PTPL is active */
	if (update_sgroup && ns->ptpl_activated) {
		if (nvmf_ns_update_reservation_info(ns)) {
			status = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
		}
	}
	req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
	req->rsp->nvme_cpl.status.sc = status;
	return update_sgroup;
}
2950 
/*
 * Process a Reservation Report command: fill the request payload with the
 * extended reservation status data structure followed by one extended
 * registered-controller data entry per registrant, up to the transfer
 * length requested by the host.
 *
 * Sets the NVMe completion status on @req; no poll group update is needed
 * since reporting does not change reservation state.
 */
static void
nvmf_ns_reservation_report(struct spdk_nvmf_ns *ns,
			   struct spdk_nvmf_ctrlr *ctrlr,
			   struct spdk_nvmf_request *req)
{
	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
	struct spdk_nvmf_registrant *reg, *tmp;
	struct spdk_nvme_reservation_status_extended_data *status_data;
	struct spdk_nvme_registered_ctrlr_extended_data *ctrlr_data;
	uint8_t *payload;
	uint32_t transfer_len, payload_len = 0;
	uint32_t regctl = 0;
	uint8_t status = SPDK_NVME_SC_SUCCESS;

	if (req->data == NULL) {
		SPDK_ERRLOG("No data transfer specified for request. "
			    " Unable to transfer back response.\n");
		status = SPDK_NVME_SC_INVALID_FIELD;
		goto exit;
	}

	/* only the extended (EDS) format is supported over fabrics */
	if (!cmd->cdw11_bits.resv_report.eds) {
		SPDK_ERRLOG("NVMeoF uses extended controller data structure, "
			    "please set EDS bit in cdw11 and try again\n");
		status = SPDK_NVME_SC_HOSTID_INCONSISTENT_FORMAT;
		goto exit;
	}

	/* Number of Dwords of the Reservation Status data structure to transfer;
	 * cdw10 is a 0-based dword count, hence the +1. */
	transfer_len = (cmd->cdw10 + 1) * sizeof(uint32_t);
	payload = req->data;

	/* the fixed header must fit in the requested transfer length */
	if (transfer_len < sizeof(struct spdk_nvme_reservation_status_extended_data)) {
		status = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
		goto exit;
	}

	status_data = (struct spdk_nvme_reservation_status_extended_data *)payload;
	status_data->data.gen = ns->gen;
	status_data->data.rtype = ns->rtype;
	status_data->data.ptpls = ns->ptpl_activated;
	payload_len += sizeof(struct spdk_nvme_reservation_status_extended_data);

	TAILQ_FOREACH_SAFE(reg, &ns->registrants, link, tmp) {
		payload_len += sizeof(struct spdk_nvme_registered_ctrlr_extended_data);
		/* registrants that do not fit in transfer_len are omitted and
		 * not counted in regctl */
		if (payload_len > transfer_len) {
			break;
		}

		ctrlr_data = (struct spdk_nvme_registered_ctrlr_extended_data *)
			     (payload + sizeof(*status_data) + sizeof(*ctrlr_data) * regctl);
		/* Set to 0xffffh for dynamic controller */
		ctrlr_data->cntlid = 0xffff;
		ctrlr_data->rcsts.status = (ns->holder == reg) ? true : false;
		ctrlr_data->rkey = reg->rkey;
		spdk_uuid_copy((struct spdk_uuid *)ctrlr_data->hostid, &reg->hostid);
		regctl++;
	}
	status_data->data.regctl = regctl;

exit:
	req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
	req->rsp->nvme_cpl.status.sc = status;
	return;
}
3016 
/* Thread-message handler: complete the reservation request on the
 * poll group thread that originally owned it.
 */
static void
nvmf_ns_reservation_complete(void *ctx)
{
	struct spdk_nvmf_request *req;

	req = ctx;
	spdk_nvmf_request_complete(req);
}
3024 
3025 static void
3026 _nvmf_ns_reservation_update_done(struct spdk_nvmf_subsystem *subsystem,
3027 				 void *cb_arg, int status)
3028 {
3029 	struct spdk_nvmf_request *req = (struct spdk_nvmf_request *)cb_arg;
3030 	struct spdk_nvmf_poll_group *group = req->qpair->group;
3031 
3032 	spdk_thread_send_msg(group->thread, nvmf_ns_reservation_complete, req);
3033 }
3034 
3035 void
3036 nvmf_ns_reservation_request(void *ctx)
3037 {
3038 	struct spdk_nvmf_request *req = (struct spdk_nvmf_request *)ctx;
3039 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
3040 	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
3041 	struct subsystem_update_ns_ctx *update_ctx;
3042 	uint32_t nsid;
3043 	struct spdk_nvmf_ns *ns;
3044 	bool update_sgroup = false;
3045 
3046 	nsid = cmd->nsid;
3047 	ns = _nvmf_subsystem_get_ns(ctrlr->subsys, nsid);
3048 	assert(ns != NULL);
3049 
3050 	switch (cmd->opc) {
3051 	case SPDK_NVME_OPC_RESERVATION_REGISTER:
3052 		update_sgroup = nvmf_ns_reservation_register(ns, ctrlr, req);
3053 		break;
3054 	case SPDK_NVME_OPC_RESERVATION_ACQUIRE:
3055 		update_sgroup = nvmf_ns_reservation_acquire(ns, ctrlr, req);
3056 		break;
3057 	case SPDK_NVME_OPC_RESERVATION_RELEASE:
3058 		update_sgroup = nvmf_ns_reservation_release(ns, ctrlr, req);
3059 		break;
3060 	case SPDK_NVME_OPC_RESERVATION_REPORT:
3061 		nvmf_ns_reservation_report(ns, ctrlr, req);
3062 		break;
3063 	default:
3064 		break;
3065 	}
3066 
3067 	/* update reservation information to subsystem's poll group */
3068 	if (update_sgroup) {
3069 		update_ctx = calloc(1, sizeof(*update_ctx));
3070 		if (update_ctx == NULL) {
3071 			SPDK_ERRLOG("Can't alloc subsystem poll group update context\n");
3072 			goto update_done;
3073 		}
3074 		update_ctx->subsystem = ctrlr->subsys;
3075 		update_ctx->cb_fn = _nvmf_ns_reservation_update_done;
3076 		update_ctx->cb_arg = req;
3077 
3078 		nvmf_subsystem_update_ns(ctrlr->subsys, subsystem_update_ns_done, update_ctx);
3079 		return;
3080 	}
3081 
3082 update_done:
3083 	_nvmf_ns_reservation_update_done(ctrlr->subsys, (void *)req, 0);
3084 }
3085 
3086 int
3087 spdk_nvmf_subsystem_set_ana_reporting(struct spdk_nvmf_subsystem *subsystem,
3088 				      bool ana_reporting)
3089 {
3090 	if (subsystem->state != SPDK_NVMF_SUBSYSTEM_INACTIVE) {
3091 		return -EAGAIN;
3092 	}
3093 
3094 	subsystem->flags.ana_reporting = ana_reporting;
3095 
3096 	return 0;
3097 }
3098 
3099 bool
3100 nvmf_subsystem_get_ana_reporting(struct spdk_nvmf_subsystem *subsystem)
3101 {
3102 	return subsystem->flags.ana_reporting;
3103 }
3104 
/* Context carried through spdk_for_each_channel() while notifying every
 * poll group of an ANA state change on a listener.
 */
struct subsystem_listener_update_ctx {
	/* Listener whose per-group ANA state was just updated. */
	struct spdk_nvmf_subsystem_listener *listener;

	/* User completion callback, invoked once after all poll groups
	 * have been visited; may be NULL at the completion site.
	 */
	spdk_nvmf_tgt_subsystem_listen_done_fn cb_fn;
	void *cb_arg;
};
3111 
3112 static void
3113 subsystem_listener_update_done(struct spdk_io_channel_iter *i, int status)
3114 {
3115 	struct subsystem_listener_update_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
3116 
3117 	if (ctx->cb_fn) {
3118 		ctx->cb_fn(ctx->cb_arg, status);
3119 	}
3120 	free(ctx);
3121 }
3122 
3123 static void
3124 subsystem_listener_update_on_pg(struct spdk_io_channel_iter *i)
3125 {
3126 	struct subsystem_listener_update_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
3127 	struct spdk_nvmf_subsystem_listener *listener;
3128 	struct spdk_nvmf_poll_group *group;
3129 	struct spdk_nvmf_ctrlr *ctrlr;
3130 
3131 	listener = ctx->listener;
3132 	group = spdk_io_channel_get_ctx(spdk_io_channel_iter_get_channel(i));
3133 
3134 	TAILQ_FOREACH(ctrlr, &listener->subsystem->ctrlrs, link) {
3135 		if (ctrlr->admin_qpair->group == group && ctrlr->listener == listener) {
3136 			nvmf_ctrlr_async_event_ana_change_notice(ctrlr);
3137 		}
3138 	}
3139 
3140 	spdk_for_each_channel_continue(i, 0);
3141 }
3142 
3143 void
3144 nvmf_subsystem_set_ana_state(struct spdk_nvmf_subsystem *subsystem,
3145 			     const struct spdk_nvme_transport_id *trid,
3146 			     enum spdk_nvme_ana_state ana_state, uint32_t anagrpid,
3147 			     spdk_nvmf_tgt_subsystem_listen_done_fn cb_fn, void *cb_arg)
3148 {
3149 	struct spdk_nvmf_subsystem_listener *listener;
3150 	struct subsystem_listener_update_ctx *ctx;
3151 	uint32_t i;
3152 
3153 	assert(cb_fn != NULL);
3154 	assert(subsystem->state == SPDK_NVMF_SUBSYSTEM_INACTIVE ||
3155 	       subsystem->state == SPDK_NVMF_SUBSYSTEM_PAUSED);
3156 
3157 	if (!subsystem->flags.ana_reporting) {
3158 		SPDK_ERRLOG("ANA reporting is disabled\n");
3159 		cb_fn(cb_arg, -EINVAL);
3160 		return;
3161 	}
3162 
3163 	/* ANA Change state is not used, ANA Persistent Loss state
3164 	 * is not supported yet.
3165 	 */
3166 	if (!(ana_state == SPDK_NVME_ANA_OPTIMIZED_STATE ||
3167 	      ana_state == SPDK_NVME_ANA_NON_OPTIMIZED_STATE ||
3168 	      ana_state == SPDK_NVME_ANA_INACCESSIBLE_STATE)) {
3169 		SPDK_ERRLOG("ANA state %d is not supported\n", ana_state);
3170 		cb_fn(cb_arg, -ENOTSUP);
3171 		return;
3172 	}
3173 
3174 	if (anagrpid > subsystem->max_nsid) {
3175 		SPDK_ERRLOG("ANA group ID %" PRIu32 " is more than maximum\n", anagrpid);
3176 		cb_fn(cb_arg, -EINVAL);
3177 		return;
3178 	}
3179 
3180 	listener = nvmf_subsystem_find_listener(subsystem, trid);
3181 	if (!listener) {
3182 		SPDK_ERRLOG("Unable to find listener.\n");
3183 		cb_fn(cb_arg, -EINVAL);
3184 		return;
3185 	}
3186 
3187 	if (anagrpid != 0 && listener->ana_state[anagrpid - 1] == ana_state) {
3188 		cb_fn(cb_arg, 0);
3189 		return;
3190 	}
3191 
3192 	ctx = calloc(1, sizeof(*ctx));
3193 	if (!ctx) {
3194 		SPDK_ERRLOG("Unable to allocate context\n");
3195 		cb_fn(cb_arg, -ENOMEM);
3196 		return;
3197 	}
3198 
3199 	for (i = 1; i <= subsystem->max_nsid; i++) {
3200 		if (anagrpid == 0 || i == anagrpid) {
3201 			listener->ana_state[i - 1] = ana_state;
3202 		}
3203 	}
3204 	listener->ana_state_change_count++;
3205 
3206 	ctx->listener = listener;
3207 	ctx->cb_fn = cb_fn;
3208 	ctx->cb_arg = cb_arg;
3209 
3210 	spdk_for_each_channel(subsystem->tgt,
3211 			      subsystem_listener_update_on_pg,
3212 			      ctx,
3213 			      subsystem_listener_update_done);
3214 }
3215