xref: /spdk/lib/nvmf/subsystem.c (revision 6d2caa652b778f85d1c3386310b95ed93527245d)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright (c) Intel Corporation. All rights reserved.
5  *   Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved.
6  *   Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
7  *
8  *   Redistribution and use in source and binary forms, with or without
9  *   modification, are permitted provided that the following conditions
10  *   are met:
11  *
12  *     * Redistributions of source code must retain the above copyright
13  *       notice, this list of conditions and the following disclaimer.
14  *     * Redistributions in binary form must reproduce the above copyright
15  *       notice, this list of conditions and the following disclaimer in
16  *       the documentation and/or other materials provided with the
17  *       distribution.
18  *     * Neither the name of Intel Corporation nor the names of its
19  *       contributors may be used to endorse or promote products derived
20  *       from this software without specific prior written permission.
21  *
22  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  */
34 
35 #include "spdk/stdinc.h"
36 
37 #include "nvmf_internal.h"
38 #include "transport.h"
39 
40 #include "spdk/assert.h"
41 #include "spdk/likely.h"
42 #include "spdk/string.h"
43 #include "spdk/trace.h"
44 #include "spdk/nvmf_spec.h"
45 #include "spdk/uuid.h"
46 #include "spdk/json.h"
47 #include "spdk/file.h"
48 
49 #define __SPDK_BDEV_MODULE_ONLY
50 #include "spdk/bdev_module.h"
51 #include "spdk/log.h"
52 #include "spdk_internal/utf.h"
53 #include "spdk_internal/usdt.h"
54 
55 #define MODEL_NUMBER_DEFAULT "SPDK bdev Controller"
56 #define NVMF_SUBSYSTEM_DEFAULT_NAMESPACES 32
57 
58 /*
59  * States for parsing valid domains in NQNs according to RFC 1034
60  */
61 enum spdk_nvmf_nqn_domain_states {
62 	/* First character of a domain must be a letter */
63 	SPDK_NVMF_DOMAIN_ACCEPT_LETTER = 0,
64 
65 	/* Subsequent characters can be any of letter, digit, or hyphen */
66 	SPDK_NVMF_DOMAIN_ACCEPT_LDH = 1,
67 
68 	/* A domain label must end with either a letter or digit */
69 	SPDK_NVMF_DOMAIN_ACCEPT_ANY = 2
70 };
71 
72 static int _nvmf_subsystem_destroy(struct spdk_nvmf_subsystem *subsystem);
73 
/* Returns true if the buffer contains only printable ASCII (0x20-0x7E),
 * the character set the NVMe spec allows for ASCII string fields. */
static bool
nvmf_valid_ascii_string(const void *buf, size_t size)
{
	const uint8_t *bytes = buf;
	const uint8_t *end = bytes + size;

	while (bytes < end) {
		if (*bytes < 0x20 || *bytes > 0x7E) {
			return false;
		}
		bytes++;
	}

	return true;
}
89 
90 static bool
91 nvmf_valid_nqn(const char *nqn)
92 {
93 	size_t len;
94 	struct spdk_uuid uuid_value;
95 	uint32_t i;
96 	int bytes_consumed;
97 	uint32_t domain_label_length;
98 	char *reverse_domain_end;
99 	uint32_t reverse_domain_end_index;
100 	enum spdk_nvmf_nqn_domain_states domain_state = SPDK_NVMF_DOMAIN_ACCEPT_LETTER;
101 
102 	/* Check for length requirements */
103 	len = strlen(nqn);
104 	if (len > SPDK_NVMF_NQN_MAX_LEN) {
105 		SPDK_ERRLOG("Invalid NQN \"%s\": length %zu > max %d\n", nqn, len, SPDK_NVMF_NQN_MAX_LEN);
106 		return false;
107 	}
108 
109 	/* The nqn must be at least as long as SPDK_NVMF_NQN_MIN_LEN to contain the necessary prefix. */
110 	if (len < SPDK_NVMF_NQN_MIN_LEN) {
111 		SPDK_ERRLOG("Invalid NQN \"%s\": length %zu < min %d\n", nqn, len, SPDK_NVMF_NQN_MIN_LEN);
112 		return false;
113 	}
114 
115 	/* Check for discovery controller nqn */
116 	if (!strcmp(nqn, SPDK_NVMF_DISCOVERY_NQN)) {
117 		return true;
118 	}
119 
120 	/* Check for equality with the generic nqn structure of the form "nqn.2014-08.org.nvmexpress:uuid:11111111-2222-3333-4444-555555555555" */
121 	if (!strncmp(nqn, SPDK_NVMF_NQN_UUID_PRE, SPDK_NVMF_NQN_UUID_PRE_LEN)) {
122 		if (len != SPDK_NVMF_NQN_UUID_PRE_LEN + SPDK_NVMF_UUID_STRING_LEN) {
123 			SPDK_ERRLOG("Invalid NQN \"%s\": uuid is not the correct length\n", nqn);
124 			return false;
125 		}
126 
127 		if (spdk_uuid_parse(&uuid_value, &nqn[SPDK_NVMF_NQN_UUID_PRE_LEN])) {
128 			SPDK_ERRLOG("Invalid NQN \"%s\": uuid is not formatted correctly\n", nqn);
129 			return false;
130 		}
131 		return true;
132 	}
133 
134 	/* If the nqn does not match the uuid structure, the next several checks validate the form "nqn.yyyy-mm.reverse.domain:user-string" */
135 
136 	if (strncmp(nqn, "nqn.", 4) != 0) {
137 		SPDK_ERRLOG("Invalid NQN \"%s\": NQN must begin with \"nqn.\".\n", nqn);
138 		return false;
139 	}
140 
141 	/* Check for yyyy-mm. */
142 	if (!(isdigit(nqn[4]) && isdigit(nqn[5]) && isdigit(nqn[6]) && isdigit(nqn[7]) &&
143 	      nqn[8] == '-' && isdigit(nqn[9]) && isdigit(nqn[10]) && nqn[11] == '.')) {
144 		SPDK_ERRLOG("Invalid date code in NQN \"%s\"\n", nqn);
145 		return false;
146 	}
147 
148 	reverse_domain_end = strchr(nqn, ':');
149 	if (reverse_domain_end != NULL && (reverse_domain_end_index = reverse_domain_end - nqn) < len - 1) {
150 	} else {
151 		SPDK_ERRLOG("Invalid NQN \"%s\". NQN must contain user specified name with a ':' as a prefix.\n",
152 			    nqn);
153 		return false;
154 	}
155 
156 	/* Check for valid reverse domain */
157 	domain_label_length = 0;
158 	for (i = 12; i < reverse_domain_end_index; i++) {
159 		if (domain_label_length > SPDK_DOMAIN_LABEL_MAX_LEN) {
160 			SPDK_ERRLOG("Invalid domain name in NQN \"%s\". At least one Label is too long.\n", nqn);
161 			return false;
162 		}
163 
164 		switch (domain_state) {
165 
166 		case SPDK_NVMF_DOMAIN_ACCEPT_LETTER: {
167 			if (isalpha(nqn[i])) {
168 				domain_state = SPDK_NVMF_DOMAIN_ACCEPT_ANY;
169 				domain_label_length++;
170 				break;
171 			} else {
172 				SPDK_ERRLOG("Invalid domain name in NQN \"%s\". Label names must start with a letter.\n", nqn);
173 				return false;
174 			}
175 		}
176 
177 		case SPDK_NVMF_DOMAIN_ACCEPT_LDH: {
178 			if (isalpha(nqn[i]) || isdigit(nqn[i])) {
179 				domain_state = SPDK_NVMF_DOMAIN_ACCEPT_ANY;
180 				domain_label_length++;
181 				break;
182 			} else if (nqn[i] == '-') {
183 				if (i == reverse_domain_end_index - 1) {
184 					SPDK_ERRLOG("Invalid domain name in NQN \"%s\". Label names must end with an alphanumeric symbol.\n",
185 						    nqn);
186 					return false;
187 				}
188 				domain_state = SPDK_NVMF_DOMAIN_ACCEPT_LDH;
189 				domain_label_length++;
190 				break;
191 			} else if (nqn[i] == '.') {
192 				SPDK_ERRLOG("Invalid domain name in NQN \"%s\". Label names must end with an alphanumeric symbol.\n",
193 					    nqn);
194 				return false;
195 			} else {
196 				SPDK_ERRLOG("Invalid domain name in NQN \"%s\". Label names must contain only [a-z,A-Z,0-9,'-','.'].\n",
197 					    nqn);
198 				return false;
199 			}
200 		}
201 
202 		case SPDK_NVMF_DOMAIN_ACCEPT_ANY: {
203 			if (isalpha(nqn[i]) || isdigit(nqn[i])) {
204 				domain_state = SPDK_NVMF_DOMAIN_ACCEPT_ANY;
205 				domain_label_length++;
206 				break;
207 			} else if (nqn[i] == '-') {
208 				if (i == reverse_domain_end_index - 1) {
209 					SPDK_ERRLOG("Invalid domain name in NQN \"%s\". Label names must end with an alphanumeric symbol.\n",
210 						    nqn);
211 					return false;
212 				}
213 				domain_state = SPDK_NVMF_DOMAIN_ACCEPT_LDH;
214 				domain_label_length++;
215 				break;
216 			} else if (nqn[i] == '.') {
217 				domain_state = SPDK_NVMF_DOMAIN_ACCEPT_LETTER;
218 				domain_label_length = 0;
219 				break;
220 			} else {
221 				SPDK_ERRLOG("Invalid domain name in NQN \"%s\". Label names must contain only [a-z,A-Z,0-9,'-','.'].\n",
222 					    nqn);
223 				return false;
224 			}
225 		}
226 		}
227 	}
228 
229 	i = reverse_domain_end_index + 1;
230 	while (i < len) {
231 		bytes_consumed = utf8_valid(&nqn[i], &nqn[len]);
232 		if (bytes_consumed <= 0) {
233 			SPDK_ERRLOG("Invalid domain name in NQN \"%s\". Label names must contain only valid utf-8.\n", nqn);
234 			return false;
235 		}
236 
237 		i += bytes_consumed;
238 	}
239 	return true;
240 }
241 
242 static void subsystem_state_change_on_pg(struct spdk_io_channel_iter *i);
243 
244 struct spdk_nvmf_subsystem *
245 spdk_nvmf_subsystem_create(struct spdk_nvmf_tgt *tgt,
246 			   const char *nqn,
247 			   enum spdk_nvmf_subtype type,
248 			   uint32_t num_ns)
249 {
250 	struct spdk_nvmf_subsystem	*subsystem;
251 	uint32_t			sid;
252 
253 	if (spdk_nvmf_tgt_find_subsystem(tgt, nqn)) {
254 		SPDK_ERRLOG("Subsystem NQN '%s' already exists\n", nqn);
255 		return NULL;
256 	}
257 
258 	if (!nvmf_valid_nqn(nqn)) {
259 		return NULL;
260 	}
261 
262 	if (type == SPDK_NVMF_SUBTYPE_DISCOVERY) {
263 		if (num_ns != 0) {
264 			SPDK_ERRLOG("Discovery subsystem cannot have namespaces.\n");
265 			return NULL;
266 		}
267 	} else if (num_ns == 0) {
268 		num_ns = NVMF_SUBSYSTEM_DEFAULT_NAMESPACES;
269 	}
270 
271 	/* Find a free subsystem id (sid) */
272 	for (sid = 0; sid < tgt->max_subsystems; sid++) {
273 		if (tgt->subsystems[sid] == NULL) {
274 			break;
275 		}
276 	}
277 	if (sid >= tgt->max_subsystems) {
278 		return NULL;
279 	}
280 
281 	subsystem = calloc(1, sizeof(struct spdk_nvmf_subsystem));
282 	if (subsystem == NULL) {
283 		return NULL;
284 	}
285 
286 	subsystem->thread = spdk_get_thread();
287 	subsystem->state = SPDK_NVMF_SUBSYSTEM_INACTIVE;
288 	subsystem->tgt = tgt;
289 	subsystem->id = sid;
290 	subsystem->subtype = type;
291 	subsystem->max_nsid = num_ns;
292 	subsystem->next_cntlid = 0;
293 	subsystem->min_cntlid = NVMF_MIN_CNTLID;
294 	subsystem->max_cntlid = NVMF_MAX_CNTLID;
295 	snprintf(subsystem->subnqn, sizeof(subsystem->subnqn), "%s", nqn);
296 	pthread_mutex_init(&subsystem->mutex, NULL);
297 	TAILQ_INIT(&subsystem->listeners);
298 	TAILQ_INIT(&subsystem->hosts);
299 	TAILQ_INIT(&subsystem->ctrlrs);
300 
301 	if (num_ns != 0) {
302 		subsystem->ns = calloc(num_ns, sizeof(struct spdk_nvmf_ns *));
303 		if (subsystem->ns == NULL) {
304 			SPDK_ERRLOG("Namespace memory allocation failed\n");
305 			pthread_mutex_destroy(&subsystem->mutex);
306 			free(subsystem);
307 			return NULL;
308 		}
309 		subsystem->ana_group = calloc(num_ns, sizeof(uint32_t));
310 		if (subsystem->ana_group == NULL) {
311 			SPDK_ERRLOG("ANA group memory allocation failed\n");
312 			pthread_mutex_destroy(&subsystem->mutex);
313 			free(subsystem->ns);
314 			free(subsystem);
315 			return NULL;
316 		}
317 	}
318 
319 	memset(subsystem->sn, '0', sizeof(subsystem->sn) - 1);
320 	subsystem->sn[sizeof(subsystem->sn) - 1] = '\0';
321 
322 	snprintf(subsystem->mn, sizeof(subsystem->mn), "%s",
323 		 MODEL_NUMBER_DEFAULT);
324 
325 	tgt->subsystems[sid] = subsystem;
326 	nvmf_update_discovery_log(tgt, NULL);
327 
328 	return subsystem;
329 }
330 
/* Unlink @host from @subsystem's allowed-host list and free it.
 * Must hold subsystem->mutex while calling this function. */
static void
nvmf_subsystem_remove_host(struct spdk_nvmf_subsystem *subsystem, struct spdk_nvmf_host *host)
{
	TAILQ_REMOVE(&subsystem->hosts, host, link);
	free(host);
}
338 
/* Detach @listener from @subsystem and free it.  When @stop is true, also
 * stop the transport-level listener for the listener's transport ID.  Any
 * controller that still references this listener has its back-reference
 * cleared so it does not point at freed memory. */
static void
_nvmf_subsystem_remove_listener(struct spdk_nvmf_subsystem *subsystem,
				struct spdk_nvmf_subsystem_listener *listener,
				bool stop)
{
	struct spdk_nvmf_transport *transport;
	struct spdk_nvmf_ctrlr *ctrlr;

	if (stop) {
		transport = spdk_nvmf_tgt_get_transport(subsystem->tgt, listener->trid->trstring);
		if (transport != NULL) {
			spdk_nvmf_transport_stop_listen(transport, listener->trid);
		}
	}

	/* Clear dangling controller references before freeing the listener. */
	TAILQ_FOREACH(ctrlr, &subsystem->ctrlrs, link) {
		if (ctrlr->listener == listener) {
			ctrlr->listener = NULL;
		}
	}

	TAILQ_REMOVE(&subsystem->listeners, listener, link);
	free(listener->ana_state);
	free(listener);
}
364 
/* Thread-message trampoline: retries subsystem destruction on the
 * subsystem's owning thread (see _nvmf_subsystem_destroy). */
static void
_nvmf_subsystem_destroy_msg(void *cb_arg)
{
	_nvmf_subsystem_destroy((struct spdk_nvmf_subsystem *)cb_arg);
}
372 
/*
 * Tear down @subsystem and free all of its resources.
 *
 * If controllers are still attached, destruction cannot proceed yet: the
 * subsystem is flagged for async destruction and this function is re-sent
 * as a message to the subsystem's thread, returning -EINPROGRESS.  On the
 * async path the stored async_destroy_cb is invoked after the final free.
 * Returns 0 when destruction completes synchronously.
 */
static int
_nvmf_subsystem_destroy(struct spdk_nvmf_subsystem *subsystem)
{
	struct spdk_nvmf_ns		*ns;
	nvmf_subsystem_destroy_cb	async_destroy_cb = NULL;
	void				*async_destroy_cb_arg = NULL;
	int				rc;

	if (!TAILQ_EMPTY(&subsystem->ctrlrs)) {
		SPDK_DEBUGLOG(nvmf, "subsystem %p %s has active controllers\n", subsystem, subsystem->subnqn);
		subsystem->async_destroy = true;
		/* Retry later on the owning thread, once controllers detach. */
		rc = spdk_thread_send_msg(subsystem->thread, _nvmf_subsystem_destroy_msg, subsystem);
		if (rc) {
			SPDK_ERRLOG("Failed to send thread msg, rc %d\n", rc);
			assert(0);
			return rc;
		}
		return -EINPROGRESS;
	}

	/* Remove every namespace; fetch the next pointer before removal
	 * invalidates the current entry. */
	ns = spdk_nvmf_subsystem_get_first_ns(subsystem);
	while (ns != NULL) {
		struct spdk_nvmf_ns *next_ns = spdk_nvmf_subsystem_get_next_ns(subsystem, ns);

		spdk_nvmf_subsystem_remove_ns(subsystem, ns->opts.nsid);
		ns = next_ns;
	}

	free(subsystem->ns);
	free(subsystem->ana_group);

	/* Release this subsystem's id slot in the target. */
	subsystem->tgt->subsystems[subsystem->id] = NULL;
	nvmf_update_discovery_log(subsystem->tgt, NULL);

	pthread_mutex_destroy(&subsystem->mutex);

	/* Capture the async completion callback before freeing the subsystem,
	 * since the callback must run after the free. */
	if (subsystem->async_destroy) {
		async_destroy_cb = subsystem->async_destroy_cb;
		async_destroy_cb_arg = subsystem->async_destroy_cb_arg;
	}

	free(subsystem);

	if (async_destroy_cb) {
		async_destroy_cb(async_destroy_cb_arg);
	}

	return 0;
}
422 
/*
 * Destroy @subsystem.  Must be called on the subsystem's thread while the
 * subsystem is in the INACTIVE state.
 *
 * \param subsystem Subsystem to destroy.
 * \param cpl_cb Invoked (with @cpl_cb_arg) only if destruction completes
 *               asynchronously.
 *
 * \return 0 if destruction finished synchronously, -EINPROGRESS if it will
 * finish later via @cpl_cb, or a negative errno on invalid usage.
 */
int
spdk_nvmf_subsystem_destroy(struct spdk_nvmf_subsystem *subsystem, nvmf_subsystem_destroy_cb cpl_cb,
			    void *cpl_cb_arg)
{
	struct spdk_nvmf_host *host, *host_tmp;

	if (!subsystem) {
		return -EINVAL;
	}

	assert(spdk_get_thread() == subsystem->thread);

	if (subsystem->state != SPDK_NVMF_SUBSYSTEM_INACTIVE) {
		SPDK_ERRLOG("Subsystem can only be destroyed in inactive state\n");
		assert(0);
		return -EAGAIN;
	}
	if (subsystem->destroying) {
		SPDK_ERRLOG("Subsystem destruction is already started\n");
		assert(0);
		return -EALREADY;
	}

	subsystem->destroying = true;

	SPDK_DEBUGLOG(nvmf, "subsystem is %p %s\n", subsystem, subsystem->subnqn);

	/* Listeners are removed without stopping the transport-level listen. */
	nvmf_subsystem_remove_all_listeners(subsystem, false);

	pthread_mutex_lock(&subsystem->mutex);

	TAILQ_FOREACH_SAFE(host, &subsystem->hosts, link, host_tmp) {
		nvmf_subsystem_remove_host(subsystem, host);
	}

	pthread_mutex_unlock(&subsystem->mutex);

	/* Stash the completion callback for the async path, then attempt the
	 * actual teardown. */
	subsystem->async_destroy_cb = cpl_cb;
	subsystem->async_destroy_cb_arg = cpl_cb_arg;

	return _nvmf_subsystem_destroy(subsystem);
}
465 
466 /* we have to use the typedef in the function declaration to appease astyle. */
467 typedef enum spdk_nvmf_subsystem_state spdk_nvmf_subsystem_state_t;
468 
469 static spdk_nvmf_subsystem_state_t
470 nvmf_subsystem_get_intermediate_state(enum spdk_nvmf_subsystem_state current_state,
471 				      enum spdk_nvmf_subsystem_state requested_state)
472 {
473 	switch (requested_state) {
474 	case SPDK_NVMF_SUBSYSTEM_INACTIVE:
475 		return SPDK_NVMF_SUBSYSTEM_DEACTIVATING;
476 	case SPDK_NVMF_SUBSYSTEM_ACTIVE:
477 		if (current_state == SPDK_NVMF_SUBSYSTEM_PAUSED) {
478 			return SPDK_NVMF_SUBSYSTEM_RESUMING;
479 		} else {
480 			return SPDK_NVMF_SUBSYSTEM_ACTIVATING;
481 		}
482 	case SPDK_NVMF_SUBSYSTEM_PAUSED:
483 		return SPDK_NVMF_SUBSYSTEM_PAUSING;
484 	default:
485 		assert(false);
486 		return SPDK_NVMF_SUBSYSTEM_NUM_STATES;
487 	}
488 }
489 
/*
 * Atomically transition @subsystem to @state.
 *
 * Each target state has one primary legal predecessor; the transition is
 * attempted with a compare-and-swap.  If that fails, a small set of
 * alternate predecessors (error/rollback paths) is tried once more.
 * Returns 0 on success, non-zero if the subsystem was not in an expected
 * predecessor state.
 */
static int
nvmf_subsystem_set_state(struct spdk_nvmf_subsystem *subsystem,
			 enum spdk_nvmf_subsystem_state state)
{
	enum spdk_nvmf_subsystem_state actual_old_state, expected_old_state;
	bool exchanged;

	/* Map the requested state to its primary expected predecessor. */
	switch (state) {
	case SPDK_NVMF_SUBSYSTEM_INACTIVE:
		expected_old_state = SPDK_NVMF_SUBSYSTEM_DEACTIVATING;
		break;
	case SPDK_NVMF_SUBSYSTEM_ACTIVATING:
		expected_old_state = SPDK_NVMF_SUBSYSTEM_INACTIVE;
		break;
	case SPDK_NVMF_SUBSYSTEM_ACTIVE:
		expected_old_state = SPDK_NVMF_SUBSYSTEM_ACTIVATING;
		break;
	case SPDK_NVMF_SUBSYSTEM_PAUSING:
		expected_old_state = SPDK_NVMF_SUBSYSTEM_ACTIVE;
		break;
	case SPDK_NVMF_SUBSYSTEM_PAUSED:
		expected_old_state = SPDK_NVMF_SUBSYSTEM_PAUSING;
		break;
	case SPDK_NVMF_SUBSYSTEM_RESUMING:
		expected_old_state = SPDK_NVMF_SUBSYSTEM_PAUSED;
		break;
	case SPDK_NVMF_SUBSYSTEM_DEACTIVATING:
		expected_old_state = SPDK_NVMF_SUBSYSTEM_ACTIVE;
		break;
	default:
		assert(false);
		return -1;
	}

	actual_old_state = expected_old_state;
	exchanged = __atomic_compare_exchange_n(&subsystem->state, &actual_old_state, state, false,
						__ATOMIC_RELAXED, __ATOMIC_RELAXED);
	if (spdk_unlikely(exchanged == false)) {
		/* The primary predecessor did not match.  actual_old_state now
		 * holds the observed state; pick the matching alternate
		 * predecessor and retry the CAS once. */
		if (actual_old_state == SPDK_NVMF_SUBSYSTEM_RESUMING &&
		    state == SPDK_NVMF_SUBSYSTEM_ACTIVE) {
			expected_old_state = SPDK_NVMF_SUBSYSTEM_RESUMING;
		}
		/* This is for the case when activating the subsystem fails. */
		if (actual_old_state == SPDK_NVMF_SUBSYSTEM_ACTIVATING &&
		    state == SPDK_NVMF_SUBSYSTEM_DEACTIVATING) {
			expected_old_state = SPDK_NVMF_SUBSYSTEM_ACTIVATING;
		}
		/* This is for the case when resuming the subsystem fails. */
		if (actual_old_state == SPDK_NVMF_SUBSYSTEM_RESUMING &&
		    state == SPDK_NVMF_SUBSYSTEM_PAUSING) {
			expected_old_state = SPDK_NVMF_SUBSYSTEM_RESUMING;
		}
		actual_old_state = expected_old_state;
		__atomic_compare_exchange_n(&subsystem->state, &actual_old_state, state, false,
					    __ATOMIC_RELAXED, __ATOMIC_RELAXED);
	}
	/* On success either CAS leaves actual_old_state == expected_old_state,
	 * making the return value 0. */
	assert(actual_old_state == expected_old_state);
	return actual_old_state - expected_old_state;
}
549 
/* Context carried through one subsystem state-change operation as it visits
 * every poll group via spdk_for_each_channel. */
struct subsystem_state_change_ctx {
	struct spdk_nvmf_subsystem		*subsystem;
	/* Namespace id forwarded to the pause path
	 * (nvmf_poll_group_pause_subsystem); unused for other transitions. */
	uint16_t				nsid;

	/* State when the operation began; the revert target on failure. */
	enum spdk_nvmf_subsystem_state		original_state;
	/* Stable state the caller asked for. */
	enum spdk_nvmf_subsystem_state		requested_state;

	/* User completion callback and its argument; may be NULL. */
	spdk_nvmf_subsystem_state_change_done	cb_fn;
	void					*cb_arg;
};
560 
/* Completion of the poll-group sweep that reverted a failed state change.
 * Restores the pre-operation state (ctx->requested_state was rewritten to
 * the original state before this sweep) and reports failure to the caller.
 * Only reachable on an error path. */
static void
subsystem_state_change_revert_done(struct spdk_io_channel_iter *i, int status)
{
	struct subsystem_state_change_ctx *ctx = spdk_io_channel_iter_get_ctx(i);

	/* Nothing to be done here if the state setting fails, we are just screwed. */
	if (nvmf_subsystem_set_state(ctx->subsystem, ctx->requested_state)) {
		SPDK_ERRLOG("Unable to revert the subsystem state after operation failure.\n");
	}

	ctx->subsystem->changing_state = false;
	if (ctx->cb_fn) {
		/* return a failure here. This function only exists in an error path. */
		ctx->cb_fn(ctx->subsystem, ctx->cb_arg, -1);
	}
	free(ctx);
}
578 
/*
 * Completion of the per-poll-group state-change sweep.  On success, commit
 * the requested state.  On any failure, move back through the opposite
 * intermediate state and sweep the poll groups again to revert them, with
 * subsystem_state_change_revert_done finishing the operation.
 */
static void
subsystem_state_change_done(struct spdk_io_channel_iter *i, int status)
{
	struct subsystem_state_change_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
	enum spdk_nvmf_subsystem_state intermediate_state;

	SPDK_DTRACE_PROBE4(nvmf_subsystem_change_state_done, ctx->subsystem->subnqn,
			   ctx->requested_state, ctx->original_state, status);

	if (status == 0) {
		status = nvmf_subsystem_set_state(ctx->subsystem, ctx->requested_state);
		if (status) {
			/* Normalize to -1 so the revert path below triggers. */
			status = -1;
		}
	}

	if (status) {
		/* Reverting means heading back toward the original state, so the
		 * intermediate state is computed with the arguments swapped. */
		intermediate_state = nvmf_subsystem_get_intermediate_state(ctx->requested_state,
				     ctx->original_state);
		assert(intermediate_state != SPDK_NVMF_SUBSYSTEM_NUM_STATES);

		if (nvmf_subsystem_set_state(ctx->subsystem, intermediate_state)) {
			goto out;
		}
		/* Reuse the same ctx for the revert sweep, now targeting the
		 * original state. */
		ctx->requested_state = ctx->original_state;
		spdk_for_each_channel(ctx->subsystem->tgt,
				      subsystem_state_change_on_pg,
				      ctx,
				      subsystem_state_change_revert_done);
		return;
	}

out:
	ctx->subsystem->changing_state = false;
	if (ctx->cb_fn) {
		ctx->cb_fn(ctx->subsystem, ctx->cb_arg, status);
	}
	free(ctx);
}
618 
619 static void
620 subsystem_state_change_continue(void *ctx, int status)
621 {
622 	struct spdk_io_channel_iter *i = ctx;
623 	struct subsystem_state_change_ctx *_ctx __attribute__((unused));
624 
625 	_ctx = spdk_io_channel_iter_get_ctx(i);
626 	SPDK_DTRACE_PROBE3(nvmf_pg_change_state_done, _ctx->subsystem->subnqn,
627 			   _ctx->requested_state, spdk_thread_get_id(spdk_get_thread()));
628 
629 	spdk_for_each_channel_continue(i, status);
630 }
631 
/*
 * Per-poll-group step of a subsystem state change, executed on each poll
 * group's thread via spdk_for_each_channel.  Dispatches to the poll-group
 * add/remove/pause/resume helper matching the requested state; each helper
 * calls subsystem_state_change_continue when done.
 */
static void
subsystem_state_change_on_pg(struct spdk_io_channel_iter *i)
{
	struct subsystem_state_change_ctx *ctx;
	struct spdk_io_channel *ch;
	struct spdk_nvmf_poll_group *group;

	ctx = spdk_io_channel_iter_get_ctx(i);
	ch = spdk_io_channel_iter_get_channel(i);
	group = spdk_io_channel_get_ctx(ch);

	SPDK_DTRACE_PROBE3(nvmf_pg_change_state, ctx->subsystem->subnqn,
			   ctx->requested_state, spdk_thread_get_id(spdk_get_thread()));
	switch (ctx->requested_state) {
	case SPDK_NVMF_SUBSYSTEM_INACTIVE:
		nvmf_poll_group_remove_subsystem(group, ctx->subsystem, subsystem_state_change_continue, i);
		break;
	case SPDK_NVMF_SUBSYSTEM_ACTIVE:
		/* ACTIVE is reached from two different intermediate states;
		 * the current in-flight state picks add vs. resume. */
		if (ctx->subsystem->state == SPDK_NVMF_SUBSYSTEM_ACTIVATING) {
			nvmf_poll_group_add_subsystem(group, ctx->subsystem, subsystem_state_change_continue, i);
		} else if (ctx->subsystem->state == SPDK_NVMF_SUBSYSTEM_RESUMING) {
			nvmf_poll_group_resume_subsystem(group, ctx->subsystem, subsystem_state_change_continue, i);
		}
		break;
	case SPDK_NVMF_SUBSYSTEM_PAUSED:
		nvmf_poll_group_pause_subsystem(group, ctx->subsystem, ctx->nsid, subsystem_state_change_continue,
						i);
		break;
	default:
		assert(false);
		break;
	}
}
665 
/*
 * Begin an asynchronous subsystem state change to @requested_state.
 *
 * Only one state change may be in flight at a time (-EBUSY otherwise).  The
 * subsystem first moves to the matching intermediate state, then every poll
 * group is updated; subsystem_state_change_done commits the final state and
 * invokes @cb_fn.  @nsid is forwarded to the pause path.  Returns 0 once the
 * operation is started (or was a no-op), negative errno on failure to start.
 */
static int
nvmf_subsystem_state_change(struct spdk_nvmf_subsystem *subsystem,
			    uint32_t nsid,
			    enum spdk_nvmf_subsystem_state requested_state,
			    spdk_nvmf_subsystem_state_change_done cb_fn,
			    void *cb_arg)
{
	struct subsystem_state_change_ctx *ctx;
	enum spdk_nvmf_subsystem_state intermediate_state;
	int rc;

	/* Atomically claim the single in-flight state-change slot. */
	if (__sync_val_compare_and_swap(&subsystem->changing_state, false, true)) {
		return -EBUSY;
	}

	SPDK_DTRACE_PROBE3(nvmf_subsystem_change_state, subsystem->subnqn,
			   requested_state, subsystem->state);
	/* If we are already in the requested state, just call the callback immediately. */
	if (subsystem->state == requested_state) {
		subsystem->changing_state = false;
		if (cb_fn) {
			cb_fn(subsystem, cb_arg, 0);
		}
		return 0;
	}

	intermediate_state = nvmf_subsystem_get_intermediate_state(subsystem->state, requested_state);
	assert(intermediate_state != SPDK_NVMF_SUBSYSTEM_NUM_STATES);

	ctx = calloc(1, sizeof(*ctx));
	if (!ctx) {
		subsystem->changing_state = false;
		return -ENOMEM;
	}

	/* Record the starting state before entering the intermediate state so
	 * a failure can be reverted. */
	ctx->original_state = subsystem->state;
	rc = nvmf_subsystem_set_state(subsystem, intermediate_state);
	if (rc) {
		free(ctx);
		subsystem->changing_state = false;
		return rc;
	}

	ctx->subsystem = subsystem;
	ctx->nsid = nsid;
	ctx->requested_state = requested_state;
	ctx->cb_fn = cb_fn;
	ctx->cb_arg = cb_arg;

	/* ctx ownership passes to the channel iteration; it is freed in
	 * subsystem_state_change_done or the revert completion. */
	spdk_for_each_channel(subsystem->tgt,
			      subsystem_state_change_on_pg,
			      ctx,
			      subsystem_state_change_done);

	return 0;
}
722 
723 int
724 spdk_nvmf_subsystem_start(struct spdk_nvmf_subsystem *subsystem,
725 			  spdk_nvmf_subsystem_state_change_done cb_fn,
726 			  void *cb_arg)
727 {
728 	return nvmf_subsystem_state_change(subsystem, 0, SPDK_NVMF_SUBSYSTEM_ACTIVE, cb_fn, cb_arg);
729 }
730 
731 int
732 spdk_nvmf_subsystem_stop(struct spdk_nvmf_subsystem *subsystem,
733 			 spdk_nvmf_subsystem_state_change_done cb_fn,
734 			 void *cb_arg)
735 {
736 	return nvmf_subsystem_state_change(subsystem, 0, SPDK_NVMF_SUBSYSTEM_INACTIVE, cb_fn, cb_arg);
737 }
738 
739 int
740 spdk_nvmf_subsystem_pause(struct spdk_nvmf_subsystem *subsystem,
741 			  uint32_t nsid,
742 			  spdk_nvmf_subsystem_state_change_done cb_fn,
743 			  void *cb_arg)
744 {
745 	return nvmf_subsystem_state_change(subsystem, nsid, SPDK_NVMF_SUBSYSTEM_PAUSED, cb_fn, cb_arg);
746 }
747 
748 int
749 spdk_nvmf_subsystem_resume(struct spdk_nvmf_subsystem *subsystem,
750 			   spdk_nvmf_subsystem_state_change_done cb_fn,
751 			   void *cb_arg)
752 {
753 	return nvmf_subsystem_state_change(subsystem, 0, SPDK_NVMF_SUBSYSTEM_ACTIVE, cb_fn, cb_arg);
754 }
755 
756 struct spdk_nvmf_subsystem *
757 spdk_nvmf_subsystem_get_first(struct spdk_nvmf_tgt *tgt)
758 {
759 	struct spdk_nvmf_subsystem	*subsystem;
760 	uint32_t sid;
761 
762 	for (sid = 0; sid < tgt->max_subsystems; sid++) {
763 		subsystem = tgt->subsystems[sid];
764 		if (subsystem) {
765 			return subsystem;
766 		}
767 	}
768 
769 	return NULL;
770 }
771 
772 struct spdk_nvmf_subsystem *
773 spdk_nvmf_subsystem_get_next(struct spdk_nvmf_subsystem *subsystem)
774 {
775 	uint32_t sid;
776 	struct spdk_nvmf_tgt *tgt;
777 
778 	if (!subsystem) {
779 		return NULL;
780 	}
781 
782 	tgt = subsystem->tgt;
783 
784 	for (sid = subsystem->id + 1; sid < tgt->max_subsystems; sid++) {
785 		subsystem = tgt->subsystems[sid];
786 		if (subsystem) {
787 			return subsystem;
788 		}
789 	}
790 
791 	return NULL;
792 }
793 
794 /* Must hold subsystem->mutex while calling this function */
795 static struct spdk_nvmf_host *
796 nvmf_subsystem_find_host(struct spdk_nvmf_subsystem *subsystem, const char *hostnqn)
797 {
798 	struct spdk_nvmf_host *host = NULL;
799 
800 	TAILQ_FOREACH(host, &subsystem->hosts, link) {
801 		if (strcmp(hostnqn, host->nqn) == 0) {
802 			return host;
803 		}
804 	}
805 
806 	return NULL;
807 }
808 
809 int
810 spdk_nvmf_subsystem_add_host(struct spdk_nvmf_subsystem *subsystem, const char *hostnqn)
811 {
812 	struct spdk_nvmf_host *host;
813 
814 	if (!nvmf_valid_nqn(hostnqn)) {
815 		return -EINVAL;
816 	}
817 
818 	pthread_mutex_lock(&subsystem->mutex);
819 
820 	if (nvmf_subsystem_find_host(subsystem, hostnqn)) {
821 		/* This subsystem already allows the specified host. */
822 		pthread_mutex_unlock(&subsystem->mutex);
823 		return 0;
824 	}
825 
826 	host = calloc(1, sizeof(*host));
827 	if (!host) {
828 		pthread_mutex_unlock(&subsystem->mutex);
829 		return -ENOMEM;
830 	}
831 
832 	snprintf(host->nqn, sizeof(host->nqn), "%s", hostnqn);
833 
834 	TAILQ_INSERT_HEAD(&subsystem->hosts, host, link);
835 
836 	nvmf_update_discovery_log(subsystem->tgt, hostnqn);
837 
838 	pthread_mutex_unlock(&subsystem->mutex);
839 
840 	return 0;
841 }
842 
843 int
844 spdk_nvmf_subsystem_remove_host(struct spdk_nvmf_subsystem *subsystem, const char *hostnqn)
845 {
846 	struct spdk_nvmf_host *host;
847 
848 	pthread_mutex_lock(&subsystem->mutex);
849 
850 	host = nvmf_subsystem_find_host(subsystem, hostnqn);
851 	if (host == NULL) {
852 		pthread_mutex_unlock(&subsystem->mutex);
853 		return -ENOENT;
854 	}
855 
856 	nvmf_subsystem_remove_host(subsystem, host);
857 	pthread_mutex_unlock(&subsystem->mutex);
858 
859 	return 0;
860 }
861 
/* Context for disconnecting all of one host's qpairs across every poll
 * group; freed in nvmf_subsystem_disconnect_host_fini. */
struct nvmf_subsystem_disconnect_host_ctx {
	struct spdk_nvmf_subsystem		*subsystem;
	/* Heap-allocated copy of the caller's host NQN. */
	char					*hostnqn;
	/* Completion callback and its argument; cb_fn may be NULL. */
	spdk_nvmf_tgt_subsystem_listen_done_fn	cb_fn;
	void					*cb_arg;
};
868 
869 static void
870 nvmf_subsystem_disconnect_host_fini(struct spdk_io_channel_iter *i, int status)
871 {
872 	struct nvmf_subsystem_disconnect_host_ctx *ctx;
873 
874 	ctx = spdk_io_channel_iter_get_ctx(i);
875 
876 	if (ctx->cb_fn) {
877 		ctx->cb_fn(ctx->cb_arg, status);
878 	}
879 	free(ctx->hostnqn);
880 	free(ctx);
881 }
882 
/*
 * Per-poll-group step: disconnect every qpair on this poll group whose
 * controller belongs to the target subsystem and whose host NQN matches
 * the one being disconnected.
 */
static void
nvmf_subsystem_disconnect_qpairs_by_host(struct spdk_io_channel_iter *i)
{
	struct nvmf_subsystem_disconnect_host_ctx *ctx;
	struct spdk_nvmf_poll_group *group;
	struct spdk_io_channel *ch;
	struct spdk_nvmf_qpair *qpair, *tmp_qpair;
	struct spdk_nvmf_ctrlr *ctrlr;

	ctx = spdk_io_channel_iter_get_ctx(i);
	ch = spdk_io_channel_iter_get_channel(i);
	group = spdk_io_channel_get_ctx(ch);

	/* Safe iteration: disconnecting may unlink the qpair from the list. */
	TAILQ_FOREACH_SAFE(qpair, &group->qpairs, link, tmp_qpair) {
		ctrlr = qpair->ctrlr;

		/* Skip qpairs with no controller yet, or from other subsystems. */
		if (ctrlr == NULL || ctrlr->subsys != ctx->subsystem) {
			continue;
		}

		if (strncmp(ctrlr->hostnqn, ctx->hostnqn, sizeof(ctrlr->hostnqn)) == 0) {
			/* Right now this does not wait for the queue pairs to actually disconnect. */
			spdk_nvmf_qpair_disconnect(qpair, NULL, NULL);
		}
	}
	spdk_for_each_channel_continue(i, 0);
}
910 
911 int
912 spdk_nvmf_subsystem_disconnect_host(struct spdk_nvmf_subsystem *subsystem,
913 				    const char *hostnqn,
914 				    spdk_nvmf_tgt_subsystem_listen_done_fn cb_fn,
915 				    void *cb_arg)
916 {
917 	struct nvmf_subsystem_disconnect_host_ctx *ctx;
918 
919 	ctx = calloc(1, sizeof(struct nvmf_subsystem_disconnect_host_ctx));
920 	if (ctx == NULL) {
921 		return -ENOMEM;
922 	}
923 
924 	ctx->hostnqn = strdup(hostnqn);
925 	if (ctx->hostnqn == NULL) {
926 		free(ctx);
927 		return -ENOMEM;
928 	}
929 
930 	ctx->subsystem = subsystem;
931 	ctx->cb_fn = cb_fn;
932 	ctx->cb_arg = cb_arg;
933 
934 	spdk_for_each_channel(subsystem->tgt, nvmf_subsystem_disconnect_qpairs_by_host, ctx,
935 			      nvmf_subsystem_disconnect_host_fini);
936 
937 	return 0;
938 }
939 
940 int
941 spdk_nvmf_subsystem_set_allow_any_host(struct spdk_nvmf_subsystem *subsystem, bool allow_any_host)
942 {
943 	pthread_mutex_lock(&subsystem->mutex);
944 	subsystem->flags.allow_any_host = allow_any_host;
945 	nvmf_update_discovery_log(subsystem->tgt, NULL);
946 	pthread_mutex_unlock(&subsystem->mutex);
947 
948 	return 0;
949 }
950 
951 bool
952 spdk_nvmf_subsystem_get_allow_any_host(const struct spdk_nvmf_subsystem *subsystem)
953 {
954 	bool allow_any_host;
955 	struct spdk_nvmf_subsystem *sub;
956 
957 	/* Technically, taking the mutex modifies data in the subsystem. But the const
958 	 * is still important to convey that this doesn't mutate any other data. Cast
959 	 * it away to work around this. */
960 	sub = (struct spdk_nvmf_subsystem *)subsystem;
961 
962 	pthread_mutex_lock(&sub->mutex);
963 	allow_any_host = sub->flags.allow_any_host;
964 	pthread_mutex_unlock(&sub->mutex);
965 
966 	return allow_any_host;
967 }
968 
969 bool
970 spdk_nvmf_subsystem_host_allowed(struct spdk_nvmf_subsystem *subsystem, const char *hostnqn)
971 {
972 	bool allowed;
973 
974 	if (!hostnqn) {
975 		return false;
976 	}
977 
978 	pthread_mutex_lock(&subsystem->mutex);
979 
980 	if (subsystem->flags.allow_any_host) {
981 		pthread_mutex_unlock(&subsystem->mutex);
982 		return true;
983 	}
984 
985 	allowed =  nvmf_subsystem_find_host(subsystem, hostnqn) != NULL;
986 	pthread_mutex_unlock(&subsystem->mutex);
987 
988 	return allowed;
989 }
990 
991 struct spdk_nvmf_host *
992 spdk_nvmf_subsystem_get_first_host(struct spdk_nvmf_subsystem *subsystem)
993 {
994 	return TAILQ_FIRST(&subsystem->hosts);
995 }
996 
997 
998 struct spdk_nvmf_host *
999 spdk_nvmf_subsystem_get_next_host(struct spdk_nvmf_subsystem *subsystem,
1000 				  struct spdk_nvmf_host *prev_host)
1001 {
1002 	return TAILQ_NEXT(prev_host, link);
1003 }
1004 
1005 const char *
1006 spdk_nvmf_host_get_nqn(const struct spdk_nvmf_host *host)
1007 {
1008 	return host->nqn;
1009 }
1010 
1011 struct spdk_nvmf_subsystem_listener *
1012 nvmf_subsystem_find_listener(struct spdk_nvmf_subsystem *subsystem,
1013 			     const struct spdk_nvme_transport_id *trid)
1014 {
1015 	struct spdk_nvmf_subsystem_listener *listener;
1016 
1017 	TAILQ_FOREACH(listener, &subsystem->listeners, link) {
1018 		if (spdk_nvme_transport_id_compare(listener->trid, trid) == 0) {
1019 			return listener;
1020 		}
1021 	}
1022 
1023 	return NULL;
1024 }
1025 
1026 /**
1027  * Function to be called once the target is listening.
1028  *
1029  * \param ctx Context argument passed to this function.
1030  * \param status 0 if it completed successfully, or negative errno if it failed.
1031  */
1032 static void
1033 _nvmf_subsystem_add_listener_done(void *ctx, int status)
1034 {
1035 	struct spdk_nvmf_subsystem_listener *listener = ctx;
1036 
1037 	if (status) {
1038 		listener->cb_fn(listener->cb_arg, status);
1039 		free(listener);
1040 		return;
1041 	}
1042 
1043 	TAILQ_INSERT_HEAD(&listener->subsystem->listeners, listener, link);
1044 	nvmf_update_discovery_log(listener->subsystem->tgt, NULL);
1045 	listener->cb_fn(listener->cb_arg, status);
1046 }
1047 
/* Begin listening for this subsystem on the address described by trid.
 * The transport and its listener must already exist; this only associates
 * them with the subsystem. Completion (possibly asynchronous via the
 * transport's listen_associate) is reported through cb_fn(cb_arg, status). */
void
spdk_nvmf_subsystem_add_listener(struct spdk_nvmf_subsystem *subsystem,
				 struct spdk_nvme_transport_id *trid,
				 spdk_nvmf_tgt_subsystem_listen_done_fn cb_fn,
				 void *cb_arg)
{
	struct spdk_nvmf_transport *transport;
	struct spdk_nvmf_subsystem_listener *listener;
	struct spdk_nvmf_listener *tr_listener;
	uint32_t i;
	int rc = 0;

	assert(cb_fn != NULL);

	/* Listeners may only be added while the subsystem is not processing I/O. */
	if (!(subsystem->state == SPDK_NVMF_SUBSYSTEM_INACTIVE ||
	      subsystem->state == SPDK_NVMF_SUBSYSTEM_PAUSED)) {
		cb_fn(cb_arg, -EAGAIN);
		return;
	}

	if (nvmf_subsystem_find_listener(subsystem, trid)) {
		/* Listener already exists in this subsystem */
		cb_fn(cb_arg, 0);
		return;
	}

	transport = spdk_nvmf_tgt_get_transport(subsystem->tgt, trid->trstring);
	if (!transport) {
		SPDK_ERRLOG("Unable to find %s transport. The transport must be created first also make sure it is properly registered.\n",
			    trid->trstring);
		cb_fn(cb_arg, -EINVAL);
		return;
	}

	/* The target-level listener for this address must already be set up. */
	tr_listener = nvmf_transport_find_listener(transport, trid);
	if (!tr_listener) {
		SPDK_ERRLOG("Cannot find transport listener for %s\n", trid->traddr);
		cb_fn(cb_arg, -EINVAL);
		return;
	}

	listener = calloc(1, sizeof(*listener));
	if (!listener) {
		cb_fn(cb_arg, -ENOMEM);
		return;
	}

	/* Point at the transport listener's trid (not a copy) so comparisons
	 * use the canonical form stored by the transport. */
	listener->trid = &tr_listener->trid;
	listener->transport = transport;
	listener->cb_fn = cb_fn;
	listener->cb_arg = cb_arg;
	listener->subsystem = subsystem;
	/* One ANA state slot per possible namespace, all initially optimized. */
	listener->ana_state = calloc(subsystem->max_nsid, sizeof(enum spdk_nvme_ana_state));
	if (!listener->ana_state) {
		free(listener);
		cb_fn(cb_arg, -ENOMEM);
		return;
	}

	for (i = 0; i < subsystem->max_nsid; i++) {
		listener->ana_state[i] = SPDK_NVME_ANA_OPTIMIZED_STATE;
	}

	if (transport->ops->listen_associate != NULL) {
		rc = transport->ops->listen_associate(transport, subsystem, trid);
	}

	/* Ownership of the listener passes to the done callback, which inserts
	 * it on success or frees it on failure. */
	_nvmf_subsystem_add_listener_done(listener, rc);
}
1117 
1118 int
1119 spdk_nvmf_subsystem_remove_listener(struct spdk_nvmf_subsystem *subsystem,
1120 				    const struct spdk_nvme_transport_id *trid)
1121 {
1122 	struct spdk_nvmf_subsystem_listener *listener;
1123 
1124 	if (!(subsystem->state == SPDK_NVMF_SUBSYSTEM_INACTIVE ||
1125 	      subsystem->state == SPDK_NVMF_SUBSYSTEM_PAUSED)) {
1126 		return -EAGAIN;
1127 	}
1128 
1129 	listener = nvmf_subsystem_find_listener(subsystem, trid);
1130 	if (listener == NULL) {
1131 		return -ENOENT;
1132 	}
1133 
1134 	_nvmf_subsystem_remove_listener(subsystem, listener, false);
1135 
1136 	return 0;
1137 }
1138 
1139 void
1140 nvmf_subsystem_remove_all_listeners(struct spdk_nvmf_subsystem *subsystem,
1141 				    bool stop)
1142 {
1143 	struct spdk_nvmf_subsystem_listener *listener, *listener_tmp;
1144 
1145 	TAILQ_FOREACH_SAFE(listener, &subsystem->listeners, link, listener_tmp) {
1146 		_nvmf_subsystem_remove_listener(subsystem, listener, stop);
1147 	}
1148 }
1149 
1150 bool
1151 spdk_nvmf_subsystem_listener_allowed(struct spdk_nvmf_subsystem *subsystem,
1152 				     const struct spdk_nvme_transport_id *trid)
1153 {
1154 	struct spdk_nvmf_subsystem_listener *listener;
1155 
1156 	if (!strcmp(subsystem->subnqn, SPDK_NVMF_DISCOVERY_NQN)) {
1157 		return true;
1158 	}
1159 
1160 	TAILQ_FOREACH(listener, &subsystem->listeners, link) {
1161 		if (spdk_nvme_transport_id_compare(listener->trid, trid) == 0) {
1162 			return true;
1163 		}
1164 	}
1165 
1166 	return false;
1167 }
1168 
1169 struct spdk_nvmf_subsystem_listener *
1170 spdk_nvmf_subsystem_get_first_listener(struct spdk_nvmf_subsystem *subsystem)
1171 {
1172 	return TAILQ_FIRST(&subsystem->listeners);
1173 }
1174 
1175 struct spdk_nvmf_subsystem_listener *
1176 spdk_nvmf_subsystem_get_next_listener(struct spdk_nvmf_subsystem *subsystem,
1177 				      struct spdk_nvmf_subsystem_listener *prev_listener)
1178 {
1179 	return TAILQ_NEXT(prev_listener, link);
1180 }
1181 
1182 const struct spdk_nvme_transport_id *
1183 spdk_nvmf_subsystem_listener_get_trid(struct spdk_nvmf_subsystem_listener *listener)
1184 {
1185 	return listener->trid;
1186 }
1187 
/* Set whether controllers may be created from connections on listeners not
 * explicitly added to this subsystem. NOTE(review): unlike allow_any_host,
 * this write is not protected by the subsystem mutex — presumably called only
 * from the administrative thread; confirm against callers. */
void
spdk_nvmf_subsystem_allow_any_listener(struct spdk_nvmf_subsystem *subsystem,
				       bool allow_any_listener)
{
	subsystem->flags.allow_any_listener = allow_any_listener;
}
1194 
/* Query the allow_any_listener flag. (The "subsytem" misspelling is part of
 * the public API name and cannot be fixed without breaking callers.) */
bool
spdk_nvmf_subsytem_any_listener_allowed(struct spdk_nvmf_subsystem *subsystem)
{
	return subsystem->flags.allow_any_listener;
}
1200 
1201 
/* Context carried through spdk_for_each_channel while propagating a
 * namespace change to every poll group. */
struct subsystem_update_ns_ctx {
	struct spdk_nvmf_subsystem *subsystem;

	/* Optional completion callback (may be NULL) and its argument,
	 * invoked once all poll groups have been updated. */
	spdk_nvmf_subsystem_state_change_done cb_fn;
	void *cb_arg;
};
1208 
1209 static void
1210 subsystem_update_ns_done(struct spdk_io_channel_iter *i, int status)
1211 {
1212 	struct subsystem_update_ns_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
1213 
1214 	if (ctx->cb_fn) {
1215 		ctx->cb_fn(ctx->subsystem, ctx->cb_arg, status);
1216 	}
1217 	free(ctx);
1218 }
1219 
1220 static void
1221 subsystem_update_ns_on_pg(struct spdk_io_channel_iter *i)
1222 {
1223 	int rc;
1224 	struct subsystem_update_ns_ctx *ctx;
1225 	struct spdk_nvmf_poll_group *group;
1226 	struct spdk_nvmf_subsystem *subsystem;
1227 
1228 	ctx = spdk_io_channel_iter_get_ctx(i);
1229 	group = spdk_io_channel_get_ctx(spdk_io_channel_iter_get_channel(i));
1230 	subsystem = ctx->subsystem;
1231 
1232 	rc = nvmf_poll_group_update_subsystem(group, subsystem);
1233 	spdk_for_each_channel_continue(i, rc);
1234 }
1235 
1236 static int
1237 nvmf_subsystem_update_ns(struct spdk_nvmf_subsystem *subsystem, spdk_channel_for_each_cpl cpl,
1238 			 void *ctx)
1239 {
1240 	spdk_for_each_channel(subsystem->tgt,
1241 			      subsystem_update_ns_on_pg,
1242 			      ctx,
1243 			      cpl);
1244 
1245 	return 0;
1246 }
1247 
/* Notify every controller attached to the subsystem that namespace nsid was
 * added, removed, or modified, by invoking nvmf_ctrlr_ns_changed on each. */
static void
nvmf_subsystem_ns_changed(struct spdk_nvmf_subsystem *subsystem, uint32_t nsid)
{
	struct spdk_nvmf_ctrlr *ctrlr;

	TAILQ_FOREACH(ctrlr, &subsystem->ctrlrs, link) {
		nvmf_ctrlr_ns_changed(ctrlr, nsid);
	}
}
1257 
1258 static uint32_t
1259 nvmf_ns_reservation_clear_all_registrants(struct spdk_nvmf_ns *ns);
1260 
/* Remove namespace nsid from the subsystem and release everything tied to it
 * (reservation registrants, bdev claim and descriptor, ANA group reference).
 * The subsystem must be inactive or paused. Returns 0 on success, -1 on any
 * failure (invalid nsid, no such namespace, or wrong state). */
int
spdk_nvmf_subsystem_remove_ns(struct spdk_nvmf_subsystem *subsystem, uint32_t nsid)
{
	struct spdk_nvmf_transport *transport;
	struct spdk_nvmf_ns *ns;

	if (!(subsystem->state == SPDK_NVMF_SUBSYSTEM_INACTIVE ||
	      subsystem->state == SPDK_NVMF_SUBSYSTEM_PAUSED)) {
		assert(false);
		return -1;
	}

	/* NSIDs are 1-based; 0 and anything above max_nsid are invalid. */
	if (nsid == 0 || nsid > subsystem->max_nsid) {
		return -1;
	}

	ns = subsystem->ns[nsid - 1];
	if (!ns) {
		return -1;
	}

	/* Unpublish the slot before tearing the namespace down. */
	subsystem->ns[nsid - 1] = NULL;

	assert(ns->anagrpid - 1 < subsystem->max_nsid);
	assert(subsystem->ana_group[ns->anagrpid - 1] > 0);

	/* Drop this namespace's reference on its ANA group. */
	subsystem->ana_group[ns->anagrpid - 1]--;

	free(ns->ptpl_file);
	nvmf_ns_reservation_clear_all_registrants(ns);
	spdk_bdev_module_release_bdev(ns->bdev);
	spdk_bdev_close(ns->desc);
	free(ns);

	/* Give each transport a chance to drop any per-namespace state. */
	for (transport = spdk_nvmf_transport_get_first(subsystem->tgt); transport;
	     transport = spdk_nvmf_transport_get_next(transport)) {
		if (transport->ops->subsystem_remove_ns) {
			transport->ops->subsystem_remove_ns(transport, subsystem, nsid);
		}
	}

	nvmf_subsystem_ns_changed(subsystem, nsid);

	return 0;
}
1306 
/* Context for asynchronous namespace hot-remove / resize handling: records
 * which namespace of which subsystem is affected, plus the pause-completion
 * callback used when the pause must be retried. */
struct subsystem_ns_change_ctx {
	struct spdk_nvmf_subsystem		*subsystem;
	spdk_nvmf_subsystem_state_change_done	cb_fn;
	uint32_t				nsid;
};
1312 
1313 static void
1314 _nvmf_ns_hot_remove(struct spdk_nvmf_subsystem *subsystem,
1315 		    void *cb_arg, int status)
1316 {
1317 	struct subsystem_ns_change_ctx *ctx = cb_arg;
1318 	int rc;
1319 
1320 	rc = spdk_nvmf_subsystem_remove_ns(subsystem, ctx->nsid);
1321 	if (rc != 0) {
1322 		SPDK_ERRLOG("Failed to make changes to NVME-oF subsystem with id: %u\n", subsystem->id);
1323 	}
1324 
1325 	spdk_nvmf_subsystem_resume(subsystem, NULL, NULL);
1326 
1327 	free(ctx);
1328 }
1329 
1330 static void
1331 nvmf_ns_change_msg(void *ns_ctx)
1332 {
1333 	struct subsystem_ns_change_ctx *ctx = ns_ctx;
1334 	int rc;
1335 
1336 	rc = spdk_nvmf_subsystem_pause(ctx->subsystem, ctx->nsid, ctx->cb_fn, ctx);
1337 	if (rc) {
1338 		if (rc == -EBUSY) {
1339 			/* Try again, this is not a permanent situation. */
1340 			spdk_thread_send_msg(spdk_get_thread(), nvmf_ns_change_msg, ctx);
1341 		} else {
1342 			free(ctx);
1343 			SPDK_ERRLOG("Unable to pause subsystem to process namespace removal!\n");
1344 		}
1345 	}
1346 }
1347 
1348 static void
1349 nvmf_ns_hot_remove(void *remove_ctx)
1350 {
1351 	struct spdk_nvmf_ns *ns = remove_ctx;
1352 	struct subsystem_ns_change_ctx *ns_ctx;
1353 	int rc;
1354 
1355 	/* We have to allocate a new context because this op
1356 	 * is asynchronous and we could lose the ns in the middle.
1357 	 */
1358 	ns_ctx = calloc(1, sizeof(struct subsystem_ns_change_ctx));
1359 	if (!ns_ctx) {
1360 		SPDK_ERRLOG("Unable to allocate context to process namespace removal!\n");
1361 		return;
1362 	}
1363 
1364 	ns_ctx->subsystem = ns->subsystem;
1365 	ns_ctx->nsid = ns->opts.nsid;
1366 	ns_ctx->cb_fn = _nvmf_ns_hot_remove;
1367 
1368 	rc = spdk_nvmf_subsystem_pause(ns->subsystem, ns_ctx->nsid, _nvmf_ns_hot_remove, ns_ctx);
1369 	if (rc) {
1370 		if (rc == -EBUSY) {
1371 			/* Try again, this is not a permanent situation. */
1372 			spdk_thread_send_msg(spdk_get_thread(), nvmf_ns_change_msg, ns_ctx);
1373 		} else {
1374 			SPDK_ERRLOG("Unable to pause subsystem to process namespace removal!\n");
1375 			free(ns_ctx);
1376 		}
1377 	}
1378 }
1379 
1380 static void
1381 _nvmf_ns_resize(struct spdk_nvmf_subsystem *subsystem, void *cb_arg, int status)
1382 {
1383 	struct subsystem_ns_change_ctx *ctx = cb_arg;
1384 
1385 	nvmf_subsystem_ns_changed(subsystem, ctx->nsid);
1386 	spdk_nvmf_subsystem_resume(subsystem, NULL, NULL);
1387 
1388 	free(ctx);
1389 }
1390 
1391 static void
1392 nvmf_ns_resize(void *event_ctx)
1393 {
1394 	struct spdk_nvmf_ns *ns = event_ctx;
1395 	struct subsystem_ns_change_ctx *ns_ctx;
1396 	int rc;
1397 
1398 	/* We have to allocate a new context because this op
1399 	 * is asynchronous and we could lose the ns in the middle.
1400 	 */
1401 	ns_ctx = calloc(1, sizeof(struct subsystem_ns_change_ctx));
1402 	if (!ns_ctx) {
1403 		SPDK_ERRLOG("Unable to allocate context to process namespace removal!\n");
1404 		return;
1405 	}
1406 
1407 	ns_ctx->subsystem = ns->subsystem;
1408 	ns_ctx->nsid = ns->opts.nsid;
1409 	ns_ctx->cb_fn = _nvmf_ns_resize;
1410 
1411 	/* Specify 0 for the nsid here, because we do not need to pause the namespace.
1412 	 * Namespaces can only be resized bigger, so there is no need to quiesce I/O.
1413 	 */
1414 	rc = spdk_nvmf_subsystem_pause(ns->subsystem, 0, _nvmf_ns_resize, ns_ctx);
1415 	if (rc) {
1416 		if (rc == -EBUSY) {
1417 			/* Try again, this is not a permanent situation. */
1418 			spdk_thread_send_msg(spdk_get_thread(), nvmf_ns_change_msg, ns_ctx);
1419 		} else {
1420 			SPDK_ERRLOG("Unable to pause subsystem to process namespace resize!\n");
1421 			free(ns_ctx);
1422 		}
1423 	}
1424 }
1425 
1426 static void
1427 nvmf_ns_event(enum spdk_bdev_event_type type,
1428 	      struct spdk_bdev *bdev,
1429 	      void *event_ctx)
1430 {
1431 	SPDK_DEBUGLOG(nvmf, "Bdev event: type %d, name %s, subsystem_id %d, ns_id %d\n",
1432 		      type,
1433 		      spdk_bdev_get_name(bdev),
1434 		      ((struct spdk_nvmf_ns *)event_ctx)->subsystem->id,
1435 		      ((struct spdk_nvmf_ns *)event_ctx)->nsid);
1436 
1437 	switch (type) {
1438 	case SPDK_BDEV_EVENT_REMOVE:
1439 		nvmf_ns_hot_remove(event_ctx);
1440 		break;
1441 	case SPDK_BDEV_EVENT_RESIZE:
1442 		nvmf_ns_resize(event_ctx);
1443 		break;
1444 	default:
1445 		SPDK_NOTICELOG("Unsupported bdev event: type %d\n", type);
1446 		break;
1447 	}
1448 }
1449 
/* Initialize an spdk_nvmf_ns_opts structure to defaults. opts_size is the
 * caller's sizeof(opts) and allows binary compatibility with callers built
 * against older (smaller) versions of the structure: only fields that fit
 * within opts_size are touched. */
void
spdk_nvmf_ns_opts_get_defaults(struct spdk_nvmf_ns_opts *opts, size_t opts_size)
{
	if (!opts) {
		SPDK_ERRLOG("opts should not be NULL.\n");
		return;
	}

	if (!opts_size) {
		SPDK_ERRLOG("opts_size should not be zero.\n");
		return;
	}

	memset(opts, 0, opts_size);
	opts->opts_size = opts_size;

/* FIELD_OK: true when the field lies entirely within the caller's struct. */
#define FIELD_OK(field) \
	offsetof(struct spdk_nvmf_ns_opts, field) + sizeof(opts->field) <= opts_size

#define SET_FIELD(field, value) \
	if (FIELD_OK(field)) { \
		opts->field = value; \
	} \

	/* All current fields are set to 0 by default. */
	SET_FIELD(nsid, 0);
	if (FIELD_OK(nguid)) {
		memset(opts->nguid, 0, sizeof(opts->nguid));
	}
	if (FIELD_OK(eui64)) {
		memset(opts->eui64, 0, sizeof(opts->eui64));
	}
	if (FIELD_OK(uuid)) {
		memset(&opts->uuid, 0, sizeof(opts->uuid));
	}
	SET_FIELD(anagrpid, 0);

#undef FIELD_OK
#undef SET_FIELD
}
1490 
/* Copy namespace options supplied by the user into a fully-sized local opts
 * structure, honoring the user's declared opts_size so that fields the caller
 * never knew about are left at their defaults. */
static void
nvmf_ns_opts_copy(struct spdk_nvmf_ns_opts *opts,
		  const struct spdk_nvmf_ns_opts *user_opts,
		  size_t opts_size)
{
/* FIELD_OK: true when the field fits within the user's (possibly older,
 * smaller) structure layout. */
#define FIELD_OK(field)	\
	offsetof(struct spdk_nvmf_ns_opts, field) + sizeof(opts->field) <= user_opts->opts_size

#define SET_FIELD(field) \
	if (FIELD_OK(field)) { \
		opts->field = user_opts->field;	\
	} \

	SET_FIELD(nsid);
	if (FIELD_OK(nguid)) {
		memcpy(opts->nguid, user_opts->nguid, sizeof(opts->nguid));
	}
	if (FIELD_OK(eui64)) {
		memcpy(opts->eui64, user_opts->eui64, sizeof(opts->eui64));
	}
	if (FIELD_OK(uuid)) {
		memcpy(&opts->uuid, &user_opts->uuid, sizeof(opts->uuid));
	}
	SET_FIELD(anagrpid);

	opts->opts_size = user_opts->opts_size;

	/* We should not remove this statement, but need to update the assert statement
	 * if we add a new field, and also add a corresponding SET_FIELD statement.
	 */
	SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_ns_opts) == 64, "Incorrect size");

#undef FIELD_OK
#undef SET_FIELD
}
1526 
/* Dummy bdev module used to claim bdevs that are exported as namespaces. */
static struct spdk_bdev_module ns_bdev_module = {
	.name	= "NVMe-oF Target",
};
1531 
1532 static int
1533 nvmf_ns_load_reservation(const char *file, struct spdk_nvmf_reservation_info *info);
1534 static int
1535 nvmf_ns_reservation_restore(struct spdk_nvmf_ns *ns, struct spdk_nvmf_reservation_info *info);
1536 
1537 uint32_t
1538 spdk_nvmf_subsystem_add_ns_ext(struct spdk_nvmf_subsystem *subsystem, const char *bdev_name,
1539 			       const struct spdk_nvmf_ns_opts *user_opts, size_t opts_size,
1540 			       const char *ptpl_file)
1541 {
1542 	struct spdk_nvmf_transport *transport;
1543 	struct spdk_nvmf_ns_opts opts;
1544 	struct spdk_nvmf_ns *ns;
1545 	struct spdk_nvmf_reservation_info info = {0};
1546 	int rc;
1547 
1548 	if (!(subsystem->state == SPDK_NVMF_SUBSYSTEM_INACTIVE ||
1549 	      subsystem->state == SPDK_NVMF_SUBSYSTEM_PAUSED)) {
1550 		return 0;
1551 	}
1552 
1553 	spdk_nvmf_ns_opts_get_defaults(&opts, sizeof(opts));
1554 	if (user_opts) {
1555 		nvmf_ns_opts_copy(&opts, user_opts, opts_size);
1556 	}
1557 
1558 	if (opts.nsid == SPDK_NVME_GLOBAL_NS_TAG) {
1559 		SPDK_ERRLOG("Invalid NSID %" PRIu32 "\n", opts.nsid);
1560 		return 0;
1561 	}
1562 
1563 	if (opts.nsid == 0) {
1564 		/*
1565 		 * NSID not specified - find a free index.
1566 		 *
1567 		 * If no free slots are found, opts.nsid will be subsystem->max_nsid + 1, which will
1568 		 * expand max_nsid if possible.
1569 		 */
1570 		for (opts.nsid = 1; opts.nsid <= subsystem->max_nsid; opts.nsid++) {
1571 			if (_nvmf_subsystem_get_ns(subsystem, opts.nsid) == NULL) {
1572 				break;
1573 			}
1574 		}
1575 	}
1576 
1577 	if (_nvmf_subsystem_get_ns(subsystem, opts.nsid)) {
1578 		SPDK_ERRLOG("Requested NSID %" PRIu32 " already in use\n", opts.nsid);
1579 		return 0;
1580 	}
1581 
1582 	if (opts.nsid > subsystem->max_nsid) {
1583 		SPDK_ERRLOG("NSID greater than maximum not allowed\n");
1584 		return 0;
1585 	}
1586 
1587 	if (opts.anagrpid == 0) {
1588 		opts.anagrpid = opts.nsid;
1589 	}
1590 
1591 	if (opts.anagrpid > subsystem->max_nsid) {
1592 		SPDK_ERRLOG("ANAGRPID greater than maximum NSID not allowed\n");
1593 		return 0;
1594 	}
1595 
1596 	ns = calloc(1, sizeof(*ns));
1597 	if (ns == NULL) {
1598 		SPDK_ERRLOG("Namespace allocation failed\n");
1599 		return 0;
1600 	}
1601 
1602 	rc = spdk_bdev_open_ext(bdev_name, true, nvmf_ns_event, ns, &ns->desc);
1603 	if (rc != 0) {
1604 		SPDK_ERRLOG("Subsystem %s: bdev %s cannot be opened, error=%d\n",
1605 			    subsystem->subnqn, bdev_name, rc);
1606 		free(ns);
1607 		return 0;
1608 	}
1609 
1610 	ns->bdev = spdk_bdev_desc_get_bdev(ns->desc);
1611 
1612 	if (spdk_bdev_get_md_size(ns->bdev) != 0 && !spdk_bdev_is_md_interleaved(ns->bdev)) {
1613 		SPDK_ERRLOG("Can't attach bdev with separate metadata.\n");
1614 		spdk_bdev_close(ns->desc);
1615 		free(ns);
1616 		return 0;
1617 	}
1618 
1619 	rc = spdk_bdev_module_claim_bdev(ns->bdev, ns->desc, &ns_bdev_module);
1620 	if (rc != 0) {
1621 		spdk_bdev_close(ns->desc);
1622 		free(ns);
1623 		return 0;
1624 	}
1625 
1626 	/* Cache the zcopy capability of the bdev device */
1627 	ns->zcopy = spdk_bdev_io_type_supported(ns->bdev, SPDK_BDEV_IO_TYPE_ZCOPY);
1628 
1629 	if (spdk_mem_all_zero(&opts.uuid, sizeof(opts.uuid))) {
1630 		opts.uuid = *spdk_bdev_get_uuid(ns->bdev);
1631 	}
1632 
1633 	/* if nguid descriptor is supported by bdev module (nvme) then uuid = nguid */
1634 	if (spdk_mem_all_zero(opts.nguid, sizeof(opts.nguid))) {
1635 		SPDK_STATIC_ASSERT(sizeof(opts.nguid) == sizeof(opts.uuid), "size mismatch");
1636 		memcpy(opts.nguid, spdk_bdev_get_uuid(ns->bdev), sizeof(opts.nguid));
1637 	}
1638 
1639 	ns->opts = opts;
1640 	ns->subsystem = subsystem;
1641 	subsystem->ns[opts.nsid - 1] = ns;
1642 	ns->nsid = opts.nsid;
1643 	ns->anagrpid = opts.anagrpid;
1644 	subsystem->ana_group[ns->anagrpid - 1]++;
1645 	TAILQ_INIT(&ns->registrants);
1646 	if (ptpl_file) {
1647 		rc = nvmf_ns_load_reservation(ptpl_file, &info);
1648 		if (!rc) {
1649 			rc = nvmf_ns_reservation_restore(ns, &info);
1650 			if (rc) {
1651 				SPDK_ERRLOG("Subsystem restore reservation failed\n");
1652 				goto err_ns_reservation_restore;
1653 			}
1654 		}
1655 		ns->ptpl_file = strdup(ptpl_file);
1656 		if (!ns->ptpl_file) {
1657 			SPDK_ERRLOG("Namespace ns->ptpl_file allocation failed\n");
1658 			goto err_strdup;
1659 		}
1660 	}
1661 
1662 	for (transport = spdk_nvmf_transport_get_first(subsystem->tgt); transport;
1663 	     transport = spdk_nvmf_transport_get_next(transport)) {
1664 		if (transport->ops->subsystem_add_ns) {
1665 			rc = transport->ops->subsystem_add_ns(transport, subsystem, ns);
1666 			if (rc) {
1667 				SPDK_ERRLOG("Namespace attachment is not allowed by %s transport\n", transport->ops->name);
1668 				goto err_subsystem_add_ns;
1669 			}
1670 		}
1671 	}
1672 
1673 	SPDK_DEBUGLOG(nvmf, "Subsystem %s: bdev %s assigned nsid %" PRIu32 "\n",
1674 		      spdk_nvmf_subsystem_get_nqn(subsystem),
1675 		      bdev_name,
1676 		      opts.nsid);
1677 
1678 	nvmf_subsystem_ns_changed(subsystem, opts.nsid);
1679 
1680 	return opts.nsid;
1681 
1682 err_subsystem_add_ns:
1683 	free(ns->ptpl_file);
1684 err_strdup:
1685 	nvmf_ns_reservation_clear_all_registrants(ns);
1686 err_ns_reservation_restore:
1687 	subsystem->ns[opts.nsid - 1] = NULL;
1688 	spdk_bdev_module_release_bdev(ns->bdev);
1689 	spdk_bdev_close(ns->desc);
1690 	free(ns);
1691 	return 0;
1692 
1693 }
1694 
1695 static uint32_t
1696 nvmf_subsystem_get_next_allocated_nsid(struct spdk_nvmf_subsystem *subsystem,
1697 				       uint32_t prev_nsid)
1698 {
1699 	uint32_t nsid;
1700 
1701 	if (prev_nsid >= subsystem->max_nsid) {
1702 		return 0;
1703 	}
1704 
1705 	for (nsid = prev_nsid + 1; nsid <= subsystem->max_nsid; nsid++) {
1706 		if (subsystem->ns[nsid - 1]) {
1707 			return nsid;
1708 		}
1709 	}
1710 
1711 	return 0;
1712 }
1713 
/* Lowest-numbered allocated namespace, or NULL when none exist. */
struct spdk_nvmf_ns *
spdk_nvmf_subsystem_get_first_ns(struct spdk_nvmf_subsystem *subsystem)
{
	return _nvmf_subsystem_get_ns(subsystem,
				      nvmf_subsystem_get_next_allocated_nsid(subsystem, 0));
}
1722 
1723 struct spdk_nvmf_ns *
1724 spdk_nvmf_subsystem_get_next_ns(struct spdk_nvmf_subsystem *subsystem,
1725 				struct spdk_nvmf_ns *prev_ns)
1726 {
1727 	uint32_t next_nsid;
1728 
1729 	next_nsid = nvmf_subsystem_get_next_allocated_nsid(subsystem, prev_ns->opts.nsid);
1730 	return _nvmf_subsystem_get_ns(subsystem, next_nsid);
1731 }
1732 
/* Public wrapper over the internal NSID lookup; NULL if nsid is unallocated. */
struct spdk_nvmf_ns *
spdk_nvmf_subsystem_get_ns(struct spdk_nvmf_subsystem *subsystem, uint32_t nsid)
{
	struct spdk_nvmf_ns *ns = _nvmf_subsystem_get_ns(subsystem, nsid);

	return ns;
}
1738 
1739 uint32_t
1740 spdk_nvmf_ns_get_id(const struct spdk_nvmf_ns *ns)
1741 {
1742 	return ns->opts.nsid;
1743 }
1744 
1745 struct spdk_bdev *
1746 spdk_nvmf_ns_get_bdev(struct spdk_nvmf_ns *ns)
1747 {
1748 	return ns->bdev;
1749 }
1750 
1751 void
1752 spdk_nvmf_ns_get_opts(const struct spdk_nvmf_ns *ns, struct spdk_nvmf_ns_opts *opts,
1753 		      size_t opts_size)
1754 {
1755 	memset(opts, 0, opts_size);
1756 	memcpy(opts, &ns->opts, spdk_min(sizeof(ns->opts), opts_size));
1757 }
1758 
/* Serial number string stored on the subsystem (set via set_sn). */
const char *
spdk_nvmf_subsystem_get_sn(const struct spdk_nvmf_subsystem *subsystem)
{
	return subsystem->sn;
}
1764 
1765 int
1766 spdk_nvmf_subsystem_set_sn(struct spdk_nvmf_subsystem *subsystem, const char *sn)
1767 {
1768 	size_t len, max_len;
1769 
1770 	max_len = sizeof(subsystem->sn) - 1;
1771 	len = strlen(sn);
1772 	if (len > max_len) {
1773 		SPDK_DEBUGLOG(nvmf, "Invalid sn \"%s\": length %zu > max %zu\n",
1774 			      sn, len, max_len);
1775 		return -1;
1776 	}
1777 
1778 	if (!nvmf_valid_ascii_string(sn, len)) {
1779 		SPDK_DEBUGLOG(nvmf, "Non-ASCII sn\n");
1780 		SPDK_LOGDUMP(nvmf, "sn", sn, len);
1781 		return -1;
1782 	}
1783 
1784 	snprintf(subsystem->sn, sizeof(subsystem->sn), "%s", sn);
1785 
1786 	return 0;
1787 }
1788 
/* Model number string stored on the subsystem (set via set_mn). */
const char *
spdk_nvmf_subsystem_get_mn(const struct spdk_nvmf_subsystem *subsystem)
{
	return subsystem->mn;
}
1794 
1795 int
1796 spdk_nvmf_subsystem_set_mn(struct spdk_nvmf_subsystem *subsystem, const char *mn)
1797 {
1798 	size_t len, max_len;
1799 
1800 	if (mn == NULL) {
1801 		mn = MODEL_NUMBER_DEFAULT;
1802 	}
1803 	max_len = sizeof(subsystem->mn) - 1;
1804 	len = strlen(mn);
1805 	if (len > max_len) {
1806 		SPDK_DEBUGLOG(nvmf, "Invalid mn \"%s\": length %zu > max %zu\n",
1807 			      mn, len, max_len);
1808 		return -1;
1809 	}
1810 
1811 	if (!nvmf_valid_ascii_string(mn, len)) {
1812 		SPDK_DEBUGLOG(nvmf, "Non-ASCII mn\n");
1813 		SPDK_LOGDUMP(nvmf, "mn", mn, len);
1814 		return -1;
1815 	}
1816 
1817 	snprintf(subsystem->mn, sizeof(subsystem->mn), "%s", mn);
1818 
1819 	return 0;
1820 }
1821 
/* Subsystem NQN; owned by the subsystem, valid for its lifetime. */
const char *
spdk_nvmf_subsystem_get_nqn(const struct spdk_nvmf_subsystem *subsystem)
{
	return subsystem->subnqn;
}
1827 
/* Subsystem type (e.g. NVMe or discovery). */
enum spdk_nvmf_subtype spdk_nvmf_subsystem_get_type(struct spdk_nvmf_subsystem *subsystem)
{
	return subsystem->subtype;
}
1832 
/* Highest NSID this subsystem can host (also the size of its ns array). */
uint32_t
spdk_nvmf_subsystem_get_max_nsid(struct spdk_nvmf_subsystem *subsystem)
{
	return subsystem->max_nsid;
}
1838 
/* Configure the controller ID range [min_cntlid, max_cntlid] used when
 * assigning CNTLIDs. Only allowed while the subsystem is inactive. Returns 0
 * on success, -EAGAIN if the subsystem is not inactive, or -EINVAL for an
 * invalid or spec-reserved range. */
int
nvmf_subsystem_set_cntlid_range(struct spdk_nvmf_subsystem *subsystem,
				uint16_t min_cntlid, uint16_t max_cntlid)
{
	if (subsystem->state != SPDK_NVMF_SUBSYSTEM_INACTIVE) {
		return -EAGAIN;
	}

	if (min_cntlid > max_cntlid) {
		return -EINVAL;
	}
	/* The spec reserves cntlid values in the range FFF0h to FFFFh. */
	if (min_cntlid < NVMF_MIN_CNTLID || min_cntlid > NVMF_MAX_CNTLID ||
	    max_cntlid < NVMF_MIN_CNTLID || max_cntlid > NVMF_MAX_CNTLID) {
		return -EINVAL;
	}
	subsystem->min_cntlid = min_cntlid;
	subsystem->max_cntlid = max_cntlid;
	/* next_cntlid is pre-incremented before each use (see
	 * nvmf_subsystem_gen_cntlid), so reseed it one below min_cntlid when
	 * the previous value falls outside the new range. */
	if (subsystem->next_cntlid < min_cntlid || subsystem->next_cntlid > max_cntlid - 1) {
		subsystem->next_cntlid = min_cntlid - 1;
	}

	return 0;
}
1863 
/* Pick the next unused controller ID in [min_cntlid, max_cntlid], advancing
 * the subsystem's next_cntlid cursor. Returns 0xFFFF when every CNTLID in the
 * range is currently in use. */
static uint16_t
nvmf_subsystem_gen_cntlid(struct spdk_nvmf_subsystem *subsystem)
{
	int count;

	/*
	 * In the worst case, we might have to try all CNTLID values between min_cntlid and max_cntlid
	 * before we find one that is unused (or find that all values are in use).
	 */
	for (count = 0; count < subsystem->max_cntlid - subsystem->min_cntlid + 1; count++) {
		subsystem->next_cntlid++;
		if (subsystem->next_cntlid > subsystem->max_cntlid) {
			/* Wrap around to the bottom of the configured range. */
			subsystem->next_cntlid = subsystem->min_cntlid;
		}

		/* Check if a controller with this cntlid currently exists. */
		if (nvmf_subsystem_get_ctrlr(subsystem, subsystem->next_cntlid) == NULL) {
			/* Found unused cntlid */
			return subsystem->next_cntlid;
		}
	}

	/* All valid cntlid values are in use. */
	return 0xFFFF;
}
1889 
1890 int
1891 nvmf_subsystem_add_ctrlr(struct spdk_nvmf_subsystem *subsystem, struct spdk_nvmf_ctrlr *ctrlr)
1892 {
1893 	ctrlr->cntlid = nvmf_subsystem_gen_cntlid(subsystem);
1894 	if (ctrlr->cntlid == 0xFFFF) {
1895 		/* Unable to get a cntlid */
1896 		SPDK_ERRLOG("Reached max simultaneous ctrlrs\n");
1897 		return -EBUSY;
1898 	}
1899 
1900 	TAILQ_INSERT_TAIL(&subsystem->ctrlrs, ctrlr, link);
1901 
1902 	return 0;
1903 }
1904 
/* Unlink ctrlr from its subsystem. Must run on the subsystem's thread; the
 * controller must actually belong to this subsystem (both asserted). */
void
nvmf_subsystem_remove_ctrlr(struct spdk_nvmf_subsystem *subsystem,
			    struct spdk_nvmf_ctrlr *ctrlr)
{
	assert(spdk_get_thread() == subsystem->thread);
	assert(subsystem == ctrlr->subsys);
	SPDK_DEBUGLOG(nvmf, "remove ctrlr %p from subsys %p %s\n", ctrlr, subsystem, subsystem->subnqn);
	TAILQ_REMOVE(&subsystem->ctrlrs, ctrlr, link);
}
1914 
1915 struct spdk_nvmf_ctrlr *
1916 nvmf_subsystem_get_ctrlr(struct spdk_nvmf_subsystem *subsystem, uint16_t cntlid)
1917 {
1918 	struct spdk_nvmf_ctrlr *ctrlr;
1919 
1920 	TAILQ_FOREACH(ctrlr, &subsystem->ctrlrs, link) {
1921 		if (ctrlr->cntlid == cntlid) {
1922 			return ctrlr;
1923 		}
1924 	}
1925 
1926 	return NULL;
1927 }
1928 
/* Maximum number of namespaces this subsystem supports (same as max_nsid). */
uint32_t
spdk_nvmf_subsystem_get_max_namespaces(const struct spdk_nvmf_subsystem *subsystem)
{
	return subsystem->max_nsid;
}
1934 
/* Lower bound of the controller ID assignment range. */
uint16_t
spdk_nvmf_subsystem_get_min_cntlid(const struct spdk_nvmf_subsystem *subsystem)
{
	return subsystem->min_cntlid;
}
1940 
/* Upper bound of the controller ID assignment range. */
uint16_t
spdk_nvmf_subsystem_get_max_cntlid(const struct spdk_nvmf_subsystem *subsystem)
{
	return subsystem->max_cntlid;
}
1946 
/* One registrant entry decoded from the persisted reservation JSON file.
 * host_uuid is heap-allocated by the JSON decoder and freed by the loader. */
struct _nvmf_ns_registrant {
	uint64_t		rkey;
	char			*host_uuid;
};

/* Fixed-capacity registrant array decoded from the "registrants" JSON key. */
struct _nvmf_ns_registrants {
	size_t				num_regs;
	struct _nvmf_ns_registrant	reg[SPDK_NVMF_MAX_NUM_REGISTRANTS];
};

/* Complete persisted reservation state as stored in the PTPL file. The
 * string fields are heap-allocated by the JSON decoder. */
struct _nvmf_ns_reservation {
	bool					ptpl_activated;
	enum spdk_nvme_reservation_type		rtype;
	uint64_t				crkey;
	char					*bdev_uuid;
	char					*holder_uuid;
	struct _nvmf_ns_registrants		regs;
};
1965 
/* JSON decoders for one registrant object: {"rkey": ..., "host_uuid": ...}. */
static const struct spdk_json_object_decoder nvmf_ns_pr_reg_decoders[] = {
	{"rkey", offsetof(struct _nvmf_ns_registrant, rkey), spdk_json_decode_uint64},
	{"host_uuid", offsetof(struct _nvmf_ns_registrant, host_uuid), spdk_json_decode_string},
};
1970 
1971 static int
1972 nvmf_decode_ns_pr_reg(const struct spdk_json_val *val, void *out)
1973 {
1974 	struct _nvmf_ns_registrant *reg = out;
1975 
1976 	return spdk_json_decode_object(val, nvmf_ns_pr_reg_decoders,
1977 				       SPDK_COUNTOF(nvmf_ns_pr_reg_decoders), reg);
1978 }
1979 
1980 static int
1981 nvmf_decode_ns_pr_regs(const struct spdk_json_val *val, void *out)
1982 {
1983 	struct _nvmf_ns_registrants *regs = out;
1984 
1985 	return spdk_json_decode_array(val, nvmf_decode_ns_pr_reg, regs->reg,
1986 				      SPDK_NVMF_MAX_NUM_REGISTRANTS, &regs->num_regs,
1987 				      sizeof(struct _nvmf_ns_registrant));
1988 }
1989 
/* JSON decoders for the top-level persisted reservation object. Keys marked
 * optional (trailing "true") may be absent from older files. */
static const struct spdk_json_object_decoder nvmf_ns_pr_decoders[] = {
	{"ptpl", offsetof(struct _nvmf_ns_reservation, ptpl_activated), spdk_json_decode_bool, true},
	{"rtype", offsetof(struct _nvmf_ns_reservation, rtype), spdk_json_decode_uint32, true},
	{"crkey", offsetof(struct _nvmf_ns_reservation, crkey), spdk_json_decode_uint64, true},
	{"bdev_uuid", offsetof(struct _nvmf_ns_reservation, bdev_uuid), spdk_json_decode_string},
	{"holder_uuid", offsetof(struct _nvmf_ns_reservation, holder_uuid), spdk_json_decode_string, true},
	{"registrants", offsetof(struct _nvmf_ns_reservation, regs), nvmf_decode_ns_pr_regs},
};
1998 
1999 static int
2000 nvmf_ns_load_reservation(const char *file, struct spdk_nvmf_reservation_info *info)
2001 {
2002 	FILE *fd;
2003 	size_t json_size;
2004 	ssize_t values_cnt, rc;
2005 	void *json = NULL, *end;
2006 	struct spdk_json_val *values = NULL;
2007 	struct _nvmf_ns_reservation res = {};
2008 	uint32_t i;
2009 
2010 	fd = fopen(file, "r");
2011 	/* It's not an error if the file does not exist */
2012 	if (!fd) {
2013 		SPDK_NOTICELOG("File %s does not exist\n", file);
2014 		return -ENOENT;
2015 	}
2016 
2017 	/* Load all persist file contents into a local buffer */
2018 	json = spdk_posix_file_load(fd, &json_size);
2019 	fclose(fd);
2020 	if (!json) {
2021 		SPDK_ERRLOG("Load persit file %s failed\n", file);
2022 		return -ENOMEM;
2023 	}
2024 
2025 	rc = spdk_json_parse(json, json_size, NULL, 0, &end, 0);
2026 	if (rc < 0) {
2027 		SPDK_NOTICELOG("Parsing JSON configuration failed (%zd)\n", rc);
2028 		goto exit;
2029 	}
2030 
2031 	values_cnt = rc;
2032 	values = calloc(values_cnt, sizeof(struct spdk_json_val));
2033 	if (values == NULL) {
2034 		goto exit;
2035 	}
2036 
2037 	rc = spdk_json_parse(json, json_size, values, values_cnt, &end, 0);
2038 	if (rc != values_cnt) {
2039 		SPDK_ERRLOG("Parsing JSON configuration failed (%zd)\n", rc);
2040 		goto exit;
2041 	}
2042 
2043 	/* Decode json */
2044 	if (spdk_json_decode_object(values, nvmf_ns_pr_decoders,
2045 				    SPDK_COUNTOF(nvmf_ns_pr_decoders),
2046 				    &res)) {
2047 		SPDK_ERRLOG("Invalid objects in the persist file %s\n", file);
2048 		rc = -EINVAL;
2049 		goto exit;
2050 	}
2051 
2052 	if (res.regs.num_regs > SPDK_NVMF_MAX_NUM_REGISTRANTS) {
2053 		SPDK_ERRLOG("Can only support up to %u registrants\n", SPDK_NVMF_MAX_NUM_REGISTRANTS);
2054 		rc = -ERANGE;
2055 		goto exit;
2056 	}
2057 
2058 	rc = 0;
2059 	info->ptpl_activated = res.ptpl_activated;
2060 	info->rtype = res.rtype;
2061 	info->crkey = res.crkey;
2062 	snprintf(info->bdev_uuid, sizeof(info->bdev_uuid), "%s", res.bdev_uuid);
2063 	snprintf(info->holder_uuid, sizeof(info->holder_uuid), "%s", res.holder_uuid);
2064 	info->num_regs = res.regs.num_regs;
2065 	for (i = 0; i < res.regs.num_regs; i++) {
2066 		info->registrants[i].rkey = res.regs.reg[i].rkey;
2067 		snprintf(info->registrants[i].host_uuid, sizeof(info->registrants[i].host_uuid), "%s",
2068 			 res.regs.reg[i].host_uuid);
2069 	}
2070 
2071 exit:
2072 	free(json);
2073 	free(values);
2074 	free(res.bdev_uuid);
2075 	free(res.holder_uuid);
2076 	for (i = 0; i < res.regs.num_regs; i++) {
2077 		free(res.regs.reg[i].host_uuid);
2078 	}
2079 
2080 	return rc;
2081 }
2082 
2083 static bool
2084 nvmf_ns_reservation_all_registrants_type(struct spdk_nvmf_ns *ns);
2085 
/* Restore a namespace's persistent reservation state from *info (previously
 * loaded from the PTPL file): rebuild the registrant list and re-establish
 * the reservation holder.  Returns 0 on success or a negative errno.
 */
static int
nvmf_ns_reservation_restore(struct spdk_nvmf_ns *ns, struct spdk_nvmf_reservation_info *info)
{
	uint32_t i;
	struct spdk_nvmf_registrant *reg, *holder = NULL;
	struct spdk_uuid bdev_uuid, holder_uuid;

	SPDK_DEBUGLOG(nvmf, "NSID %u, PTPL %u, Number of registrants %u\n",
		      ns->nsid, info->ptpl_activated, info->num_regs);

	/* it's not an error */
	if (!info->ptpl_activated || !info->num_regs) {
		return 0;
	}

	/* The persisted state must belong to the bdev currently backing this
	 * namespace; refuse to restore otherwise.
	 * NOTE(review): spdk_uuid_parse() return values are not checked in this
	 * function — a corrupt UUID string would be used as-is; confirm whether
	 * that is acceptable for trusted config files. */
	spdk_uuid_parse(&bdev_uuid, info->bdev_uuid);
	if (spdk_uuid_compare(&bdev_uuid, spdk_bdev_get_uuid(ns->bdev))) {
		SPDK_ERRLOG("Existing bdev UUID is not same with configuration file\n");
		return -EINVAL;
	}

	ns->crkey = info->crkey;
	ns->rtype = info->rtype;
	ns->ptpl_activated = info->ptpl_activated;
	spdk_uuid_parse(&holder_uuid, info->holder_uuid);

	SPDK_DEBUGLOG(nvmf, "Bdev UUID %s\n", info->bdev_uuid);
	if (info->rtype) {
		SPDK_DEBUGLOG(nvmf, "Holder UUID %s, RTYPE %u, RKEY 0x%"PRIx64"\n",
			      info->holder_uuid, info->rtype, info->crkey);
	}

	/* Recreate each persisted registrant; remember the one (if any) whose
	 * hostid matches the persisted holder UUID. */
	for (i = 0; i < info->num_regs; i++) {
		reg = calloc(1, sizeof(*reg));
		if (!reg) {
			return -ENOMEM;
		}
		spdk_uuid_parse(&reg->hostid, info->registrants[i].host_uuid);
		reg->rkey = info->registrants[i].rkey;
		TAILQ_INSERT_TAIL(&ns->registrants, reg, link);
		if (!spdk_uuid_compare(&holder_uuid, &reg->hostid)) {
			holder = reg;
		}
		SPDK_DEBUGLOG(nvmf, "Registrant RKEY 0x%"PRIx64", Host UUID %s\n",
			      info->registrants[i].rkey, info->registrants[i].host_uuid);
	}

	/* For all-registrants reservation types every registrant is a holder,
	 * so any entry works — take the first. */
	if (nvmf_ns_reservation_all_registrants_type(ns)) {
		ns->holder = TAILQ_FIRST(&ns->registrants);
	} else {
		ns->holder = holder;
	}

	return 0;
}
2141 
/* spdk_json_write_begin() flush callback: write the rendered JSON buffer to
 * the PTPL file given as cb_ctx.  Returns 0 on success, negative on failure.
 */
static int
nvmf_ns_json_write_cb(void *cb_ctx, const void *data, size_t size)
{
	char *file = cb_ctx;
	size_t rc;
	FILE *fd;

	fd = fopen(file, "w");
	if (!fd) {
		SPDK_ERRLOG("Can't open file %s for write\n", file);
		return -ENOENT;
	}
	rc = fwrite(data, 1, size, fd);
	/* fclose() flushes buffered data; a failure here means the on-disk
	 * file may be incomplete, so report it as a write error too. */
	if (fclose(fd) != 0) {
		return -1;
	}

	return rc == size ? 0 : -1;
}
2159 
/* Serialize *info to `file` as JSON via nvmf_ns_json_write_cb.  When PTPL is
 * not activated, nothing is written into the JSON context before ending it,
 * which clears the persisted configuration.  Returns 0 on success or a
 * negative errno.
 */
static int
nvmf_ns_reservation_update(const char *file, struct spdk_nvmf_reservation_info *info)
{
	struct spdk_json_write_ctx *w;
	uint32_t i;
	int rc = 0;

	w = spdk_json_write_begin(nvmf_ns_json_write_cb, (void *)file, 0);
	if (w == NULL) {
		return -ENOMEM;
	}
	/* clear the configuration file */
	if (!info->ptpl_activated) {
		goto exit;
	}

	spdk_json_write_object_begin(w);
	spdk_json_write_named_bool(w, "ptpl", info->ptpl_activated);
	spdk_json_write_named_uint32(w, "rtype", info->rtype);
	spdk_json_write_named_uint64(w, "crkey", info->crkey);
	spdk_json_write_named_string(w, "bdev_uuid", info->bdev_uuid);
	spdk_json_write_named_string(w, "holder_uuid", info->holder_uuid);

	/* One object per registrant; field names must match the decoders used
	 * by nvmf_ns_load_reservation. */
	spdk_json_write_named_array_begin(w, "registrants");
	for (i = 0; i < info->num_regs; i++) {
		spdk_json_write_object_begin(w);
		spdk_json_write_named_uint64(w, "rkey", info->registrants[i].rkey);
		spdk_json_write_named_string(w, "host_uuid", info->registrants[i].host_uuid);
		spdk_json_write_object_end(w);
	}
	spdk_json_write_array_end(w);
	spdk_json_write_object_end(w);

exit:
	/* spdk_json_write_end() flushes through the write callback and frees w. */
	rc = spdk_json_write_end(w);
	return rc;
}
2197 
2198 static int
2199 nvmf_ns_update_reservation_info(struct spdk_nvmf_ns *ns)
2200 {
2201 	struct spdk_nvmf_reservation_info info;
2202 	struct spdk_nvmf_registrant *reg, *tmp;
2203 	uint32_t i = 0;
2204 
2205 	assert(ns != NULL);
2206 
2207 	if (!ns->bdev || !ns->ptpl_file) {
2208 		return 0;
2209 	}
2210 
2211 	memset(&info, 0, sizeof(info));
2212 	spdk_uuid_fmt_lower(info.bdev_uuid, sizeof(info.bdev_uuid), spdk_bdev_get_uuid(ns->bdev));
2213 
2214 	if (ns->rtype) {
2215 		info.rtype = ns->rtype;
2216 		info.crkey = ns->crkey;
2217 		if (!nvmf_ns_reservation_all_registrants_type(ns)) {
2218 			assert(ns->holder != NULL);
2219 			spdk_uuid_fmt_lower(info.holder_uuid, sizeof(info.holder_uuid), &ns->holder->hostid);
2220 		}
2221 	}
2222 
2223 	TAILQ_FOREACH_SAFE(reg, &ns->registrants, link, tmp) {
2224 		spdk_uuid_fmt_lower(info.registrants[i].host_uuid, sizeof(info.registrants[i].host_uuid),
2225 				    &reg->hostid);
2226 		info.registrants[i++].rkey = reg->rkey;
2227 	}
2228 
2229 	info.num_regs = i;
2230 	info.ptpl_activated = ns->ptpl_activated;
2231 
2232 	return nvmf_ns_reservation_update(ns->ptpl_file, &info);
2233 }
2234 
2235 static struct spdk_nvmf_registrant *
2236 nvmf_ns_reservation_get_registrant(struct spdk_nvmf_ns *ns,
2237 				   struct spdk_uuid *uuid)
2238 {
2239 	struct spdk_nvmf_registrant *reg, *tmp;
2240 
2241 	TAILQ_FOREACH_SAFE(reg, &ns->registrants, link, tmp) {
2242 		if (!spdk_uuid_compare(&reg->hostid, uuid)) {
2243 			return reg;
2244 		}
2245 	}
2246 
2247 	return NULL;
2248 }
2249 
2250 /* Generate reservation notice log to registered HostID controllers */
2251 static void
2252 nvmf_subsystem_gen_ctrlr_notification(struct spdk_nvmf_subsystem *subsystem,
2253 				      struct spdk_nvmf_ns *ns,
2254 				      struct spdk_uuid *hostid_list,
2255 				      uint32_t num_hostid,
2256 				      enum spdk_nvme_reservation_notification_log_page_type type)
2257 {
2258 	struct spdk_nvmf_ctrlr *ctrlr;
2259 	uint32_t i;
2260 
2261 	for (i = 0; i < num_hostid; i++) {
2262 		TAILQ_FOREACH(ctrlr, &subsystem->ctrlrs, link) {
2263 			if (!spdk_uuid_compare(&ctrlr->hostid, &hostid_list[i])) {
2264 				nvmf_ctrlr_reservation_notice_log(ctrlr, ns, type);
2265 			}
2266 		}
2267 	}
2268 }
2269 
2270 /* Get all registrants' hostid other than the controller who issued the command */
2271 static uint32_t
2272 nvmf_ns_reservation_get_all_other_hostid(struct spdk_nvmf_ns *ns,
2273 		struct spdk_uuid *hostid_list,
2274 		uint32_t max_num_hostid,
2275 		struct spdk_uuid *current_hostid)
2276 {
2277 	struct spdk_nvmf_registrant *reg, *tmp;
2278 	uint32_t num_hostid = 0;
2279 
2280 	TAILQ_FOREACH_SAFE(reg, &ns->registrants, link, tmp) {
2281 		if (spdk_uuid_compare(&reg->hostid, current_hostid)) {
2282 			if (num_hostid == max_num_hostid) {
2283 				assert(false);
2284 				return max_num_hostid;
2285 			}
2286 			hostid_list[num_hostid++] = reg->hostid;
2287 		}
2288 	}
2289 
2290 	return num_hostid;
2291 }
2292 
2293 /* Calculate the unregistered HostID list according to list
2294  * prior to execute preempt command and list after executing
2295  * preempt command.
2296  */
2297 static uint32_t
2298 nvmf_ns_reservation_get_unregistered_hostid(struct spdk_uuid *old_hostid_list,
2299 		uint32_t old_num_hostid,
2300 		struct spdk_uuid *remaining_hostid_list,
2301 		uint32_t remaining_num_hostid)
2302 {
2303 	struct spdk_uuid temp_hostid_list[SPDK_NVMF_MAX_NUM_REGISTRANTS];
2304 	uint32_t i, j, num_hostid = 0;
2305 	bool found;
2306 
2307 	if (!remaining_num_hostid) {
2308 		return old_num_hostid;
2309 	}
2310 
2311 	for (i = 0; i < old_num_hostid; i++) {
2312 		found = false;
2313 		for (j = 0; j < remaining_num_hostid; j++) {
2314 			if (!spdk_uuid_compare(&old_hostid_list[i], &remaining_hostid_list[j])) {
2315 				found = true;
2316 				break;
2317 			}
2318 		}
2319 		if (!found) {
2320 			spdk_uuid_copy(&temp_hostid_list[num_hostid++], &old_hostid_list[i]);
2321 		}
2322 	}
2323 
2324 	if (num_hostid) {
2325 		memcpy(old_hostid_list, temp_hostid_list, sizeof(struct spdk_uuid) * num_hostid);
2326 	}
2327 
2328 	return num_hostid;
2329 }
2330 
2331 /* current reservation type is all registrants or not */
2332 static bool
2333 nvmf_ns_reservation_all_registrants_type(struct spdk_nvmf_ns *ns)
2334 {
2335 	return (ns->rtype == SPDK_NVME_RESERVE_WRITE_EXCLUSIVE_ALL_REGS ||
2336 		ns->rtype == SPDK_NVME_RESERVE_EXCLUSIVE_ACCESS_ALL_REGS);
2337 }
2338 
2339 /* current registrant is reservation holder or not */
2340 static bool
2341 nvmf_ns_reservation_registrant_is_holder(struct spdk_nvmf_ns *ns,
2342 		struct spdk_nvmf_registrant *reg)
2343 {
2344 	if (!reg) {
2345 		return false;
2346 	}
2347 
2348 	if (nvmf_ns_reservation_all_registrants_type(ns)) {
2349 		return true;
2350 	}
2351 
2352 	return (ns->holder == reg);
2353 }
2354 
2355 static int
2356 nvmf_ns_reservation_add_registrant(struct spdk_nvmf_ns *ns,
2357 				   struct spdk_nvmf_ctrlr *ctrlr,
2358 				   uint64_t nrkey)
2359 {
2360 	struct spdk_nvmf_registrant *reg;
2361 
2362 	reg = calloc(1, sizeof(*reg));
2363 	if (!reg) {
2364 		return -ENOMEM;
2365 	}
2366 
2367 	reg->rkey = nrkey;
2368 	/* set hostid for the registrant */
2369 	spdk_uuid_copy(&reg->hostid, &ctrlr->hostid);
2370 	TAILQ_INSERT_TAIL(&ns->registrants, reg, link);
2371 	ns->gen++;
2372 
2373 	return 0;
2374 }
2375 
2376 static void
2377 nvmf_ns_reservation_release_reservation(struct spdk_nvmf_ns *ns)
2378 {
2379 	ns->rtype = 0;
2380 	ns->crkey = 0;
2381 	ns->holder = NULL;
2382 }
2383 
2384 /* release the reservation if the last registrant was removed */
2385 static void
2386 nvmf_ns_reservation_check_release_on_remove_registrant(struct spdk_nvmf_ns *ns,
2387 		struct spdk_nvmf_registrant *reg)
2388 {
2389 	struct spdk_nvmf_registrant *next_reg;
2390 
2391 	/* no reservation holder */
2392 	if (!ns->holder) {
2393 		assert(ns->rtype == 0);
2394 		return;
2395 	}
2396 
2397 	next_reg = TAILQ_FIRST(&ns->registrants);
2398 	if (next_reg && nvmf_ns_reservation_all_registrants_type(ns)) {
2399 		/* the next valid registrant is the new holder now */
2400 		ns->holder = next_reg;
2401 	} else if (nvmf_ns_reservation_registrant_is_holder(ns, reg)) {
2402 		/* release the reservation */
2403 		nvmf_ns_reservation_release_reservation(ns);
2404 	}
2405 }
2406 
2407 static void
2408 nvmf_ns_reservation_remove_registrant(struct spdk_nvmf_ns *ns,
2409 				      struct spdk_nvmf_registrant *reg)
2410 {
2411 	TAILQ_REMOVE(&ns->registrants, reg, link);
2412 	nvmf_ns_reservation_check_release_on_remove_registrant(ns, reg);
2413 	free(reg);
2414 	ns->gen++;
2415 	return;
2416 }
2417 
2418 static uint32_t
2419 nvmf_ns_reservation_remove_registrants_by_key(struct spdk_nvmf_ns *ns,
2420 		uint64_t rkey)
2421 {
2422 	struct spdk_nvmf_registrant *reg, *tmp;
2423 	uint32_t count = 0;
2424 
2425 	TAILQ_FOREACH_SAFE(reg, &ns->registrants, link, tmp) {
2426 		if (reg->rkey == rkey) {
2427 			nvmf_ns_reservation_remove_registrant(ns, reg);
2428 			count++;
2429 		}
2430 	}
2431 	return count;
2432 }
2433 
2434 static uint32_t
2435 nvmf_ns_reservation_remove_all_other_registrants(struct spdk_nvmf_ns *ns,
2436 		struct spdk_nvmf_registrant *reg)
2437 {
2438 	struct spdk_nvmf_registrant *reg_tmp, *reg_tmp2;
2439 	uint32_t count = 0;
2440 
2441 	TAILQ_FOREACH_SAFE(reg_tmp, &ns->registrants, link, reg_tmp2) {
2442 		if (reg_tmp != reg) {
2443 			nvmf_ns_reservation_remove_registrant(ns, reg_tmp);
2444 			count++;
2445 		}
2446 	}
2447 	return count;
2448 }
2449 
2450 static uint32_t
2451 nvmf_ns_reservation_clear_all_registrants(struct spdk_nvmf_ns *ns)
2452 {
2453 	struct spdk_nvmf_registrant *reg, *reg_tmp;
2454 	uint32_t count = 0;
2455 
2456 	TAILQ_FOREACH_SAFE(reg, &ns->registrants, link, reg_tmp) {
2457 		nvmf_ns_reservation_remove_registrant(ns, reg);
2458 		count++;
2459 	}
2460 	return count;
2461 }
2462 
2463 static void
2464 nvmf_ns_reservation_acquire_reservation(struct spdk_nvmf_ns *ns, uint64_t rkey,
2465 					enum spdk_nvme_reservation_type rtype,
2466 					struct spdk_nvmf_registrant *holder)
2467 {
2468 	ns->rtype = rtype;
2469 	ns->crkey = rkey;
2470 	assert(ns->holder == NULL);
2471 	ns->holder = holder;
2472 }
2473 
2474 static bool
2475 nvmf_ns_reservation_register(struct spdk_nvmf_ns *ns,
2476 			     struct spdk_nvmf_ctrlr *ctrlr,
2477 			     struct spdk_nvmf_request *req)
2478 {
2479 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
2480 	uint8_t rrega, iekey, cptpl, rtype;
2481 	struct spdk_nvme_reservation_register_data key;
2482 	struct spdk_nvmf_registrant *reg;
2483 	uint8_t status = SPDK_NVME_SC_SUCCESS;
2484 	bool update_sgroup = false;
2485 	struct spdk_uuid hostid_list[SPDK_NVMF_MAX_NUM_REGISTRANTS];
2486 	uint32_t num_hostid = 0;
2487 	int rc;
2488 
2489 	rrega = cmd->cdw10_bits.resv_register.rrega;
2490 	iekey = cmd->cdw10_bits.resv_register.iekey;
2491 	cptpl = cmd->cdw10_bits.resv_register.cptpl;
2492 
2493 	if (req->data && req->length >= sizeof(key)) {
2494 		memcpy(&key, req->data, sizeof(key));
2495 	} else {
2496 		SPDK_ERRLOG("No key provided. Failing request.\n");
2497 		status = SPDK_NVME_SC_INVALID_FIELD;
2498 		goto exit;
2499 	}
2500 
2501 	SPDK_DEBUGLOG(nvmf, "REGISTER: RREGA %u, IEKEY %u, CPTPL %u, "
2502 		      "NRKEY 0x%"PRIx64", NRKEY 0x%"PRIx64"\n",
2503 		      rrega, iekey, cptpl, key.crkey, key.nrkey);
2504 
2505 	if (cptpl == SPDK_NVME_RESERVE_PTPL_CLEAR_POWER_ON) {
2506 		/* Ture to OFF state, and need to be updated in the configuration file */
2507 		if (ns->ptpl_activated) {
2508 			ns->ptpl_activated = 0;
2509 			update_sgroup = true;
2510 		}
2511 	} else if (cptpl == SPDK_NVME_RESERVE_PTPL_PERSIST_POWER_LOSS) {
2512 		if (ns->ptpl_file == NULL) {
2513 			status = SPDK_NVME_SC_INVALID_FIELD;
2514 			goto exit;
2515 		} else if (ns->ptpl_activated == 0) {
2516 			ns->ptpl_activated = 1;
2517 			update_sgroup = true;
2518 		}
2519 	}
2520 
2521 	/* current Host Identifier has registrant or not */
2522 	reg = nvmf_ns_reservation_get_registrant(ns, &ctrlr->hostid);
2523 
2524 	switch (rrega) {
2525 	case SPDK_NVME_RESERVE_REGISTER_KEY:
2526 		if (!reg) {
2527 			/* register new controller */
2528 			if (key.nrkey == 0) {
2529 				SPDK_ERRLOG("Can't register zeroed new key\n");
2530 				status = SPDK_NVME_SC_INVALID_FIELD;
2531 				goto exit;
2532 			}
2533 			rc = nvmf_ns_reservation_add_registrant(ns, ctrlr, key.nrkey);
2534 			if (rc < 0) {
2535 				status = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
2536 				goto exit;
2537 			}
2538 			update_sgroup = true;
2539 		} else {
2540 			/* register with same key is not an error */
2541 			if (reg->rkey != key.nrkey) {
2542 				SPDK_ERRLOG("The same host already register a "
2543 					    "key with 0x%"PRIx64"\n",
2544 					    reg->rkey);
2545 				status = SPDK_NVME_SC_RESERVATION_CONFLICT;
2546 				goto exit;
2547 			}
2548 		}
2549 		break;
2550 	case SPDK_NVME_RESERVE_UNREGISTER_KEY:
2551 		if (!reg || (!iekey && reg->rkey != key.crkey)) {
2552 			SPDK_ERRLOG("No registrant or current key doesn't match "
2553 				    "with existing registrant key\n");
2554 			status = SPDK_NVME_SC_RESERVATION_CONFLICT;
2555 			goto exit;
2556 		}
2557 
2558 		rtype = ns->rtype;
2559 		num_hostid = nvmf_ns_reservation_get_all_other_hostid(ns, hostid_list,
2560 				SPDK_NVMF_MAX_NUM_REGISTRANTS,
2561 				&ctrlr->hostid);
2562 
2563 		nvmf_ns_reservation_remove_registrant(ns, reg);
2564 
2565 		if (!ns->rtype && num_hostid && (rtype == SPDK_NVME_RESERVE_WRITE_EXCLUSIVE_REG_ONLY ||
2566 						 rtype == SPDK_NVME_RESERVE_EXCLUSIVE_ACCESS_REG_ONLY)) {
2567 			nvmf_subsystem_gen_ctrlr_notification(ns->subsystem, ns,
2568 							      hostid_list,
2569 							      num_hostid,
2570 							      SPDK_NVME_RESERVATION_RELEASED);
2571 		}
2572 		update_sgroup = true;
2573 		break;
2574 	case SPDK_NVME_RESERVE_REPLACE_KEY:
2575 		if (key.nrkey == 0) {
2576 			SPDK_ERRLOG("Can't register zeroed new key\n");
2577 			status = SPDK_NVME_SC_INVALID_FIELD;
2578 			goto exit;
2579 		}
2580 		/* Registrant exists */
2581 		if (reg) {
2582 			if (!iekey && reg->rkey != key.crkey) {
2583 				SPDK_ERRLOG("Current key doesn't match "
2584 					    "existing registrant key\n");
2585 				status = SPDK_NVME_SC_RESERVATION_CONFLICT;
2586 				goto exit;
2587 			}
2588 			if (reg->rkey == key.nrkey) {
2589 				goto exit;
2590 			}
2591 			reg->rkey = key.nrkey;
2592 		} else if (iekey) { /* No registrant but IEKEY is set */
2593 			/* new registrant */
2594 			rc = nvmf_ns_reservation_add_registrant(ns, ctrlr, key.nrkey);
2595 			if (rc < 0) {
2596 				status = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
2597 				goto exit;
2598 			}
2599 		} else { /* No registrant */
2600 			SPDK_ERRLOG("No registrant\n");
2601 			status = SPDK_NVME_SC_RESERVATION_CONFLICT;
2602 			goto exit;
2603 
2604 		}
2605 		update_sgroup = true;
2606 		break;
2607 	default:
2608 		status = SPDK_NVME_SC_INVALID_FIELD;
2609 		goto exit;
2610 	}
2611 
2612 exit:
2613 	if (update_sgroup) {
2614 		rc = nvmf_ns_update_reservation_info(ns);
2615 		if (rc != 0) {
2616 			status = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
2617 		}
2618 	}
2619 	req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
2620 	req->rsp->nvme_cpl.status.sc = status;
2621 	return update_sgroup;
2622 }
2623 
/* Handle an NVMe Reservation Acquire command (acquire / preempt) against
 * namespace `ns` on behalf of `ctrlr`.
 *
 * Writes the NVMe completion status into req->rsp and returns true when the
 * subsystem poll groups need to be updated with the new reservation state.
 */
static bool
nvmf_ns_reservation_acquire(struct spdk_nvmf_ns *ns,
			    struct spdk_nvmf_ctrlr *ctrlr,
			    struct spdk_nvmf_request *req)
{
	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
	uint8_t racqa, iekey, rtype;
	struct spdk_nvme_reservation_acquire_data key;
	struct spdk_nvmf_registrant *reg;
	bool all_regs = false;
	uint32_t count = 0;
	bool update_sgroup = true;
	/* Hosts registered before the command, excluding the issuer. */
	struct spdk_uuid hostid_list[SPDK_NVMF_MAX_NUM_REGISTRANTS];
	uint32_t num_hostid = 0;
	/* Hosts still registered after the command, excluding the issuer. */
	struct spdk_uuid new_hostid_list[SPDK_NVMF_MAX_NUM_REGISTRANTS];
	uint32_t new_num_hostid = 0;
	bool reservation_released = false;
	uint8_t status = SPDK_NVME_SC_SUCCESS;

	racqa = cmd->cdw10_bits.resv_acquire.racqa;
	iekey = cmd->cdw10_bits.resv_acquire.iekey;
	rtype = cmd->cdw10_bits.resv_acquire.rtype;

	if (req->data && req->length >= sizeof(key)) {
		memcpy(&key, req->data, sizeof(key));
	} else {
		SPDK_ERRLOG("No key provided. Failing request.\n");
		status = SPDK_NVME_SC_INVALID_FIELD;
		goto exit;
	}

	SPDK_DEBUGLOG(nvmf, "ACQUIRE: RACQA %u, IEKEY %u, RTYPE %u, "
		      "NRKEY 0x%"PRIx64", PRKEY 0x%"PRIx64"\n",
		      racqa, iekey, rtype, key.crkey, key.prkey);

	/* IEKEY must be zero for Acquire, and rtype must be a defined type. */
	if (iekey || rtype > SPDK_NVME_RESERVE_EXCLUSIVE_ACCESS_ALL_REGS) {
		SPDK_ERRLOG("Ignore existing key field set to 1\n");
		status = SPDK_NVME_SC_INVALID_FIELD;
		update_sgroup = false;
		goto exit;
	}

	reg = nvmf_ns_reservation_get_registrant(ns, &ctrlr->hostid);
	/* must be registrant and CRKEY must match */
	if (!reg || reg->rkey != key.crkey) {
		SPDK_ERRLOG("No registrant or current key doesn't match "
			    "with existing registrant key\n");
		status = SPDK_NVME_SC_RESERVATION_CONFLICT;
		update_sgroup = false;
		goto exit;
	}

	all_regs = nvmf_ns_reservation_all_registrants_type(ns);

	switch (racqa) {
	case SPDK_NVME_RESERVE_ACQUIRE:
		/* it's not an error for the holder to acquire same reservation type again */
		if (nvmf_ns_reservation_registrant_is_holder(ns, reg) && ns->rtype == rtype) {
			/* do nothing */
			update_sgroup = false;
		} else if (ns->holder == NULL) {
			/* first time to acquire the reservation */
			nvmf_ns_reservation_acquire_reservation(ns, key.crkey, rtype, reg);
		} else {
			SPDK_ERRLOG("Invalid rtype or current registrant is not holder\n");
			status = SPDK_NVME_SC_RESERVATION_CONFLICT;
			update_sgroup = false;
			goto exit;
		}
		break;
	case SPDK_NVME_RESERVE_PREEMPT:
		/* no reservation holder */
		if (!ns->holder) {
			/* unregister with PRKEY */
			nvmf_ns_reservation_remove_registrants_by_key(ns, key.prkey);
			break;
		}
		/* Snapshot the other hosts before preempting, so that the
		 * notification lists can be derived afterwards. */
		num_hostid = nvmf_ns_reservation_get_all_other_hostid(ns, hostid_list,
				SPDK_NVMF_MAX_NUM_REGISTRANTS,
				&ctrlr->hostid);

		/* only 1 reservation holder and reservation key is valid */
		if (!all_regs) {
			/* preempt itself */
			if (nvmf_ns_reservation_registrant_is_holder(ns, reg) &&
			    ns->crkey == key.prkey) {
				ns->rtype = rtype;
				reservation_released = true;
				break;
			}

			if (ns->crkey == key.prkey) {
				/* Preempt the current holder and take over. */
				nvmf_ns_reservation_remove_registrant(ns, ns->holder);
				nvmf_ns_reservation_acquire_reservation(ns, key.crkey, rtype, reg);
				reservation_released = true;
			} else if (key.prkey != 0) {
				nvmf_ns_reservation_remove_registrants_by_key(ns, key.prkey);
			} else {
				/* PRKEY is zero */
				SPDK_ERRLOG("Current PRKEY is zero\n");
				status = SPDK_NVME_SC_RESERVATION_CONFLICT;
				update_sgroup = false;
				goto exit;
			}
		} else {
			/* release all other registrants except for the current one */
			if (key.prkey == 0) {
				nvmf_ns_reservation_remove_all_other_registrants(ns, reg);
				assert(ns->holder == reg);
			} else {
				count = nvmf_ns_reservation_remove_registrants_by_key(ns, key.prkey);
				if (count == 0) {
					SPDK_ERRLOG("PRKEY doesn't match any registrant\n");
					status = SPDK_NVME_SC_RESERVATION_CONFLICT;
					update_sgroup = false;
					goto exit;
				}
			}
		}
		break;
	default:
		status = SPDK_NVME_SC_INVALID_FIELD;
		update_sgroup = false;
		break;
	}

exit:
	if (update_sgroup && racqa == SPDK_NVME_RESERVE_PREEMPT) {
		new_num_hostid = nvmf_ns_reservation_get_all_other_hostid(ns, new_hostid_list,
				 SPDK_NVMF_MAX_NUM_REGISTRANTS,
				 &ctrlr->hostid);
		/* Preempt notification occurs on the unregistered controllers
		 * other than the controller who issued the command.
		 */
		num_hostid = nvmf_ns_reservation_get_unregistered_hostid(hostid_list,
				num_hostid,
				new_hostid_list,
				new_num_hostid);
		if (num_hostid) {
			nvmf_subsystem_gen_ctrlr_notification(ns->subsystem, ns,
							      hostid_list,
							      num_hostid,
							      SPDK_NVME_REGISTRATION_PREEMPTED);

		}
		/* Reservation released notification occurs on the
		 * controllers which are the remaining registrants other than
		 * the controller who issued the command.
		 */
		if (reservation_released && new_num_hostid) {
			nvmf_subsystem_gen_ctrlr_notification(ns->subsystem, ns,
							      new_hostid_list,
							      new_num_hostid,
							      SPDK_NVME_RESERVATION_RELEASED);

		}
	}
	/* Persist the new state before completing the command. */
	if (update_sgroup && ns->ptpl_activated) {
		if (nvmf_ns_update_reservation_info(ns)) {
			status = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
		}
	}
	req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
	req->rsp->nvme_cpl.status.sc = status;
	return update_sgroup;
}
2790 
/* Handle an NVMe Reservation Release command (release / clear) against
 * namespace `ns` on behalf of `ctrlr`.
 *
 * Writes the NVMe completion status into req->rsp and returns true when the
 * subsystem poll groups need to be updated with the new reservation state.
 */
static bool
nvmf_ns_reservation_release(struct spdk_nvmf_ns *ns,
			    struct spdk_nvmf_ctrlr *ctrlr,
			    struct spdk_nvmf_request *req)
{
	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
	uint8_t rrela, iekey, rtype;
	struct spdk_nvmf_registrant *reg;
	uint64_t crkey;
	uint8_t status = SPDK_NVME_SC_SUCCESS;
	bool update_sgroup = true;
	struct spdk_uuid hostid_list[SPDK_NVMF_MAX_NUM_REGISTRANTS];
	uint32_t num_hostid = 0;

	rrela = cmd->cdw10_bits.resv_release.rrela;
	iekey = cmd->cdw10_bits.resv_release.iekey;
	rtype = cmd->cdw10_bits.resv_release.rtype;

	if (req->data && req->length >= sizeof(crkey)) {
		memcpy(&crkey, req->data, sizeof(crkey));
	} else {
		SPDK_ERRLOG("No key provided. Failing request.\n");
		status = SPDK_NVME_SC_INVALID_FIELD;
		goto exit;
	}

	SPDK_DEBUGLOG(nvmf, "RELEASE: RRELA %u, IEKEY %u, RTYPE %u, "
		      "CRKEY 0x%"PRIx64"\n",  rrela, iekey, rtype, crkey);

	/* IEKEY must be zero for Release. */
	if (iekey) {
		SPDK_ERRLOG("Ignore existing key field set to 1\n");
		status = SPDK_NVME_SC_INVALID_FIELD;
		update_sgroup = false;
		goto exit;
	}

	/* The issuer must be a registrant and the supplied key must match. */
	reg = nvmf_ns_reservation_get_registrant(ns, &ctrlr->hostid);
	if (!reg || reg->rkey != crkey) {
		SPDK_ERRLOG("No registrant or current key doesn't match "
			    "with existing registrant key\n");
		status = SPDK_NVME_SC_RESERVATION_CONFLICT;
		update_sgroup = false;
		goto exit;
	}

	/* Collect the other hosts up front for possible notifications. */
	num_hostid = nvmf_ns_reservation_get_all_other_hostid(ns, hostid_list,
			SPDK_NVMF_MAX_NUM_REGISTRANTS,
			&ctrlr->hostid);

	switch (rrela) {
	case SPDK_NVME_RESERVE_RELEASE:
		if (!ns->holder) {
			SPDK_DEBUGLOG(nvmf, "RELEASE: no holder\n");
			update_sgroup = false;
			goto exit;
		}
		if (ns->rtype != rtype) {
			SPDK_ERRLOG("Type doesn't match\n");
			status = SPDK_NVME_SC_INVALID_FIELD;
			update_sgroup = false;
			goto exit;
		}
		if (!nvmf_ns_reservation_registrant_is_holder(ns, reg)) {
			/* not the reservation holder, this isn't an error */
			update_sgroup = false;
			goto exit;
		}

		/* Remember the released type: write-exclusive and
		 * exclusive-access types do not generate release notices. */
		rtype = ns->rtype;
		nvmf_ns_reservation_release_reservation(ns);

		if (num_hostid && rtype != SPDK_NVME_RESERVE_WRITE_EXCLUSIVE &&
		    rtype != SPDK_NVME_RESERVE_EXCLUSIVE_ACCESS) {
			nvmf_subsystem_gen_ctrlr_notification(ns->subsystem, ns,
							      hostid_list,
							      num_hostid,
							      SPDK_NVME_RESERVATION_RELEASED);
		}
		break;
	case SPDK_NVME_RESERVE_CLEAR:
		/* Clear removes every registrant and preempts all other hosts. */
		nvmf_ns_reservation_clear_all_registrants(ns);
		if (num_hostid) {
			nvmf_subsystem_gen_ctrlr_notification(ns->subsystem, ns,
							      hostid_list,
							      num_hostid,
							      SPDK_NVME_RESERVATION_PREEMPTED);
		}
		break;
	default:
		status = SPDK_NVME_SC_INVALID_FIELD;
		update_sgroup = false;
		goto exit;
	}

exit:
	/* Persist the new state before completing the command. */
	if (update_sgroup && ns->ptpl_activated) {
		if (nvmf_ns_update_reservation_info(ns)) {
			status = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
		}
	}
	req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
	req->rsp->nvme_cpl.status.sc = status;
	return update_sgroup;
}
2895 
/* Handle an NVMe Reservation Report command: fill the request's data buffer
 * with the extended reservation status structure followed by one extended
 * registered-controller entry per registrant (as many as fit in the host's
 * requested transfer length).  The completion status is written to req->rsp.
 */
static void
nvmf_ns_reservation_report(struct spdk_nvmf_ns *ns,
			   struct spdk_nvmf_ctrlr *ctrlr,
			   struct spdk_nvmf_request *req)
{
	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
	struct spdk_nvmf_registrant *reg, *tmp;
	struct spdk_nvme_reservation_status_extended_data *status_data;
	struct spdk_nvme_registered_ctrlr_extended_data *ctrlr_data;
	uint8_t *payload;
	uint32_t transfer_len, payload_len = 0;
	uint32_t regctl = 0;
	uint8_t status = SPDK_NVME_SC_SUCCESS;

	if (req->data == NULL) {
		SPDK_ERRLOG("No data transfer specified for request. "
			    " Unable to transfer back response.\n");
		status = SPDK_NVME_SC_INVALID_FIELD;
		goto exit;
	}

	/* Only the extended data structure (EDS=1) is supported over fabrics. */
	if (!cmd->cdw11_bits.resv_report.eds) {
		SPDK_ERRLOG("NVMeoF uses extended controller data structure, "
			    "please set EDS bit in cdw11 and try again\n");
		status = SPDK_NVME_SC_HOSTID_INCONSISTENT_FORMAT;
		goto exit;
	}

	/* Number of Dwords of the Reservation Status data structure to transfer */
	transfer_len = (cmd->cdw10 + 1) * sizeof(uint32_t);
	payload = req->data;

	/* The buffer must at least hold the fixed status header. */
	if (transfer_len < sizeof(struct spdk_nvme_reservation_status_extended_data)) {
		status = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
		goto exit;
	}

	status_data = (struct spdk_nvme_reservation_status_extended_data *)payload;
	status_data->data.gen = ns->gen;
	status_data->data.rtype = ns->rtype;
	status_data->data.ptpls = ns->ptpl_activated;
	payload_len += sizeof(struct spdk_nvme_reservation_status_extended_data);

	/* Append one entry per registrant until the buffer would overflow. */
	TAILQ_FOREACH_SAFE(reg, &ns->registrants, link, tmp) {
		payload_len += sizeof(struct spdk_nvme_registered_ctrlr_extended_data);
		if (payload_len > transfer_len) {
			break;
		}

		ctrlr_data = (struct spdk_nvme_registered_ctrlr_extended_data *)
			     (payload + sizeof(*status_data) + sizeof(*ctrlr_data) * regctl);
		/* Set to 0xffffh for dynamic controller */
		ctrlr_data->cntlid = 0xffff;
		ctrlr_data->rcsts.status = (ns->holder == reg) ? true : false;
		ctrlr_data->rkey = reg->rkey;
		spdk_uuid_copy((struct spdk_uuid *)ctrlr_data->hostid, &reg->hostid);
		regctl++;
	}
	/* REGCTL reflects only the entries that actually fit in the buffer. */
	status_data->data.regctl = regctl;

exit:
	req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
	req->rsp->nvme_cpl.status.sc = status;
	return;
}
2961 
/* Thread-message trampoline: complete the reservation request on the thread
 * that owns its poll group. */
static void
nvmf_ns_reservation_complete(void *ctx)
{
	spdk_nvmf_request_complete((struct spdk_nvmf_request *)ctx);
}
2969 
2970 static void
2971 _nvmf_ns_reservation_update_done(struct spdk_nvmf_subsystem *subsystem,
2972 				 void *cb_arg, int status)
2973 {
2974 	struct spdk_nvmf_request *req = (struct spdk_nvmf_request *)cb_arg;
2975 	struct spdk_nvmf_poll_group *group = req->qpair->group;
2976 
2977 	spdk_thread_send_msg(group->thread, nvmf_ns_reservation_complete, req);
2978 }
2979 
/* Entry point for reservation commands (register / acquire / release /
 * report) executing on the subsystem thread.  Dispatches to the per-opcode
 * handler, then — if the reservation state changed — propagates the new
 * state to every poll group before completing the request; otherwise the
 * request is completed immediately.
 */
void
nvmf_ns_reservation_request(void *ctx)
{
	struct spdk_nvmf_request *req = (struct spdk_nvmf_request *)ctx;
	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
	struct subsystem_update_ns_ctx *update_ctx;
	uint32_t nsid;
	struct spdk_nvmf_ns *ns;
	bool update_sgroup = false;

	/* NSID validity was checked before scheduling this message. */
	nsid = cmd->nsid;
	ns = _nvmf_subsystem_get_ns(ctrlr->subsys, nsid);
	assert(ns != NULL);

	switch (cmd->opc) {
	case SPDK_NVME_OPC_RESERVATION_REGISTER:
		update_sgroup = nvmf_ns_reservation_register(ns, ctrlr, req);
		break;
	case SPDK_NVME_OPC_RESERVATION_ACQUIRE:
		update_sgroup = nvmf_ns_reservation_acquire(ns, ctrlr, req);
		break;
	case SPDK_NVME_OPC_RESERVATION_RELEASE:
		update_sgroup = nvmf_ns_reservation_release(ns, ctrlr, req);
		break;
	case SPDK_NVME_OPC_RESERVATION_REPORT:
		nvmf_ns_reservation_report(ns, ctrlr, req);
		break;
	default:
		break;
	}

	/* update reservation information to subsystem's poll group */
	if (update_sgroup) {
		update_ctx = calloc(1, sizeof(*update_ctx));
		if (update_ctx == NULL) {
			/* Complete the request anyway; poll groups will be
			 * temporarily out of sync with the new state. */
			SPDK_ERRLOG("Can't alloc subsystem poll group update context\n");
			goto update_done;
		}
		update_ctx->subsystem = ctrlr->subsys;
		update_ctx->cb_fn = _nvmf_ns_reservation_update_done;
		update_ctx->cb_arg = req;

		nvmf_subsystem_update_ns(ctrlr->subsys, subsystem_update_ns_done, update_ctx);
		return;
	}

update_done:
	_nvmf_ns_reservation_update_done(ctrlr->subsys, (void *)req, 0);
}
3030 
3031 int
3032 spdk_nvmf_subsystem_set_ana_reporting(struct spdk_nvmf_subsystem *subsystem,
3033 				      bool ana_reporting)
3034 {
3035 	if (subsystem->state != SPDK_NVMF_SUBSYSTEM_INACTIVE) {
3036 		return -EAGAIN;
3037 	}
3038 
3039 	subsystem->flags.ana_reporting = ana_reporting;
3040 
3041 	return 0;
3042 }
3043 
/* Return true if ANA reporting is currently enabled for the subsystem. */
bool
nvmf_subsystem_get_ana_reporting(struct spdk_nvmf_subsystem *subsystem)
{
	return subsystem->flags.ana_reporting;
}
3049 
/* Context carried through the per-poll-group ANA state update iteration
 * (spdk_for_each_channel); freed in subsystem_listener_update_done.
 */
struct subsystem_listener_update_ctx {
	/* Listener whose ANA state was changed. */
	struct spdk_nvmf_subsystem_listener *listener;

	/* User completion callback, invoked once all poll groups are updated. */
	spdk_nvmf_tgt_subsystem_listen_done_fn cb_fn;
	void *cb_arg;
};
3056 
3057 static void
3058 subsystem_listener_update_done(struct spdk_io_channel_iter *i, int status)
3059 {
3060 	struct subsystem_listener_update_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
3061 
3062 	if (ctx->cb_fn) {
3063 		ctx->cb_fn(ctx->cb_arg, status);
3064 	}
3065 	free(ctx);
3066 }
3067 
3068 static void
3069 subsystem_listener_update_on_pg(struct spdk_io_channel_iter *i)
3070 {
3071 	struct subsystem_listener_update_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
3072 	struct spdk_nvmf_subsystem_listener *listener;
3073 	struct spdk_nvmf_poll_group *group;
3074 	struct spdk_nvmf_ctrlr *ctrlr;
3075 
3076 	listener = ctx->listener;
3077 	group = spdk_io_channel_get_ctx(spdk_io_channel_iter_get_channel(i));
3078 
3079 	TAILQ_FOREACH(ctrlr, &listener->subsystem->ctrlrs, link) {
3080 		if (ctrlr->admin_qpair->group == group && ctrlr->listener == listener) {
3081 			nvmf_ctrlr_async_event_ana_change_notice(ctrlr);
3082 		}
3083 	}
3084 
3085 	spdk_for_each_channel_continue(i, 0);
3086 }
3087 
3088 void
3089 nvmf_subsystem_set_ana_state(struct spdk_nvmf_subsystem *subsystem,
3090 			     const struct spdk_nvme_transport_id *trid,
3091 			     enum spdk_nvme_ana_state ana_state, uint32_t anagrpid,
3092 			     spdk_nvmf_tgt_subsystem_listen_done_fn cb_fn, void *cb_arg)
3093 {
3094 	struct spdk_nvmf_subsystem_listener *listener;
3095 	struct subsystem_listener_update_ctx *ctx;
3096 	uint32_t i;
3097 
3098 	assert(cb_fn != NULL);
3099 	assert(subsystem->state == SPDK_NVMF_SUBSYSTEM_INACTIVE ||
3100 	       subsystem->state == SPDK_NVMF_SUBSYSTEM_PAUSED);
3101 
3102 	if (!subsystem->flags.ana_reporting) {
3103 		SPDK_ERRLOG("ANA reporting is disabled\n");
3104 		cb_fn(cb_arg, -EINVAL);
3105 		return;
3106 	}
3107 
3108 	/* ANA Change state is not used, ANA Persistent Loss state
3109 	 * is not supported yet.
3110 	 */
3111 	if (!(ana_state == SPDK_NVME_ANA_OPTIMIZED_STATE ||
3112 	      ana_state == SPDK_NVME_ANA_NON_OPTIMIZED_STATE ||
3113 	      ana_state == SPDK_NVME_ANA_INACCESSIBLE_STATE)) {
3114 		SPDK_ERRLOG("ANA state %d is not supported\n", ana_state);
3115 		cb_fn(cb_arg, -ENOTSUP);
3116 		return;
3117 	}
3118 
3119 	if (anagrpid > subsystem->max_nsid) {
3120 		SPDK_ERRLOG("ANA group ID %" PRIu32 " is more than maximum\n", anagrpid);
3121 		cb_fn(cb_arg, -EINVAL);
3122 		return;
3123 	}
3124 
3125 	listener = nvmf_subsystem_find_listener(subsystem, trid);
3126 	if (!listener) {
3127 		SPDK_ERRLOG("Unable to find listener.\n");
3128 		cb_fn(cb_arg, -EINVAL);
3129 		return;
3130 	}
3131 
3132 	if (anagrpid != 0 && listener->ana_state[anagrpid - 1] == ana_state) {
3133 		cb_fn(cb_arg, 0);
3134 		return;
3135 	}
3136 
3137 	ctx = calloc(1, sizeof(*ctx));
3138 	if (!ctx) {
3139 		SPDK_ERRLOG("Unable to allocate context\n");
3140 		cb_fn(cb_arg, -ENOMEM);
3141 		return;
3142 	}
3143 
3144 	for (i = 1; i <= subsystem->max_nsid; i++) {
3145 		if (anagrpid == 0 || i == anagrpid) {
3146 			listener->ana_state[i - 1] = ana_state;
3147 		}
3148 	}
3149 	listener->ana_state_change_count++;
3150 
3151 	ctx->listener = listener;
3152 	ctx->cb_fn = cb_fn;
3153 	ctx->cb_arg = cb_arg;
3154 
3155 	spdk_for_each_channel(subsystem->tgt,
3156 			      subsystem_listener_update_on_pg,
3157 			      ctx,
3158 			      subsystem_listener_update_done);
3159 }
3160