xref: /spdk/lib/nvmf/subsystem.c (revision 927f1fd57bd004df581518466ec4c1b8083e5d23)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright (c) Intel Corporation. All rights reserved.
5  *   Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved.
6  *   Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
7  *
8  *   Redistribution and use in source and binary forms, with or without
9  *   modification, are permitted provided that the following conditions
10  *   are met:
11  *
12  *     * Redistributions of source code must retain the above copyright
13  *       notice, this list of conditions and the following disclaimer.
14  *     * Redistributions in binary form must reproduce the above copyright
15  *       notice, this list of conditions and the following disclaimer in
16  *       the documentation and/or other materials provided with the
17  *       distribution.
18  *     * Neither the name of Intel Corporation nor the names of its
19  *       contributors may be used to endorse or promote products derived
20  *       from this software without specific prior written permission.
21  *
22  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  */
34 
35 #include "spdk/stdinc.h"
36 
37 #include "nvmf_internal.h"
38 #include "transport.h"
39 
40 #include "spdk/assert.h"
41 #include "spdk/likely.h"
42 #include "spdk/string.h"
43 #include "spdk/trace.h"
44 #include "spdk/nvmf_spec.h"
45 #include "spdk/uuid.h"
46 #include "spdk/json.h"
47 #include "spdk/file.h"
48 #include "spdk/bit_array.h"
49 
50 #define __SPDK_BDEV_MODULE_ONLY
51 #include "spdk/bdev_module.h"
52 #include "spdk/log.h"
53 #include "spdk_internal/utf.h"
54 #include "spdk_internal/usdt.h"
55 
56 #define MODEL_NUMBER_DEFAULT "SPDK bdev Controller"
57 #define NVMF_SUBSYSTEM_DEFAULT_NAMESPACES 32
58 
/*
 * States for parsing valid domains in NQNs according to RFC 1034.
 * Used by nvmf_valid_nqn() to walk the reverse-domain portion of an NQN
 * one character at a time.
 */
enum spdk_nvmf_nqn_domain_states {
	/* First character of a domain label must be a letter */
	SPDK_NVMF_DOMAIN_ACCEPT_LETTER = 0,

	/* Subsequent characters can be any of letter, digit, or hyphen (LDH) */
	SPDK_NVMF_DOMAIN_ACCEPT_LDH = 1,

	/* A domain label must end with either a letter or digit */
	SPDK_NVMF_DOMAIN_ACCEPT_ANY = 2
};
72 
73 static int _nvmf_subsystem_destroy(struct spdk_nvmf_subsystem *subsystem);
74 
/* Returns true if the buffer contains only printable ASCII characters
 * (0x20 through 0x7E), as the NVMe spec requires for ASCII string fields.
 */
static bool
nvmf_valid_ascii_string(const void *buf, size_t size)
{
	const uint8_t *cur = buf;
	const uint8_t *end = cur + size;

	while (cur < end) {
		if (*cur < 0x20 || *cur > 0x7E) {
			return false;
		}
		cur++;
	}

	return true;
}
90 
91 static bool
92 nvmf_valid_nqn(const char *nqn)
93 {
94 	size_t len;
95 	struct spdk_uuid uuid_value;
96 	uint32_t i;
97 	int bytes_consumed;
98 	uint32_t domain_label_length;
99 	char *reverse_domain_end;
100 	uint32_t reverse_domain_end_index;
101 	enum spdk_nvmf_nqn_domain_states domain_state = SPDK_NVMF_DOMAIN_ACCEPT_LETTER;
102 
103 	/* Check for length requirements */
104 	len = strlen(nqn);
105 	if (len > SPDK_NVMF_NQN_MAX_LEN) {
106 		SPDK_ERRLOG("Invalid NQN \"%s\": length %zu > max %d\n", nqn, len, SPDK_NVMF_NQN_MAX_LEN);
107 		return false;
108 	}
109 
110 	/* The nqn must be at least as long as SPDK_NVMF_NQN_MIN_LEN to contain the necessary prefix. */
111 	if (len < SPDK_NVMF_NQN_MIN_LEN) {
112 		SPDK_ERRLOG("Invalid NQN \"%s\": length %zu < min %d\n", nqn, len, SPDK_NVMF_NQN_MIN_LEN);
113 		return false;
114 	}
115 
116 	/* Check for discovery controller nqn */
117 	if (!strcmp(nqn, SPDK_NVMF_DISCOVERY_NQN)) {
118 		return true;
119 	}
120 
121 	/* Check for equality with the generic nqn structure of the form "nqn.2014-08.org.nvmexpress:uuid:11111111-2222-3333-4444-555555555555" */
122 	if (!strncmp(nqn, SPDK_NVMF_NQN_UUID_PRE, SPDK_NVMF_NQN_UUID_PRE_LEN)) {
123 		if (len != SPDK_NVMF_NQN_UUID_PRE_LEN + SPDK_NVMF_UUID_STRING_LEN) {
124 			SPDK_ERRLOG("Invalid NQN \"%s\": uuid is not the correct length\n", nqn);
125 			return false;
126 		}
127 
128 		if (spdk_uuid_parse(&uuid_value, &nqn[SPDK_NVMF_NQN_UUID_PRE_LEN])) {
129 			SPDK_ERRLOG("Invalid NQN \"%s\": uuid is not formatted correctly\n", nqn);
130 			return false;
131 		}
132 		return true;
133 	}
134 
135 	/* If the nqn does not match the uuid structure, the next several checks validate the form "nqn.yyyy-mm.reverse.domain:user-string" */
136 
137 	if (strncmp(nqn, "nqn.", 4) != 0) {
138 		SPDK_ERRLOG("Invalid NQN \"%s\": NQN must begin with \"nqn.\".\n", nqn);
139 		return false;
140 	}
141 
142 	/* Check for yyyy-mm. */
143 	if (!(isdigit(nqn[4]) && isdigit(nqn[5]) && isdigit(nqn[6]) && isdigit(nqn[7]) &&
144 	      nqn[8] == '-' && isdigit(nqn[9]) && isdigit(nqn[10]) && nqn[11] == '.')) {
145 		SPDK_ERRLOG("Invalid date code in NQN \"%s\"\n", nqn);
146 		return false;
147 	}
148 
149 	reverse_domain_end = strchr(nqn, ':');
150 	if (reverse_domain_end != NULL && (reverse_domain_end_index = reverse_domain_end - nqn) < len - 1) {
151 	} else {
152 		SPDK_ERRLOG("Invalid NQN \"%s\". NQN must contain user specified name with a ':' as a prefix.\n",
153 			    nqn);
154 		return false;
155 	}
156 
157 	/* Check for valid reverse domain */
158 	domain_label_length = 0;
159 	for (i = 12; i < reverse_domain_end_index; i++) {
160 		if (domain_label_length > SPDK_DOMAIN_LABEL_MAX_LEN) {
161 			SPDK_ERRLOG("Invalid domain name in NQN \"%s\". At least one Label is too long.\n", nqn);
162 			return false;
163 		}
164 
165 		switch (domain_state) {
166 
167 		case SPDK_NVMF_DOMAIN_ACCEPT_LETTER: {
168 			if (isalpha(nqn[i])) {
169 				domain_state = SPDK_NVMF_DOMAIN_ACCEPT_ANY;
170 				domain_label_length++;
171 				break;
172 			} else {
173 				SPDK_ERRLOG("Invalid domain name in NQN \"%s\". Label names must start with a letter.\n", nqn);
174 				return false;
175 			}
176 		}
177 
178 		case SPDK_NVMF_DOMAIN_ACCEPT_LDH: {
179 			if (isalpha(nqn[i]) || isdigit(nqn[i])) {
180 				domain_state = SPDK_NVMF_DOMAIN_ACCEPT_ANY;
181 				domain_label_length++;
182 				break;
183 			} else if (nqn[i] == '-') {
184 				if (i == reverse_domain_end_index - 1) {
185 					SPDK_ERRLOG("Invalid domain name in NQN \"%s\". Label names must end with an alphanumeric symbol.\n",
186 						    nqn);
187 					return false;
188 				}
189 				domain_state = SPDK_NVMF_DOMAIN_ACCEPT_LDH;
190 				domain_label_length++;
191 				break;
192 			} else if (nqn[i] == '.') {
193 				SPDK_ERRLOG("Invalid domain name in NQN \"%s\". Label names must end with an alphanumeric symbol.\n",
194 					    nqn);
195 				return false;
196 			} else {
197 				SPDK_ERRLOG("Invalid domain name in NQN \"%s\". Label names must contain only [a-z,A-Z,0-9,'-','.'].\n",
198 					    nqn);
199 				return false;
200 			}
201 		}
202 
203 		case SPDK_NVMF_DOMAIN_ACCEPT_ANY: {
204 			if (isalpha(nqn[i]) || isdigit(nqn[i])) {
205 				domain_state = SPDK_NVMF_DOMAIN_ACCEPT_ANY;
206 				domain_label_length++;
207 				break;
208 			} else if (nqn[i] == '-') {
209 				if (i == reverse_domain_end_index - 1) {
210 					SPDK_ERRLOG("Invalid domain name in NQN \"%s\". Label names must end with an alphanumeric symbol.\n",
211 						    nqn);
212 					return false;
213 				}
214 				domain_state = SPDK_NVMF_DOMAIN_ACCEPT_LDH;
215 				domain_label_length++;
216 				break;
217 			} else if (nqn[i] == '.') {
218 				domain_state = SPDK_NVMF_DOMAIN_ACCEPT_LETTER;
219 				domain_label_length = 0;
220 				break;
221 			} else {
222 				SPDK_ERRLOG("Invalid domain name in NQN \"%s\". Label names must contain only [a-z,A-Z,0-9,'-','.'].\n",
223 					    nqn);
224 				return false;
225 			}
226 		}
227 		}
228 	}
229 
230 	i = reverse_domain_end_index + 1;
231 	while (i < len) {
232 		bytes_consumed = utf8_valid(&nqn[i], &nqn[len]);
233 		if (bytes_consumed <= 0) {
234 			SPDK_ERRLOG("Invalid domain name in NQN \"%s\". Label names must contain only valid utf-8.\n", nqn);
235 			return false;
236 		}
237 
238 		i += bytes_consumed;
239 	}
240 	return true;
241 }
242 
243 static void subsystem_state_change_on_pg(struct spdk_io_channel_iter *i);
244 
/* Create a new subsystem on @tgt with the given NQN, subtype and namespace
 * count. The calling thread is recorded as the subsystem's owning thread.
 * Returns NULL on duplicate or invalid NQN, when the target has no free
 * subsystem id, or on allocation failure.
 */
struct spdk_nvmf_subsystem *
spdk_nvmf_subsystem_create(struct spdk_nvmf_tgt *tgt,
			   const char *nqn,
			   enum spdk_nvmf_subtype type,
			   uint32_t num_ns)
{
	struct spdk_nvmf_subsystem	*subsystem;
	uint32_t			sid;

	/* Reject duplicate NQNs up front. */
	if (spdk_nvmf_tgt_find_subsystem(tgt, nqn)) {
		SPDK_ERRLOG("Subsystem NQN '%s' already exists\n", nqn);
		return NULL;
	}

	if (!nvmf_valid_nqn(nqn)) {
		return NULL;
	}

	/* Discovery subsystems carry no namespaces; other subtypes default to
	 * NVMF_SUBSYSTEM_DEFAULT_NAMESPACES when the caller passes 0. */
	if (type == SPDK_NVMF_SUBTYPE_DISCOVERY) {
		if (num_ns != 0) {
			SPDK_ERRLOG("Discovery subsystem cannot have namespaces.\n");
			return NULL;
		}
	} else if (num_ns == 0) {
		num_ns = NVMF_SUBSYSTEM_DEFAULT_NAMESPACES;
	}

	/* Find a free subsystem id (sid) */
	for (sid = 0; sid < tgt->max_subsystems; sid++) {
		if (tgt->subsystems[sid] == NULL) {
			break;
		}
	}
	if (sid >= tgt->max_subsystems) {
		/* Target is at capacity. */
		return NULL;
	}

	subsystem = calloc(1, sizeof(struct spdk_nvmf_subsystem));
	if (subsystem == NULL) {
		return NULL;
	}

	subsystem->thread = spdk_get_thread();
	subsystem->state = SPDK_NVMF_SUBSYSTEM_INACTIVE;
	subsystem->tgt = tgt;
	subsystem->id = sid;
	subsystem->subtype = type;
	subsystem->max_nsid = num_ns;
	subsystem->next_cntlid = 0;
	subsystem->min_cntlid = NVMF_MIN_CNTLID;
	subsystem->max_cntlid = NVMF_MAX_CNTLID;
	snprintf(subsystem->subnqn, sizeof(subsystem->subnqn), "%s", nqn);
	pthread_mutex_init(&subsystem->mutex, NULL);
	TAILQ_INIT(&subsystem->listeners);
	TAILQ_INIT(&subsystem->hosts);
	TAILQ_INIT(&subsystem->ctrlrs);
	subsystem->used_listener_ids = spdk_bit_array_create(NVMF_MAX_LISTENERS_PER_SUBSYSTEM);
	if (subsystem->used_listener_ids == NULL) {
		pthread_mutex_destroy(&subsystem->mutex);
		free(subsystem);
		return NULL;
	}

	if (num_ns != 0) {
		subsystem->ns = calloc(num_ns, sizeof(struct spdk_nvmf_ns *));
		if (subsystem->ns == NULL) {
			SPDK_ERRLOG("Namespace memory allocation failed\n");
			pthread_mutex_destroy(&subsystem->mutex);
			spdk_bit_array_free(&subsystem->used_listener_ids);
			free(subsystem);
			return NULL;
		}
		subsystem->ana_group = calloc(num_ns, sizeof(uint32_t));
		if (subsystem->ana_group == NULL) {
			SPDK_ERRLOG("ANA group memory allocation failed\n");
			pthread_mutex_destroy(&subsystem->mutex);
			free(subsystem->ns);
			spdk_bit_array_free(&subsystem->used_listener_ids);
			free(subsystem);
			return NULL;
		}
	}

	/* Default serial number is all '0' characters, NUL-terminated. */
	memset(subsystem->sn, '0', sizeof(subsystem->sn) - 1);
	subsystem->sn[sizeof(subsystem->sn) - 1] = '\0';

	snprintf(subsystem->mn, sizeof(subsystem->mn), "%s",
		 MODEL_NUMBER_DEFAULT);

	/* Publish the subsystem in the target's id table. */
	tgt->subsystems[sid] = subsystem;

	SPDK_DTRACE_PROBE1(nvmf_subsystem_create, subsystem->subnqn);

	return subsystem;
}
340 
/* Must hold subsystem->mutex while calling this function */
static void
nvmf_subsystem_remove_host(struct spdk_nvmf_subsystem *subsystem, struct spdk_nvmf_host *host)
{
	/* Unlink the host from the subsystem's allowed-host list and release it. */
	TAILQ_REMOVE(&subsystem->hosts, host, link);
	free(host);
}
348 
/* Detach @listener from @subsystem and free it. When @stop is true, also
 * stop the transport-level listen on the listener's transport id.
 */
static void
_nvmf_subsystem_remove_listener(struct spdk_nvmf_subsystem *subsystem,
				struct spdk_nvmf_subsystem_listener *listener,
				bool stop)
{
	struct spdk_nvmf_transport *transport;
	struct spdk_nvmf_ctrlr *ctrlr;

	if (stop) {
		transport = spdk_nvmf_tgt_get_transport(subsystem->tgt, listener->trid->trstring);
		if (transport != NULL) {
			spdk_nvmf_transport_stop_listen(transport, listener->trid);
		}
	}

	/* Clear any controller references to the listener being removed. */
	TAILQ_FOREACH(ctrlr, &subsystem->ctrlrs, link) {
		if (ctrlr->listener == listener) {
			ctrlr->listener = NULL;
		}
	}

	TAILQ_REMOVE(&subsystem->listeners, listener, link);
	/* Refresh the discovery log so it no longer reports this listener. */
	nvmf_update_discovery_log(listener->subsystem->tgt, NULL);
	free(listener->ana_state);
	/* Release the listener id for reuse before freeing the listener itself. */
	spdk_bit_array_clear(subsystem->used_listener_ids, listener->id);
	free(listener);
}
376 
/* Thread-message trampoline: retries subsystem destruction on the
 * subsystem's owning thread.
 */
static void
_nvmf_subsystem_destroy_msg(void *cb_arg)
{
	_nvmf_subsystem_destroy((struct spdk_nvmf_subsystem *)cb_arg);
}
384 
/* Final stage of subsystem destruction. If controllers are still attached,
 * mark the destroy as asynchronous, send a retry message back to the
 * subsystem's owning thread and return -EINPROGRESS. Otherwise remove all
 * namespaces, release all resources and, for an async destroy, invoke the
 * registered completion callback. Returns 0 on synchronous completion.
 */
static int
_nvmf_subsystem_destroy(struct spdk_nvmf_subsystem *subsystem)
{
	struct spdk_nvmf_ns		*ns;
	nvmf_subsystem_destroy_cb	async_destroy_cb = NULL;
	void				*async_destroy_cb_arg = NULL;
	int				rc;

	if (!TAILQ_EMPTY(&subsystem->ctrlrs)) {
		SPDK_DEBUGLOG(nvmf, "subsystem %p %s has active controllers\n", subsystem, subsystem->subnqn);
		subsystem->async_destroy = true;
		rc = spdk_thread_send_msg(subsystem->thread, _nvmf_subsystem_destroy_msg, subsystem);
		if (rc) {
			SPDK_ERRLOG("Failed to send thread msg, rc %d\n", rc);
			assert(0);
			return rc;
		}
		return -EINPROGRESS;
	}

	/* Remove namespaces one at a time; fetch the next pointer before
	 * removal since removing invalidates the current entry. */
	ns = spdk_nvmf_subsystem_get_first_ns(subsystem);
	while (ns != NULL) {
		struct spdk_nvmf_ns *next_ns = spdk_nvmf_subsystem_get_next_ns(subsystem, ns);

		spdk_nvmf_subsystem_remove_ns(subsystem, ns->opts.nsid);
		ns = next_ns;
	}

	free(subsystem->ns);
	free(subsystem->ana_group);

	/* Release the subsystem id slot on the target. */
	subsystem->tgt->subsystems[subsystem->id] = NULL;

	pthread_mutex_destroy(&subsystem->mutex);

	spdk_bit_array_free(&subsystem->used_listener_ids);

	/* Capture the async callback before freeing the subsystem that holds it. */
	if (subsystem->async_destroy) {
		async_destroy_cb = subsystem->async_destroy_cb;
		async_destroy_cb_arg = subsystem->async_destroy_cb_arg;
	}

	free(subsystem);

	if (async_destroy_cb) {
		async_destroy_cb(async_destroy_cb_arg);
	}

	return 0;
}
435 
/* Destroy @subsystem. Must be called on the subsystem's owning thread and
 * only while the subsystem is inactive and not already being destroyed.
 * Removes all listeners and allowed hosts, then hands off to
 * _nvmf_subsystem_destroy(). Returns 0 on synchronous completion,
 * -EINPROGRESS when destruction continues asynchronously (cpl_cb fires on
 * completion), or a negative errno on invalid usage.
 */
int
spdk_nvmf_subsystem_destroy(struct spdk_nvmf_subsystem *subsystem, nvmf_subsystem_destroy_cb cpl_cb,
			    void *cpl_cb_arg)
{
	struct spdk_nvmf_host *host, *host_tmp;

	if (!subsystem) {
		return -EINVAL;
	}

	SPDK_DTRACE_PROBE1(nvmf_subsystem_destroy, subsystem->subnqn);

	assert(spdk_get_thread() == subsystem->thread);

	if (subsystem->state != SPDK_NVMF_SUBSYSTEM_INACTIVE) {
		SPDK_ERRLOG("Subsystem can only be destroyed in inactive state\n");
		assert(0);
		return -EAGAIN;
	}
	if (subsystem->destroying) {
		SPDK_ERRLOG("Subsystem destruction is already started\n");
		assert(0);
		return -EALREADY;
	}

	subsystem->destroying = true;

	SPDK_DEBUGLOG(nvmf, "subsystem is %p %s\n", subsystem, subsystem->subnqn);

	/* Listeners are removed without stopping the transport-level listen. */
	nvmf_subsystem_remove_all_listeners(subsystem, false);

	pthread_mutex_lock(&subsystem->mutex);

	TAILQ_FOREACH_SAFE(host, &subsystem->hosts, link, host_tmp) {
		nvmf_subsystem_remove_host(subsystem, host);
	}

	pthread_mutex_unlock(&subsystem->mutex);

	/* Record the completion callback before the (possibly async) teardown. */
	subsystem->async_destroy_cb = cpl_cb;
	subsystem->async_destroy_cb_arg = cpl_cb_arg;

	return _nvmf_subsystem_destroy(subsystem);
}
480 
481 /* we have to use the typedef in the function declaration to appease astyle. */
482 typedef enum spdk_nvmf_subsystem_state spdk_nvmf_subsystem_state_t;
483 
484 static spdk_nvmf_subsystem_state_t
485 nvmf_subsystem_get_intermediate_state(enum spdk_nvmf_subsystem_state current_state,
486 				      enum spdk_nvmf_subsystem_state requested_state)
487 {
488 	switch (requested_state) {
489 	case SPDK_NVMF_SUBSYSTEM_INACTIVE:
490 		return SPDK_NVMF_SUBSYSTEM_DEACTIVATING;
491 	case SPDK_NVMF_SUBSYSTEM_ACTIVE:
492 		if (current_state == SPDK_NVMF_SUBSYSTEM_PAUSED) {
493 			return SPDK_NVMF_SUBSYSTEM_RESUMING;
494 		} else {
495 			return SPDK_NVMF_SUBSYSTEM_ACTIVATING;
496 		}
497 	case SPDK_NVMF_SUBSYSTEM_PAUSED:
498 		return SPDK_NVMF_SUBSYSTEM_PAUSING;
499 	default:
500 		assert(false);
501 		return SPDK_NVMF_SUBSYSTEM_NUM_STATES;
502 	}
503 }
504 
/* Atomically transition @subsystem to @state. Each target state has one
 * canonical expected predecessor; if the first compare-and-swap fails, a
 * small set of additional legal predecessors (resume races, failed
 * activation or resume, stopping while paused) is tried with a second CAS.
 * Returns 0 when the transition succeeded, non-zero otherwise.
 */
static int
nvmf_subsystem_set_state(struct spdk_nvmf_subsystem *subsystem,
			 enum spdk_nvmf_subsystem_state state)
{
	enum spdk_nvmf_subsystem_state actual_old_state, expected_old_state;
	bool exchanged;

	/* Pick the canonical predecessor for the requested state. */
	switch (state) {
	case SPDK_NVMF_SUBSYSTEM_INACTIVE:
		expected_old_state = SPDK_NVMF_SUBSYSTEM_DEACTIVATING;
		break;
	case SPDK_NVMF_SUBSYSTEM_ACTIVATING:
		expected_old_state = SPDK_NVMF_SUBSYSTEM_INACTIVE;
		break;
	case SPDK_NVMF_SUBSYSTEM_ACTIVE:
		expected_old_state = SPDK_NVMF_SUBSYSTEM_ACTIVATING;
		break;
	case SPDK_NVMF_SUBSYSTEM_PAUSING:
		expected_old_state = SPDK_NVMF_SUBSYSTEM_ACTIVE;
		break;
	case SPDK_NVMF_SUBSYSTEM_PAUSED:
		expected_old_state = SPDK_NVMF_SUBSYSTEM_PAUSING;
		break;
	case SPDK_NVMF_SUBSYSTEM_RESUMING:
		expected_old_state = SPDK_NVMF_SUBSYSTEM_PAUSED;
		break;
	case SPDK_NVMF_SUBSYSTEM_DEACTIVATING:
		expected_old_state = SPDK_NVMF_SUBSYSTEM_ACTIVE;
		break;
	default:
		assert(false);
		return -1;
	}

	actual_old_state = expected_old_state;
	exchanged = __atomic_compare_exchange_n(&subsystem->state, &actual_old_state, state, false,
						__ATOMIC_RELAXED, __ATOMIC_RELAXED);
	if (spdk_unlikely(exchanged == false)) {
		/* actual_old_state now holds the observed state; check the
		 * alternative legal predecessors and retry once. */
		if (actual_old_state == SPDK_NVMF_SUBSYSTEM_RESUMING &&
		    state == SPDK_NVMF_SUBSYSTEM_ACTIVE) {
			expected_old_state = SPDK_NVMF_SUBSYSTEM_RESUMING;
		}
		/* This is for the case when activating the subsystem fails. */
		if (actual_old_state == SPDK_NVMF_SUBSYSTEM_ACTIVATING &&
		    state == SPDK_NVMF_SUBSYSTEM_DEACTIVATING) {
			expected_old_state = SPDK_NVMF_SUBSYSTEM_ACTIVATING;
		}
		/* This is for the case when resuming the subsystem fails. */
		if (actual_old_state == SPDK_NVMF_SUBSYSTEM_RESUMING &&
		    state == SPDK_NVMF_SUBSYSTEM_PAUSING) {
			expected_old_state = SPDK_NVMF_SUBSYSTEM_RESUMING;
		}
		/* This is for the case when stopping paused subsystem */
		if (actual_old_state == SPDK_NVMF_SUBSYSTEM_PAUSED &&
		    state == SPDK_NVMF_SUBSYSTEM_DEACTIVATING) {
			expected_old_state = SPDK_NVMF_SUBSYSTEM_PAUSED;
		}
		actual_old_state = expected_old_state;
		__atomic_compare_exchange_n(&subsystem->state, &actual_old_state, state, false,
					    __ATOMIC_RELAXED, __ATOMIC_RELAXED);
	}
	/* On success actual_old_state equals expected_old_state and the
	 * difference below is 0; any mismatch yields a non-zero result. */
	assert(actual_old_state == expected_old_state);
	return actual_old_state - expected_old_state;
}
569 
/* Context carried through a subsystem state change across all poll groups. */
struct subsystem_state_change_ctx {
	struct spdk_nvmf_subsystem		*subsystem;
	/* Forwarded to nvmf_poll_group_pause_subsystem() when pausing. */
	uint16_t				nsid;

	/* State held before the change began; used to revert on failure. */
	enum spdk_nvmf_subsystem_state		original_state;
	/* State the caller asked for. */
	enum spdk_nvmf_subsystem_state		requested_state;

	/* Completion callback and its argument; cb_fn may be NULL. */
	spdk_nvmf_subsystem_state_change_done	cb_fn;
	void					*cb_arg;
};
580 
581 static void
582 subsystem_state_change_revert_done(struct spdk_io_channel_iter *i, int status)
583 {
584 	struct subsystem_state_change_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
585 
586 	/* Nothing to be done here if the state setting fails, we are just screwed. */
587 	if (nvmf_subsystem_set_state(ctx->subsystem, ctx->requested_state)) {
588 		SPDK_ERRLOG("Unable to revert the subsystem state after operation failure.\n");
589 	}
590 
591 	ctx->subsystem->changing_state = false;
592 	if (ctx->cb_fn) {
593 		/* return a failure here. This function only exists in an error path. */
594 		ctx->cb_fn(ctx->subsystem, ctx->cb_arg, -1);
595 	}
596 	free(ctx);
597 }
598 
/* Completion of the per-poll-group state-change walk. On success, commit
 * the requested state. On failure, move to the intermediate state leading
 * back toward the original state and replay the walk in reverse, finishing
 * in subsystem_state_change_revert_done().
 */
static void
subsystem_state_change_done(struct spdk_io_channel_iter *i, int status)
{
	struct subsystem_state_change_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
	enum spdk_nvmf_subsystem_state intermediate_state;

	SPDK_DTRACE_PROBE4(nvmf_subsystem_change_state_done, ctx->subsystem->subnqn,
			   ctx->requested_state, ctx->original_state, status);

	if (status == 0) {
		status = nvmf_subsystem_set_state(ctx->subsystem, ctx->requested_state);
		if (status) {
			/* Normalize the set_state result to a plain failure code. */
			status = -1;
		}
	}

	if (status) {
		/* Arguments are deliberately swapped: we are now moving from the
		 * requested state back toward the original state. */
		intermediate_state = nvmf_subsystem_get_intermediate_state(ctx->requested_state,
				     ctx->original_state);
		assert(intermediate_state != SPDK_NVMF_SUBSYSTEM_NUM_STATES);

		if (nvmf_subsystem_set_state(ctx->subsystem, intermediate_state)) {
			goto out;
		}
		ctx->requested_state = ctx->original_state;
		spdk_for_each_channel(ctx->subsystem->tgt,
				      subsystem_state_change_on_pg,
				      ctx,
				      subsystem_state_change_revert_done);
		return;
	}

out:
	ctx->subsystem->changing_state = false;
	if (ctx->cb_fn) {
		ctx->cb_fn(ctx->subsystem, ctx->cb_arg, status);
	}
	free(ctx);
}
638 
639 static void
640 subsystem_state_change_continue(void *ctx, int status)
641 {
642 	struct spdk_io_channel_iter *i = ctx;
643 	struct subsystem_state_change_ctx *_ctx __attribute__((unused));
644 
645 	_ctx = spdk_io_channel_iter_get_ctx(i);
646 	SPDK_DTRACE_PROBE3(nvmf_pg_change_state_done, _ctx->subsystem->subnqn,
647 			   _ctx->requested_state, spdk_thread_get_id(spdk_get_thread()));
648 
649 	spdk_for_each_channel_continue(i, status);
650 }
651 
/* Per-poll-group step of a subsystem state change: dispatch the add,
 * remove, pause or resume operation matching the requested state. Each
 * operation continues the channel iteration via
 * subsystem_state_change_continue().
 */
static void
subsystem_state_change_on_pg(struct spdk_io_channel_iter *i)
{
	struct subsystem_state_change_ctx *ctx;
	struct spdk_io_channel *ch;
	struct spdk_nvmf_poll_group *group;

	ctx = spdk_io_channel_iter_get_ctx(i);
	ch = spdk_io_channel_iter_get_channel(i);
	group = spdk_io_channel_get_ctx(ch);

	SPDK_DTRACE_PROBE3(nvmf_pg_change_state, ctx->subsystem->subnqn,
			   ctx->requested_state, spdk_thread_get_id(spdk_get_thread()));
	switch (ctx->requested_state) {
	case SPDK_NVMF_SUBSYSTEM_INACTIVE:
		nvmf_poll_group_remove_subsystem(group, ctx->subsystem, subsystem_state_change_continue, i);
		break;
	case SPDK_NVMF_SUBSYSTEM_ACTIVE:
		/* ACTIVATING means a fresh add; RESUMING means the subsystem
		 * was paused and is being resumed. */
		if (ctx->subsystem->state == SPDK_NVMF_SUBSYSTEM_ACTIVATING) {
			nvmf_poll_group_add_subsystem(group, ctx->subsystem, subsystem_state_change_continue, i);
		} else if (ctx->subsystem->state == SPDK_NVMF_SUBSYSTEM_RESUMING) {
			nvmf_poll_group_resume_subsystem(group, ctx->subsystem, subsystem_state_change_continue, i);
		}
		break;
	case SPDK_NVMF_SUBSYSTEM_PAUSED:
		nvmf_poll_group_pause_subsystem(group, ctx->subsystem, ctx->nsid, subsystem_state_change_continue,
						i);
		break;
	default:
		assert(false);
		break;
	}
}
685 
/* Drive @subsystem toward @requested_state. Only one state change may be
 * in flight at a time (-EBUSY otherwise). Moves the subsystem into the
 * matching intermediate state, then walks every poll group; completion and
 * any rollback happen in subsystem_state_change_done(). Returns 0 when the
 * change was started (or was a no-op), negative errno on failure to start.
 */
static int
nvmf_subsystem_state_change(struct spdk_nvmf_subsystem *subsystem,
			    uint32_t nsid,
			    enum spdk_nvmf_subsystem_state requested_state,
			    spdk_nvmf_subsystem_state_change_done cb_fn,
			    void *cb_arg)
{
	struct subsystem_state_change_ctx *ctx;
	enum spdk_nvmf_subsystem_state intermediate_state;
	int rc;

	/* Atomically claim the single state-change slot. */
	if (__sync_val_compare_and_swap(&subsystem->changing_state, false, true)) {
		return -EBUSY;
	}

	SPDK_DTRACE_PROBE3(nvmf_subsystem_change_state, subsystem->subnqn,
			   requested_state, subsystem->state);
	/* If we are already in the requested state, just call the callback immediately. */
	if (subsystem->state == requested_state) {
		subsystem->changing_state = false;
		if (cb_fn) {
			cb_fn(subsystem, cb_arg, 0);
		}
		return 0;
	}

	intermediate_state = nvmf_subsystem_get_intermediate_state(subsystem->state, requested_state);
	assert(intermediate_state != SPDK_NVMF_SUBSYSTEM_NUM_STATES);

	ctx = calloc(1, sizeof(*ctx));
	if (!ctx) {
		subsystem->changing_state = false;
		return -ENOMEM;
	}

	/* Record the pre-change state before mutating it, for rollback. */
	ctx->original_state = subsystem->state;
	rc = nvmf_subsystem_set_state(subsystem, intermediate_state);
	if (rc) {
		free(ctx);
		subsystem->changing_state = false;
		return rc;
	}

	ctx->subsystem = subsystem;
	ctx->nsid = nsid;
	ctx->requested_state = requested_state;
	ctx->cb_fn = cb_fn;
	ctx->cb_arg = cb_arg;

	spdk_for_each_channel(subsystem->tgt,
			      subsystem_state_change_on_pg,
			      ctx,
			      subsystem_state_change_done);

	return 0;
}
742 
/* Transition @subsystem to the ACTIVE state; cb_fn fires on completion. */
int
spdk_nvmf_subsystem_start(struct spdk_nvmf_subsystem *subsystem,
			  spdk_nvmf_subsystem_state_change_done cb_fn,
			  void *cb_arg)
{
	return nvmf_subsystem_state_change(subsystem, 0, SPDK_NVMF_SUBSYSTEM_ACTIVE, cb_fn, cb_arg);
}
750 
/* Transition @subsystem to the INACTIVE state; cb_fn fires on completion. */
int
spdk_nvmf_subsystem_stop(struct spdk_nvmf_subsystem *subsystem,
			 spdk_nvmf_subsystem_state_change_done cb_fn,
			 void *cb_arg)
{
	return nvmf_subsystem_state_change(subsystem, 0, SPDK_NVMF_SUBSYSTEM_INACTIVE, cb_fn, cb_arg);
}
758 
/* Transition @subsystem to the PAUSED state; @nsid is forwarded to the
 * per-poll-group pause operation. cb_fn fires on completion. */
int
spdk_nvmf_subsystem_pause(struct spdk_nvmf_subsystem *subsystem,
			  uint32_t nsid,
			  spdk_nvmf_subsystem_state_change_done cb_fn,
			  void *cb_arg)
{
	return nvmf_subsystem_state_change(subsystem, nsid, SPDK_NVMF_SUBSYSTEM_PAUSED, cb_fn, cb_arg);
}
767 
/* Transition @subsystem back to the ACTIVE state (resume from pause);
 * cb_fn fires on completion. */
int
spdk_nvmf_subsystem_resume(struct spdk_nvmf_subsystem *subsystem,
			   spdk_nvmf_subsystem_state_change_done cb_fn,
			   void *cb_arg)
{
	return nvmf_subsystem_state_change(subsystem, 0, SPDK_NVMF_SUBSYSTEM_ACTIVE, cb_fn, cb_arg);
}
775 
776 struct spdk_nvmf_subsystem *
777 spdk_nvmf_subsystem_get_first(struct spdk_nvmf_tgt *tgt)
778 {
779 	struct spdk_nvmf_subsystem	*subsystem;
780 	uint32_t sid;
781 
782 	for (sid = 0; sid < tgt->max_subsystems; sid++) {
783 		subsystem = tgt->subsystems[sid];
784 		if (subsystem) {
785 			return subsystem;
786 		}
787 	}
788 
789 	return NULL;
790 }
791 
792 struct spdk_nvmf_subsystem *
793 spdk_nvmf_subsystem_get_next(struct spdk_nvmf_subsystem *subsystem)
794 {
795 	uint32_t sid;
796 	struct spdk_nvmf_tgt *tgt;
797 
798 	if (!subsystem) {
799 		return NULL;
800 	}
801 
802 	tgt = subsystem->tgt;
803 
804 	for (sid = subsystem->id + 1; sid < tgt->max_subsystems; sid++) {
805 		subsystem = tgt->subsystems[sid];
806 		if (subsystem) {
807 			return subsystem;
808 		}
809 	}
810 
811 	return NULL;
812 }
813 
814 /* Must hold subsystem->mutex while calling this function */
815 static struct spdk_nvmf_host *
816 nvmf_subsystem_find_host(struct spdk_nvmf_subsystem *subsystem, const char *hostnqn)
817 {
818 	struct spdk_nvmf_host *host = NULL;
819 
820 	TAILQ_FOREACH(host, &subsystem->hosts, link) {
821 		if (strcmp(hostnqn, host->nqn) == 0) {
822 			return host;
823 		}
824 	}
825 
826 	return NULL;
827 }
828 
/* Allow the host identified by @hostnqn to connect to @subsystem.
 * Validates the NQN, is idempotent for already-allowed hosts, and
 * refreshes the discovery log when the subsystem has listeners.
 * Returns 0 on success, -EINVAL for an invalid NQN, -ENOMEM on
 * allocation failure.
 */
int
spdk_nvmf_subsystem_add_host(struct spdk_nvmf_subsystem *subsystem, const char *hostnqn)
{
	struct spdk_nvmf_host *host;

	if (!nvmf_valid_nqn(hostnqn)) {
		return -EINVAL;
	}

	pthread_mutex_lock(&subsystem->mutex);

	if (nvmf_subsystem_find_host(subsystem, hostnqn)) {
		/* This subsystem already allows the specified host. */
		pthread_mutex_unlock(&subsystem->mutex);
		return 0;
	}

	host = calloc(1, sizeof(*host));
	if (!host) {
		pthread_mutex_unlock(&subsystem->mutex);
		return -ENOMEM;
	}

	snprintf(host->nqn, sizeof(host->nqn), "%s", hostnqn);

	SPDK_DTRACE_PROBE2(nvmf_subsystem_add_host, subsystem->subnqn, host->nqn);

	TAILQ_INSERT_HEAD(&subsystem->hosts, host, link);

	/* Only subsystems with listeners are visible in the discovery log. */
	if (!TAILQ_EMPTY(&subsystem->listeners)) {
		nvmf_update_discovery_log(subsystem->tgt, hostnqn);
	}

	pthread_mutex_unlock(&subsystem->mutex);

	return 0;
}
866 
/* Remove @hostnqn from the subsystem's allowed-host list, refreshing the
 * discovery log when the subsystem has listeners. Returns -ENOENT when
 * the host was not in the list. Existing connections from the host are
 * not torn down here (see spdk_nvmf_subsystem_disconnect_host()).
 */
int
spdk_nvmf_subsystem_remove_host(struct spdk_nvmf_subsystem *subsystem, const char *hostnqn)
{
	struct spdk_nvmf_host *host;

	pthread_mutex_lock(&subsystem->mutex);

	host = nvmf_subsystem_find_host(subsystem, hostnqn);
	if (host == NULL) {
		pthread_mutex_unlock(&subsystem->mutex);
		return -ENOENT;
	}

	SPDK_DTRACE_PROBE2(nvmf_subsystem_remove_host, subsystem->subnqn, host->nqn);

	nvmf_subsystem_remove_host(subsystem, host);

	if (!TAILQ_EMPTY(&subsystem->listeners)) {
		nvmf_update_discovery_log(subsystem->tgt, hostnqn);
	}

	pthread_mutex_unlock(&subsystem->mutex);

	return 0;
}
892 
/* Context for the disconnect-host walk across all poll groups. */
struct nvmf_subsystem_disconnect_host_ctx {
	struct spdk_nvmf_subsystem		*subsystem;
	/* Heap-allocated copy of the host NQN; freed in the fini callback. */
	char					*hostnqn;
	/* Completion callback and its argument; cb_fn may be NULL. */
	spdk_nvmf_tgt_subsystem_listen_done_fn	cb_fn;
	void					*cb_arg;
};
899 
900 static void
901 nvmf_subsystem_disconnect_host_fini(struct spdk_io_channel_iter *i, int status)
902 {
903 	struct nvmf_subsystem_disconnect_host_ctx *ctx;
904 
905 	ctx = spdk_io_channel_iter_get_ctx(i);
906 
907 	if (ctx->cb_fn) {
908 		ctx->cb_fn(ctx->cb_arg, status);
909 	}
910 	free(ctx->hostnqn);
911 	free(ctx);
912 }
913 
/* Per-poll-group worker: disconnect every qpair on this group whose
 * controller belongs to ctx->subsystem and whose host NQN matches
 * ctx->hostnqn, then continue the channel iteration.
 */
static void
nvmf_subsystem_disconnect_qpairs_by_host(struct spdk_io_channel_iter *i)
{
	struct nvmf_subsystem_disconnect_host_ctx *ctx;
	struct spdk_nvmf_poll_group *group;
	struct spdk_io_channel *ch;
	struct spdk_nvmf_qpair *qpair, *tmp_qpair;
	struct spdk_nvmf_ctrlr *ctrlr;

	ctx = spdk_io_channel_iter_get_ctx(i);
	ch = spdk_io_channel_iter_get_channel(i);
	group = spdk_io_channel_get_ctx(ch);

	/* SAFE variant: disconnecting may unlink the qpair from the list. */
	TAILQ_FOREACH_SAFE(qpair, &group->qpairs, link, tmp_qpair) {
		ctrlr = qpair->ctrlr;

		/* Skip qpairs with no controller or from another subsystem. */
		if (ctrlr == NULL || ctrlr->subsys != ctx->subsystem) {
			continue;
		}

		if (strncmp(ctrlr->hostnqn, ctx->hostnqn, sizeof(ctrlr->hostnqn)) == 0) {
			/* Right now this does not wait for the queue pairs to actually disconnect. */
			spdk_nvmf_qpair_disconnect(qpair, NULL, NULL);
		}
	}
	spdk_for_each_channel_continue(i, 0);
}
941 
942 int
943 spdk_nvmf_subsystem_disconnect_host(struct spdk_nvmf_subsystem *subsystem,
944 				    const char *hostnqn,
945 				    spdk_nvmf_tgt_subsystem_listen_done_fn cb_fn,
946 				    void *cb_arg)
947 {
948 	struct nvmf_subsystem_disconnect_host_ctx *ctx;
949 
950 	ctx = calloc(1, sizeof(struct nvmf_subsystem_disconnect_host_ctx));
951 	if (ctx == NULL) {
952 		return -ENOMEM;
953 	}
954 
955 	ctx->hostnqn = strdup(hostnqn);
956 	if (ctx->hostnqn == NULL) {
957 		free(ctx);
958 		return -ENOMEM;
959 	}
960 
961 	ctx->subsystem = subsystem;
962 	ctx->cb_fn = cb_fn;
963 	ctx->cb_arg = cb_arg;
964 
965 	spdk_for_each_channel(subsystem->tgt, nvmf_subsystem_disconnect_qpairs_by_host, ctx,
966 			      nvmf_subsystem_disconnect_host_fini);
967 
968 	return 0;
969 }
970 
/* Set whether any host may connect to @subsystem, refreshing the
 * discovery log when the subsystem has listeners. Always returns 0.
 */
int
spdk_nvmf_subsystem_set_allow_any_host(struct spdk_nvmf_subsystem *subsystem, bool allow_any_host)
{
	pthread_mutex_lock(&subsystem->mutex);
	subsystem->flags.allow_any_host = allow_any_host;
	if (!TAILQ_EMPTY(&subsystem->listeners)) {
		nvmf_update_discovery_log(subsystem->tgt, NULL);
	}
	pthread_mutex_unlock(&subsystem->mutex);

	return 0;
}
983 
/* Return the current "allow any host" flag, read under the subsystem mutex. */
bool
spdk_nvmf_subsystem_get_allow_any_host(const struct spdk_nvmf_subsystem *subsystem)
{
	bool allow_any_host;
	struct spdk_nvmf_subsystem *sub;

	/* Technically, taking the mutex modifies data in the subsystem. But the const
	 * is still important to convey that this doesn't mutate any other data. Cast
	 * it away to work around this. */
	sub = (struct spdk_nvmf_subsystem *)subsystem;

	pthread_mutex_lock(&sub->mutex);
	allow_any_host = sub->flags.allow_any_host;
	pthread_mutex_unlock(&sub->mutex);

	return allow_any_host;
}
1001 
1002 bool
1003 spdk_nvmf_subsystem_host_allowed(struct spdk_nvmf_subsystem *subsystem, const char *hostnqn)
1004 {
1005 	bool allowed;
1006 
1007 	if (!hostnqn) {
1008 		return false;
1009 	}
1010 
1011 	pthread_mutex_lock(&subsystem->mutex);
1012 
1013 	if (subsystem->flags.allow_any_host) {
1014 		pthread_mutex_unlock(&subsystem->mutex);
1015 		return true;
1016 	}
1017 
1018 	allowed =  nvmf_subsystem_find_host(subsystem, hostnqn) != NULL;
1019 	pthread_mutex_unlock(&subsystem->mutex);
1020 
1021 	return allowed;
1022 }
1023 
/* Return the first host on the subsystem's whitelist, or NULL if empty. */
struct spdk_nvmf_host *
spdk_nvmf_subsystem_get_first_host(struct spdk_nvmf_subsystem *subsystem)
{
	return TAILQ_FIRST(&subsystem->hosts);
}
1029 
1030 
/* Return the host after prev_host in the whitelist, or NULL at the end. */
struct spdk_nvmf_host *
spdk_nvmf_subsystem_get_next_host(struct spdk_nvmf_subsystem *subsystem,
				  struct spdk_nvmf_host *prev_host)
{
	return TAILQ_NEXT(prev_host, link);
}
1037 
/* Return the NQN string of a whitelisted host entry. */
const char *
spdk_nvmf_host_get_nqn(const struct spdk_nvmf_host *host)
{
	return host->nqn;
}
1043 
1044 struct spdk_nvmf_subsystem_listener *
1045 nvmf_subsystem_find_listener(struct spdk_nvmf_subsystem *subsystem,
1046 			     const struct spdk_nvme_transport_id *trid)
1047 {
1048 	struct spdk_nvmf_subsystem_listener *listener;
1049 
1050 	TAILQ_FOREACH(listener, &subsystem->listeners, link) {
1051 		if (spdk_nvme_transport_id_compare(listener->trid, trid) == 0) {
1052 			return listener;
1053 		}
1054 	}
1055 
1056 	return NULL;
1057 }
1058 
1059 /**
1060  * Function to be called once the target is listening.
1061  *
1062  * \param ctx Context argument passed to this function.
1063  * \param status 0 if it completed successfully, or negative errno if it failed.
1064  */
1065 static void
1066 _nvmf_subsystem_add_listener_done(void *ctx, int status)
1067 {
1068 	struct spdk_nvmf_subsystem_listener *listener = ctx;
1069 
1070 	if (status) {
1071 		listener->cb_fn(listener->cb_arg, status);
1072 		free(listener);
1073 		return;
1074 	}
1075 
1076 	TAILQ_INSERT_HEAD(&listener->subsystem->listeners, listener, link);
1077 	nvmf_update_discovery_log(listener->subsystem->tgt, NULL);
1078 	listener->cb_fn(listener->cb_arg, status);
1079 }
1080 
/*
 * Associate a transport listener (identified by trid) with this subsystem so
 * hosts may connect through that address.  The transport and its listener
 * must already exist.  Requires the subsystem to be INACTIVE or PAUSED.  The
 * result is reported asynchronously-style through cb_fn (required).
 */
void
spdk_nvmf_subsystem_add_listener(struct spdk_nvmf_subsystem *subsystem,
				 struct spdk_nvme_transport_id *trid,
				 spdk_nvmf_tgt_subsystem_listen_done_fn cb_fn,
				 void *cb_arg)
{
	struct spdk_nvmf_transport *transport;
	struct spdk_nvmf_subsystem_listener *listener;
	struct spdk_nvmf_listener *tr_listener;
	uint32_t i;
	uint32_t id;
	int rc = 0;

	assert(cb_fn != NULL);

	/* Listeners may only be modified while no controllers are running. */
	if (!(subsystem->state == SPDK_NVMF_SUBSYSTEM_INACTIVE ||
	      subsystem->state == SPDK_NVMF_SUBSYSTEM_PAUSED)) {
		cb_fn(cb_arg, -EAGAIN);
		return;
	}

	if (nvmf_subsystem_find_listener(subsystem, trid)) {
		/* Listener already exists in this subsystem */
		cb_fn(cb_arg, 0);
		return;
	}

	transport = spdk_nvmf_tgt_get_transport(subsystem->tgt, trid->trstring);
	if (!transport) {
		SPDK_ERRLOG("Unable to find %s transport. The transport must be created first also make sure it is properly registered.\n",
			    trid->trstring);
		cb_fn(cb_arg, -EINVAL);
		return;
	}

	/* The target-level listener must have been created before it can be
	 * associated with a subsystem. */
	tr_listener = nvmf_transport_find_listener(transport, trid);
	if (!tr_listener) {
		SPDK_ERRLOG("Cannot find transport listener for %s\n", trid->traddr);
		cb_fn(cb_arg, -EINVAL);
		return;
	}

	listener = calloc(1, sizeof(*listener));
	if (!listener) {
		cb_fn(cb_arg, -ENOMEM);
		return;
	}

	listener->trid = &tr_listener->trid;
	listener->transport = transport;
	listener->cb_fn = cb_fn;
	listener->cb_arg = cb_arg;
	listener->subsystem = subsystem;
	/* Per-namespace ANA state for this listener, indexed by nsid - 1. */
	listener->ana_state = calloc(subsystem->max_nsid, sizeof(enum spdk_nvme_ana_state));
	if (!listener->ana_state) {
		free(listener);
		cb_fn(cb_arg, -ENOMEM);
		return;
	}

	/* Allocate a unique listener id from the subsystem-wide bit array. */
	id = spdk_bit_array_find_first_clear(subsystem->used_listener_ids, 0);
	if (id == UINT32_MAX) {
		SPDK_ERRLOG("Cannot add any more listeners\n");
		free(listener->ana_state);
		free(listener);
		cb_fn(cb_arg, -EINVAL);
		return;
	}

	spdk_bit_array_set(subsystem->used_listener_ids, id);
	listener->id = id;

	for (i = 0; i < subsystem->max_nsid; i++) {
		listener->ana_state[i] = SPDK_NVME_ANA_OPTIMIZED_STATE;
	}

	/* Give the transport a chance to accept or reject the association. */
	if (transport->ops->listen_associate != NULL) {
		rc = transport->ops->listen_associate(transport, subsystem, trid);
	}

	SPDK_DTRACE_PROBE4(nvmf_subsystem_add_listener, subsystem->subnqn, listener->trid->trtype,
			   listener->trid->traddr, listener->trid->trsvcid);

	/* Completes the operation: links the listener on success, frees it on
	 * failure, and invokes cb_fn either way. */
	_nvmf_subsystem_add_listener_done(listener, rc);
}
1166 
1167 int
1168 spdk_nvmf_subsystem_remove_listener(struct spdk_nvmf_subsystem *subsystem,
1169 				    const struct spdk_nvme_transport_id *trid)
1170 {
1171 	struct spdk_nvmf_subsystem_listener *listener;
1172 
1173 	if (!(subsystem->state == SPDK_NVMF_SUBSYSTEM_INACTIVE ||
1174 	      subsystem->state == SPDK_NVMF_SUBSYSTEM_PAUSED)) {
1175 		return -EAGAIN;
1176 	}
1177 
1178 	listener = nvmf_subsystem_find_listener(subsystem, trid);
1179 	if (listener == NULL) {
1180 		return -ENOENT;
1181 	}
1182 
1183 	SPDK_DTRACE_PROBE4(nvmf_subsystem_remove_listener, subsystem->subnqn, listener->trid->trtype,
1184 			   listener->trid->traddr, listener->trid->trsvcid);
1185 
1186 	_nvmf_subsystem_remove_listener(subsystem, listener, false);
1187 
1188 	return 0;
1189 }
1190 
/*
 * Remove every listener from the subsystem.  If stop is true, also stop the
 * underlying transport listeners.  Uses the _SAFE iterator because
 * _nvmf_subsystem_remove_listener unlinks each entry as it goes (helper not
 * visible here - behavior assumed from the iterator choice).
 */
void
nvmf_subsystem_remove_all_listeners(struct spdk_nvmf_subsystem *subsystem,
				    bool stop)
{
	struct spdk_nvmf_subsystem_listener *listener, *listener_tmp;

	TAILQ_FOREACH_SAFE(listener, &subsystem->listeners, link, listener_tmp) {
		_nvmf_subsystem_remove_listener(subsystem, listener, stop);
	}
}
1201 
1202 bool
1203 spdk_nvmf_subsystem_listener_allowed(struct spdk_nvmf_subsystem *subsystem,
1204 				     const struct spdk_nvme_transport_id *trid)
1205 {
1206 	struct spdk_nvmf_subsystem_listener *listener;
1207 
1208 	TAILQ_FOREACH(listener, &subsystem->listeners, link) {
1209 		if (spdk_nvme_transport_id_compare(listener->trid, trid) == 0) {
1210 			return true;
1211 		}
1212 	}
1213 
1214 	if (!strcmp(subsystem->subnqn, SPDK_NVMF_DISCOVERY_NQN)) {
1215 		SPDK_WARNLOG("Allowing connection to discovery subsystem on %s/%s/%s, "
1216 			     "even though this listener was not added to the discovery "
1217 			     "subsystem.  This behavior is deprecated and will be removed "
1218 			     "in a future release.\n",
1219 			     spdk_nvme_transport_id_trtype_str(trid->trtype), trid->traddr, trid->trsvcid);
1220 		return true;
1221 	}
1222 
1223 	return false;
1224 }
1225 
/* Return the first listener on the subsystem, or NULL if none. */
struct spdk_nvmf_subsystem_listener *
spdk_nvmf_subsystem_get_first_listener(struct spdk_nvmf_subsystem *subsystem)
{
	return TAILQ_FIRST(&subsystem->listeners);
}
1231 
/* Return the listener after prev_listener, or NULL at the end of the list. */
struct spdk_nvmf_subsystem_listener *
spdk_nvmf_subsystem_get_next_listener(struct spdk_nvmf_subsystem *subsystem,
				      struct spdk_nvmf_subsystem_listener *prev_listener)
{
	return TAILQ_NEXT(prev_listener, link);
}
1238 
/* Return the transport ID associated with a subsystem listener. */
const struct spdk_nvme_transport_id *
spdk_nvmf_subsystem_listener_get_trid(struct spdk_nvmf_subsystem_listener *listener)
{
	return listener->trid;
}
1244 
/* Set whether hosts may connect through listeners not added to this
 * subsystem.  Note: unlike allow_any_host, this flag is not mutex-protected.
 */
void
spdk_nvmf_subsystem_allow_any_listener(struct spdk_nvmf_subsystem *subsystem,
				       bool allow_any_listener)
{
	subsystem->flags.allow_any_listener = allow_any_listener;
}
1251 
/* Return the allow_any_listener flag.  (Function name spelling "subsytem" is
 * part of the public API and must be preserved.) */
bool
spdk_nvmf_subsytem_any_listener_allowed(struct spdk_nvmf_subsystem *subsystem)
{
	return subsystem->flags.allow_any_listener;
}
1257 
1258 
/* Context carried through the per-poll-group namespace update iteration. */
struct subsystem_update_ns_ctx {
	struct spdk_nvmf_subsystem *subsystem;

	/* Completion callback invoked after every poll group has been updated. */
	spdk_nvmf_subsystem_state_change_done cb_fn;
	void *cb_arg;
};
1265 
/* Completion for the namespace-update channel iteration: report status to
 * the optional user callback and release the context. */
static void
subsystem_update_ns_done(struct spdk_io_channel_iter *i, int status)
{
	struct subsystem_update_ns_ctx *ctx = spdk_io_channel_iter_get_ctx(i);

	if (ctx->cb_fn) {
		ctx->cb_fn(ctx->subsystem, ctx->cb_arg, status);
	}
	free(ctx);
}
1276 
1277 static void
1278 subsystem_update_ns_on_pg(struct spdk_io_channel_iter *i)
1279 {
1280 	int rc;
1281 	struct subsystem_update_ns_ctx *ctx;
1282 	struct spdk_nvmf_poll_group *group;
1283 	struct spdk_nvmf_subsystem *subsystem;
1284 
1285 	ctx = spdk_io_channel_iter_get_ctx(i);
1286 	group = spdk_io_channel_get_ctx(spdk_io_channel_iter_get_channel(i));
1287 	subsystem = ctx->subsystem;
1288 
1289 	rc = nvmf_poll_group_update_subsystem(group, subsystem);
1290 	spdk_for_each_channel_continue(i, rc);
1291 }
1292 
/* Kick off a namespace-update pass over every poll group of the target.
 * cpl runs once all groups are done; ctx is forwarded to each step.
 * Always returns 0. */
static int
nvmf_subsystem_update_ns(struct spdk_nvmf_subsystem *subsystem, spdk_channel_for_each_cpl cpl,
			 void *ctx)
{
	spdk_for_each_channel(subsystem->tgt,
			      subsystem_update_ns_on_pg,
			      ctx,
			      cpl);

	return 0;
}
1304 
/* Notify every controller on the subsystem that the given namespace changed
 * (added, removed, or resized). */
static void
nvmf_subsystem_ns_changed(struct spdk_nvmf_subsystem *subsystem, uint32_t nsid)
{
	struct spdk_nvmf_ctrlr *ctrlr;

	TAILQ_FOREACH(ctrlr, &subsystem->ctrlrs, link) {
		nvmf_ctrlr_ns_changed(ctrlr, nsid);
	}
}
1314 
1315 static uint32_t
1316 nvmf_ns_reservation_clear_all_registrants(struct spdk_nvmf_ns *ns);
1317 
/*
 * Remove namespace nsid from the subsystem, releasing the backing bdev, the
 * reservation state, and the transport-side per-ns resources, then notify
 * all controllers.  The subsystem must be INACTIVE or PAUSED.  Returns 0 on
 * success, -1 on invalid state, invalid nsid, or unallocated nsid.
 */
int
spdk_nvmf_subsystem_remove_ns(struct spdk_nvmf_subsystem *subsystem, uint32_t nsid)
{
	struct spdk_nvmf_transport *transport;
	struct spdk_nvmf_ns *ns;

	if (!(subsystem->state == SPDK_NVMF_SUBSYSTEM_INACTIVE ||
	      subsystem->state == SPDK_NVMF_SUBSYSTEM_PAUSED)) {
		assert(false);
		return -1;
	}

	if (nsid == 0 || nsid > subsystem->max_nsid) {
		return -1;
	}

	/* The ns array is indexed by nsid - 1. */
	ns = subsystem->ns[nsid - 1];
	if (!ns) {
		return -1;
	}

	subsystem->ns[nsid - 1] = NULL;

	/* Drop this namespace's membership from its ANA group refcount. */
	assert(ns->anagrpid - 1 < subsystem->max_nsid);
	assert(subsystem->ana_group[ns->anagrpid - 1] > 0);

	subsystem->ana_group[ns->anagrpid - 1]--;

	free(ns->ptpl_file);
	nvmf_ns_reservation_clear_all_registrants(ns);
	spdk_bdev_module_release_bdev(ns->bdev);
	spdk_bdev_close(ns->desc);
	free(ns);

	/* Let each transport tear down any per-namespace resources. */
	for (transport = spdk_nvmf_transport_get_first(subsystem->tgt); transport;
	     transport = spdk_nvmf_transport_get_next(transport)) {
		if (transport->ops->subsystem_remove_ns) {
			transport->ops->subsystem_remove_ns(transport, subsystem, nsid);
		}
	}

	nvmf_subsystem_ns_changed(subsystem, nsid);

	return 0;
}
1363 
/* Context for asynchronous namespace hot-remove/resize processing; owns its
 * own copy of the nsid because the ns may disappear mid-operation. */
struct subsystem_ns_change_ctx {
	struct spdk_nvmf_subsystem		*subsystem;
	spdk_nvmf_subsystem_state_change_done	cb_fn;
	uint32_t				nsid;
};
1369 
1370 static void
1371 _nvmf_ns_hot_remove(struct spdk_nvmf_subsystem *subsystem,
1372 		    void *cb_arg, int status)
1373 {
1374 	struct subsystem_ns_change_ctx *ctx = cb_arg;
1375 	int rc;
1376 
1377 	rc = spdk_nvmf_subsystem_remove_ns(subsystem, ctx->nsid);
1378 	if (rc != 0) {
1379 		SPDK_ERRLOG("Failed to make changes to NVME-oF subsystem with id: %u\n", subsystem->id);
1380 	}
1381 
1382 	spdk_nvmf_subsystem_resume(subsystem, NULL, NULL);
1383 
1384 	free(ctx);
1385 }
1386 
/* Retry handler for namespace change processing: re-attempt the subsystem
 * pause, rescheduling itself while the subsystem is busy (-EBUSY). */
static void
nvmf_ns_change_msg(void *ns_ctx)
{
	struct subsystem_ns_change_ctx *ctx = ns_ctx;
	int rc;

	SPDK_DTRACE_PROBE2(nvmf_ns_change, ctx->nsid, ctx->subsystem->subnqn);

	/* NOTE(review): this retry always pauses with ctx->nsid, while the
	 * initial resize path pauses with nsid 0 - confirm the difference is
	 * intentional (it is safe, just quiesces more than needed). */
	rc = spdk_nvmf_subsystem_pause(ctx->subsystem, ctx->nsid, ctx->cb_fn, ctx);
	if (rc) {
		if (rc == -EBUSY) {
			/* Try again, this is not a permanent situation. */
			spdk_thread_send_msg(spdk_get_thread(), nvmf_ns_change_msg, ctx);
		} else {
			free(ctx);
			SPDK_ERRLOG("Unable to pause subsystem to process namespace removal!\n");
		}
	}
}
1406 
/* Bdev hot-remove event handler: pause the subsystem, then remove the
 * namespace from the pause-complete callback (_nvmf_ns_hot_remove). */
static void
nvmf_ns_hot_remove(void *remove_ctx)
{
	struct spdk_nvmf_ns *ns = remove_ctx;
	struct subsystem_ns_change_ctx *ns_ctx;
	int rc;

	/* We have to allocate a new context because this op
	 * is asynchronous and we could lose the ns in the middle.
	 */
	ns_ctx = calloc(1, sizeof(struct subsystem_ns_change_ctx));
	if (!ns_ctx) {
		SPDK_ERRLOG("Unable to allocate context to process namespace removal!\n");
		return;
	}

	ns_ctx->subsystem = ns->subsystem;
	ns_ctx->nsid = ns->opts.nsid;
	/* cb_fn is consumed only by the nvmf_ns_change_msg retry path. */
	ns_ctx->cb_fn = _nvmf_ns_hot_remove;

	rc = spdk_nvmf_subsystem_pause(ns->subsystem, ns_ctx->nsid, _nvmf_ns_hot_remove, ns_ctx);
	if (rc) {
		if (rc == -EBUSY) {
			/* Try again, this is not a permanent situation. */
			spdk_thread_send_msg(spdk_get_thread(), nvmf_ns_change_msg, ns_ctx);
		} else {
			SPDK_ERRLOG("Unable to pause subsystem to process namespace removal!\n");
			free(ns_ctx);
		}
	}
}
1438 
/* Pause-complete callback for resize: broadcast the ns change to all
 * controllers, resume the subsystem, and release the change context. */
static void
_nvmf_ns_resize(struct spdk_nvmf_subsystem *subsystem, void *cb_arg, int status)
{
	struct subsystem_ns_change_ctx *ctx = cb_arg;

	nvmf_subsystem_ns_changed(subsystem, ctx->nsid);
	spdk_nvmf_subsystem_resume(subsystem, NULL, NULL);

	free(ctx);
}
1449 
/* Bdev resize event handler: pause the subsystem (without quiescing the
 * namespace) and notify controllers from the pause-complete callback. */
static void
nvmf_ns_resize(void *event_ctx)
{
	struct spdk_nvmf_ns *ns = event_ctx;
	struct subsystem_ns_change_ctx *ns_ctx;
	int rc;

	/* We have to allocate a new context because this op
	 * is asynchronous and we could lose the ns in the middle.
	 */
	ns_ctx = calloc(1, sizeof(struct subsystem_ns_change_ctx));
	if (!ns_ctx) {
		SPDK_ERRLOG("Unable to allocate context to process namespace removal!\n");
		return;
	}

	ns_ctx->subsystem = ns->subsystem;
	ns_ctx->nsid = ns->opts.nsid;
	ns_ctx->cb_fn = _nvmf_ns_resize;

	/* Specify 0 for the nsid here, because we do not need to pause the namespace.
	 * Namespaces can only be resized bigger, so there is no need to quiesce I/O.
	 * NOTE(review): the -EBUSY retry below goes through nvmf_ns_change_msg,
	 * which pauses with ns_ctx->nsid instead of 0 - confirm intentional.
	 */
	rc = spdk_nvmf_subsystem_pause(ns->subsystem, 0, _nvmf_ns_resize, ns_ctx);
	if (rc) {
		if (rc == -EBUSY) {
			/* Try again, this is not a permanent situation. */
			spdk_thread_send_msg(spdk_get_thread(), nvmf_ns_change_msg, ns_ctx);
		} else {
			SPDK_ERRLOG("Unable to pause subsystem to process namespace resize!\n");
			free(ns_ctx);
		}
	}
}
1484 
1485 static void
1486 nvmf_ns_event(enum spdk_bdev_event_type type,
1487 	      struct spdk_bdev *bdev,
1488 	      void *event_ctx)
1489 {
1490 	SPDK_DEBUGLOG(nvmf, "Bdev event: type %d, name %s, subsystem_id %d, ns_id %d\n",
1491 		      type,
1492 		      spdk_bdev_get_name(bdev),
1493 		      ((struct spdk_nvmf_ns *)event_ctx)->subsystem->id,
1494 		      ((struct spdk_nvmf_ns *)event_ctx)->nsid);
1495 
1496 	switch (type) {
1497 	case SPDK_BDEV_EVENT_REMOVE:
1498 		nvmf_ns_hot_remove(event_ctx);
1499 		break;
1500 	case SPDK_BDEV_EVENT_RESIZE:
1501 		nvmf_ns_resize(event_ctx);
1502 		break;
1503 	default:
1504 		SPDK_NOTICELOG("Unsupported bdev event: type %d\n", type);
1505 		break;
1506 	}
1507 }
1508 
/*
 * Fill opts with default namespace options, honoring opts_size so older
 * callers with a smaller struct are not written past.  All current defaults
 * are zero; the FIELD_OK/SET_FIELD machinery exists so future non-zero
 * defaults remain ABI-safe.
 */
void
spdk_nvmf_ns_opts_get_defaults(struct spdk_nvmf_ns_opts *opts, size_t opts_size)
{
	if (!opts) {
		SPDK_ERRLOG("opts should not be NULL.\n");
		return;
	}

	if (!opts_size) {
		SPDK_ERRLOG("opts_size should not be zero.\n");
		return;
	}

	memset(opts, 0, opts_size);
	opts->opts_size = opts_size;

/* True when the field fits entirely inside the caller-provided struct. */
#define FIELD_OK(field) \
	offsetof(struct spdk_nvmf_ns_opts, field) + sizeof(opts->field) <= opts_size

#define SET_FIELD(field, value) \
	if (FIELD_OK(field)) { \
		opts->field = value; \
	} \

	/* All current fields are set to 0 by default. */
	SET_FIELD(nsid, 0);
	if (FIELD_OK(nguid)) {
		memset(opts->nguid, 0, sizeof(opts->nguid));
	}
	if (FIELD_OK(eui64)) {
		memset(opts->eui64, 0, sizeof(opts->eui64));
	}
	if (FIELD_OK(uuid)) {
		memset(&opts->uuid, 0, sizeof(opts->uuid));
	}
	SET_FIELD(anagrpid, 0);

#undef FIELD_OK
#undef SET_FIELD
}
1549 
/*
 * Copy user-provided namespace options into opts, field by field, honoring
 * the size the user declared in user_opts->opts_size (ABI-compatible copy
 * from a possibly older/smaller struct).
 */
static void
nvmf_ns_opts_copy(struct spdk_nvmf_ns_opts *opts,
		  const struct spdk_nvmf_ns_opts *user_opts,
		  size_t opts_size)
{
/* True when the field fits inside the struct the user claims to have. */
#define FIELD_OK(field)	\
	offsetof(struct spdk_nvmf_ns_opts, field) + sizeof(opts->field) <= user_opts->opts_size

#define SET_FIELD(field) \
	if (FIELD_OK(field)) { \
		opts->field = user_opts->field;	\
	} \

	SET_FIELD(nsid);
	if (FIELD_OK(nguid)) {
		memcpy(opts->nguid, user_opts->nguid, sizeof(opts->nguid));
	}
	if (FIELD_OK(eui64)) {
		memcpy(opts->eui64, user_opts->eui64, sizeof(opts->eui64));
	}
	if (FIELD_OK(uuid)) {
		memcpy(&opts->uuid, &user_opts->uuid, sizeof(opts->uuid));
	}
	SET_FIELD(anagrpid);

	opts->opts_size = user_opts->opts_size;

	/* We should not remove this statement, but need to update the assert statement
	 * if we add a new field, and also add a corresponding SET_FIELD statement.
	 */
	SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_ns_opts) == 64, "Incorrect size");

#undef FIELD_OK
#undef SET_FIELD
}
1585 
/* Dummy bdev module used to claim bdevs on behalf of the NVMe-oF target. */
static struct spdk_bdev_module ns_bdev_module = {
	.name	= "NVMe-oF Target",
};
1590 
1591 static int
1592 nvmf_ns_load_reservation(const char *file, struct spdk_nvmf_reservation_info *info);
1593 static int
1594 nvmf_ns_reservation_restore(struct spdk_nvmf_ns *ns, struct spdk_nvmf_reservation_info *info);
1595 
/*
 * Add a namespace backed by bdev_name to the subsystem.  Optionally restores
 * persisted reservation state from ptpl_file.  The subsystem must be
 * INACTIVE or PAUSED.  Returns the assigned nsid on success, or 0 on any
 * failure.
 */
uint32_t
spdk_nvmf_subsystem_add_ns_ext(struct spdk_nvmf_subsystem *subsystem, const char *bdev_name,
			       const struct spdk_nvmf_ns_opts *user_opts, size_t opts_size,
			       const char *ptpl_file)
{
	struct spdk_nvmf_transport *transport;
	struct spdk_nvmf_ns_opts opts;
	struct spdk_nvmf_ns *ns;
	struct spdk_nvmf_reservation_info info = {0};
	int rc;

	if (!(subsystem->state == SPDK_NVMF_SUBSYSTEM_INACTIVE ||
	      subsystem->state == SPDK_NVMF_SUBSYSTEM_PAUSED)) {
		return 0;
	}

	/* Start from defaults, then layer on whatever the caller provided. */
	spdk_nvmf_ns_opts_get_defaults(&opts, sizeof(opts));
	if (user_opts) {
		nvmf_ns_opts_copy(&opts, user_opts, opts_size);
	}

	if (opts.nsid == SPDK_NVME_GLOBAL_NS_TAG) {
		SPDK_ERRLOG("Invalid NSID %" PRIu32 "\n", opts.nsid);
		return 0;
	}

	if (opts.nsid == 0) {
		/*
		 * NSID not specified - find a free index.
		 *
		 * If no free slots are found, opts.nsid will be subsystem->max_nsid + 1, which will
		 * expand max_nsid if possible.
		 */
		for (opts.nsid = 1; opts.nsid <= subsystem->max_nsid; opts.nsid++) {
			if (_nvmf_subsystem_get_ns(subsystem, opts.nsid) == NULL) {
				break;
			}
		}
	}

	if (_nvmf_subsystem_get_ns(subsystem, opts.nsid)) {
		SPDK_ERRLOG("Requested NSID %" PRIu32 " already in use\n", opts.nsid);
		return 0;
	}

	if (opts.nsid > subsystem->max_nsid) {
		SPDK_ERRLOG("NSID greater than maximum not allowed\n");
		return 0;
	}

	/* ANA group defaults to a dedicated group per namespace. */
	if (opts.anagrpid == 0) {
		opts.anagrpid = opts.nsid;
	}

	if (opts.anagrpid > subsystem->max_nsid) {
		SPDK_ERRLOG("ANAGRPID greater than maximum NSID not allowed\n");
		return 0;
	}

	ns = calloc(1, sizeof(*ns));
	if (ns == NULL) {
		SPDK_ERRLOG("Namespace allocation failed\n");
		return 0;
	}

	rc = spdk_bdev_open_ext(bdev_name, true, nvmf_ns_event, ns, &ns->desc);
	if (rc != 0) {
		SPDK_ERRLOG("Subsystem %s: bdev %s cannot be opened, error=%d\n",
			    subsystem->subnqn, bdev_name, rc);
		free(ns);
		return 0;
	}

	ns->bdev = spdk_bdev_desc_get_bdev(ns->desc);

	/* Separate (non-interleaved) metadata is not supported. */
	if (spdk_bdev_get_md_size(ns->bdev) != 0 && !spdk_bdev_is_md_interleaved(ns->bdev)) {
		SPDK_ERRLOG("Can't attach bdev with separate metadata.\n");
		spdk_bdev_close(ns->desc);
		free(ns);
		return 0;
	}

	/* Claim exclusive ownership of the bdev for the NVMe-oF target. */
	rc = spdk_bdev_module_claim_bdev(ns->bdev, ns->desc, &ns_bdev_module);
	if (rc != 0) {
		spdk_bdev_close(ns->desc);
		free(ns);
		return 0;
	}

	/* Cache the zcopy capability of the bdev device */
	ns->zcopy = spdk_bdev_io_type_supported(ns->bdev, SPDK_BDEV_IO_TYPE_ZCOPY);

	if (spdk_mem_all_zero(&opts.uuid, sizeof(opts.uuid))) {
		opts.uuid = *spdk_bdev_get_uuid(ns->bdev);
	}

	/* if nguid descriptor is supported by bdev module (nvme) then uuid = nguid */
	if (spdk_mem_all_zero(opts.nguid, sizeof(opts.nguid))) {
		SPDK_STATIC_ASSERT(sizeof(opts.nguid) == sizeof(opts.uuid), "size mismatch");
		memcpy(opts.nguid, spdk_bdev_get_uuid(ns->bdev), sizeof(opts.nguid));
	}

	ns->opts = opts;
	ns->subsystem = subsystem;
	subsystem->ns[opts.nsid - 1] = ns;
	ns->nsid = opts.nsid;
	ns->anagrpid = opts.anagrpid;
	subsystem->ana_group[ns->anagrpid - 1]++;
	TAILQ_INIT(&ns->registrants);
	if (ptpl_file) {
		/* Restore any persisted reservation state; a missing file is
		 * not an error (nvmf_ns_load_reservation returns non-zero). */
		rc = nvmf_ns_load_reservation(ptpl_file, &info);
		if (!rc) {
			rc = nvmf_ns_reservation_restore(ns, &info);
			if (rc) {
				SPDK_ERRLOG("Subsystem restore reservation failed\n");
				goto err_ns_reservation_restore;
			}
		}
		ns->ptpl_file = strdup(ptpl_file);
		if (!ns->ptpl_file) {
			SPDK_ERRLOG("Namespace ns->ptpl_file allocation failed\n");
			goto err_strdup;
		}
	}

	/* Let each transport set up per-namespace resources; any refusal
	 * unwinds the whole add. */
	for (transport = spdk_nvmf_transport_get_first(subsystem->tgt); transport;
	     transport = spdk_nvmf_transport_get_next(transport)) {
		if (transport->ops->subsystem_add_ns) {
			rc = transport->ops->subsystem_add_ns(transport, subsystem, ns);
			if (rc) {
				SPDK_ERRLOG("Namespace attachment is not allowed by %s transport\n", transport->ops->name);
				goto err_subsystem_add_ns;
			}
		}
	}

	SPDK_DEBUGLOG(nvmf, "Subsystem %s: bdev %s assigned nsid %" PRIu32 "\n",
		      spdk_nvmf_subsystem_get_nqn(subsystem),
		      bdev_name,
		      opts.nsid);

	nvmf_subsystem_ns_changed(subsystem, opts.nsid);

	SPDK_DTRACE_PROBE2(nvmf_subsystem_add_ns, subsystem->subnqn, ns->nsid);

	return opts.nsid;

/* Unwind in reverse order of construction. */
err_subsystem_add_ns:
	free(ns->ptpl_file);
err_strdup:
	nvmf_ns_reservation_clear_all_registrants(ns);
err_ns_reservation_restore:
	subsystem->ns[opts.nsid - 1] = NULL;
	spdk_bdev_module_release_bdev(ns->bdev);
	spdk_bdev_close(ns->desc);
	free(ns);

	return 0;
}
1755 
1756 static uint32_t
1757 nvmf_subsystem_get_next_allocated_nsid(struct spdk_nvmf_subsystem *subsystem,
1758 				       uint32_t prev_nsid)
1759 {
1760 	uint32_t nsid;
1761 
1762 	if (prev_nsid >= subsystem->max_nsid) {
1763 		return 0;
1764 	}
1765 
1766 	for (nsid = prev_nsid + 1; nsid <= subsystem->max_nsid; nsid++) {
1767 		if (subsystem->ns[nsid - 1]) {
1768 			return nsid;
1769 		}
1770 	}
1771 
1772 	return 0;
1773 }
1774 
/* Return the first allocated namespace in the subsystem, or NULL if none. */
struct spdk_nvmf_ns *
spdk_nvmf_subsystem_get_first_ns(struct spdk_nvmf_subsystem *subsystem)
{
	uint32_t first_nsid;

	first_nsid = nvmf_subsystem_get_next_allocated_nsid(subsystem, 0);
	return _nvmf_subsystem_get_ns(subsystem, first_nsid);
}
1783 
/* Return the allocated namespace following prev_ns, or NULL at the end. */
struct spdk_nvmf_ns *
spdk_nvmf_subsystem_get_next_ns(struct spdk_nvmf_subsystem *subsystem,
				struct spdk_nvmf_ns *prev_ns)
{
	uint32_t next_nsid;

	next_nsid = nvmf_subsystem_get_next_allocated_nsid(subsystem, prev_ns->opts.nsid);
	return _nvmf_subsystem_get_ns(subsystem, next_nsid);
}
1793 
/* Public wrapper: look up a namespace by nsid, or NULL if not allocated. */
struct spdk_nvmf_ns *
spdk_nvmf_subsystem_get_ns(struct spdk_nvmf_subsystem *subsystem, uint32_t nsid)
{
	return _nvmf_subsystem_get_ns(subsystem, nsid);
}
1799 
/* Return the namespace ID of a namespace. */
uint32_t
spdk_nvmf_ns_get_id(const struct spdk_nvmf_ns *ns)
{
	return ns->opts.nsid;
}
1805 
/* Return the bdev backing this namespace. */
struct spdk_bdev *
spdk_nvmf_ns_get_bdev(struct spdk_nvmf_ns *ns)
{
	return ns->bdev;
}
1811 
/* Copy the namespace's options into the caller's buffer, truncating or
 * zero-padding to opts_size for ABI compatibility. */
void
spdk_nvmf_ns_get_opts(const struct spdk_nvmf_ns *ns, struct spdk_nvmf_ns_opts *opts,
		      size_t opts_size)
{
	memset(opts, 0, opts_size);
	memcpy(opts, &ns->opts, spdk_min(sizeof(ns->opts), opts_size));
}
1819 
/* Return the subsystem's serial number string. */
const char *
spdk_nvmf_subsystem_get_sn(const struct spdk_nvmf_subsystem *subsystem)
{
	return subsystem->sn;
}
1825 
1826 int
1827 spdk_nvmf_subsystem_set_sn(struct spdk_nvmf_subsystem *subsystem, const char *sn)
1828 {
1829 	size_t len, max_len;
1830 
1831 	max_len = sizeof(subsystem->sn) - 1;
1832 	len = strlen(sn);
1833 	if (len > max_len) {
1834 		SPDK_DEBUGLOG(nvmf, "Invalid sn \"%s\": length %zu > max %zu\n",
1835 			      sn, len, max_len);
1836 		return -1;
1837 	}
1838 
1839 	if (!nvmf_valid_ascii_string(sn, len)) {
1840 		SPDK_DEBUGLOG(nvmf, "Non-ASCII sn\n");
1841 		SPDK_LOGDUMP(nvmf, "sn", sn, len);
1842 		return -1;
1843 	}
1844 
1845 	snprintf(subsystem->sn, sizeof(subsystem->sn), "%s", sn);
1846 
1847 	return 0;
1848 }
1849 
/* Return the subsystem's model number string. */
const char *
spdk_nvmf_subsystem_get_mn(const struct spdk_nvmf_subsystem *subsystem)
{
	return subsystem->mn;
}
1855 
1856 int
1857 spdk_nvmf_subsystem_set_mn(struct spdk_nvmf_subsystem *subsystem, const char *mn)
1858 {
1859 	size_t len, max_len;
1860 
1861 	if (mn == NULL) {
1862 		mn = MODEL_NUMBER_DEFAULT;
1863 	}
1864 	max_len = sizeof(subsystem->mn) - 1;
1865 	len = strlen(mn);
1866 	if (len > max_len) {
1867 		SPDK_DEBUGLOG(nvmf, "Invalid mn \"%s\": length %zu > max %zu\n",
1868 			      mn, len, max_len);
1869 		return -1;
1870 	}
1871 
1872 	if (!nvmf_valid_ascii_string(mn, len)) {
1873 		SPDK_DEBUGLOG(nvmf, "Non-ASCII mn\n");
1874 		SPDK_LOGDUMP(nvmf, "mn", mn, len);
1875 		return -1;
1876 	}
1877 
1878 	snprintf(subsystem->mn, sizeof(subsystem->mn), "%s", mn);
1879 
1880 	return 0;
1881 }
1882 
/* Return the subsystem's NQN string. */
const char *
spdk_nvmf_subsystem_get_nqn(const struct spdk_nvmf_subsystem *subsystem)
{
	return subsystem->subnqn;
}
1888 
/* Return the subsystem type (e.g. NVMe or discovery). */
enum spdk_nvmf_subtype spdk_nvmf_subsystem_get_type(struct spdk_nvmf_subsystem *subsystem)
{
	return subsystem->subtype;
}
1893 
/* Return the highest namespace ID the subsystem can hold. */
uint32_t
spdk_nvmf_subsystem_get_max_nsid(struct spdk_nvmf_subsystem *subsystem)
{
	return subsystem->max_nsid;
}
1899 
/*
 * Configure the inclusive controller ID allocation range.  Only permitted
 * while the subsystem is INACTIVE.  Returns 0, -EAGAIN on bad state, or
 * -EINVAL for an inverted or out-of-spec range.
 */
int
nvmf_subsystem_set_cntlid_range(struct spdk_nvmf_subsystem *subsystem,
				uint16_t min_cntlid, uint16_t max_cntlid)
{
	if (subsystem->state != SPDK_NVMF_SUBSYSTEM_INACTIVE) {
		return -EAGAIN;
	}

	if (min_cntlid > max_cntlid) {
		return -EINVAL;
	}
	/* The spec reserves cntlid values in the range FFF0h to FFFFh. */
	if (min_cntlid < NVMF_MIN_CNTLID || min_cntlid > NVMF_MAX_CNTLID ||
	    max_cntlid < NVMF_MIN_CNTLID || max_cntlid > NVMF_MAX_CNTLID) {
		return -EINVAL;
	}
	subsystem->min_cntlid = min_cntlid;
	subsystem->max_cntlid = max_cntlid;
	/* Restart the round-robin allocator so the next cntlid handed out is
	 * min_cntlid (gen_cntlid pre-increments). */
	if (subsystem->next_cntlid < min_cntlid || subsystem->next_cntlid > max_cntlid - 1) {
		subsystem->next_cntlid = min_cntlid - 1;
	}

	return 0;
}
1924 
1925 static uint16_t
1926 nvmf_subsystem_gen_cntlid(struct spdk_nvmf_subsystem *subsystem)
1927 {
1928 	int count;
1929 
1930 	/*
1931 	 * In the worst case, we might have to try all CNTLID values between min_cntlid and max_cntlid
1932 	 * before we find one that is unused (or find that all values are in use).
1933 	 */
1934 	for (count = 0; count < subsystem->max_cntlid - subsystem->min_cntlid + 1; count++) {
1935 		subsystem->next_cntlid++;
1936 		if (subsystem->next_cntlid > subsystem->max_cntlid) {
1937 			subsystem->next_cntlid = subsystem->min_cntlid;
1938 		}
1939 
1940 		/* Check if a controller with this cntlid currently exists. */
1941 		if (nvmf_subsystem_get_ctrlr(subsystem, subsystem->next_cntlid) == NULL) {
1942 			/* Found unused cntlid */
1943 			return subsystem->next_cntlid;
1944 		}
1945 	}
1946 
1947 	/* All valid cntlid values are in use. */
1948 	return 0xFFFF;
1949 }
1950 
/*
 * Register a controller with the subsystem.  Dynamic controllers receive a
 * generated cntlid; static controllers keep their own but must not collide.
 * Returns 0, -EBUSY when no cntlid is free, or -EEXIST on collision.
 */
int
nvmf_subsystem_add_ctrlr(struct spdk_nvmf_subsystem *subsystem, struct spdk_nvmf_ctrlr *ctrlr)
{

	if (ctrlr->dynamic_ctrlr) {
		ctrlr->cntlid = nvmf_subsystem_gen_cntlid(subsystem);
		if (ctrlr->cntlid == 0xFFFF) {
			/* Unable to get a cntlid */
			SPDK_ERRLOG("Reached max simultaneous ctrlrs\n");
			return -EBUSY;
		}
	} else if (nvmf_subsystem_get_ctrlr(subsystem, ctrlr->cntlid) != NULL) {
		SPDK_ERRLOG("Ctrlr with cntlid %u already exist\n", ctrlr->cntlid);
		return -EEXIST;
	}

	TAILQ_INSERT_TAIL(&subsystem->ctrlrs, ctrlr, link);

	SPDK_DTRACE_PROBE3(nvmf_subsystem_add_ctrlr, subsystem->subnqn, ctrlr, ctrlr->hostnqn);

	return 0;
}
1973 
/* Unlink a controller from the subsystem.  Must run on the subsystem's
 * thread; the controller must belong to this subsystem. */
void
nvmf_subsystem_remove_ctrlr(struct spdk_nvmf_subsystem *subsystem,
			    struct spdk_nvmf_ctrlr *ctrlr)
{
	SPDK_DTRACE_PROBE3(nvmf_subsystem_remove_ctrlr, subsystem->subnqn, ctrlr, ctrlr->hostnqn);

	assert(spdk_get_thread() == subsystem->thread);
	assert(subsystem == ctrlr->subsys);
	SPDK_DEBUGLOG(nvmf, "remove ctrlr %p id 0x%x from subsys %p %s\n", ctrlr, ctrlr->cntlid, subsystem,
		      subsystem->subnqn);
	TAILQ_REMOVE(&subsystem->ctrlrs, ctrlr, link);
}
1986 
1987 struct spdk_nvmf_ctrlr *
1988 nvmf_subsystem_get_ctrlr(struct spdk_nvmf_subsystem *subsystem, uint16_t cntlid)
1989 {
1990 	struct spdk_nvmf_ctrlr *ctrlr;
1991 
1992 	TAILQ_FOREACH(ctrlr, &subsystem->ctrlrs, link) {
1993 		if (ctrlr->cntlid == cntlid) {
1994 			return ctrlr;
1995 		}
1996 	}
1997 
1998 	return NULL;
1999 }
2000 
/* Return the maximum number of namespaces (same as max_nsid here). */
uint32_t
spdk_nvmf_subsystem_get_max_namespaces(const struct spdk_nvmf_subsystem *subsystem)
{
	return subsystem->max_nsid;
}
2006 
/* Return the lowest controller ID this subsystem will allocate. */
uint16_t
spdk_nvmf_subsystem_get_min_cntlid(const struct spdk_nvmf_subsystem *subsystem)
{
	return subsystem->min_cntlid;
}
2012 
/* Return the highest controller ID this subsystem will allocate. */
uint16_t
spdk_nvmf_subsystem_get_max_cntlid(const struct spdk_nvmf_subsystem *subsystem)
{
	return subsystem->max_cntlid;
}
2018 
/* Decode target for one persisted reservation registrant (JSON). */
struct _nvmf_ns_registrant {
	uint64_t		rkey;
	char			*host_uuid;	/* heap-allocated by the JSON decoder */
};

/* Fixed-capacity set of decoded registrants. */
struct _nvmf_ns_registrants {
	size_t				num_regs;
	struct _nvmf_ns_registrant	reg[SPDK_NVMF_MAX_NUM_REGISTRANTS];
};

/* Decode target for a namespace's persisted reservation state (JSON). */
struct _nvmf_ns_reservation {
	bool					ptpl_activated;
	enum spdk_nvme_reservation_type		rtype;
	uint64_t				crkey;
	char					*bdev_uuid;	/* heap-allocated by the JSON decoder */
	char					*holder_uuid;	/* heap-allocated by the JSON decoder */
	struct _nvmf_ns_registrants		regs;
};
2037 
/* JSON decoders for a single registrant object ({"rkey", "host_uuid"}). */
static const struct spdk_json_object_decoder nvmf_ns_pr_reg_decoders[] = {
	{"rkey", offsetof(struct _nvmf_ns_registrant, rkey), spdk_json_decode_uint64},
	{"host_uuid", offsetof(struct _nvmf_ns_registrant, host_uuid), spdk_json_decode_string},
};
2042 
2043 static int
2044 nvmf_decode_ns_pr_reg(const struct spdk_json_val *val, void *out)
2045 {
2046 	struct _nvmf_ns_registrant *reg = out;
2047 
2048 	return spdk_json_decode_object(val, nvmf_ns_pr_reg_decoders,
2049 				       SPDK_COUNTOF(nvmf_ns_pr_reg_decoders), reg);
2050 }
2051 
2052 static int
2053 nvmf_decode_ns_pr_regs(const struct spdk_json_val *val, void *out)
2054 {
2055 	struct _nvmf_ns_registrants *regs = out;
2056 
2057 	return spdk_json_decode_array(val, nvmf_decode_ns_pr_reg, regs->reg,
2058 				      SPDK_NVMF_MAX_NUM_REGISTRANTS, &regs->num_regs,
2059 				      sizeof(struct _nvmf_ns_registrant));
2060 }
2061 
/* JSON object decoder table for the whole persisted reservation state.
 * Entries flagged 'true' are optional in the file; bdev_uuid and the
 * registrants array are required. */
static const struct spdk_json_object_decoder nvmf_ns_pr_decoders[] = {
	{"ptpl", offsetof(struct _nvmf_ns_reservation, ptpl_activated), spdk_json_decode_bool, true},
	{"rtype", offsetof(struct _nvmf_ns_reservation, rtype), spdk_json_decode_uint32, true},
	{"crkey", offsetof(struct _nvmf_ns_reservation, crkey), spdk_json_decode_uint64, true},
	{"bdev_uuid", offsetof(struct _nvmf_ns_reservation, bdev_uuid), spdk_json_decode_string},
	{"holder_uuid", offsetof(struct _nvmf_ns_reservation, holder_uuid), spdk_json_decode_string, true},
	{"registrants", offsetof(struct _nvmf_ns_reservation, regs), nvmf_decode_ns_pr_regs},
};
2070 
2071 static int
2072 nvmf_ns_load_reservation(const char *file, struct spdk_nvmf_reservation_info *info)
2073 {
2074 	FILE *fd;
2075 	size_t json_size;
2076 	ssize_t values_cnt, rc;
2077 	void *json = NULL, *end;
2078 	struct spdk_json_val *values = NULL;
2079 	struct _nvmf_ns_reservation res = {};
2080 	uint32_t i;
2081 
2082 	fd = fopen(file, "r");
2083 	/* It's not an error if the file does not exist */
2084 	if (!fd) {
2085 		SPDK_NOTICELOG("File %s does not exist\n", file);
2086 		return -ENOENT;
2087 	}
2088 
2089 	/* Load all persist file contents into a local buffer */
2090 	json = spdk_posix_file_load(fd, &json_size);
2091 	fclose(fd);
2092 	if (!json) {
2093 		SPDK_ERRLOG("Load persit file %s failed\n", file);
2094 		return -ENOMEM;
2095 	}
2096 
2097 	rc = spdk_json_parse(json, json_size, NULL, 0, &end, 0);
2098 	if (rc < 0) {
2099 		SPDK_NOTICELOG("Parsing JSON configuration failed (%zd)\n", rc);
2100 		goto exit;
2101 	}
2102 
2103 	values_cnt = rc;
2104 	values = calloc(values_cnt, sizeof(struct spdk_json_val));
2105 	if (values == NULL) {
2106 		goto exit;
2107 	}
2108 
2109 	rc = spdk_json_parse(json, json_size, values, values_cnt, &end, 0);
2110 	if (rc != values_cnt) {
2111 		SPDK_ERRLOG("Parsing JSON configuration failed (%zd)\n", rc);
2112 		goto exit;
2113 	}
2114 
2115 	/* Decode json */
2116 	if (spdk_json_decode_object(values, nvmf_ns_pr_decoders,
2117 				    SPDK_COUNTOF(nvmf_ns_pr_decoders),
2118 				    &res)) {
2119 		SPDK_ERRLOG("Invalid objects in the persist file %s\n", file);
2120 		rc = -EINVAL;
2121 		goto exit;
2122 	}
2123 
2124 	if (res.regs.num_regs > SPDK_NVMF_MAX_NUM_REGISTRANTS) {
2125 		SPDK_ERRLOG("Can only support up to %u registrants\n", SPDK_NVMF_MAX_NUM_REGISTRANTS);
2126 		rc = -ERANGE;
2127 		goto exit;
2128 	}
2129 
2130 	rc = 0;
2131 	info->ptpl_activated = res.ptpl_activated;
2132 	info->rtype = res.rtype;
2133 	info->crkey = res.crkey;
2134 	snprintf(info->bdev_uuid, sizeof(info->bdev_uuid), "%s", res.bdev_uuid);
2135 	snprintf(info->holder_uuid, sizeof(info->holder_uuid), "%s", res.holder_uuid);
2136 	info->num_regs = res.regs.num_regs;
2137 	for (i = 0; i < res.regs.num_regs; i++) {
2138 		info->registrants[i].rkey = res.regs.reg[i].rkey;
2139 		snprintf(info->registrants[i].host_uuid, sizeof(info->registrants[i].host_uuid), "%s",
2140 			 res.regs.reg[i].host_uuid);
2141 	}
2142 
2143 exit:
2144 	free(json);
2145 	free(values);
2146 	free(res.bdev_uuid);
2147 	free(res.holder_uuid);
2148 	for (i = 0; i < res.regs.num_regs; i++) {
2149 		free(res.regs.reg[i].host_uuid);
2150 	}
2151 
2152 	return rc;
2153 }
2154 
/* Forward declaration: defined with the other reservation-type helpers below. */
static bool
nvmf_ns_reservation_all_registrants_type(struct spdk_nvmf_ns *ns);
2157 
/* Rebuild a namespace's in-memory reservation state from persisted
 * reservation info (previously loaded from its PTPL file): restores
 * type/key/PTPL flag, re-creates each registrant, and selects the holder.
 * Returns 0 on success (including when PTPL is inactive) or -errno.
 */
static int
nvmf_ns_reservation_restore(struct spdk_nvmf_ns *ns, struct spdk_nvmf_reservation_info *info)
{
	uint32_t i;
	struct spdk_nvmf_registrant *reg, *holder = NULL;
	struct spdk_uuid bdev_uuid, holder_uuid;
	bool rkey_flag = false;

	SPDK_DEBUGLOG(nvmf, "NSID %u, PTPL %u, Number of registrants %u\n",
		      ns->nsid, info->ptpl_activated, info->num_regs);

	/* it's not an error */
	if (!info->ptpl_activated || !info->num_regs) {
		return 0;
	}

	/* Check info->crkey exist or not in info->registrants[i].rkey */
	for (i = 0; i < info->num_regs; i++) {
		if (info->crkey == info->registrants[i].rkey) {
			rkey_flag = true;
		}
	}
	if (!rkey_flag) {
		return -EINVAL;
	}

	/* The persisted state must belong to this namespace's bdev. */
	spdk_uuid_parse(&bdev_uuid, info->bdev_uuid);
	if (spdk_uuid_compare(&bdev_uuid, spdk_bdev_get_uuid(ns->bdev))) {
		SPDK_ERRLOG("Existing bdev UUID is not same with configuration file\n");
		return -EINVAL;
	}

	ns->crkey = info->crkey;
	ns->rtype = info->rtype;
	ns->ptpl_activated = info->ptpl_activated;
	/* NOTE(review): return values of spdk_uuid_parse() are ignored here and
	 * in the loop below; a malformed UUID string would go undetected —
	 * confirm the persist file is always written with valid UUIDs. */
	spdk_uuid_parse(&holder_uuid, info->holder_uuid);

	SPDK_DEBUGLOG(nvmf, "Bdev UUID %s\n", info->bdev_uuid);
	if (info->rtype) {
		SPDK_DEBUGLOG(nvmf, "Holder UUID %s, RTYPE %u, RKEY 0x%"PRIx64"\n",
			      info->holder_uuid, info->rtype, info->crkey);
	}

	/* Re-create one registrant per persisted entry. */
	for (i = 0; i < info->num_regs; i++) {
		reg = calloc(1, sizeof(*reg));
		if (!reg) {
			/* NOTE(review): registrants inserted so far remain on the
			 * list here; presumably cleaned up on namespace teardown —
			 * confirm. */
			return -ENOMEM;
		}
		spdk_uuid_parse(&reg->hostid, info->registrants[i].host_uuid);
		reg->rkey = info->registrants[i].rkey;
		TAILQ_INSERT_TAIL(&ns->registrants, reg, link);
		if (!spdk_uuid_compare(&holder_uuid, &reg->hostid)) {
			holder = reg;
		}
		SPDK_DEBUGLOG(nvmf, "Registrant RKEY 0x%"PRIx64", Host UUID %s\n",
			      info->registrants[i].rkey, info->registrants[i].host_uuid);
	}

	/* For all-registrants reservation types any registrant acts as holder,
	 * so use the first one; otherwise use the registrant whose host ID
	 * matched the persisted holder UUID (may be NULL if none matched). */
	if (nvmf_ns_reservation_all_registrants_type(ns)) {
		ns->holder = TAILQ_FIRST(&ns->registrants);
	} else {
		ns->holder = holder;
	}

	return 0;
}
2224 
/* spdk_json_write flush callback: write the serialized JSON to the file
 * whose path is passed as cb_ctx.  Returns 0 on success, -ENOENT when the
 * file cannot be opened, -1 on a short or failed write.
 */
static int
nvmf_ns_json_write_cb(void *cb_ctx, const void *data, size_t size)
{
	char *file = cb_ctx;
	size_t rc;
	FILE *fd;

	fd = fopen(file, "w");
	if (!fd) {
		SPDK_ERRLOG("Can't open file %s for write\n", file);
		return -ENOENT;
	}
	rc = fwrite(data, 1, size, fd);
	/* fclose() flushes buffered data; a failure here means the data did not
	 * fully reach the file, so treat it as a write error too (previously
	 * the fclose() result was ignored). */
	if (fclose(fd) != 0 || rc != size) {
		return -1;
	}

	return 0;
}
2242 
/* Serialize the given reservation state to the PTPL file as JSON.  When
 * PTPL is not activated, nothing is written, which truncates the file via
 * the write callback and clears any stale persisted state.
 * Returns 0 on success or a negative errno.
 */
static int
nvmf_ns_reservation_update(const char *file, struct spdk_nvmf_reservation_info *info)
{
	struct spdk_json_write_ctx *w;
	uint32_t i;
	int rc = 0;

	/* The writer delivers its output through nvmf_ns_json_write_cb, which
	 * receives the file path as its context. */
	w = spdk_json_write_begin(nvmf_ns_json_write_cb, (void *)file, 0);
	if (w == NULL) {
		return -ENOMEM;
	}
	/* clear the configuration file */
	if (!info->ptpl_activated) {
		goto exit;
	}

	spdk_json_write_object_begin(w);
	spdk_json_write_named_bool(w, "ptpl", info->ptpl_activated);
	spdk_json_write_named_uint32(w, "rtype", info->rtype);
	spdk_json_write_named_uint64(w, "crkey", info->crkey);
	spdk_json_write_named_string(w, "bdev_uuid", info->bdev_uuid);
	spdk_json_write_named_string(w, "holder_uuid", info->holder_uuid);

	/* One {rkey, host_uuid} object per registrant. */
	spdk_json_write_named_array_begin(w, "registrants");
	for (i = 0; i < info->num_regs; i++) {
		spdk_json_write_object_begin(w);
		spdk_json_write_named_uint64(w, "rkey", info->registrants[i].rkey);
		spdk_json_write_named_string(w, "host_uuid", info->registrants[i].host_uuid);
		spdk_json_write_object_end(w);
	}
	spdk_json_write_array_end(w);
	spdk_json_write_object_end(w);

exit:
	/* Finalize the writer; this drives the write callback with the output. */
	rc = spdk_json_write_end(w);
	return rc;
}
2280 
/* Snapshot the namespace's current in-memory reservation state into a
 * spdk_nvmf_reservation_info and persist it to the namespace's PTPL file.
 * A no-op (returns 0) when the namespace has no bdev or no persist file.
 */
static int
nvmf_ns_update_reservation_info(struct spdk_nvmf_ns *ns)
{
	struct spdk_nvmf_reservation_info info;
	struct spdk_nvmf_registrant *reg, *tmp;
	uint32_t i = 0;

	assert(ns != NULL);

	if (!ns->bdev || !ns->ptpl_file) {
		return 0;
	}

	memset(&info, 0, sizeof(info));
	spdk_uuid_fmt_lower(info.bdev_uuid, sizeof(info.bdev_uuid), spdk_bdev_get_uuid(ns->bdev));

	if (ns->rtype) {
		info.rtype = ns->rtype;
		info.crkey = ns->crkey;
		/* For all-registrants types there is no single holder UUID to record. */
		if (!nvmf_ns_reservation_all_registrants_type(ns)) {
			assert(ns->holder != NULL);
			spdk_uuid_fmt_lower(info.holder_uuid, sizeof(info.holder_uuid), &ns->holder->hostid);
		}
	}

	/* NOTE(review): assumes the registrant list never exceeds the size of
	 * info.registrants (SPDK_NVMF_MAX_NUM_REGISTRANTS) — confirm this is
	 * enforced where registrants are added. */
	TAILQ_FOREACH_SAFE(reg, &ns->registrants, link, tmp) {
		spdk_uuid_fmt_lower(info.registrants[i].host_uuid, sizeof(info.registrants[i].host_uuid),
				    &reg->hostid);
		info.registrants[i++].rkey = reg->rkey;
	}

	info.num_regs = i;
	info.ptpl_activated = ns->ptpl_activated;

	return nvmf_ns_reservation_update(ns->ptpl_file, &info);
}
2317 
2318 static struct spdk_nvmf_registrant *
2319 nvmf_ns_reservation_get_registrant(struct spdk_nvmf_ns *ns,
2320 				   struct spdk_uuid *uuid)
2321 {
2322 	struct spdk_nvmf_registrant *reg, *tmp;
2323 
2324 	TAILQ_FOREACH_SAFE(reg, &ns->registrants, link, tmp) {
2325 		if (!spdk_uuid_compare(&reg->hostid, uuid)) {
2326 			return reg;
2327 		}
2328 	}
2329 
2330 	return NULL;
2331 }
2332 
2333 /* Generate reservation notice log to registered HostID controllers */
2334 static void
2335 nvmf_subsystem_gen_ctrlr_notification(struct spdk_nvmf_subsystem *subsystem,
2336 				      struct spdk_nvmf_ns *ns,
2337 				      struct spdk_uuid *hostid_list,
2338 				      uint32_t num_hostid,
2339 				      enum spdk_nvme_reservation_notification_log_page_type type)
2340 {
2341 	struct spdk_nvmf_ctrlr *ctrlr;
2342 	uint32_t i;
2343 
2344 	for (i = 0; i < num_hostid; i++) {
2345 		TAILQ_FOREACH(ctrlr, &subsystem->ctrlrs, link) {
2346 			if (!spdk_uuid_compare(&ctrlr->hostid, &hostid_list[i])) {
2347 				nvmf_ctrlr_reservation_notice_log(ctrlr, ns, type);
2348 			}
2349 		}
2350 	}
2351 }
2352 
2353 /* Get all registrants' hostid other than the controller who issued the command */
2354 static uint32_t
2355 nvmf_ns_reservation_get_all_other_hostid(struct spdk_nvmf_ns *ns,
2356 		struct spdk_uuid *hostid_list,
2357 		uint32_t max_num_hostid,
2358 		struct spdk_uuid *current_hostid)
2359 {
2360 	struct spdk_nvmf_registrant *reg, *tmp;
2361 	uint32_t num_hostid = 0;
2362 
2363 	TAILQ_FOREACH_SAFE(reg, &ns->registrants, link, tmp) {
2364 		if (spdk_uuid_compare(&reg->hostid, current_hostid)) {
2365 			if (num_hostid == max_num_hostid) {
2366 				assert(false);
2367 				return max_num_hostid;
2368 			}
2369 			hostid_list[num_hostid++] = reg->hostid;
2370 		}
2371 	}
2372 
2373 	return num_hostid;
2374 }
2375 
2376 /* Calculate the unregistered HostID list according to list
2377  * prior to execute preempt command and list after executing
2378  * preempt command.
2379  */
2380 static uint32_t
2381 nvmf_ns_reservation_get_unregistered_hostid(struct spdk_uuid *old_hostid_list,
2382 		uint32_t old_num_hostid,
2383 		struct spdk_uuid *remaining_hostid_list,
2384 		uint32_t remaining_num_hostid)
2385 {
2386 	struct spdk_uuid temp_hostid_list[SPDK_NVMF_MAX_NUM_REGISTRANTS];
2387 	uint32_t i, j, num_hostid = 0;
2388 	bool found;
2389 
2390 	if (!remaining_num_hostid) {
2391 		return old_num_hostid;
2392 	}
2393 
2394 	for (i = 0; i < old_num_hostid; i++) {
2395 		found = false;
2396 		for (j = 0; j < remaining_num_hostid; j++) {
2397 			if (!spdk_uuid_compare(&old_hostid_list[i], &remaining_hostid_list[j])) {
2398 				found = true;
2399 				break;
2400 			}
2401 		}
2402 		if (!found) {
2403 			spdk_uuid_copy(&temp_hostid_list[num_hostid++], &old_hostid_list[i]);
2404 		}
2405 	}
2406 
2407 	if (num_hostid) {
2408 		memcpy(old_hostid_list, temp_hostid_list, sizeof(struct spdk_uuid) * num_hostid);
2409 	}
2410 
2411 	return num_hostid;
2412 }
2413 
2414 /* current reservation type is all registrants or not */
2415 static bool
2416 nvmf_ns_reservation_all_registrants_type(struct spdk_nvmf_ns *ns)
2417 {
2418 	return (ns->rtype == SPDK_NVME_RESERVE_WRITE_EXCLUSIVE_ALL_REGS ||
2419 		ns->rtype == SPDK_NVME_RESERVE_EXCLUSIVE_ACCESS_ALL_REGS);
2420 }
2421 
2422 /* current registrant is reservation holder or not */
2423 static bool
2424 nvmf_ns_reservation_registrant_is_holder(struct spdk_nvmf_ns *ns,
2425 		struct spdk_nvmf_registrant *reg)
2426 {
2427 	if (!reg) {
2428 		return false;
2429 	}
2430 
2431 	if (nvmf_ns_reservation_all_registrants_type(ns)) {
2432 		return true;
2433 	}
2434 
2435 	return (ns->holder == reg);
2436 }
2437 
2438 static int
2439 nvmf_ns_reservation_add_registrant(struct spdk_nvmf_ns *ns,
2440 				   struct spdk_nvmf_ctrlr *ctrlr,
2441 				   uint64_t nrkey)
2442 {
2443 	struct spdk_nvmf_registrant *reg;
2444 
2445 	reg = calloc(1, sizeof(*reg));
2446 	if (!reg) {
2447 		return -ENOMEM;
2448 	}
2449 
2450 	reg->rkey = nrkey;
2451 	/* set hostid for the registrant */
2452 	spdk_uuid_copy(&reg->hostid, &ctrlr->hostid);
2453 	TAILQ_INSERT_TAIL(&ns->registrants, reg, link);
2454 	ns->gen++;
2455 
2456 	return 0;
2457 }
2458 
2459 static void
2460 nvmf_ns_reservation_release_reservation(struct spdk_nvmf_ns *ns)
2461 {
2462 	ns->rtype = 0;
2463 	ns->crkey = 0;
2464 	ns->holder = NULL;
2465 }
2466 
/* release the reservation if the last registrant was removed */
static void
nvmf_ns_reservation_check_release_on_remove_registrant(struct spdk_nvmf_ns *ns,
		struct spdk_nvmf_registrant *reg)
{
	struct spdk_nvmf_registrant *next_reg;

	/* Called after 'reg' has already been unlinked from ns->registrants
	 * (see nvmf_ns_reservation_remove_registrant). */

	/* no reservation holder */
	if (!ns->holder) {
		assert(ns->rtype == 0);
		return;
	}

	next_reg = TAILQ_FIRST(&ns->registrants);
	if (next_reg && nvmf_ns_reservation_all_registrants_type(ns)) {
		/* the next valid registrant is the new holder now */
		ns->holder = next_reg;
	} else if (nvmf_ns_reservation_registrant_is_holder(ns, reg)) {
		/* The removed registrant held the reservation (or the list is now
		 * empty for an all-registrants type): release the reservation */
		nvmf_ns_reservation_release_reservation(ns);
	}
}
2489 
2490 static void
2491 nvmf_ns_reservation_remove_registrant(struct spdk_nvmf_ns *ns,
2492 				      struct spdk_nvmf_registrant *reg)
2493 {
2494 	TAILQ_REMOVE(&ns->registrants, reg, link);
2495 	nvmf_ns_reservation_check_release_on_remove_registrant(ns, reg);
2496 	free(reg);
2497 	ns->gen++;
2498 	return;
2499 }
2500 
2501 static uint32_t
2502 nvmf_ns_reservation_remove_registrants_by_key(struct spdk_nvmf_ns *ns,
2503 		uint64_t rkey)
2504 {
2505 	struct spdk_nvmf_registrant *reg, *tmp;
2506 	uint32_t count = 0;
2507 
2508 	TAILQ_FOREACH_SAFE(reg, &ns->registrants, link, tmp) {
2509 		if (reg->rkey == rkey) {
2510 			nvmf_ns_reservation_remove_registrant(ns, reg);
2511 			count++;
2512 		}
2513 	}
2514 	return count;
2515 }
2516 
2517 static uint32_t
2518 nvmf_ns_reservation_remove_all_other_registrants(struct spdk_nvmf_ns *ns,
2519 		struct spdk_nvmf_registrant *reg)
2520 {
2521 	struct spdk_nvmf_registrant *reg_tmp, *reg_tmp2;
2522 	uint32_t count = 0;
2523 
2524 	TAILQ_FOREACH_SAFE(reg_tmp, &ns->registrants, link, reg_tmp2) {
2525 		if (reg_tmp != reg) {
2526 			nvmf_ns_reservation_remove_registrant(ns, reg_tmp);
2527 			count++;
2528 		}
2529 	}
2530 	return count;
2531 }
2532 
2533 static uint32_t
2534 nvmf_ns_reservation_clear_all_registrants(struct spdk_nvmf_ns *ns)
2535 {
2536 	struct spdk_nvmf_registrant *reg, *reg_tmp;
2537 	uint32_t count = 0;
2538 
2539 	TAILQ_FOREACH_SAFE(reg, &ns->registrants, link, reg_tmp) {
2540 		nvmf_ns_reservation_remove_registrant(ns, reg);
2541 		count++;
2542 	}
2543 	return count;
2544 }
2545 
2546 static void
2547 nvmf_ns_reservation_acquire_reservation(struct spdk_nvmf_ns *ns, uint64_t rkey,
2548 					enum spdk_nvme_reservation_type rtype,
2549 					struct spdk_nvmf_registrant *holder)
2550 {
2551 	ns->rtype = rtype;
2552 	ns->crkey = rkey;
2553 	assert(ns->holder == NULL);
2554 	ns->holder = holder;
2555 }
2556 
2557 static bool
2558 nvmf_ns_reservation_register(struct spdk_nvmf_ns *ns,
2559 			     struct spdk_nvmf_ctrlr *ctrlr,
2560 			     struct spdk_nvmf_request *req)
2561 {
2562 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
2563 	uint8_t rrega, iekey, cptpl, rtype;
2564 	struct spdk_nvme_reservation_register_data key;
2565 	struct spdk_nvmf_registrant *reg;
2566 	uint8_t status = SPDK_NVME_SC_SUCCESS;
2567 	bool update_sgroup = false;
2568 	struct spdk_uuid hostid_list[SPDK_NVMF_MAX_NUM_REGISTRANTS];
2569 	uint32_t num_hostid = 0;
2570 	int rc;
2571 
2572 	rrega = cmd->cdw10_bits.resv_register.rrega;
2573 	iekey = cmd->cdw10_bits.resv_register.iekey;
2574 	cptpl = cmd->cdw10_bits.resv_register.cptpl;
2575 
2576 	if (req->data && req->length >= sizeof(key)) {
2577 		memcpy(&key, req->data, sizeof(key));
2578 	} else {
2579 		SPDK_ERRLOG("No key provided. Failing request.\n");
2580 		status = SPDK_NVME_SC_INVALID_FIELD;
2581 		goto exit;
2582 	}
2583 
2584 	SPDK_DEBUGLOG(nvmf, "REGISTER: RREGA %u, IEKEY %u, CPTPL %u, "
2585 		      "NRKEY 0x%"PRIx64", NRKEY 0x%"PRIx64"\n",
2586 		      rrega, iekey, cptpl, key.crkey, key.nrkey);
2587 
2588 	if (cptpl == SPDK_NVME_RESERVE_PTPL_CLEAR_POWER_ON) {
2589 		/* Ture to OFF state, and need to be updated in the configuration file */
2590 		if (ns->ptpl_activated) {
2591 			ns->ptpl_activated = 0;
2592 			update_sgroup = true;
2593 		}
2594 	} else if (cptpl == SPDK_NVME_RESERVE_PTPL_PERSIST_POWER_LOSS) {
2595 		if (ns->ptpl_file == NULL) {
2596 			status = SPDK_NVME_SC_INVALID_FIELD;
2597 			goto exit;
2598 		} else if (ns->ptpl_activated == 0) {
2599 			ns->ptpl_activated = 1;
2600 			update_sgroup = true;
2601 		}
2602 	}
2603 
2604 	/* current Host Identifier has registrant or not */
2605 	reg = nvmf_ns_reservation_get_registrant(ns, &ctrlr->hostid);
2606 
2607 	switch (rrega) {
2608 	case SPDK_NVME_RESERVE_REGISTER_KEY:
2609 		if (!reg) {
2610 			/* register new controller */
2611 			if (key.nrkey == 0) {
2612 				SPDK_ERRLOG("Can't register zeroed new key\n");
2613 				status = SPDK_NVME_SC_INVALID_FIELD;
2614 				goto exit;
2615 			}
2616 			rc = nvmf_ns_reservation_add_registrant(ns, ctrlr, key.nrkey);
2617 			if (rc < 0) {
2618 				status = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
2619 				goto exit;
2620 			}
2621 			update_sgroup = true;
2622 		} else {
2623 			/* register with same key is not an error */
2624 			if (reg->rkey != key.nrkey) {
2625 				SPDK_ERRLOG("The same host already register a "
2626 					    "key with 0x%"PRIx64"\n",
2627 					    reg->rkey);
2628 				status = SPDK_NVME_SC_RESERVATION_CONFLICT;
2629 				goto exit;
2630 			}
2631 		}
2632 		break;
2633 	case SPDK_NVME_RESERVE_UNREGISTER_KEY:
2634 		if (!reg || (!iekey && reg->rkey != key.crkey)) {
2635 			SPDK_ERRLOG("No registrant or current key doesn't match "
2636 				    "with existing registrant key\n");
2637 			status = SPDK_NVME_SC_RESERVATION_CONFLICT;
2638 			goto exit;
2639 		}
2640 
2641 		rtype = ns->rtype;
2642 		num_hostid = nvmf_ns_reservation_get_all_other_hostid(ns, hostid_list,
2643 				SPDK_NVMF_MAX_NUM_REGISTRANTS,
2644 				&ctrlr->hostid);
2645 
2646 		nvmf_ns_reservation_remove_registrant(ns, reg);
2647 
2648 		if (!ns->rtype && num_hostid && (rtype == SPDK_NVME_RESERVE_WRITE_EXCLUSIVE_REG_ONLY ||
2649 						 rtype == SPDK_NVME_RESERVE_EXCLUSIVE_ACCESS_REG_ONLY)) {
2650 			nvmf_subsystem_gen_ctrlr_notification(ns->subsystem, ns,
2651 							      hostid_list,
2652 							      num_hostid,
2653 							      SPDK_NVME_RESERVATION_RELEASED);
2654 		}
2655 		update_sgroup = true;
2656 		break;
2657 	case SPDK_NVME_RESERVE_REPLACE_KEY:
2658 		if (key.nrkey == 0) {
2659 			SPDK_ERRLOG("Can't register zeroed new key\n");
2660 			status = SPDK_NVME_SC_INVALID_FIELD;
2661 			goto exit;
2662 		}
2663 		/* Registrant exists */
2664 		if (reg) {
2665 			if (!iekey && reg->rkey != key.crkey) {
2666 				SPDK_ERRLOG("Current key doesn't match "
2667 					    "existing registrant key\n");
2668 				status = SPDK_NVME_SC_RESERVATION_CONFLICT;
2669 				goto exit;
2670 			}
2671 			if (reg->rkey == key.nrkey) {
2672 				goto exit;
2673 			}
2674 			reg->rkey = key.nrkey;
2675 		} else if (iekey) { /* No registrant but IEKEY is set */
2676 			/* new registrant */
2677 			rc = nvmf_ns_reservation_add_registrant(ns, ctrlr, key.nrkey);
2678 			if (rc < 0) {
2679 				status = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
2680 				goto exit;
2681 			}
2682 		} else { /* No registrant */
2683 			SPDK_ERRLOG("No registrant\n");
2684 			status = SPDK_NVME_SC_RESERVATION_CONFLICT;
2685 			goto exit;
2686 
2687 		}
2688 		update_sgroup = true;
2689 		break;
2690 	default:
2691 		status = SPDK_NVME_SC_INVALID_FIELD;
2692 		goto exit;
2693 	}
2694 
2695 exit:
2696 	if (update_sgroup) {
2697 		rc = nvmf_ns_update_reservation_info(ns);
2698 		if (rc != 0) {
2699 			status = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
2700 		}
2701 	}
2702 	req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
2703 	req->rsp->nvme_cpl.status.sc = status;
2704 	return update_sgroup;
2705 }
2706 
/* Handle a Reservation Acquire command (ACQUIRE or PREEMPT).  Returns true
 * when the reservation state changed and the subsystem poll groups need to
 * be updated; the NVMe completion status is written into the request.
 */
static bool
nvmf_ns_reservation_acquire(struct spdk_nvmf_ns *ns,
			    struct spdk_nvmf_ctrlr *ctrlr,
			    struct spdk_nvmf_request *req)
{
	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
	uint8_t racqa, iekey, rtype;
	struct spdk_nvme_reservation_acquire_data key;
	struct spdk_nvmf_registrant *reg;
	bool all_regs = false;
	uint32_t count = 0;
	bool update_sgroup = true;
	struct spdk_uuid hostid_list[SPDK_NVMF_MAX_NUM_REGISTRANTS];
	uint32_t num_hostid = 0;
	struct spdk_uuid new_hostid_list[SPDK_NVMF_MAX_NUM_REGISTRANTS];
	uint32_t new_num_hostid = 0;
	bool reservation_released = false;
	uint8_t status = SPDK_NVME_SC_SUCCESS;

	racqa = cmd->cdw10_bits.resv_acquire.racqa;
	iekey = cmd->cdw10_bits.resv_acquire.iekey;
	rtype = cmd->cdw10_bits.resv_acquire.rtype;

	if (req->data && req->length >= sizeof(key)) {
		memcpy(&key, req->data, sizeof(key));
	} else {
		SPDK_ERRLOG("No key provided. Failing request.\n");
		status = SPDK_NVME_SC_INVALID_FIELD;
		goto exit;
	}

	SPDK_DEBUGLOG(nvmf, "ACQUIRE: RACQA %u, IEKEY %u, RTYPE %u, "
		      "NRKEY 0x%"PRIx64", PRKEY 0x%"PRIx64"\n",
		      racqa, iekey, rtype, key.crkey, key.prkey);

	/* IEKEY must be cleared for Acquire and rtype must be a defined type. */
	if (iekey || rtype > SPDK_NVME_RESERVE_EXCLUSIVE_ACCESS_ALL_REGS) {
		SPDK_ERRLOG("Ignore existing key field set to 1\n");
		status = SPDK_NVME_SC_INVALID_FIELD;
		update_sgroup = false;
		goto exit;
	}

	reg = nvmf_ns_reservation_get_registrant(ns, &ctrlr->hostid);
	/* must be registrant and CRKEY must match */
	if (!reg || reg->rkey != key.crkey) {
		SPDK_ERRLOG("No registrant or current key doesn't match "
			    "with existing registrant key\n");
		status = SPDK_NVME_SC_RESERVATION_CONFLICT;
		update_sgroup = false;
		goto exit;
	}

	all_regs = nvmf_ns_reservation_all_registrants_type(ns);

	switch (racqa) {
	case SPDK_NVME_RESERVE_ACQUIRE:
		/* it's not an error for the holder to acquire same reservation type again */
		if (nvmf_ns_reservation_registrant_is_holder(ns, reg) && ns->rtype == rtype) {
			/* do nothing */
			update_sgroup = false;
		} else if (ns->holder == NULL) {
			/* first time to acquire the reservation */
			nvmf_ns_reservation_acquire_reservation(ns, key.crkey, rtype, reg);
		} else {
			SPDK_ERRLOG("Invalid rtype or current registrant is not holder\n");
			status = SPDK_NVME_SC_RESERVATION_CONFLICT;
			update_sgroup = false;
			goto exit;
		}
		break;
	case SPDK_NVME_RESERVE_PREEMPT:
		/* no reservation holder */
		if (!ns->holder) {
			/* unregister with PRKEY */
			nvmf_ns_reservation_remove_registrants_by_key(ns, key.prkey);
			break;
		}
		/* Snapshot all other hosts before the preempt so the notification
		 * lists can be computed at exit. */
		num_hostid = nvmf_ns_reservation_get_all_other_hostid(ns, hostid_list,
				SPDK_NVMF_MAX_NUM_REGISTRANTS,
				&ctrlr->hostid);

		/* only 1 reservation holder and reservation key is valid */
		if (!all_regs) {
			/* preempt itself: keep the holder, just switch the type */
			if (nvmf_ns_reservation_registrant_is_holder(ns, reg) &&
			    ns->crkey == key.prkey) {
				ns->rtype = rtype;
				reservation_released = true;
				break;
			}

			if (ns->crkey == key.prkey) {
				/* PRKEY names the current holder: replace the reservation */
				nvmf_ns_reservation_remove_registrant(ns, ns->holder);
				nvmf_ns_reservation_acquire_reservation(ns, key.crkey, rtype, reg);
				reservation_released = true;
			} else if (key.prkey != 0) {
				nvmf_ns_reservation_remove_registrants_by_key(ns, key.prkey);
			} else {
				/* PRKEY is zero */
				SPDK_ERRLOG("Current PRKEY is zero\n");
				status = SPDK_NVME_SC_RESERVATION_CONFLICT;
				update_sgroup = false;
				goto exit;
			}
		} else {
			/* release all other registrants except for the current one */
			if (key.prkey == 0) {
				nvmf_ns_reservation_remove_all_other_registrants(ns, reg);
				assert(ns->holder == reg);
			} else {
				count = nvmf_ns_reservation_remove_registrants_by_key(ns, key.prkey);
				if (count == 0) {
					SPDK_ERRLOG("PRKEY doesn't match any registrant\n");
					status = SPDK_NVME_SC_RESERVATION_CONFLICT;
					update_sgroup = false;
					goto exit;
				}
			}
		}
		break;
	default:
		status = SPDK_NVME_SC_INVALID_FIELD;
		update_sgroup = false;
		break;
	}

exit:
	if (update_sgroup && racqa == SPDK_NVME_RESERVE_PREEMPT) {
		new_num_hostid = nvmf_ns_reservation_get_all_other_hostid(ns, new_hostid_list,
				 SPDK_NVMF_MAX_NUM_REGISTRANTS,
				 &ctrlr->hostid);
		/* Preempt notification occurs on the unregistered controllers
		 * other than the controller who issued the command.
		 */
		num_hostid = nvmf_ns_reservation_get_unregistered_hostid(hostid_list,
				num_hostid,
				new_hostid_list,
				new_num_hostid);
		if (num_hostid) {
			nvmf_subsystem_gen_ctrlr_notification(ns->subsystem, ns,
							      hostid_list,
							      num_hostid,
							      SPDK_NVME_REGISTRATION_PREEMPTED);

		}
		/* Reservation released notification occurs on the
		 * controllers which are the remaining registrants other than
		 * the controller who issued the command.
		 */
		if (reservation_released && new_num_hostid) {
			nvmf_subsystem_gen_ctrlr_notification(ns->subsystem, ns,
							      new_hostid_list,
							      new_num_hostid,
							      SPDK_NVME_RESERVATION_RELEASED);

		}
	}
	/* Persist the new state when PTPL is active. */
	if (update_sgroup && ns->ptpl_activated) {
		if (nvmf_ns_update_reservation_info(ns)) {
			status = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
		}
	}
	req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
	req->rsp->nvme_cpl.status.sc = status;
	return update_sgroup;
}
2873 
/* Handle a Reservation Release command (RELEASE or CLEAR).  Returns true
 * when the reservation state changed and the subsystem poll groups need to
 * be updated; the NVMe completion status is written into the request.
 */
static bool
nvmf_ns_reservation_release(struct spdk_nvmf_ns *ns,
			    struct spdk_nvmf_ctrlr *ctrlr,
			    struct spdk_nvmf_request *req)
{
	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
	uint8_t rrela, iekey, rtype;
	struct spdk_nvmf_registrant *reg;
	uint64_t crkey;
	uint8_t status = SPDK_NVME_SC_SUCCESS;
	bool update_sgroup = true;
	struct spdk_uuid hostid_list[SPDK_NVMF_MAX_NUM_REGISTRANTS];
	uint32_t num_hostid = 0;

	rrela = cmd->cdw10_bits.resv_release.rrela;
	iekey = cmd->cdw10_bits.resv_release.iekey;
	rtype = cmd->cdw10_bits.resv_release.rtype;

	if (req->data && req->length >= sizeof(crkey)) {
		memcpy(&crkey, req->data, sizeof(crkey));
	} else {
		SPDK_ERRLOG("No key provided. Failing request.\n");
		status = SPDK_NVME_SC_INVALID_FIELD;
		goto exit;
	}

	SPDK_DEBUGLOG(nvmf, "RELEASE: RRELA %u, IEKEY %u, RTYPE %u, "
		      "CRKEY 0x%"PRIx64"\n",  rrela, iekey, rtype, crkey);

	/* IEKEY must be cleared for Release. */
	if (iekey) {
		SPDK_ERRLOG("Ignore existing key field set to 1\n");
		status = SPDK_NVME_SC_INVALID_FIELD;
		update_sgroup = false;
		goto exit;
	}

	/* The issuer must be a registrant and its key must match. */
	reg = nvmf_ns_reservation_get_registrant(ns, &ctrlr->hostid);
	if (!reg || reg->rkey != crkey) {
		SPDK_ERRLOG("No registrant or current key doesn't match "
			    "with existing registrant key\n");
		status = SPDK_NVME_SC_RESERVATION_CONFLICT;
		update_sgroup = false;
		goto exit;
	}

	/* Snapshot the other hosts up front for the notifications below. */
	num_hostid = nvmf_ns_reservation_get_all_other_hostid(ns, hostid_list,
			SPDK_NVMF_MAX_NUM_REGISTRANTS,
			&ctrlr->hostid);

	switch (rrela) {
	case SPDK_NVME_RESERVE_RELEASE:
		if (!ns->holder) {
			SPDK_DEBUGLOG(nvmf, "RELEASE: no holder\n");
			update_sgroup = false;
			goto exit;
		}
		if (ns->rtype != rtype) {
			SPDK_ERRLOG("Type doesn't match\n");
			status = SPDK_NVME_SC_INVALID_FIELD;
			update_sgroup = false;
			goto exit;
		}
		if (!nvmf_ns_reservation_registrant_is_holder(ns, reg)) {
			/* not the reservation holder, this isn't an error */
			update_sgroup = false;
			goto exit;
		}

		rtype = ns->rtype;
		nvmf_ns_reservation_release_reservation(ns);

		/* Write/Exclusive-only types generate no release notification. */
		if (num_hostid && rtype != SPDK_NVME_RESERVE_WRITE_EXCLUSIVE &&
		    rtype != SPDK_NVME_RESERVE_EXCLUSIVE_ACCESS) {
			nvmf_subsystem_gen_ctrlr_notification(ns->subsystem, ns,
							      hostid_list,
							      num_hostid,
							      SPDK_NVME_RESERVATION_RELEASED);
		}
		break;
	case SPDK_NVME_RESERVE_CLEAR:
		/* Clear removes every registrant, which also drops the reservation. */
		nvmf_ns_reservation_clear_all_registrants(ns);
		if (num_hostid) {
			nvmf_subsystem_gen_ctrlr_notification(ns->subsystem, ns,
							      hostid_list,
							      num_hostid,
							      SPDK_NVME_RESERVATION_PREEMPTED);
		}
		break;
	default:
		status = SPDK_NVME_SC_INVALID_FIELD;
		update_sgroup = false;
		goto exit;
	}

exit:
	/* Persist the new state when PTPL is active. */
	if (update_sgroup && ns->ptpl_activated) {
		if (nvmf_ns_update_reservation_info(ns)) {
			status = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
		}
	}
	req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
	req->rsp->nvme_cpl.status.sc = status;
	return update_sgroup;
}
2978 
2979 static void
2980 nvmf_ns_reservation_report(struct spdk_nvmf_ns *ns,
2981 			   struct spdk_nvmf_ctrlr *ctrlr,
2982 			   struct spdk_nvmf_request *req)
2983 {
2984 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
2985 	struct spdk_nvmf_registrant *reg, *tmp;
2986 	struct spdk_nvme_reservation_status_extended_data *status_data;
2987 	struct spdk_nvme_registered_ctrlr_extended_data *ctrlr_data;
2988 	uint8_t *payload;
2989 	uint32_t transfer_len, payload_len = 0;
2990 	uint32_t regctl = 0;
2991 	uint8_t status = SPDK_NVME_SC_SUCCESS;
2992 
2993 	if (req->data == NULL) {
2994 		SPDK_ERRLOG("No data transfer specified for request. "
2995 			    " Unable to transfer back response.\n");
2996 		status = SPDK_NVME_SC_INVALID_FIELD;
2997 		goto exit;
2998 	}
2999 
3000 	if (!cmd->cdw11_bits.resv_report.eds) {
3001 		SPDK_ERRLOG("NVMeoF uses extended controller data structure, "
3002 			    "please set EDS bit in cdw11 and try again\n");
3003 		status = SPDK_NVME_SC_HOSTID_INCONSISTENT_FORMAT;
3004 		goto exit;
3005 	}
3006 
3007 	/* Number of Dwords of the Reservation Status data structure to transfer */
3008 	transfer_len = (cmd->cdw10 + 1) * sizeof(uint32_t);
3009 	payload = req->data;
3010 
3011 	if (transfer_len < sizeof(struct spdk_nvme_reservation_status_extended_data)) {
3012 		status = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
3013 		goto exit;
3014 	}
3015 
3016 	status_data = (struct spdk_nvme_reservation_status_extended_data *)payload;
3017 	status_data->data.gen = ns->gen;
3018 	status_data->data.rtype = ns->rtype;
3019 	status_data->data.ptpls = ns->ptpl_activated;
3020 	payload_len += sizeof(struct spdk_nvme_reservation_status_extended_data);
3021 
3022 	TAILQ_FOREACH_SAFE(reg, &ns->registrants, link, tmp) {
3023 		payload_len += sizeof(struct spdk_nvme_registered_ctrlr_extended_data);
3024 		if (payload_len > transfer_len) {
3025 			break;
3026 		}
3027 
3028 		ctrlr_data = (struct spdk_nvme_registered_ctrlr_extended_data *)
3029 			     (payload + sizeof(*status_data) + sizeof(*ctrlr_data) * regctl);
3030 		/* Set to 0xffffh for dynamic controller */
3031 		ctrlr_data->cntlid = 0xffff;
3032 		ctrlr_data->rcsts.status = (ns->holder == reg) ? true : false;
3033 		ctrlr_data->rkey = reg->rkey;
3034 		spdk_uuid_copy((struct spdk_uuid *)ctrlr_data->hostid, &reg->hostid);
3035 		regctl++;
3036 	}
3037 	status_data->data.regctl = regctl;
3038 
3039 exit:
3040 	req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
3041 	req->rsp->nvme_cpl.status.sc = status;
3042 	return;
3043 }
3044 
/* Thread-message trampoline: complete the reservation request on the
 * poll-group thread that owns its qpair.
 */
static void
nvmf_ns_reservation_complete(void *ctx)
{
	spdk_nvmf_request_complete((struct spdk_nvmf_request *)ctx);
}
3052 
3053 static void
3054 _nvmf_ns_reservation_update_done(struct spdk_nvmf_subsystem *subsystem,
3055 				 void *cb_arg, int status)
3056 {
3057 	struct spdk_nvmf_request *req = (struct spdk_nvmf_request *)cb_arg;
3058 	struct spdk_nvmf_poll_group *group = req->qpair->group;
3059 
3060 	spdk_thread_send_msg(group->thread, nvmf_ns_reservation_complete, req);
3061 }
3062 
3063 void
3064 nvmf_ns_reservation_request(void *ctx)
3065 {
3066 	struct spdk_nvmf_request *req = (struct spdk_nvmf_request *)ctx;
3067 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
3068 	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
3069 	struct subsystem_update_ns_ctx *update_ctx;
3070 	uint32_t nsid;
3071 	struct spdk_nvmf_ns *ns;
3072 	bool update_sgroup = false;
3073 
3074 	nsid = cmd->nsid;
3075 	ns = _nvmf_subsystem_get_ns(ctrlr->subsys, nsid);
3076 	assert(ns != NULL);
3077 
3078 	switch (cmd->opc) {
3079 	case SPDK_NVME_OPC_RESERVATION_REGISTER:
3080 		update_sgroup = nvmf_ns_reservation_register(ns, ctrlr, req);
3081 		break;
3082 	case SPDK_NVME_OPC_RESERVATION_ACQUIRE:
3083 		update_sgroup = nvmf_ns_reservation_acquire(ns, ctrlr, req);
3084 		break;
3085 	case SPDK_NVME_OPC_RESERVATION_RELEASE:
3086 		update_sgroup = nvmf_ns_reservation_release(ns, ctrlr, req);
3087 		break;
3088 	case SPDK_NVME_OPC_RESERVATION_REPORT:
3089 		nvmf_ns_reservation_report(ns, ctrlr, req);
3090 		break;
3091 	default:
3092 		break;
3093 	}
3094 
3095 	/* update reservation information to subsystem's poll group */
3096 	if (update_sgroup) {
3097 		update_ctx = calloc(1, sizeof(*update_ctx));
3098 		if (update_ctx == NULL) {
3099 			SPDK_ERRLOG("Can't alloc subsystem poll group update context\n");
3100 			goto update_done;
3101 		}
3102 		update_ctx->subsystem = ctrlr->subsys;
3103 		update_ctx->cb_fn = _nvmf_ns_reservation_update_done;
3104 		update_ctx->cb_arg = req;
3105 
3106 		nvmf_subsystem_update_ns(ctrlr->subsys, subsystem_update_ns_done, update_ctx);
3107 		return;
3108 	}
3109 
3110 update_done:
3111 	_nvmf_ns_reservation_update_done(ctrlr->subsys, (void *)req, 0);
3112 }
3113 
3114 int
3115 spdk_nvmf_subsystem_set_ana_reporting(struct spdk_nvmf_subsystem *subsystem,
3116 				      bool ana_reporting)
3117 {
3118 	if (subsystem->state != SPDK_NVMF_SUBSYSTEM_INACTIVE) {
3119 		return -EAGAIN;
3120 	}
3121 
3122 	subsystem->flags.ana_reporting = ana_reporting;
3123 
3124 	return 0;
3125 }
3126 
3127 bool
3128 nvmf_subsystem_get_ana_reporting(struct spdk_nvmf_subsystem *subsystem)
3129 {
3130 	return subsystem->flags.ana_reporting;
3131 }
3132 
/* Context carried through spdk_for_each_channel() while every poll group
 * is told about an ANA state change on a listener.  Freed by the
 * completion callback once all channels have been visited.
 */
struct subsystem_listener_update_ctx {
	/* Listener whose ana_state[] was just updated. */
	struct spdk_nvmf_subsystem_listener *listener;

	/* User completion callback (may be NULL) and its argument, invoked
	 * with the final status when the channel walk finishes.
	 */
	spdk_nvmf_tgt_subsystem_listen_done_fn cb_fn;
	void *cb_arg;
};
3139 
3140 static void
3141 subsystem_listener_update_done(struct spdk_io_channel_iter *i, int status)
3142 {
3143 	struct subsystem_listener_update_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
3144 
3145 	if (ctx->cb_fn) {
3146 		ctx->cb_fn(ctx->cb_arg, status);
3147 	}
3148 	free(ctx);
3149 }
3150 
3151 static void
3152 subsystem_listener_update_on_pg(struct spdk_io_channel_iter *i)
3153 {
3154 	struct subsystem_listener_update_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
3155 	struct spdk_nvmf_subsystem_listener *listener;
3156 	struct spdk_nvmf_poll_group *group;
3157 	struct spdk_nvmf_ctrlr *ctrlr;
3158 
3159 	listener = ctx->listener;
3160 	group = spdk_io_channel_get_ctx(spdk_io_channel_iter_get_channel(i));
3161 
3162 	TAILQ_FOREACH(ctrlr, &listener->subsystem->ctrlrs, link) {
3163 		if (ctrlr->admin_qpair && ctrlr->admin_qpair->group == group && ctrlr->listener == listener) {
3164 			nvmf_ctrlr_async_event_ana_change_notice(ctrlr);
3165 		}
3166 	}
3167 
3168 	spdk_for_each_channel_continue(i, 0);
3169 }
3170 
3171 void
3172 nvmf_subsystem_set_ana_state(struct spdk_nvmf_subsystem *subsystem,
3173 			     const struct spdk_nvme_transport_id *trid,
3174 			     enum spdk_nvme_ana_state ana_state, uint32_t anagrpid,
3175 			     spdk_nvmf_tgt_subsystem_listen_done_fn cb_fn, void *cb_arg)
3176 {
3177 	struct spdk_nvmf_subsystem_listener *listener;
3178 	struct subsystem_listener_update_ctx *ctx;
3179 	uint32_t i;
3180 
3181 	assert(cb_fn != NULL);
3182 	assert(subsystem->state == SPDK_NVMF_SUBSYSTEM_INACTIVE ||
3183 	       subsystem->state == SPDK_NVMF_SUBSYSTEM_PAUSED);
3184 
3185 	if (!subsystem->flags.ana_reporting) {
3186 		SPDK_ERRLOG("ANA reporting is disabled\n");
3187 		cb_fn(cb_arg, -EINVAL);
3188 		return;
3189 	}
3190 
3191 	/* ANA Change state is not used, ANA Persistent Loss state
3192 	 * is not supported yet.
3193 	 */
3194 	if (!(ana_state == SPDK_NVME_ANA_OPTIMIZED_STATE ||
3195 	      ana_state == SPDK_NVME_ANA_NON_OPTIMIZED_STATE ||
3196 	      ana_state == SPDK_NVME_ANA_INACCESSIBLE_STATE)) {
3197 		SPDK_ERRLOG("ANA state %d is not supported\n", ana_state);
3198 		cb_fn(cb_arg, -ENOTSUP);
3199 		return;
3200 	}
3201 
3202 	if (anagrpid > subsystem->max_nsid) {
3203 		SPDK_ERRLOG("ANA group ID %" PRIu32 " is more than maximum\n", anagrpid);
3204 		cb_fn(cb_arg, -EINVAL);
3205 		return;
3206 	}
3207 
3208 	listener = nvmf_subsystem_find_listener(subsystem, trid);
3209 	if (!listener) {
3210 		SPDK_ERRLOG("Unable to find listener.\n");
3211 		cb_fn(cb_arg, -EINVAL);
3212 		return;
3213 	}
3214 
3215 	if (anagrpid != 0 && listener->ana_state[anagrpid - 1] == ana_state) {
3216 		cb_fn(cb_arg, 0);
3217 		return;
3218 	}
3219 
3220 	ctx = calloc(1, sizeof(*ctx));
3221 	if (!ctx) {
3222 		SPDK_ERRLOG("Unable to allocate context\n");
3223 		cb_fn(cb_arg, -ENOMEM);
3224 		return;
3225 	}
3226 
3227 	for (i = 1; i <= subsystem->max_nsid; i++) {
3228 		if (anagrpid == 0 || i == anagrpid) {
3229 			listener->ana_state[i - 1] = ana_state;
3230 		}
3231 	}
3232 	listener->ana_state_change_count++;
3233 
3234 	ctx->listener = listener;
3235 	ctx->cb_fn = cb_fn;
3236 	ctx->cb_arg = cb_arg;
3237 
3238 	spdk_for_each_channel(subsystem->tgt,
3239 			      subsystem_listener_update_on_pg,
3240 			      ctx,
3241 			      subsystem_listener_update_done);
3242 }
3243