xref: /spdk/lib/nvmf/subsystem.c (revision 72e4a4d46a6e733d295a02f0cbf0e70aee629dd2)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright (c) Intel Corporation. All rights reserved.
5  *   Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved.
6  *   Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
7  *
8  *   Redistribution and use in source and binary forms, with or without
9  *   modification, are permitted provided that the following conditions
10  *   are met:
11  *
12  *     * Redistributions of source code must retain the above copyright
13  *       notice, this list of conditions and the following disclaimer.
14  *     * Redistributions in binary form must reproduce the above copyright
15  *       notice, this list of conditions and the following disclaimer in
16  *       the documentation and/or other materials provided with the
17  *       distribution.
18  *     * Neither the name of Intel Corporation nor the names of its
19  *       contributors may be used to endorse or promote products derived
20  *       from this software without specific prior written permission.
21  *
22  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  */
34 
35 #include "spdk/stdinc.h"
36 
37 #include "nvmf_internal.h"
38 #include "transport.h"
39 
40 #include "spdk/assert.h"
41 #include "spdk/likely.h"
42 #include "spdk/string.h"
43 #include "spdk/trace.h"
44 #include "spdk/nvmf_spec.h"
45 #include "spdk/uuid.h"
46 #include "spdk/json.h"
47 #include "spdk/file.h"
48 
49 #define __SPDK_BDEV_MODULE_ONLY
50 #include "spdk/bdev_module.h"
51 #include "spdk/log.h"
52 #include "spdk_internal/utf.h"
53 #include "spdk_internal/usdt.h"
54 
55 #define MODEL_NUMBER_DEFAULT "SPDK bdev Controller"
56 #define NVMF_SUBSYSTEM_DEFAULT_NAMESPACES 32
57 
58 /*
59  * States for parsing valid domains in NQNs according to RFC 1034
60  */
61 enum spdk_nvmf_nqn_domain_states {
62 	/* First character of a domain must be a letter */
63 	SPDK_NVMF_DOMAIN_ACCEPT_LETTER = 0,
64 
65 	/* Subsequent characters can be any of letter, digit, or hyphen */
66 	SPDK_NVMF_DOMAIN_ACCEPT_LDH = 1,
67 
68 	/* A domain label must end with either a letter or digit */
69 	SPDK_NVMF_DOMAIN_ACCEPT_ANY = 2
70 };
71 
72 static int _nvmf_subsystem_destroy(struct spdk_nvmf_subsystem *subsystem);
73 
/* Returns true if every byte of the buffer is a printable ASCII character
 * (0x20 through 0x7E inclusive), as required by the NVMe spec for fields
 * such as the serial and model number.  An empty buffer is valid. */
static bool
nvmf_valid_ascii_string(const void *buf, size_t size)
{
	const uint8_t *bytes = buf;
	size_t idx;

	for (idx = 0; idx < size; idx++) {
		uint8_t c = bytes[idx];

		if (c < 0x20 || c > 0x7E) {
			return false;
		}
	}

	return true;
}
89 
90 static bool
91 nvmf_valid_nqn(const char *nqn)
92 {
93 	size_t len;
94 	struct spdk_uuid uuid_value;
95 	uint32_t i;
96 	int bytes_consumed;
97 	uint32_t domain_label_length;
98 	char *reverse_domain_end;
99 	uint32_t reverse_domain_end_index;
100 	enum spdk_nvmf_nqn_domain_states domain_state = SPDK_NVMF_DOMAIN_ACCEPT_LETTER;
101 
102 	/* Check for length requirements */
103 	len = strlen(nqn);
104 	if (len > SPDK_NVMF_NQN_MAX_LEN) {
105 		SPDK_ERRLOG("Invalid NQN \"%s\": length %zu > max %d\n", nqn, len, SPDK_NVMF_NQN_MAX_LEN);
106 		return false;
107 	}
108 
109 	/* The nqn must be at least as long as SPDK_NVMF_NQN_MIN_LEN to contain the necessary prefix. */
110 	if (len < SPDK_NVMF_NQN_MIN_LEN) {
111 		SPDK_ERRLOG("Invalid NQN \"%s\": length %zu < min %d\n", nqn, len, SPDK_NVMF_NQN_MIN_LEN);
112 		return false;
113 	}
114 
115 	/* Check for discovery controller nqn */
116 	if (!strcmp(nqn, SPDK_NVMF_DISCOVERY_NQN)) {
117 		return true;
118 	}
119 
120 	/* Check for equality with the generic nqn structure of the form "nqn.2014-08.org.nvmexpress:uuid:11111111-2222-3333-4444-555555555555" */
121 	if (!strncmp(nqn, SPDK_NVMF_NQN_UUID_PRE, SPDK_NVMF_NQN_UUID_PRE_LEN)) {
122 		if (len != SPDK_NVMF_NQN_UUID_PRE_LEN + SPDK_NVMF_UUID_STRING_LEN) {
123 			SPDK_ERRLOG("Invalid NQN \"%s\": uuid is not the correct length\n", nqn);
124 			return false;
125 		}
126 
127 		if (spdk_uuid_parse(&uuid_value, &nqn[SPDK_NVMF_NQN_UUID_PRE_LEN])) {
128 			SPDK_ERRLOG("Invalid NQN \"%s\": uuid is not formatted correctly\n", nqn);
129 			return false;
130 		}
131 		return true;
132 	}
133 
134 	/* If the nqn does not match the uuid structure, the next several checks validate the form "nqn.yyyy-mm.reverse.domain:user-string" */
135 
136 	if (strncmp(nqn, "nqn.", 4) != 0) {
137 		SPDK_ERRLOG("Invalid NQN \"%s\": NQN must begin with \"nqn.\".\n", nqn);
138 		return false;
139 	}
140 
141 	/* Check for yyyy-mm. */
142 	if (!(isdigit(nqn[4]) && isdigit(nqn[5]) && isdigit(nqn[6]) && isdigit(nqn[7]) &&
143 	      nqn[8] == '-' && isdigit(nqn[9]) && isdigit(nqn[10]) && nqn[11] == '.')) {
144 		SPDK_ERRLOG("Invalid date code in NQN \"%s\"\n", nqn);
145 		return false;
146 	}
147 
148 	reverse_domain_end = strchr(nqn, ':');
149 	if (reverse_domain_end != NULL && (reverse_domain_end_index = reverse_domain_end - nqn) < len - 1) {
150 	} else {
151 		SPDK_ERRLOG("Invalid NQN \"%s\". NQN must contain user specified name with a ':' as a prefix.\n",
152 			    nqn);
153 		return false;
154 	}
155 
156 	/* Check for valid reverse domain */
157 	domain_label_length = 0;
158 	for (i = 12; i < reverse_domain_end_index; i++) {
159 		if (domain_label_length > SPDK_DOMAIN_LABEL_MAX_LEN) {
160 			SPDK_ERRLOG("Invalid domain name in NQN \"%s\". At least one Label is too long.\n", nqn);
161 			return false;
162 		}
163 
164 		switch (domain_state) {
165 
166 		case SPDK_NVMF_DOMAIN_ACCEPT_LETTER: {
167 			if (isalpha(nqn[i])) {
168 				domain_state = SPDK_NVMF_DOMAIN_ACCEPT_ANY;
169 				domain_label_length++;
170 				break;
171 			} else {
172 				SPDK_ERRLOG("Invalid domain name in NQN \"%s\". Label names must start with a letter.\n", nqn);
173 				return false;
174 			}
175 		}
176 
177 		case SPDK_NVMF_DOMAIN_ACCEPT_LDH: {
178 			if (isalpha(nqn[i]) || isdigit(nqn[i])) {
179 				domain_state = SPDK_NVMF_DOMAIN_ACCEPT_ANY;
180 				domain_label_length++;
181 				break;
182 			} else if (nqn[i] == '-') {
183 				if (i == reverse_domain_end_index - 1) {
184 					SPDK_ERRLOG("Invalid domain name in NQN \"%s\". Label names must end with an alphanumeric symbol.\n",
185 						    nqn);
186 					return false;
187 				}
188 				domain_state = SPDK_NVMF_DOMAIN_ACCEPT_LDH;
189 				domain_label_length++;
190 				break;
191 			} else if (nqn[i] == '.') {
192 				SPDK_ERRLOG("Invalid domain name in NQN \"%s\". Label names must end with an alphanumeric symbol.\n",
193 					    nqn);
194 				return false;
195 			} else {
196 				SPDK_ERRLOG("Invalid domain name in NQN \"%s\". Label names must contain only [a-z,A-Z,0-9,'-','.'].\n",
197 					    nqn);
198 				return false;
199 			}
200 		}
201 
202 		case SPDK_NVMF_DOMAIN_ACCEPT_ANY: {
203 			if (isalpha(nqn[i]) || isdigit(nqn[i])) {
204 				domain_state = SPDK_NVMF_DOMAIN_ACCEPT_ANY;
205 				domain_label_length++;
206 				break;
207 			} else if (nqn[i] == '-') {
208 				if (i == reverse_domain_end_index - 1) {
209 					SPDK_ERRLOG("Invalid domain name in NQN \"%s\". Label names must end with an alphanumeric symbol.\n",
210 						    nqn);
211 					return false;
212 				}
213 				domain_state = SPDK_NVMF_DOMAIN_ACCEPT_LDH;
214 				domain_label_length++;
215 				break;
216 			} else if (nqn[i] == '.') {
217 				domain_state = SPDK_NVMF_DOMAIN_ACCEPT_LETTER;
218 				domain_label_length = 0;
219 				break;
220 			} else {
221 				SPDK_ERRLOG("Invalid domain name in NQN \"%s\". Label names must contain only [a-z,A-Z,0-9,'-','.'].\n",
222 					    nqn);
223 				return false;
224 			}
225 		}
226 		}
227 	}
228 
229 	i = reverse_domain_end_index + 1;
230 	while (i < len) {
231 		bytes_consumed = utf8_valid(&nqn[i], &nqn[len]);
232 		if (bytes_consumed <= 0) {
233 			SPDK_ERRLOG("Invalid domain name in NQN \"%s\". Label names must contain only valid utf-8.\n", nqn);
234 			return false;
235 		}
236 
237 		i += bytes_consumed;
238 	}
239 	return true;
240 }
241 
242 static void subsystem_state_change_on_pg(struct spdk_io_channel_iter *i);
243 
/* Allocate and register a new subsystem in the target.
 *
 * Returns the new subsystem, or NULL if the NQN is already in use or
 * invalid, a discovery subsystem was requested with namespaces, no free
 * subsystem id remains, or an allocation fails.  The calling thread is
 * recorded as the subsystem's owning thread. */
struct spdk_nvmf_subsystem *
spdk_nvmf_subsystem_create(struct spdk_nvmf_tgt *tgt,
			   const char *nqn,
			   enum spdk_nvmf_subtype type,
			   uint32_t num_ns)
{
	struct spdk_nvmf_subsystem	*subsystem;
	uint32_t			sid;

	if (spdk_nvmf_tgt_find_subsystem(tgt, nqn)) {
		SPDK_ERRLOG("Subsystem NQN '%s' already exists\n", nqn);
		return NULL;
	}

	if (!nvmf_valid_nqn(nqn)) {
		return NULL;
	}

	if (type == SPDK_NVMF_SUBTYPE_DISCOVERY) {
		if (num_ns != 0) {
			SPDK_ERRLOG("Discovery subsystem cannot have namespaces.\n");
			return NULL;
		}
	} else if (num_ns == 0) {
		/* Non-discovery subsystems get a default namespace capacity. */
		num_ns = NVMF_SUBSYSTEM_DEFAULT_NAMESPACES;
	}

	/* Find a free subsystem id (sid) */
	for (sid = 0; sid < tgt->max_subsystems; sid++) {
		if (tgt->subsystems[sid] == NULL) {
			break;
		}
	}
	if (sid >= tgt->max_subsystems) {
		return NULL;
	}

	subsystem = calloc(1, sizeof(struct spdk_nvmf_subsystem));
	if (subsystem == NULL) {
		return NULL;
	}

	subsystem->thread = spdk_get_thread();
	subsystem->state = SPDK_NVMF_SUBSYSTEM_INACTIVE;
	subsystem->tgt = tgt;
	subsystem->id = sid;
	subsystem->subtype = type;
	/* max_nsid doubles as the size of the ns/ana_group arrays below. */
	subsystem->max_nsid = num_ns;
	subsystem->next_cntlid = 0;
	subsystem->min_cntlid = NVMF_MIN_CNTLID;
	subsystem->max_cntlid = NVMF_MAX_CNTLID;
	snprintf(subsystem->subnqn, sizeof(subsystem->subnqn), "%s", nqn);
	pthread_mutex_init(&subsystem->mutex, NULL);
	TAILQ_INIT(&subsystem->listeners);
	TAILQ_INIT(&subsystem->hosts);
	TAILQ_INIT(&subsystem->ctrlrs);

	if (num_ns != 0) {
		subsystem->ns = calloc(num_ns, sizeof(struct spdk_nvmf_ns *));
		if (subsystem->ns == NULL) {
			SPDK_ERRLOG("Namespace memory allocation failed\n");
			pthread_mutex_destroy(&subsystem->mutex);
			free(subsystem);
			return NULL;
		}
		subsystem->ana_group = calloc(num_ns, sizeof(uint32_t));
		if (subsystem->ana_group == NULL) {
			SPDK_ERRLOG("ANA group memory allocation failed\n");
			pthread_mutex_destroy(&subsystem->mutex);
			free(subsystem->ns);
			free(subsystem);
			return NULL;
		}
	}

	/* Default serial number: all '0' characters, NUL-terminated. */
	memset(subsystem->sn, '0', sizeof(subsystem->sn) - 1);
	subsystem->sn[sizeof(subsystem->sn) - 1] = '\0';

	snprintf(subsystem->mn, sizeof(subsystem->mn), "%s",
		 MODEL_NUMBER_DEFAULT);

	tgt->subsystems[sid] = subsystem;
	/* Make the new subsystem visible in the discovery log. */
	nvmf_update_discovery_log(tgt, NULL);

	return subsystem;
}
330 
/* Unlink an allowed-host entry from the subsystem and free it.
 * Must hold subsystem->mutex while calling this function. */
static void
nvmf_subsystem_remove_host(struct spdk_nvmf_subsystem *subsystem, struct spdk_nvmf_host *host)
{
	TAILQ_REMOVE(&subsystem->hosts, host, link);
	free(host);
}
338 
/* Detach a listener from the subsystem: optionally stop the transport-level
 * listener, clear any controller references to it, and free it. */
static void
_nvmf_subsystem_remove_listener(struct spdk_nvmf_subsystem *subsystem,
				struct spdk_nvmf_subsystem_listener *listener,
				bool stop)
{
	struct spdk_nvmf_transport *transport;
	struct spdk_nvmf_ctrlr *ctrlr;

	if (stop) {
		/* Stop accepting new connections on this trid at the transport. */
		transport = spdk_nvmf_tgt_get_transport(subsystem->tgt, listener->trid->trstring);
		if (transport != NULL) {
			spdk_nvmf_transport_stop_listen(transport, listener->trid);
		}
	}

	/* Drop references from controllers that were created via this listener
	 * so they do not dangle after the free below. */
	TAILQ_FOREACH(ctrlr, &subsystem->ctrlrs, link) {
		if (ctrlr->listener == listener) {
			ctrlr->listener = NULL;
		}
	}

	TAILQ_REMOVE(&subsystem->listeners, listener, link);
	free(listener->ana_state);
	free(listener);
}
364 
/* Thread-message trampoline: retries subsystem destruction on the
 * subsystem's owning thread (cb_arg is the struct spdk_nvmf_subsystem *). */
static void
_nvmf_subsystem_destroy_msg(void *cb_arg)
{
	_nvmf_subsystem_destroy((struct spdk_nvmf_subsystem *)cb_arg);
}
372 
/* Tear down the subsystem once no controllers remain.
 *
 * If controllers are still attached, marks the subsystem for async destroy,
 * re-queues this function on the owning thread, and returns -EINPROGRESS.
 * Otherwise removes all namespaces, unregisters from the target, frees the
 * subsystem, and returns 0 (invoking the async completion callback if one
 * was armed). */
static int
_nvmf_subsystem_destroy(struct spdk_nvmf_subsystem *subsystem)
{
	struct spdk_nvmf_ns		*ns;
	nvmf_subsystem_destroy_cb	async_destroy_cb = NULL;
	void				*async_destroy_cb_arg = NULL;
	int				rc;

	if (!TAILQ_EMPTY(&subsystem->ctrlrs)) {
		SPDK_DEBUGLOG(nvmf, "subsystem %p %s has active controllers\n", subsystem, subsystem->subnqn);
		subsystem->async_destroy = true;
		/* Retry later on the owning thread; controllers detach asynchronously. */
		rc = spdk_thread_send_msg(subsystem->thread, _nvmf_subsystem_destroy_msg, subsystem);
		if (rc) {
			SPDK_ERRLOG("Failed to send thread msg, rc %d\n", rc);
			assert(0);
			return rc;
		}
		return -EINPROGRESS;
	}

	/* Remove every namespace before freeing the arrays that back them. */
	ns = spdk_nvmf_subsystem_get_first_ns(subsystem);
	while (ns != NULL) {
		struct spdk_nvmf_ns *next_ns = spdk_nvmf_subsystem_get_next_ns(subsystem, ns);

		spdk_nvmf_subsystem_remove_ns(subsystem, ns->opts.nsid);
		ns = next_ns;
	}

	free(subsystem->ns);
	free(subsystem->ana_group);

	subsystem->tgt->subsystems[subsystem->id] = NULL;
	nvmf_update_discovery_log(subsystem->tgt, NULL);

	pthread_mutex_destroy(&subsystem->mutex);

	/* Capture the completion callback before freeing the subsystem. */
	if (subsystem->async_destroy) {
		async_destroy_cb = subsystem->async_destroy_cb;
		async_destroy_cb_arg = subsystem->async_destroy_cb_arg;
	}

	free(subsystem);

	if (async_destroy_cb) {
		async_destroy_cb(async_destroy_cb_arg);
	}

	return 0;
}
422 
/* Begin destroying a subsystem.
 *
 * The subsystem must be INACTIVE and not already being destroyed, and this
 * must run on the subsystem's owning thread.  Listeners and allowed hosts
 * are removed synchronously; destruction may then complete asynchronously,
 * in which case -EINPROGRESS is returned and cpl_cb(cpl_cb_arg) fires when
 * it finishes.  Returns 0 on synchronous completion, negative errno on
 * error. */
int
spdk_nvmf_subsystem_destroy(struct spdk_nvmf_subsystem *subsystem, nvmf_subsystem_destroy_cb cpl_cb,
			    void *cpl_cb_arg)
{
	struct spdk_nvmf_host *host, *host_tmp;

	if (!subsystem) {
		return -EINVAL;
	}

	assert(spdk_get_thread() == subsystem->thread);

	if (subsystem->state != SPDK_NVMF_SUBSYSTEM_INACTIVE) {
		SPDK_ERRLOG("Subsystem can only be destroyed in inactive state\n");
		assert(0);
		return -EAGAIN;
	}
	if (subsystem->destroying) {
		SPDK_ERRLOG("Subsystem destruction is already started\n");
		assert(0);
		return -EALREADY;
	}

	subsystem->destroying = true;

	SPDK_DEBUGLOG(nvmf, "subsystem is %p %s\n", subsystem, subsystem->subnqn);

	/* stop == false: the transport-level listeners are left running. */
	nvmf_subsystem_remove_all_listeners(subsystem, false);

	pthread_mutex_lock(&subsystem->mutex);

	TAILQ_FOREACH_SAFE(host, &subsystem->hosts, link, host_tmp) {
		nvmf_subsystem_remove_host(subsystem, host);
	}

	pthread_mutex_unlock(&subsystem->mutex);

	/* Arm the async completion in case _nvmf_subsystem_destroy defers. */
	subsystem->async_destroy_cb = cpl_cb;
	subsystem->async_destroy_cb_arg = cpl_cb_arg;

	return _nvmf_subsystem_destroy(subsystem);
}
465 
466 /* we have to use the typedef in the function declaration to appease astyle. */
467 typedef enum spdk_nvmf_subsystem_state spdk_nvmf_subsystem_state_t;
468 
469 static spdk_nvmf_subsystem_state_t
470 nvmf_subsystem_get_intermediate_state(enum spdk_nvmf_subsystem_state current_state,
471 				      enum spdk_nvmf_subsystem_state requested_state)
472 {
473 	switch (requested_state) {
474 	case SPDK_NVMF_SUBSYSTEM_INACTIVE:
475 		return SPDK_NVMF_SUBSYSTEM_DEACTIVATING;
476 	case SPDK_NVMF_SUBSYSTEM_ACTIVE:
477 		if (current_state == SPDK_NVMF_SUBSYSTEM_PAUSED) {
478 			return SPDK_NVMF_SUBSYSTEM_RESUMING;
479 		} else {
480 			return SPDK_NVMF_SUBSYSTEM_ACTIVATING;
481 		}
482 	case SPDK_NVMF_SUBSYSTEM_PAUSED:
483 		return SPDK_NVMF_SUBSYSTEM_PAUSING;
484 	default:
485 		assert(false);
486 		return SPDK_NVMF_SUBSYSTEM_NUM_STATES;
487 	}
488 }
489 
/* Atomically advance the subsystem state machine to `state`.
 *
 * Each target state has one canonical predecessor; the transition is done
 * with a compare-and-exchange against that expectation.  If the exchange
 * fails, a small set of alternate-but-legal predecessors is tried once
 * (e.g. ACTIVE may come from RESUMING, DEACTIVATING from ACTIVATING or
 * PAUSED).  Returns 0 when the transition succeeded, non-zero otherwise. */
static int
nvmf_subsystem_set_state(struct spdk_nvmf_subsystem *subsystem,
			 enum spdk_nvmf_subsystem_state state)
{
	enum spdk_nvmf_subsystem_state actual_old_state, expected_old_state;
	bool exchanged;

	switch (state) {
	case SPDK_NVMF_SUBSYSTEM_INACTIVE:
		expected_old_state = SPDK_NVMF_SUBSYSTEM_DEACTIVATING;
		break;
	case SPDK_NVMF_SUBSYSTEM_ACTIVATING:
		expected_old_state = SPDK_NVMF_SUBSYSTEM_INACTIVE;
		break;
	case SPDK_NVMF_SUBSYSTEM_ACTIVE:
		expected_old_state = SPDK_NVMF_SUBSYSTEM_ACTIVATING;
		break;
	case SPDK_NVMF_SUBSYSTEM_PAUSING:
		expected_old_state = SPDK_NVMF_SUBSYSTEM_ACTIVE;
		break;
	case SPDK_NVMF_SUBSYSTEM_PAUSED:
		expected_old_state = SPDK_NVMF_SUBSYSTEM_PAUSING;
		break;
	case SPDK_NVMF_SUBSYSTEM_RESUMING:
		expected_old_state = SPDK_NVMF_SUBSYSTEM_PAUSED;
		break;
	case SPDK_NVMF_SUBSYSTEM_DEACTIVATING:
		expected_old_state = SPDK_NVMF_SUBSYSTEM_ACTIVE;
		break;
	default:
		assert(false);
		return -1;
	}

	actual_old_state = expected_old_state;
	exchanged = __atomic_compare_exchange_n(&subsystem->state, &actual_old_state, state, false,
						__ATOMIC_RELAXED, __ATOMIC_RELAXED);
	if (spdk_unlikely(exchanged == false)) {
		/* actual_old_state now holds the real current state; accept the
		 * known alternate predecessors and retry the exchange once. */
		if (actual_old_state == SPDK_NVMF_SUBSYSTEM_RESUMING &&
		    state == SPDK_NVMF_SUBSYSTEM_ACTIVE) {
			expected_old_state = SPDK_NVMF_SUBSYSTEM_RESUMING;
		}
		/* This is for the case when activating the subsystem fails. */
		if (actual_old_state == SPDK_NVMF_SUBSYSTEM_ACTIVATING &&
		    state == SPDK_NVMF_SUBSYSTEM_DEACTIVATING) {
			expected_old_state = SPDK_NVMF_SUBSYSTEM_ACTIVATING;
		}
		/* This is for the case when resuming the subsystem fails. */
		if (actual_old_state == SPDK_NVMF_SUBSYSTEM_RESUMING &&
		    state == SPDK_NVMF_SUBSYSTEM_PAUSING) {
			expected_old_state = SPDK_NVMF_SUBSYSTEM_RESUMING;
		}
		/* This is for the case when stopping paused subsystem */
		if (actual_old_state == SPDK_NVMF_SUBSYSTEM_PAUSED &&
		    state == SPDK_NVMF_SUBSYSTEM_DEACTIVATING) {
			expected_old_state = SPDK_NVMF_SUBSYSTEM_PAUSED;
		}
		actual_old_state = expected_old_state;
		__atomic_compare_exchange_n(&subsystem->state, &actual_old_state, state, false,
					    __ATOMIC_RELAXED, __ATOMIC_RELAXED);
	}
	assert(actual_old_state == expected_old_state);
	/* 0 iff the (possibly retried) exchange observed the expected state. */
	return actual_old_state - expected_old_state;
}
554 
/* Context carried through a subsystem state change across all poll groups. */
struct subsystem_state_change_ctx {
	struct spdk_nvmf_subsystem		*subsystem;
	/* Namespace id forwarded to the poll-group pause step; callers of the
	 * non-pause transitions pass 0. */
	uint16_t				nsid;

	/* State before the change began; used to revert on failure. */
	enum spdk_nvmf_subsystem_state		original_state;
	/* End state the caller asked for. */
	enum spdk_nvmf_subsystem_state		requested_state;

	/* Completion callback (may be NULL) and its argument. */
	spdk_nvmf_subsystem_state_change_done	cb_fn;
	void					*cb_arg;
};
565 
/* Completion of the poll-group sweep that reverts a failed state change:
 * restore the original state, clear the changing_state flag, and report
 * failure to the caller.  Only reached on an error path. */
static void
subsystem_state_change_revert_done(struct spdk_io_channel_iter *i, int status)
{
	struct subsystem_state_change_ctx *ctx = spdk_io_channel_iter_get_ctx(i);

	/* Nothing to be done here if the state setting fails, we are just screwed. */
	if (nvmf_subsystem_set_state(ctx->subsystem, ctx->requested_state)) {
		SPDK_ERRLOG("Unable to revert the subsystem state after operation failure.\n");
	}

	ctx->subsystem->changing_state = false;
	if (ctx->cb_fn) {
		/* return a failure here. This function only exists in an error path. */
		ctx->cb_fn(ctx->subsystem, ctx->cb_arg, -1);
	}
	free(ctx);
}
583 
/* Completion of the per-poll-group sweep for a state change.
 *
 * On success, commit the requested state.  On any failure, move to the
 * intermediate state leading back to the original state and sweep the poll
 * groups again to undo the partial change (completion then goes through
 * subsystem_state_change_revert_done). */
static void
subsystem_state_change_done(struct spdk_io_channel_iter *i, int status)
{
	struct subsystem_state_change_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
	enum spdk_nvmf_subsystem_state intermediate_state;

	SPDK_DTRACE_PROBE4(nvmf_subsystem_change_state_done, ctx->subsystem->subnqn,
			   ctx->requested_state, ctx->original_state, status);

	if (status == 0) {
		status = nvmf_subsystem_set_state(ctx->subsystem, ctx->requested_state);
		if (status) {
			status = -1;
		}
	}

	if (status) {
		/* Note the swapped arguments: we are now heading back toward
		 * original_state, starting from requested_state. */
		intermediate_state = nvmf_subsystem_get_intermediate_state(ctx->requested_state,
				     ctx->original_state);
		assert(intermediate_state != SPDK_NVMF_SUBSYSTEM_NUM_STATES);

		if (nvmf_subsystem_set_state(ctx->subsystem, intermediate_state)) {
			goto out;
		}
		ctx->requested_state = ctx->original_state;
		spdk_for_each_channel(ctx->subsystem->tgt,
				      subsystem_state_change_on_pg,
				      ctx,
				      subsystem_state_change_revert_done);
		return;
	}

out:
	ctx->subsystem->changing_state = false;
	if (ctx->cb_fn) {
		ctx->cb_fn(ctx->subsystem, ctx->cb_arg, status);
	}
	free(ctx);
}
623 
624 static void
625 subsystem_state_change_continue(void *ctx, int status)
626 {
627 	struct spdk_io_channel_iter *i = ctx;
628 	struct subsystem_state_change_ctx *_ctx __attribute__((unused));
629 
630 	_ctx = spdk_io_channel_iter_get_ctx(i);
631 	SPDK_DTRACE_PROBE3(nvmf_pg_change_state_done, _ctx->subsystem->subnqn,
632 			   _ctx->requested_state, spdk_thread_get_id(spdk_get_thread()));
633 
634 	spdk_for_each_channel_continue(i, status);
635 }
636 
/* Per-poll-group step of a subsystem state change: apply the add / remove /
 * pause / resume matching the requested end state to this group, then
 * continue the iteration via subsystem_state_change_continue. */
static void
subsystem_state_change_on_pg(struct spdk_io_channel_iter *i)
{
	struct subsystem_state_change_ctx *ctx;
	struct spdk_io_channel *ch;
	struct spdk_nvmf_poll_group *group;

	ctx = spdk_io_channel_iter_get_ctx(i);
	ch = spdk_io_channel_iter_get_channel(i);
	group = spdk_io_channel_get_ctx(ch);

	SPDK_DTRACE_PROBE3(nvmf_pg_change_state, ctx->subsystem->subnqn,
			   ctx->requested_state, spdk_thread_get_id(spdk_get_thread()));
	switch (ctx->requested_state) {
	case SPDK_NVMF_SUBSYSTEM_INACTIVE:
		nvmf_poll_group_remove_subsystem(group, ctx->subsystem, subsystem_state_change_continue, i);
		break;
	case SPDK_NVMF_SUBSYSTEM_ACTIVE:
		/* ACTIVE is reached either from INACTIVE (add) or PAUSED (resume);
		 * the current intermediate state tells us which.
		 * NOTE(review): if the state were neither ACTIVATING nor RESUMING the
		 * iteration would never be continued — presumably unreachable; confirm. */
		if (ctx->subsystem->state == SPDK_NVMF_SUBSYSTEM_ACTIVATING) {
			nvmf_poll_group_add_subsystem(group, ctx->subsystem, subsystem_state_change_continue, i);
		} else if (ctx->subsystem->state == SPDK_NVMF_SUBSYSTEM_RESUMING) {
			nvmf_poll_group_resume_subsystem(group, ctx->subsystem, subsystem_state_change_continue, i);
		}
		break;
	case SPDK_NVMF_SUBSYSTEM_PAUSED:
		nvmf_poll_group_pause_subsystem(group, ctx->subsystem, ctx->nsid, subsystem_state_change_continue,
						i);
		break;
	default:
		assert(false);
		break;
	}
}
670 
/* Drive a subsystem state change across every poll group.
 *
 * Serializes changes via the changing_state flag (-EBUSY if one is already
 * in flight), moves the subsystem to the intermediate state, then sweeps
 * all poll groups; subsystem_state_change_done commits or reverts.  cb_fn
 * (if non-NULL) is invoked when the change completes.  Returns 0 once the
 * change has been started (or was a no-op), negative errno on failure. */
static int
nvmf_subsystem_state_change(struct spdk_nvmf_subsystem *subsystem,
			    uint32_t nsid,
			    enum spdk_nvmf_subsystem_state requested_state,
			    spdk_nvmf_subsystem_state_change_done cb_fn,
			    void *cb_arg)
{
	struct subsystem_state_change_ctx *ctx;
	enum spdk_nvmf_subsystem_state intermediate_state;
	int rc;

	/* Atomically claim the right to change state; only one change at a time. */
	if (__sync_val_compare_and_swap(&subsystem->changing_state, false, true)) {
		return -EBUSY;
	}

	SPDK_DTRACE_PROBE3(nvmf_subsystem_change_state, subsystem->subnqn,
			   requested_state, subsystem->state);
	/* If we are already in the requested state, just call the callback immediately. */
	if (subsystem->state == requested_state) {
		subsystem->changing_state = false;
		if (cb_fn) {
			cb_fn(subsystem, cb_arg, 0);
		}
		return 0;
	}

	intermediate_state = nvmf_subsystem_get_intermediate_state(subsystem->state, requested_state);
	assert(intermediate_state != SPDK_NVMF_SUBSYSTEM_NUM_STATES);

	ctx = calloc(1, sizeof(*ctx));
	if (!ctx) {
		subsystem->changing_state = false;
		return -ENOMEM;
	}

	/* Remember where we started so a failed change can be reverted. */
	ctx->original_state = subsystem->state;
	rc = nvmf_subsystem_set_state(subsystem, intermediate_state);
	if (rc) {
		free(ctx);
		subsystem->changing_state = false;
		return rc;
	}

	ctx->subsystem = subsystem;
	ctx->nsid = nsid;
	ctx->requested_state = requested_state;
	ctx->cb_fn = cb_fn;
	ctx->cb_arg = cb_arg;

	spdk_for_each_channel(subsystem->tgt,
			      subsystem_state_change_on_pg,
			      ctx,
			      subsystem_state_change_done);

	return 0;
}
727 
/* Transition the subsystem to ACTIVE; cb_fn fires when the change completes. */
int
spdk_nvmf_subsystem_start(struct spdk_nvmf_subsystem *subsystem,
			  spdk_nvmf_subsystem_state_change_done cb_fn,
			  void *cb_arg)
{
	return nvmf_subsystem_state_change(subsystem, 0, SPDK_NVMF_SUBSYSTEM_ACTIVE, cb_fn, cb_arg);
}
735 
/* Transition the subsystem to INACTIVE; cb_fn fires when the change completes. */
int
spdk_nvmf_subsystem_stop(struct spdk_nvmf_subsystem *subsystem,
			 spdk_nvmf_subsystem_state_change_done cb_fn,
			 void *cb_arg)
{
	return nvmf_subsystem_state_change(subsystem, 0, SPDK_NVMF_SUBSYSTEM_INACTIVE, cb_fn, cb_arg);
}
743 
/* Transition the subsystem to PAUSED, forwarding nsid to the per-poll-group
 * pause step; cb_fn fires when the change completes. */
int
spdk_nvmf_subsystem_pause(struct spdk_nvmf_subsystem *subsystem,
			  uint32_t nsid,
			  spdk_nvmf_subsystem_state_change_done cb_fn,
			  void *cb_arg)
{
	return nvmf_subsystem_state_change(subsystem, nsid, SPDK_NVMF_SUBSYSTEM_PAUSED, cb_fn, cb_arg);
}
752 
/* Transition the subsystem back to ACTIVE (resume from PAUSED); cb_fn fires
 * when the change completes. */
int
spdk_nvmf_subsystem_resume(struct spdk_nvmf_subsystem *subsystem,
			   spdk_nvmf_subsystem_state_change_done cb_fn,
			   void *cb_arg)
{
	return nvmf_subsystem_state_change(subsystem, 0, SPDK_NVMF_SUBSYSTEM_ACTIVE, cb_fn, cb_arg);
}
760 
761 struct spdk_nvmf_subsystem *
762 spdk_nvmf_subsystem_get_first(struct spdk_nvmf_tgt *tgt)
763 {
764 	struct spdk_nvmf_subsystem	*subsystem;
765 	uint32_t sid;
766 
767 	for (sid = 0; sid < tgt->max_subsystems; sid++) {
768 		subsystem = tgt->subsystems[sid];
769 		if (subsystem) {
770 			return subsystem;
771 		}
772 	}
773 
774 	return NULL;
775 }
776 
777 struct spdk_nvmf_subsystem *
778 spdk_nvmf_subsystem_get_next(struct spdk_nvmf_subsystem *subsystem)
779 {
780 	uint32_t sid;
781 	struct spdk_nvmf_tgt *tgt;
782 
783 	if (!subsystem) {
784 		return NULL;
785 	}
786 
787 	tgt = subsystem->tgt;
788 
789 	for (sid = subsystem->id + 1; sid < tgt->max_subsystems; sid++) {
790 		subsystem = tgt->subsystems[sid];
791 		if (subsystem) {
792 			return subsystem;
793 		}
794 	}
795 
796 	return NULL;
797 }
798 
799 /* Must hold subsystem->mutex while calling this function */
800 static struct spdk_nvmf_host *
801 nvmf_subsystem_find_host(struct spdk_nvmf_subsystem *subsystem, const char *hostnqn)
802 {
803 	struct spdk_nvmf_host *host = NULL;
804 
805 	TAILQ_FOREACH(host, &subsystem->hosts, link) {
806 		if (strcmp(hostnqn, host->nqn) == 0) {
807 			return host;
808 		}
809 	}
810 
811 	return NULL;
812 }
813 
/* Add hostnqn to the subsystem's allowed-host list.
 *
 * Idempotent: adding an already-allowed host returns 0 without change.
 * Returns -EINVAL for an invalid host NQN, -ENOMEM on allocation failure,
 * and 0 on success. */
int
spdk_nvmf_subsystem_add_host(struct spdk_nvmf_subsystem *subsystem, const char *hostnqn)
{
	struct spdk_nvmf_host *host;

	if (!nvmf_valid_nqn(hostnqn)) {
		return -EINVAL;
	}

	pthread_mutex_lock(&subsystem->mutex);

	if (nvmf_subsystem_find_host(subsystem, hostnqn)) {
		/* This subsystem already allows the specified host. */
		pthread_mutex_unlock(&subsystem->mutex);
		return 0;
	}

	host = calloc(1, sizeof(*host));
	if (!host) {
		pthread_mutex_unlock(&subsystem->mutex);
		return -ENOMEM;
	}

	snprintf(host->nqn, sizeof(host->nqn), "%s", hostnqn);

	TAILQ_INSERT_HEAD(&subsystem->hosts, host, link);

	/* Refresh the discovery log now that this host gained access. */
	nvmf_update_discovery_log(subsystem->tgt, hostnqn);

	pthread_mutex_unlock(&subsystem->mutex);

	return 0;
}
847 
848 int
849 spdk_nvmf_subsystem_remove_host(struct spdk_nvmf_subsystem *subsystem, const char *hostnqn)
850 {
851 	struct spdk_nvmf_host *host;
852 
853 	pthread_mutex_lock(&subsystem->mutex);
854 
855 	host = nvmf_subsystem_find_host(subsystem, hostnqn);
856 	if (host == NULL) {
857 		pthread_mutex_unlock(&subsystem->mutex);
858 		return -ENOENT;
859 	}
860 
861 	nvmf_subsystem_remove_host(subsystem, host);
862 	pthread_mutex_unlock(&subsystem->mutex);
863 
864 	return 0;
865 }
866 
/* Context for disconnecting every qpair of one host across all poll groups. */
struct nvmf_subsystem_disconnect_host_ctx {
	struct spdk_nvmf_subsystem		*subsystem;
	/* Heap-allocated copy of the host NQN; freed in the fini callback. */
	char					*hostnqn;
	/* Completion callback (may be NULL) and its argument. */
	spdk_nvmf_tgt_subsystem_listen_done_fn	cb_fn;
	void					*cb_arg;
};
873 
874 static void
875 nvmf_subsystem_disconnect_host_fini(struct spdk_io_channel_iter *i, int status)
876 {
877 	struct nvmf_subsystem_disconnect_host_ctx *ctx;
878 
879 	ctx = spdk_io_channel_iter_get_ctx(i);
880 
881 	if (ctx->cb_fn) {
882 		ctx->cb_fn(ctx->cb_arg, status);
883 	}
884 	free(ctx->hostnqn);
885 	free(ctx);
886 }
887 
/* Per-poll-group step: disconnect every qpair in this group whose controller
 * belongs to ctx->subsystem and whose host NQN matches ctx->hostnqn. */
static void
nvmf_subsystem_disconnect_qpairs_by_host(struct spdk_io_channel_iter *i)
{
	struct nvmf_subsystem_disconnect_host_ctx *ctx;
	struct spdk_nvmf_poll_group *group;
	struct spdk_io_channel *ch;
	struct spdk_nvmf_qpair *qpair, *tmp_qpair;
	struct spdk_nvmf_ctrlr *ctrlr;

	ctx = spdk_io_channel_iter_get_ctx(i);
	ch = spdk_io_channel_iter_get_channel(i);
	group = spdk_io_channel_get_ctx(ch);

	/* SAFE variant: disconnecting may unlink the qpair from the list. */
	TAILQ_FOREACH_SAFE(qpair, &group->qpairs, link, tmp_qpair) {
		ctrlr = qpair->ctrlr;

		/* Skip qpairs without a controller yet, or from other subsystems. */
		if (ctrlr == NULL || ctrlr->subsys != ctx->subsystem) {
			continue;
		}

		if (strncmp(ctrlr->hostnqn, ctx->hostnqn, sizeof(ctrlr->hostnqn)) == 0) {
			/* Right now this does not wait for the queue pairs to actually disconnect. */
			spdk_nvmf_qpair_disconnect(qpair, NULL, NULL);
		}
	}
	spdk_for_each_channel_continue(i, 0);
}
915 
916 int
917 spdk_nvmf_subsystem_disconnect_host(struct spdk_nvmf_subsystem *subsystem,
918 				    const char *hostnqn,
919 				    spdk_nvmf_tgt_subsystem_listen_done_fn cb_fn,
920 				    void *cb_arg)
921 {
922 	struct nvmf_subsystem_disconnect_host_ctx *ctx;
923 
924 	ctx = calloc(1, sizeof(struct nvmf_subsystem_disconnect_host_ctx));
925 	if (ctx == NULL) {
926 		return -ENOMEM;
927 	}
928 
929 	ctx->hostnqn = strdup(hostnqn);
930 	if (ctx->hostnqn == NULL) {
931 		free(ctx);
932 		return -ENOMEM;
933 	}
934 
935 	ctx->subsystem = subsystem;
936 	ctx->cb_fn = cb_fn;
937 	ctx->cb_arg = cb_arg;
938 
939 	spdk_for_each_channel(subsystem->tgt, nvmf_subsystem_disconnect_qpairs_by_host, ctx,
940 			      nvmf_subsystem_disconnect_host_fini);
941 
942 	return 0;
943 }
944 
/* Set whether any host may connect to the subsystem, and refresh the
 * discovery log to reflect the new visibility.  Always returns 0. */
int
spdk_nvmf_subsystem_set_allow_any_host(struct spdk_nvmf_subsystem *subsystem, bool allow_any_host)
{
	pthread_mutex_lock(&subsystem->mutex);
	subsystem->flags.allow_any_host = allow_any_host;
	nvmf_update_discovery_log(subsystem->tgt, NULL);
	pthread_mutex_unlock(&subsystem->mutex);

	return 0;
}
955 
956 bool
957 spdk_nvmf_subsystem_get_allow_any_host(const struct spdk_nvmf_subsystem *subsystem)
958 {
959 	bool allow_any_host;
960 	struct spdk_nvmf_subsystem *sub;
961 
962 	/* Technically, taking the mutex modifies data in the subsystem. But the const
963 	 * is still important to convey that this doesn't mutate any other data. Cast
964 	 * it away to work around this. */
965 	sub = (struct spdk_nvmf_subsystem *)subsystem;
966 
967 	pthread_mutex_lock(&sub->mutex);
968 	allow_any_host = sub->flags.allow_any_host;
969 	pthread_mutex_unlock(&sub->mutex);
970 
971 	return allow_any_host;
972 }
973 
974 bool
975 spdk_nvmf_subsystem_host_allowed(struct spdk_nvmf_subsystem *subsystem, const char *hostnqn)
976 {
977 	bool allowed;
978 
979 	if (!hostnqn) {
980 		return false;
981 	}
982 
983 	pthread_mutex_lock(&subsystem->mutex);
984 
985 	if (subsystem->flags.allow_any_host) {
986 		pthread_mutex_unlock(&subsystem->mutex);
987 		return true;
988 	}
989 
990 	allowed =  nvmf_subsystem_find_host(subsystem, hostnqn) != NULL;
991 	pthread_mutex_unlock(&subsystem->mutex);
992 
993 	return allowed;
994 }
995 
/* Return the first entry in the subsystem's allowed-host list, or NULL. */
struct spdk_nvmf_host *
spdk_nvmf_subsystem_get_first_host(struct spdk_nvmf_subsystem *subsystem)
{
	return TAILQ_FIRST(&subsystem->hosts);
}
1001 
1002 
/* Return the host following prev_host in the allowed-host list, or NULL at
 * the end. prev_host must be a live list member (e.g. from
 * spdk_nvmf_subsystem_get_first_host()).
 */
struct spdk_nvmf_host *
spdk_nvmf_subsystem_get_next_host(struct spdk_nvmf_subsystem *subsystem,
				  struct spdk_nvmf_host *prev_host)
{
	return TAILQ_NEXT(prev_host, link);
}
1009 
/* Return the NQN string of an allowed-host entry. The pointer is owned by
 * the host object and remains valid only as long as the host entry does.
 */
const char *
spdk_nvmf_host_get_nqn(const struct spdk_nvmf_host *host)
{
	return host->nqn;
}
1015 
/* Find the subsystem listener whose transport ID matches trid, or NULL if
 * the subsystem has no such listener. Matching is delegated to
 * spdk_nvme_transport_id_compare().
 */
struct spdk_nvmf_subsystem_listener *
nvmf_subsystem_find_listener(struct spdk_nvmf_subsystem *subsystem,
			     const struct spdk_nvme_transport_id *trid)
{
	struct spdk_nvmf_subsystem_listener *listener;

	TAILQ_FOREACH(listener, &subsystem->listeners, link) {
		if (spdk_nvme_transport_id_compare(listener->trid, trid) == 0) {
			return listener;
		}
	}

	return NULL;
}
1030 
1031 /**
1032  * Function to be called once the target is listening.
1033  *
1034  * \param ctx Context argument passed to this function.
1035  * \param status 0 if it completed successfully, or negative errno if it failed.
1036  */
1037 static void
1038 _nvmf_subsystem_add_listener_done(void *ctx, int status)
1039 {
1040 	struct spdk_nvmf_subsystem_listener *listener = ctx;
1041 
1042 	if (status) {
1043 		listener->cb_fn(listener->cb_arg, status);
1044 		free(listener);
1045 		return;
1046 	}
1047 
1048 	TAILQ_INSERT_HEAD(&listener->subsystem->listeners, listener, link);
1049 	nvmf_update_discovery_log(listener->subsystem->tgt, NULL);
1050 	listener->cb_fn(listener->cb_arg, status);
1051 }
1052 
/* Begin accepting connections to this subsystem on the address described by
 * trid. The transport-level listener must already exist; this only associates
 * it with the subsystem. Completion (including failure) is reported solely
 * through cb_fn via _nvmf_subsystem_add_listener_done().
 */
void
spdk_nvmf_subsystem_add_listener(struct spdk_nvmf_subsystem *subsystem,
				 struct spdk_nvme_transport_id *trid,
				 spdk_nvmf_tgt_subsystem_listen_done_fn cb_fn,
				 void *cb_arg)
{
	struct spdk_nvmf_transport *transport;
	struct spdk_nvmf_subsystem_listener *listener;
	struct spdk_nvmf_listener *tr_listener;
	uint32_t i;
	int rc = 0;

	assert(cb_fn != NULL);

	/* Listeners may only change while the subsystem is stopped or paused. */
	if (!(subsystem->state == SPDK_NVMF_SUBSYSTEM_INACTIVE ||
	      subsystem->state == SPDK_NVMF_SUBSYSTEM_PAUSED)) {
		cb_fn(cb_arg, -EAGAIN);
		return;
	}

	if (nvmf_subsystem_find_listener(subsystem, trid)) {
		/* Listener already exists in this subsystem */
		cb_fn(cb_arg, 0);
		return;
	}

	transport = spdk_nvmf_tgt_get_transport(subsystem->tgt, trid->trstring);
	if (!transport) {
		SPDK_ERRLOG("Unable to find %s transport. The transport must be created first also make sure it is properly registered.\n",
			    trid->trstring);
		cb_fn(cb_arg, -EINVAL);
		return;
	}

	/* The target must already be listening on this trid at the transport
	 * level (e.g. via spdk_nvmf_tgt_listen_ext).
	 */
	tr_listener = nvmf_transport_find_listener(transport, trid);
	if (!tr_listener) {
		SPDK_ERRLOG("Cannot find transport listener for %s\n", trid->traddr);
		cb_fn(cb_arg, -EINVAL);
		return;
	}

	listener = calloc(1, sizeof(*listener));
	if (!listener) {
		cb_fn(cb_arg, -ENOMEM);
		return;
	}

	/* Point at the transport's canonical trid rather than copying it. */
	listener->trid = &tr_listener->trid;
	listener->transport = transport;
	listener->cb_fn = cb_fn;
	listener->cb_arg = cb_arg;
	listener->subsystem = subsystem;
	/* One ANA state slot per possible namespace, all optimized initially. */
	listener->ana_state = calloc(subsystem->max_nsid, sizeof(enum spdk_nvme_ana_state));
	if (!listener->ana_state) {
		free(listener);
		cb_fn(cb_arg, -ENOMEM);
		return;
	}

	for (i = 0; i < subsystem->max_nsid; i++) {
		listener->ana_state[i] = SPDK_NVME_ANA_OPTIMIZED_STATE;
	}

	/* Let the transport veto or set up the association; rc is forwarded to
	 * the done callback, which handles cleanup on failure.
	 */
	if (transport->ops->listen_associate != NULL) {
		rc = transport->ops->listen_associate(transport, subsystem, trid);
	}

	_nvmf_subsystem_add_listener_done(listener, rc);
}
1122 
1123 int
1124 spdk_nvmf_subsystem_remove_listener(struct spdk_nvmf_subsystem *subsystem,
1125 				    const struct spdk_nvme_transport_id *trid)
1126 {
1127 	struct spdk_nvmf_subsystem_listener *listener;
1128 
1129 	if (!(subsystem->state == SPDK_NVMF_SUBSYSTEM_INACTIVE ||
1130 	      subsystem->state == SPDK_NVMF_SUBSYSTEM_PAUSED)) {
1131 		return -EAGAIN;
1132 	}
1133 
1134 	listener = nvmf_subsystem_find_listener(subsystem, trid);
1135 	if (listener == NULL) {
1136 		return -ENOENT;
1137 	}
1138 
1139 	_nvmf_subsystem_remove_listener(subsystem, listener, false);
1140 
1141 	return 0;
1142 }
1143 
/* Detach every listener from the subsystem. If stop is true the underlying
 * transport listeners are stopped as well (forwarded to
 * _nvmf_subsystem_remove_listener). Safe-iteration variant is required
 * because removal unlinks entries while walking the list.
 */
void
nvmf_subsystem_remove_all_listeners(struct spdk_nvmf_subsystem *subsystem,
				    bool stop)
{
	struct spdk_nvmf_subsystem_listener *listener, *listener_tmp;

	TAILQ_FOREACH_SAFE(listener, &subsystem->listeners, link, listener_tmp) {
		_nvmf_subsystem_remove_listener(subsystem, listener, stop);
	}
}
1154 
1155 bool
1156 spdk_nvmf_subsystem_listener_allowed(struct spdk_nvmf_subsystem *subsystem,
1157 				     const struct spdk_nvme_transport_id *trid)
1158 {
1159 	struct spdk_nvmf_subsystem_listener *listener;
1160 
1161 	if (!strcmp(subsystem->subnqn, SPDK_NVMF_DISCOVERY_NQN)) {
1162 		return true;
1163 	}
1164 
1165 	TAILQ_FOREACH(listener, &subsystem->listeners, link) {
1166 		if (spdk_nvme_transport_id_compare(listener->trid, trid) == 0) {
1167 			return true;
1168 		}
1169 	}
1170 
1171 	return false;
1172 }
1173 
/* Return the first listener attached to the subsystem, or NULL if none.
 * Pair with spdk_nvmf_subsystem_get_next_listener() to iterate.
 */
struct spdk_nvmf_subsystem_listener *
spdk_nvmf_subsystem_get_first_listener(struct spdk_nvmf_subsystem *subsystem)
{
	return TAILQ_FIRST(&subsystem->listeners);
}
1179 
/* Return the listener following prev_listener, or NULL at the end of the
 * list. prev_listener must currently be attached to the subsystem.
 */
struct spdk_nvmf_subsystem_listener *
spdk_nvmf_subsystem_get_next_listener(struct spdk_nvmf_subsystem *subsystem,
				      struct spdk_nvmf_subsystem_listener *prev_listener)
{
	return TAILQ_NEXT(prev_listener, link);
}
1186 
/* Return the transport ID a listener is bound to. The pointer refers to the
 * transport's canonical trid (see spdk_nvmf_subsystem_add_listener), not a
 * copy owned by the subsystem listener.
 */
const struct spdk_nvme_transport_id *
spdk_nvmf_subsystem_listener_get_trid(struct spdk_nvmf_subsystem_listener *listener)
{
	return listener->trid;
}
1192 
/* Set whether hosts may connect through listeners not explicitly attached to
 * this subsystem.
 * NOTE(review): unlike set_allow_any_host, this write is not protected by the
 * subsystem mutex — presumably callers serialize on the subsystem's thread;
 * confirm before relying on concurrent use.
 */
void
spdk_nvmf_subsystem_allow_any_listener(struct spdk_nvmf_subsystem *subsystem,
				       bool allow_any_listener)
{
	subsystem->flags.allow_any_listener = allow_any_listener;
}
1199 
/* Return whether any listener is allowed for this subsystem.
 * ("subsytem" is a historical typo in this public API name and must be kept
 * for ABI compatibility.)
 */
bool
spdk_nvmf_subsytem_any_listener_allowed(struct spdk_nvmf_subsystem *subsystem)
{
	return subsystem->flags.allow_any_listener;
}
1205 
1206 
/* Context carried through spdk_for_each_channel() while propagating a
 * namespace change to every poll group.
 */
struct subsystem_update_ns_ctx {
	struct spdk_nvmf_subsystem *subsystem;

	/* Invoked from subsystem_update_ns_done() when every channel is done;
	 * may be NULL if the caller does not need completion notification. */
	spdk_nvmf_subsystem_state_change_done cb_fn;
	void *cb_arg;
};
1213 
/* Completion callback for the namespace-update channel iteration: report the
 * aggregate status to the optional user callback and release the context.
 */
static void
subsystem_update_ns_done(struct spdk_io_channel_iter *i, int status)
{
	struct subsystem_update_ns_ctx *ctx = spdk_io_channel_iter_get_ctx(i);

	if (ctx->cb_fn) {
		ctx->cb_fn(ctx->subsystem, ctx->cb_arg, status);
	}
	free(ctx);
}
1224 
1225 static void
1226 subsystem_update_ns_on_pg(struct spdk_io_channel_iter *i)
1227 {
1228 	int rc;
1229 	struct subsystem_update_ns_ctx *ctx;
1230 	struct spdk_nvmf_poll_group *group;
1231 	struct spdk_nvmf_subsystem *subsystem;
1232 
1233 	ctx = spdk_io_channel_iter_get_ctx(i);
1234 	group = spdk_io_channel_get_ctx(spdk_io_channel_iter_get_channel(i));
1235 	subsystem = ctx->subsystem;
1236 
1237 	rc = nvmf_poll_group_update_subsystem(group, subsystem);
1238 	spdk_for_each_channel_continue(i, rc);
1239 }
1240 
/* Kick off an asynchronous update of every poll group's namespace state for
 * this subsystem's target. cpl runs once all groups have been visited; ctx
 * must be a subsystem_update_ns_ctx and is freed by the completion path.
 * Always returns 0 (the iteration itself reports per-group errors via cpl).
 */
static int
nvmf_subsystem_update_ns(struct spdk_nvmf_subsystem *subsystem, spdk_channel_for_each_cpl cpl,
			 void *ctx)
{
	spdk_for_each_channel(subsystem->tgt,
			      subsystem_update_ns_on_pg,
			      ctx,
			      cpl);

	return 0;
}
1252 
/* Notify every controller attached to the subsystem that the namespace with
 * the given NSID changed (added, removed, or resized).
 */
static void
nvmf_subsystem_ns_changed(struct spdk_nvmf_subsystem *subsystem, uint32_t nsid)
{
	struct spdk_nvmf_ctrlr *ctrlr;

	TAILQ_FOREACH(ctrlr, &subsystem->ctrlrs, link) {
		nvmf_ctrlr_ns_changed(ctrlr, nsid);
	}
}
1262 
1263 static uint32_t
1264 nvmf_ns_reservation_clear_all_registrants(struct spdk_nvmf_ns *ns);
1265 
/* Remove the namespace with the given NSID from the subsystem, releasing its
 * reservation registrants, bdev claim and descriptor, and notifying
 * transports and controllers. The subsystem must be inactive or paused.
 * Returns 0 on success, -1 on invalid state/NSID/missing namespace.
 */
int
spdk_nvmf_subsystem_remove_ns(struct spdk_nvmf_subsystem *subsystem, uint32_t nsid)
{
	struct spdk_nvmf_transport *transport;
	struct spdk_nvmf_ns *ns;

	if (!(subsystem->state == SPDK_NVMF_SUBSYSTEM_INACTIVE ||
	      subsystem->state == SPDK_NVMF_SUBSYSTEM_PAUSED)) {
		assert(false);
		return -1;
	}

	/* NSIDs are 1-based; ns[] is indexed by nsid - 1. */
	if (nsid == 0 || nsid > subsystem->max_nsid) {
		return -1;
	}

	ns = subsystem->ns[nsid - 1];
	if (!ns) {
		return -1;
	}

	subsystem->ns[nsid - 1] = NULL;

	/* Drop this namespace's reference on its ANA group. */
	assert(ns->anagrpid - 1 < subsystem->max_nsid);
	assert(subsystem->ana_group[ns->anagrpid - 1] > 0);

	subsystem->ana_group[ns->anagrpid - 1]--;

	free(ns->ptpl_file);
	nvmf_ns_reservation_clear_all_registrants(ns);
	spdk_bdev_module_release_bdev(ns->bdev);
	spdk_bdev_close(ns->desc);
	free(ns);

	/* Give each transport a chance to tear down per-namespace resources. */
	for (transport = spdk_nvmf_transport_get_first(subsystem->tgt); transport;
	     transport = spdk_nvmf_transport_get_next(transport)) {
		if (transport->ops->subsystem_remove_ns) {
			transport->ops->subsystem_remove_ns(transport, subsystem, nsid);
		}
	}

	nvmf_subsystem_ns_changed(subsystem, nsid);

	return 0;
}
1311 
/* Context for asynchronous namespace hot-remove/resize handling; allocated
 * per event because the ns itself may disappear mid-operation.
 */
struct subsystem_ns_change_ctx {
	struct spdk_nvmf_subsystem		*subsystem;
	/* Pause-completion callback used when the pause must be retried. */
	spdk_nvmf_subsystem_state_change_done	cb_fn;
	uint32_t				nsid;
};
1317 
/* Pause-completion callback for a hot-removed namespace: remove the ns from
 * the subsystem, resume the subsystem, and free the event context. status is
 * ignored; removal is attempted regardless.
 */
static void
_nvmf_ns_hot_remove(struct spdk_nvmf_subsystem *subsystem,
		    void *cb_arg, int status)
{
	struct subsystem_ns_change_ctx *ctx = cb_arg;
	int rc;

	rc = spdk_nvmf_subsystem_remove_ns(subsystem, ctx->nsid);
	if (rc != 0) {
		SPDK_ERRLOG("Failed to make changes to NVME-oF subsystem with id: %u\n", subsystem->id);
	}

	spdk_nvmf_subsystem_resume(subsystem, NULL, NULL);

	free(ctx);
}
1334 
/* Retry handler for a namespace-change pause that previously failed with
 * -EBUSY: re-attempt the pause, re-queueing itself while the subsystem stays
 * busy. Shared by the hot-remove and resize paths (ctx->cb_fn selects which).
 * NOTE(review): the error message mentions "removal" though resize retries
 * also land here — presumably intentional genericity; confirm.
 */
static void
nvmf_ns_change_msg(void *ns_ctx)
{
	struct subsystem_ns_change_ctx *ctx = ns_ctx;
	int rc;

	rc = spdk_nvmf_subsystem_pause(ctx->subsystem, ctx->nsid, ctx->cb_fn, ctx);
	if (rc) {
		if (rc == -EBUSY) {
			/* Try again, this is not a permanent situation. */
			spdk_thread_send_msg(spdk_get_thread(), nvmf_ns_change_msg, ctx);
		} else {
			free(ctx);
			SPDK_ERRLOG("Unable to pause subsystem to process namespace removal!\n");
		}
	}
}
1352 
/* Bdev hot-remove event handler: pause the subsystem (quiescing I/O to the
 * affected nsid), then remove the namespace from the pause-completion
 * callback (_nvmf_ns_hot_remove).
 */
static void
nvmf_ns_hot_remove(void *remove_ctx)
{
	struct spdk_nvmf_ns *ns = remove_ctx;
	struct subsystem_ns_change_ctx *ns_ctx;
	int rc;

	/* We have to allocate a new context because this op
	 * is asynchronous and we could lose the ns in the middle.
	 */
	ns_ctx = calloc(1, sizeof(struct subsystem_ns_change_ctx));
	if (!ns_ctx) {
		SPDK_ERRLOG("Unable to allocate context to process namespace removal!\n");
		return;
	}

	ns_ctx->subsystem = ns->subsystem;
	ns_ctx->nsid = ns->opts.nsid;
	ns_ctx->cb_fn = _nvmf_ns_hot_remove;

	rc = spdk_nvmf_subsystem_pause(ns->subsystem, ns_ctx->nsid, _nvmf_ns_hot_remove, ns_ctx);
	if (rc) {
		if (rc == -EBUSY) {
			/* Try again, this is not a permanent situation. */
			spdk_thread_send_msg(spdk_get_thread(), nvmf_ns_change_msg, ns_ctx);
		} else {
			SPDK_ERRLOG("Unable to pause subsystem to process namespace removal!\n");
			free(ns_ctx);
		}
	}
}
1384 
/* Pause-completion callback for a namespace resize: broadcast the change to
 * all controllers, resume the subsystem, and free the event context.
 */
static void
_nvmf_ns_resize(struct spdk_nvmf_subsystem *subsystem, void *cb_arg, int status)
{
	struct subsystem_ns_change_ctx *ctx = cb_arg;

	nvmf_subsystem_ns_changed(subsystem, ctx->nsid);
	spdk_nvmf_subsystem_resume(subsystem, NULL, NULL);

	free(ctx);
}
1395 
1396 static void
1397 nvmf_ns_resize(void *event_ctx)
1398 {
1399 	struct spdk_nvmf_ns *ns = event_ctx;
1400 	struct subsystem_ns_change_ctx *ns_ctx;
1401 	int rc;
1402 
1403 	/* We have to allocate a new context because this op
1404 	 * is asynchronous and we could lose the ns in the middle.
1405 	 */
1406 	ns_ctx = calloc(1, sizeof(struct subsystem_ns_change_ctx));
1407 	if (!ns_ctx) {
1408 		SPDK_ERRLOG("Unable to allocate context to process namespace removal!\n");
1409 		return;
1410 	}
1411 
1412 	ns_ctx->subsystem = ns->subsystem;
1413 	ns_ctx->nsid = ns->opts.nsid;
1414 	ns_ctx->cb_fn = _nvmf_ns_resize;
1415 
1416 	/* Specify 0 for the nsid here, because we do not need to pause the namespace.
1417 	 * Namespaces can only be resized bigger, so there is no need to quiesce I/O.
1418 	 */
1419 	rc = spdk_nvmf_subsystem_pause(ns->subsystem, 0, _nvmf_ns_resize, ns_ctx);
1420 	if (rc) {
1421 		if (rc == -EBUSY) {
1422 			/* Try again, this is not a permanent situation. */
1423 			spdk_thread_send_msg(spdk_get_thread(), nvmf_ns_change_msg, ns_ctx);
1424 		} else {
1425 			SPDK_ERRLOG("Unable to pause subsystem to process namespace resize!\n");
1426 			free(ns_ctx);
1427 		}
1428 	}
1429 }
1430 
/* Dispatch bdev events for a namespace's backing bdev (registered via
 * spdk_bdev_open_ext in spdk_nvmf_subsystem_add_ns_ext). event_ctx is the
 * struct spdk_nvmf_ns. Unsupported event types are logged and ignored.
 */
static void
nvmf_ns_event(enum spdk_bdev_event_type type,
	      struct spdk_bdev *bdev,
	      void *event_ctx)
{
	SPDK_DEBUGLOG(nvmf, "Bdev event: type %d, name %s, subsystem_id %d, ns_id %d\n",
		      type,
		      spdk_bdev_get_name(bdev),
		      ((struct spdk_nvmf_ns *)event_ctx)->subsystem->id,
		      ((struct spdk_nvmf_ns *)event_ctx)->nsid);

	switch (type) {
	case SPDK_BDEV_EVENT_REMOVE:
		nvmf_ns_hot_remove(event_ctx);
		break;
	case SPDK_BDEV_EVENT_RESIZE:
		nvmf_ns_resize(event_ctx);
		break;
	default:
		SPDK_NOTICELOG("Unsupported bdev event: type %d\n", type);
		break;
	}
}
1454 
/* Initialize a namespace options struct to defaults (all zero), honoring
 * opts_size so older callers with a smaller struct are not overrun. The
 * FIELD_OK guard only writes fields that fit within the caller's opts_size.
 */
void
spdk_nvmf_ns_opts_get_defaults(struct spdk_nvmf_ns_opts *opts, size_t opts_size)
{
	if (!opts) {
		SPDK_ERRLOG("opts should not be NULL.\n");
		return;
	}

	if (!opts_size) {
		SPDK_ERRLOG("opts_size should not be zero.\n");
		return;
	}

	memset(opts, 0, opts_size);
	opts->opts_size = opts_size;

#define FIELD_OK(field) \
	offsetof(struct spdk_nvmf_ns_opts, field) + sizeof(opts->field) <= opts_size

#define SET_FIELD(field, value) \
	if (FIELD_OK(field)) { \
		opts->field = value; \
	} \

	/* All current fields are set to 0 by default. */
	SET_FIELD(nsid, 0);
	if (FIELD_OK(nguid)) {
		memset(opts->nguid, 0, sizeof(opts->nguid));
	}
	if (FIELD_OK(eui64)) {
		memset(opts->eui64, 0, sizeof(opts->eui64));
	}
	if (FIELD_OK(uuid)) {
		memset(&opts->uuid, 0, sizeof(opts->uuid));
	}
	SET_FIELD(anagrpid, 0);

#undef FIELD_OK
#undef SET_FIELD
}
1495 
/* Copy user-supplied namespace options into opts, field by field, guarded by
 * user_opts->opts_size so options structs from older API versions copy only
 * the fields they actually contain.
 */
static void
nvmf_ns_opts_copy(struct spdk_nvmf_ns_opts *opts,
		  const struct spdk_nvmf_ns_opts *user_opts,
		  size_t opts_size)
{
#define FIELD_OK(field)	\
	offsetof(struct spdk_nvmf_ns_opts, field) + sizeof(opts->field) <= user_opts->opts_size

#define SET_FIELD(field) \
	if (FIELD_OK(field)) { \
		opts->field = user_opts->field;	\
	} \

	SET_FIELD(nsid);
	if (FIELD_OK(nguid)) {
		memcpy(opts->nguid, user_opts->nguid, sizeof(opts->nguid));
	}
	if (FIELD_OK(eui64)) {
		memcpy(opts->eui64, user_opts->eui64, sizeof(opts->eui64));
	}
	if (FIELD_OK(uuid)) {
		memcpy(&opts->uuid, &user_opts->uuid, sizeof(opts->uuid));
	}
	SET_FIELD(anagrpid);

	opts->opts_size = user_opts->opts_size;

	/* We should not remove this statement, but need to update the assert statement
	 * if we add a new field, and also add a corresponding SET_FIELD statement.
	 */
	SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_ns_opts) == 64, "Incorrect size");

#undef FIELD_OK
#undef SET_FIELD
}
1531 
/* Dummy bdev module used to claim bdevs on behalf of the NVMe-oF target so
 * no other module can claim them while they back a namespace. */
static struct spdk_bdev_module ns_bdev_module = {
	.name	= "NVMe-oF Target",
};
1536 
1537 static int
1538 nvmf_ns_load_reservation(const char *file, struct spdk_nvmf_reservation_info *info);
1539 static int
1540 nvmf_ns_reservation_restore(struct spdk_nvmf_ns *ns, struct spdk_nvmf_reservation_info *info);
1541 
1542 uint32_t
1543 spdk_nvmf_subsystem_add_ns_ext(struct spdk_nvmf_subsystem *subsystem, const char *bdev_name,
1544 			       const struct spdk_nvmf_ns_opts *user_opts, size_t opts_size,
1545 			       const char *ptpl_file)
1546 {
1547 	struct spdk_nvmf_transport *transport;
1548 	struct spdk_nvmf_ns_opts opts;
1549 	struct spdk_nvmf_ns *ns;
1550 	struct spdk_nvmf_reservation_info info = {0};
1551 	int rc;
1552 
1553 	if (!(subsystem->state == SPDK_NVMF_SUBSYSTEM_INACTIVE ||
1554 	      subsystem->state == SPDK_NVMF_SUBSYSTEM_PAUSED)) {
1555 		return 0;
1556 	}
1557 
1558 	spdk_nvmf_ns_opts_get_defaults(&opts, sizeof(opts));
1559 	if (user_opts) {
1560 		nvmf_ns_opts_copy(&opts, user_opts, opts_size);
1561 	}
1562 
1563 	if (opts.nsid == SPDK_NVME_GLOBAL_NS_TAG) {
1564 		SPDK_ERRLOG("Invalid NSID %" PRIu32 "\n", opts.nsid);
1565 		return 0;
1566 	}
1567 
1568 	if (opts.nsid == 0) {
1569 		/*
1570 		 * NSID not specified - find a free index.
1571 		 *
1572 		 * If no free slots are found, opts.nsid will be subsystem->max_nsid + 1, which will
1573 		 * expand max_nsid if possible.
1574 		 */
1575 		for (opts.nsid = 1; opts.nsid <= subsystem->max_nsid; opts.nsid++) {
1576 			if (_nvmf_subsystem_get_ns(subsystem, opts.nsid) == NULL) {
1577 				break;
1578 			}
1579 		}
1580 	}
1581 
1582 	if (_nvmf_subsystem_get_ns(subsystem, opts.nsid)) {
1583 		SPDK_ERRLOG("Requested NSID %" PRIu32 " already in use\n", opts.nsid);
1584 		return 0;
1585 	}
1586 
1587 	if (opts.nsid > subsystem->max_nsid) {
1588 		SPDK_ERRLOG("NSID greater than maximum not allowed\n");
1589 		return 0;
1590 	}
1591 
1592 	if (opts.anagrpid == 0) {
1593 		opts.anagrpid = opts.nsid;
1594 	}
1595 
1596 	if (opts.anagrpid > subsystem->max_nsid) {
1597 		SPDK_ERRLOG("ANAGRPID greater than maximum NSID not allowed\n");
1598 		return 0;
1599 	}
1600 
1601 	ns = calloc(1, sizeof(*ns));
1602 	if (ns == NULL) {
1603 		SPDK_ERRLOG("Namespace allocation failed\n");
1604 		return 0;
1605 	}
1606 
1607 	rc = spdk_bdev_open_ext(bdev_name, true, nvmf_ns_event, ns, &ns->desc);
1608 	if (rc != 0) {
1609 		SPDK_ERRLOG("Subsystem %s: bdev %s cannot be opened, error=%d\n",
1610 			    subsystem->subnqn, bdev_name, rc);
1611 		free(ns);
1612 		return 0;
1613 	}
1614 
1615 	ns->bdev = spdk_bdev_desc_get_bdev(ns->desc);
1616 
1617 	if (spdk_bdev_get_md_size(ns->bdev) != 0 && !spdk_bdev_is_md_interleaved(ns->bdev)) {
1618 		SPDK_ERRLOG("Can't attach bdev with separate metadata.\n");
1619 		spdk_bdev_close(ns->desc);
1620 		free(ns);
1621 		return 0;
1622 	}
1623 
1624 	rc = spdk_bdev_module_claim_bdev(ns->bdev, ns->desc, &ns_bdev_module);
1625 	if (rc != 0) {
1626 		spdk_bdev_close(ns->desc);
1627 		free(ns);
1628 		return 0;
1629 	}
1630 
1631 	/* Cache the zcopy capability of the bdev device */
1632 	ns->zcopy = spdk_bdev_io_type_supported(ns->bdev, SPDK_BDEV_IO_TYPE_ZCOPY);
1633 
1634 	if (spdk_mem_all_zero(&opts.uuid, sizeof(opts.uuid))) {
1635 		opts.uuid = *spdk_bdev_get_uuid(ns->bdev);
1636 	}
1637 
1638 	/* if nguid descriptor is supported by bdev module (nvme) then uuid = nguid */
1639 	if (spdk_mem_all_zero(opts.nguid, sizeof(opts.nguid))) {
1640 		SPDK_STATIC_ASSERT(sizeof(opts.nguid) == sizeof(opts.uuid), "size mismatch");
1641 		memcpy(opts.nguid, spdk_bdev_get_uuid(ns->bdev), sizeof(opts.nguid));
1642 	}
1643 
1644 	ns->opts = opts;
1645 	ns->subsystem = subsystem;
1646 	subsystem->ns[opts.nsid - 1] = ns;
1647 	ns->nsid = opts.nsid;
1648 	ns->anagrpid = opts.anagrpid;
1649 	subsystem->ana_group[ns->anagrpid - 1]++;
1650 	TAILQ_INIT(&ns->registrants);
1651 	if (ptpl_file) {
1652 		rc = nvmf_ns_load_reservation(ptpl_file, &info);
1653 		if (!rc) {
1654 			rc = nvmf_ns_reservation_restore(ns, &info);
1655 			if (rc) {
1656 				SPDK_ERRLOG("Subsystem restore reservation failed\n");
1657 				goto err_ns_reservation_restore;
1658 			}
1659 		}
1660 		ns->ptpl_file = strdup(ptpl_file);
1661 		if (!ns->ptpl_file) {
1662 			SPDK_ERRLOG("Namespace ns->ptpl_file allocation failed\n");
1663 			goto err_strdup;
1664 		}
1665 	}
1666 
1667 	for (transport = spdk_nvmf_transport_get_first(subsystem->tgt); transport;
1668 	     transport = spdk_nvmf_transport_get_next(transport)) {
1669 		if (transport->ops->subsystem_add_ns) {
1670 			rc = transport->ops->subsystem_add_ns(transport, subsystem, ns);
1671 			if (rc) {
1672 				SPDK_ERRLOG("Namespace attachment is not allowed by %s transport\n", transport->ops->name);
1673 				goto err_subsystem_add_ns;
1674 			}
1675 		}
1676 	}
1677 
1678 	SPDK_DEBUGLOG(nvmf, "Subsystem %s: bdev %s assigned nsid %" PRIu32 "\n",
1679 		      spdk_nvmf_subsystem_get_nqn(subsystem),
1680 		      bdev_name,
1681 		      opts.nsid);
1682 
1683 	nvmf_subsystem_ns_changed(subsystem, opts.nsid);
1684 
1685 	return opts.nsid;
1686 
1687 err_subsystem_add_ns:
1688 	free(ns->ptpl_file);
1689 err_strdup:
1690 	nvmf_ns_reservation_clear_all_registrants(ns);
1691 err_ns_reservation_restore:
1692 	subsystem->ns[opts.nsid - 1] = NULL;
1693 	spdk_bdev_module_release_bdev(ns->bdev);
1694 	spdk_bdev_close(ns->desc);
1695 	free(ns);
1696 	return 0;
1697 
1698 }
1699 
1700 static uint32_t
1701 nvmf_subsystem_get_next_allocated_nsid(struct spdk_nvmf_subsystem *subsystem,
1702 				       uint32_t prev_nsid)
1703 {
1704 	uint32_t nsid;
1705 
1706 	if (prev_nsid >= subsystem->max_nsid) {
1707 		return 0;
1708 	}
1709 
1710 	for (nsid = prev_nsid + 1; nsid <= subsystem->max_nsid; nsid++) {
1711 		if (subsystem->ns[nsid - 1]) {
1712 			return nsid;
1713 		}
1714 	}
1715 
1716 	return 0;
1717 }
1718 
/* Return the subsystem's lowest-numbered allocated namespace, or NULL if the
 * subsystem has no namespaces. Pair with spdk_nvmf_subsystem_get_next_ns()
 * to iterate.
 */
struct spdk_nvmf_ns *
spdk_nvmf_subsystem_get_first_ns(struct spdk_nvmf_subsystem *subsystem)
{
	uint32_t first_nsid;

	first_nsid = nvmf_subsystem_get_next_allocated_nsid(subsystem, 0);
	return _nvmf_subsystem_get_ns(subsystem, first_nsid);
}
1727 
/* Return the allocated namespace with the next NSID after prev_ns, or NULL
 * when prev_ns is the last one.
 */
struct spdk_nvmf_ns *
spdk_nvmf_subsystem_get_next_ns(struct spdk_nvmf_subsystem *subsystem,
				struct spdk_nvmf_ns *prev_ns)
{
	uint32_t next_nsid;

	next_nsid = nvmf_subsystem_get_next_allocated_nsid(subsystem, prev_ns->opts.nsid);
	return _nvmf_subsystem_get_ns(subsystem, next_nsid);
}
1737 
/* Public lookup of a namespace by NSID; returns NULL if the NSID is out of
 * range or unallocated. Thin wrapper over _nvmf_subsystem_get_ns().
 */
struct spdk_nvmf_ns *
spdk_nvmf_subsystem_get_ns(struct spdk_nvmf_subsystem *subsystem, uint32_t nsid)
{
	return _nvmf_subsystem_get_ns(subsystem, nsid);
}
1743 
/* Return the NSID of a namespace. */
uint32_t
spdk_nvmf_ns_get_id(const struct spdk_nvmf_ns *ns)
{
	return ns->opts.nsid;
}
1749 
/* Return the bdev backing this namespace (opened and claimed in
 * spdk_nvmf_subsystem_add_ns_ext).
 */
struct spdk_bdev *
spdk_nvmf_ns_get_bdev(struct spdk_nvmf_ns *ns)
{
	return ns->bdev;
}
1755 
/* Copy the namespace's options into the caller's buffer, zero-filling any
 * trailing bytes if the caller's struct is larger, and truncating if smaller
 * (supports callers built against other versions of the opts struct).
 */
void
spdk_nvmf_ns_get_opts(const struct spdk_nvmf_ns *ns, struct spdk_nvmf_ns_opts *opts,
		      size_t opts_size)
{
	memset(opts, 0, opts_size);
	memcpy(opts, &ns->opts, spdk_min(sizeof(ns->opts), opts_size));
}
1763 
/* Return the subsystem's serial number string (NUL-terminated, stored
 * inline in the subsystem struct).
 */
const char *
spdk_nvmf_subsystem_get_sn(const struct spdk_nvmf_subsystem *subsystem)
{
	return subsystem->sn;
}
1769 
1770 int
1771 spdk_nvmf_subsystem_set_sn(struct spdk_nvmf_subsystem *subsystem, const char *sn)
1772 {
1773 	size_t len, max_len;
1774 
1775 	max_len = sizeof(subsystem->sn) - 1;
1776 	len = strlen(sn);
1777 	if (len > max_len) {
1778 		SPDK_DEBUGLOG(nvmf, "Invalid sn \"%s\": length %zu > max %zu\n",
1779 			      sn, len, max_len);
1780 		return -1;
1781 	}
1782 
1783 	if (!nvmf_valid_ascii_string(sn, len)) {
1784 		SPDK_DEBUGLOG(nvmf, "Non-ASCII sn\n");
1785 		SPDK_LOGDUMP(nvmf, "sn", sn, len);
1786 		return -1;
1787 	}
1788 
1789 	snprintf(subsystem->sn, sizeof(subsystem->sn), "%s", sn);
1790 
1791 	return 0;
1792 }
1793 
/* Return the subsystem's model number string (NUL-terminated, stored inline
 * in the subsystem struct).
 */
const char *
spdk_nvmf_subsystem_get_mn(const struct spdk_nvmf_subsystem *subsystem)
{
	return subsystem->mn;
}
1799 
1800 int
1801 spdk_nvmf_subsystem_set_mn(struct spdk_nvmf_subsystem *subsystem, const char *mn)
1802 {
1803 	size_t len, max_len;
1804 
1805 	if (mn == NULL) {
1806 		mn = MODEL_NUMBER_DEFAULT;
1807 	}
1808 	max_len = sizeof(subsystem->mn) - 1;
1809 	len = strlen(mn);
1810 	if (len > max_len) {
1811 		SPDK_DEBUGLOG(nvmf, "Invalid mn \"%s\": length %zu > max %zu\n",
1812 			      mn, len, max_len);
1813 		return -1;
1814 	}
1815 
1816 	if (!nvmf_valid_ascii_string(mn, len)) {
1817 		SPDK_DEBUGLOG(nvmf, "Non-ASCII mn\n");
1818 		SPDK_LOGDUMP(nvmf, "mn", mn, len);
1819 		return -1;
1820 	}
1821 
1822 	snprintf(subsystem->mn, sizeof(subsystem->mn), "%s", mn);
1823 
1824 	return 0;
1825 }
1826 
/* Return the subsystem's NQN string (stored inline in the subsystem). */
const char *
spdk_nvmf_subsystem_get_nqn(const struct spdk_nvmf_subsystem *subsystem)
{
	return subsystem->subnqn;
}
1832 
/* Return the subsystem type (e.g. NVMe or discovery). */
enum spdk_nvmf_subtype spdk_nvmf_subsystem_get_type(struct spdk_nvmf_subsystem *subsystem)
{
	return subsystem->subtype;
}
1837 
/* Return the highest NSID this subsystem can currently hold. */
uint32_t
spdk_nvmf_subsystem_get_max_nsid(struct spdk_nvmf_subsystem *subsystem)
{
	return subsystem->max_nsid;
}
1843 
1844 int
1845 nvmf_subsystem_set_cntlid_range(struct spdk_nvmf_subsystem *subsystem,
1846 				uint16_t min_cntlid, uint16_t max_cntlid)
1847 {
1848 	if (subsystem->state != SPDK_NVMF_SUBSYSTEM_INACTIVE) {
1849 		return -EAGAIN;
1850 	}
1851 
1852 	if (min_cntlid > max_cntlid) {
1853 		return -EINVAL;
1854 	}
1855 	/* The spec reserves cntlid values in the range FFF0h to FFFFh. */
1856 	if (min_cntlid < NVMF_MIN_CNTLID || min_cntlid > NVMF_MAX_CNTLID ||
1857 	    max_cntlid < NVMF_MIN_CNTLID || max_cntlid > NVMF_MAX_CNTLID) {
1858 		return -EINVAL;
1859 	}
1860 	subsystem->min_cntlid = min_cntlid;
1861 	subsystem->max_cntlid = max_cntlid;
1862 	if (subsystem->next_cntlid < min_cntlid || subsystem->next_cntlid > max_cntlid - 1) {
1863 		subsystem->next_cntlid = min_cntlid - 1;
1864 	}
1865 
1866 	return 0;
1867 }
1868 
/* Allocate an unused controller ID from [min_cntlid, max_cntlid], advancing
 * the wrap-around cursor subsystem->next_cntlid. Returns 0xFFFF if every
 * value in the range is taken.
 */
static uint16_t
nvmf_subsystem_gen_cntlid(struct spdk_nvmf_subsystem *subsystem)
{
	int count;

	/*
	 * In the worst case, we might have to try all CNTLID values between min_cntlid and max_cntlid
	 * before we find one that is unused (or find that all values are in use).
	 */
	for (count = 0; count < subsystem->max_cntlid - subsystem->min_cntlid + 1; count++) {
		subsystem->next_cntlid++;
		if (subsystem->next_cntlid > subsystem->max_cntlid) {
			subsystem->next_cntlid = subsystem->min_cntlid;
		}

		/* Check if a controller with this cntlid currently exists. */
		if (nvmf_subsystem_get_ctrlr(subsystem, subsystem->next_cntlid) == NULL) {
			/* Found unused cntlid */
			return subsystem->next_cntlid;
		}
	}

	/* All valid cntlid values are in use. */
	return 0xFFFF;
}
1894 
/* Assign a controller ID to ctrlr and link it into the subsystem's
 * controller list. Returns 0 on success or -EBUSY when no cntlid is free.
 */
int
nvmf_subsystem_add_ctrlr(struct spdk_nvmf_subsystem *subsystem, struct spdk_nvmf_ctrlr *ctrlr)
{
	ctrlr->cntlid = nvmf_subsystem_gen_cntlid(subsystem);
	if (ctrlr->cntlid == 0xFFFF) {
		/* Unable to get a cntlid */
		SPDK_ERRLOG("Reached max simultaneous ctrlrs\n");
		return -EBUSY;
	}

	TAILQ_INSERT_TAIL(&subsystem->ctrlrs, ctrlr, link);

	return 0;
}
1909 
/* Unlink a controller from its subsystem. Must run on the subsystem's
 * thread, and ctrlr must belong to this subsystem (both asserted).
 */
void
nvmf_subsystem_remove_ctrlr(struct spdk_nvmf_subsystem *subsystem,
			    struct spdk_nvmf_ctrlr *ctrlr)
{
	assert(spdk_get_thread() == subsystem->thread);
	assert(subsystem == ctrlr->subsys);
	SPDK_DEBUGLOG(nvmf, "remove ctrlr %p from subsys %p %s\n", ctrlr, subsystem, subsystem->subnqn);
	TAILQ_REMOVE(&subsystem->ctrlrs, ctrlr, link);
}
1919 
/* Find the controller with the given cntlid in this subsystem, or NULL if
 * none exists. Linear scan of the controller list.
 */
struct spdk_nvmf_ctrlr *
nvmf_subsystem_get_ctrlr(struct spdk_nvmf_subsystem *subsystem, uint16_t cntlid)
{
	struct spdk_nvmf_ctrlr *ctrlr;

	TAILQ_FOREACH(ctrlr, &subsystem->ctrlrs, link) {
		if (ctrlr->cntlid == cntlid) {
			return ctrlr;
		}
	}

	return NULL;
}
1933 
/* Return the maximum number of namespaces this subsystem supports (equal to
 * its maximum NSID).
 */
uint32_t
spdk_nvmf_subsystem_get_max_namespaces(const struct spdk_nvmf_subsystem *subsystem)
{
	return subsystem->max_nsid;
}
1939 
/* Return the lowest controller ID this subsystem will assign. */
uint16_t
spdk_nvmf_subsystem_get_min_cntlid(const struct spdk_nvmf_subsystem *subsystem)
{
	return subsystem->min_cntlid;
}
1945 
/* Return the highest controller ID this subsystem will assign. */
uint16_t
spdk_nvmf_subsystem_get_max_cntlid(const struct spdk_nvmf_subsystem *subsystem)
{
	return subsystem->max_cntlid;
}
1951 
/* One persisted reservation registrant as decoded from the PTPL JSON file. */
struct _nvmf_ns_registrant {
	uint64_t		rkey;
	/* Heap-allocated by spdk_json_decode_string; freed by the loader. */
	char			*host_uuid;
};

/* Fixed-capacity array of decoded registrants. */
struct _nvmf_ns_registrants {
	size_t				num_regs;
	struct _nvmf_ns_registrant	reg[SPDK_NVMF_MAX_NUM_REGISTRANTS];
};

/* Full reservation state decoded from the persist file; string fields are
 * heap-allocated by the JSON decoder and freed by nvmf_ns_load_reservation().
 */
struct _nvmf_ns_reservation {
	bool					ptpl_activated;
	enum spdk_nvme_reservation_type		rtype;
	uint64_t				crkey;
	char					*bdev_uuid;
	char					*holder_uuid;
	struct _nvmf_ns_registrants		regs;
};
1970 
/* JSON field decoders for one registrant object in the persist file. */
static const struct spdk_json_object_decoder nvmf_ns_pr_reg_decoders[] = {
	{"rkey", offsetof(struct _nvmf_ns_registrant, rkey), spdk_json_decode_uint64},
	{"host_uuid", offsetof(struct _nvmf_ns_registrant, host_uuid), spdk_json_decode_string},
};
1975 
/* spdk_json array-element callback: decode a single registrant object into
 * the struct _nvmf_ns_registrant pointed to by out.
 */
static int
nvmf_decode_ns_pr_reg(const struct spdk_json_val *val, void *out)
{
	struct _nvmf_ns_registrant *reg = out;

	return spdk_json_decode_object(val, nvmf_ns_pr_reg_decoders,
				       SPDK_COUNTOF(nvmf_ns_pr_reg_decoders), reg);
}
1984 
/* Decode the "registrants" JSON array (up to SPDK_NVMF_MAX_NUM_REGISTRANTS
 * elements) into the fixed array inside struct _nvmf_ns_registrants.
 */
static int
nvmf_decode_ns_pr_regs(const struct spdk_json_val *val, void *out)
{
	struct _nvmf_ns_registrants *regs = out;

	return spdk_json_decode_array(val, nvmf_decode_ns_pr_reg, regs->reg,
				      SPDK_NVMF_MAX_NUM_REGISTRANTS, &regs->num_regs,
				      sizeof(struct _nvmf_ns_registrant));
}
1994 
/* Top-level JSON field decoders for the reservation persist file; fields
 * marked optional (trailing true) may be absent from older files.
 */
static const struct spdk_json_object_decoder nvmf_ns_pr_decoders[] = {
	{"ptpl", offsetof(struct _nvmf_ns_reservation, ptpl_activated), spdk_json_decode_bool, true},
	{"rtype", offsetof(struct _nvmf_ns_reservation, rtype), spdk_json_decode_uint32, true},
	{"crkey", offsetof(struct _nvmf_ns_reservation, crkey), spdk_json_decode_uint64, true},
	{"bdev_uuid", offsetof(struct _nvmf_ns_reservation, bdev_uuid), spdk_json_decode_string},
	{"holder_uuid", offsetof(struct _nvmf_ns_reservation, holder_uuid), spdk_json_decode_string, true},
	{"registrants", offsetof(struct _nvmf_ns_reservation, regs), nvmf_decode_ns_pr_regs},
};
2003 
2004 static int
2005 nvmf_ns_load_reservation(const char *file, struct spdk_nvmf_reservation_info *info)
2006 {
2007 	FILE *fd;
2008 	size_t json_size;
2009 	ssize_t values_cnt, rc;
2010 	void *json = NULL, *end;
2011 	struct spdk_json_val *values = NULL;
2012 	struct _nvmf_ns_reservation res = {};
2013 	uint32_t i;
2014 
2015 	fd = fopen(file, "r");
2016 	/* It's not an error if the file does not exist */
2017 	if (!fd) {
2018 		SPDK_NOTICELOG("File %s does not exist\n", file);
2019 		return -ENOENT;
2020 	}
2021 
2022 	/* Load all persist file contents into a local buffer */
2023 	json = spdk_posix_file_load(fd, &json_size);
2024 	fclose(fd);
2025 	if (!json) {
2026 		SPDK_ERRLOG("Load persit file %s failed\n", file);
2027 		return -ENOMEM;
2028 	}
2029 
2030 	rc = spdk_json_parse(json, json_size, NULL, 0, &end, 0);
2031 	if (rc < 0) {
2032 		SPDK_NOTICELOG("Parsing JSON configuration failed (%zd)\n", rc);
2033 		goto exit;
2034 	}
2035 
2036 	values_cnt = rc;
2037 	values = calloc(values_cnt, sizeof(struct spdk_json_val));
2038 	if (values == NULL) {
2039 		goto exit;
2040 	}
2041 
2042 	rc = spdk_json_parse(json, json_size, values, values_cnt, &end, 0);
2043 	if (rc != values_cnt) {
2044 		SPDK_ERRLOG("Parsing JSON configuration failed (%zd)\n", rc);
2045 		goto exit;
2046 	}
2047 
2048 	/* Decode json */
2049 	if (spdk_json_decode_object(values, nvmf_ns_pr_decoders,
2050 				    SPDK_COUNTOF(nvmf_ns_pr_decoders),
2051 				    &res)) {
2052 		SPDK_ERRLOG("Invalid objects in the persist file %s\n", file);
2053 		rc = -EINVAL;
2054 		goto exit;
2055 	}
2056 
2057 	if (res.regs.num_regs > SPDK_NVMF_MAX_NUM_REGISTRANTS) {
2058 		SPDK_ERRLOG("Can only support up to %u registrants\n", SPDK_NVMF_MAX_NUM_REGISTRANTS);
2059 		rc = -ERANGE;
2060 		goto exit;
2061 	}
2062 
2063 	rc = 0;
2064 	info->ptpl_activated = res.ptpl_activated;
2065 	info->rtype = res.rtype;
2066 	info->crkey = res.crkey;
2067 	snprintf(info->bdev_uuid, sizeof(info->bdev_uuid), "%s", res.bdev_uuid);
2068 	snprintf(info->holder_uuid, sizeof(info->holder_uuid), "%s", res.holder_uuid);
2069 	info->num_regs = res.regs.num_regs;
2070 	for (i = 0; i < res.regs.num_regs; i++) {
2071 		info->registrants[i].rkey = res.regs.reg[i].rkey;
2072 		snprintf(info->registrants[i].host_uuid, sizeof(info->registrants[i].host_uuid), "%s",
2073 			 res.regs.reg[i].host_uuid);
2074 	}
2075 
2076 exit:
2077 	free(json);
2078 	free(values);
2079 	free(res.bdev_uuid);
2080 	free(res.holder_uuid);
2081 	for (i = 0; i < res.regs.num_regs; i++) {
2082 		free(res.regs.reg[i].host_uuid);
2083 	}
2084 
2085 	return rc;
2086 }
2087 
2088 static bool
2089 nvmf_ns_reservation_all_registrants_type(struct spdk_nvmf_ns *ns);
2090 
/*
 * Rebuild the namespace's in-memory reservation state from the persisted
 * info loaded by nvmf_ns_load_reservation().
 *
 * Returns 0 on success (including when there is nothing to restore),
 * -EINVAL if the persisted state is inconsistent, or -ENOMEM if a
 * registrant cannot be allocated.
 */
static int
nvmf_ns_reservation_restore(struct spdk_nvmf_ns *ns, struct spdk_nvmf_reservation_info *info)
{
	uint32_t i;
	struct spdk_nvmf_registrant *reg, *holder = NULL;
	struct spdk_uuid bdev_uuid, holder_uuid;
	bool rkey_flag = false;

	SPDK_DEBUGLOG(nvmf, "NSID %u, PTPL %u, Number of registrants %u\n",
		      ns->nsid, info->ptpl_activated, info->num_regs);

	/* it's not an error */
	if (!info->ptpl_activated || !info->num_regs) {
		return 0;
	}

	/* Check info->crkey exist or not in info->registrants[i].rkey */
	for (i = 0; i < info->num_regs; i++) {
		if (info->crkey == info->registrants[i].rkey) {
			rkey_flag = true;
		}
	}
	if (!rkey_flag) {
		return -EINVAL;
	}

	/* The persisted state must belong to the bdev currently backing this
	 * namespace; a mismatch means the file is stale or misconfigured. */
	spdk_uuid_parse(&bdev_uuid, info->bdev_uuid);
	if (spdk_uuid_compare(&bdev_uuid, spdk_bdev_get_uuid(ns->bdev))) {
		SPDK_ERRLOG("Existing bdev UUID is not same with configuration file\n");
		return -EINVAL;
	}

	ns->crkey = info->crkey;
	ns->rtype = info->rtype;
	ns->ptpl_activated = info->ptpl_activated;
	spdk_uuid_parse(&holder_uuid, info->holder_uuid);

	SPDK_DEBUGLOG(nvmf, "Bdev UUID %s\n", info->bdev_uuid);
	if (info->rtype) {
		SPDK_DEBUGLOG(nvmf, "Holder UUID %s, RTYPE %u, RKEY 0x%"PRIx64"\n",
			      info->holder_uuid, info->rtype, info->crkey);
	}

	/* Recreate one registrant per persisted entry; remember the one whose
	 * host ID matches the persisted holder UUID. */
	for (i = 0; i < info->num_regs; i++) {
		reg = calloc(1, sizeof(*reg));
		if (!reg) {
			return -ENOMEM;
		}
		spdk_uuid_parse(&reg->hostid, info->registrants[i].host_uuid);
		reg->rkey = info->registrants[i].rkey;
		TAILQ_INSERT_TAIL(&ns->registrants, reg, link);
		if (!spdk_uuid_compare(&holder_uuid, &reg->hostid)) {
			holder = reg;
		}
		SPDK_DEBUGLOG(nvmf, "Registrant RKEY 0x%"PRIx64", Host UUID %s\n",
			      info->registrants[i].rkey, info->registrants[i].host_uuid);
	}

	/* For all-registrants reservation types any registrant may act as the
	 * holder, so just take the first one. */
	if (nvmf_ns_reservation_all_registrants_type(ns)) {
		ns->holder = TAILQ_FIRST(&ns->registrants);
	} else {
		ns->holder = holder;
	}

	return 0;
}
2157 
/*
 * spdk_json_write_begin() callback that persists the serialized JSON to the
 * PTPL configuration file passed as cb_ctx.
 *
 * Returns 0 on success, -ENOENT if the file cannot be opened for writing,
 * or -1 on a short write or close/flush failure.
 */
static int
nvmf_ns_json_write_cb(void *cb_ctx, const void *data, size_t size)
{
	char *file = cb_ctx;
	size_t rc;
	FILE *fd;

	fd = fopen(file, "w");
	if (!fd) {
		SPDK_ERRLOG("Can't open file %s for write\n", file);
		return -ENOENT;
	}
	rc = fwrite(data, 1, size, fd);
	/* fclose() flushes buffered data; if it fails the contents may never
	 * have reached the file, so report a write error. */
	if (fclose(fd) != 0) {
		return -1;
	}

	return rc == size ? 0 : -1;
}
2175 
/*
 * Serialize the given reservation info as JSON and write it to the PTPL
 * configuration file via nvmf_ns_json_write_cb().
 *
 * When PTPL is not activated, nothing is written into the JSON context, so
 * spdk_json_write_end() effectively clears the configuration file.
 *
 * Returns 0 on success or a negative value on write failure.
 */
static int
nvmf_ns_reservation_update(const char *file, struct spdk_nvmf_reservation_info *info)
{
	struct spdk_json_write_ctx *w;
	uint32_t i;
	int rc = 0;

	w = spdk_json_write_begin(nvmf_ns_json_write_cb, (void *)file, 0);
	if (w == NULL) {
		return -ENOMEM;
	}
	/* clear the configuration file */
	if (!info->ptpl_activated) {
		goto exit;
	}

	/* Field names here must stay in sync with nvmf_ns_pr_decoders. */
	spdk_json_write_object_begin(w);
	spdk_json_write_named_bool(w, "ptpl", info->ptpl_activated);
	spdk_json_write_named_uint32(w, "rtype", info->rtype);
	spdk_json_write_named_uint64(w, "crkey", info->crkey);
	spdk_json_write_named_string(w, "bdev_uuid", info->bdev_uuid);
	spdk_json_write_named_string(w, "holder_uuid", info->holder_uuid);

	spdk_json_write_named_array_begin(w, "registrants");
	for (i = 0; i < info->num_regs; i++) {
		spdk_json_write_object_begin(w);
		spdk_json_write_named_uint64(w, "rkey", info->registrants[i].rkey);
		spdk_json_write_named_string(w, "host_uuid", info->registrants[i].host_uuid);
		spdk_json_write_object_end(w);
	}
	spdk_json_write_array_end(w);
	spdk_json_write_object_end(w);

exit:
	/* spdk_json_write_end() flushes everything through the write callback. */
	rc = spdk_json_write_end(w);
	return rc;
}
2213 
2214 static int
2215 nvmf_ns_update_reservation_info(struct spdk_nvmf_ns *ns)
2216 {
2217 	struct spdk_nvmf_reservation_info info;
2218 	struct spdk_nvmf_registrant *reg, *tmp;
2219 	uint32_t i = 0;
2220 
2221 	assert(ns != NULL);
2222 
2223 	if (!ns->bdev || !ns->ptpl_file) {
2224 		return 0;
2225 	}
2226 
2227 	memset(&info, 0, sizeof(info));
2228 	spdk_uuid_fmt_lower(info.bdev_uuid, sizeof(info.bdev_uuid), spdk_bdev_get_uuid(ns->bdev));
2229 
2230 	if (ns->rtype) {
2231 		info.rtype = ns->rtype;
2232 		info.crkey = ns->crkey;
2233 		if (!nvmf_ns_reservation_all_registrants_type(ns)) {
2234 			assert(ns->holder != NULL);
2235 			spdk_uuid_fmt_lower(info.holder_uuid, sizeof(info.holder_uuid), &ns->holder->hostid);
2236 		}
2237 	}
2238 
2239 	TAILQ_FOREACH_SAFE(reg, &ns->registrants, link, tmp) {
2240 		spdk_uuid_fmt_lower(info.registrants[i].host_uuid, sizeof(info.registrants[i].host_uuid),
2241 				    &reg->hostid);
2242 		info.registrants[i++].rkey = reg->rkey;
2243 	}
2244 
2245 	info.num_regs = i;
2246 	info.ptpl_activated = ns->ptpl_activated;
2247 
2248 	return nvmf_ns_reservation_update(ns->ptpl_file, &info);
2249 }
2250 
2251 static struct spdk_nvmf_registrant *
2252 nvmf_ns_reservation_get_registrant(struct spdk_nvmf_ns *ns,
2253 				   struct spdk_uuid *uuid)
2254 {
2255 	struct spdk_nvmf_registrant *reg, *tmp;
2256 
2257 	TAILQ_FOREACH_SAFE(reg, &ns->registrants, link, tmp) {
2258 		if (!spdk_uuid_compare(&reg->hostid, uuid)) {
2259 			return reg;
2260 		}
2261 	}
2262 
2263 	return NULL;
2264 }
2265 
2266 /* Generate reservation notice log to registered HostID controllers */
2267 static void
2268 nvmf_subsystem_gen_ctrlr_notification(struct spdk_nvmf_subsystem *subsystem,
2269 				      struct spdk_nvmf_ns *ns,
2270 				      struct spdk_uuid *hostid_list,
2271 				      uint32_t num_hostid,
2272 				      enum spdk_nvme_reservation_notification_log_page_type type)
2273 {
2274 	struct spdk_nvmf_ctrlr *ctrlr;
2275 	uint32_t i;
2276 
2277 	for (i = 0; i < num_hostid; i++) {
2278 		TAILQ_FOREACH(ctrlr, &subsystem->ctrlrs, link) {
2279 			if (!spdk_uuid_compare(&ctrlr->hostid, &hostid_list[i])) {
2280 				nvmf_ctrlr_reservation_notice_log(ctrlr, ns, type);
2281 			}
2282 		}
2283 	}
2284 }
2285 
2286 /* Get all registrants' hostid other than the controller who issued the command */
2287 static uint32_t
2288 nvmf_ns_reservation_get_all_other_hostid(struct spdk_nvmf_ns *ns,
2289 		struct spdk_uuid *hostid_list,
2290 		uint32_t max_num_hostid,
2291 		struct spdk_uuid *current_hostid)
2292 {
2293 	struct spdk_nvmf_registrant *reg, *tmp;
2294 	uint32_t num_hostid = 0;
2295 
2296 	TAILQ_FOREACH_SAFE(reg, &ns->registrants, link, tmp) {
2297 		if (spdk_uuid_compare(&reg->hostid, current_hostid)) {
2298 			if (num_hostid == max_num_hostid) {
2299 				assert(false);
2300 				return max_num_hostid;
2301 			}
2302 			hostid_list[num_hostid++] = reg->hostid;
2303 		}
2304 	}
2305 
2306 	return num_hostid;
2307 }
2308 
2309 /* Calculate the unregistered HostID list according to list
2310  * prior to execute preempt command and list after executing
2311  * preempt command.
2312  */
2313 static uint32_t
2314 nvmf_ns_reservation_get_unregistered_hostid(struct spdk_uuid *old_hostid_list,
2315 		uint32_t old_num_hostid,
2316 		struct spdk_uuid *remaining_hostid_list,
2317 		uint32_t remaining_num_hostid)
2318 {
2319 	struct spdk_uuid temp_hostid_list[SPDK_NVMF_MAX_NUM_REGISTRANTS];
2320 	uint32_t i, j, num_hostid = 0;
2321 	bool found;
2322 
2323 	if (!remaining_num_hostid) {
2324 		return old_num_hostid;
2325 	}
2326 
2327 	for (i = 0; i < old_num_hostid; i++) {
2328 		found = false;
2329 		for (j = 0; j < remaining_num_hostid; j++) {
2330 			if (!spdk_uuid_compare(&old_hostid_list[i], &remaining_hostid_list[j])) {
2331 				found = true;
2332 				break;
2333 			}
2334 		}
2335 		if (!found) {
2336 			spdk_uuid_copy(&temp_hostid_list[num_hostid++], &old_hostid_list[i]);
2337 		}
2338 	}
2339 
2340 	if (num_hostid) {
2341 		memcpy(old_hostid_list, temp_hostid_list, sizeof(struct spdk_uuid) * num_hostid);
2342 	}
2343 
2344 	return num_hostid;
2345 }
2346 
2347 /* current reservation type is all registrants or not */
2348 static bool
2349 nvmf_ns_reservation_all_registrants_type(struct spdk_nvmf_ns *ns)
2350 {
2351 	return (ns->rtype == SPDK_NVME_RESERVE_WRITE_EXCLUSIVE_ALL_REGS ||
2352 		ns->rtype == SPDK_NVME_RESERVE_EXCLUSIVE_ACCESS_ALL_REGS);
2353 }
2354 
2355 /* current registrant is reservation holder or not */
2356 static bool
2357 nvmf_ns_reservation_registrant_is_holder(struct spdk_nvmf_ns *ns,
2358 		struct spdk_nvmf_registrant *reg)
2359 {
2360 	if (!reg) {
2361 		return false;
2362 	}
2363 
2364 	if (nvmf_ns_reservation_all_registrants_type(ns)) {
2365 		return true;
2366 	}
2367 
2368 	return (ns->holder == reg);
2369 }
2370 
2371 static int
2372 nvmf_ns_reservation_add_registrant(struct spdk_nvmf_ns *ns,
2373 				   struct spdk_nvmf_ctrlr *ctrlr,
2374 				   uint64_t nrkey)
2375 {
2376 	struct spdk_nvmf_registrant *reg;
2377 
2378 	reg = calloc(1, sizeof(*reg));
2379 	if (!reg) {
2380 		return -ENOMEM;
2381 	}
2382 
2383 	reg->rkey = nrkey;
2384 	/* set hostid for the registrant */
2385 	spdk_uuid_copy(&reg->hostid, &ctrlr->hostid);
2386 	TAILQ_INSERT_TAIL(&ns->registrants, reg, link);
2387 	ns->gen++;
2388 
2389 	return 0;
2390 }
2391 
2392 static void
2393 nvmf_ns_reservation_release_reservation(struct spdk_nvmf_ns *ns)
2394 {
2395 	ns->rtype = 0;
2396 	ns->crkey = 0;
2397 	ns->holder = NULL;
2398 }
2399 
/* release the reservation if the last registrant was removed */
static void
nvmf_ns_reservation_check_release_on_remove_registrant(struct spdk_nvmf_ns *ns,
		struct spdk_nvmf_registrant *reg)
{
	struct spdk_nvmf_registrant *next_reg;

	/* no reservation holder */
	if (!ns->holder) {
		assert(ns->rtype == 0);
		return;
	}

	/* NOTE: `reg` has already been removed from ns->registrants by the
	 * caller, so TAILQ_FIRST() here never returns it. */
	next_reg = TAILQ_FIRST(&ns->registrants);
	if (next_reg && nvmf_ns_reservation_all_registrants_type(ns)) {
		/* the next valid registrant is the new holder now */
		ns->holder = next_reg;
	} else if (nvmf_ns_reservation_registrant_is_holder(ns, reg)) {
		/* release the reservation */
		nvmf_ns_reservation_release_reservation(ns);
	}
}
2422 
2423 static void
2424 nvmf_ns_reservation_remove_registrant(struct spdk_nvmf_ns *ns,
2425 				      struct spdk_nvmf_registrant *reg)
2426 {
2427 	TAILQ_REMOVE(&ns->registrants, reg, link);
2428 	nvmf_ns_reservation_check_release_on_remove_registrant(ns, reg);
2429 	free(reg);
2430 	ns->gen++;
2431 	return;
2432 }
2433 
2434 static uint32_t
2435 nvmf_ns_reservation_remove_registrants_by_key(struct spdk_nvmf_ns *ns,
2436 		uint64_t rkey)
2437 {
2438 	struct spdk_nvmf_registrant *reg, *tmp;
2439 	uint32_t count = 0;
2440 
2441 	TAILQ_FOREACH_SAFE(reg, &ns->registrants, link, tmp) {
2442 		if (reg->rkey == rkey) {
2443 			nvmf_ns_reservation_remove_registrant(ns, reg);
2444 			count++;
2445 		}
2446 	}
2447 	return count;
2448 }
2449 
2450 static uint32_t
2451 nvmf_ns_reservation_remove_all_other_registrants(struct spdk_nvmf_ns *ns,
2452 		struct spdk_nvmf_registrant *reg)
2453 {
2454 	struct spdk_nvmf_registrant *reg_tmp, *reg_tmp2;
2455 	uint32_t count = 0;
2456 
2457 	TAILQ_FOREACH_SAFE(reg_tmp, &ns->registrants, link, reg_tmp2) {
2458 		if (reg_tmp != reg) {
2459 			nvmf_ns_reservation_remove_registrant(ns, reg_tmp);
2460 			count++;
2461 		}
2462 	}
2463 	return count;
2464 }
2465 
2466 static uint32_t
2467 nvmf_ns_reservation_clear_all_registrants(struct spdk_nvmf_ns *ns)
2468 {
2469 	struct spdk_nvmf_registrant *reg, *reg_tmp;
2470 	uint32_t count = 0;
2471 
2472 	TAILQ_FOREACH_SAFE(reg, &ns->registrants, link, reg_tmp) {
2473 		nvmf_ns_reservation_remove_registrant(ns, reg);
2474 		count++;
2475 	}
2476 	return count;
2477 }
2478 
2479 static void
2480 nvmf_ns_reservation_acquire_reservation(struct spdk_nvmf_ns *ns, uint64_t rkey,
2481 					enum spdk_nvme_reservation_type rtype,
2482 					struct spdk_nvmf_registrant *holder)
2483 {
2484 	ns->rtype = rtype;
2485 	ns->crkey = rkey;
2486 	assert(ns->holder == NULL);
2487 	ns->holder = holder;
2488 }
2489 
2490 static bool
2491 nvmf_ns_reservation_register(struct spdk_nvmf_ns *ns,
2492 			     struct spdk_nvmf_ctrlr *ctrlr,
2493 			     struct spdk_nvmf_request *req)
2494 {
2495 	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
2496 	uint8_t rrega, iekey, cptpl, rtype;
2497 	struct spdk_nvme_reservation_register_data key;
2498 	struct spdk_nvmf_registrant *reg;
2499 	uint8_t status = SPDK_NVME_SC_SUCCESS;
2500 	bool update_sgroup = false;
2501 	struct spdk_uuid hostid_list[SPDK_NVMF_MAX_NUM_REGISTRANTS];
2502 	uint32_t num_hostid = 0;
2503 	int rc;
2504 
2505 	rrega = cmd->cdw10_bits.resv_register.rrega;
2506 	iekey = cmd->cdw10_bits.resv_register.iekey;
2507 	cptpl = cmd->cdw10_bits.resv_register.cptpl;
2508 
2509 	if (req->data && req->length >= sizeof(key)) {
2510 		memcpy(&key, req->data, sizeof(key));
2511 	} else {
2512 		SPDK_ERRLOG("No key provided. Failing request.\n");
2513 		status = SPDK_NVME_SC_INVALID_FIELD;
2514 		goto exit;
2515 	}
2516 
2517 	SPDK_DEBUGLOG(nvmf, "REGISTER: RREGA %u, IEKEY %u, CPTPL %u, "
2518 		      "NRKEY 0x%"PRIx64", NRKEY 0x%"PRIx64"\n",
2519 		      rrega, iekey, cptpl, key.crkey, key.nrkey);
2520 
2521 	if (cptpl == SPDK_NVME_RESERVE_PTPL_CLEAR_POWER_ON) {
2522 		/* Ture to OFF state, and need to be updated in the configuration file */
2523 		if (ns->ptpl_activated) {
2524 			ns->ptpl_activated = 0;
2525 			update_sgroup = true;
2526 		}
2527 	} else if (cptpl == SPDK_NVME_RESERVE_PTPL_PERSIST_POWER_LOSS) {
2528 		if (ns->ptpl_file == NULL) {
2529 			status = SPDK_NVME_SC_INVALID_FIELD;
2530 			goto exit;
2531 		} else if (ns->ptpl_activated == 0) {
2532 			ns->ptpl_activated = 1;
2533 			update_sgroup = true;
2534 		}
2535 	}
2536 
2537 	/* current Host Identifier has registrant or not */
2538 	reg = nvmf_ns_reservation_get_registrant(ns, &ctrlr->hostid);
2539 
2540 	switch (rrega) {
2541 	case SPDK_NVME_RESERVE_REGISTER_KEY:
2542 		if (!reg) {
2543 			/* register new controller */
2544 			if (key.nrkey == 0) {
2545 				SPDK_ERRLOG("Can't register zeroed new key\n");
2546 				status = SPDK_NVME_SC_INVALID_FIELD;
2547 				goto exit;
2548 			}
2549 			rc = nvmf_ns_reservation_add_registrant(ns, ctrlr, key.nrkey);
2550 			if (rc < 0) {
2551 				status = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
2552 				goto exit;
2553 			}
2554 			update_sgroup = true;
2555 		} else {
2556 			/* register with same key is not an error */
2557 			if (reg->rkey != key.nrkey) {
2558 				SPDK_ERRLOG("The same host already register a "
2559 					    "key with 0x%"PRIx64"\n",
2560 					    reg->rkey);
2561 				status = SPDK_NVME_SC_RESERVATION_CONFLICT;
2562 				goto exit;
2563 			}
2564 		}
2565 		break;
2566 	case SPDK_NVME_RESERVE_UNREGISTER_KEY:
2567 		if (!reg || (!iekey && reg->rkey != key.crkey)) {
2568 			SPDK_ERRLOG("No registrant or current key doesn't match "
2569 				    "with existing registrant key\n");
2570 			status = SPDK_NVME_SC_RESERVATION_CONFLICT;
2571 			goto exit;
2572 		}
2573 
2574 		rtype = ns->rtype;
2575 		num_hostid = nvmf_ns_reservation_get_all_other_hostid(ns, hostid_list,
2576 				SPDK_NVMF_MAX_NUM_REGISTRANTS,
2577 				&ctrlr->hostid);
2578 
2579 		nvmf_ns_reservation_remove_registrant(ns, reg);
2580 
2581 		if (!ns->rtype && num_hostid && (rtype == SPDK_NVME_RESERVE_WRITE_EXCLUSIVE_REG_ONLY ||
2582 						 rtype == SPDK_NVME_RESERVE_EXCLUSIVE_ACCESS_REG_ONLY)) {
2583 			nvmf_subsystem_gen_ctrlr_notification(ns->subsystem, ns,
2584 							      hostid_list,
2585 							      num_hostid,
2586 							      SPDK_NVME_RESERVATION_RELEASED);
2587 		}
2588 		update_sgroup = true;
2589 		break;
2590 	case SPDK_NVME_RESERVE_REPLACE_KEY:
2591 		if (key.nrkey == 0) {
2592 			SPDK_ERRLOG("Can't register zeroed new key\n");
2593 			status = SPDK_NVME_SC_INVALID_FIELD;
2594 			goto exit;
2595 		}
2596 		/* Registrant exists */
2597 		if (reg) {
2598 			if (!iekey && reg->rkey != key.crkey) {
2599 				SPDK_ERRLOG("Current key doesn't match "
2600 					    "existing registrant key\n");
2601 				status = SPDK_NVME_SC_RESERVATION_CONFLICT;
2602 				goto exit;
2603 			}
2604 			if (reg->rkey == key.nrkey) {
2605 				goto exit;
2606 			}
2607 			reg->rkey = key.nrkey;
2608 		} else if (iekey) { /* No registrant but IEKEY is set */
2609 			/* new registrant */
2610 			rc = nvmf_ns_reservation_add_registrant(ns, ctrlr, key.nrkey);
2611 			if (rc < 0) {
2612 				status = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
2613 				goto exit;
2614 			}
2615 		} else { /* No registrant */
2616 			SPDK_ERRLOG("No registrant\n");
2617 			status = SPDK_NVME_SC_RESERVATION_CONFLICT;
2618 			goto exit;
2619 
2620 		}
2621 		update_sgroup = true;
2622 		break;
2623 	default:
2624 		status = SPDK_NVME_SC_INVALID_FIELD;
2625 		goto exit;
2626 	}
2627 
2628 exit:
2629 	if (update_sgroup) {
2630 		rc = nvmf_ns_update_reservation_info(ns);
2631 		if (rc != 0) {
2632 			status = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
2633 		}
2634 	}
2635 	req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
2636 	req->rsp->nvme_cpl.status.sc = status;
2637 	return update_sgroup;
2638 }
2639 
/*
 * Handle the Reservation Acquire command (acquire or preempt).
 *
 * Sets the NVMe completion status on req and returns true when the
 * reservation state changed and the subsystem poll groups must be updated.
 */
static bool
nvmf_ns_reservation_acquire(struct spdk_nvmf_ns *ns,
			    struct spdk_nvmf_ctrlr *ctrlr,
			    struct spdk_nvmf_request *req)
{
	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
	uint8_t racqa, iekey, rtype;
	struct spdk_nvme_reservation_acquire_data key;
	struct spdk_nvmf_registrant *reg;
	bool all_regs = false;
	uint32_t count = 0;
	bool update_sgroup = true;
	struct spdk_uuid hostid_list[SPDK_NVMF_MAX_NUM_REGISTRANTS];
	uint32_t num_hostid = 0;
	struct spdk_uuid new_hostid_list[SPDK_NVMF_MAX_NUM_REGISTRANTS];
	uint32_t new_num_hostid = 0;
	bool reservation_released = false;
	uint8_t status = SPDK_NVME_SC_SUCCESS;

	racqa = cmd->cdw10_bits.resv_acquire.racqa;
	iekey = cmd->cdw10_bits.resv_acquire.iekey;
	rtype = cmd->cdw10_bits.resv_acquire.rtype;

	if (req->data && req->length >= sizeof(key)) {
		memcpy(&key, req->data, sizeof(key));
	} else {
		SPDK_ERRLOG("No key provided. Failing request.\n");
		status = SPDK_NVME_SC_INVALID_FIELD;
		goto exit;
	}

	SPDK_DEBUGLOG(nvmf, "ACQUIRE: RACQA %u, IEKEY %u, RTYPE %u, "
		      "NRKEY 0x%"PRIx64", PRKEY 0x%"PRIx64"\n",
		      racqa, iekey, rtype, key.crkey, key.prkey);

	/* IEKEY must be cleared for Acquire, and RTYPE must be valid. */
	if (iekey || rtype > SPDK_NVME_RESERVE_EXCLUSIVE_ACCESS_ALL_REGS) {
		SPDK_ERRLOG("Ignore existing key field set to 1\n");
		status = SPDK_NVME_SC_INVALID_FIELD;
		update_sgroup = false;
		goto exit;
	}

	reg = nvmf_ns_reservation_get_registrant(ns, &ctrlr->hostid);
	/* must be registrant and CRKEY must match */
	if (!reg || reg->rkey != key.crkey) {
		SPDK_ERRLOG("No registrant or current key doesn't match "
			    "with existing registrant key\n");
		status = SPDK_NVME_SC_RESERVATION_CONFLICT;
		update_sgroup = false;
		goto exit;
	}

	all_regs = nvmf_ns_reservation_all_registrants_type(ns);

	switch (racqa) {
	case SPDK_NVME_RESERVE_ACQUIRE:
		/* it's not an error for the holder to acquire same reservation type again */
		if (nvmf_ns_reservation_registrant_is_holder(ns, reg) && ns->rtype == rtype) {
			/* do nothing */
			update_sgroup = false;
		} else if (ns->holder == NULL) {
			/* first time to acquire the reservation */
			nvmf_ns_reservation_acquire_reservation(ns, key.crkey, rtype, reg);
		} else {
			SPDK_ERRLOG("Invalid rtype or current registrant is not holder\n");
			status = SPDK_NVME_SC_RESERVATION_CONFLICT;
			update_sgroup = false;
			goto exit;
		}
		break;
	case SPDK_NVME_RESERVE_PREEMPT:
		/* no reservation holder */
		if (!ns->holder) {
			/* unregister with PRKEY */
			nvmf_ns_reservation_remove_registrants_by_key(ns, key.prkey);
			break;
		}
		/* Snapshot the other hosts before preemption so unregistered
		 * ones can be notified at exit. */
		num_hostid = nvmf_ns_reservation_get_all_other_hostid(ns, hostid_list,
				SPDK_NVMF_MAX_NUM_REGISTRANTS,
				&ctrlr->hostid);

		/* only 1 reservation holder and reservation key is valid */
		if (!all_regs) {
			/* preempt itself */
			if (nvmf_ns_reservation_registrant_is_holder(ns, reg) &&
			    ns->crkey == key.prkey) {
				ns->rtype = rtype;
				reservation_released = true;
				break;
			}

			if (ns->crkey == key.prkey) {
				nvmf_ns_reservation_remove_registrant(ns, ns->holder);
				nvmf_ns_reservation_acquire_reservation(ns, key.crkey, rtype, reg);
				reservation_released = true;
			} else if (key.prkey != 0) {
				nvmf_ns_reservation_remove_registrants_by_key(ns, key.prkey);
			} else {
				/* PRKEY is zero */
				SPDK_ERRLOG("Current PRKEY is zero\n");
				status = SPDK_NVME_SC_RESERVATION_CONFLICT;
				update_sgroup = false;
				goto exit;
			}
		} else {
			/* release all other registrants except for the current one */
			if (key.prkey == 0) {
				nvmf_ns_reservation_remove_all_other_registrants(ns, reg);
				assert(ns->holder == reg);
			} else {
				count = nvmf_ns_reservation_remove_registrants_by_key(ns, key.prkey);
				if (count == 0) {
					SPDK_ERRLOG("PRKEY doesn't match any registrant\n");
					status = SPDK_NVME_SC_RESERVATION_CONFLICT;
					update_sgroup = false;
					goto exit;
				}
			}
		}
		break;
	default:
		status = SPDK_NVME_SC_INVALID_FIELD;
		update_sgroup = false;
		break;
	}

exit:
	if (update_sgroup && racqa == SPDK_NVME_RESERVE_PREEMPT) {
		new_num_hostid = nvmf_ns_reservation_get_all_other_hostid(ns, new_hostid_list,
				 SPDK_NVMF_MAX_NUM_REGISTRANTS,
				 &ctrlr->hostid);
		/* Preempt notification occurs on the unregistered controllers
		 * other than the controller who issued the command.
		 */
		num_hostid = nvmf_ns_reservation_get_unregistered_hostid(hostid_list,
				num_hostid,
				new_hostid_list,
				new_num_hostid);
		if (num_hostid) {
			nvmf_subsystem_gen_ctrlr_notification(ns->subsystem, ns,
							      hostid_list,
							      num_hostid,
							      SPDK_NVME_REGISTRATION_PREEMPTED);

		}
		/* Reservation released notification occurs on the
		 * controllers which are the remaining registrants other than
		 * the controller who issued the command.
		 */
		if (reservation_released && new_num_hostid) {
			nvmf_subsystem_gen_ctrlr_notification(ns->subsystem, ns,
							      new_hostid_list,
							      new_num_hostid,
							      SPDK_NVME_RESERVATION_RELEASED);

		}
	}
	/* Persist the new state when PTPL is active. */
	if (update_sgroup && ns->ptpl_activated) {
		if (nvmf_ns_update_reservation_info(ns)) {
			status = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
		}
	}
	req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
	req->rsp->nvme_cpl.status.sc = status;
	return update_sgroup;
}
2806 
/*
 * Handle the Reservation Release command (release or clear).
 *
 * Sets the NVMe completion status on req and returns true when the
 * reservation state changed and the subsystem poll groups must be updated.
 */
static bool
nvmf_ns_reservation_release(struct spdk_nvmf_ns *ns,
			    struct spdk_nvmf_ctrlr *ctrlr,
			    struct spdk_nvmf_request *req)
{
	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
	uint8_t rrela, iekey, rtype;
	struct spdk_nvmf_registrant *reg;
	uint64_t crkey;
	uint8_t status = SPDK_NVME_SC_SUCCESS;
	bool update_sgroup = true;
	struct spdk_uuid hostid_list[SPDK_NVMF_MAX_NUM_REGISTRANTS];
	uint32_t num_hostid = 0;

	rrela = cmd->cdw10_bits.resv_release.rrela;
	iekey = cmd->cdw10_bits.resv_release.iekey;
	rtype = cmd->cdw10_bits.resv_release.rtype;

	if (req->data && req->length >= sizeof(crkey)) {
		memcpy(&crkey, req->data, sizeof(crkey));
	} else {
		SPDK_ERRLOG("No key provided. Failing request.\n");
		status = SPDK_NVME_SC_INVALID_FIELD;
		goto exit;
	}

	SPDK_DEBUGLOG(nvmf, "RELEASE: RRELA %u, IEKEY %u, RTYPE %u, "
		      "CRKEY 0x%"PRIx64"\n",  rrela, iekey, rtype, crkey);

	/* IEKEY must be cleared for Release. */
	if (iekey) {
		SPDK_ERRLOG("Ignore existing key field set to 1\n");
		status = SPDK_NVME_SC_INVALID_FIELD;
		update_sgroup = false;
		goto exit;
	}

	/* The issuing host must be a registrant with a matching key. */
	reg = nvmf_ns_reservation_get_registrant(ns, &ctrlr->hostid);
	if (!reg || reg->rkey != crkey) {
		SPDK_ERRLOG("No registrant or current key doesn't match "
			    "with existing registrant key\n");
		status = SPDK_NVME_SC_RESERVATION_CONFLICT;
		update_sgroup = false;
		goto exit;
	}

	/* Snapshot the other hosts up front for notification purposes. */
	num_hostid = nvmf_ns_reservation_get_all_other_hostid(ns, hostid_list,
			SPDK_NVMF_MAX_NUM_REGISTRANTS,
			&ctrlr->hostid);

	switch (rrela) {
	case SPDK_NVME_RESERVE_RELEASE:
		if (!ns->holder) {
			SPDK_DEBUGLOG(nvmf, "RELEASE: no holder\n");
			update_sgroup = false;
			goto exit;
		}
		if (ns->rtype != rtype) {
			SPDK_ERRLOG("Type doesn't match\n");
			status = SPDK_NVME_SC_INVALID_FIELD;
			update_sgroup = false;
			goto exit;
		}
		if (!nvmf_ns_reservation_registrant_is_holder(ns, reg)) {
			/* not the reservation holder, this isn't an error */
			update_sgroup = false;
			goto exit;
		}

		rtype = ns->rtype;
		nvmf_ns_reservation_release_reservation(ns);

		/* Write-exclusive and exclusive-access types do not generate
		 * release notifications. */
		if (num_hostid && rtype != SPDK_NVME_RESERVE_WRITE_EXCLUSIVE &&
		    rtype != SPDK_NVME_RESERVE_EXCLUSIVE_ACCESS) {
			nvmf_subsystem_gen_ctrlr_notification(ns->subsystem, ns,
							      hostid_list,
							      num_hostid,
							      SPDK_NVME_RESERVATION_RELEASED);
		}
		break;
	case SPDK_NVME_RESERVE_CLEAR:
		nvmf_ns_reservation_clear_all_registrants(ns);
		if (num_hostid) {
			nvmf_subsystem_gen_ctrlr_notification(ns->subsystem, ns,
							      hostid_list,
							      num_hostid,
							      SPDK_NVME_RESERVATION_PREEMPTED);
		}
		break;
	default:
		status = SPDK_NVME_SC_INVALID_FIELD;
		update_sgroup = false;
		goto exit;
	}

exit:
	/* Persist the new state when PTPL is active. */
	if (update_sgroup && ns->ptpl_activated) {
		if (nvmf_ns_update_reservation_info(ns)) {
			status = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
		}
	}
	req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
	req->rsp->nvme_cpl.status.sc = status;
	return update_sgroup;
}
2911 
/*
 * Handle the Reservation Report command.
 *
 * Fills req->data with the extended Reservation Status data structure
 * (EDS must be set; NVMe-oF always uses the extended controller format)
 * and sets the NVMe completion status on req.
 */
static void
nvmf_ns_reservation_report(struct spdk_nvmf_ns *ns,
			   struct spdk_nvmf_ctrlr *ctrlr,
			   struct spdk_nvmf_request *req)
{
	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
	struct spdk_nvmf_registrant *reg, *tmp;
	struct spdk_nvme_reservation_status_extended_data *status_data;
	struct spdk_nvme_registered_ctrlr_extended_data *ctrlr_data;
	uint8_t *payload;
	uint32_t transfer_len, payload_len = 0;
	uint32_t regctl = 0;
	uint8_t status = SPDK_NVME_SC_SUCCESS;

	if (req->data == NULL) {
		SPDK_ERRLOG("No data transfer specified for request. "
			    " Unable to transfer back response.\n");
		status = SPDK_NVME_SC_INVALID_FIELD;
		goto exit;
	}

	if (!cmd->cdw11_bits.resv_report.eds) {
		SPDK_ERRLOG("NVMeoF uses extended controller data structure, "
			    "please set EDS bit in cdw11 and try again\n");
		status = SPDK_NVME_SC_HOSTID_INCONSISTENT_FORMAT;
		goto exit;
	}

	/* Number of Dwords of the Reservation Status data structure to transfer */
	transfer_len = (cmd->cdw10 + 1) * sizeof(uint32_t);
	payload = req->data;

	/* The buffer must hold at least the fixed status header. */
	if (transfer_len < sizeof(struct spdk_nvme_reservation_status_extended_data)) {
		status = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
		goto exit;
	}

	status_data = (struct spdk_nvme_reservation_status_extended_data *)payload;
	status_data->data.gen = ns->gen;
	status_data->data.rtype = ns->rtype;
	status_data->data.ptpls = ns->ptpl_activated;
	payload_len += sizeof(struct spdk_nvme_reservation_status_extended_data);

	/* Append one extended controller data entry per registrant, up to
	 * however many fit in the host's transfer length. */
	TAILQ_FOREACH_SAFE(reg, &ns->registrants, link, tmp) {
		payload_len += sizeof(struct spdk_nvme_registered_ctrlr_extended_data);
		if (payload_len > transfer_len) {
			break;
		}

		ctrlr_data = (struct spdk_nvme_registered_ctrlr_extended_data *)
			     (payload + sizeof(*status_data) + sizeof(*ctrlr_data) * regctl);
		/* Set to 0xffffh for dynamic controller */
		ctrlr_data->cntlid = 0xffff;
		ctrlr_data->rcsts.status = (ns->holder == reg) ? true : false;
		ctrlr_data->rkey = reg->rkey;
		spdk_uuid_copy((struct spdk_uuid *)ctrlr_data->hostid, &reg->hostid);
		regctl++;
	}
	status_data->data.regctl = regctl;

exit:
	req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
	req->rsp->nvme_cpl.status.sc = status;
	return;
}
2977 
/* Message callback executed on the request's poll-group thread to finish
 * the reservation command. */
static void
nvmf_ns_reservation_complete(void *ctx)
{
	spdk_nvmf_request_complete((struct spdk_nvmf_request *)ctx);
}
2985 
2986 static void
2987 _nvmf_ns_reservation_update_done(struct spdk_nvmf_subsystem *subsystem,
2988 				 void *cb_arg, int status)
2989 {
2990 	struct spdk_nvmf_request *req = (struct spdk_nvmf_request *)cb_arg;
2991 	struct spdk_nvmf_poll_group *group = req->qpair->group;
2992 
2993 	spdk_thread_send_msg(group->thread, nvmf_ns_reservation_complete, req);
2994 }
2995 
/*
 * Entry point for reservation commands on a namespace.
 *
 * Dispatches to the per-opcode handler; if the handler reports a state
 * change, propagates the new reservation state to every subsystem poll
 * group before completing the request asynchronously.  The request is
 * always completed via _nvmf_ns_reservation_update_done().
 */
void
nvmf_ns_reservation_request(void *ctx)
{
	struct spdk_nvmf_request *req = (struct spdk_nvmf_request *)ctx;
	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
	struct subsystem_update_ns_ctx *update_ctx;
	uint32_t nsid;
	struct spdk_nvmf_ns *ns;
	bool update_sgroup = false;

	nsid = cmd->nsid;
	ns = _nvmf_subsystem_get_ns(ctrlr->subsys, nsid);
	assert(ns != NULL);

	switch (cmd->opc) {
	case SPDK_NVME_OPC_RESERVATION_REGISTER:
		update_sgroup = nvmf_ns_reservation_register(ns, ctrlr, req);
		break;
	case SPDK_NVME_OPC_RESERVATION_ACQUIRE:
		update_sgroup = nvmf_ns_reservation_acquire(ns, ctrlr, req);
		break;
	case SPDK_NVME_OPC_RESERVATION_RELEASE:
		update_sgroup = nvmf_ns_reservation_release(ns, ctrlr, req);
		break;
	case SPDK_NVME_OPC_RESERVATION_REPORT:
		/* Report is read-only; it never changes reservation state. */
		nvmf_ns_reservation_report(ns, ctrlr, req);
		break;
	default:
		break;
	}

	/* update reservation information to subsystem's poll group */
	if (update_sgroup) {
		update_ctx = calloc(1, sizeof(*update_ctx));
		if (update_ctx == NULL) {
			SPDK_ERRLOG("Can't alloc subsystem poll group update context\n");
			/* Fall through and complete the request without the
			 * poll-group update; the command status is already set. */
			goto update_done;
		}
		update_ctx->subsystem = ctrlr->subsys;
		update_ctx->cb_fn = _nvmf_ns_reservation_update_done;
		update_ctx->cb_arg = req;

		/* Completion happens in _nvmf_ns_reservation_update_done once
		 * every poll group has picked up the new state. */
		nvmf_subsystem_update_ns(ctrlr->subsys, subsystem_update_ns_done, update_ctx);
		return;
	}

update_done:
	_nvmf_ns_reservation_update_done(ctrlr->subsys, (void *)req, 0);
}
3046 
3047 int
3048 spdk_nvmf_subsystem_set_ana_reporting(struct spdk_nvmf_subsystem *subsystem,
3049 				      bool ana_reporting)
3050 {
3051 	if (subsystem->state != SPDK_NVMF_SUBSYSTEM_INACTIVE) {
3052 		return -EAGAIN;
3053 	}
3054 
3055 	subsystem->flags.ana_reporting = ana_reporting;
3056 
3057 	return 0;
3058 }
3059 
/* Return whether ANA reporting is enabled for the given subsystem. */
bool
nvmf_subsystem_get_ana_reporting(struct spdk_nvmf_subsystem *subsystem)
{
	return subsystem->flags.ana_reporting;
}
3065 
/* Context carried through spdk_for_each_channel() while propagating an
 * ANA state change on a listener to every poll group.
 */
struct subsystem_listener_update_ctx {
	/* Listener whose ana_state array was updated before the iteration. */
	struct spdk_nvmf_subsystem_listener *listener;

	/* User completion callback (may be NULL) and its argument, invoked
	 * once all poll groups have been visited.
	 */
	spdk_nvmf_tgt_subsystem_listen_done_fn cb_fn;
	void *cb_arg;
};
3072 
3073 static void
3074 subsystem_listener_update_done(struct spdk_io_channel_iter *i, int status)
3075 {
3076 	struct subsystem_listener_update_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
3077 
3078 	if (ctx->cb_fn) {
3079 		ctx->cb_fn(ctx->cb_arg, status);
3080 	}
3081 	free(ctx);
3082 }
3083 
3084 static void
3085 subsystem_listener_update_on_pg(struct spdk_io_channel_iter *i)
3086 {
3087 	struct subsystem_listener_update_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
3088 	struct spdk_nvmf_subsystem_listener *listener;
3089 	struct spdk_nvmf_poll_group *group;
3090 	struct spdk_nvmf_ctrlr *ctrlr;
3091 
3092 	listener = ctx->listener;
3093 	group = spdk_io_channel_get_ctx(spdk_io_channel_iter_get_channel(i));
3094 
3095 	TAILQ_FOREACH(ctrlr, &listener->subsystem->ctrlrs, link) {
3096 		if (ctrlr->admin_qpair->group == group && ctrlr->listener == listener) {
3097 			nvmf_ctrlr_async_event_ana_change_notice(ctrlr);
3098 		}
3099 	}
3100 
3101 	spdk_for_each_channel_continue(i, 0);
3102 }
3103 
3104 void
3105 nvmf_subsystem_set_ana_state(struct spdk_nvmf_subsystem *subsystem,
3106 			     const struct spdk_nvme_transport_id *trid,
3107 			     enum spdk_nvme_ana_state ana_state, uint32_t anagrpid,
3108 			     spdk_nvmf_tgt_subsystem_listen_done_fn cb_fn, void *cb_arg)
3109 {
3110 	struct spdk_nvmf_subsystem_listener *listener;
3111 	struct subsystem_listener_update_ctx *ctx;
3112 	uint32_t i;
3113 
3114 	assert(cb_fn != NULL);
3115 	assert(subsystem->state == SPDK_NVMF_SUBSYSTEM_INACTIVE ||
3116 	       subsystem->state == SPDK_NVMF_SUBSYSTEM_PAUSED);
3117 
3118 	if (!subsystem->flags.ana_reporting) {
3119 		SPDK_ERRLOG("ANA reporting is disabled\n");
3120 		cb_fn(cb_arg, -EINVAL);
3121 		return;
3122 	}
3123 
3124 	/* ANA Change state is not used, ANA Persistent Loss state
3125 	 * is not supported yet.
3126 	 */
3127 	if (!(ana_state == SPDK_NVME_ANA_OPTIMIZED_STATE ||
3128 	      ana_state == SPDK_NVME_ANA_NON_OPTIMIZED_STATE ||
3129 	      ana_state == SPDK_NVME_ANA_INACCESSIBLE_STATE)) {
3130 		SPDK_ERRLOG("ANA state %d is not supported\n", ana_state);
3131 		cb_fn(cb_arg, -ENOTSUP);
3132 		return;
3133 	}
3134 
3135 	if (anagrpid > subsystem->max_nsid) {
3136 		SPDK_ERRLOG("ANA group ID %" PRIu32 " is more than maximum\n", anagrpid);
3137 		cb_fn(cb_arg, -EINVAL);
3138 		return;
3139 	}
3140 
3141 	listener = nvmf_subsystem_find_listener(subsystem, trid);
3142 	if (!listener) {
3143 		SPDK_ERRLOG("Unable to find listener.\n");
3144 		cb_fn(cb_arg, -EINVAL);
3145 		return;
3146 	}
3147 
3148 	if (anagrpid != 0 && listener->ana_state[anagrpid - 1] == ana_state) {
3149 		cb_fn(cb_arg, 0);
3150 		return;
3151 	}
3152 
3153 	ctx = calloc(1, sizeof(*ctx));
3154 	if (!ctx) {
3155 		SPDK_ERRLOG("Unable to allocate context\n");
3156 		cb_fn(cb_arg, -ENOMEM);
3157 		return;
3158 	}
3159 
3160 	for (i = 1; i <= subsystem->max_nsid; i++) {
3161 		if (anagrpid == 0 || i == anagrpid) {
3162 			listener->ana_state[i - 1] = ana_state;
3163 		}
3164 	}
3165 	listener->ana_state_change_count++;
3166 
3167 	ctx->listener = listener;
3168 	ctx->cb_fn = cb_fn;
3169 	ctx->cb_arg = cb_arg;
3170 
3171 	spdk_for_each_channel(subsystem->tgt,
3172 			      subsystem_listener_update_on_pg,
3173 			      ctx,
3174 			      subsystem_listener_update_done);
3175 }
3176