xref: /spdk/lib/nvmf/nvmf.c (revision cc6920a4763d4b9a43aa40583c8397d8f14fa100)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright (c) Intel Corporation. All rights reserved.
5  *   Copyright (c) 2018-2019, 2021 Mellanox Technologies LTD. All rights reserved.
6  *   Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
7  *
8  *   Redistribution and use in source and binary forms, with or without
9  *   modification, are permitted provided that the following conditions
10  *   are met:
11  *
12  *     * Redistributions of source code must retain the above copyright
13  *       notice, this list of conditions and the following disclaimer.
14  *     * Redistributions in binary form must reproduce the above copyright
15  *       notice, this list of conditions and the following disclaimer in
16  *       the documentation and/or other materials provided with the
17  *       distribution.
18  *     * Neither the name of Intel Corporation nor the names of its
19  *       contributors may be used to endorse or promote products derived
20  *       from this software without specific prior written permission.
21  *
22  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  */
34 
35 #include "spdk/stdinc.h"
36 
37 #include "spdk/bdev.h"
38 #include "spdk/bit_array.h"
39 #include "spdk/thread.h"
40 #include "spdk/nvmf.h"
41 #include "spdk/endian.h"
42 #include "spdk/string.h"
43 #include "spdk/log.h"
44 #include "spdk_internal/usdt.h"
45 
46 #include "nvmf_internal.h"
47 #include "transport.h"
48 
SPDK_LOG_REGISTER_COMPONENT(nvmf)

/* Value used for tgt->max_subsystems when the creation options leave max_subsystems at zero. */
#define SPDK_NVMF_DEFAULT_MAX_SUBSYSTEMS 1024
/* Period handed to the accept poller when the creation options leave acceptor_poll_rate at zero
 * (presumably microseconds, going by the _US suffix). */
#define SPDK_NVMF_DEFAULT_ACCEPT_POLL_RATE_US 10000

/* Every target created by spdk_nvmf_tgt_create() and not yet passed to spdk_nvmf_tgt_destroy(). */
static TAILQ_HEAD(, spdk_nvmf_tgt) g_nvmf_tgts = TAILQ_HEAD_INITIALIZER(g_nvmf_tgts);

typedef void (*nvmf_qpair_disconnect_cpl)(void *ctx, int status);
/* Declared early because nvmf_tgt_create_poll_group() uses it on its error path. */
static void nvmf_tgt_destroy_poll_group(void *io_device, void *ctx_buf);
58 
/* supplied to a single call to nvmf_qpair_disconnect */
struct nvmf_qpair_disconnect_ctx {
	/* Queue pair being torn down. */
	struct spdk_nvmf_qpair *qpair;
	/* Controller the qpair belonged to; filled in just before hopping to the controller's thread. */
	struct spdk_nvmf_ctrlr *ctrlr;
	/* Optional completion callback; sent to `thread` once the transport has finished with the qpair. */
	nvmf_qpair_disconnect_cb cb_fn;
	/* Thread that receives the qpair-fini message and the completion callback. */
	struct spdk_thread *thread;
	/* Argument passed to cb_fn. */
	void *ctx;
	/* Queue id, cached so the controller's qpair_mask bit can be cleared later. */
	uint16_t qid;
};
68 
/*
 * There are several times when we need to iterate through the list of all qpairs and selectively delete them.
 * In order to do this sequentially without overlap, we must provide a context to recover the next qpair from
 * to enable calling nvmf_qpair_disconnect on the next desired qpair.
 */
struct nvmf_qpair_disconnect_many_ctx {
	/* Not referenced by the poll-group teardown path in this part of the file. */
	struct spdk_nvmf_subsystem *subsystem;
	/* Poll group whose qpair list is being drained. */
	struct spdk_nvmf_poll_group *group;
	/* Completion callback and its argument; not referenced by the poll-group teardown path here. */
	spdk_nvmf_poll_group_mod_done cpl_fn;
	void *cpl_ctx;
	uint32_t count;
};
81 
82 static void
83 nvmf_qpair_set_state(struct spdk_nvmf_qpair *qpair,
84 		     enum spdk_nvmf_qpair_state state)
85 {
86 	assert(qpair != NULL);
87 	assert(qpair->group->thread == spdk_get_thread());
88 
89 	qpair->state = state;
90 }
91 
92 static int
93 nvmf_poll_group_poll(void *ctx)
94 {
95 	struct spdk_nvmf_poll_group *group = ctx;
96 	int rc;
97 	int count = 0;
98 	struct spdk_nvmf_transport_poll_group *tgroup;
99 
100 	TAILQ_FOREACH(tgroup, &group->tgroups, link) {
101 		rc = nvmf_transport_poll_group_poll(tgroup);
102 		if (rc < 0) {
103 			return SPDK_POLLER_BUSY;
104 		}
105 		count += rc;
106 	}
107 
108 	return count > 0 ? SPDK_POLLER_BUSY : SPDK_POLLER_IDLE;
109 }
110 
111 static int
112 nvmf_tgt_create_poll_group(void *io_device, void *ctx_buf)
113 {
114 	struct spdk_nvmf_tgt *tgt = io_device;
115 	struct spdk_nvmf_poll_group *group = ctx_buf;
116 	struct spdk_nvmf_transport *transport;
117 	struct spdk_thread *thread = spdk_get_thread();
118 	uint32_t sid;
119 	int rc;
120 
121 	SPDK_DTRACE_PROBE1(nvmf_create_poll_group, spdk_thread_get_id(thread));
122 
123 	TAILQ_INIT(&group->tgroups);
124 	TAILQ_INIT(&group->qpairs);
125 
126 	TAILQ_FOREACH(transport, &tgt->transports, link) {
127 		rc = nvmf_poll_group_add_transport(group, transport);
128 		if (rc != 0) {
129 			return rc;
130 		}
131 	}
132 
133 	group->num_sgroups = tgt->max_subsystems;
134 	group->sgroups = calloc(tgt->max_subsystems, sizeof(struct spdk_nvmf_subsystem_poll_group));
135 	if (!group->sgroups) {
136 		return -ENOMEM;
137 	}
138 
139 	for (sid = 0; sid < tgt->max_subsystems; sid++) {
140 		struct spdk_nvmf_subsystem *subsystem;
141 
142 		subsystem = tgt->subsystems[sid];
143 		if (!subsystem) {
144 			continue;
145 		}
146 
147 		if (nvmf_poll_group_add_subsystem(group, subsystem, NULL, NULL) != 0) {
148 			nvmf_tgt_destroy_poll_group(io_device, ctx_buf);
149 			return -1;
150 		}
151 	}
152 
153 	pthread_mutex_lock(&tgt->mutex);
154 	TAILQ_INSERT_TAIL(&tgt->poll_groups, group, link);
155 	pthread_mutex_unlock(&tgt->mutex);
156 
157 	group->poller = SPDK_POLLER_REGISTER(nvmf_poll_group_poll, group, 0);
158 	group->thread = thread;
159 
160 	return 0;
161 }
162 
163 static void
164 nvmf_tgt_destroy_poll_group(void *io_device, void *ctx_buf)
165 {
166 	struct spdk_nvmf_tgt *tgt = io_device;
167 	struct spdk_nvmf_poll_group *group = ctx_buf;
168 	struct spdk_nvmf_transport_poll_group *tgroup, *tmp;
169 	struct spdk_nvmf_subsystem_poll_group *sgroup;
170 	uint32_t sid, nsid;
171 
172 	SPDK_DTRACE_PROBE1(nvmf_destroy_poll_group, spdk_thread_get_id(group->thread));
173 
174 	pthread_mutex_lock(&tgt->mutex);
175 	TAILQ_REMOVE(&tgt->poll_groups, group, link);
176 	pthread_mutex_unlock(&tgt->mutex);
177 
178 	TAILQ_FOREACH_SAFE(tgroup, &group->tgroups, link, tmp) {
179 		TAILQ_REMOVE(&group->tgroups, tgroup, link);
180 		nvmf_transport_poll_group_destroy(tgroup);
181 	}
182 
183 	for (sid = 0; sid < group->num_sgroups; sid++) {
184 		sgroup = &group->sgroups[sid];
185 
186 		for (nsid = 0; nsid < sgroup->num_ns; nsid++) {
187 			if (sgroup->ns_info[nsid].channel) {
188 				spdk_put_io_channel(sgroup->ns_info[nsid].channel);
189 				sgroup->ns_info[nsid].channel = NULL;
190 			}
191 		}
192 
193 		free(sgroup->ns_info);
194 	}
195 
196 	free(group->sgroups);
197 
198 	spdk_poller_unregister(&group->poller);
199 
200 	if (group->destroy_cb_fn) {
201 		group->destroy_cb_fn(group->destroy_cb_arg, 0);
202 	}
203 }
204 
205 static void
206 _nvmf_tgt_disconnect_next_qpair(void *ctx)
207 {
208 	struct spdk_nvmf_qpair *qpair;
209 	struct nvmf_qpair_disconnect_many_ctx *qpair_ctx = ctx;
210 	struct spdk_nvmf_poll_group *group = qpair_ctx->group;
211 	struct spdk_io_channel *ch;
212 	int rc = 0;
213 
214 	qpair = TAILQ_FIRST(&group->qpairs);
215 
216 	if (qpair) {
217 		rc = spdk_nvmf_qpair_disconnect(qpair, _nvmf_tgt_disconnect_next_qpair, ctx);
218 	}
219 
220 	if (!qpair || rc != 0) {
221 		/* When the refcount from the channels reaches 0, nvmf_tgt_destroy_poll_group will be called. */
222 		ch = spdk_io_channel_from_ctx(group);
223 		spdk_put_io_channel(ch);
224 		free(qpair_ctx);
225 	}
226 }
227 
228 static void
229 nvmf_tgt_destroy_poll_group_qpairs(struct spdk_nvmf_poll_group *group)
230 {
231 	struct nvmf_qpair_disconnect_many_ctx *ctx;
232 
233 	SPDK_DTRACE_PROBE1(nvmf_destroy_poll_group_qpairs, spdk_thread_get_id(group->thread));
234 
235 	ctx = calloc(1, sizeof(struct nvmf_qpair_disconnect_many_ctx));
236 	if (!ctx) {
237 		SPDK_ERRLOG("Failed to allocate memory for destroy poll group ctx\n");
238 		return;
239 	}
240 
241 	ctx->group = group;
242 	_nvmf_tgt_disconnect_next_qpair(ctx);
243 }
244 
245 static int
246 nvmf_tgt_accept(void *ctx)
247 {
248 	struct spdk_nvmf_tgt *tgt = ctx;
249 	struct spdk_nvmf_transport *transport, *tmp;
250 	int count = 0;
251 
252 	TAILQ_FOREACH_SAFE(transport, &tgt->transports, link, tmp) {
253 		count += nvmf_transport_accept(transport);
254 	}
255 
256 	return count > 0 ? SPDK_POLLER_BUSY : SPDK_POLLER_IDLE;
257 }
258 
259 struct spdk_nvmf_tgt *
260 spdk_nvmf_tgt_create(struct spdk_nvmf_target_opts *opts)
261 {
262 	struct spdk_nvmf_tgt *tgt, *tmp_tgt;
263 	uint32_t acceptor_poll_rate;
264 
265 	if (strnlen(opts->name, NVMF_TGT_NAME_MAX_LENGTH) == NVMF_TGT_NAME_MAX_LENGTH) {
266 		SPDK_ERRLOG("Provided target name exceeds the max length of %u.\n", NVMF_TGT_NAME_MAX_LENGTH);
267 		return NULL;
268 	}
269 
270 	TAILQ_FOREACH(tmp_tgt, &g_nvmf_tgts, link) {
271 		if (!strncmp(opts->name, tmp_tgt->name, NVMF_TGT_NAME_MAX_LENGTH)) {
272 			SPDK_ERRLOG("Provided target name must be unique.\n");
273 			return NULL;
274 		}
275 	}
276 
277 	tgt = calloc(1, sizeof(*tgt));
278 	if (!tgt) {
279 		return NULL;
280 	}
281 
282 	snprintf(tgt->name, NVMF_TGT_NAME_MAX_LENGTH, "%s", opts->name);
283 
284 	if (!opts || !opts->max_subsystems) {
285 		tgt->max_subsystems = SPDK_NVMF_DEFAULT_MAX_SUBSYSTEMS;
286 	} else {
287 		tgt->max_subsystems = opts->max_subsystems;
288 	}
289 
290 	if (!opts || !opts->acceptor_poll_rate) {
291 		acceptor_poll_rate = SPDK_NVMF_DEFAULT_ACCEPT_POLL_RATE_US;
292 	} else {
293 		acceptor_poll_rate = opts->acceptor_poll_rate;
294 	}
295 
296 	if (!opts) {
297 		tgt->crdt[0] = 0;
298 		tgt->crdt[1] = 0;
299 		tgt->crdt[2] = 0;
300 	} else {
301 		tgt->crdt[0] = opts->crdt[0];
302 		tgt->crdt[1] = opts->crdt[1];
303 		tgt->crdt[2] = opts->crdt[2];
304 	}
305 
306 	if (!opts) {
307 		tgt->discovery_filter = SPDK_NVMF_TGT_DISCOVERY_MATCH_ANY;
308 	} else {
309 		tgt->discovery_filter = opts->discovery_filter;
310 	}
311 
312 	tgt->discovery_genctr = 0;
313 	TAILQ_INIT(&tgt->transports);
314 	TAILQ_INIT(&tgt->poll_groups);
315 
316 	tgt->subsystems = calloc(tgt->max_subsystems, sizeof(struct spdk_nvmf_subsystem *));
317 	if (!tgt->subsystems) {
318 		free(tgt);
319 		return NULL;
320 	}
321 
322 	pthread_mutex_init(&tgt->mutex, NULL);
323 
324 	tgt->accept_poller = SPDK_POLLER_REGISTER(nvmf_tgt_accept, tgt, acceptor_poll_rate);
325 	if (!tgt->accept_poller) {
326 		pthread_mutex_destroy(&tgt->mutex);
327 		free(tgt->subsystems);
328 		free(tgt);
329 		return NULL;
330 	}
331 
332 	spdk_io_device_register(tgt,
333 				nvmf_tgt_create_poll_group,
334 				nvmf_tgt_destroy_poll_group,
335 				sizeof(struct spdk_nvmf_poll_group),
336 				tgt->name);
337 
338 	TAILQ_INSERT_HEAD(&g_nvmf_tgts, tgt, link);
339 
340 	return tgt;
341 }
342 
343 static void
344 _nvmf_tgt_destroy_next_transport(void *ctx)
345 {
346 	struct spdk_nvmf_tgt *tgt = ctx;
347 	struct spdk_nvmf_transport *transport;
348 
349 	if (!TAILQ_EMPTY(&tgt->transports)) {
350 		transport = TAILQ_FIRST(&tgt->transports);
351 		TAILQ_REMOVE(&tgt->transports, transport, link);
352 		spdk_nvmf_transport_destroy(transport, _nvmf_tgt_destroy_next_transport, tgt);
353 	} else {
354 		spdk_nvmf_tgt_destroy_done_fn *destroy_cb_fn = tgt->destroy_cb_fn;
355 		void *destroy_cb_arg = tgt->destroy_cb_arg;
356 
357 		pthread_mutex_destroy(&tgt->mutex);
358 		free(tgt);
359 
360 		if (destroy_cb_fn) {
361 			destroy_cb_fn(destroy_cb_arg, 0);
362 		}
363 	}
364 }
365 
366 static void
367 nvmf_tgt_destroy_cb(void *io_device)
368 {
369 	struct spdk_nvmf_tgt *tgt = io_device;
370 	uint32_t i;
371 	int rc;
372 
373 	if (tgt->subsystems) {
374 		for (i = 0; i < tgt->max_subsystems; i++) {
375 			if (tgt->subsystems[i]) {
376 				nvmf_subsystem_remove_all_listeners(tgt->subsystems[i], true);
377 
378 				rc = spdk_nvmf_subsystem_destroy(tgt->subsystems[i], nvmf_tgt_destroy_cb, tgt);
379 				if (rc) {
380 					if (rc == -EINPROGRESS) {
381 						/* If rc is -EINPROGRESS, nvmf_tgt_destroy_cb will be called again when subsystem #i
382 						 * is destroyed, nvmf_tgt_destroy_cb will continue to destroy other subsystems if any */
383 						return;
384 					} else {
385 						SPDK_ERRLOG("Failed to destroy subsystem, id %u, rc %d\n", tgt->subsystems[i]->id, rc);
386 						assert(0);
387 					}
388 				}
389 			}
390 		}
391 		free(tgt->subsystems);
392 	}
393 
394 	_nvmf_tgt_destroy_next_transport(tgt);
395 }
396 
397 void
398 spdk_nvmf_tgt_destroy(struct spdk_nvmf_tgt *tgt,
399 		      spdk_nvmf_tgt_destroy_done_fn cb_fn,
400 		      void *cb_arg)
401 {
402 	tgt->destroy_cb_fn = cb_fn;
403 	tgt->destroy_cb_arg = cb_arg;
404 
405 	spdk_poller_unregister(&tgt->accept_poller);
406 
407 	TAILQ_REMOVE(&g_nvmf_tgts, tgt, link);
408 
409 	spdk_io_device_unregister(tgt, nvmf_tgt_destroy_cb);
410 }
411 
412 const char *
413 spdk_nvmf_tgt_get_name(struct spdk_nvmf_tgt *tgt)
414 {
415 	return tgt->name;
416 }
417 
418 struct spdk_nvmf_tgt *
419 spdk_nvmf_get_tgt(const char *name)
420 {
421 	struct spdk_nvmf_tgt *tgt;
422 	uint32_t num_targets = 0;
423 
424 	TAILQ_FOREACH(tgt, &g_nvmf_tgts, link) {
425 		if (name) {
426 			if (!strncmp(tgt->name, name, NVMF_TGT_NAME_MAX_LENGTH)) {
427 				return tgt;
428 			}
429 		}
430 		num_targets++;
431 	}
432 
433 	/*
434 	 * special case. If there is only one target and
435 	 * no name was specified, return the only available
436 	 * target. If there is more than one target, name must
437 	 * be specified.
438 	 */
439 	if (!name && num_targets == 1) {
440 		return TAILQ_FIRST(&g_nvmf_tgts);
441 	}
442 
443 	return NULL;
444 }
445 
446 struct spdk_nvmf_tgt *
447 spdk_nvmf_get_first_tgt(void)
448 {
449 	return TAILQ_FIRST(&g_nvmf_tgts);
450 }
451 
452 struct spdk_nvmf_tgt *
453 spdk_nvmf_get_next_tgt(struct spdk_nvmf_tgt *prev)
454 {
455 	return TAILQ_NEXT(prev, link);
456 }
457 
458 static void
459 nvmf_write_subsystem_config_json(struct spdk_json_write_ctx *w,
460 				 struct spdk_nvmf_subsystem *subsystem)
461 {
462 	struct spdk_nvmf_host *host;
463 	struct spdk_nvmf_subsystem_listener *listener;
464 	const struct spdk_nvme_transport_id *trid;
465 	struct spdk_nvmf_ns *ns;
466 	struct spdk_nvmf_ns_opts ns_opts;
467 	uint32_t max_namespaces;
468 	char uuid_str[SPDK_UUID_STRING_LEN];
469 
470 	if (spdk_nvmf_subsystem_get_type(subsystem) != SPDK_NVMF_SUBTYPE_NVME) {
471 		return;
472 	}
473 
474 	/* { */
475 	spdk_json_write_object_begin(w);
476 	spdk_json_write_named_string(w, "method", "nvmf_create_subsystem");
477 
478 	/*     "params" : { */
479 	spdk_json_write_named_object_begin(w, "params");
480 	spdk_json_write_named_string(w, "nqn", spdk_nvmf_subsystem_get_nqn(subsystem));
481 	spdk_json_write_named_bool(w, "allow_any_host", spdk_nvmf_subsystem_get_allow_any_host(subsystem));
482 	spdk_json_write_named_string(w, "serial_number", spdk_nvmf_subsystem_get_sn(subsystem));
483 	spdk_json_write_named_string(w, "model_number", spdk_nvmf_subsystem_get_mn(subsystem));
484 
485 	max_namespaces = spdk_nvmf_subsystem_get_max_namespaces(subsystem);
486 	if (max_namespaces != 0) {
487 		spdk_json_write_named_uint32(w, "max_namespaces", max_namespaces);
488 	}
489 
490 	spdk_json_write_named_uint32(w, "min_cntlid", spdk_nvmf_subsystem_get_min_cntlid(subsystem));
491 	spdk_json_write_named_uint32(w, "max_cntlid", spdk_nvmf_subsystem_get_max_cntlid(subsystem));
492 	spdk_json_write_named_bool(w, "ana_reporting", nvmf_subsystem_get_ana_reporting(subsystem));
493 
494 	/*     } "params" */
495 	spdk_json_write_object_end(w);
496 
497 	/* } */
498 	spdk_json_write_object_end(w);
499 
500 	for (listener = spdk_nvmf_subsystem_get_first_listener(subsystem); listener != NULL;
501 	     listener = spdk_nvmf_subsystem_get_next_listener(subsystem, listener)) {
502 		trid = spdk_nvmf_subsystem_listener_get_trid(listener);
503 
504 		spdk_json_write_object_begin(w);
505 		spdk_json_write_named_string(w, "method", "nvmf_subsystem_add_listener");
506 
507 		/*     "params" : { */
508 		spdk_json_write_named_object_begin(w, "params");
509 
510 		spdk_json_write_named_string(w, "nqn", spdk_nvmf_subsystem_get_nqn(subsystem));
511 		nvmf_transport_listen_dump_opts(listener->transport, trid, w);
512 
513 		/*     } "params" */
514 		spdk_json_write_object_end(w);
515 
516 		/* } */
517 		spdk_json_write_object_end(w);
518 	}
519 
520 	for (host = spdk_nvmf_subsystem_get_first_host(subsystem); host != NULL;
521 	     host = spdk_nvmf_subsystem_get_next_host(subsystem, host)) {
522 
523 		spdk_json_write_object_begin(w);
524 		spdk_json_write_named_string(w, "method", "nvmf_subsystem_add_host");
525 
526 		/*     "params" : { */
527 		spdk_json_write_named_object_begin(w, "params");
528 
529 		spdk_json_write_named_string(w, "nqn", spdk_nvmf_subsystem_get_nqn(subsystem));
530 		spdk_json_write_named_string(w, "host", spdk_nvmf_host_get_nqn(host));
531 
532 		/*     } "params" */
533 		spdk_json_write_object_end(w);
534 
535 		/* } */
536 		spdk_json_write_object_end(w);
537 	}
538 
539 	for (ns = spdk_nvmf_subsystem_get_first_ns(subsystem); ns != NULL;
540 	     ns = spdk_nvmf_subsystem_get_next_ns(subsystem, ns)) {
541 		spdk_nvmf_ns_get_opts(ns, &ns_opts, sizeof(ns_opts));
542 
543 		spdk_json_write_object_begin(w);
544 		spdk_json_write_named_string(w, "method", "nvmf_subsystem_add_ns");
545 
546 		/*     "params" : { */
547 		spdk_json_write_named_object_begin(w, "params");
548 
549 		spdk_json_write_named_string(w, "nqn", spdk_nvmf_subsystem_get_nqn(subsystem));
550 
551 		/*     "namespace" : { */
552 		spdk_json_write_named_object_begin(w, "namespace");
553 
554 		spdk_json_write_named_uint32(w, "nsid", spdk_nvmf_ns_get_id(ns));
555 		spdk_json_write_named_string(w, "bdev_name", spdk_bdev_get_name(spdk_nvmf_ns_get_bdev(ns)));
556 
557 		if (!spdk_mem_all_zero(ns_opts.nguid, sizeof(ns_opts.nguid))) {
558 			SPDK_STATIC_ASSERT(sizeof(ns_opts.nguid) == sizeof(uint64_t) * 2, "size mismatch");
559 			spdk_json_write_named_string_fmt(w, "nguid", "%016"PRIX64"%016"PRIX64, from_be64(&ns_opts.nguid[0]),
560 							 from_be64(&ns_opts.nguid[8]));
561 		}
562 
563 		if (!spdk_mem_all_zero(ns_opts.eui64, sizeof(ns_opts.eui64))) {
564 			SPDK_STATIC_ASSERT(sizeof(ns_opts.eui64) == sizeof(uint64_t), "size mismatch");
565 			spdk_json_write_named_string_fmt(w, "eui64", "%016"PRIX64, from_be64(&ns_opts.eui64));
566 		}
567 
568 		if (!spdk_mem_all_zero(&ns_opts.uuid, sizeof(ns_opts.uuid))) {
569 			spdk_uuid_fmt_lower(uuid_str, sizeof(uuid_str), &ns_opts.uuid);
570 			spdk_json_write_named_string(w, "uuid",  uuid_str);
571 		}
572 
573 		if (nvmf_subsystem_get_ana_reporting(subsystem)) {
574 			spdk_json_write_named_uint32(w, "anagrpid", ns_opts.anagrpid);
575 		}
576 
577 		/*     "namespace" */
578 		spdk_json_write_object_end(w);
579 
580 		/*     } "params" */
581 		spdk_json_write_object_end(w);
582 
583 		/* } */
584 		spdk_json_write_object_end(w);
585 	}
586 }
587 
588 void
589 spdk_nvmf_tgt_write_config_json(struct spdk_json_write_ctx *w, struct spdk_nvmf_tgt *tgt)
590 {
591 	struct spdk_nvmf_subsystem *subsystem;
592 	struct spdk_nvmf_transport *transport;
593 
594 	spdk_json_write_object_begin(w);
595 	spdk_json_write_named_string(w, "method", "nvmf_set_max_subsystems");
596 
597 	spdk_json_write_named_object_begin(w, "params");
598 	spdk_json_write_named_uint32(w, "max_subsystems", tgt->max_subsystems);
599 	spdk_json_write_object_end(w);
600 
601 	spdk_json_write_object_end(w);
602 
603 	spdk_json_write_object_begin(w);
604 	spdk_json_write_named_string(w, "method", "nvmf_set_crdt");
605 	spdk_json_write_named_object_begin(w, "params");
606 	spdk_json_write_named_uint32(w, "crdt1", tgt->crdt[0]);
607 	spdk_json_write_named_uint32(w, "crdt2", tgt->crdt[1]);
608 	spdk_json_write_named_uint32(w, "crdt3", tgt->crdt[2]);
609 	spdk_json_write_object_end(w);
610 	spdk_json_write_object_end(w);
611 
612 	/* write transports */
613 	TAILQ_FOREACH(transport, &tgt->transports, link) {
614 		spdk_json_write_object_begin(w);
615 		spdk_json_write_named_string(w, "method", "nvmf_create_transport");
616 		nvmf_transport_dump_opts(transport, w, true);
617 		spdk_json_write_object_end(w);
618 	}
619 
620 	subsystem = spdk_nvmf_subsystem_get_first(tgt);
621 	while (subsystem) {
622 		nvmf_write_subsystem_config_json(w, subsystem);
623 		subsystem = spdk_nvmf_subsystem_get_next(subsystem);
624 	}
625 }
626 
627 static void
628 nvmf_listen_opts_copy(struct spdk_nvmf_listen_opts *opts,
629 		      const struct spdk_nvmf_listen_opts *opts_src, size_t opts_size)
630 {
631 	assert(opts);
632 	assert(opts_src);
633 
634 	opts->opts_size = opts_size;
635 
636 #define SET_FIELD(field) \
637     if (offsetof(struct spdk_nvmf_listen_opts, field) + sizeof(opts->field) <= opts_size) { \
638                  opts->field = opts_src->field; \
639     } \
640 
641 	SET_FIELD(transport_specific);
642 #undef SET_FIELD
643 
644 	/* Do not remove this statement, you should always update this statement when you adding a new field,
645 	 * and do not forget to add the SET_FIELD statement for your added field. */
646 	SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_listen_opts) == 16, "Incorrect size");
647 }
648 
649 void
650 spdk_nvmf_listen_opts_init(struct spdk_nvmf_listen_opts *opts, size_t opts_size)
651 {
652 	struct spdk_nvmf_listen_opts opts_local = {};
653 
654 	/* local version of opts should have defaults set here */
655 
656 	nvmf_listen_opts_copy(opts, &opts_local, opts_size);
657 }
658 
659 int
660 spdk_nvmf_tgt_listen_ext(struct spdk_nvmf_tgt *tgt, const struct spdk_nvme_transport_id *trid,
661 			 struct spdk_nvmf_listen_opts *opts)
662 {
663 	struct spdk_nvmf_transport *transport;
664 	int rc;
665 	struct spdk_nvmf_listen_opts opts_local = {};
666 
667 	if (!opts) {
668 		SPDK_ERRLOG("opts should not be NULL\n");
669 		return -EINVAL;
670 	}
671 
672 	if (!opts->opts_size) {
673 		SPDK_ERRLOG("The opts_size in opts structure should not be zero\n");
674 		return -EINVAL;
675 	}
676 
677 	transport = spdk_nvmf_tgt_get_transport(tgt, trid->trstring);
678 	if (!transport) {
679 		SPDK_ERRLOG("Unable to find %s transport. The transport must be created first also make sure it is properly registered.\n",
680 			    trid->trstring);
681 		return -EINVAL;
682 	}
683 
684 	nvmf_listen_opts_copy(&opts_local, opts, opts->opts_size);
685 	rc = spdk_nvmf_transport_listen(transport, trid, &opts_local);
686 	if (rc < 0) {
687 		SPDK_ERRLOG("Unable to listen on address '%s'\n", trid->traddr);
688 	}
689 
690 	return rc;
691 }
692 
693 int
694 spdk_nvmf_tgt_stop_listen(struct spdk_nvmf_tgt *tgt,
695 			  struct spdk_nvme_transport_id *trid)
696 {
697 	struct spdk_nvmf_transport *transport;
698 	int rc;
699 
700 	transport = spdk_nvmf_tgt_get_transport(tgt, trid->trstring);
701 	if (!transport) {
702 		SPDK_ERRLOG("Unable to find %s transport. The transport must be created first also make sure it is properly registered.\n",
703 			    trid->trstring);
704 		return -EINVAL;
705 	}
706 
707 	rc = spdk_nvmf_transport_stop_listen(transport, trid);
708 	if (rc < 0) {
709 		SPDK_ERRLOG("Failed to stop listening on address '%s'\n", trid->traddr);
710 		return rc;
711 	}
712 	return 0;
713 }
714 
/* State carried across the per-channel iteration that adds a transport to every poll group. */
struct spdk_nvmf_tgt_add_transport_ctx {
	/* Target receiving the transport. */
	struct spdk_nvmf_tgt *tgt;
	/* Transport being added. */
	struct spdk_nvmf_transport *transport;
	/* User completion callback and its argument. */
	spdk_nvmf_tgt_add_transport_done_fn cb_fn;
	void *cb_arg;
	/* Failure status saved while the transport is rolled back from the poll groups. */
	int status;
};
722 
723 static void
724 _nvmf_tgt_remove_transport_done(struct spdk_io_channel_iter *i, int status)
725 {
726 	struct spdk_nvmf_tgt_add_transport_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
727 
728 	ctx->cb_fn(ctx->cb_arg, ctx->status);
729 	free(ctx);
730 }
731 
732 static void
733 _nvmf_tgt_remove_transport(struct spdk_io_channel_iter *i)
734 {
735 	struct spdk_nvmf_tgt_add_transport_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
736 	struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i);
737 	struct spdk_nvmf_poll_group *group = spdk_io_channel_get_ctx(ch);
738 	struct spdk_nvmf_transport_poll_group *tgroup, *tmp;
739 
740 	TAILQ_FOREACH_SAFE(tgroup, &group->tgroups, link, tmp) {
741 		if (tgroup->transport == ctx->transport) {
742 			TAILQ_REMOVE(&group->tgroups, tgroup, link);
743 			nvmf_transport_poll_group_destroy(tgroup);
744 		}
745 	}
746 
747 	spdk_for_each_channel_continue(i, 0);
748 }
749 
750 static void
751 _nvmf_tgt_add_transport_done(struct spdk_io_channel_iter *i, int status)
752 {
753 	struct spdk_nvmf_tgt_add_transport_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
754 
755 	if (status) {
756 		ctx->status = status;
757 		spdk_for_each_channel(ctx->tgt,
758 				      _nvmf_tgt_remove_transport,
759 				      ctx,
760 				      _nvmf_tgt_remove_transport_done);
761 		return;
762 	}
763 
764 	ctx->transport->tgt = ctx->tgt;
765 	TAILQ_INSERT_TAIL(&ctx->tgt->transports, ctx->transport, link);
766 	ctx->cb_fn(ctx->cb_arg, status);
767 	free(ctx);
768 }
769 
770 static void
771 _nvmf_tgt_add_transport(struct spdk_io_channel_iter *i)
772 {
773 	struct spdk_nvmf_tgt_add_transport_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
774 	struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i);
775 	struct spdk_nvmf_poll_group *group = spdk_io_channel_get_ctx(ch);
776 	int rc;
777 
778 	rc = nvmf_poll_group_add_transport(group, ctx->transport);
779 	spdk_for_each_channel_continue(i, rc);
780 }
781 
782 void spdk_nvmf_tgt_add_transport(struct spdk_nvmf_tgt *tgt,
783 				 struct spdk_nvmf_transport *transport,
784 				 spdk_nvmf_tgt_add_transport_done_fn cb_fn,
785 				 void *cb_arg)
786 {
787 	struct spdk_nvmf_tgt_add_transport_ctx *ctx;
788 
789 	if (spdk_nvmf_tgt_get_transport(tgt, transport->ops->name)) {
790 		cb_fn(cb_arg, -EEXIST);
791 		return; /* transport already created */
792 	}
793 
794 	ctx = calloc(1, sizeof(*ctx));
795 	if (!ctx) {
796 		cb_fn(cb_arg, -ENOMEM);
797 		return;
798 	}
799 
800 	ctx->tgt = tgt;
801 	ctx->transport = transport;
802 	ctx->cb_fn = cb_fn;
803 	ctx->cb_arg = cb_arg;
804 
805 	spdk_for_each_channel(tgt,
806 			      _nvmf_tgt_add_transport,
807 			      ctx,
808 			      _nvmf_tgt_add_transport_done);
809 }
810 
811 struct spdk_nvmf_subsystem *
812 spdk_nvmf_tgt_find_subsystem(struct spdk_nvmf_tgt *tgt, const char *subnqn)
813 {
814 	struct spdk_nvmf_subsystem	*subsystem;
815 	uint32_t sid;
816 
817 	if (!subnqn) {
818 		return NULL;
819 	}
820 
821 	/* Ensure that subnqn is null terminated */
822 	if (!memchr(subnqn, '\0', SPDK_NVMF_NQN_MAX_LEN + 1)) {
823 		SPDK_ERRLOG("Connect SUBNQN is not null terminated\n");
824 		return NULL;
825 	}
826 
827 	for (sid = 0; sid < tgt->max_subsystems; sid++) {
828 		subsystem = tgt->subsystems[sid];
829 		if (subsystem == NULL) {
830 			continue;
831 		}
832 
833 		if (strcmp(subnqn, subsystem->subnqn) == 0) {
834 			return subsystem;
835 		}
836 	}
837 
838 	return NULL;
839 }
840 
841 struct spdk_nvmf_transport *
842 spdk_nvmf_tgt_get_transport(struct spdk_nvmf_tgt *tgt, const char *transport_name)
843 {
844 	struct spdk_nvmf_transport *transport;
845 
846 	TAILQ_FOREACH(transport, &tgt->transports, link) {
847 		if (!strncasecmp(transport->ops->name, transport_name, SPDK_NVMF_TRSTRING_MAX_LEN)) {
848 			return transport;
849 		}
850 	}
851 	return NULL;
852 }
853 
/* Message payload sent to a poll group's thread to add a newly accepted qpair to that group. */
struct nvmf_new_qpair_ctx {
	/* Qpair to be added. */
	struct spdk_nvmf_qpair *qpair;
	/* Poll group chosen for the qpair. */
	struct spdk_nvmf_poll_group *group;
};
858 
859 static void
860 _nvmf_poll_group_add(void *_ctx)
861 {
862 	struct nvmf_new_qpair_ctx *ctx = _ctx;
863 	struct spdk_nvmf_qpair *qpair = ctx->qpair;
864 	struct spdk_nvmf_poll_group *group = ctx->group;
865 
866 	free(_ctx);
867 
868 	if (spdk_nvmf_poll_group_add(group, qpair) != 0) {
869 		SPDK_ERRLOG("Unable to add the qpair to a poll group.\n");
870 		spdk_nvmf_qpair_disconnect(qpair, NULL, NULL);
871 	}
872 }
873 
874 void
875 spdk_nvmf_tgt_new_qpair(struct spdk_nvmf_tgt *tgt, struct spdk_nvmf_qpair *qpair)
876 {
877 	struct spdk_nvmf_poll_group *group;
878 	struct nvmf_new_qpair_ctx *ctx;
879 
880 	group = spdk_nvmf_get_optimal_poll_group(qpair);
881 	if (group == NULL) {
882 		if (tgt->next_poll_group == NULL) {
883 			tgt->next_poll_group = TAILQ_FIRST(&tgt->poll_groups);
884 			if (tgt->next_poll_group == NULL) {
885 				SPDK_ERRLOG("No poll groups exist.\n");
886 				spdk_nvmf_qpair_disconnect(qpair, NULL, NULL);
887 				return;
888 			}
889 		}
890 		group = tgt->next_poll_group;
891 		tgt->next_poll_group = TAILQ_NEXT(group, link);
892 	}
893 
894 	ctx = calloc(1, sizeof(*ctx));
895 	if (!ctx) {
896 		SPDK_ERRLOG("Unable to send message to poll group.\n");
897 		spdk_nvmf_qpair_disconnect(qpair, NULL, NULL);
898 		return;
899 	}
900 
901 	ctx->qpair = qpair;
902 	ctx->group = group;
903 
904 	spdk_thread_send_msg(group->thread, _nvmf_poll_group_add, ctx);
905 }
906 
/*
 * Obtain the poll group for the target by getting an I/O channel on it; the
 * poll group is the channel's context.
 *
 * Returns the poll group, or NULL when no channel could be obtained.
 */
struct spdk_nvmf_poll_group *
spdk_nvmf_poll_group_create(struct spdk_nvmf_tgt *tgt)
{
	struct spdk_io_channel *channel = spdk_get_io_channel(tgt);

	if (channel == NULL) {
		SPDK_ERRLOG("Unable to get I/O channel for target\n");
		return NULL;
	}

	return spdk_io_channel_get_ctx(channel);
}
920 
921 void
922 spdk_nvmf_poll_group_destroy(struct spdk_nvmf_poll_group *group,
923 			     spdk_nvmf_poll_group_destroy_done_fn cb_fn,
924 			     void *cb_arg)
925 {
926 	assert(group->destroy_cb_fn == NULL);
927 	group->destroy_cb_fn = cb_fn;
928 	group->destroy_cb_arg = cb_arg;
929 
930 	/* This function will put the io_channel associated with this poll group */
931 	nvmf_tgt_destroy_poll_group_qpairs(group);
932 }
933 
934 int
935 spdk_nvmf_poll_group_add(struct spdk_nvmf_poll_group *group,
936 			 struct spdk_nvmf_qpair *qpair)
937 {
938 	int rc = -1;
939 	struct spdk_nvmf_transport_poll_group *tgroup;
940 
941 	TAILQ_INIT(&qpair->outstanding);
942 	qpair->group = group;
943 	qpair->ctrlr = NULL;
944 	qpair->disconnect_started = false;
945 
946 	TAILQ_FOREACH(tgroup, &group->tgroups, link) {
947 		if (tgroup->transport == qpair->transport) {
948 			rc = nvmf_transport_poll_group_add(tgroup, qpair);
949 			break;
950 		}
951 	}
952 
953 	/* We add the qpair to the group only it is successfully added into the tgroup */
954 	if (rc == 0) {
955 		SPDK_DTRACE_PROBE2(nvmf_poll_group_add_qpair, qpair, spdk_thread_get_id(group->thread));
956 		TAILQ_INSERT_TAIL(&group->qpairs, qpair, link);
957 		nvmf_qpair_set_state(qpair, SPDK_NVMF_QPAIR_ACTIVE);
958 	}
959 
960 	return rc;
961 }
962 
/* Message-handler adapter: forwards the void pointer argument to nvmf_ctrlr_destruct(). */
static void
_nvmf_ctrlr_destruct(void *ctx)
{
	nvmf_ctrlr_destruct((struct spdk_nvmf_ctrlr *)ctx);
}
970 
971 static void
972 _nvmf_transport_qpair_fini_complete(void *cb_ctx)
973 {
974 	struct nvmf_qpair_disconnect_ctx *qpair_ctx = cb_ctx;
975 
976 	if (qpair_ctx->cb_fn) {
977 		spdk_thread_send_msg(qpair_ctx->thread, qpair_ctx->cb_fn, qpair_ctx->ctx);
978 	}
979 	free(qpair_ctx);
980 }
981 
982 static void
983 _nvmf_transport_qpair_fini(void *ctx)
984 {
985 	struct nvmf_qpair_disconnect_ctx *qpair_ctx = ctx;
986 
987 	spdk_nvmf_poll_group_remove(qpair_ctx->qpair);
988 	nvmf_transport_qpair_fini(qpair_ctx->qpair, _nvmf_transport_qpair_fini_complete, qpair_ctx);
989 }
990 
991 static void
992 _nvmf_ctrlr_free_from_qpair(void *ctx)
993 {
994 	struct nvmf_qpair_disconnect_ctx *qpair_ctx = ctx;
995 	struct spdk_nvmf_ctrlr *ctrlr = qpair_ctx->ctrlr;
996 	uint32_t count;
997 
998 	spdk_bit_array_clear(ctrlr->qpair_mask, qpair_ctx->qid);
999 	count = spdk_bit_array_count_set(ctrlr->qpair_mask);
1000 	if (count == 0) {
1001 		assert(!ctrlr->in_destruct);
1002 		ctrlr->in_destruct = true;
1003 		spdk_thread_send_msg(ctrlr->subsys->thread, _nvmf_ctrlr_destruct, ctrlr);
1004 	}
1005 
1006 	spdk_thread_send_msg(qpair_ctx->thread, _nvmf_transport_qpair_fini, qpair_ctx);
1007 }
1008 
1009 void
1010 spdk_nvmf_poll_group_remove(struct spdk_nvmf_qpair *qpair)
1011 {
1012 	struct spdk_nvmf_transport_poll_group *tgroup;
1013 	int rc;
1014 
1015 	SPDK_DTRACE_PROBE2(nvmf_poll_group_remove_qpair, qpair,
1016 			   spdk_thread_get_id(qpair->group->thread));
1017 	nvmf_qpair_set_state(qpair, SPDK_NVMF_QPAIR_ERROR);
1018 
1019 	/* Find the tgroup and remove the qpair from the tgroup */
1020 	TAILQ_FOREACH(tgroup, &qpair->group->tgroups, link) {
1021 		if (tgroup->transport == qpair->transport) {
1022 			rc = nvmf_transport_poll_group_remove(tgroup, qpair);
1023 			if (rc && (rc != ENOTSUP)) {
1024 				SPDK_ERRLOG("Cannot remove qpair=%p from transport group=%p\n",
1025 					    qpair, tgroup);
1026 			}
1027 			break;
1028 		}
1029 	}
1030 
1031 	TAILQ_REMOVE(&qpair->group->qpairs, qpair, link);
1032 	qpair->group = NULL;
1033 }
1034 
1035 static void
1036 _nvmf_qpair_destroy(void *ctx, int status)
1037 {
1038 	struct nvmf_qpair_disconnect_ctx *qpair_ctx = ctx;
1039 	struct spdk_nvmf_qpair *qpair = qpair_ctx->qpair;
1040 	struct spdk_nvmf_ctrlr *ctrlr = qpair->ctrlr;
1041 	struct spdk_nvmf_request *req, *tmp;
1042 	struct spdk_nvmf_subsystem_poll_group *sgroup;
1043 
1044 	assert(qpair->state == SPDK_NVMF_QPAIR_DEACTIVATING);
1045 	qpair_ctx->qid = qpair->qid;
1046 
1047 	if (ctrlr) {
1048 		if (0 == qpair->qid) {
1049 			assert(qpair->group->stat.current_admin_qpairs > 0);
1050 			qpair->group->stat.current_admin_qpairs--;
1051 		} else {
1052 			assert(qpair->group->stat.current_io_qpairs > 0);
1053 			qpair->group->stat.current_io_qpairs--;
1054 		}
1055 
1056 		sgroup = &qpair->group->sgroups[ctrlr->subsys->id];
1057 		TAILQ_FOREACH_SAFE(req, &sgroup->queued, link, tmp) {
1058 			if (req->qpair == qpair) {
1059 				TAILQ_REMOVE(&sgroup->queued, req, link);
1060 				if (nvmf_transport_req_free(req)) {
1061 					SPDK_ERRLOG("Transport request free error!/n");
1062 				}
1063 			}
1064 		}
1065 	}
1066 
1067 	if (!ctrlr || !ctrlr->thread) {
1068 		spdk_nvmf_poll_group_remove(qpair);
1069 		nvmf_transport_qpair_fini(qpair, _nvmf_transport_qpair_fini_complete, qpair_ctx);
1070 		return;
1071 	}
1072 
1073 	qpair_ctx->ctrlr = ctrlr;
1074 	spdk_thread_send_msg(ctrlr->thread, _nvmf_ctrlr_free_from_qpair, qpair_ctx);
1075 }
1076 
1077 static void
1078 _nvmf_qpair_disconnect_msg(void *ctx)
1079 {
1080 	struct nvmf_qpair_disconnect_ctx *qpair_ctx = ctx;
1081 
1082 	spdk_nvmf_qpair_disconnect(qpair_ctx->qpair, qpair_ctx->cb_fn, qpair_ctx->ctx);
1083 	free(ctx);
1084 }
1085 
1086 int
1087 spdk_nvmf_qpair_disconnect(struct spdk_nvmf_qpair *qpair, nvmf_qpair_disconnect_cb cb_fn, void *ctx)
1088 {
1089 	struct spdk_nvmf_poll_group *group = qpair->group;
1090 	struct nvmf_qpair_disconnect_ctx *qpair_ctx;
1091 
1092 	if (__atomic_test_and_set(&qpair->disconnect_started, __ATOMIC_RELAXED)) {
1093 		if (cb_fn) {
1094 			cb_fn(ctx);
1095 		}
1096 		return 0;
1097 	}
1098 
1099 	/* If we get a qpair in the uninitialized state, we can just destroy it immediately */
1100 	if (qpair->state == SPDK_NVMF_QPAIR_UNINITIALIZED) {
1101 		nvmf_transport_qpair_fini(qpair, NULL, NULL);
1102 		if (cb_fn) {
1103 			cb_fn(ctx);
1104 		}
1105 		return 0;
1106 	}
1107 
1108 	assert(group != NULL);
1109 	if (spdk_get_thread() != group->thread) {
1110 		/* clear the atomic so we can set it on the next call on the proper thread. */
1111 		__atomic_clear(&qpair->disconnect_started, __ATOMIC_RELAXED);
1112 		qpair_ctx = calloc(1, sizeof(struct nvmf_qpair_disconnect_ctx));
1113 		if (!qpair_ctx) {
1114 			SPDK_ERRLOG("Unable to allocate context for nvmf_qpair_disconnect\n");
1115 			return -ENOMEM;
1116 		}
1117 		qpair_ctx->qpair = qpair;
1118 		qpair_ctx->cb_fn = cb_fn;
1119 		qpair_ctx->thread = group->thread;
1120 		qpair_ctx->ctx = ctx;
1121 		spdk_thread_send_msg(group->thread, _nvmf_qpair_disconnect_msg, qpair_ctx);
1122 		return 0;
1123 	}
1124 
1125 	SPDK_DTRACE_PROBE2(nvmf_qpair_disconnect, qpair, spdk_thread_get_id(group->thread));
1126 	assert(qpair->state == SPDK_NVMF_QPAIR_ACTIVE);
1127 	nvmf_qpair_set_state(qpair, SPDK_NVMF_QPAIR_DEACTIVATING);
1128 
1129 	qpair_ctx = calloc(1, sizeof(struct nvmf_qpair_disconnect_ctx));
1130 	if (!qpair_ctx) {
1131 		SPDK_ERRLOG("Unable to allocate context for nvmf_qpair_disconnect\n");
1132 		return -ENOMEM;
1133 	}
1134 
1135 	qpair_ctx->qpair = qpair;
1136 	qpair_ctx->cb_fn = cb_fn;
1137 	qpair_ctx->thread = group->thread;
1138 	qpair_ctx->ctx = ctx;
1139 
1140 	/* Check for outstanding I/O */
1141 	if (!TAILQ_EMPTY(&qpair->outstanding)) {
1142 		SPDK_DTRACE_PROBE2(nvmf_poll_group_drain_qpair, qpair, spdk_thread_get_id(group->thread));
1143 		qpair->state_cb = _nvmf_qpair_destroy;
1144 		qpair->state_cb_arg = qpair_ctx;
1145 		nvmf_qpair_free_aer(qpair);
1146 		return 0;
1147 	}
1148 
1149 	_nvmf_qpair_destroy(qpair_ctx, 0);
1150 
1151 	return 0;
1152 }
1153 
/* Public wrapper: delegates to nvmf_transport_qpair_get_peer_trid() and returns its result. */
int
spdk_nvmf_qpair_get_peer_trid(struct spdk_nvmf_qpair *qpair,
			      struct spdk_nvme_transport_id *trid)
{
	int rc;

	rc = nvmf_transport_qpair_get_peer_trid(qpair, trid);

	return rc;
}
1160 
/* Public wrapper: delegates to nvmf_transport_qpair_get_local_trid() and returns its result. */
int
spdk_nvmf_qpair_get_local_trid(struct spdk_nvmf_qpair *qpair,
			       struct spdk_nvme_transport_id *trid)
{
	int rc;

	rc = nvmf_transport_qpair_get_local_trid(qpair, trid);

	return rc;
}
1167 
/* Public wrapper: delegates to nvmf_transport_qpair_get_listen_trid() and returns its result. */
int
spdk_nvmf_qpair_get_listen_trid(struct spdk_nvmf_qpair *qpair,
				struct spdk_nvme_transport_id *trid)
{
	int rc;

	rc = nvmf_transport_qpair_get_listen_trid(qpair, trid);

	return rc;
}
1174 
1175 int
1176 nvmf_poll_group_add_transport(struct spdk_nvmf_poll_group *group,
1177 			      struct spdk_nvmf_transport *transport)
1178 {
1179 	struct spdk_nvmf_transport_poll_group *tgroup;
1180 
1181 	TAILQ_FOREACH(tgroup, &group->tgroups, link) {
1182 		if (tgroup->transport == transport) {
1183 			/* Transport already in the poll group */
1184 			return 0;
1185 		}
1186 	}
1187 
1188 	tgroup = nvmf_transport_poll_group_create(transport);
1189 	if (!tgroup) {
1190 		SPDK_ERRLOG("Unable to create poll group for transport\n");
1191 		return -1;
1192 	}
1193 
1194 	tgroup->group = group;
1195 	TAILQ_INSERT_TAIL(&group->tgroups, tgroup, link);
1196 
1197 	return 0;
1198 }
1199 
1200 static int
1201 poll_group_update_subsystem(struct spdk_nvmf_poll_group *group,
1202 			    struct spdk_nvmf_subsystem *subsystem)
1203 {
1204 	struct spdk_nvmf_subsystem_poll_group *sgroup;
1205 	uint32_t new_num_ns, old_num_ns;
1206 	uint32_t i, j;
1207 	struct spdk_nvmf_ns *ns;
1208 	struct spdk_nvmf_registrant *reg, *tmp;
1209 	struct spdk_io_channel *ch;
1210 	struct spdk_nvmf_subsystem_pg_ns_info *ns_info;
1211 	struct spdk_nvmf_ctrlr *ctrlr;
1212 	bool ns_changed;
1213 
1214 	/* Make sure our poll group has memory for this subsystem allocated */
1215 	if (subsystem->id >= group->num_sgroups) {
1216 		return -ENOMEM;
1217 	}
1218 
1219 	sgroup = &group->sgroups[subsystem->id];
1220 
1221 	/* Make sure the array of namespace information is the correct size */
1222 	new_num_ns = subsystem->max_nsid;
1223 	old_num_ns = sgroup->num_ns;
1224 
1225 	ns_changed = false;
1226 
1227 	if (old_num_ns == 0) {
1228 		if (new_num_ns > 0) {
1229 			/* First allocation */
1230 			sgroup->ns_info = calloc(new_num_ns, sizeof(struct spdk_nvmf_subsystem_pg_ns_info));
1231 			if (!sgroup->ns_info) {
1232 				return -ENOMEM;
1233 			}
1234 		}
1235 	} else if (new_num_ns > old_num_ns) {
1236 		void *buf;
1237 
1238 		/* Make the array larger */
1239 		buf = realloc(sgroup->ns_info, new_num_ns * sizeof(struct spdk_nvmf_subsystem_pg_ns_info));
1240 		if (!buf) {
1241 			return -ENOMEM;
1242 		}
1243 
1244 		sgroup->ns_info = buf;
1245 
1246 		/* Null out the new namespace information slots */
1247 		for (i = old_num_ns; i < new_num_ns; i++) {
1248 			memset(&sgroup->ns_info[i], 0, sizeof(struct spdk_nvmf_subsystem_pg_ns_info));
1249 		}
1250 	} else if (new_num_ns < old_num_ns) {
1251 		void *buf;
1252 
1253 		/* Free the extra I/O channels */
1254 		for (i = new_num_ns; i < old_num_ns; i++) {
1255 			ns_info = &sgroup->ns_info[i];
1256 
1257 			if (ns_info->channel) {
1258 				spdk_put_io_channel(ns_info->channel);
1259 				ns_info->channel = NULL;
1260 			}
1261 		}
1262 
1263 		/* Make the array smaller */
1264 		if (new_num_ns > 0) {
1265 			buf = realloc(sgroup->ns_info, new_num_ns * sizeof(struct spdk_nvmf_subsystem_pg_ns_info));
1266 			if (!buf) {
1267 				return -ENOMEM;
1268 			}
1269 			sgroup->ns_info = buf;
1270 		} else {
1271 			free(sgroup->ns_info);
1272 			sgroup->ns_info = NULL;
1273 		}
1274 	}
1275 
1276 	sgroup->num_ns = new_num_ns;
1277 
1278 	/* Detect bdevs that were added or removed */
1279 	for (i = 0; i < sgroup->num_ns; i++) {
1280 		ns = subsystem->ns[i];
1281 		ns_info = &sgroup->ns_info[i];
1282 		ch = ns_info->channel;
1283 
1284 		if (ns == NULL && ch == NULL) {
1285 			/* Both NULL. Leave empty */
1286 		} else if (ns == NULL && ch != NULL) {
1287 			/* There was a channel here, but the namespace is gone. */
1288 			ns_changed = true;
1289 			spdk_put_io_channel(ch);
1290 			ns_info->channel = NULL;
1291 		} else if (ns != NULL && ch == NULL) {
1292 			/* A namespace appeared but there is no channel yet */
1293 			ns_changed = true;
1294 			ch = spdk_bdev_get_io_channel(ns->desc);
1295 			if (ch == NULL) {
1296 				SPDK_ERRLOG("Could not allocate I/O channel.\n");
1297 				return -ENOMEM;
1298 			}
1299 			ns_info->channel = ch;
1300 		} else if (spdk_uuid_compare(&ns_info->uuid, spdk_bdev_get_uuid(ns->bdev)) != 0) {
1301 			/* A namespace was here before, but was replaced by a new one. */
1302 			ns_changed = true;
1303 			spdk_put_io_channel(ns_info->channel);
1304 			memset(ns_info, 0, sizeof(*ns_info));
1305 
1306 			ch = spdk_bdev_get_io_channel(ns->desc);
1307 			if (ch == NULL) {
1308 				SPDK_ERRLOG("Could not allocate I/O channel.\n");
1309 				return -ENOMEM;
1310 			}
1311 			ns_info->channel = ch;
1312 		} else if (ns_info->num_blocks != spdk_bdev_get_num_blocks(ns->bdev)) {
1313 			/* Namespace is still there but size has changed */
1314 			SPDK_DEBUGLOG(nvmf, "Namespace resized: subsystem_id %u,"
1315 				      " nsid %u, pg %p, old %" PRIu64 ", new %" PRIu64 "\n",
1316 				      subsystem->id,
1317 				      ns->nsid,
1318 				      group,
1319 				      ns_info->num_blocks,
1320 				      spdk_bdev_get_num_blocks(ns->bdev));
1321 			ns_changed = true;
1322 		}
1323 
1324 		if (ns == NULL) {
1325 			memset(ns_info, 0, sizeof(*ns_info));
1326 		} else {
1327 			ns_info->uuid = *spdk_bdev_get_uuid(ns->bdev);
1328 			ns_info->num_blocks = spdk_bdev_get_num_blocks(ns->bdev);
1329 			ns_info->crkey = ns->crkey;
1330 			ns_info->rtype = ns->rtype;
1331 			if (ns->holder) {
1332 				ns_info->holder_id = ns->holder->hostid;
1333 			}
1334 
1335 			memset(&ns_info->reg_hostid, 0, SPDK_NVMF_MAX_NUM_REGISTRANTS * sizeof(struct spdk_uuid));
1336 			j = 0;
1337 			TAILQ_FOREACH_SAFE(reg, &ns->registrants, link, tmp) {
1338 				if (j >= SPDK_NVMF_MAX_NUM_REGISTRANTS) {
1339 					SPDK_ERRLOG("Maximum %u registrants can support.\n", SPDK_NVMF_MAX_NUM_REGISTRANTS);
1340 					return -EINVAL;
1341 				}
1342 				ns_info->reg_hostid[j++] = reg->hostid;
1343 			}
1344 		}
1345 	}
1346 
1347 	if (ns_changed) {
1348 		TAILQ_FOREACH(ctrlr, &subsystem->ctrlrs, link) {
1349 			/* It is possible that a ctrlr was added but the admin_qpair hasn't been
1350 			 * assigned yet.
1351 			 */
1352 			if (!ctrlr->admin_qpair) {
1353 				continue;
1354 			}
1355 			if (ctrlr->admin_qpair->group == group) {
1356 				nvmf_ctrlr_async_event_ns_notice(ctrlr);
1357 				nvmf_ctrlr_async_event_ana_change_notice(ctrlr);
1358 			}
1359 		}
1360 	}
1361 
1362 	return 0;
1363 }
1364 
/* Non-static entry point: forwards to poll_group_update_subsystem() and returns its result. */
int
nvmf_poll_group_update_subsystem(struct spdk_nvmf_poll_group *group,
				 struct spdk_nvmf_subsystem *subsystem)
{
	int rc;

	rc = poll_group_update_subsystem(group, subsystem);

	return rc;
}
1371 
1372 int
1373 nvmf_poll_group_add_subsystem(struct spdk_nvmf_poll_group *group,
1374 			      struct spdk_nvmf_subsystem *subsystem,
1375 			      spdk_nvmf_poll_group_mod_done cb_fn, void *cb_arg)
1376 {
1377 	int rc = 0;
1378 	struct spdk_nvmf_subsystem_poll_group *sgroup = &group->sgroups[subsystem->id];
1379 	uint32_t i;
1380 
1381 	TAILQ_INIT(&sgroup->queued);
1382 
1383 	rc = poll_group_update_subsystem(group, subsystem);
1384 	if (rc) {
1385 		nvmf_poll_group_remove_subsystem(group, subsystem, NULL, NULL);
1386 		goto fini;
1387 	}
1388 
1389 	sgroup->state = SPDK_NVMF_SUBSYSTEM_ACTIVE;
1390 
1391 	for (i = 0; i < sgroup->num_ns; i++) {
1392 		sgroup->ns_info[i].state = SPDK_NVMF_SUBSYSTEM_ACTIVE;
1393 	}
1394 
1395 fini:
1396 	if (cb_fn) {
1397 		cb_fn(cb_arg, rc);
1398 	}
1399 
1400 	return rc;
1401 }
1402 
1403 static void
1404 _nvmf_poll_group_remove_subsystem_cb(void *ctx, int status)
1405 {
1406 	struct nvmf_qpair_disconnect_many_ctx *qpair_ctx = ctx;
1407 	struct spdk_nvmf_subsystem *subsystem;
1408 	struct spdk_nvmf_poll_group *group;
1409 	struct spdk_nvmf_subsystem_poll_group *sgroup;
1410 	spdk_nvmf_poll_group_mod_done cpl_fn = NULL;
1411 	void *cpl_ctx = NULL;
1412 	uint32_t nsid;
1413 
1414 	group = qpair_ctx->group;
1415 	subsystem = qpair_ctx->subsystem;
1416 	cpl_fn = qpair_ctx->cpl_fn;
1417 	cpl_ctx = qpair_ctx->cpl_ctx;
1418 	sgroup = &group->sgroups[subsystem->id];
1419 
1420 	if (status) {
1421 		goto fini;
1422 	}
1423 
1424 	for (nsid = 0; nsid < sgroup->num_ns; nsid++) {
1425 		if (sgroup->ns_info[nsid].channel) {
1426 			spdk_put_io_channel(sgroup->ns_info[nsid].channel);
1427 			sgroup->ns_info[nsid].channel = NULL;
1428 		}
1429 	}
1430 
1431 	sgroup->num_ns = 0;
1432 	free(sgroup->ns_info);
1433 	sgroup->ns_info = NULL;
1434 fini:
1435 	free(qpair_ctx);
1436 	if (cpl_fn) {
1437 		cpl_fn(cpl_ctx, status);
1438 	}
1439 }
1440 
1441 static void nvmf_poll_group_remove_subsystem_msg(void *ctx);
1442 
1443 static void
1444 remove_subsystem_qpair_cb(void *ctx)
1445 {
1446 	struct nvmf_qpair_disconnect_many_ctx *qpair_ctx = ctx;
1447 
1448 	assert(qpair_ctx->count > 0);
1449 	qpair_ctx->count--;
1450 	if (qpair_ctx->count == 0) {
1451 		/* All of the asynchronous callbacks for this context have been
1452 		 * completed.  Call nvmf_poll_group_remove_subsystem_msg() again
1453 		 * to check if all associated qpairs for this subsystem have
1454 		 * been removed from the poll group.
1455 		 */
1456 		nvmf_poll_group_remove_subsystem_msg(ctx);
1457 	}
1458 }
1459 
1460 static void
1461 nvmf_poll_group_remove_subsystem_msg(void *ctx)
1462 {
1463 	struct spdk_nvmf_qpair *qpair, *qpair_tmp;
1464 	struct spdk_nvmf_subsystem *subsystem;
1465 	struct spdk_nvmf_poll_group *group;
1466 	struct nvmf_qpair_disconnect_many_ctx *qpair_ctx = ctx;
1467 	bool qpairs_found = false;
1468 	int rc = 0;
1469 
1470 	group = qpair_ctx->group;
1471 	subsystem = qpair_ctx->subsystem;
1472 
1473 	/* Initialize count to 1.  This acts like a ref count, to ensure that if spdk_nvmf_qpair_disconnect
1474 	 * immediately invokes the callback (i.e. the qpairs is already in process of being disconnected)
1475 	 * that we don't recursively call nvmf_poll_group_remove_subsystem_msg before we've iterated the
1476 	 * full list of qpairs.
1477 	 */
1478 	qpair_ctx->count = 1;
1479 	TAILQ_FOREACH_SAFE(qpair, &group->qpairs, link, qpair_tmp) {
1480 		if ((qpair->ctrlr != NULL) && (qpair->ctrlr->subsys == subsystem)) {
1481 			qpairs_found = true;
1482 			qpair_ctx->count++;
1483 			rc = spdk_nvmf_qpair_disconnect(qpair, remove_subsystem_qpair_cb, ctx);
1484 			if (rc) {
1485 				break;
1486 			}
1487 		}
1488 	}
1489 	qpair_ctx->count--;
1490 
1491 	if (!qpairs_found) {
1492 		_nvmf_poll_group_remove_subsystem_cb(ctx, 0);
1493 		return;
1494 	}
1495 
1496 	if (qpair_ctx->count == 0 || rc) {
1497 		/* If count == 0, it means there were some qpairs in the poll group but they
1498 		 * were already in process of being disconnected.  So we send a message to this
1499 		 * same thread so that this function executes again later.  We won't actually
1500 		 * invoke the remove_subsystem_cb until all of the qpairs are actually removed
1501 		 * from the poll group.
1502 		 */
1503 		spdk_thread_send_msg(spdk_get_thread(), nvmf_poll_group_remove_subsystem_msg, ctx);
1504 	}
1505 }
1506 
1507 void
1508 nvmf_poll_group_remove_subsystem(struct spdk_nvmf_poll_group *group,
1509 				 struct spdk_nvmf_subsystem *subsystem,
1510 				 spdk_nvmf_poll_group_mod_done cb_fn, void *cb_arg)
1511 {
1512 	struct spdk_nvmf_subsystem_poll_group *sgroup;
1513 	struct nvmf_qpair_disconnect_many_ctx *ctx;
1514 	uint32_t i;
1515 
1516 	ctx = calloc(1, sizeof(struct nvmf_qpair_disconnect_many_ctx));
1517 	if (!ctx) {
1518 		SPDK_ERRLOG("Unable to allocate memory for context to remove poll subsystem\n");
1519 		if (cb_fn) {
1520 			cb_fn(cb_arg, -1);
1521 		}
1522 		return;
1523 	}
1524 
1525 	ctx->group = group;
1526 	ctx->subsystem = subsystem;
1527 	ctx->cpl_fn = cb_fn;
1528 	ctx->cpl_ctx = cb_arg;
1529 
1530 	sgroup = &group->sgroups[subsystem->id];
1531 	sgroup->state = SPDK_NVMF_SUBSYSTEM_INACTIVE;
1532 
1533 	for (i = 0; i < sgroup->num_ns; i++) {
1534 		sgroup->ns_info[i].state = SPDK_NVMF_SUBSYSTEM_INACTIVE;
1535 	}
1536 
1537 	nvmf_poll_group_remove_subsystem_msg(ctx);
1538 }
1539 
1540 void
1541 nvmf_poll_group_pause_subsystem(struct spdk_nvmf_poll_group *group,
1542 				struct spdk_nvmf_subsystem *subsystem,
1543 				uint32_t nsid,
1544 				spdk_nvmf_poll_group_mod_done cb_fn, void *cb_arg)
1545 {
1546 	struct spdk_nvmf_subsystem_poll_group *sgroup;
1547 	struct spdk_nvmf_subsystem_pg_ns_info *ns_info = NULL;
1548 	int rc = 0;
1549 
1550 	if (subsystem->id >= group->num_sgroups) {
1551 		rc = -1;
1552 		goto fini;
1553 	}
1554 
1555 	sgroup = &group->sgroups[subsystem->id];
1556 	if (sgroup->state == SPDK_NVMF_SUBSYSTEM_PAUSED) {
1557 		goto fini;
1558 	}
1559 	sgroup->state = SPDK_NVMF_SUBSYSTEM_PAUSING;
1560 
1561 	/* NOTE: This implicitly also checks for 0, since 0 - 1 wraps around to UINT32_MAX. */
1562 	if (nsid - 1 < sgroup->num_ns) {
1563 		ns_info  = &sgroup->ns_info[nsid - 1];
1564 		ns_info->state = SPDK_NVMF_SUBSYSTEM_PAUSING;
1565 	}
1566 
1567 	if (sgroup->mgmt_io_outstanding > 0) {
1568 		assert(sgroup->cb_fn == NULL);
1569 		sgroup->cb_fn = cb_fn;
1570 		assert(sgroup->cb_arg == NULL);
1571 		sgroup->cb_arg = cb_arg;
1572 		return;
1573 	}
1574 
1575 	if (ns_info != NULL && ns_info->io_outstanding > 0) {
1576 		assert(sgroup->cb_fn == NULL);
1577 		sgroup->cb_fn = cb_fn;
1578 		assert(sgroup->cb_arg == NULL);
1579 		sgroup->cb_arg = cb_arg;
1580 		return;
1581 	}
1582 
1583 	assert(sgroup->mgmt_io_outstanding == 0);
1584 	sgroup->state = SPDK_NVMF_SUBSYSTEM_PAUSED;
1585 fini:
1586 	if (cb_fn) {
1587 		cb_fn(cb_arg, rc);
1588 	}
1589 }
1590 
1591 void
1592 nvmf_poll_group_resume_subsystem(struct spdk_nvmf_poll_group *group,
1593 				 struct spdk_nvmf_subsystem *subsystem,
1594 				 spdk_nvmf_poll_group_mod_done cb_fn, void *cb_arg)
1595 {
1596 	struct spdk_nvmf_request *req, *tmp;
1597 	struct spdk_nvmf_subsystem_poll_group *sgroup;
1598 	int rc = 0;
1599 	uint32_t i;
1600 
1601 	if (subsystem->id >= group->num_sgroups) {
1602 		rc = -1;
1603 		goto fini;
1604 	}
1605 
1606 	sgroup = &group->sgroups[subsystem->id];
1607 
1608 	if (sgroup->state == SPDK_NVMF_SUBSYSTEM_ACTIVE) {
1609 		goto fini;
1610 	}
1611 
1612 	rc = poll_group_update_subsystem(group, subsystem);
1613 	if (rc) {
1614 		goto fini;
1615 	}
1616 
1617 	for (i = 0; i < sgroup->num_ns; i++) {
1618 		sgroup->ns_info[i].state = SPDK_NVMF_SUBSYSTEM_ACTIVE;
1619 	}
1620 
1621 	sgroup->state = SPDK_NVMF_SUBSYSTEM_ACTIVE;
1622 
1623 	/* Release all queued requests */
1624 	TAILQ_FOREACH_SAFE(req, &sgroup->queued, link, tmp) {
1625 		TAILQ_REMOVE(&sgroup->queued, req, link);
1626 		assert(req->zcopy_phase == NVMF_ZCOPY_PHASE_NONE);
1627 		spdk_nvmf_request_exec(req);
1628 	}
1629 fini:
1630 	if (cb_fn) {
1631 		cb_fn(cb_arg, rc);
1632 	}
1633 }
1634 
1635 
1636 struct spdk_nvmf_poll_group *
1637 spdk_nvmf_get_optimal_poll_group(struct spdk_nvmf_qpair *qpair)
1638 {
1639 	struct spdk_nvmf_transport_poll_group *tgroup;
1640 
1641 	tgroup = nvmf_transport_get_optimal_poll_group(qpair->transport, qpair);
1642 
1643 	if (tgroup == NULL) {
1644 		return NULL;
1645 	}
1646 
1647 	return tgroup->group;
1648 }
1649 
1650 void
1651 spdk_nvmf_poll_group_dump_stat(struct spdk_nvmf_poll_group *group, struct spdk_json_write_ctx *w)
1652 {
1653 	struct spdk_nvmf_transport_poll_group *tgroup;
1654 
1655 	spdk_json_write_object_begin(w);
1656 
1657 	spdk_json_write_named_string(w, "name", spdk_thread_get_name(spdk_get_thread()));
1658 	spdk_json_write_named_uint32(w, "admin_qpairs", group->stat.admin_qpairs);
1659 	spdk_json_write_named_uint32(w, "io_qpairs", group->stat.io_qpairs);
1660 	spdk_json_write_named_uint32(w, "current_admin_qpairs", group->stat.current_admin_qpairs);
1661 	spdk_json_write_named_uint32(w, "current_io_qpairs", group->stat.current_io_qpairs);
1662 	spdk_json_write_named_uint64(w, "pending_bdev_io", group->stat.pending_bdev_io);
1663 
1664 	spdk_json_write_named_array_begin(w, "transports");
1665 
1666 	TAILQ_FOREACH(tgroup, &group->tgroups, link) {
1667 		spdk_json_write_object_begin(w);
1668 		/*
1669 		 * The trtype field intentionally contains a transport name as this is more informative.
1670 		 * The field has not been renamed for backward compatibility.
1671 		 */
1672 		spdk_json_write_named_string(w, "trtype", spdk_nvmf_get_transport_name(tgroup->transport));
1673 
1674 		if (tgroup->transport->ops->poll_group_dump_stat) {
1675 			tgroup->transport->ops->poll_group_dump_stat(tgroup, w);
1676 		}
1677 
1678 		spdk_json_write_object_end(w);
1679 	}
1680 
1681 	spdk_json_write_array_end(w);
1682 	spdk_json_write_object_end(w);
1683 }
1684