xref: /spdk/lib/nvmf/nvmf.c (revision 6d2caa652b778f85d1c3386310b95ed93527245d)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright (c) Intel Corporation. All rights reserved.
5  *   Copyright (c) 2018-2019, 2021 Mellanox Technologies LTD. All rights reserved.
6  *   Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
7  *
8  *   Redistribution and use in source and binary forms, with or without
9  *   modification, are permitted provided that the following conditions
10  *   are met:
11  *
12  *     * Redistributions of source code must retain the above copyright
13  *       notice, this list of conditions and the following disclaimer.
14  *     * Redistributions in binary form must reproduce the above copyright
15  *       notice, this list of conditions and the following disclaimer in
16  *       the documentation and/or other materials provided with the
17  *       distribution.
18  *     * Neither the name of Intel Corporation nor the names of its
19  *       contributors may be used to endorse or promote products derived
20  *       from this software without specific prior written permission.
21  *
22  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  */
34 
35 #include "spdk/stdinc.h"
36 
37 #include "spdk/bdev.h"
38 #include "spdk/bit_array.h"
39 #include "spdk/thread.h"
40 #include "spdk/nvmf.h"
41 #include "spdk/endian.h"
42 #include "spdk/string.h"
43 #include "spdk/log.h"
44 #include "spdk_internal/usdt.h"
45 
46 #include "nvmf_internal.h"
47 #include "transport.h"
48 
49 SPDK_LOG_REGISTER_COMPONENT(nvmf)
50 
51 #define SPDK_NVMF_DEFAULT_MAX_SUBSYSTEMS 1024
52 #define SPDK_NVMF_DEFAULT_ACCEPT_POLL_RATE_US 10000
53 
54 static TAILQ_HEAD(, spdk_nvmf_tgt) g_nvmf_tgts = TAILQ_HEAD_INITIALIZER(g_nvmf_tgts);
55 
56 typedef void (*nvmf_qpair_disconnect_cpl)(void *ctx, int status);
57 static void nvmf_tgt_destroy_poll_group(void *io_device, void *ctx_buf);
58 
59 /* supplied to a single call to nvmf_qpair_disconnect */
60 struct nvmf_qpair_disconnect_ctx {
61 	struct spdk_nvmf_qpair *qpair;
62 	struct spdk_nvmf_ctrlr *ctrlr;
63 	nvmf_qpair_disconnect_cb cb_fn;
64 	struct spdk_thread *thread;
65 	void *ctx;
66 	uint16_t qid;
67 };
68 
69 /*
70  * There are several times when we need to iterate through the list of all qpairs and selectively delete them.
71  * In order to do this sequentially without overlap, we must provide a context to recover the next qpair from
72  * to enable calling nvmf_qpair_disconnect on the next desired qpair.
73  */
74 struct nvmf_qpair_disconnect_many_ctx {
75 	struct spdk_nvmf_subsystem *subsystem;
76 	struct spdk_nvmf_poll_group *group;
77 	spdk_nvmf_poll_group_mod_done cpl_fn;
78 	void *cpl_ctx;
79 	uint32_t count;
80 };
81 
82 static void
83 nvmf_qpair_set_state(struct spdk_nvmf_qpair *qpair,
84 		     enum spdk_nvmf_qpair_state state)
85 {
86 	assert(qpair != NULL);
87 	assert(qpair->group->thread == spdk_get_thread());
88 
89 	qpair->state = state;
90 }
91 
92 static int
93 nvmf_poll_group_poll(void *ctx)
94 {
95 	struct spdk_nvmf_poll_group *group = ctx;
96 	int rc;
97 	int count = 0;
98 	struct spdk_nvmf_transport_poll_group *tgroup;
99 
100 	TAILQ_FOREACH(tgroup, &group->tgroups, link) {
101 		rc = nvmf_transport_poll_group_poll(tgroup);
102 		if (rc < 0) {
103 			return SPDK_POLLER_BUSY;
104 		}
105 		count += rc;
106 	}
107 
108 	return count > 0 ? SPDK_POLLER_BUSY : SPDK_POLLER_IDLE;
109 }
110 
111 static int
112 nvmf_tgt_create_poll_group(void *io_device, void *ctx_buf)
113 {
114 	struct spdk_nvmf_tgt *tgt = io_device;
115 	struct spdk_nvmf_poll_group *group = ctx_buf;
116 	struct spdk_nvmf_transport *transport;
117 	struct spdk_thread *thread = spdk_get_thread();
118 	uint32_t sid;
119 	int rc;
120 
121 	SPDK_DTRACE_PROBE1(nvmf_create_poll_group, spdk_thread_get_id(thread));
122 
123 	TAILQ_INIT(&group->tgroups);
124 	TAILQ_INIT(&group->qpairs);
125 
126 	TAILQ_FOREACH(transport, &tgt->transports, link) {
127 		rc = nvmf_poll_group_add_transport(group, transport);
128 		if (rc != 0) {
129 			return rc;
130 		}
131 	}
132 
133 	group->num_sgroups = tgt->max_subsystems;
134 	group->sgroups = calloc(tgt->max_subsystems, sizeof(struct spdk_nvmf_subsystem_poll_group));
135 	if (!group->sgroups) {
136 		return -ENOMEM;
137 	}
138 
139 	for (sid = 0; sid < tgt->max_subsystems; sid++) {
140 		struct spdk_nvmf_subsystem *subsystem;
141 
142 		subsystem = tgt->subsystems[sid];
143 		if (!subsystem) {
144 			continue;
145 		}
146 
147 		if (nvmf_poll_group_add_subsystem(group, subsystem, NULL, NULL) != 0) {
148 			nvmf_tgt_destroy_poll_group(io_device, ctx_buf);
149 			return -1;
150 		}
151 	}
152 
153 	pthread_mutex_lock(&tgt->mutex);
154 	TAILQ_INSERT_TAIL(&tgt->poll_groups, group, link);
155 	pthread_mutex_unlock(&tgt->mutex);
156 
157 	group->poller = SPDK_POLLER_REGISTER(nvmf_poll_group_poll, group, 0);
158 	group->thread = thread;
159 
160 	return 0;
161 }
162 
163 static void
164 nvmf_tgt_destroy_poll_group(void *io_device, void *ctx_buf)
165 {
166 	struct spdk_nvmf_tgt *tgt = io_device;
167 	struct spdk_nvmf_poll_group *group = ctx_buf;
168 	struct spdk_nvmf_transport_poll_group *tgroup, *tmp;
169 	struct spdk_nvmf_subsystem_poll_group *sgroup;
170 	uint32_t sid, nsid;
171 
172 	SPDK_DTRACE_PROBE1(nvmf_destroy_poll_group, spdk_thread_get_id(group->thread));
173 
174 	pthread_mutex_lock(&tgt->mutex);
175 	TAILQ_REMOVE(&tgt->poll_groups, group, link);
176 	pthread_mutex_unlock(&tgt->mutex);
177 
178 	TAILQ_FOREACH_SAFE(tgroup, &group->tgroups, link, tmp) {
179 		TAILQ_REMOVE(&group->tgroups, tgroup, link);
180 		nvmf_transport_poll_group_destroy(tgroup);
181 	}
182 
183 	for (sid = 0; sid < group->num_sgroups; sid++) {
184 		sgroup = &group->sgroups[sid];
185 
186 		for (nsid = 0; nsid < sgroup->num_ns; nsid++) {
187 			if (sgroup->ns_info[nsid].channel) {
188 				spdk_put_io_channel(sgroup->ns_info[nsid].channel);
189 				sgroup->ns_info[nsid].channel = NULL;
190 			}
191 		}
192 
193 		free(sgroup->ns_info);
194 	}
195 
196 	free(group->sgroups);
197 
198 	spdk_poller_unregister(&group->poller);
199 
200 	if (group->destroy_cb_fn) {
201 		group->destroy_cb_fn(group->destroy_cb_arg, 0);
202 	}
203 }
204 
205 static void
206 _nvmf_tgt_disconnect_next_qpair(void *ctx)
207 {
208 	struct spdk_nvmf_qpair *qpair;
209 	struct nvmf_qpair_disconnect_many_ctx *qpair_ctx = ctx;
210 	struct spdk_nvmf_poll_group *group = qpair_ctx->group;
211 	struct spdk_io_channel *ch;
212 	int rc = 0;
213 
214 	qpair = TAILQ_FIRST(&group->qpairs);
215 
216 	if (qpair) {
217 		rc = spdk_nvmf_qpair_disconnect(qpair, _nvmf_tgt_disconnect_next_qpair, ctx);
218 	}
219 
220 	if (!qpair || rc != 0) {
221 		/* When the refcount from the channels reaches 0, nvmf_tgt_destroy_poll_group will be called. */
222 		ch = spdk_io_channel_from_ctx(group);
223 		spdk_put_io_channel(ch);
224 		free(qpair_ctx);
225 	}
226 }
227 
228 static void
229 nvmf_tgt_destroy_poll_group_qpairs(struct spdk_nvmf_poll_group *group)
230 {
231 	struct nvmf_qpair_disconnect_many_ctx *ctx;
232 
233 	SPDK_DTRACE_PROBE1(nvmf_destroy_poll_group_qpairs, spdk_thread_get_id(group->thread));
234 
235 	ctx = calloc(1, sizeof(struct nvmf_qpair_disconnect_many_ctx));
236 	if (!ctx) {
237 		SPDK_ERRLOG("Failed to allocate memory for destroy poll group ctx\n");
238 		return;
239 	}
240 
241 	ctx->group = group;
242 	_nvmf_tgt_disconnect_next_qpair(ctx);
243 }
244 
245 static int
246 nvmf_tgt_accept(void *ctx)
247 {
248 	struct spdk_nvmf_tgt *tgt = ctx;
249 	struct spdk_nvmf_transport *transport, *tmp;
250 	int count = 0;
251 
252 	TAILQ_FOREACH_SAFE(transport, &tgt->transports, link, tmp) {
253 		count += nvmf_transport_accept(transport);
254 	}
255 
256 	return count > 0 ? SPDK_POLLER_BUSY : SPDK_POLLER_IDLE;
257 }
258 
259 struct spdk_nvmf_tgt *
260 spdk_nvmf_tgt_create(struct spdk_nvmf_target_opts *opts)
261 {
262 	struct spdk_nvmf_tgt *tgt, *tmp_tgt;
263 	uint32_t acceptor_poll_rate;
264 
265 	if (strnlen(opts->name, NVMF_TGT_NAME_MAX_LENGTH) == NVMF_TGT_NAME_MAX_LENGTH) {
266 		SPDK_ERRLOG("Provided target name exceeds the max length of %u.\n", NVMF_TGT_NAME_MAX_LENGTH);
267 		return NULL;
268 	}
269 
270 	TAILQ_FOREACH(tmp_tgt, &g_nvmf_tgts, link) {
271 		if (!strncmp(opts->name, tmp_tgt->name, NVMF_TGT_NAME_MAX_LENGTH)) {
272 			SPDK_ERRLOG("Provided target name must be unique.\n");
273 			return NULL;
274 		}
275 	}
276 
277 	tgt = calloc(1, sizeof(*tgt));
278 	if (!tgt) {
279 		return NULL;
280 	}
281 
282 	snprintf(tgt->name, NVMF_TGT_NAME_MAX_LENGTH, "%s", opts->name);
283 
284 	if (!opts || !opts->max_subsystems) {
285 		tgt->max_subsystems = SPDK_NVMF_DEFAULT_MAX_SUBSYSTEMS;
286 	} else {
287 		tgt->max_subsystems = opts->max_subsystems;
288 	}
289 
290 	if (!opts || !opts->acceptor_poll_rate) {
291 		acceptor_poll_rate = SPDK_NVMF_DEFAULT_ACCEPT_POLL_RATE_US;
292 	} else {
293 		acceptor_poll_rate = opts->acceptor_poll_rate;
294 	}
295 
296 	if (!opts) {
297 		tgt->crdt[0] = 0;
298 		tgt->crdt[1] = 0;
299 		tgt->crdt[2] = 0;
300 	} else {
301 		tgt->crdt[0] = opts->crdt[0];
302 		tgt->crdt[1] = opts->crdt[1];
303 		tgt->crdt[2] = opts->crdt[2];
304 	}
305 
306 	tgt->discovery_genctr = 0;
307 	TAILQ_INIT(&tgt->transports);
308 	TAILQ_INIT(&tgt->poll_groups);
309 
310 	tgt->subsystems = calloc(tgt->max_subsystems, sizeof(struct spdk_nvmf_subsystem *));
311 	if (!tgt->subsystems) {
312 		free(tgt);
313 		return NULL;
314 	}
315 
316 	pthread_mutex_init(&tgt->mutex, NULL);
317 
318 	tgt->accept_poller = SPDK_POLLER_REGISTER(nvmf_tgt_accept, tgt, acceptor_poll_rate);
319 	if (!tgt->accept_poller) {
320 		pthread_mutex_destroy(&tgt->mutex);
321 		free(tgt->subsystems);
322 		free(tgt);
323 		return NULL;
324 	}
325 
326 	spdk_io_device_register(tgt,
327 				nvmf_tgt_create_poll_group,
328 				nvmf_tgt_destroy_poll_group,
329 				sizeof(struct spdk_nvmf_poll_group),
330 				tgt->name);
331 
332 	TAILQ_INSERT_HEAD(&g_nvmf_tgts, tgt, link);
333 
334 	return tgt;
335 }
336 
337 static void
338 _nvmf_tgt_destroy_next_transport(void *ctx)
339 {
340 	struct spdk_nvmf_tgt *tgt = ctx;
341 	struct spdk_nvmf_transport *transport;
342 
343 	if (!TAILQ_EMPTY(&tgt->transports)) {
344 		transport = TAILQ_FIRST(&tgt->transports);
345 		TAILQ_REMOVE(&tgt->transports, transport, link);
346 		spdk_nvmf_transport_destroy(transport, _nvmf_tgt_destroy_next_transport, tgt);
347 	} else {
348 		spdk_nvmf_tgt_destroy_done_fn *destroy_cb_fn = tgt->destroy_cb_fn;
349 		void *destroy_cb_arg = tgt->destroy_cb_arg;
350 
351 		pthread_mutex_destroy(&tgt->mutex);
352 		free(tgt);
353 
354 		if (destroy_cb_fn) {
355 			destroy_cb_fn(destroy_cb_arg, 0);
356 		}
357 	}
358 }
359 
360 static void
361 nvmf_tgt_destroy_cb(void *io_device)
362 {
363 	struct spdk_nvmf_tgt *tgt = io_device;
364 	uint32_t i;
365 	int rc;
366 
367 	if (tgt->subsystems) {
368 		for (i = 0; i < tgt->max_subsystems; i++) {
369 			if (tgt->subsystems[i]) {
370 				nvmf_subsystem_remove_all_listeners(tgt->subsystems[i], true);
371 
372 				rc = spdk_nvmf_subsystem_destroy(tgt->subsystems[i], nvmf_tgt_destroy_cb, tgt);
373 				if (rc) {
374 					if (rc == -EINPROGRESS) {
375 						/* If rc is -EINPROGRESS, nvmf_tgt_destroy_cb will be called again when subsystem #i
376 						 * is destroyed, nvmf_tgt_destroy_cb will continue to destroy other subsystems if any */
377 						return;
378 					} else {
379 						SPDK_ERRLOG("Failed to destroy subsystem, id %u, rc %d\n", tgt->subsystems[i]->id, rc);
380 						assert(0);
381 					}
382 				}
383 			}
384 		}
385 		free(tgt->subsystems);
386 	}
387 
388 	_nvmf_tgt_destroy_next_transport(tgt);
389 }
390 
391 void
392 spdk_nvmf_tgt_destroy(struct spdk_nvmf_tgt *tgt,
393 		      spdk_nvmf_tgt_destroy_done_fn cb_fn,
394 		      void *cb_arg)
395 {
396 	tgt->destroy_cb_fn = cb_fn;
397 	tgt->destroy_cb_arg = cb_arg;
398 
399 	spdk_poller_unregister(&tgt->accept_poller);
400 
401 	TAILQ_REMOVE(&g_nvmf_tgts, tgt, link);
402 
403 	spdk_io_device_unregister(tgt, nvmf_tgt_destroy_cb);
404 }
405 
406 const char *
407 spdk_nvmf_tgt_get_name(struct spdk_nvmf_tgt *tgt)
408 {
409 	return tgt->name;
410 }
411 
412 struct spdk_nvmf_tgt *
413 spdk_nvmf_get_tgt(const char *name)
414 {
415 	struct spdk_nvmf_tgt *tgt;
416 	uint32_t num_targets = 0;
417 
418 	TAILQ_FOREACH(tgt, &g_nvmf_tgts, link) {
419 		if (name) {
420 			if (!strncmp(tgt->name, name, NVMF_TGT_NAME_MAX_LENGTH)) {
421 				return tgt;
422 			}
423 		}
424 		num_targets++;
425 	}
426 
427 	/*
428 	 * special case. If there is only one target and
429 	 * no name was specified, return the only available
430 	 * target. If there is more than one target, name must
431 	 * be specified.
432 	 */
433 	if (!name && num_targets == 1) {
434 		return TAILQ_FIRST(&g_nvmf_tgts);
435 	}
436 
437 	return NULL;
438 }
439 
440 struct spdk_nvmf_tgt *
441 spdk_nvmf_get_first_tgt(void)
442 {
443 	return TAILQ_FIRST(&g_nvmf_tgts);
444 }
445 
446 struct spdk_nvmf_tgt *
447 spdk_nvmf_get_next_tgt(struct spdk_nvmf_tgt *prev)
448 {
449 	return TAILQ_NEXT(prev, link);
450 }
451 
452 static void
453 nvmf_write_subsystem_config_json(struct spdk_json_write_ctx *w,
454 				 struct spdk_nvmf_subsystem *subsystem)
455 {
456 	struct spdk_nvmf_host *host;
457 	struct spdk_nvmf_subsystem_listener *listener;
458 	const struct spdk_nvme_transport_id *trid;
459 	struct spdk_nvmf_ns *ns;
460 	struct spdk_nvmf_ns_opts ns_opts;
461 	uint32_t max_namespaces;
462 	char uuid_str[SPDK_UUID_STRING_LEN];
463 
464 	if (spdk_nvmf_subsystem_get_type(subsystem) != SPDK_NVMF_SUBTYPE_NVME) {
465 		return;
466 	}
467 
468 	/* { */
469 	spdk_json_write_object_begin(w);
470 	spdk_json_write_named_string(w, "method", "nvmf_create_subsystem");
471 
472 	/*     "params" : { */
473 	spdk_json_write_named_object_begin(w, "params");
474 	spdk_json_write_named_string(w, "nqn", spdk_nvmf_subsystem_get_nqn(subsystem));
475 	spdk_json_write_named_bool(w, "allow_any_host", spdk_nvmf_subsystem_get_allow_any_host(subsystem));
476 	spdk_json_write_named_string(w, "serial_number", spdk_nvmf_subsystem_get_sn(subsystem));
477 	spdk_json_write_named_string(w, "model_number", spdk_nvmf_subsystem_get_mn(subsystem));
478 
479 	max_namespaces = spdk_nvmf_subsystem_get_max_namespaces(subsystem);
480 	if (max_namespaces != 0) {
481 		spdk_json_write_named_uint32(w, "max_namespaces", max_namespaces);
482 	}
483 
484 	spdk_json_write_named_uint32(w, "min_cntlid", spdk_nvmf_subsystem_get_min_cntlid(subsystem));
485 	spdk_json_write_named_uint32(w, "max_cntlid", spdk_nvmf_subsystem_get_max_cntlid(subsystem));
486 	spdk_json_write_named_bool(w, "ana_reporting", nvmf_subsystem_get_ana_reporting(subsystem));
487 
488 	/*     } "params" */
489 	spdk_json_write_object_end(w);
490 
491 	/* } */
492 	spdk_json_write_object_end(w);
493 
494 	for (listener = spdk_nvmf_subsystem_get_first_listener(subsystem); listener != NULL;
495 	     listener = spdk_nvmf_subsystem_get_next_listener(subsystem, listener)) {
496 		trid = spdk_nvmf_subsystem_listener_get_trid(listener);
497 
498 		spdk_json_write_object_begin(w);
499 		spdk_json_write_named_string(w, "method", "nvmf_subsystem_add_listener");
500 
501 		/*     "params" : { */
502 		spdk_json_write_named_object_begin(w, "params");
503 
504 		spdk_json_write_named_string(w, "nqn", spdk_nvmf_subsystem_get_nqn(subsystem));
505 		nvmf_transport_listen_dump_opts(listener->transport, trid, w);
506 
507 		/*     } "params" */
508 		spdk_json_write_object_end(w);
509 
510 		/* } */
511 		spdk_json_write_object_end(w);
512 	}
513 
514 	for (host = spdk_nvmf_subsystem_get_first_host(subsystem); host != NULL;
515 	     host = spdk_nvmf_subsystem_get_next_host(subsystem, host)) {
516 
517 		spdk_json_write_object_begin(w);
518 		spdk_json_write_named_string(w, "method", "nvmf_subsystem_add_host");
519 
520 		/*     "params" : { */
521 		spdk_json_write_named_object_begin(w, "params");
522 
523 		spdk_json_write_named_string(w, "nqn", spdk_nvmf_subsystem_get_nqn(subsystem));
524 		spdk_json_write_named_string(w, "host", spdk_nvmf_host_get_nqn(host));
525 
526 		/*     } "params" */
527 		spdk_json_write_object_end(w);
528 
529 		/* } */
530 		spdk_json_write_object_end(w);
531 	}
532 
533 	for (ns = spdk_nvmf_subsystem_get_first_ns(subsystem); ns != NULL;
534 	     ns = spdk_nvmf_subsystem_get_next_ns(subsystem, ns)) {
535 		spdk_nvmf_ns_get_opts(ns, &ns_opts, sizeof(ns_opts));
536 
537 		spdk_json_write_object_begin(w);
538 		spdk_json_write_named_string(w, "method", "nvmf_subsystem_add_ns");
539 
540 		/*     "params" : { */
541 		spdk_json_write_named_object_begin(w, "params");
542 
543 		spdk_json_write_named_string(w, "nqn", spdk_nvmf_subsystem_get_nqn(subsystem));
544 
545 		/*     "namespace" : { */
546 		spdk_json_write_named_object_begin(w, "namespace");
547 
548 		spdk_json_write_named_uint32(w, "nsid", spdk_nvmf_ns_get_id(ns));
549 		spdk_json_write_named_string(w, "bdev_name", spdk_bdev_get_name(spdk_nvmf_ns_get_bdev(ns)));
550 
551 		if (!spdk_mem_all_zero(ns_opts.nguid, sizeof(ns_opts.nguid))) {
552 			SPDK_STATIC_ASSERT(sizeof(ns_opts.nguid) == sizeof(uint64_t) * 2, "size mismatch");
553 			spdk_json_write_named_string_fmt(w, "nguid", "%016"PRIX64"%016"PRIX64, from_be64(&ns_opts.nguid[0]),
554 							 from_be64(&ns_opts.nguid[8]));
555 		}
556 
557 		if (!spdk_mem_all_zero(ns_opts.eui64, sizeof(ns_opts.eui64))) {
558 			SPDK_STATIC_ASSERT(sizeof(ns_opts.eui64) == sizeof(uint64_t), "size mismatch");
559 			spdk_json_write_named_string_fmt(w, "eui64", "%016"PRIX64, from_be64(&ns_opts.eui64));
560 		}
561 
562 		if (!spdk_mem_all_zero(&ns_opts.uuid, sizeof(ns_opts.uuid))) {
563 			spdk_uuid_fmt_lower(uuid_str, sizeof(uuid_str), &ns_opts.uuid);
564 			spdk_json_write_named_string(w, "uuid",  uuid_str);
565 		}
566 
567 		if (nvmf_subsystem_get_ana_reporting(subsystem)) {
568 			spdk_json_write_named_uint32(w, "anagrpid", ns_opts.anagrpid);
569 		}
570 
571 		/*     "namespace" */
572 		spdk_json_write_object_end(w);
573 
574 		/*     } "params" */
575 		spdk_json_write_object_end(w);
576 
577 		/* } */
578 		spdk_json_write_object_end(w);
579 	}
580 }
581 
582 void
583 spdk_nvmf_tgt_write_config_json(struct spdk_json_write_ctx *w, struct spdk_nvmf_tgt *tgt)
584 {
585 	struct spdk_nvmf_subsystem *subsystem;
586 	struct spdk_nvmf_transport *transport;
587 
588 	spdk_json_write_object_begin(w);
589 	spdk_json_write_named_string(w, "method", "nvmf_set_max_subsystems");
590 
591 	spdk_json_write_named_object_begin(w, "params");
592 	spdk_json_write_named_uint32(w, "max_subsystems", tgt->max_subsystems);
593 	spdk_json_write_object_end(w);
594 
595 	spdk_json_write_object_end(w);
596 
597 	spdk_json_write_object_begin(w);
598 	spdk_json_write_named_string(w, "method", "nvmf_set_crdt");
599 	spdk_json_write_named_object_begin(w, "params");
600 	spdk_json_write_named_uint32(w, "crdt1", tgt->crdt[0]);
601 	spdk_json_write_named_uint32(w, "crdt2", tgt->crdt[1]);
602 	spdk_json_write_named_uint32(w, "crdt3", tgt->crdt[2]);
603 	spdk_json_write_object_end(w);
604 	spdk_json_write_object_end(w);
605 
606 	/* write transports */
607 	TAILQ_FOREACH(transport, &tgt->transports, link) {
608 		spdk_json_write_object_begin(w);
609 		spdk_json_write_named_string(w, "method", "nvmf_create_transport");
610 		nvmf_transport_dump_opts(transport, w, true);
611 		spdk_json_write_object_end(w);
612 	}
613 
614 	subsystem = spdk_nvmf_subsystem_get_first(tgt);
615 	while (subsystem) {
616 		nvmf_write_subsystem_config_json(w, subsystem);
617 		subsystem = spdk_nvmf_subsystem_get_next(subsystem);
618 	}
619 }
620 
621 static void
622 nvmf_listen_opts_copy(struct spdk_nvmf_listen_opts *opts,
623 		      const struct spdk_nvmf_listen_opts *opts_src, size_t opts_size)
624 {
625 	assert(opts);
626 	assert(opts_src);
627 
628 	opts->opts_size = opts_size;
629 
630 #define SET_FIELD(field) \
631     if (offsetof(struct spdk_nvmf_listen_opts, field) + sizeof(opts->field) <= opts_size) { \
632                  opts->field = opts_src->field; \
633     } \
634 
635 	SET_FIELD(transport_specific);
636 #undef SET_FIELD
637 
638 	/* Do not remove this statement, you should always update this statement when you adding a new field,
639 	 * and do not forget to add the SET_FIELD statement for your added field. */
640 	SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_listen_opts) == 16, "Incorrect size");
641 }
642 
643 void
644 spdk_nvmf_listen_opts_init(struct spdk_nvmf_listen_opts *opts, size_t opts_size)
645 {
646 	struct spdk_nvmf_listen_opts opts_local = {};
647 
648 	/* local version of opts should have defaults set here */
649 
650 	nvmf_listen_opts_copy(opts, &opts_local, opts_size);
651 }
652 
653 int
654 spdk_nvmf_tgt_listen_ext(struct spdk_nvmf_tgt *tgt, const struct spdk_nvme_transport_id *trid,
655 			 struct spdk_nvmf_listen_opts *opts)
656 {
657 	struct spdk_nvmf_transport *transport;
658 	int rc;
659 	struct spdk_nvmf_listen_opts opts_local = {};
660 
661 	if (!opts) {
662 		SPDK_ERRLOG("opts should not be NULL\n");
663 		return -EINVAL;
664 	}
665 
666 	if (!opts->opts_size) {
667 		SPDK_ERRLOG("The opts_size in opts structure should not be zero\n");
668 		return -EINVAL;
669 	}
670 
671 	transport = spdk_nvmf_tgt_get_transport(tgt, trid->trstring);
672 	if (!transport) {
673 		SPDK_ERRLOG("Unable to find %s transport. The transport must be created first also make sure it is properly registered.\n",
674 			    trid->trstring);
675 		return -EINVAL;
676 	}
677 
678 	nvmf_listen_opts_copy(&opts_local, opts, opts->opts_size);
679 	rc = spdk_nvmf_transport_listen(transport, trid, &opts_local);
680 	if (rc < 0) {
681 		SPDK_ERRLOG("Unable to listen on address '%s'\n", trid->traddr);
682 	}
683 
684 	return rc;
685 }
686 
687 int
688 spdk_nvmf_tgt_stop_listen(struct spdk_nvmf_tgt *tgt,
689 			  struct spdk_nvme_transport_id *trid)
690 {
691 	struct spdk_nvmf_transport *transport;
692 	int rc;
693 
694 	transport = spdk_nvmf_tgt_get_transport(tgt, trid->trstring);
695 	if (!transport) {
696 		SPDK_ERRLOG("Unable to find %s transport. The transport must be created first also make sure it is properly registered.\n",
697 			    trid->trstring);
698 		return -EINVAL;
699 	}
700 
701 	rc = spdk_nvmf_transport_stop_listen(transport, trid);
702 	if (rc < 0) {
703 		SPDK_ERRLOG("Failed to stop listening on address '%s'\n", trid->traddr);
704 		return rc;
705 	}
706 	return 0;
707 }
708 
709 struct spdk_nvmf_tgt_add_transport_ctx {
710 	struct spdk_nvmf_tgt *tgt;
711 	struct spdk_nvmf_transport *transport;
712 	spdk_nvmf_tgt_add_transport_done_fn cb_fn;
713 	void *cb_arg;
714 };
715 
716 static void
717 _nvmf_tgt_add_transport_done(struct spdk_io_channel_iter *i, int status)
718 {
719 	struct spdk_nvmf_tgt_add_transport_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
720 
721 	if (status) {
722 		TAILQ_REMOVE(&ctx->tgt->transports, ctx->transport, link);
723 	}
724 	ctx->cb_fn(ctx->cb_arg, status);
725 
726 	free(ctx);
727 }
728 
729 static void
730 _nvmf_tgt_add_transport(struct spdk_io_channel_iter *i)
731 {
732 	struct spdk_nvmf_tgt_add_transport_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
733 	struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i);
734 	struct spdk_nvmf_poll_group *group = spdk_io_channel_get_ctx(ch);
735 	int rc;
736 
737 	rc = nvmf_poll_group_add_transport(group, ctx->transport);
738 	spdk_for_each_channel_continue(i, rc);
739 }
740 
741 void spdk_nvmf_tgt_add_transport(struct spdk_nvmf_tgt *tgt,
742 				 struct spdk_nvmf_transport *transport,
743 				 spdk_nvmf_tgt_add_transport_done_fn cb_fn,
744 				 void *cb_arg)
745 {
746 	struct spdk_nvmf_tgt_add_transport_ctx *ctx;
747 
748 	if (spdk_nvmf_tgt_get_transport(tgt, transport->ops->name)) {
749 		cb_fn(cb_arg, -EEXIST);
750 		return; /* transport already created */
751 	}
752 
753 	transport->tgt = tgt;
754 	TAILQ_INSERT_TAIL(&tgt->transports, transport, link);
755 
756 	ctx = calloc(1, sizeof(*ctx));
757 	if (!ctx) {
758 		cb_fn(cb_arg, -ENOMEM);
759 		return;
760 	}
761 
762 	ctx->tgt = tgt;
763 	ctx->transport = transport;
764 	ctx->cb_fn = cb_fn;
765 	ctx->cb_arg = cb_arg;
766 
767 	spdk_for_each_channel(tgt,
768 			      _nvmf_tgt_add_transport,
769 			      ctx,
770 			      _nvmf_tgt_add_transport_done);
771 }
772 
773 struct spdk_nvmf_subsystem *
774 spdk_nvmf_tgt_find_subsystem(struct spdk_nvmf_tgt *tgt, const char *subnqn)
775 {
776 	struct spdk_nvmf_subsystem	*subsystem;
777 	uint32_t sid;
778 
779 	if (!subnqn) {
780 		return NULL;
781 	}
782 
783 	/* Ensure that subnqn is null terminated */
784 	if (!memchr(subnqn, '\0', SPDK_NVMF_NQN_MAX_LEN + 1)) {
785 		SPDK_ERRLOG("Connect SUBNQN is not null terminated\n");
786 		return NULL;
787 	}
788 
789 	for (sid = 0; sid < tgt->max_subsystems; sid++) {
790 		subsystem = tgt->subsystems[sid];
791 		if (subsystem == NULL) {
792 			continue;
793 		}
794 
795 		if (strcmp(subnqn, subsystem->subnqn) == 0) {
796 			return subsystem;
797 		}
798 	}
799 
800 	return NULL;
801 }
802 
803 struct spdk_nvmf_transport *
804 spdk_nvmf_tgt_get_transport(struct spdk_nvmf_tgt *tgt, const char *transport_name)
805 {
806 	struct spdk_nvmf_transport *transport;
807 
808 	TAILQ_FOREACH(transport, &tgt->transports, link) {
809 		if (!strncasecmp(transport->ops->name, transport_name, SPDK_NVMF_TRSTRING_MAX_LEN)) {
810 			return transport;
811 		}
812 	}
813 	return NULL;
814 }
815 
/* Message payload used to hand a new qpair to the thread of its poll group. */
struct nvmf_new_qpair_ctx {
	/* The new qpair. */
	struct spdk_nvmf_qpair *qpair;
	/* Poll group chosen for it. */
	struct spdk_nvmf_poll_group *group;
};
820 
821 static void
822 _nvmf_poll_group_add(void *_ctx)
823 {
824 	struct nvmf_new_qpair_ctx *ctx = _ctx;
825 	struct spdk_nvmf_qpair *qpair = ctx->qpair;
826 	struct spdk_nvmf_poll_group *group = ctx->group;
827 
828 	free(_ctx);
829 
830 	if (spdk_nvmf_poll_group_add(group, qpair) != 0) {
831 		SPDK_ERRLOG("Unable to add the qpair to a poll group.\n");
832 		spdk_nvmf_qpair_disconnect(qpair, NULL, NULL);
833 	}
834 }
835 
836 void
837 spdk_nvmf_tgt_new_qpair(struct spdk_nvmf_tgt *tgt, struct spdk_nvmf_qpair *qpair)
838 {
839 	struct spdk_nvmf_poll_group *group;
840 	struct nvmf_new_qpair_ctx *ctx;
841 
842 	group = spdk_nvmf_get_optimal_poll_group(qpair);
843 	if (group == NULL) {
844 		if (tgt->next_poll_group == NULL) {
845 			tgt->next_poll_group = TAILQ_FIRST(&tgt->poll_groups);
846 			if (tgt->next_poll_group == NULL) {
847 				SPDK_ERRLOG("No poll groups exist.\n");
848 				spdk_nvmf_qpair_disconnect(qpair, NULL, NULL);
849 				return;
850 			}
851 		}
852 		group = tgt->next_poll_group;
853 		tgt->next_poll_group = TAILQ_NEXT(group, link);
854 	}
855 
856 	ctx = calloc(1, sizeof(*ctx));
857 	if (!ctx) {
858 		SPDK_ERRLOG("Unable to send message to poll group.\n");
859 		spdk_nvmf_qpair_disconnect(qpair, NULL, NULL);
860 		return;
861 	}
862 
863 	ctx->qpair = qpair;
864 	ctx->group = group;
865 
866 	spdk_thread_send_msg(group->thread, _nvmf_poll_group_add, ctx);
867 }
868 
/*
 * Obtain the poll group of a target for the calling thread by getting the
 * target's I/O channel; the poll group is the channel's context.
 *
 * Returns NULL if the channel cannot be obtained.
 */
struct spdk_nvmf_poll_group *
spdk_nvmf_poll_group_create(struct spdk_nvmf_tgt *tgt)
{
	struct spdk_io_channel *ch = spdk_get_io_channel(tgt);

	if (ch == NULL) {
		SPDK_ERRLOG("Unable to get I/O channel for target\n");
		return NULL;
	}

	return spdk_io_channel_get_ctx(ch);
}
882 
883 void
884 spdk_nvmf_poll_group_destroy(struct spdk_nvmf_poll_group *group,
885 			     spdk_nvmf_poll_group_destroy_done_fn cb_fn,
886 			     void *cb_arg)
887 {
888 	assert(group->destroy_cb_fn == NULL);
889 	group->destroy_cb_fn = cb_fn;
890 	group->destroy_cb_arg = cb_arg;
891 
892 	/* This function will put the io_channel associated with this poll group */
893 	nvmf_tgt_destroy_poll_group_qpairs(group);
894 }
895 
896 int
897 spdk_nvmf_poll_group_add(struct spdk_nvmf_poll_group *group,
898 			 struct spdk_nvmf_qpair *qpair)
899 {
900 	int rc = -1;
901 	struct spdk_nvmf_transport_poll_group *tgroup;
902 
903 	TAILQ_INIT(&qpair->outstanding);
904 	qpair->group = group;
905 	qpair->ctrlr = NULL;
906 	qpair->disconnect_started = false;
907 
908 	TAILQ_FOREACH(tgroup, &group->tgroups, link) {
909 		if (tgroup->transport == qpair->transport) {
910 			rc = nvmf_transport_poll_group_add(tgroup, qpair);
911 			break;
912 		}
913 	}
914 
915 	/* We add the qpair to the group only it is succesfully added into the tgroup */
916 	if (rc == 0) {
917 		SPDK_DTRACE_PROBE2(nvmf_poll_group_add_qpair, qpair, spdk_thread_get_id(group->thread));
918 		TAILQ_INSERT_TAIL(&group->qpairs, qpair, link);
919 		nvmf_qpair_set_state(qpair, SPDK_NVMF_QPAIR_ACTIVE);
920 	}
921 
922 	return rc;
923 }
924 
/* Thread-message adapter: ctx is the controller to pass to nvmf_ctrlr_destruct. */
static void
_nvmf_ctrlr_destruct(void *ctx)
{
	nvmf_ctrlr_destruct((struct spdk_nvmf_ctrlr *)ctx);
}
932 
933 static void
934 _nvmf_transport_qpair_fini_complete(void *cb_ctx)
935 {
936 	struct nvmf_qpair_disconnect_ctx *qpair_ctx = cb_ctx;
937 
938 	if (qpair_ctx->cb_fn) {
939 		spdk_thread_send_msg(qpair_ctx->thread, qpair_ctx->cb_fn, qpair_ctx->ctx);
940 	}
941 	free(qpair_ctx);
942 }
943 
944 static void
945 _nvmf_transport_qpair_fini(void *ctx)
946 {
947 	struct nvmf_qpair_disconnect_ctx *qpair_ctx = ctx;
948 
949 	spdk_nvmf_poll_group_remove(qpair_ctx->qpair);
950 	nvmf_transport_qpair_fini(qpair_ctx->qpair, _nvmf_transport_qpair_fini_complete, qpair_ctx);
951 }
952 
953 static void
954 _nvmf_ctrlr_free_from_qpair(void *ctx)
955 {
956 	struct nvmf_qpair_disconnect_ctx *qpair_ctx = ctx;
957 	struct spdk_nvmf_ctrlr *ctrlr = qpair_ctx->ctrlr;
958 	uint32_t count;
959 
960 	spdk_bit_array_clear(ctrlr->qpair_mask, qpair_ctx->qid);
961 	count = spdk_bit_array_count_set(ctrlr->qpair_mask);
962 	if (count == 0) {
963 		assert(!ctrlr->in_destruct);
964 		ctrlr->in_destruct = true;
965 		spdk_thread_send_msg(ctrlr->subsys->thread, _nvmf_ctrlr_destruct, ctrlr);
966 	}
967 
968 	spdk_thread_send_msg(qpair_ctx->thread, _nvmf_transport_qpair_fini, qpair_ctx);
969 }
970 
971 void
972 spdk_nvmf_poll_group_remove(struct spdk_nvmf_qpair *qpair)
973 {
974 	struct spdk_nvmf_transport_poll_group *tgroup;
975 	int rc;
976 
977 	SPDK_DTRACE_PROBE2(nvmf_poll_group_remove_qpair, qpair,
978 			   spdk_thread_get_id(qpair->group->thread));
979 	nvmf_qpair_set_state(qpair, SPDK_NVMF_QPAIR_ERROR);
980 
981 	/* Find the tgroup and remove the qpair from the tgroup */
982 	TAILQ_FOREACH(tgroup, &qpair->group->tgroups, link) {
983 		if (tgroup->transport == qpair->transport) {
984 			rc = nvmf_transport_poll_group_remove(tgroup, qpair);
985 			if (rc && (rc != ENOTSUP)) {
986 				SPDK_ERRLOG("Cannot remove qpair=%p from transport group=%p\n",
987 					    qpair, tgroup);
988 			}
989 			break;
990 		}
991 	}
992 
993 	TAILQ_REMOVE(&qpair->group->qpairs, qpair, link);
994 	qpair->group = NULL;
995 }
996 
997 static void
998 _nvmf_qpair_destroy(void *ctx, int status)
999 {
1000 	struct nvmf_qpair_disconnect_ctx *qpair_ctx = ctx;
1001 	struct spdk_nvmf_qpair *qpair = qpair_ctx->qpair;
1002 	struct spdk_nvmf_ctrlr *ctrlr = qpair->ctrlr;
1003 	struct spdk_nvmf_request *req, *tmp;
1004 	struct spdk_nvmf_subsystem_poll_group *sgroup;
1005 
1006 	assert(qpair->state == SPDK_NVMF_QPAIR_DEACTIVATING);
1007 	qpair_ctx->qid = qpair->qid;
1008 
1009 	if (ctrlr) {
1010 		if (0 == qpair->qid) {
1011 			assert(qpair->group->stat.current_admin_qpairs > 0);
1012 			qpair->group->stat.current_admin_qpairs--;
1013 		} else {
1014 			assert(qpair->group->stat.current_io_qpairs > 0);
1015 			qpair->group->stat.current_io_qpairs--;
1016 		}
1017 
1018 		sgroup = &qpair->group->sgroups[ctrlr->subsys->id];
1019 		TAILQ_FOREACH_SAFE(req, &sgroup->queued, link, tmp) {
1020 			if (req->qpair == qpair) {
1021 				TAILQ_REMOVE(&sgroup->queued, req, link);
1022 				if (nvmf_transport_req_free(req)) {
1023 					SPDK_ERRLOG("Transport request free error!/n");
1024 				}
1025 			}
1026 		}
1027 	}
1028 
1029 	if (!ctrlr || !ctrlr->thread) {
1030 		spdk_nvmf_poll_group_remove(qpair);
1031 		nvmf_transport_qpair_fini(qpair, _nvmf_transport_qpair_fini_complete, qpair_ctx);
1032 		return;
1033 	}
1034 
1035 	qpair_ctx->ctrlr = ctrlr;
1036 	spdk_thread_send_msg(ctrlr->thread, _nvmf_ctrlr_free_from_qpair, qpair_ctx);
1037 }
1038 
1039 static void
1040 _nvmf_qpair_disconnect_msg(void *ctx)
1041 {
1042 	struct nvmf_qpair_disconnect_ctx *qpair_ctx = ctx;
1043 
1044 	spdk_nvmf_qpair_disconnect(qpair_ctx->qpair, qpair_ctx->cb_fn, qpair_ctx->ctx);
1045 	free(ctx);
1046 }
1047 
1048 int
1049 spdk_nvmf_qpair_disconnect(struct spdk_nvmf_qpair *qpair, nvmf_qpair_disconnect_cb cb_fn, void *ctx)
1050 {
1051 	struct spdk_nvmf_poll_group *group = qpair->group;
1052 	struct nvmf_qpair_disconnect_ctx *qpair_ctx;
1053 
1054 	if (__atomic_test_and_set(&qpair->disconnect_started, __ATOMIC_RELAXED)) {
1055 		if (cb_fn) {
1056 			cb_fn(ctx);
1057 		}
1058 		return 0;
1059 	}
1060 
1061 	/* If we get a qpair in the uninitialized state, we can just destroy it immediately */
1062 	if (qpair->state == SPDK_NVMF_QPAIR_UNINITIALIZED) {
1063 		nvmf_transport_qpair_fini(qpair, NULL, NULL);
1064 		if (cb_fn) {
1065 			cb_fn(ctx);
1066 		}
1067 		return 0;
1068 	}
1069 
1070 	assert(group != NULL);
1071 	if (spdk_get_thread() != group->thread) {
1072 		/* clear the atomic so we can set it on the next call on the proper thread. */
1073 		__atomic_clear(&qpair->disconnect_started, __ATOMIC_RELAXED);
1074 		qpair_ctx = calloc(1, sizeof(struct nvmf_qpair_disconnect_ctx));
1075 		if (!qpair_ctx) {
1076 			SPDK_ERRLOG("Unable to allocate context for nvmf_qpair_disconnect\n");
1077 			return -ENOMEM;
1078 		}
1079 		qpair_ctx->qpair = qpair;
1080 		qpair_ctx->cb_fn = cb_fn;
1081 		qpair_ctx->thread = group->thread;
1082 		qpair_ctx->ctx = ctx;
1083 		spdk_thread_send_msg(group->thread, _nvmf_qpair_disconnect_msg, qpair_ctx);
1084 		return 0;
1085 	}
1086 
1087 	SPDK_DTRACE_PROBE2(nvmf_qpair_disconnect, qpair, spdk_thread_get_id(group->thread));
1088 	assert(qpair->state == SPDK_NVMF_QPAIR_ACTIVE);
1089 	nvmf_qpair_set_state(qpair, SPDK_NVMF_QPAIR_DEACTIVATING);
1090 
1091 	qpair_ctx = calloc(1, sizeof(struct nvmf_qpair_disconnect_ctx));
1092 	if (!qpair_ctx) {
1093 		SPDK_ERRLOG("Unable to allocate context for nvmf_qpair_disconnect\n");
1094 		return -ENOMEM;
1095 	}
1096 
1097 	qpair_ctx->qpair = qpair;
1098 	qpair_ctx->cb_fn = cb_fn;
1099 	qpair_ctx->thread = group->thread;
1100 	qpair_ctx->ctx = ctx;
1101 
1102 	/* Check for outstanding I/O */
1103 	if (!TAILQ_EMPTY(&qpair->outstanding)) {
1104 		SPDK_DTRACE_PROBE2(nvmf_poll_group_drain_qpair, qpair, spdk_thread_get_id(group->thread));
1105 		qpair->state_cb = _nvmf_qpair_destroy;
1106 		qpair->state_cb_arg = qpair_ctx;
1107 		nvmf_qpair_free_aer(qpair);
1108 		return 0;
1109 	}
1110 
1111 	_nvmf_qpair_destroy(qpair_ctx, 0);
1112 
1113 	return 0;
1114 }
1115 
/*
 * Fetch the peer transport ID of a qpair into *trid.
 *
 * The lookup is delegated to nvmf_transport_qpair_get_peer_trid() and its
 * return code is handed back unchanged.
 */
int
spdk_nvmf_qpair_get_peer_trid(struct spdk_nvmf_qpair *qpair,
			      struct spdk_nvme_transport_id *trid)
{
	int rc;

	rc = nvmf_transport_qpair_get_peer_trid(qpair, trid);

	return rc;
}
1122 
/*
 * Fetch the local transport ID of a qpair into *trid.
 *
 * The lookup is delegated to nvmf_transport_qpair_get_local_trid() and its
 * return code is handed back unchanged.
 */
int
spdk_nvmf_qpair_get_local_trid(struct spdk_nvmf_qpair *qpair,
			       struct spdk_nvme_transport_id *trid)
{
	int rc;

	rc = nvmf_transport_qpair_get_local_trid(qpair, trid);

	return rc;
}
1129 
/*
 * Fetch the listen transport ID of a qpair into *trid.
 *
 * The lookup is delegated to nvmf_transport_qpair_get_listen_trid() and its
 * return code is handed back unchanged.
 */
int
spdk_nvmf_qpair_get_listen_trid(struct spdk_nvmf_qpair *qpair,
				struct spdk_nvme_transport_id *trid)
{
	int rc;

	rc = nvmf_transport_qpair_get_listen_trid(qpair, trid);

	return rc;
}
1136 
1137 int
1138 nvmf_poll_group_add_transport(struct spdk_nvmf_poll_group *group,
1139 			      struct spdk_nvmf_transport *transport)
1140 {
1141 	struct spdk_nvmf_transport_poll_group *tgroup;
1142 
1143 	TAILQ_FOREACH(tgroup, &group->tgroups, link) {
1144 		if (tgroup->transport == transport) {
1145 			/* Transport already in the poll group */
1146 			return 0;
1147 		}
1148 	}
1149 
1150 	tgroup = nvmf_transport_poll_group_create(transport);
1151 	if (!tgroup) {
1152 		SPDK_ERRLOG("Unable to create poll group for transport\n");
1153 		return -1;
1154 	}
1155 
1156 	tgroup->group = group;
1157 	TAILQ_INSERT_TAIL(&group->tgroups, tgroup, link);
1158 
1159 	return 0;
1160 }
1161 
1162 static int
1163 poll_group_update_subsystem(struct spdk_nvmf_poll_group *group,
1164 			    struct spdk_nvmf_subsystem *subsystem)
1165 {
1166 	struct spdk_nvmf_subsystem_poll_group *sgroup;
1167 	uint32_t new_num_ns, old_num_ns;
1168 	uint32_t i, j;
1169 	struct spdk_nvmf_ns *ns;
1170 	struct spdk_nvmf_registrant *reg, *tmp;
1171 	struct spdk_io_channel *ch;
1172 	struct spdk_nvmf_subsystem_pg_ns_info *ns_info;
1173 	struct spdk_nvmf_ctrlr *ctrlr;
1174 	bool ns_changed;
1175 
1176 	/* Make sure our poll group has memory for this subsystem allocated */
1177 	if (subsystem->id >= group->num_sgroups) {
1178 		return -ENOMEM;
1179 	}
1180 
1181 	sgroup = &group->sgroups[subsystem->id];
1182 
1183 	/* Make sure the array of namespace information is the correct size */
1184 	new_num_ns = subsystem->max_nsid;
1185 	old_num_ns = sgroup->num_ns;
1186 
1187 	ns_changed = false;
1188 
1189 	if (old_num_ns == 0) {
1190 		if (new_num_ns > 0) {
1191 			/* First allocation */
1192 			sgroup->ns_info = calloc(new_num_ns, sizeof(struct spdk_nvmf_subsystem_pg_ns_info));
1193 			if (!sgroup->ns_info) {
1194 				return -ENOMEM;
1195 			}
1196 		}
1197 	} else if (new_num_ns > old_num_ns) {
1198 		void *buf;
1199 
1200 		/* Make the array larger */
1201 		buf = realloc(sgroup->ns_info, new_num_ns * sizeof(struct spdk_nvmf_subsystem_pg_ns_info));
1202 		if (!buf) {
1203 			return -ENOMEM;
1204 		}
1205 
1206 		sgroup->ns_info = buf;
1207 
1208 		/* Null out the new namespace information slots */
1209 		for (i = old_num_ns; i < new_num_ns; i++) {
1210 			memset(&sgroup->ns_info[i], 0, sizeof(struct spdk_nvmf_subsystem_pg_ns_info));
1211 		}
1212 	} else if (new_num_ns < old_num_ns) {
1213 		void *buf;
1214 
1215 		/* Free the extra I/O channels */
1216 		for (i = new_num_ns; i < old_num_ns; i++) {
1217 			ns_info = &sgroup->ns_info[i];
1218 
1219 			if (ns_info->channel) {
1220 				spdk_put_io_channel(ns_info->channel);
1221 				ns_info->channel = NULL;
1222 			}
1223 		}
1224 
1225 		/* Make the array smaller */
1226 		if (new_num_ns > 0) {
1227 			buf = realloc(sgroup->ns_info, new_num_ns * sizeof(struct spdk_nvmf_subsystem_pg_ns_info));
1228 			if (!buf) {
1229 				return -ENOMEM;
1230 			}
1231 			sgroup->ns_info = buf;
1232 		} else {
1233 			free(sgroup->ns_info);
1234 			sgroup->ns_info = NULL;
1235 		}
1236 	}
1237 
1238 	sgroup->num_ns = new_num_ns;
1239 
1240 	/* Detect bdevs that were added or removed */
1241 	for (i = 0; i < sgroup->num_ns; i++) {
1242 		ns = subsystem->ns[i];
1243 		ns_info = &sgroup->ns_info[i];
1244 		ch = ns_info->channel;
1245 
1246 		if (ns == NULL && ch == NULL) {
1247 			/* Both NULL. Leave empty */
1248 		} else if (ns == NULL && ch != NULL) {
1249 			/* There was a channel here, but the namespace is gone. */
1250 			ns_changed = true;
1251 			spdk_put_io_channel(ch);
1252 			ns_info->channel = NULL;
1253 		} else if (ns != NULL && ch == NULL) {
1254 			/* A namespace appeared but there is no channel yet */
1255 			ns_changed = true;
1256 			ch = spdk_bdev_get_io_channel(ns->desc);
1257 			if (ch == NULL) {
1258 				SPDK_ERRLOG("Could not allocate I/O channel.\n");
1259 				return -ENOMEM;
1260 			}
1261 			ns_info->channel = ch;
1262 		} else if (spdk_uuid_compare(&ns_info->uuid, spdk_bdev_get_uuid(ns->bdev)) != 0) {
1263 			/* A namespace was here before, but was replaced by a new one. */
1264 			ns_changed = true;
1265 			spdk_put_io_channel(ns_info->channel);
1266 			memset(ns_info, 0, sizeof(*ns_info));
1267 
1268 			ch = spdk_bdev_get_io_channel(ns->desc);
1269 			if (ch == NULL) {
1270 				SPDK_ERRLOG("Could not allocate I/O channel.\n");
1271 				return -ENOMEM;
1272 			}
1273 			ns_info->channel = ch;
1274 		} else if (ns_info->num_blocks != spdk_bdev_get_num_blocks(ns->bdev)) {
1275 			/* Namespace is still there but size has changed */
1276 			SPDK_DEBUGLOG(nvmf, "Namespace resized: subsystem_id %u,"
1277 				      " nsid %u, pg %p, old %" PRIu64 ", new %" PRIu64 "\n",
1278 				      subsystem->id,
1279 				      ns->nsid,
1280 				      group,
1281 				      ns_info->num_blocks,
1282 				      spdk_bdev_get_num_blocks(ns->bdev));
1283 			ns_changed = true;
1284 		}
1285 
1286 		if (ns == NULL) {
1287 			memset(ns_info, 0, sizeof(*ns_info));
1288 		} else {
1289 			ns_info->uuid = *spdk_bdev_get_uuid(ns->bdev);
1290 			ns_info->num_blocks = spdk_bdev_get_num_blocks(ns->bdev);
1291 			ns_info->crkey = ns->crkey;
1292 			ns_info->rtype = ns->rtype;
1293 			if (ns->holder) {
1294 				ns_info->holder_id = ns->holder->hostid;
1295 			}
1296 
1297 			memset(&ns_info->reg_hostid, 0, SPDK_NVMF_MAX_NUM_REGISTRANTS * sizeof(struct spdk_uuid));
1298 			j = 0;
1299 			TAILQ_FOREACH_SAFE(reg, &ns->registrants, link, tmp) {
1300 				if (j >= SPDK_NVMF_MAX_NUM_REGISTRANTS) {
1301 					SPDK_ERRLOG("Maximum %u registrants can support.\n", SPDK_NVMF_MAX_NUM_REGISTRANTS);
1302 					return -EINVAL;
1303 				}
1304 				ns_info->reg_hostid[j++] = reg->hostid;
1305 			}
1306 		}
1307 	}
1308 
1309 	if (ns_changed) {
1310 		TAILQ_FOREACH(ctrlr, &subsystem->ctrlrs, link) {
1311 			/* It is possible that a ctrlr was added but the admin_qpair hasn't been
1312 			 * assigned yet.
1313 			 */
1314 			if (!ctrlr->admin_qpair) {
1315 				continue;
1316 			}
1317 			if (ctrlr->admin_qpair->group == group) {
1318 				nvmf_ctrlr_async_event_ns_notice(ctrlr);
1319 				nvmf_ctrlr_async_event_ana_change_notice(ctrlr);
1320 			}
1321 		}
1322 	}
1323 
1324 	return 0;
1325 }
1326 
/*
 * Non-static entry point for refreshing a poll group's view of a subsystem.
 *
 * Forwards to poll_group_update_subsystem() and returns its result unchanged.
 */
int
nvmf_poll_group_update_subsystem(struct spdk_nvmf_poll_group *group,
				 struct spdk_nvmf_subsystem *subsystem)
{
	int rc;

	rc = poll_group_update_subsystem(group, subsystem);

	return rc;
}
1333 
1334 int
1335 nvmf_poll_group_add_subsystem(struct spdk_nvmf_poll_group *group,
1336 			      struct spdk_nvmf_subsystem *subsystem,
1337 			      spdk_nvmf_poll_group_mod_done cb_fn, void *cb_arg)
1338 {
1339 	int rc = 0;
1340 	struct spdk_nvmf_subsystem_poll_group *sgroup = &group->sgroups[subsystem->id];
1341 	uint32_t i;
1342 
1343 	TAILQ_INIT(&sgroup->queued);
1344 
1345 	rc = poll_group_update_subsystem(group, subsystem);
1346 	if (rc) {
1347 		nvmf_poll_group_remove_subsystem(group, subsystem, NULL, NULL);
1348 		goto fini;
1349 	}
1350 
1351 	sgroup->state = SPDK_NVMF_SUBSYSTEM_ACTIVE;
1352 
1353 	for (i = 0; i < sgroup->num_ns; i++) {
1354 		sgroup->ns_info[i].state = SPDK_NVMF_SUBSYSTEM_ACTIVE;
1355 	}
1356 
1357 fini:
1358 	if (cb_fn) {
1359 		cb_fn(cb_arg, rc);
1360 	}
1361 
1362 	return rc;
1363 }
1364 
1365 static void
1366 _nvmf_poll_group_remove_subsystem_cb(void *ctx, int status)
1367 {
1368 	struct nvmf_qpair_disconnect_many_ctx *qpair_ctx = ctx;
1369 	struct spdk_nvmf_subsystem *subsystem;
1370 	struct spdk_nvmf_poll_group *group;
1371 	struct spdk_nvmf_subsystem_poll_group *sgroup;
1372 	spdk_nvmf_poll_group_mod_done cpl_fn = NULL;
1373 	void *cpl_ctx = NULL;
1374 	uint32_t nsid;
1375 
1376 	group = qpair_ctx->group;
1377 	subsystem = qpair_ctx->subsystem;
1378 	cpl_fn = qpair_ctx->cpl_fn;
1379 	cpl_ctx = qpair_ctx->cpl_ctx;
1380 	sgroup = &group->sgroups[subsystem->id];
1381 
1382 	if (status) {
1383 		goto fini;
1384 	}
1385 
1386 	for (nsid = 0; nsid < sgroup->num_ns; nsid++) {
1387 		if (sgroup->ns_info[nsid].channel) {
1388 			spdk_put_io_channel(sgroup->ns_info[nsid].channel);
1389 			sgroup->ns_info[nsid].channel = NULL;
1390 		}
1391 	}
1392 
1393 	sgroup->num_ns = 0;
1394 	free(sgroup->ns_info);
1395 	sgroup->ns_info = NULL;
1396 fini:
1397 	free(qpair_ctx);
1398 	if (cpl_fn) {
1399 		cpl_fn(cpl_ctx, status);
1400 	}
1401 }
1402 
1403 static void nvmf_poll_group_remove_subsystem_msg(void *ctx);
1404 
1405 static void
1406 remove_subsystem_qpair_cb(void *ctx)
1407 {
1408 	struct nvmf_qpair_disconnect_many_ctx *qpair_ctx = ctx;
1409 
1410 	assert(qpair_ctx->count > 0);
1411 	qpair_ctx->count--;
1412 	if (qpair_ctx->count == 0) {
1413 		/* All of the asynchronous callbacks for this context have been
1414 		 * completed.  Call nvmf_poll_group_remove_subsystem_msg() again
1415 		 * to check if all associated qpairs for this subsystem have
1416 		 * been removed from the poll group.
1417 		 */
1418 		nvmf_poll_group_remove_subsystem_msg(ctx);
1419 	}
1420 }
1421 
1422 static void
1423 nvmf_poll_group_remove_subsystem_msg(void *ctx)
1424 {
1425 	struct spdk_nvmf_qpair *qpair, *qpair_tmp;
1426 	struct spdk_nvmf_subsystem *subsystem;
1427 	struct spdk_nvmf_poll_group *group;
1428 	struct nvmf_qpair_disconnect_many_ctx *qpair_ctx = ctx;
1429 	bool qpairs_found = false;
1430 	int rc = 0;
1431 
1432 	group = qpair_ctx->group;
1433 	subsystem = qpair_ctx->subsystem;
1434 
1435 	/* Initialize count to 1.  This acts like a ref count, to ensure that if spdk_nvmf_qpair_disconnect
1436 	 * immediately invokes the callback (i.e. the qpairs is already in process of being disconnected)
1437 	 * that we don't recursively call nvmf_poll_group_remove_subsystem_msg before we've iterated the
1438 	 * full list of qpairs.
1439 	 */
1440 	qpair_ctx->count = 1;
1441 	TAILQ_FOREACH_SAFE(qpair, &group->qpairs, link, qpair_tmp) {
1442 		if ((qpair->ctrlr != NULL) && (qpair->ctrlr->subsys == subsystem)) {
1443 			qpairs_found = true;
1444 			qpair_ctx->count++;
1445 			rc = spdk_nvmf_qpair_disconnect(qpair, remove_subsystem_qpair_cb, ctx);
1446 			if (rc) {
1447 				break;
1448 			}
1449 		}
1450 	}
1451 	qpair_ctx->count--;
1452 
1453 	if (!qpairs_found) {
1454 		_nvmf_poll_group_remove_subsystem_cb(ctx, 0);
1455 		return;
1456 	}
1457 
1458 	if (qpair_ctx->count == 0 || rc) {
1459 		/* If count == 0, it means there were some qpairs in the poll group but they
1460 		 * were already in process of being disconnected.  So we send a message to this
1461 		 * same thread so that this function executes again later.  We won't actually
1462 		 * invoke the remove_subsystem_cb until all of the qpairs are actually removed
1463 		 * from the poll group.
1464 		 */
1465 		spdk_thread_send_msg(spdk_get_thread(), nvmf_poll_group_remove_subsystem_msg, ctx);
1466 	}
1467 }
1468 
1469 void
1470 nvmf_poll_group_remove_subsystem(struct spdk_nvmf_poll_group *group,
1471 				 struct spdk_nvmf_subsystem *subsystem,
1472 				 spdk_nvmf_poll_group_mod_done cb_fn, void *cb_arg)
1473 {
1474 	struct spdk_nvmf_subsystem_poll_group *sgroup;
1475 	struct nvmf_qpair_disconnect_many_ctx *ctx;
1476 	uint32_t i;
1477 
1478 	ctx = calloc(1, sizeof(struct nvmf_qpair_disconnect_many_ctx));
1479 	if (!ctx) {
1480 		SPDK_ERRLOG("Unable to allocate memory for context to remove poll subsystem\n");
1481 		if (cb_fn) {
1482 			cb_fn(cb_arg, -1);
1483 		}
1484 		return;
1485 	}
1486 
1487 	ctx->group = group;
1488 	ctx->subsystem = subsystem;
1489 	ctx->cpl_fn = cb_fn;
1490 	ctx->cpl_ctx = cb_arg;
1491 
1492 	sgroup = &group->sgroups[subsystem->id];
1493 	sgroup->state = SPDK_NVMF_SUBSYSTEM_INACTIVE;
1494 
1495 	for (i = 0; i < sgroup->num_ns; i++) {
1496 		sgroup->ns_info[i].state = SPDK_NVMF_SUBSYSTEM_INACTIVE;
1497 	}
1498 
1499 	nvmf_poll_group_remove_subsystem_msg(ctx);
1500 }
1501 
1502 void
1503 nvmf_poll_group_pause_subsystem(struct spdk_nvmf_poll_group *group,
1504 				struct spdk_nvmf_subsystem *subsystem,
1505 				uint32_t nsid,
1506 				spdk_nvmf_poll_group_mod_done cb_fn, void *cb_arg)
1507 {
1508 	struct spdk_nvmf_subsystem_poll_group *sgroup;
1509 	struct spdk_nvmf_subsystem_pg_ns_info *ns_info = NULL;
1510 	int rc = 0;
1511 
1512 	if (subsystem->id >= group->num_sgroups) {
1513 		rc = -1;
1514 		goto fini;
1515 	}
1516 
1517 	sgroup = &group->sgroups[subsystem->id];
1518 	if (sgroup == NULL) {
1519 		rc = -1;
1520 		goto fini;
1521 	}
1522 
1523 	if (sgroup->state == SPDK_NVMF_SUBSYSTEM_PAUSED) {
1524 		goto fini;
1525 	}
1526 	sgroup->state = SPDK_NVMF_SUBSYSTEM_PAUSING;
1527 
1528 	/* NOTE: This implicitly also checks for 0, since 0 - 1 wraps around to UINT32_MAX. */
1529 	if (nsid - 1 < sgroup->num_ns) {
1530 		ns_info  = &sgroup->ns_info[nsid - 1];
1531 		ns_info->state = SPDK_NVMF_SUBSYSTEM_PAUSING;
1532 	}
1533 
1534 	if (sgroup->mgmt_io_outstanding > 0) {
1535 		assert(sgroup->cb_fn == NULL);
1536 		sgroup->cb_fn = cb_fn;
1537 		assert(sgroup->cb_arg == NULL);
1538 		sgroup->cb_arg = cb_arg;
1539 		return;
1540 	}
1541 
1542 	if (ns_info != NULL && ns_info->io_outstanding > 0) {
1543 		assert(sgroup->cb_fn == NULL);
1544 		sgroup->cb_fn = cb_fn;
1545 		assert(sgroup->cb_arg == NULL);
1546 		sgroup->cb_arg = cb_arg;
1547 		return;
1548 	}
1549 
1550 	assert(sgroup->mgmt_io_outstanding == 0);
1551 	sgroup->state = SPDK_NVMF_SUBSYSTEM_PAUSED;
1552 fini:
1553 	if (cb_fn) {
1554 		cb_fn(cb_arg, rc);
1555 	}
1556 }
1557 
1558 void
1559 nvmf_poll_group_resume_subsystem(struct spdk_nvmf_poll_group *group,
1560 				 struct spdk_nvmf_subsystem *subsystem,
1561 				 spdk_nvmf_poll_group_mod_done cb_fn, void *cb_arg)
1562 {
1563 	struct spdk_nvmf_request *req, *tmp;
1564 	struct spdk_nvmf_subsystem_poll_group *sgroup;
1565 	int rc = 0;
1566 	uint32_t i;
1567 
1568 	if (subsystem->id >= group->num_sgroups) {
1569 		rc = -1;
1570 		goto fini;
1571 	}
1572 
1573 	sgroup = &group->sgroups[subsystem->id];
1574 
1575 	if (sgroup->state == SPDK_NVMF_SUBSYSTEM_ACTIVE) {
1576 		goto fini;
1577 	}
1578 
1579 	rc = poll_group_update_subsystem(group, subsystem);
1580 	if (rc) {
1581 		goto fini;
1582 	}
1583 
1584 	for (i = 0; i < sgroup->num_ns; i++) {
1585 		sgroup->ns_info[i].state = SPDK_NVMF_SUBSYSTEM_ACTIVE;
1586 	}
1587 
1588 	sgroup->state = SPDK_NVMF_SUBSYSTEM_ACTIVE;
1589 
1590 	/* Release all queued requests */
1591 	TAILQ_FOREACH_SAFE(req, &sgroup->queued, link, tmp) {
1592 		TAILQ_REMOVE(&sgroup->queued, req, link);
1593 		assert(req->zcopy_phase == NVMF_ZCOPY_PHASE_NONE);
1594 		spdk_nvmf_request_exec(req);
1595 	}
1596 fini:
1597 	if (cb_fn) {
1598 		cb_fn(cb_arg, rc);
1599 	}
1600 }
1601 
1602 
1603 struct spdk_nvmf_poll_group *
1604 spdk_nvmf_get_optimal_poll_group(struct spdk_nvmf_qpair *qpair)
1605 {
1606 	struct spdk_nvmf_transport_poll_group *tgroup;
1607 
1608 	tgroup = nvmf_transport_get_optimal_poll_group(qpair->transport, qpair);
1609 
1610 	if (tgroup == NULL) {
1611 		return NULL;
1612 	}
1613 
1614 	return tgroup->group;
1615 }
1616 
1617 void
1618 spdk_nvmf_poll_group_dump_stat(struct spdk_nvmf_poll_group *group, struct spdk_json_write_ctx *w)
1619 {
1620 	struct spdk_nvmf_transport_poll_group *tgroup;
1621 
1622 	spdk_json_write_object_begin(w);
1623 
1624 	spdk_json_write_named_string(w, "name", spdk_thread_get_name(spdk_get_thread()));
1625 	spdk_json_write_named_uint32(w, "admin_qpairs", group->stat.admin_qpairs);
1626 	spdk_json_write_named_uint32(w, "io_qpairs", group->stat.io_qpairs);
1627 	spdk_json_write_named_uint32(w, "current_admin_qpairs", group->stat.current_admin_qpairs);
1628 	spdk_json_write_named_uint32(w, "current_io_qpairs", group->stat.current_io_qpairs);
1629 	spdk_json_write_named_uint64(w, "pending_bdev_io", group->stat.pending_bdev_io);
1630 
1631 	spdk_json_write_named_array_begin(w, "transports");
1632 
1633 	TAILQ_FOREACH(tgroup, &group->tgroups, link) {
1634 		spdk_json_write_object_begin(w);
1635 		/*
1636 		 * The trtype field intentionally contains a transport name as this is more informative.
1637 		 * The field has not been renamed for backward compatibility.
1638 		 */
1639 		spdk_json_write_named_string(w, "trtype", spdk_nvmf_get_transport_name(tgroup->transport));
1640 
1641 		if (tgroup->transport->ops->poll_group_dump_stat) {
1642 			tgroup->transport->ops->poll_group_dump_stat(tgroup, w);
1643 		}
1644 
1645 		spdk_json_write_object_end(w);
1646 	}
1647 
1648 	spdk_json_write_array_end(w);
1649 	spdk_json_write_object_end(w);
1650 }
1651