xref: /spdk/lib/nvmf/nvmf.c (revision 6f338d4bf3a8a91b7abe377a605a321ea2b05bf7)
1 /*   SPDX-License-Identifier: BSD-3-Clause
2  *   Copyright (c) Intel Corporation. All rights reserved.
3  *   Copyright (c) 2018-2019, 2021 Mellanox Technologies LTD. All rights reserved.
4  *   Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
5  */
6 
7 #include "spdk/stdinc.h"
8 
9 #include "spdk/bdev.h"
10 #include "spdk/bit_array.h"
11 #include "spdk/thread.h"
12 #include "spdk/nvmf.h"
13 #include "spdk/endian.h"
14 #include "spdk/string.h"
15 #include "spdk/log.h"
16 #include "spdk_internal/usdt.h"
17 
18 #include "nvmf_internal.h"
19 #include "transport.h"
20 
21 SPDK_LOG_REGISTER_COMPONENT(nvmf)
22 
23 #define SPDK_NVMF_DEFAULT_MAX_SUBSYSTEMS 1024
24 
25 static TAILQ_HEAD(, spdk_nvmf_tgt) g_nvmf_tgts = TAILQ_HEAD_INITIALIZER(g_nvmf_tgts);
26 
27 typedef void (*nvmf_qpair_disconnect_cpl)(void *ctx, int status);
28 
29 /* supplied to a single call to nvmf_qpair_disconnect */
30 struct nvmf_qpair_disconnect_ctx {
31 	struct spdk_nvmf_qpair *qpair;
32 	struct spdk_nvmf_ctrlr *ctrlr;
33 	nvmf_qpair_disconnect_cb cb_fn;
34 	struct spdk_thread *thread;
35 	void *ctx;
36 	uint16_t qid;
37 };
38 
39 /*
40  * There are several times when we need to iterate through the list of all qpairs and selectively delete them.
41  * In order to do this sequentially without overlap, we must provide a context to recover the next qpair from
42  * to enable calling nvmf_qpair_disconnect on the next desired qpair.
43  */
44 struct nvmf_qpair_disconnect_many_ctx {
45 	struct spdk_nvmf_subsystem *subsystem;
46 	struct spdk_nvmf_poll_group *group;
47 	spdk_nvmf_poll_group_mod_done cpl_fn;
48 	void *cpl_ctx;
49 	uint32_t count;
50 };
51 
52 static void
53 nvmf_qpair_set_state(struct spdk_nvmf_qpair *qpair,
54 		     enum spdk_nvmf_qpair_state state)
55 {
56 	assert(qpair != NULL);
57 	assert(qpair->group->thread == spdk_get_thread());
58 
59 	qpair->state = state;
60 }
61 
62 static int
63 nvmf_poll_group_poll(void *ctx)
64 {
65 	struct spdk_nvmf_poll_group *group = ctx;
66 	int rc;
67 	int count = 0;
68 	struct spdk_nvmf_transport_poll_group *tgroup;
69 
70 	TAILQ_FOREACH(tgroup, &group->tgroups, link) {
71 		rc = nvmf_transport_poll_group_poll(tgroup);
72 		if (rc < 0) {
73 			return SPDK_POLLER_BUSY;
74 		}
75 		count += rc;
76 	}
77 
78 	return count > 0 ? SPDK_POLLER_BUSY : SPDK_POLLER_IDLE;
79 }
80 
81 /*
82  * Reset and clean up the poll group (I/O channel code will actually free the
83  * group).
84  */
85 static void
86 nvmf_tgt_cleanup_poll_group(struct spdk_nvmf_poll_group *group)
87 {
88 	struct spdk_nvmf_transport_poll_group *tgroup, *tmp;
89 	struct spdk_nvmf_subsystem_poll_group *sgroup;
90 	uint32_t sid, nsid;
91 
92 	TAILQ_FOREACH_SAFE(tgroup, &group->tgroups, link, tmp) {
93 		TAILQ_REMOVE(&group->tgroups, tgroup, link);
94 		nvmf_transport_poll_group_destroy(tgroup);
95 	}
96 
97 	for (sid = 0; sid < group->num_sgroups; sid++) {
98 		sgroup = &group->sgroups[sid];
99 
100 		assert(sgroup != NULL);
101 
102 		for (nsid = 0; nsid < sgroup->num_ns; nsid++) {
103 			if (sgroup->ns_info[nsid].channel) {
104 				spdk_put_io_channel(sgroup->ns_info[nsid].channel);
105 				sgroup->ns_info[nsid].channel = NULL;
106 			}
107 		}
108 
109 		free(sgroup->ns_info);
110 	}
111 
112 	free(group->sgroups);
113 
114 	spdk_poller_unregister(&group->poller);
115 
116 	if (group->destroy_cb_fn) {
117 		group->destroy_cb_fn(group->destroy_cb_arg, 0);
118 	}
119 }
120 
121 /*
122  * Callback to unregister a poll group from the target, and clean up its state.
123  */
124 static void
125 nvmf_tgt_destroy_poll_group(void *io_device, void *ctx_buf)
126 {
127 	struct spdk_nvmf_tgt *tgt = io_device;
128 	struct spdk_nvmf_poll_group *group = ctx_buf;
129 
130 	SPDK_DTRACE_PROBE1(nvmf_destroy_poll_group, spdk_thread_get_id(group->thread));
131 
132 	pthread_mutex_lock(&tgt->mutex);
133 	TAILQ_REMOVE(&tgt->poll_groups, group, link);
134 	pthread_mutex_unlock(&tgt->mutex);
135 
136 	nvmf_tgt_cleanup_poll_group(group);
137 }
138 
139 static int
140 nvmf_tgt_create_poll_group(void *io_device, void *ctx_buf)
141 {
142 	struct spdk_nvmf_tgt *tgt = io_device;
143 	struct spdk_nvmf_poll_group *group = ctx_buf;
144 	struct spdk_nvmf_transport *transport;
145 	struct spdk_thread *thread = spdk_get_thread();
146 	uint32_t sid;
147 	int rc;
148 
149 	TAILQ_INIT(&group->tgroups);
150 	TAILQ_INIT(&group->qpairs);
151 	group->thread = thread;
152 
153 	group->poller = SPDK_POLLER_REGISTER(nvmf_poll_group_poll, group, 0);
154 
155 	SPDK_DTRACE_PROBE1(nvmf_create_poll_group, spdk_thread_get_id(thread));
156 
157 	TAILQ_FOREACH(transport, &tgt->transports, link) {
158 		rc = nvmf_poll_group_add_transport(group, transport);
159 		if (rc != 0) {
160 			nvmf_tgt_cleanup_poll_group(group);
161 			return rc;
162 		}
163 	}
164 
165 	group->num_sgroups = tgt->max_subsystems;
166 	group->sgroups = calloc(tgt->max_subsystems, sizeof(struct spdk_nvmf_subsystem_poll_group));
167 	if (!group->sgroups) {
168 		nvmf_tgt_cleanup_poll_group(group);
169 		return -ENOMEM;
170 	}
171 
172 	for (sid = 0; sid < tgt->max_subsystems; sid++) {
173 		struct spdk_nvmf_subsystem *subsystem;
174 
175 		subsystem = tgt->subsystems[sid];
176 		if (!subsystem) {
177 			continue;
178 		}
179 
180 		if (nvmf_poll_group_add_subsystem(group, subsystem, NULL, NULL) != 0) {
181 			nvmf_tgt_cleanup_poll_group(group);
182 			return -1;
183 		}
184 	}
185 
186 	pthread_mutex_lock(&tgt->mutex);
187 	TAILQ_INSERT_TAIL(&tgt->poll_groups, group, link);
188 	pthread_mutex_unlock(&tgt->mutex);
189 
190 	return 0;
191 }
192 
193 static void
194 _nvmf_tgt_disconnect_next_qpair(void *ctx)
195 {
196 	struct spdk_nvmf_qpair *qpair;
197 	struct nvmf_qpair_disconnect_many_ctx *qpair_ctx = ctx;
198 	struct spdk_nvmf_poll_group *group = qpair_ctx->group;
199 	struct spdk_io_channel *ch;
200 	int rc = 0;
201 
202 	qpair = TAILQ_FIRST(&group->qpairs);
203 
204 	if (qpair) {
205 		rc = spdk_nvmf_qpair_disconnect(qpair, _nvmf_tgt_disconnect_next_qpair, ctx);
206 	}
207 
208 	if (!qpair || rc != 0) {
209 		/* When the refcount from the channels reaches 0, nvmf_tgt_destroy_poll_group will be called. */
210 		ch = spdk_io_channel_from_ctx(group);
211 		spdk_put_io_channel(ch);
212 		free(qpair_ctx);
213 	}
214 }
215 
216 static void
217 nvmf_tgt_destroy_poll_group_qpairs(struct spdk_nvmf_poll_group *group)
218 {
219 	struct nvmf_qpair_disconnect_many_ctx *ctx;
220 
221 	SPDK_DTRACE_PROBE1(nvmf_destroy_poll_group_qpairs, spdk_thread_get_id(group->thread));
222 
223 	ctx = calloc(1, sizeof(struct nvmf_qpair_disconnect_many_ctx));
224 	if (!ctx) {
225 		SPDK_ERRLOG("Failed to allocate memory for destroy poll group ctx\n");
226 		return;
227 	}
228 
229 	ctx->group = group;
230 	_nvmf_tgt_disconnect_next_qpair(ctx);
231 }
232 
233 struct spdk_nvmf_tgt *
234 spdk_nvmf_tgt_create(struct spdk_nvmf_target_opts *opts)
235 {
236 	struct spdk_nvmf_tgt *tgt, *tmp_tgt;
237 
238 	if (strnlen(opts->name, NVMF_TGT_NAME_MAX_LENGTH) == NVMF_TGT_NAME_MAX_LENGTH) {
239 		SPDK_ERRLOG("Provided target name exceeds the max length of %u.\n", NVMF_TGT_NAME_MAX_LENGTH);
240 		return NULL;
241 	}
242 
243 	TAILQ_FOREACH(tmp_tgt, &g_nvmf_tgts, link) {
244 		if (!strncmp(opts->name, tmp_tgt->name, NVMF_TGT_NAME_MAX_LENGTH)) {
245 			SPDK_ERRLOG("Provided target name must be unique.\n");
246 			return NULL;
247 		}
248 	}
249 
250 	tgt = calloc(1, sizeof(*tgt));
251 	if (!tgt) {
252 		return NULL;
253 	}
254 
255 	snprintf(tgt->name, NVMF_TGT_NAME_MAX_LENGTH, "%s", opts->name);
256 
257 	if (!opts || !opts->max_subsystems) {
258 		tgt->max_subsystems = SPDK_NVMF_DEFAULT_MAX_SUBSYSTEMS;
259 	} else {
260 		tgt->max_subsystems = opts->max_subsystems;
261 	}
262 
263 	if (!opts) {
264 		tgt->crdt[0] = 0;
265 		tgt->crdt[1] = 0;
266 		tgt->crdt[2] = 0;
267 	} else {
268 		tgt->crdt[0] = opts->crdt[0];
269 		tgt->crdt[1] = opts->crdt[1];
270 		tgt->crdt[2] = opts->crdt[2];
271 	}
272 
273 	if (!opts) {
274 		tgt->discovery_filter = SPDK_NVMF_TGT_DISCOVERY_MATCH_ANY;
275 	} else {
276 		tgt->discovery_filter = opts->discovery_filter;
277 	}
278 
279 	tgt->discovery_genctr = 0;
280 	TAILQ_INIT(&tgt->transports);
281 	TAILQ_INIT(&tgt->poll_groups);
282 
283 	tgt->subsystems = calloc(tgt->max_subsystems, sizeof(struct spdk_nvmf_subsystem *));
284 	if (!tgt->subsystems) {
285 		free(tgt);
286 		return NULL;
287 	}
288 
289 	pthread_mutex_init(&tgt->mutex, NULL);
290 
291 	spdk_io_device_register(tgt,
292 				nvmf_tgt_create_poll_group,
293 				nvmf_tgt_destroy_poll_group,
294 				sizeof(struct spdk_nvmf_poll_group),
295 				tgt->name);
296 
297 	TAILQ_INSERT_HEAD(&g_nvmf_tgts, tgt, link);
298 
299 	return tgt;
300 }
301 
302 static void
303 _nvmf_tgt_destroy_next_transport(void *ctx)
304 {
305 	struct spdk_nvmf_tgt *tgt = ctx;
306 	struct spdk_nvmf_transport *transport;
307 
308 	if (!TAILQ_EMPTY(&tgt->transports)) {
309 		transport = TAILQ_FIRST(&tgt->transports);
310 		TAILQ_REMOVE(&tgt->transports, transport, link);
311 		spdk_nvmf_transport_destroy(transport, _nvmf_tgt_destroy_next_transport, tgt);
312 	} else {
313 		spdk_nvmf_tgt_destroy_done_fn *destroy_cb_fn = tgt->destroy_cb_fn;
314 		void *destroy_cb_arg = tgt->destroy_cb_arg;
315 
316 		pthread_mutex_destroy(&tgt->mutex);
317 		free(tgt);
318 
319 		if (destroy_cb_fn) {
320 			destroy_cb_fn(destroy_cb_arg, 0);
321 		}
322 	}
323 }
324 
325 static void
326 nvmf_tgt_destroy_cb(void *io_device)
327 {
328 	struct spdk_nvmf_tgt *tgt = io_device;
329 	uint32_t i;
330 	int rc;
331 
332 	if (tgt->subsystems) {
333 		for (i = 0; i < tgt->max_subsystems; i++) {
334 			if (tgt->subsystems[i]) {
335 				nvmf_subsystem_remove_all_listeners(tgt->subsystems[i], true);
336 
337 				rc = spdk_nvmf_subsystem_destroy(tgt->subsystems[i], nvmf_tgt_destroy_cb, tgt);
338 				if (rc) {
339 					if (rc == -EINPROGRESS) {
340 						/* If rc is -EINPROGRESS, nvmf_tgt_destroy_cb will be called again when subsystem #i
341 						 * is destroyed, nvmf_tgt_destroy_cb will continue to destroy other subsystems if any */
342 						return;
343 					} else {
344 						SPDK_ERRLOG("Failed to destroy subsystem, id %u, rc %d\n", tgt->subsystems[i]->id, rc);
345 						assert(0);
346 					}
347 				}
348 			}
349 		}
350 		free(tgt->subsystems);
351 	}
352 
353 	_nvmf_tgt_destroy_next_transport(tgt);
354 }
355 
356 void
357 spdk_nvmf_tgt_destroy(struct spdk_nvmf_tgt *tgt,
358 		      spdk_nvmf_tgt_destroy_done_fn cb_fn,
359 		      void *cb_arg)
360 {
361 	tgt->destroy_cb_fn = cb_fn;
362 	tgt->destroy_cb_arg = cb_arg;
363 
364 	TAILQ_REMOVE(&g_nvmf_tgts, tgt, link);
365 
366 	spdk_io_device_unregister(tgt, nvmf_tgt_destroy_cb);
367 }
368 
369 const char *
370 spdk_nvmf_tgt_get_name(struct spdk_nvmf_tgt *tgt)
371 {
372 	return tgt->name;
373 }
374 
375 struct spdk_nvmf_tgt *
376 spdk_nvmf_get_tgt(const char *name)
377 {
378 	struct spdk_nvmf_tgt *tgt;
379 	uint32_t num_targets = 0;
380 
381 	TAILQ_FOREACH(tgt, &g_nvmf_tgts, link) {
382 		if (name) {
383 			if (!strncmp(tgt->name, name, NVMF_TGT_NAME_MAX_LENGTH)) {
384 				return tgt;
385 			}
386 		}
387 		num_targets++;
388 	}
389 
390 	/*
391 	 * special case. If there is only one target and
392 	 * no name was specified, return the only available
393 	 * target. If there is more than one target, name must
394 	 * be specified.
395 	 */
396 	if (!name && num_targets == 1) {
397 		return TAILQ_FIRST(&g_nvmf_tgts);
398 	}
399 
400 	return NULL;
401 }
402 
403 struct spdk_nvmf_tgt *
404 spdk_nvmf_get_first_tgt(void)
405 {
406 	return TAILQ_FIRST(&g_nvmf_tgts);
407 }
408 
409 struct spdk_nvmf_tgt *
410 spdk_nvmf_get_next_tgt(struct spdk_nvmf_tgt *prev)
411 {
412 	return TAILQ_NEXT(prev, link);
413 }
414 
415 static void
416 nvmf_write_subsystem_config_json(struct spdk_json_write_ctx *w,
417 				 struct spdk_nvmf_subsystem *subsystem)
418 {
419 	struct spdk_nvmf_host *host;
420 	struct spdk_nvmf_subsystem_listener *listener;
421 	const struct spdk_nvme_transport_id *trid;
422 	struct spdk_nvmf_ns *ns;
423 	struct spdk_nvmf_ns_opts ns_opts;
424 	uint32_t max_namespaces;
425 	char uuid_str[SPDK_UUID_STRING_LEN];
426 
427 	if (spdk_nvmf_subsystem_get_type(subsystem) != SPDK_NVMF_SUBTYPE_NVME) {
428 		return;
429 	}
430 
431 	/* { */
432 	spdk_json_write_object_begin(w);
433 	spdk_json_write_named_string(w, "method", "nvmf_create_subsystem");
434 
435 	/*     "params" : { */
436 	spdk_json_write_named_object_begin(w, "params");
437 	spdk_json_write_named_string(w, "nqn", spdk_nvmf_subsystem_get_nqn(subsystem));
438 	spdk_json_write_named_bool(w, "allow_any_host", spdk_nvmf_subsystem_get_allow_any_host(subsystem));
439 	spdk_json_write_named_string(w, "serial_number", spdk_nvmf_subsystem_get_sn(subsystem));
440 	spdk_json_write_named_string(w, "model_number", spdk_nvmf_subsystem_get_mn(subsystem));
441 
442 	max_namespaces = spdk_nvmf_subsystem_get_max_namespaces(subsystem);
443 	if (max_namespaces != 0) {
444 		spdk_json_write_named_uint32(w, "max_namespaces", max_namespaces);
445 	}
446 
447 	spdk_json_write_named_uint32(w, "min_cntlid", spdk_nvmf_subsystem_get_min_cntlid(subsystem));
448 	spdk_json_write_named_uint32(w, "max_cntlid", spdk_nvmf_subsystem_get_max_cntlid(subsystem));
449 	spdk_json_write_named_bool(w, "ana_reporting", nvmf_subsystem_get_ana_reporting(subsystem));
450 
451 	/*     } "params" */
452 	spdk_json_write_object_end(w);
453 
454 	/* } */
455 	spdk_json_write_object_end(w);
456 
457 	for (listener = spdk_nvmf_subsystem_get_first_listener(subsystem); listener != NULL;
458 	     listener = spdk_nvmf_subsystem_get_next_listener(subsystem, listener)) {
459 		trid = spdk_nvmf_subsystem_listener_get_trid(listener);
460 
461 		spdk_json_write_object_begin(w);
462 		spdk_json_write_named_string(w, "method", "nvmf_subsystem_add_listener");
463 
464 		/*     "params" : { */
465 		spdk_json_write_named_object_begin(w, "params");
466 
467 		spdk_json_write_named_string(w, "nqn", spdk_nvmf_subsystem_get_nqn(subsystem));
468 		nvmf_transport_listen_dump_opts(listener->transport, trid, w);
469 
470 		/*     } "params" */
471 		spdk_json_write_object_end(w);
472 
473 		/* } */
474 		spdk_json_write_object_end(w);
475 	}
476 
477 	for (host = spdk_nvmf_subsystem_get_first_host(subsystem); host != NULL;
478 	     host = spdk_nvmf_subsystem_get_next_host(subsystem, host)) {
479 
480 		spdk_json_write_object_begin(w);
481 		spdk_json_write_named_string(w, "method", "nvmf_subsystem_add_host");
482 
483 		/*     "params" : { */
484 		spdk_json_write_named_object_begin(w, "params");
485 
486 		spdk_json_write_named_string(w, "nqn", spdk_nvmf_subsystem_get_nqn(subsystem));
487 		spdk_json_write_named_string(w, "host", spdk_nvmf_host_get_nqn(host));
488 
489 		/*     } "params" */
490 		spdk_json_write_object_end(w);
491 
492 		/* } */
493 		spdk_json_write_object_end(w);
494 	}
495 
496 	for (ns = spdk_nvmf_subsystem_get_first_ns(subsystem); ns != NULL;
497 	     ns = spdk_nvmf_subsystem_get_next_ns(subsystem, ns)) {
498 		spdk_nvmf_ns_get_opts(ns, &ns_opts, sizeof(ns_opts));
499 
500 		spdk_json_write_object_begin(w);
501 		spdk_json_write_named_string(w, "method", "nvmf_subsystem_add_ns");
502 
503 		/*     "params" : { */
504 		spdk_json_write_named_object_begin(w, "params");
505 
506 		spdk_json_write_named_string(w, "nqn", spdk_nvmf_subsystem_get_nqn(subsystem));
507 
508 		/*     "namespace" : { */
509 		spdk_json_write_named_object_begin(w, "namespace");
510 
511 		spdk_json_write_named_uint32(w, "nsid", spdk_nvmf_ns_get_id(ns));
512 		spdk_json_write_named_string(w, "bdev_name", spdk_bdev_get_name(spdk_nvmf_ns_get_bdev(ns)));
513 
514 		if (!spdk_mem_all_zero(ns_opts.nguid, sizeof(ns_opts.nguid))) {
515 			SPDK_STATIC_ASSERT(sizeof(ns_opts.nguid) == sizeof(uint64_t) * 2, "size mismatch");
516 			spdk_json_write_named_string_fmt(w, "nguid", "%016"PRIX64"%016"PRIX64, from_be64(&ns_opts.nguid[0]),
517 							 from_be64(&ns_opts.nguid[8]));
518 		}
519 
520 		if (!spdk_mem_all_zero(ns_opts.eui64, sizeof(ns_opts.eui64))) {
521 			SPDK_STATIC_ASSERT(sizeof(ns_opts.eui64) == sizeof(uint64_t), "size mismatch");
522 			spdk_json_write_named_string_fmt(w, "eui64", "%016"PRIX64, from_be64(&ns_opts.eui64));
523 		}
524 
525 		if (!spdk_mem_all_zero(&ns_opts.uuid, sizeof(ns_opts.uuid))) {
526 			spdk_uuid_fmt_lower(uuid_str, sizeof(uuid_str), &ns_opts.uuid);
527 			spdk_json_write_named_string(w, "uuid",  uuid_str);
528 		}
529 
530 		if (nvmf_subsystem_get_ana_reporting(subsystem)) {
531 			spdk_json_write_named_uint32(w, "anagrpid", ns_opts.anagrpid);
532 		}
533 
534 		/*     "namespace" */
535 		spdk_json_write_object_end(w);
536 
537 		/*     } "params" */
538 		spdk_json_write_object_end(w);
539 
540 		/* } */
541 		spdk_json_write_object_end(w);
542 	}
543 }
544 
545 void
546 spdk_nvmf_tgt_write_config_json(struct spdk_json_write_ctx *w, struct spdk_nvmf_tgt *tgt)
547 {
548 	struct spdk_nvmf_subsystem *subsystem;
549 	struct spdk_nvmf_transport *transport;
550 
551 	spdk_json_write_object_begin(w);
552 	spdk_json_write_named_string(w, "method", "nvmf_set_max_subsystems");
553 
554 	spdk_json_write_named_object_begin(w, "params");
555 	spdk_json_write_named_uint32(w, "max_subsystems", tgt->max_subsystems);
556 	spdk_json_write_object_end(w);
557 
558 	spdk_json_write_object_end(w);
559 
560 	spdk_json_write_object_begin(w);
561 	spdk_json_write_named_string(w, "method", "nvmf_set_crdt");
562 	spdk_json_write_named_object_begin(w, "params");
563 	spdk_json_write_named_uint32(w, "crdt1", tgt->crdt[0]);
564 	spdk_json_write_named_uint32(w, "crdt2", tgt->crdt[1]);
565 	spdk_json_write_named_uint32(w, "crdt3", tgt->crdt[2]);
566 	spdk_json_write_object_end(w);
567 	spdk_json_write_object_end(w);
568 
569 	/* write transports */
570 	TAILQ_FOREACH(transport, &tgt->transports, link) {
571 		spdk_json_write_object_begin(w);
572 		spdk_json_write_named_string(w, "method", "nvmf_create_transport");
573 		nvmf_transport_dump_opts(transport, w, true);
574 		spdk_json_write_object_end(w);
575 	}
576 
577 	subsystem = spdk_nvmf_subsystem_get_first(tgt);
578 	while (subsystem) {
579 		nvmf_write_subsystem_config_json(w, subsystem);
580 		subsystem = spdk_nvmf_subsystem_get_next(subsystem);
581 	}
582 }
583 
584 static void
585 nvmf_listen_opts_copy(struct spdk_nvmf_listen_opts *opts,
586 		      const struct spdk_nvmf_listen_opts *opts_src, size_t opts_size)
587 {
588 	assert(opts);
589 	assert(opts_src);
590 
591 	opts->opts_size = opts_size;
592 
593 #define SET_FIELD(field) \
594     if (offsetof(struct spdk_nvmf_listen_opts, field) + sizeof(opts->field) <= opts_size) { \
595                  opts->field = opts_src->field; \
596     } \
597 
598 	SET_FIELD(transport_specific);
599 #undef SET_FIELD
600 
601 	/* Do not remove this statement, you should always update this statement when you adding a new field,
602 	 * and do not forget to add the SET_FIELD statement for your added field. */
603 	SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_listen_opts) == 16, "Incorrect size");
604 }
605 
606 void
607 spdk_nvmf_listen_opts_init(struct spdk_nvmf_listen_opts *opts, size_t opts_size)
608 {
609 	struct spdk_nvmf_listen_opts opts_local = {};
610 
611 	/* local version of opts should have defaults set here */
612 
613 	nvmf_listen_opts_copy(opts, &opts_local, opts_size);
614 }
615 
616 int
617 spdk_nvmf_tgt_listen_ext(struct spdk_nvmf_tgt *tgt, const struct spdk_nvme_transport_id *trid,
618 			 struct spdk_nvmf_listen_opts *opts)
619 {
620 	struct spdk_nvmf_transport *transport;
621 	int rc;
622 	struct spdk_nvmf_listen_opts opts_local = {};
623 
624 	if (!opts) {
625 		SPDK_ERRLOG("opts should not be NULL\n");
626 		return -EINVAL;
627 	}
628 
629 	if (!opts->opts_size) {
630 		SPDK_ERRLOG("The opts_size in opts structure should not be zero\n");
631 		return -EINVAL;
632 	}
633 
634 	transport = spdk_nvmf_tgt_get_transport(tgt, trid->trstring);
635 	if (!transport) {
636 		SPDK_ERRLOG("Unable to find %s transport. The transport must be created first also make sure it is properly registered.\n",
637 			    trid->trstring);
638 		return -EINVAL;
639 	}
640 
641 	nvmf_listen_opts_copy(&opts_local, opts, opts->opts_size);
642 	rc = spdk_nvmf_transport_listen(transport, trid, &opts_local);
643 	if (rc < 0) {
644 		SPDK_ERRLOG("Unable to listen on address '%s'\n", trid->traddr);
645 	}
646 
647 	return rc;
648 }
649 
650 int
651 spdk_nvmf_tgt_stop_listen(struct spdk_nvmf_tgt *tgt,
652 			  struct spdk_nvme_transport_id *trid)
653 {
654 	struct spdk_nvmf_transport *transport;
655 	int rc;
656 
657 	transport = spdk_nvmf_tgt_get_transport(tgt, trid->trstring);
658 	if (!transport) {
659 		SPDK_ERRLOG("Unable to find %s transport. The transport must be created first also make sure it is properly registered.\n",
660 			    trid->trstring);
661 		return -EINVAL;
662 	}
663 
664 	rc = spdk_nvmf_transport_stop_listen(transport, trid);
665 	if (rc < 0) {
666 		SPDK_ERRLOG("Failed to stop listening on address '%s'\n", trid->traddr);
667 		return rc;
668 	}
669 	return 0;
670 }
671 
672 struct spdk_nvmf_tgt_add_transport_ctx {
673 	struct spdk_nvmf_tgt *tgt;
674 	struct spdk_nvmf_transport *transport;
675 	spdk_nvmf_tgt_add_transport_done_fn cb_fn;
676 	void *cb_arg;
677 	int status;
678 };
679 
680 static void
681 _nvmf_tgt_remove_transport_done(struct spdk_io_channel_iter *i, int status)
682 {
683 	struct spdk_nvmf_tgt_add_transport_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
684 
685 	ctx->cb_fn(ctx->cb_arg, ctx->status);
686 	free(ctx);
687 }
688 
689 static void
690 _nvmf_tgt_remove_transport(struct spdk_io_channel_iter *i)
691 {
692 	struct spdk_nvmf_tgt_add_transport_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
693 	struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i);
694 	struct spdk_nvmf_poll_group *group = spdk_io_channel_get_ctx(ch);
695 	struct spdk_nvmf_transport_poll_group *tgroup, *tmp;
696 
697 	TAILQ_FOREACH_SAFE(tgroup, &group->tgroups, link, tmp) {
698 		if (tgroup->transport == ctx->transport) {
699 			TAILQ_REMOVE(&group->tgroups, tgroup, link);
700 			nvmf_transport_poll_group_destroy(tgroup);
701 		}
702 	}
703 
704 	spdk_for_each_channel_continue(i, 0);
705 }
706 
707 static void
708 _nvmf_tgt_add_transport_done(struct spdk_io_channel_iter *i, int status)
709 {
710 	struct spdk_nvmf_tgt_add_transport_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
711 
712 	if (status) {
713 		ctx->status = status;
714 		spdk_for_each_channel(ctx->tgt,
715 				      _nvmf_tgt_remove_transport,
716 				      ctx,
717 				      _nvmf_tgt_remove_transport_done);
718 		return;
719 	}
720 
721 	ctx->transport->tgt = ctx->tgt;
722 	TAILQ_INSERT_TAIL(&ctx->tgt->transports, ctx->transport, link);
723 	ctx->cb_fn(ctx->cb_arg, status);
724 	free(ctx);
725 }
726 
727 static void
728 _nvmf_tgt_add_transport(struct spdk_io_channel_iter *i)
729 {
730 	struct spdk_nvmf_tgt_add_transport_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
731 	struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i);
732 	struct spdk_nvmf_poll_group *group = spdk_io_channel_get_ctx(ch);
733 	int rc;
734 
735 	rc = nvmf_poll_group_add_transport(group, ctx->transport);
736 	spdk_for_each_channel_continue(i, rc);
737 }
738 
739 void
740 spdk_nvmf_tgt_add_transport(struct spdk_nvmf_tgt *tgt,
741 			    struct spdk_nvmf_transport *transport,
742 			    spdk_nvmf_tgt_add_transport_done_fn cb_fn,
743 			    void *cb_arg)
744 {
745 	struct spdk_nvmf_tgt_add_transport_ctx *ctx;
746 
747 	SPDK_DTRACE_PROBE2(nvmf_tgt_add_transport, transport, tgt->name);
748 
749 	if (spdk_nvmf_tgt_get_transport(tgt, transport->ops->name)) {
750 		cb_fn(cb_arg, -EEXIST);
751 		return; /* transport already created */
752 	}
753 
754 	ctx = calloc(1, sizeof(*ctx));
755 	if (!ctx) {
756 		cb_fn(cb_arg, -ENOMEM);
757 		return;
758 	}
759 
760 	ctx->tgt = tgt;
761 	ctx->transport = transport;
762 	ctx->cb_fn = cb_fn;
763 	ctx->cb_arg = cb_arg;
764 
765 	spdk_for_each_channel(tgt,
766 			      _nvmf_tgt_add_transport,
767 			      ctx,
768 			      _nvmf_tgt_add_transport_done);
769 }
770 
771 struct spdk_nvmf_subsystem *
772 spdk_nvmf_tgt_find_subsystem(struct spdk_nvmf_tgt *tgt, const char *subnqn)
773 {
774 	struct spdk_nvmf_subsystem	*subsystem;
775 	uint32_t sid;
776 
777 	if (!subnqn) {
778 		return NULL;
779 	}
780 
781 	/* Ensure that subnqn is null terminated */
782 	if (!memchr(subnqn, '\0', SPDK_NVMF_NQN_MAX_LEN + 1)) {
783 		SPDK_ERRLOG("Connect SUBNQN is not null terminated\n");
784 		return NULL;
785 	}
786 
787 	for (sid = 0; sid < tgt->max_subsystems; sid++) {
788 		subsystem = tgt->subsystems[sid];
789 		if (subsystem == NULL) {
790 			continue;
791 		}
792 
793 		if (strcmp(subnqn, subsystem->subnqn) == 0) {
794 			return subsystem;
795 		}
796 	}
797 
798 	return NULL;
799 }
800 
801 struct spdk_nvmf_transport *
802 spdk_nvmf_tgt_get_transport(struct spdk_nvmf_tgt *tgt, const char *transport_name)
803 {
804 	struct spdk_nvmf_transport *transport;
805 
806 	TAILQ_FOREACH(transport, &tgt->transports, link) {
807 		if (!strncasecmp(transport->ops->name, transport_name, SPDK_NVMF_TRSTRING_MAX_LEN)) {
808 			return transport;
809 		}
810 	}
811 	return NULL;
812 }
813 
/* Message payload used to hand a new qpair to the thread of its chosen poll group. */
struct nvmf_new_qpair_ctx {
	/* The newly accepted qpair. */
	struct spdk_nvmf_qpair *qpair;
	/* Poll group the qpair is to be added to. */
	struct spdk_nvmf_poll_group *group;
};
818 
819 static void
820 _nvmf_poll_group_add(void *_ctx)
821 {
822 	struct nvmf_new_qpair_ctx *ctx = _ctx;
823 	struct spdk_nvmf_qpair *qpair = ctx->qpair;
824 	struct spdk_nvmf_poll_group *group = ctx->group;
825 
826 	free(_ctx);
827 
828 	if (spdk_nvmf_poll_group_add(group, qpair) != 0) {
829 		SPDK_ERRLOG("Unable to add the qpair to a poll group.\n");
830 		spdk_nvmf_qpair_disconnect(qpair, NULL, NULL);
831 	}
832 }
833 
834 void
835 spdk_nvmf_tgt_new_qpair(struct spdk_nvmf_tgt *tgt, struct spdk_nvmf_qpair *qpair)
836 {
837 	struct spdk_nvmf_poll_group *group;
838 	struct nvmf_new_qpair_ctx *ctx;
839 
840 	group = spdk_nvmf_get_optimal_poll_group(qpair);
841 	if (group == NULL) {
842 		if (tgt->next_poll_group == NULL) {
843 			tgt->next_poll_group = TAILQ_FIRST(&tgt->poll_groups);
844 			if (tgt->next_poll_group == NULL) {
845 				SPDK_ERRLOG("No poll groups exist.\n");
846 				spdk_nvmf_qpair_disconnect(qpair, NULL, NULL);
847 				return;
848 			}
849 		}
850 		group = tgt->next_poll_group;
851 		tgt->next_poll_group = TAILQ_NEXT(group, link);
852 	}
853 
854 	ctx = calloc(1, sizeof(*ctx));
855 	if (!ctx) {
856 		SPDK_ERRLOG("Unable to send message to poll group.\n");
857 		spdk_nvmf_qpair_disconnect(qpair, NULL, NULL);
858 		return;
859 	}
860 
861 	ctx->qpair = qpair;
862 	ctx->group = group;
863 
864 	spdk_thread_send_msg(group->thread, _nvmf_poll_group_add, ctx);
865 }
866 
/*
 * Obtain the poll group of the calling thread for the given target by taking
 * an I/O channel on the target. Returns NULL when the channel cannot be
 * obtained.
 */
struct spdk_nvmf_poll_group *
spdk_nvmf_poll_group_create(struct spdk_nvmf_tgt *tgt)
{
	struct spdk_io_channel *channel = spdk_get_io_channel(tgt);

	if (channel == NULL) {
		SPDK_ERRLOG("Unable to get I/O channel for target\n");
		return NULL;
	}

	/* The poll group is the context buffer of the channel. */
	return spdk_io_channel_get_ctx(channel);
}
880 
881 void
882 spdk_nvmf_poll_group_destroy(struct spdk_nvmf_poll_group *group,
883 			     spdk_nvmf_poll_group_destroy_done_fn cb_fn,
884 			     void *cb_arg)
885 {
886 	assert(group->destroy_cb_fn == NULL);
887 	group->destroy_cb_fn = cb_fn;
888 	group->destroy_cb_arg = cb_arg;
889 
890 	/* This function will put the io_channel associated with this poll group */
891 	nvmf_tgt_destroy_poll_group_qpairs(group);
892 }
893 
894 int
895 spdk_nvmf_poll_group_add(struct spdk_nvmf_poll_group *group,
896 			 struct spdk_nvmf_qpair *qpair)
897 {
898 	int rc = -1;
899 	struct spdk_nvmf_transport_poll_group *tgroup;
900 
901 	TAILQ_INIT(&qpair->outstanding);
902 	qpair->group = group;
903 	qpair->ctrlr = NULL;
904 	qpair->disconnect_started = false;
905 
906 	TAILQ_FOREACH(tgroup, &group->tgroups, link) {
907 		if (tgroup->transport == qpair->transport) {
908 			rc = nvmf_transport_poll_group_add(tgroup, qpair);
909 			break;
910 		}
911 	}
912 
913 	/* We add the qpair to the group only it is successfully added into the tgroup */
914 	if (rc == 0) {
915 		SPDK_DTRACE_PROBE2(nvmf_poll_group_add_qpair, qpair, spdk_thread_get_id(group->thread));
916 		TAILQ_INSERT_TAIL(&group->qpairs, qpair, link);
917 		nvmf_qpair_set_state(qpair, SPDK_NVMF_QPAIR_ACTIVE);
918 	}
919 
920 	return rc;
921 }
922 
/* Message handler adapter: destruct the controller passed as the message context. */
static void
_nvmf_ctrlr_destruct(void *ctx)
{
	nvmf_ctrlr_destruct((struct spdk_nvmf_ctrlr *)ctx);
}
930 
931 static void
932 _nvmf_ctrlr_free_from_qpair(void *ctx)
933 {
934 	struct nvmf_qpair_disconnect_ctx *qpair_ctx = ctx;
935 	struct spdk_nvmf_ctrlr *ctrlr = qpair_ctx->ctrlr;
936 	uint32_t count;
937 
938 	spdk_bit_array_clear(ctrlr->qpair_mask, qpair_ctx->qid);
939 	count = spdk_bit_array_count_set(ctrlr->qpair_mask);
940 	if (count == 0) {
941 		assert(!ctrlr->in_destruct);
942 		SPDK_DEBUGLOG(nvmf, "Last qpair %u, destroy ctrlr 0x%hx\n", qpair_ctx->qid, ctrlr->cntlid);
943 		ctrlr->in_destruct = true;
944 		spdk_thread_send_msg(ctrlr->subsys->thread, _nvmf_ctrlr_destruct, ctrlr);
945 	}
946 	free(qpair_ctx);
947 }
948 
949 static void
950 _nvmf_transport_qpair_fini_complete(void *cb_ctx)
951 {
952 	struct nvmf_qpair_disconnect_ctx *qpair_ctx = cb_ctx;
953 	struct spdk_nvmf_ctrlr *ctrlr;
954 	/* Store cb args since cb_ctx can be freed in _nvmf_ctrlr_free_from_qpair */
955 	nvmf_qpair_disconnect_cb cb_fn = qpair_ctx->cb_fn;
956 	void *cb_arg = qpair_ctx->ctx;
957 	struct spdk_thread *cb_thread = qpair_ctx->thread;
958 
959 	ctrlr = qpair_ctx->ctrlr;
960 	SPDK_DEBUGLOG(nvmf, "Finish destroying qid %u\n", qpair_ctx->qid);
961 
962 	if (ctrlr) {
963 		if (qpair_ctx->qid == 0) {
964 			/* Admin qpair is removed, so set the pointer to NULL.
965 			 * This operation is safe since we are on ctrlr thread now, admin qpair's thread is the same
966 			 * as controller's thread */
967 			assert(ctrlr->thread == spdk_get_thread());
968 			ctrlr->admin_qpair = NULL;
969 		}
970 		/* Free qpair id from controller's bit mask and destroy the controller if it is the last qpair */
971 		if (ctrlr->thread) {
972 			spdk_thread_send_msg(ctrlr->thread, _nvmf_ctrlr_free_from_qpair, qpair_ctx);
973 		} else {
974 			_nvmf_ctrlr_free_from_qpair(qpair_ctx);
975 		}
976 	} else {
977 		free(qpair_ctx);
978 	}
979 
980 	if (cb_fn) {
981 		spdk_thread_send_msg(cb_thread, cb_fn, cb_arg);
982 	}
983 }
984 
985 void
986 spdk_nvmf_poll_group_remove(struct spdk_nvmf_qpair *qpair)
987 {
988 	struct spdk_nvmf_transport_poll_group *tgroup;
989 	int rc;
990 
991 	SPDK_DTRACE_PROBE2(nvmf_poll_group_remove_qpair, qpair,
992 			   spdk_thread_get_id(qpair->group->thread));
993 	nvmf_qpair_set_state(qpair, SPDK_NVMF_QPAIR_ERROR);
994 
995 	/* Find the tgroup and remove the qpair from the tgroup */
996 	TAILQ_FOREACH(tgroup, &qpair->group->tgroups, link) {
997 		if (tgroup->transport == qpair->transport) {
998 			rc = nvmf_transport_poll_group_remove(tgroup, qpair);
999 			if (rc && (rc != ENOTSUP)) {
1000 				SPDK_ERRLOG("Cannot remove qpair=%p from transport group=%p\n",
1001 					    qpair, tgroup);
1002 			}
1003 			break;
1004 		}
1005 	}
1006 
1007 	TAILQ_REMOVE(&qpair->group->qpairs, qpair, link);
1008 	qpair->group = NULL;
1009 }
1010 
1011 static void
1012 _nvmf_qpair_destroy(void *ctx, int status)
1013 {
1014 	struct nvmf_qpair_disconnect_ctx *qpair_ctx = ctx;
1015 	struct spdk_nvmf_qpair *qpair = qpair_ctx->qpair;
1016 	struct spdk_nvmf_ctrlr *ctrlr = qpair->ctrlr;
1017 	struct spdk_nvmf_request *req, *tmp;
1018 	struct spdk_nvmf_subsystem_poll_group *sgroup;
1019 
1020 	assert(qpair->state == SPDK_NVMF_QPAIR_DEACTIVATING);
1021 	qpair_ctx->qid = qpair->qid;
1022 
1023 	if (ctrlr) {
1024 		if (0 == qpair->qid) {
1025 			assert(qpair->group->stat.current_admin_qpairs > 0);
1026 			qpair->group->stat.current_admin_qpairs--;
1027 		} else {
1028 			assert(qpair->group->stat.current_io_qpairs > 0);
1029 			qpair->group->stat.current_io_qpairs--;
1030 		}
1031 
1032 		sgroup = &qpair->group->sgroups[ctrlr->subsys->id];
1033 		TAILQ_FOREACH_SAFE(req, &sgroup->queued, link, tmp) {
1034 			if (req->qpair == qpair) {
1035 				TAILQ_REMOVE(&sgroup->queued, req, link);
1036 				if (nvmf_transport_req_free(req)) {
1037 					SPDK_ERRLOG("Transport request free error!/n");
1038 				}
1039 			}
1040 		}
1041 	}
1042 
1043 	qpair_ctx->ctrlr = ctrlr;
1044 	spdk_nvmf_poll_group_remove(qpair);
1045 	nvmf_transport_qpair_fini(qpair, _nvmf_transport_qpair_fini_complete, qpair_ctx);
1046 }
1047 
1048 static void
1049 _nvmf_qpair_disconnect_msg(void *ctx)
1050 {
1051 	struct nvmf_qpair_disconnect_ctx *qpair_ctx = ctx;
1052 
1053 	spdk_nvmf_qpair_disconnect(qpair_ctx->qpair, qpair_ctx->cb_fn, qpair_ctx->ctx);
1054 	free(ctx);
1055 }
1056 
1057 int
1058 spdk_nvmf_qpair_disconnect(struct spdk_nvmf_qpair *qpair, nvmf_qpair_disconnect_cb cb_fn, void *ctx)
1059 {
1060 	struct spdk_nvmf_poll_group *group = qpair->group;
1061 	struct nvmf_qpair_disconnect_ctx *qpair_ctx;
1062 
1063 	if (__atomic_test_and_set(&qpair->disconnect_started, __ATOMIC_RELAXED)) {
1064 		if (cb_fn) {
1065 			cb_fn(ctx);
1066 		}
1067 		return 0;
1068 	}
1069 
1070 	/* If we get a qpair in the uninitialized state, we can just destroy it immediately */
1071 	if (qpair->state == SPDK_NVMF_QPAIR_UNINITIALIZED) {
1072 		nvmf_transport_qpair_fini(qpair, NULL, NULL);
1073 		if (cb_fn) {
1074 			cb_fn(ctx);
1075 		}
1076 		return 0;
1077 	}
1078 
1079 	assert(group != NULL);
1080 	if (spdk_get_thread() != group->thread) {
1081 		/* clear the atomic so we can set it on the next call on the proper thread. */
1082 		__atomic_clear(&qpair->disconnect_started, __ATOMIC_RELAXED);
1083 		qpair_ctx = calloc(1, sizeof(struct nvmf_qpair_disconnect_ctx));
1084 		if (!qpair_ctx) {
1085 			SPDK_ERRLOG("Unable to allocate context for nvmf_qpair_disconnect\n");
1086 			return -ENOMEM;
1087 		}
1088 		qpair_ctx->qpair = qpair;
1089 		qpair_ctx->cb_fn = cb_fn;
1090 		qpair_ctx->thread = group->thread;
1091 		qpair_ctx->ctx = ctx;
1092 		spdk_thread_send_msg(group->thread, _nvmf_qpair_disconnect_msg, qpair_ctx);
1093 		return 0;
1094 	}
1095 
1096 	SPDK_DTRACE_PROBE2(nvmf_qpair_disconnect, qpair, spdk_thread_get_id(group->thread));
1097 	assert(qpair->state == SPDK_NVMF_QPAIR_ACTIVE);
1098 	nvmf_qpair_set_state(qpair, SPDK_NVMF_QPAIR_DEACTIVATING);
1099 
1100 	qpair_ctx = calloc(1, sizeof(struct nvmf_qpair_disconnect_ctx));
1101 	if (!qpair_ctx) {
1102 		SPDK_ERRLOG("Unable to allocate context for nvmf_qpair_disconnect\n");
1103 		return -ENOMEM;
1104 	}
1105 
1106 	qpair_ctx->qpair = qpair;
1107 	qpair_ctx->cb_fn = cb_fn;
1108 	qpair_ctx->thread = group->thread;
1109 	qpair_ctx->ctx = ctx;
1110 
1111 	/* Check for outstanding I/O */
1112 	if (!TAILQ_EMPTY(&qpair->outstanding)) {
1113 		SPDK_DTRACE_PROBE2(nvmf_poll_group_drain_qpair, qpair, spdk_thread_get_id(group->thread));
1114 		qpair->state_cb = _nvmf_qpair_destroy;
1115 		qpair->state_cb_arg = qpair_ctx;
1116 		nvmf_qpair_abort_pending_zcopy_reqs(qpair);
1117 		nvmf_qpair_free_aer(qpair);
1118 		return 0;
1119 	}
1120 
1121 	_nvmf_qpair_destroy(qpair_ctx, 0);
1122 
1123 	return 0;
1124 }
1125 
/*
 * Fill in trid with the peer transport ID of the qpair by delegating to the
 * transport layer; the transport's return code is passed through unchanged.
 */
int
spdk_nvmf_qpair_get_peer_trid(struct spdk_nvmf_qpair *qpair,
			      struct spdk_nvme_transport_id *trid)
{
	int rc;

	rc = nvmf_transport_qpair_get_peer_trid(qpair, trid);

	return rc;
}
1132 
/*
 * Fill in trid with the local transport ID of the qpair by delegating to the
 * transport layer; the transport's return code is passed through unchanged.
 */
int
spdk_nvmf_qpair_get_local_trid(struct spdk_nvmf_qpair *qpair,
			       struct spdk_nvme_transport_id *trid)
{
	int rc;

	rc = nvmf_transport_qpair_get_local_trid(qpair, trid);

	return rc;
}
1139 
/*
 * Fill in trid with the listen transport ID of the qpair by delegating to the
 * transport layer; the transport's return code is passed through unchanged.
 */
int
spdk_nvmf_qpair_get_listen_trid(struct spdk_nvmf_qpair *qpair,
				struct spdk_nvme_transport_id *trid)
{
	int rc;

	rc = nvmf_transport_qpair_get_listen_trid(qpair, trid);

	return rc;
}
1146 
1147 int
1148 nvmf_poll_group_add_transport(struct spdk_nvmf_poll_group *group,
1149 			      struct spdk_nvmf_transport *transport)
1150 {
1151 	struct spdk_nvmf_transport_poll_group *tgroup;
1152 
1153 	TAILQ_FOREACH(tgroup, &group->tgroups, link) {
1154 		if (tgroup->transport == transport) {
1155 			/* Transport already in the poll group */
1156 			return 0;
1157 		}
1158 	}
1159 
1160 	tgroup = nvmf_transport_poll_group_create(transport, group);
1161 	if (!tgroup) {
1162 		SPDK_ERRLOG("Unable to create poll group for transport\n");
1163 		return -1;
1164 	}
1165 	SPDK_DTRACE_PROBE2(nvmf_transport_poll_group_create, transport, spdk_thread_get_id(group->thread));
1166 
1167 	tgroup->group = group;
1168 	TAILQ_INSERT_TAIL(&group->tgroups, tgroup, link);
1169 
1170 	return 0;
1171 }
1172 
1173 static int
1174 poll_group_update_subsystem(struct spdk_nvmf_poll_group *group,
1175 			    struct spdk_nvmf_subsystem *subsystem)
1176 {
1177 	struct spdk_nvmf_subsystem_poll_group *sgroup;
1178 	uint32_t new_num_ns, old_num_ns;
1179 	uint32_t i, j;
1180 	struct spdk_nvmf_ns *ns;
1181 	struct spdk_nvmf_registrant *reg, *tmp;
1182 	struct spdk_io_channel *ch;
1183 	struct spdk_nvmf_subsystem_pg_ns_info *ns_info;
1184 	struct spdk_nvmf_ctrlr *ctrlr;
1185 	bool ns_changed;
1186 
1187 	/* Make sure our poll group has memory for this subsystem allocated */
1188 	if (subsystem->id >= group->num_sgroups) {
1189 		return -ENOMEM;
1190 	}
1191 
1192 	sgroup = &group->sgroups[subsystem->id];
1193 
1194 	/* Make sure the array of namespace information is the correct size */
1195 	new_num_ns = subsystem->max_nsid;
1196 	old_num_ns = sgroup->num_ns;
1197 
1198 	ns_changed = false;
1199 
1200 	if (old_num_ns == 0) {
1201 		if (new_num_ns > 0) {
1202 			/* First allocation */
1203 			sgroup->ns_info = calloc(new_num_ns, sizeof(struct spdk_nvmf_subsystem_pg_ns_info));
1204 			if (!sgroup->ns_info) {
1205 				return -ENOMEM;
1206 			}
1207 		}
1208 	} else if (new_num_ns > old_num_ns) {
1209 		void *buf;
1210 
1211 		/* Make the array larger */
1212 		buf = realloc(sgroup->ns_info, new_num_ns * sizeof(struct spdk_nvmf_subsystem_pg_ns_info));
1213 		if (!buf) {
1214 			return -ENOMEM;
1215 		}
1216 
1217 		sgroup->ns_info = buf;
1218 
1219 		/* Null out the new namespace information slots */
1220 		for (i = old_num_ns; i < new_num_ns; i++) {
1221 			memset(&sgroup->ns_info[i], 0, sizeof(struct spdk_nvmf_subsystem_pg_ns_info));
1222 		}
1223 	} else if (new_num_ns < old_num_ns) {
1224 		void *buf;
1225 
1226 		/* Free the extra I/O channels */
1227 		for (i = new_num_ns; i < old_num_ns; i++) {
1228 			ns_info = &sgroup->ns_info[i];
1229 
1230 			if (ns_info->channel) {
1231 				spdk_put_io_channel(ns_info->channel);
1232 				ns_info->channel = NULL;
1233 			}
1234 		}
1235 
1236 		/* Make the array smaller */
1237 		if (new_num_ns > 0) {
1238 			buf = realloc(sgroup->ns_info, new_num_ns * sizeof(struct spdk_nvmf_subsystem_pg_ns_info));
1239 			if (!buf) {
1240 				return -ENOMEM;
1241 			}
1242 			sgroup->ns_info = buf;
1243 		} else {
1244 			free(sgroup->ns_info);
1245 			sgroup->ns_info = NULL;
1246 		}
1247 	}
1248 
1249 	sgroup->num_ns = new_num_ns;
1250 
1251 	/* Detect bdevs that were added or removed */
1252 	for (i = 0; i < sgroup->num_ns; i++) {
1253 		ns = subsystem->ns[i];
1254 		ns_info = &sgroup->ns_info[i];
1255 		ch = ns_info->channel;
1256 
1257 		if (ns == NULL && ch == NULL) {
1258 			/* Both NULL. Leave empty */
1259 		} else if (ns == NULL && ch != NULL) {
1260 			/* There was a channel here, but the namespace is gone. */
1261 			ns_changed = true;
1262 			spdk_put_io_channel(ch);
1263 			ns_info->channel = NULL;
1264 		} else if (ns != NULL && ch == NULL) {
1265 			/* A namespace appeared but there is no channel yet */
1266 			ns_changed = true;
1267 			ch = spdk_bdev_get_io_channel(ns->desc);
1268 			if (ch == NULL) {
1269 				SPDK_ERRLOG("Could not allocate I/O channel.\n");
1270 				return -ENOMEM;
1271 			}
1272 			ns_info->channel = ch;
1273 		} else if (spdk_uuid_compare(&ns_info->uuid, spdk_bdev_get_uuid(ns->bdev)) != 0) {
1274 			/* A namespace was here before, but was replaced by a new one. */
1275 			ns_changed = true;
1276 			spdk_put_io_channel(ns_info->channel);
1277 			memset(ns_info, 0, sizeof(*ns_info));
1278 
1279 			ch = spdk_bdev_get_io_channel(ns->desc);
1280 			if (ch == NULL) {
1281 				SPDK_ERRLOG("Could not allocate I/O channel.\n");
1282 				return -ENOMEM;
1283 			}
1284 			ns_info->channel = ch;
1285 		} else if (ns_info->num_blocks != spdk_bdev_get_num_blocks(ns->bdev)) {
1286 			/* Namespace is still there but size has changed */
1287 			SPDK_DEBUGLOG(nvmf, "Namespace resized: subsystem_id %u,"
1288 				      " nsid %u, pg %p, old %" PRIu64 ", new %" PRIu64 "\n",
1289 				      subsystem->id,
1290 				      ns->nsid,
1291 				      group,
1292 				      ns_info->num_blocks,
1293 				      spdk_bdev_get_num_blocks(ns->bdev));
1294 			ns_changed = true;
1295 		}
1296 
1297 		if (ns == NULL) {
1298 			memset(ns_info, 0, sizeof(*ns_info));
1299 		} else {
1300 			ns_info->uuid = *spdk_bdev_get_uuid(ns->bdev);
1301 			ns_info->num_blocks = spdk_bdev_get_num_blocks(ns->bdev);
1302 			ns_info->crkey = ns->crkey;
1303 			ns_info->rtype = ns->rtype;
1304 			if (ns->holder) {
1305 				ns_info->holder_id = ns->holder->hostid;
1306 			}
1307 
1308 			memset(&ns_info->reg_hostid, 0, SPDK_NVMF_MAX_NUM_REGISTRANTS * sizeof(struct spdk_uuid));
1309 			j = 0;
1310 			TAILQ_FOREACH_SAFE(reg, &ns->registrants, link, tmp) {
1311 				if (j >= SPDK_NVMF_MAX_NUM_REGISTRANTS) {
1312 					SPDK_ERRLOG("Maximum %u registrants can support.\n", SPDK_NVMF_MAX_NUM_REGISTRANTS);
1313 					return -EINVAL;
1314 				}
1315 				ns_info->reg_hostid[j++] = reg->hostid;
1316 			}
1317 		}
1318 	}
1319 
1320 	if (ns_changed) {
1321 		TAILQ_FOREACH(ctrlr, &subsystem->ctrlrs, link) {
1322 			/* It is possible that a ctrlr was added but the admin_qpair hasn't been
1323 			 * assigned yet.
1324 			 */
1325 			if (!ctrlr->admin_qpair) {
1326 				continue;
1327 			}
1328 			if (ctrlr->admin_qpair->group == group) {
1329 				nvmf_ctrlr_async_event_ns_notice(ctrlr);
1330 				nvmf_ctrlr_async_event_ana_change_notice(ctrlr);
1331 			}
1332 		}
1333 	}
1334 
1335 	return 0;
1336 }
1337 
/*
 * Externally visible entry point for refreshing a poll group's view of a
 * subsystem; forwards to poll_group_update_subsystem() and returns its result.
 */
int
nvmf_poll_group_update_subsystem(struct spdk_nvmf_poll_group *group,
				 struct spdk_nvmf_subsystem *subsystem)
{
	int rc;

	rc = poll_group_update_subsystem(group, subsystem);

	return rc;
}
1344 
1345 int
1346 nvmf_poll_group_add_subsystem(struct spdk_nvmf_poll_group *group,
1347 			      struct spdk_nvmf_subsystem *subsystem,
1348 			      spdk_nvmf_poll_group_mod_done cb_fn, void *cb_arg)
1349 {
1350 	int rc = 0;
1351 	struct spdk_nvmf_subsystem_poll_group *sgroup = &group->sgroups[subsystem->id];
1352 	uint32_t i;
1353 
1354 	TAILQ_INIT(&sgroup->queued);
1355 
1356 	rc = poll_group_update_subsystem(group, subsystem);
1357 	if (rc) {
1358 		nvmf_poll_group_remove_subsystem(group, subsystem, NULL, NULL);
1359 		goto fini;
1360 	}
1361 
1362 	sgroup->state = SPDK_NVMF_SUBSYSTEM_ACTIVE;
1363 
1364 	for (i = 0; i < sgroup->num_ns; i++) {
1365 		sgroup->ns_info[i].state = SPDK_NVMF_SUBSYSTEM_ACTIVE;
1366 	}
1367 
1368 fini:
1369 	if (cb_fn) {
1370 		cb_fn(cb_arg, rc);
1371 	}
1372 
1373 	SPDK_DTRACE_PROBE2(nvmf_poll_group_add_subsystem, spdk_thread_get_id(group->thread),
1374 			   subsystem->subnqn);
1375 
1376 	return rc;
1377 }
1378 
1379 static void
1380 _nvmf_poll_group_remove_subsystem_cb(void *ctx, int status)
1381 {
1382 	struct nvmf_qpair_disconnect_many_ctx *qpair_ctx = ctx;
1383 	struct spdk_nvmf_subsystem *subsystem;
1384 	struct spdk_nvmf_poll_group *group;
1385 	struct spdk_nvmf_subsystem_poll_group *sgroup;
1386 	spdk_nvmf_poll_group_mod_done cpl_fn = NULL;
1387 	void *cpl_ctx = NULL;
1388 	uint32_t nsid;
1389 
1390 	group = qpair_ctx->group;
1391 	subsystem = qpair_ctx->subsystem;
1392 	cpl_fn = qpair_ctx->cpl_fn;
1393 	cpl_ctx = qpair_ctx->cpl_ctx;
1394 	sgroup = &group->sgroups[subsystem->id];
1395 
1396 	if (status) {
1397 		goto fini;
1398 	}
1399 
1400 	for (nsid = 0; nsid < sgroup->num_ns; nsid++) {
1401 		if (sgroup->ns_info[nsid].channel) {
1402 			spdk_put_io_channel(sgroup->ns_info[nsid].channel);
1403 			sgroup->ns_info[nsid].channel = NULL;
1404 		}
1405 	}
1406 
1407 	sgroup->num_ns = 0;
1408 	free(sgroup->ns_info);
1409 	sgroup->ns_info = NULL;
1410 fini:
1411 	free(qpair_ctx);
1412 	if (cpl_fn) {
1413 		cpl_fn(cpl_ctx, status);
1414 	}
1415 }
1416 
1417 static void nvmf_poll_group_remove_subsystem_msg(void *ctx);
1418 
1419 static void
1420 remove_subsystem_qpair_cb(void *ctx)
1421 {
1422 	struct nvmf_qpair_disconnect_many_ctx *qpair_ctx = ctx;
1423 
1424 	assert(qpair_ctx->count > 0);
1425 	qpair_ctx->count--;
1426 	if (qpair_ctx->count == 0) {
1427 		/* All of the asynchronous callbacks for this context have been
1428 		 * completed.  Call nvmf_poll_group_remove_subsystem_msg() again
1429 		 * to check if all associated qpairs for this subsystem have
1430 		 * been removed from the poll group.
1431 		 */
1432 		nvmf_poll_group_remove_subsystem_msg(ctx);
1433 	}
1434 }
1435 
1436 static void
1437 nvmf_poll_group_remove_subsystem_msg(void *ctx)
1438 {
1439 	struct spdk_nvmf_qpair *qpair, *qpair_tmp;
1440 	struct spdk_nvmf_subsystem *subsystem;
1441 	struct spdk_nvmf_poll_group *group;
1442 	struct nvmf_qpair_disconnect_many_ctx *qpair_ctx = ctx;
1443 	bool qpairs_found = false;
1444 	int rc = 0;
1445 
1446 	group = qpair_ctx->group;
1447 	subsystem = qpair_ctx->subsystem;
1448 
1449 	/* Initialize count to 1.  This acts like a ref count, to ensure that if spdk_nvmf_qpair_disconnect
1450 	 * immediately invokes the callback (i.e. the qpairs is already in process of being disconnected)
1451 	 * that we don't recursively call nvmf_poll_group_remove_subsystem_msg before we've iterated the
1452 	 * full list of qpairs.
1453 	 */
1454 	qpair_ctx->count = 1;
1455 	TAILQ_FOREACH_SAFE(qpair, &group->qpairs, link, qpair_tmp) {
1456 		if ((qpair->ctrlr != NULL) && (qpair->ctrlr->subsys == subsystem)) {
1457 			qpairs_found = true;
1458 			qpair_ctx->count++;
1459 			rc = spdk_nvmf_qpair_disconnect(qpair, remove_subsystem_qpair_cb, ctx);
1460 			if (rc) {
1461 				break;
1462 			}
1463 		}
1464 	}
1465 	qpair_ctx->count--;
1466 
1467 	if (!qpairs_found) {
1468 		_nvmf_poll_group_remove_subsystem_cb(ctx, 0);
1469 		return;
1470 	}
1471 
1472 	if (qpair_ctx->count == 0 || rc) {
1473 		/* If count == 0, it means there were some qpairs in the poll group but they
1474 		 * were already in process of being disconnected.  So we send a message to this
1475 		 * same thread so that this function executes again later.  We won't actually
1476 		 * invoke the remove_subsystem_cb until all of the qpairs are actually removed
1477 		 * from the poll group.
1478 		 */
1479 		spdk_thread_send_msg(spdk_get_thread(), nvmf_poll_group_remove_subsystem_msg, ctx);
1480 	}
1481 }
1482 
1483 void
1484 nvmf_poll_group_remove_subsystem(struct spdk_nvmf_poll_group *group,
1485 				 struct spdk_nvmf_subsystem *subsystem,
1486 				 spdk_nvmf_poll_group_mod_done cb_fn, void *cb_arg)
1487 {
1488 	struct spdk_nvmf_subsystem_poll_group *sgroup;
1489 	struct nvmf_qpair_disconnect_many_ctx *ctx;
1490 	uint32_t i;
1491 
1492 	SPDK_DTRACE_PROBE3(nvmf_poll_group_remove_subsystem, group, spdk_thread_get_id(group->thread),
1493 			   subsystem->subnqn);
1494 
1495 	ctx = calloc(1, sizeof(struct nvmf_qpair_disconnect_many_ctx));
1496 	if (!ctx) {
1497 		SPDK_ERRLOG("Unable to allocate memory for context to remove poll subsystem\n");
1498 		if (cb_fn) {
1499 			cb_fn(cb_arg, -1);
1500 		}
1501 		return;
1502 	}
1503 
1504 	ctx->group = group;
1505 	ctx->subsystem = subsystem;
1506 	ctx->cpl_fn = cb_fn;
1507 	ctx->cpl_ctx = cb_arg;
1508 
1509 	sgroup = &group->sgroups[subsystem->id];
1510 	sgroup->state = SPDK_NVMF_SUBSYSTEM_INACTIVE;
1511 
1512 	for (i = 0; i < sgroup->num_ns; i++) {
1513 		sgroup->ns_info[i].state = SPDK_NVMF_SUBSYSTEM_INACTIVE;
1514 	}
1515 
1516 	nvmf_poll_group_remove_subsystem_msg(ctx);
1517 }
1518 
1519 void
1520 nvmf_poll_group_pause_subsystem(struct spdk_nvmf_poll_group *group,
1521 				struct spdk_nvmf_subsystem *subsystem,
1522 				uint32_t nsid,
1523 				spdk_nvmf_poll_group_mod_done cb_fn, void *cb_arg)
1524 {
1525 	struct spdk_nvmf_subsystem_poll_group *sgroup;
1526 	struct spdk_nvmf_subsystem_pg_ns_info *ns_info = NULL;
1527 	int rc = 0;
1528 	uint32_t i;
1529 
1530 	if (subsystem->id >= group->num_sgroups) {
1531 		rc = -1;
1532 		goto fini;
1533 	}
1534 
1535 	sgroup = &group->sgroups[subsystem->id];
1536 	if (sgroup->state == SPDK_NVMF_SUBSYSTEM_PAUSED) {
1537 		goto fini;
1538 	}
1539 	sgroup->state = SPDK_NVMF_SUBSYSTEM_PAUSING;
1540 
1541 	if (nsid == SPDK_NVME_GLOBAL_NS_TAG) {
1542 		for (i = 0; i < sgroup->num_ns; i++) {
1543 			ns_info = &sgroup->ns_info[i];
1544 			ns_info->state = SPDK_NVMF_SUBSYSTEM_PAUSING;
1545 		}
1546 	} else {
1547 		/* NOTE: This implicitly also checks for 0, since 0 - 1 wraps around to UINT32_MAX. */
1548 		if (nsid - 1 < sgroup->num_ns) {
1549 			ns_info  = &sgroup->ns_info[nsid - 1];
1550 			ns_info->state = SPDK_NVMF_SUBSYSTEM_PAUSING;
1551 		}
1552 	}
1553 
1554 	if (sgroup->mgmt_io_outstanding > 0) {
1555 		assert(sgroup->cb_fn == NULL);
1556 		sgroup->cb_fn = cb_fn;
1557 		assert(sgroup->cb_arg == NULL);
1558 		sgroup->cb_arg = cb_arg;
1559 		return;
1560 	}
1561 
1562 	if (nsid == SPDK_NVME_GLOBAL_NS_TAG) {
1563 		for (i = 0; i < sgroup->num_ns; i++) {
1564 			ns_info = &sgroup->ns_info[i];
1565 
1566 			if (ns_info->io_outstanding > 0) {
1567 				assert(sgroup->cb_fn == NULL);
1568 				sgroup->cb_fn = cb_fn;
1569 				assert(sgroup->cb_arg == NULL);
1570 				sgroup->cb_arg = cb_arg;
1571 				return;
1572 			}
1573 		}
1574 	} else {
1575 		if (ns_info != NULL && ns_info->io_outstanding > 0) {
1576 			assert(sgroup->cb_fn == NULL);
1577 			sgroup->cb_fn = cb_fn;
1578 			assert(sgroup->cb_arg == NULL);
1579 			sgroup->cb_arg = cb_arg;
1580 			return;
1581 		}
1582 	}
1583 
1584 	assert(sgroup->mgmt_io_outstanding == 0);
1585 	sgroup->state = SPDK_NVMF_SUBSYSTEM_PAUSED;
1586 fini:
1587 	if (cb_fn) {
1588 		cb_fn(cb_arg, rc);
1589 	}
1590 }
1591 
1592 void
1593 nvmf_poll_group_resume_subsystem(struct spdk_nvmf_poll_group *group,
1594 				 struct spdk_nvmf_subsystem *subsystem,
1595 				 spdk_nvmf_poll_group_mod_done cb_fn, void *cb_arg)
1596 {
1597 	struct spdk_nvmf_request *req, *tmp;
1598 	struct spdk_nvmf_subsystem_poll_group *sgroup;
1599 	int rc = 0;
1600 	uint32_t i;
1601 
1602 	if (subsystem->id >= group->num_sgroups) {
1603 		rc = -1;
1604 		goto fini;
1605 	}
1606 
1607 	sgroup = &group->sgroups[subsystem->id];
1608 
1609 	if (sgroup->state == SPDK_NVMF_SUBSYSTEM_ACTIVE) {
1610 		goto fini;
1611 	}
1612 
1613 	rc = poll_group_update_subsystem(group, subsystem);
1614 	if (rc) {
1615 		goto fini;
1616 	}
1617 
1618 	for (i = 0; i < sgroup->num_ns; i++) {
1619 		sgroup->ns_info[i].state = SPDK_NVMF_SUBSYSTEM_ACTIVE;
1620 	}
1621 
1622 	sgroup->state = SPDK_NVMF_SUBSYSTEM_ACTIVE;
1623 
1624 	/* Release all queued requests */
1625 	TAILQ_FOREACH_SAFE(req, &sgroup->queued, link, tmp) {
1626 		TAILQ_REMOVE(&sgroup->queued, req, link);
1627 		if (spdk_nvmf_request_using_zcopy(req)) {
1628 			spdk_nvmf_request_zcopy_start(req);
1629 		} else {
1630 			spdk_nvmf_request_exec(req);
1631 		}
1632 
1633 	}
1634 fini:
1635 	if (cb_fn) {
1636 		cb_fn(cb_arg, rc);
1637 	}
1638 }
1639 
1640 
1641 struct spdk_nvmf_poll_group *
1642 spdk_nvmf_get_optimal_poll_group(struct spdk_nvmf_qpair *qpair)
1643 {
1644 	struct spdk_nvmf_transport_poll_group *tgroup;
1645 
1646 	tgroup = nvmf_transport_get_optimal_poll_group(qpair->transport, qpair);
1647 
1648 	if (tgroup == NULL) {
1649 		return NULL;
1650 	}
1651 
1652 	return tgroup->group;
1653 }
1654 
1655 void
1656 spdk_nvmf_poll_group_dump_stat(struct spdk_nvmf_poll_group *group, struct spdk_json_write_ctx *w)
1657 {
1658 	struct spdk_nvmf_transport_poll_group *tgroup;
1659 
1660 	spdk_json_write_object_begin(w);
1661 
1662 	spdk_json_write_named_string(w, "name", spdk_thread_get_name(spdk_get_thread()));
1663 	spdk_json_write_named_uint32(w, "admin_qpairs", group->stat.admin_qpairs);
1664 	spdk_json_write_named_uint32(w, "io_qpairs", group->stat.io_qpairs);
1665 	spdk_json_write_named_uint32(w, "current_admin_qpairs", group->stat.current_admin_qpairs);
1666 	spdk_json_write_named_uint32(w, "current_io_qpairs", group->stat.current_io_qpairs);
1667 	spdk_json_write_named_uint64(w, "pending_bdev_io", group->stat.pending_bdev_io);
1668 
1669 	spdk_json_write_named_array_begin(w, "transports");
1670 
1671 	TAILQ_FOREACH(tgroup, &group->tgroups, link) {
1672 		spdk_json_write_object_begin(w);
1673 		/*
1674 		 * The trtype field intentionally contains a transport name as this is more informative.
1675 		 * The field has not been renamed for backward compatibility.
1676 		 */
1677 		spdk_json_write_named_string(w, "trtype", spdk_nvmf_get_transport_name(tgroup->transport));
1678 
1679 		if (tgroup->transport->ops->poll_group_dump_stat) {
1680 			tgroup->transport->ops->poll_group_dump_stat(tgroup, w);
1681 		}
1682 
1683 		spdk_json_write_object_end(w);
1684 	}
1685 
1686 	spdk_json_write_array_end(w);
1687 	spdk_json_write_object_end(w);
1688 }
1689