xref: /spdk/lib/nvme/nvme_ctrlr.c (revision c39647df83e4be9bcc49025132c48bf2414ef8b1)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright (c) Intel Corporation. All rights reserved.
5  *   Copyright (c) 2019-2021 Mellanox Technologies LTD. All rights reserved.
6  *   Copyright (c) 2021, 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
7  *
8  *   Redistribution and use in source and binary forms, with or without
9  *   modification, are permitted provided that the following conditions
10  *   are met:
11  *
12  *     * Redistributions of source code must retain the above copyright
13  *       notice, this list of conditions and the following disclaimer.
14  *     * Redistributions in binary form must reproduce the above copyright
15  *       notice, this list of conditions and the following disclaimer in
16  *       the documentation and/or other materials provided with the
17  *       distribution.
18  *     * Neither the name of Intel Corporation nor the names of its
19  *       contributors may be used to endorse or promote products derived
20  *       from this software without specific prior written permission.
21  *
22  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  */
34 
35 #include "spdk/stdinc.h"
36 
37 #include "nvme_internal.h"
38 #include "nvme_io_msg.h"
39 
40 #include "spdk/env.h"
41 #include "spdk/string.h"
42 #include "spdk/endian.h"
43 
44 struct nvme_active_ns_ctx;
45 
46 static int nvme_ctrlr_construct_and_submit_aer(struct spdk_nvme_ctrlr *ctrlr,
47 		struct nvme_async_event_request *aer);
48 static void nvme_ctrlr_identify_active_ns_async(struct nvme_active_ns_ctx *ctx);
49 static int nvme_ctrlr_identify_ns_async(struct spdk_nvme_ns *ns);
50 static int nvme_ctrlr_identify_ns_iocs_specific_async(struct spdk_nvme_ns *ns);
51 static int nvme_ctrlr_identify_id_desc_async(struct spdk_nvme_ns *ns);
52 static void nvme_ctrlr_init_cap(struct spdk_nvme_ctrlr *ctrlr);
53 static void nvme_ctrlr_set_state(struct spdk_nvme_ctrlr *ctrlr, enum nvme_ctrlr_state state,
54 				 uint64_t timeout_in_ms);
55 
56 static int
57 nvme_ns_cmp(struct spdk_nvme_ns *ns1, struct spdk_nvme_ns *ns2)
58 {
59 	if (ns1->id < ns2->id) {
60 		return -1;
61 	} else if (ns1->id > ns2->id) {
62 		return 1;
63 	} else {
64 		return 0;
65 	}
66 }
67 
68 RB_GENERATE_STATIC(nvme_ns_tree, spdk_nvme_ns, node, nvme_ns_cmp);
69 
70 #define CTRLR_STRING(ctrlr) \
71 	((ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_TCP || ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_RDMA) ? \
72 	ctrlr->trid.subnqn : ctrlr->trid.traddr)
73 
74 #define NVME_CTRLR_ERRLOG(ctrlr, format, ...) \
75 	SPDK_ERRLOG("[%s] " format, CTRLR_STRING(ctrlr), ##__VA_ARGS__);
76 
77 #define NVME_CTRLR_WARNLOG(ctrlr, format, ...) \
78 	SPDK_WARNLOG("[%s] " format, CTRLR_STRING(ctrlr), ##__VA_ARGS__);
79 
80 #define NVME_CTRLR_NOTICELOG(ctrlr, format, ...) \
81 	SPDK_NOTICELOG("[%s] " format, CTRLR_STRING(ctrlr), ##__VA_ARGS__);
82 
83 #define NVME_CTRLR_INFOLOG(ctrlr, format, ...) \
84 	SPDK_INFOLOG(nvme, "[%s] " format, CTRLR_STRING(ctrlr), ##__VA_ARGS__);
85 
86 #ifdef DEBUG
87 #define NVME_CTRLR_DEBUGLOG(ctrlr, format, ...) \
88 	SPDK_DEBUGLOG(nvme, "[%s] " format, CTRLR_STRING(ctrlr), ##__VA_ARGS__);
89 #else
90 #define NVME_CTRLR_DEBUGLOG(ctrlr, ...) do { } while (0)
91 #endif
92 
93 #define nvme_ctrlr_get_reg_async(ctrlr, reg, sz, cb_fn, cb_arg) \
94 	nvme_transport_ctrlr_get_reg_ ## sz ## _async(ctrlr, \
95 		offsetof(struct spdk_nvme_registers, reg), cb_fn, cb_arg)
96 
97 #define nvme_ctrlr_set_reg_async(ctrlr, reg, sz, val, cb_fn, cb_arg) \
98 	nvme_transport_ctrlr_set_reg_ ## sz ## _async(ctrlr, \
99 		offsetof(struct spdk_nvme_registers, reg), val, cb_fn, cb_arg)
100 
101 #define nvme_ctrlr_get_cc_async(ctrlr, cb_fn, cb_arg) \
102 	nvme_ctrlr_get_reg_async(ctrlr, cc, 4, cb_fn, cb_arg)
103 
104 #define nvme_ctrlr_get_csts_async(ctrlr, cb_fn, cb_arg) \
105 	nvme_ctrlr_get_reg_async(ctrlr, csts, 4, cb_fn, cb_arg)
106 
107 #define nvme_ctrlr_get_cap_async(ctrlr, cb_fn, cb_arg) \
108 	nvme_ctrlr_get_reg_async(ctrlr, cap, 8, cb_fn, cb_arg)
109 
110 #define nvme_ctrlr_get_vs_async(ctrlr, cb_fn, cb_arg) \
111 	nvme_ctrlr_get_reg_async(ctrlr, vs, 4, cb_fn, cb_arg)
112 
113 #define nvme_ctrlr_set_cc_async(ctrlr, value, cb_fn, cb_arg) \
114 	nvme_ctrlr_set_reg_async(ctrlr, cc, 4, value, cb_fn, cb_arg)
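
/*
 * For reference, the helpers above only paste the register field name and the
 * access width into the matching transport accessor.  For example,
 *
 *   nvme_ctrlr_get_cc_async(ctrlr, cb_fn, cb_arg);
 *
 * expands to
 *
 *   nvme_transport_ctrlr_get_reg_4_async(ctrlr,
 *           offsetof(struct spdk_nvme_registers, cc), cb_fn, cb_arg);
 *
 * so adding a helper for another register only requires its field name in
 * struct spdk_nvme_registers and its width (4 or 8 bytes).
 */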
115 
116 static int
117 nvme_ctrlr_get_cc(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_cc_register *cc)
118 {
119 	return nvme_transport_ctrlr_get_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, cc.raw),
120 					      &cc->raw);
121 }
122 
123 static int
124 nvme_ctrlr_get_csts(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_csts_register *csts)
125 {
126 	return nvme_transport_ctrlr_get_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, csts.raw),
127 					      &csts->raw);
128 }
129 
130 int
131 nvme_ctrlr_get_cap(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_cap_register *cap)
132 {
133 	return nvme_transport_ctrlr_get_reg_8(ctrlr, offsetof(struct spdk_nvme_registers, cap.raw),
134 					      &cap->raw);
135 }
136 
137 int
138 nvme_ctrlr_get_vs(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_vs_register *vs)
139 {
140 	return nvme_transport_ctrlr_get_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, vs.raw),
141 					      &vs->raw);
142 }
143 
144 int
145 nvme_ctrlr_get_cmbsz(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_cmbsz_register *cmbsz)
146 {
147 	return nvme_transport_ctrlr_get_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, cmbsz.raw),
148 					      &cmbsz->raw);
149 }
150 
151 int
152 nvme_ctrlr_get_pmrcap(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_pmrcap_register *pmrcap)
153 {
154 	return nvme_transport_ctrlr_get_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, pmrcap.raw),
155 					      &pmrcap->raw);
156 }
157 
158 int
159 nvme_ctrlr_get_bpinfo(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_bpinfo_register *bpinfo)
160 {
161 	return nvme_transport_ctrlr_get_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, bpinfo.raw),
162 					      &bpinfo->raw);
163 }
164 
165 int
166 nvme_ctrlr_set_bprsel(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_bprsel_register *bprsel)
167 {
168 	return nvme_transport_ctrlr_set_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, bprsel.raw),
169 					      bprsel->raw);
170 }
171 
172 int
173 nvme_ctrlr_set_bpmbl(struct spdk_nvme_ctrlr *ctrlr, uint64_t bpmbl_value)
174 {
175 	return nvme_transport_ctrlr_set_reg_8(ctrlr, offsetof(struct spdk_nvme_registers, bpmbl),
176 					      bpmbl_value);
177 }
178 
179 static int
180 nvme_ctrlr_set_nssr(struct spdk_nvme_ctrlr *ctrlr, uint32_t nssr_value)
181 {
182 	return nvme_transport_ctrlr_set_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, nssr),
183 					      nssr_value);
184 }
185 
186 bool
187 nvme_ctrlr_multi_iocs_enabled(struct spdk_nvme_ctrlr *ctrlr)
188 {
189 	return ctrlr->cap.bits.css & SPDK_NVME_CAP_CSS_IOCS &&
190 	       ctrlr->opts.command_set == SPDK_NVME_CC_CSS_IOCS;
191 }
192 
193 /* When fields in spdk_nvme_ctrlr_opts are changed and you change this function, please
194  * also update the nvme_ctrl_opts_init function in nvme_ctrlr.c
195  */
196 void
197 spdk_nvme_ctrlr_get_default_ctrlr_opts(struct spdk_nvme_ctrlr_opts *opts, size_t opts_size)
198 {
199 	char host_id_str[SPDK_UUID_STRING_LEN];
200 
201 	assert(opts);
202 
203 	opts->opts_size = opts_size;
204 
205 #define FIELD_OK(field) \
206 	offsetof(struct spdk_nvme_ctrlr_opts, field) + sizeof(opts->field) <= opts_size
207 
208 #define SET_FIELD(field, value) \
209 	if (offsetof(struct spdk_nvme_ctrlr_opts, field) + sizeof(opts->field) <= opts_size) { \
210 		opts->field = value; \
211 	} \
212 
213 	SET_FIELD(num_io_queues, DEFAULT_MAX_IO_QUEUES);
214 	SET_FIELD(use_cmb_sqs, false);
215 	SET_FIELD(no_shn_notification, false);
216 	SET_FIELD(arb_mechanism, SPDK_NVME_CC_AMS_RR);
217 	SET_FIELD(arbitration_burst, 0);
218 	SET_FIELD(low_priority_weight, 0);
219 	SET_FIELD(medium_priority_weight, 0);
220 	SET_FIELD(high_priority_weight, 0);
221 	SET_FIELD(keep_alive_timeout_ms, MIN_KEEP_ALIVE_TIMEOUT_IN_MS);
222 	SET_FIELD(transport_retry_count, SPDK_NVME_DEFAULT_RETRY_COUNT);
223 	SET_FIELD(io_queue_size, DEFAULT_IO_QUEUE_SIZE);
224 
225 	if (nvme_driver_init() == 0) {
226 		if (FIELD_OK(hostnqn)) {
227 			spdk_uuid_fmt_lower(host_id_str, sizeof(host_id_str),
228 					    &g_spdk_nvme_driver->default_extended_host_id);
229 			snprintf(opts->hostnqn, sizeof(opts->hostnqn),
230 				 "nqn.2014-08.org.nvmexpress:uuid:%s", host_id_str);
231 		}
232 
233 		if (FIELD_OK(extended_host_id)) {
234 			memcpy(opts->extended_host_id, &g_spdk_nvme_driver->default_extended_host_id,
235 			       sizeof(opts->extended_host_id));
236 		}
237 
238 	}
239 
240 	SET_FIELD(io_queue_requests, DEFAULT_IO_QUEUE_REQUESTS);
241 
242 	if (FIELD_OK(src_addr)) {
243 		memset(opts->src_addr, 0, sizeof(opts->src_addr));
244 	}
245 
246 	if (FIELD_OK(src_svcid)) {
247 		memset(opts->src_svcid, 0, sizeof(opts->src_svcid));
248 	}
249 
250 	if (FIELD_OK(host_id)) {
251 		memset(opts->host_id, 0, sizeof(opts->host_id));
252 	}
253 
254 	SET_FIELD(command_set, CHAR_BIT);
255 	SET_FIELD(admin_timeout_ms, NVME_MAX_ADMIN_TIMEOUT_IN_SECS * 1000);
256 	SET_FIELD(header_digest, false);
257 	SET_FIELD(data_digest, false);
258 	SET_FIELD(disable_error_logging, false);
259 	SET_FIELD(transport_ack_timeout, SPDK_NVME_DEFAULT_TRANSPORT_ACK_TIMEOUT);
260 	SET_FIELD(admin_queue_size, DEFAULT_ADMIN_QUEUE_SIZE);
261 	SET_FIELD(fabrics_connect_timeout_us, NVME_FABRIC_CONNECT_COMMAND_TIMEOUT);
262 	SET_FIELD(disable_read_ana_log_page, false);
263 
264 #undef FIELD_OK
265 #undef SET_FIELD
266 }
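
/*
 * Usage sketch (not part of the driver itself): an application typically takes
 * the defaults filled in above and overrides individual fields before
 * attaching.  The transport ID string is only an illustrative placeholder.
 *
 *   struct spdk_nvme_transport_id trid = {};
 *   struct spdk_nvme_ctrlr_opts opts;
 *   struct spdk_nvme_ctrlr *ctrlr;
 *
 *   spdk_nvme_transport_id_parse(&trid, "trtype:PCIe traddr:0000:00:04.0");
 *   spdk_nvme_ctrlr_get_default_ctrlr_opts(&opts, sizeof(opts));
 *   opts.num_io_queues = 4;
 *   ctrlr = spdk_nvme_connect(&trid, &opts, sizeof(opts));
 *
 * Passing sizeof(opts) is what lets the FIELD_OK()/SET_FIELD() checks above
 * honor only the fields present in the caller's build of the structure.
 */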
267 
268 const struct spdk_nvme_ctrlr_opts *
269 spdk_nvme_ctrlr_get_opts(struct spdk_nvme_ctrlr *ctrlr)
270 {
271 	return &ctrlr->opts;
272 }
273 
274 /**
275  * This function will be called when the process allocates the IO qpair.
276  * Note: the ctrlr_lock must be held when calling this function.
277  */
278 static void
279 nvme_ctrlr_proc_add_io_qpair(struct spdk_nvme_qpair *qpair)
280 {
281 	struct spdk_nvme_ctrlr_process	*active_proc;
282 	struct spdk_nvme_ctrlr		*ctrlr = qpair->ctrlr;
283 
284 	active_proc = nvme_ctrlr_get_current_process(ctrlr);
285 	if (active_proc) {
286 		TAILQ_INSERT_TAIL(&active_proc->allocated_io_qpairs, qpair, per_process_tailq);
287 		qpair->active_proc = active_proc;
288 	}
289 }
290 
291 /**
292  * This function will be called when the process frees the IO qpair.
293  * Note: the ctrlr_lock must be held when calling this function.
294  */
295 static void
296 nvme_ctrlr_proc_remove_io_qpair(struct spdk_nvme_qpair *qpair)
297 {
298 	struct spdk_nvme_ctrlr_process	*active_proc;
299 	struct spdk_nvme_ctrlr		*ctrlr = qpair->ctrlr;
300 	struct spdk_nvme_qpair          *active_qpair, *tmp_qpair;
301 
302 	active_proc = nvme_ctrlr_get_current_process(ctrlr);
303 	if (!active_proc) {
304 		return;
305 	}
306 
307 	TAILQ_FOREACH_SAFE(active_qpair, &active_proc->allocated_io_qpairs,
308 			   per_process_tailq, tmp_qpair) {
309 		if (active_qpair == qpair) {
310 			TAILQ_REMOVE(&active_proc->allocated_io_qpairs,
311 				     active_qpair, per_process_tailq);
312 
313 			break;
314 		}
315 	}
316 }
317 
318 void
319 spdk_nvme_ctrlr_get_default_io_qpair_opts(struct spdk_nvme_ctrlr *ctrlr,
320 		struct spdk_nvme_io_qpair_opts *opts,
321 		size_t opts_size)
322 {
323 	assert(ctrlr);
324 
325 	assert(opts);
326 
327 	memset(opts, 0, opts_size);
328 
329 #define FIELD_OK(field) \
330 	offsetof(struct spdk_nvme_io_qpair_opts, field) + sizeof(opts->field) <= opts_size
331 
332 	if (FIELD_OK(qprio)) {
333 		opts->qprio = SPDK_NVME_QPRIO_URGENT;
334 	}
335 
336 	if (FIELD_OK(io_queue_size)) {
337 		opts->io_queue_size = ctrlr->opts.io_queue_size;
338 	}
339 
340 	if (FIELD_OK(io_queue_requests)) {
341 		opts->io_queue_requests = ctrlr->opts.io_queue_requests;
342 	}
343 
344 	if (FIELD_OK(delay_cmd_submit)) {
345 		opts->delay_cmd_submit = false;
346 	}
347 
348 	if (FIELD_OK(sq.vaddr)) {
349 		opts->sq.vaddr = NULL;
350 	}
351 
352 	if (FIELD_OK(sq.paddr)) {
353 		opts->sq.paddr = 0;
354 	}
355 
356 	if (FIELD_OK(sq.buffer_size)) {
357 		opts->sq.buffer_size = 0;
358 	}
359 
360 	if (FIELD_OK(cq.vaddr)) {
361 		opts->cq.vaddr = NULL;
362 	}
363 
364 	if (FIELD_OK(cq.paddr)) {
365 		opts->cq.paddr = 0;
366 	}
367 
368 	if (FIELD_OK(cq.buffer_size)) {
369 		opts->cq.buffer_size = 0;
370 	}
371 
372 	if (FIELD_OK(create_only)) {
373 		opts->create_only = false;
374 	}
375 
376 	if (FIELD_OK(async_mode)) {
377 		opts->async_mode = false;
378 	}
379 
380 #undef FIELD_OK
381 }
382 
383 static struct spdk_nvme_qpair *
384 nvme_ctrlr_create_io_qpair(struct spdk_nvme_ctrlr *ctrlr,
385 			   const struct spdk_nvme_io_qpair_opts *opts)
386 {
387 	int32_t					qid;
388 	struct spdk_nvme_qpair			*qpair;
389 	union spdk_nvme_cc_register		cc;
390 
391 	if (!ctrlr) {
392 		return NULL;
393 	}
394 
395 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
396 	cc.raw = ctrlr->process_init_cc.raw;
397 
398 	if (opts->qprio & ~SPDK_NVME_CREATE_IO_SQ_QPRIO_MASK) {
399 		nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
400 		return NULL;
401 	}
402 
403 	/*
404 	 * Only the value SPDK_NVME_QPRIO_URGENT (0) is valid for the
405 	 * default round robin arbitration method.
406 	 */
407 	if ((cc.bits.ams == SPDK_NVME_CC_AMS_RR) && (opts->qprio != SPDK_NVME_QPRIO_URGENT)) {
408 		NVME_CTRLR_ERRLOG(ctrlr, "invalid queue priority for default round robin arbitration method\n");
409 		nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
410 		return NULL;
411 	}
412 
413 	qid = spdk_nvme_ctrlr_alloc_qid(ctrlr);
414 	if (qid < 0) {
415 		nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
416 		return NULL;
417 	}
418 
419 	qpair = nvme_transport_ctrlr_create_io_qpair(ctrlr, qid, opts);
420 	if (qpair == NULL) {
421 		NVME_CTRLR_ERRLOG(ctrlr, "nvme_transport_ctrlr_create_io_qpair() failed\n");
422 		spdk_nvme_ctrlr_free_qid(ctrlr, qid);
423 		nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
424 		return NULL;
425 	}
426 
427 	TAILQ_INSERT_TAIL(&ctrlr->active_io_qpairs, qpair, tailq);
428 
429 	nvme_ctrlr_proc_add_io_qpair(qpair);
430 
431 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
432 
433 	return qpair;
434 }
435 
436 int
437 spdk_nvme_ctrlr_connect_io_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair)
438 {
439 	int rc;
440 
441 	if (nvme_qpair_get_state(qpair) != NVME_QPAIR_DISCONNECTED) {
442 		return -EISCONN;
443 	}
444 
445 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
446 	rc = nvme_transport_ctrlr_connect_qpair(ctrlr, qpair);
447 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
448 
449 	if (ctrlr->quirks & NVME_QUIRK_DELAY_AFTER_QUEUE_ALLOC) {
450 		spdk_delay_us(100);
451 	}
452 
453 	return rc;
454 }
455 
456 void
457 spdk_nvme_ctrlr_disconnect_io_qpair(struct spdk_nvme_qpair *qpair)
458 {
459 	struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr;
460 
461 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
462 	nvme_transport_ctrlr_disconnect_qpair(ctrlr, qpair);
463 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
464 }
465 
466 struct spdk_nvme_qpair *
467 spdk_nvme_ctrlr_alloc_io_qpair(struct spdk_nvme_ctrlr *ctrlr,
468 			       const struct spdk_nvme_io_qpair_opts *user_opts,
469 			       size_t opts_size)
470 {
471 
472 	struct spdk_nvme_qpair		*qpair;
473 	struct spdk_nvme_io_qpair_opts	opts;
474 	int				rc;
475 
476 	if (spdk_unlikely(ctrlr->state != NVME_CTRLR_STATE_READY)) {
477 		/* When the controller is resetting or initializing, free_io_qids has been deleted or not yet created.
478 		 * We can't create an I/O qpair in that case. */
479 		return NULL;
480 	}
481 
482 	/*
483 	 * Get the default options, then overwrite them with the user-provided options
484 	 * up to opts_size.
485 	 *
486 	 * This allows for extensions of the opts structure without breaking
487 	 * ABI compatibility.
488 	 */
489 	spdk_nvme_ctrlr_get_default_io_qpair_opts(ctrlr, &opts, sizeof(opts));
490 	if (user_opts) {
491 		memcpy(&opts, user_opts, spdk_min(sizeof(opts), opts_size));
492 
493 		/* If user passes buffers, make sure they're big enough for the requested queue size */
494 		if (opts.sq.vaddr) {
495 			if (opts.sq.buffer_size < (opts.io_queue_size * sizeof(struct spdk_nvme_cmd))) {
496 				NVME_CTRLR_ERRLOG(ctrlr, "sq buffer size %" PRIx64 " is too small for sq size %zx\n",
497 						  opts.sq.buffer_size, (opts.io_queue_size * sizeof(struct spdk_nvme_cmd)));
498 				return NULL;
499 			}
500 		}
501 		if (opts.cq.vaddr) {
502 			if (opts.cq.buffer_size < (opts.io_queue_size * sizeof(struct spdk_nvme_cpl))) {
503 				NVME_CTRLR_ERRLOG(ctrlr, "cq buffer size %" PRIx64 " is too small for cq size %zx\n",
504 						  opts.cq.buffer_size, (opts.io_queue_size * sizeof(struct spdk_nvme_cpl)));
505 				return NULL;
506 			}
507 		}
508 	}
509 
510 	qpair = nvme_ctrlr_create_io_qpair(ctrlr, &opts);
511 
512 	if (qpair == NULL || opts.create_only == true) {
513 		return qpair;
514 	}
515 
516 	rc = spdk_nvme_ctrlr_connect_io_qpair(ctrlr, qpair);
517 	if (rc != 0) {
518 		NVME_CTRLR_ERRLOG(ctrlr, "nvme_transport_ctrlr_connect_io_qpair() failed\n");
519 		nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
520 		nvme_ctrlr_proc_remove_io_qpair(qpair);
521 		TAILQ_REMOVE(&ctrlr->active_io_qpairs, qpair, tailq);
522 		spdk_bit_array_set(ctrlr->free_io_qids, qpair->id);
523 		nvme_transport_ctrlr_delete_io_qpair(ctrlr, qpair);
524 		nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
525 		return NULL;
526 	}
527 
528 	return qpair;
529 }
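
/*
 * Usage sketch: the opts_size scheme above lets a caller compiled against an
 * older spdk_nvme_io_qpair_opts keep working.  A typical allocation looks like:
 *
 *   struct spdk_nvme_io_qpair_opts qpopts;
 *   struct spdk_nvme_qpair *qpair;
 *
 *   spdk_nvme_ctrlr_get_default_io_qpair_opts(ctrlr, &qpopts, sizeof(qpopts));
 *   qpopts.io_queue_size = 256;                    (override one default)
 *   qpair = spdk_nvme_ctrlr_alloc_io_qpair(ctrlr, &qpopts, sizeof(qpopts));
 *
 * With create_only left false the returned qpair is already connected; with
 * create_only = true the caller connects it later via
 * spdk_nvme_ctrlr_connect_io_qpair().
 */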
530 
531 int
532 spdk_nvme_ctrlr_reconnect_io_qpair(struct spdk_nvme_qpair *qpair)
533 {
534 	struct spdk_nvme_ctrlr *ctrlr;
535 	enum nvme_qpair_state qpair_state;
536 	int rc;
537 
538 	assert(qpair != NULL);
539 	assert(nvme_qpair_is_admin_queue(qpair) == false);
540 	assert(qpair->ctrlr != NULL);
541 
542 	ctrlr = qpair->ctrlr;
543 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
544 	qpair_state = nvme_qpair_get_state(qpair);
545 
546 	if (ctrlr->is_removed) {
547 		rc = -ENODEV;
548 		goto out;
549 	}
550 
551 	if (ctrlr->is_resetting || qpair_state == NVME_QPAIR_DISCONNECTING) {
552 		rc = -EAGAIN;
553 		goto out;
554 	}
555 
556 	if (ctrlr->is_failed || qpair_state == NVME_QPAIR_DESTROYING) {
557 		rc = -ENXIO;
558 		goto out;
559 	}
560 
561 	if (qpair_state != NVME_QPAIR_DISCONNECTED) {
562 		rc = 0;
563 		goto out;
564 	}
565 
566 	rc = nvme_transport_ctrlr_connect_qpair(ctrlr, qpair);
567 	if (rc) {
568 		rc = -EAGAIN;
569 		goto out;
570 	}
571 
572 out:
573 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
574 	return rc;
575 }
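
/*
 * Usage sketch: when a qpair's transport connection drops (for example
 * spdk_nvme_qpair_process_completions() starts returning a negative errno),
 * the qpair's owner can retry the reconnect periodically:
 *
 *   int rc = spdk_nvme_ctrlr_reconnect_io_qpair(qpair);
 *
 *   rc == 0        qpair is connected again (or was never disconnected)
 *   rc == -EAGAIN  controller resetting or qpair disconnecting; retry later
 *   rc == -ENODEV  controller was removed; free the qpair
 *   rc == -ENXIO   controller failed or qpair is being destroyed; free the qpair
 */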
576 
577 spdk_nvme_qp_failure_reason
578 spdk_nvme_ctrlr_get_admin_qp_failure_reason(struct spdk_nvme_ctrlr *ctrlr)
579 {
580 	return ctrlr->adminq->transport_failure_reason;
581 }
582 
583 /*
584  * This internal function will attempt to take the controller
585  * lock before calling disconnect on a controller qpair.
586  * Functions already holding the controller lock should
587  * call nvme_transport_ctrlr_disconnect_qpair directly.
588  */
589 void
590 nvme_ctrlr_disconnect_qpair(struct spdk_nvme_qpair *qpair)
591 {
592 	struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr;
593 
594 	assert(ctrlr != NULL);
595 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
596 	nvme_transport_ctrlr_disconnect_qpair(ctrlr, qpair);
597 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
598 }
599 
600 int
601 spdk_nvme_ctrlr_free_io_qpair(struct spdk_nvme_qpair *qpair)
602 {
603 	struct spdk_nvme_ctrlr *ctrlr;
604 
605 	if (qpair == NULL) {
606 		return 0;
607 	}
608 
609 	ctrlr = qpair->ctrlr;
610 
611 	if (qpair->in_completion_context) {
612 		/*
613 		 * There are many cases where it is convenient to delete an io qpair in the context
614 		 *  of that qpair's completion routine.  To handle this properly, set a flag here
615 		 *  so that the completion routine will perform an actual delete after the context
616 		 *  unwinds.
617 		 */
618 		qpair->delete_after_completion_context = 1;
619 		return 0;
620 	}
621 
622 	if (qpair->poll_group && qpair->poll_group->in_completion_context) {
623 		/* Same as above, but in a poll group. */
624 		qpair->poll_group->num_qpairs_to_delete++;
625 		qpair->delete_after_completion_context = 1;
626 		return 0;
627 	}
628 
629 	nvme_transport_ctrlr_disconnect_qpair(ctrlr, qpair);
630 
631 	if (qpair->poll_group) {
632 		spdk_nvme_poll_group_remove(qpair->poll_group->group, qpair);
633 	}
634 
635 	/* Do not retry. */
636 	nvme_qpair_set_state(qpair, NVME_QPAIR_DESTROYING);
637 
638 	/* In the multi-process case, a process may call this function on a foreign
639 	 * I/O qpair (i.e. one that this process did not create) when that qpair's process
640 	 * exits unexpectedly.  In that case, we must not try to abort any reqs associated
641 	 * with that qpair, since the callbacks will also be foreign to this process.
642 	 */
643 	if (qpair->active_proc == nvme_ctrlr_get_current_process(ctrlr)) {
644 		nvme_qpair_abort_all_queued_reqs(qpair, 0);
645 	}
646 
647 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
648 
649 	nvme_ctrlr_proc_remove_io_qpair(qpair);
650 
651 	TAILQ_REMOVE(&ctrlr->active_io_qpairs, qpair, tailq);
652 	spdk_nvme_ctrlr_free_qid(ctrlr, qpair->id);
653 
654 	nvme_transport_ctrlr_delete_io_qpair(ctrlr, qpair);
655 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
656 	return 0;
657 }
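
/*
 * Example: the delete_after_completion_context handling above makes it safe to
 * free a qpair from one of its own completion callbacks; the real teardown is
 * deferred until the completion context unwinds.  A minimal sketch:
 *
 *   static void
 *   io_done(void *arg, const struct spdk_nvme_cpl *cpl)
 *   {
 *           struct spdk_nvme_qpair *qpair = arg;
 *
 *           spdk_nvme_ctrlr_free_io_qpair(qpair);   (only sets the flag here)
 *   }
 */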
658 
659 static void
660 nvme_ctrlr_construct_intel_support_log_page_list(struct spdk_nvme_ctrlr *ctrlr,
661 		struct spdk_nvme_intel_log_page_directory *log_page_directory)
662 {
663 	if (log_page_directory == NULL) {
664 		return;
665 	}
666 
667 	if (ctrlr->cdata.vid != SPDK_PCI_VID_INTEL) {
668 		return;
669 	}
670 
671 	ctrlr->log_page_supported[SPDK_NVME_INTEL_LOG_PAGE_DIRECTORY] = true;
672 
673 	if (log_page_directory->read_latency_log_len ||
674 	    (ctrlr->quirks & NVME_INTEL_QUIRK_READ_LATENCY)) {
675 		ctrlr->log_page_supported[SPDK_NVME_INTEL_LOG_READ_CMD_LATENCY] = true;
676 	}
677 	if (log_page_directory->write_latency_log_len ||
678 	    (ctrlr->quirks & NVME_INTEL_QUIRK_WRITE_LATENCY)) {
679 		ctrlr->log_page_supported[SPDK_NVME_INTEL_LOG_WRITE_CMD_LATENCY] = true;
680 	}
681 	if (log_page_directory->temperature_statistics_log_len) {
682 		ctrlr->log_page_supported[SPDK_NVME_INTEL_LOG_TEMPERATURE] = true;
683 	}
684 	if (log_page_directory->smart_log_len) {
685 		ctrlr->log_page_supported[SPDK_NVME_INTEL_LOG_SMART] = true;
686 	}
687 	if (log_page_directory->marketing_description_log_len) {
688 		ctrlr->log_page_supported[SPDK_NVME_INTEL_MARKETING_DESCRIPTION] = true;
689 	}
690 }
691 
692 struct intel_log_pages_ctx {
693 	struct spdk_nvme_intel_log_page_directory log_page_directory;
694 	struct spdk_nvme_ctrlr *ctrlr;
695 };
696 
697 static void
698 nvme_ctrlr_set_intel_support_log_pages_done(void *arg, const struct spdk_nvme_cpl *cpl)
699 {
700 	struct intel_log_pages_ctx *ctx = arg;
701 	struct spdk_nvme_ctrlr *ctrlr = ctx->ctrlr;
702 
703 	if (!spdk_nvme_cpl_is_error(cpl)) {
704 		nvme_ctrlr_construct_intel_support_log_page_list(ctrlr, &ctx->log_page_directory);
705 	}
706 
707 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_SUPPORTED_FEATURES,
708 			     ctrlr->opts.admin_timeout_ms);
709 	free(ctx);
710 }
711 
712 static int nvme_ctrlr_set_intel_support_log_pages(struct spdk_nvme_ctrlr *ctrlr)
713 {
714 	int rc = 0;
715 	struct intel_log_pages_ctx *ctx;
716 
717 	ctx = calloc(1, sizeof(*ctx));
718 	if (!ctx) {
719 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_SUPPORTED_FEATURES,
720 				     ctrlr->opts.admin_timeout_ms);
721 		return 0;
722 	}
723 
724 	ctx->ctrlr = ctrlr;
725 
726 	rc = spdk_nvme_ctrlr_cmd_get_log_page(ctrlr, SPDK_NVME_INTEL_LOG_PAGE_DIRECTORY,
727 					      SPDK_NVME_GLOBAL_NS_TAG, &ctx->log_page_directory,
728 					      sizeof(struct spdk_nvme_intel_log_page_directory),
729 					      0, nvme_ctrlr_set_intel_support_log_pages_done, ctx);
730 	if (rc != 0) {
731 		free(ctx);
732 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_SUPPORTED_FEATURES,
733 				     ctrlr->opts.admin_timeout_ms);
734 		return 0;
735 	}
736 
737 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_SUPPORTED_INTEL_LOG_PAGES,
738 			     ctrlr->opts.admin_timeout_ms);
739 
740 	return 0;
741 }
742 
743 static int
744 nvme_ctrlr_alloc_ana_log_page(struct spdk_nvme_ctrlr *ctrlr)
745 {
746 	uint32_t ana_log_page_size;
747 
748 	ana_log_page_size = sizeof(struct spdk_nvme_ana_page) + ctrlr->cdata.nanagrpid *
749 			    sizeof(struct spdk_nvme_ana_group_descriptor) + ctrlr->active_ns_count *
750 			    sizeof(uint32_t);
751 
752 	/* Number of active namespaces may have changed.
753 	 * Check if ANA log page fits into existing buffer.
754 	 */
755 	if (ana_log_page_size > ctrlr->ana_log_page_size) {
756 		void *new_buffer;
757 
758 		if (ctrlr->ana_log_page) {
759 			new_buffer = realloc(ctrlr->ana_log_page, ana_log_page_size);
760 		} else {
761 			new_buffer = calloc(1, ana_log_page_size);
762 		}
763 
764 		if (!new_buffer) {
765 			NVME_CTRLR_ERRLOG(ctrlr, "could not allocate ANA log page buffer, size %u\n",
766 					  ana_log_page_size);
767 			return -ENXIO;
768 		}
769 
770 		ctrlr->ana_log_page = new_buffer;
771 		if (ctrlr->copied_ana_desc) {
772 			new_buffer = realloc(ctrlr->copied_ana_desc, ana_log_page_size);
773 		} else {
774 			new_buffer = calloc(1, ana_log_page_size);
775 		}
776 
777 		if (!new_buffer) {
778 			NVME_CTRLR_ERRLOG(ctrlr, "could not allocate a buffer to parse ANA descriptor, size %u\n",
779 					  ana_log_page_size);
780 			return -ENOMEM;
781 		}
782 
783 		ctrlr->copied_ana_desc = new_buffer;
784 		ctrlr->ana_log_page_size = ana_log_page_size;
785 	}
786 
787 	return 0;
788 }
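
/*
 * Sizing example for the buffers above: with cdata.nanagrpid = 4 ANA groups
 * and active_ns_count = 8, the worst-case log page size is
 *
 *   sizeof(struct spdk_nvme_ana_page)                        (header)
 *   + 4 * sizeof(struct spdk_nvme_ana_group_descriptor)      (one per group)
 *   + 8 * sizeof(uint32_t)                                   (one NSID per active ns)
 *
 * The buffers are only ever grown, so namespace hot-adds trigger a reallocation
 * only when the newly computed size exceeds ana_log_page_size.
 */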
789 
790 static int
791 nvme_ctrlr_update_ana_log_page(struct spdk_nvme_ctrlr *ctrlr)
792 {
793 	struct nvme_completion_poll_status *status;
794 	int rc;
795 
796 	rc = nvme_ctrlr_alloc_ana_log_page(ctrlr);
797 	if (rc != 0) {
798 		return rc;
799 	}
800 
801 	status = calloc(1, sizeof(*status));
802 	if (status == NULL) {
803 		NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n");
804 		return -ENOMEM;
805 	}
806 
807 	rc = spdk_nvme_ctrlr_cmd_get_log_page(ctrlr, SPDK_NVME_LOG_ASYMMETRIC_NAMESPACE_ACCESS,
808 					      SPDK_NVME_GLOBAL_NS_TAG, ctrlr->ana_log_page,
809 					      ctrlr->ana_log_page_size, 0,
810 					      nvme_completion_poll_cb, status);
811 	if (rc != 0) {
812 		free(status);
813 		return rc;
814 	}
815 
816 	if (nvme_wait_for_completion_robust_lock_timeout(ctrlr->adminq, status, &ctrlr->ctrlr_lock,
817 			ctrlr->opts.admin_timeout_ms * 1000)) {
818 		if (!status->timed_out) {
819 			free(status);
820 		}
821 		return -EIO;
822 	}
823 
824 	free(status);
825 	return 0;
826 }
827 
828 static int
829 nvme_ctrlr_init_ana_log_page(struct spdk_nvme_ctrlr *ctrlr)
830 {
831 	int rc;
832 
833 	rc = nvme_ctrlr_alloc_ana_log_page(ctrlr);
834 	if (rc) {
835 		return rc;
836 	}
837 
838 	return nvme_ctrlr_update_ana_log_page(ctrlr);
839 }
840 
841 static int
842 nvme_ctrlr_update_ns_ana_states(const struct spdk_nvme_ana_group_descriptor *desc,
843 				void *cb_arg)
844 {
845 	struct spdk_nvme_ctrlr *ctrlr = cb_arg;
846 	struct spdk_nvme_ns *ns;
847 	uint32_t i, nsid;
848 
849 	for (i = 0; i < desc->num_of_nsid; i++) {
850 		nsid = desc->nsid[i];
851 		if (nsid == 0 || nsid > ctrlr->cdata.nn) {
852 			continue;
853 		}
854 
855 		ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid);
856 		assert(ns != NULL);
857 
858 		ns->ana_group_id = desc->ana_group_id;
859 		ns->ana_state = desc->ana_state;
860 	}
861 
862 	return 0;
863 }
864 
865 int
866 nvme_ctrlr_parse_ana_log_page(struct spdk_nvme_ctrlr *ctrlr,
867 			      spdk_nvme_parse_ana_log_page_cb cb_fn, void *cb_arg)
868 {
869 	struct spdk_nvme_ana_group_descriptor *copied_desc;
870 	uint8_t *orig_desc;
871 	uint32_t i, desc_size, copy_len;
872 	int rc = 0;
873 
874 	if (ctrlr->ana_log_page == NULL) {
875 		return -EINVAL;
876 	}
877 
878 	copied_desc = ctrlr->copied_ana_desc;
879 
880 	orig_desc = (uint8_t *)ctrlr->ana_log_page + sizeof(struct spdk_nvme_ana_page);
881 	copy_len = ctrlr->ana_log_page_size - sizeof(struct spdk_nvme_ana_page);
882 
883 	for (i = 0; i < ctrlr->ana_log_page->num_ana_group_desc; i++) {
884 		memcpy(copied_desc, orig_desc, copy_len);
885 
886 		rc = cb_fn(copied_desc, cb_arg);
887 		if (rc != 0) {
888 			break;
889 		}
890 
891 		desc_size = sizeof(struct spdk_nvme_ana_group_descriptor) +
892 			    copied_desc->num_of_nsid * sizeof(uint32_t);
893 		orig_desc += desc_size;
894 		copy_len -= desc_size;
895 	}
896 
897 	return rc;
898 }
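
/*
 * Example callback: any function matching spdk_nvme_parse_ana_log_page_cb can
 * be passed to nvme_ctrlr_parse_ana_log_page(); nvme_ctrlr_update_ns_ana_states()
 * above is the in-tree user.  A minimal sketch that just counts descriptors:
 *
 *   static int
 *   count_ana_groups(const struct spdk_nvme_ana_group_descriptor *desc, void *cb_arg)
 *   {
 *           uint32_t *count = cb_arg;
 *
 *           (*count)++;
 *           return 0;        (returning non-zero stops the iteration early)
 *   }
 */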
899 
900 static int
901 nvme_ctrlr_set_supported_log_pages(struct spdk_nvme_ctrlr *ctrlr)
902 {
903 	int	rc = 0;
904 
905 	memset(ctrlr->log_page_supported, 0, sizeof(ctrlr->log_page_supported));
906 	/* Mandatory pages */
907 	ctrlr->log_page_supported[SPDK_NVME_LOG_ERROR] = true;
908 	ctrlr->log_page_supported[SPDK_NVME_LOG_HEALTH_INFORMATION] = true;
909 	ctrlr->log_page_supported[SPDK_NVME_LOG_FIRMWARE_SLOT] = true;
910 	if (ctrlr->cdata.lpa.celp) {
911 		ctrlr->log_page_supported[SPDK_NVME_LOG_COMMAND_EFFECTS_LOG] = true;
912 	}
913 
914 	if (ctrlr->cdata.cmic.ana_reporting) {
915 		ctrlr->log_page_supported[SPDK_NVME_LOG_ASYMMETRIC_NAMESPACE_ACCESS] = true;
916 		if (!ctrlr->opts.disable_read_ana_log_page) {
917 			rc = nvme_ctrlr_init_ana_log_page(ctrlr);
918 			if (rc == 0) {
919 				nvme_ctrlr_parse_ana_log_page(ctrlr, nvme_ctrlr_update_ns_ana_states,
920 							      ctrlr);
921 			}
922 		}
923 	}
924 
925 	if (ctrlr->cdata.vid == SPDK_PCI_VID_INTEL && !(ctrlr->quirks & NVME_INTEL_QUIRK_NO_LOG_PAGES)) {
926 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_SUPPORTED_INTEL_LOG_PAGES,
927 				     ctrlr->opts.admin_timeout_ms);
928 
929 	} else {
930 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_SUPPORTED_FEATURES,
931 				     ctrlr->opts.admin_timeout_ms);
932 
933 	}
934 
935 	return rc;
936 }
937 
938 static void
939 nvme_ctrlr_set_intel_supported_features(struct spdk_nvme_ctrlr *ctrlr)
940 {
941 	ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_MAX_LBA] = true;
942 	ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_NATIVE_MAX_LBA] = true;
943 	ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_POWER_GOVERNOR_SETTING] = true;
944 	ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_SMBUS_ADDRESS] = true;
945 	ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_LED_PATTERN] = true;
946 	ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_RESET_TIMED_WORKLOAD_COUNTERS] = true;
947 	ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_LATENCY_TRACKING] = true;
948 }
949 
950 static void
951 nvme_ctrlr_set_arbitration_feature(struct spdk_nvme_ctrlr *ctrlr)
952 {
953 	uint32_t cdw11;
954 	struct nvme_completion_poll_status *status;
955 
956 	if (ctrlr->opts.arbitration_burst == 0) {
957 		return;
958 	}
959 
960 	if (ctrlr->opts.arbitration_burst > 7) {
961 		NVME_CTRLR_WARNLOG(ctrlr, "Valid arbitration burst values are from 0-7\n");
962 		return;
963 	}
964 
965 	status = calloc(1, sizeof(*status));
966 	if (!status) {
967 		NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n");
968 		return;
969 	}
970 
971 	cdw11 = ctrlr->opts.arbitration_burst;
972 
973 	if (spdk_nvme_ctrlr_get_flags(ctrlr) & SPDK_NVME_CTRLR_WRR_SUPPORTED) {
974 		cdw11 |= (uint32_t)ctrlr->opts.low_priority_weight << 8;
975 		cdw11 |= (uint32_t)ctrlr->opts.medium_priority_weight << 16;
976 		cdw11 |= (uint32_t)ctrlr->opts.high_priority_weight << 24;
977 	}
978 
979 	if (spdk_nvme_ctrlr_cmd_set_feature(ctrlr, SPDK_NVME_FEAT_ARBITRATION,
980 					    cdw11, 0, NULL, 0,
981 					    nvme_completion_poll_cb, status) < 0) {
982 		NVME_CTRLR_ERRLOG(ctrlr, "Set arbitration feature failed\n");
983 		free(status);
984 		return;
985 	}
986 
987 	if (nvme_wait_for_completion_timeout(ctrlr->adminq, status,
988 					     ctrlr->opts.admin_timeout_ms * 1000)) {
989 		NVME_CTRLR_ERRLOG(ctrlr, "Timeout to set arbitration feature\n");
990 	}
991 
992 	if (!status->timed_out) {
993 		free(status);
994 	}
995 }
996 
997 static void
998 nvme_ctrlr_set_supported_features(struct spdk_nvme_ctrlr *ctrlr)
999 {
1000 	memset(ctrlr->feature_supported, 0, sizeof(ctrlr->feature_supported));
1001 	/* Mandatory features */
1002 	ctrlr->feature_supported[SPDK_NVME_FEAT_ARBITRATION] = true;
1003 	ctrlr->feature_supported[SPDK_NVME_FEAT_POWER_MANAGEMENT] = true;
1004 	ctrlr->feature_supported[SPDK_NVME_FEAT_TEMPERATURE_THRESHOLD] = true;
1005 	ctrlr->feature_supported[SPDK_NVME_FEAT_ERROR_RECOVERY] = true;
1006 	ctrlr->feature_supported[SPDK_NVME_FEAT_NUMBER_OF_QUEUES] = true;
1007 	ctrlr->feature_supported[SPDK_NVME_FEAT_INTERRUPT_COALESCING] = true;
1008 	ctrlr->feature_supported[SPDK_NVME_FEAT_INTERRUPT_VECTOR_CONFIGURATION] = true;
1009 	ctrlr->feature_supported[SPDK_NVME_FEAT_WRITE_ATOMICITY] = true;
1010 	ctrlr->feature_supported[SPDK_NVME_FEAT_ASYNC_EVENT_CONFIGURATION] = true;
1011 	/* Optional features */
1012 	if (ctrlr->cdata.vwc.present) {
1013 		ctrlr->feature_supported[SPDK_NVME_FEAT_VOLATILE_WRITE_CACHE] = true;
1014 	}
1015 	if (ctrlr->cdata.apsta.supported) {
1016 		ctrlr->feature_supported[SPDK_NVME_FEAT_AUTONOMOUS_POWER_STATE_TRANSITION] = true;
1017 	}
1018 	if (ctrlr->cdata.hmpre) {
1019 		ctrlr->feature_supported[SPDK_NVME_FEAT_HOST_MEM_BUFFER] = true;
1020 	}
1021 	if (ctrlr->cdata.vid == SPDK_PCI_VID_INTEL) {
1022 		nvme_ctrlr_set_intel_supported_features(ctrlr);
1023 	}
1024 
1025 	nvme_ctrlr_set_arbitration_feature(ctrlr);
1026 }
1027 
1028 bool
1029 spdk_nvme_ctrlr_is_failed(struct spdk_nvme_ctrlr *ctrlr)
1030 {
1031 	return ctrlr->is_failed;
1032 }
1033 
1034 void
1035 nvme_ctrlr_fail(struct spdk_nvme_ctrlr *ctrlr, bool hot_remove)
1036 {
1037 	/*
1038 	 * Set the flag here and leave the work of failing the qpairs to
1039 	 * spdk_nvme_qpair_process_completions().
1040 	 */
1041 	if (hot_remove) {
1042 		ctrlr->is_removed = true;
1043 	}
1044 
1045 	if (ctrlr->is_failed) {
1046 		NVME_CTRLR_NOTICELOG(ctrlr, "already in failed state\n");
1047 		return;
1048 	}
1049 
1050 	ctrlr->is_failed = true;
1051 	nvme_transport_ctrlr_disconnect_qpair(ctrlr, ctrlr->adminq);
1052 	NVME_CTRLR_ERRLOG(ctrlr, "in failed state.\n");
1053 }
1054 
1055 /**
1056  * This public API function will try to take the controller lock.
1057  * Any private function called from a thread already holding
1058  * the ctrlr lock should call nvme_ctrlr_fail directly.
1059  */
1060 void
1061 spdk_nvme_ctrlr_fail(struct spdk_nvme_ctrlr *ctrlr)
1062 {
1063 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
1064 	nvme_ctrlr_fail(ctrlr, false);
1065 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
1066 }
1067 
1068 static void
1069 nvme_ctrlr_shutdown_set_cc_done(void *_ctx, uint64_t value, const struct spdk_nvme_cpl *cpl)
1070 {
1071 	struct nvme_ctrlr_detach_ctx *ctx = _ctx;
1072 	struct spdk_nvme_ctrlr *ctrlr = ctx->ctrlr;
1073 
1074 	if (spdk_nvme_cpl_is_error(cpl)) {
1075 		NVME_CTRLR_ERRLOG(ctrlr, "Failed to write CC.SHN\n");
1076 		ctx->shutdown_complete = true;
1077 		return;
1078 	}
1079 
1080 	if (ctrlr->opts.no_shn_notification) {
1081 		ctx->shutdown_complete = true;
1082 		return;
1083 	}
1084 
1085 	/*
1086 	 * The NVMe specification defines RTD3E to be the time from
1087 	 *  setting SHN = 1 until the controller sets SHST = 10b.
1088 	 * If the device doesn't report RTD3 entry latency, or if it
1089 	 *  reports RTD3 entry latency less than 10 seconds, pick
1090 	 *  10 seconds as a reasonable amount of time to
1091 	 *  wait before proceeding.
1092 	 */
1093 	NVME_CTRLR_DEBUGLOG(ctrlr, "RTD3E = %" PRIu32 " us\n", ctrlr->cdata.rtd3e);
1094 	ctx->shutdown_timeout_ms = SPDK_CEIL_DIV(ctrlr->cdata.rtd3e, 1000);
1095 	ctx->shutdown_timeout_ms = spdk_max(ctx->shutdown_timeout_ms, 10000);
1096 	NVME_CTRLR_DEBUGLOG(ctrlr, "shutdown timeout = %" PRIu32 " ms\n", ctx->shutdown_timeout_ms);
1097 
1098 	ctx->shutdown_start_tsc = spdk_get_ticks();
1099 	ctx->state = NVME_CTRLR_DETACH_CHECK_CSTS;
1100 }
1101 
1102 static void
1103 nvme_ctrlr_shutdown_get_cc_done(void *_ctx, uint64_t value, const struct spdk_nvme_cpl *cpl)
1104 {
1105 	struct nvme_ctrlr_detach_ctx *ctx = _ctx;
1106 	struct spdk_nvme_ctrlr *ctrlr = ctx->ctrlr;
1107 	union spdk_nvme_cc_register cc;
1108 	int rc;
1109 
1110 	if (spdk_nvme_cpl_is_error(cpl)) {
1111 		NVME_CTRLR_ERRLOG(ctrlr, "Failed to read the CC register\n");
1112 		ctx->shutdown_complete = true;
1113 		return;
1114 	}
1115 
1116 	assert(value <= UINT32_MAX);
1117 	cc.raw = (uint32_t)value;
1118 
1119 	if (ctrlr->opts.no_shn_notification) {
1120 		NVME_CTRLR_INFOLOG(ctrlr, "Disable SSD without shutdown notification\n");
1121 		if (cc.bits.en == 0) {
1122 			ctx->shutdown_complete = true;
1123 			return;
1124 		}
1125 
1126 		cc.bits.en = 0;
1127 	} else {
1128 		cc.bits.shn = SPDK_NVME_SHN_NORMAL;
1129 	}
1130 
1131 	rc = nvme_ctrlr_set_cc_async(ctrlr, cc.raw, nvme_ctrlr_shutdown_set_cc_done, ctx);
1132 	if (rc != 0) {
1133 		NVME_CTRLR_ERRLOG(ctrlr, "Failed to write CC.SHN\n");
1134 		ctx->shutdown_complete = true;
1135 	}
1136 }
1137 
1138 static void
1139 nvme_ctrlr_shutdown_async(struct spdk_nvme_ctrlr *ctrlr,
1140 			  struct nvme_ctrlr_detach_ctx *ctx)
1141 {
1142 	int rc;
1143 
1144 	if (ctrlr->is_removed) {
1145 		ctx->shutdown_complete = true;
1146 		return;
1147 	}
1148 
1149 	ctx->state = NVME_CTRLR_DETACH_SET_CC;
1150 	rc = nvme_ctrlr_get_cc_async(ctrlr, nvme_ctrlr_shutdown_get_cc_done, ctx);
1151 	if (rc != 0) {
1152 		NVME_CTRLR_ERRLOG(ctrlr, "Failed to read the CC register\n");
1153 		ctx->shutdown_complete = true;
1154 	}
1155 }
1156 
1157 static void
1158 nvme_ctrlr_shutdown_get_csts_done(void *_ctx, uint64_t value, const struct spdk_nvme_cpl *cpl)
1159 {
1160 	struct nvme_ctrlr_detach_ctx *ctx = _ctx;
1161 
1162 	if (spdk_nvme_cpl_is_error(cpl)) {
1163 		NVME_CTRLR_ERRLOG(ctx->ctrlr, "Failed to read the CSTS register\n");
1164 		ctx->shutdown_complete = true;
1165 		return;
1166 	}
1167 
1168 	assert(value <= UINT32_MAX);
1169 	ctx->csts.raw = (uint32_t)value;
1170 	ctx->state = NVME_CTRLR_DETACH_GET_CSTS_DONE;
1171 }
1172 
1173 static int
1174 nvme_ctrlr_shutdown_poll_async(struct spdk_nvme_ctrlr *ctrlr,
1175 			       struct nvme_ctrlr_detach_ctx *ctx)
1176 {
1177 	union spdk_nvme_csts_register	csts;
1178 	uint32_t			ms_waited;
1179 
1180 	switch (ctx->state) {
1181 	case NVME_CTRLR_DETACH_SET_CC:
1182 	case NVME_CTRLR_DETACH_GET_CSTS:
1183 		/* We're still waiting for the register operation to complete */
1184 		spdk_nvme_qpair_process_completions(ctrlr->adminq, 0);
1185 		return -EAGAIN;
1186 
1187 	case NVME_CTRLR_DETACH_CHECK_CSTS:
1188 		ctx->state = NVME_CTRLR_DETACH_GET_CSTS;
1189 		if (nvme_ctrlr_get_csts_async(ctrlr, nvme_ctrlr_shutdown_get_csts_done, ctx)) {
1190 			NVME_CTRLR_ERRLOG(ctrlr, "Failed to read the CSTS register\n");
1191 			return -EIO;
1192 		}
1193 		return -EAGAIN;
1194 
1195 	case NVME_CTRLR_DETACH_GET_CSTS_DONE:
1196 		ctx->state = NVME_CTRLR_DETACH_CHECK_CSTS;
1197 		break;
1198 
1199 	default:
1200 		assert(0 && "Should never happen");
1201 		return -EINVAL;
1202 	}
1203 
1204 	ms_waited = (spdk_get_ticks() - ctx->shutdown_start_tsc) * 1000 / spdk_get_ticks_hz();
1205 	csts.raw = ctx->csts.raw;
1206 
1207 	if (csts.bits.shst == SPDK_NVME_SHST_COMPLETE) {
1208 		NVME_CTRLR_DEBUGLOG(ctrlr, "shutdown complete in %u milliseconds\n", ms_waited);
1209 		return 0;
1210 	}
1211 
1212 	if (ms_waited < ctx->shutdown_timeout_ms) {
1213 		return -EAGAIN;
1214 	}
1215 
1216 	NVME_CTRLR_ERRLOG(ctrlr, "did not shutdown within %u milliseconds\n",
1217 			  ctx->shutdown_timeout_ms);
1218 	if (ctrlr->quirks & NVME_QUIRK_SHST_COMPLETE) {
1219 		NVME_CTRLR_ERRLOG(ctrlr, "likely due to shutdown handling in the VMWare emulated NVMe SSD\n");
1220 	}
1221 
1222 	return 0;
1223 }
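
/*
 * The shutdown state machine above is normally driven through the public
 * asynchronous detach API rather than called directly.  A sketch of that
 * pattern (assuming the detach helpers declared in spdk/nvme.h):
 *
 *   struct spdk_nvme_detach_ctx *detach_ctx = NULL;
 *
 *   spdk_nvme_detach_async(ctrlr, &detach_ctx);
 *   while (detach_ctx != NULL &&
 *          spdk_nvme_detach_poll_async(detach_ctx) == -EAGAIN) {
 *           (poll again later; CSTS.SHST is re-read on every pass above)
 *   }
 */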
1224 
1225 static inline uint64_t
1226 nvme_ctrlr_get_ready_timeout(struct spdk_nvme_ctrlr *ctrlr)
1227 {
1228 	return ctrlr->cap.bits.to * 500;
1229 }
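
/*
 * Worked example: CAP.TO is reported in 500 ms units, so a controller with
 * TO = 20 yields nvme_ctrlr_get_ready_timeout() == 20 * 500 = 10000 ms, i.e.
 * the init state machine waits up to 10 seconds for CSTS.RDY to track CC.EN.
 */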
1230 
1231 static void
1232 nvme_ctrlr_set_cc_en_done(void *ctx, uint64_t value, const struct spdk_nvme_cpl *cpl)
1233 {
1234 	struct spdk_nvme_ctrlr *ctrlr = ctx;
1235 
1236 	if (spdk_nvme_cpl_is_error(cpl)) {
1237 		NVME_CTRLR_ERRLOG(ctrlr, "Failed to set the CC register\n");
1238 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
1239 		return;
1240 	}
1241 
1242 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1,
1243 			     nvme_ctrlr_get_ready_timeout(ctrlr));
1244 }
1245 
1246 static int
1247 nvme_ctrlr_enable(struct spdk_nvme_ctrlr *ctrlr)
1248 {
1249 	union spdk_nvme_cc_register	cc;
1250 	int				rc;
1251 
1252 	rc = nvme_transport_ctrlr_enable(ctrlr);
1253 	if (rc != 0) {
1254 		NVME_CTRLR_ERRLOG(ctrlr, "transport ctrlr_enable failed\n");
1255 		return rc;
1256 	}
1257 
1258 	cc.raw = ctrlr->process_init_cc.raw;
1259 	if (cc.bits.en != 0) {
1260 		NVME_CTRLR_ERRLOG(ctrlr, "called with CC.EN = 1\n");
1261 		return -EINVAL;
1262 	}
1263 
1264 	cc.bits.en = 1;
1265 	cc.bits.css = 0;
1266 	cc.bits.shn = 0;
1267 	cc.bits.iosqes = 6; /* SQ entry size == 64 == 2^6 */
1268 	cc.bits.iocqes = 4; /* CQ entry size == 16 == 2^4 */
1269 
1270 	/* Page size is 2 ^ (12 + mps). */
1271 	cc.bits.mps = spdk_u32log2(ctrlr->page_size) - 12;
1272 
1273 	/*
1274 	 * Since NVMe 1.0, a controller should have at least one bit set in CAP.CSS.
1275 	 * A controller that does not have any bit set in CAP.CSS is not spec compliant.
1276 	 * Try to support such a controller regardless.
1277 	 */
1278 	if (ctrlr->cap.bits.css == 0) {
1279 		NVME_CTRLR_INFOLOG(ctrlr, "Drive reports no command sets supported. Assuming NVM is supported.\n");
1280 		ctrlr->cap.bits.css = SPDK_NVME_CAP_CSS_NVM;
1281 	}
1282 
1283 	/*
1284 	 * If the user did not explicitly request a command set, or supplied a value larger than
1285 	 * what can be saved in CC.CSS, use the most reasonable default.
1286 	 */
1287 	if (ctrlr->opts.command_set >= CHAR_BIT) {
1288 		if (ctrlr->cap.bits.css & SPDK_NVME_CAP_CSS_IOCS) {
1289 			ctrlr->opts.command_set = SPDK_NVME_CC_CSS_IOCS;
1290 		} else if (ctrlr->cap.bits.css & SPDK_NVME_CAP_CSS_NVM) {
1291 			ctrlr->opts.command_set = SPDK_NVME_CC_CSS_NVM;
1292 		} else if (ctrlr->cap.bits.css & SPDK_NVME_CAP_CSS_NOIO) {
1293 			ctrlr->opts.command_set = SPDK_NVME_CC_CSS_NOIO;
1294 		} else {
1295 			/* Invalid supported bits detected, falling back to NVM. */
1296 			ctrlr->opts.command_set = SPDK_NVME_CC_CSS_NVM;
1297 		}
1298 	}
1299 
1300 	/* Verify that the selected command set is supported by the controller. */
1301 	if (!(ctrlr->cap.bits.css & (1u << ctrlr->opts.command_set))) {
1302 		NVME_CTRLR_DEBUGLOG(ctrlr, "Requested I/O command set %u but supported mask is 0x%x\n",
1303 				    ctrlr->opts.command_set, ctrlr->cap.bits.css);
1304 		NVME_CTRLR_DEBUGLOG(ctrlr, "Falling back to NVM. Assuming NVM is supported.\n");
1305 		ctrlr->opts.command_set = SPDK_NVME_CC_CSS_NVM;
1306 	}
1307 
1308 	cc.bits.css = ctrlr->opts.command_set;
1309 
1310 	switch (ctrlr->opts.arb_mechanism) {
1311 	case SPDK_NVME_CC_AMS_RR:
1312 		break;
1313 	case SPDK_NVME_CC_AMS_WRR:
1314 		if (SPDK_NVME_CAP_AMS_WRR & ctrlr->cap.bits.ams) {
1315 			break;
1316 		}
1317 		return -EINVAL;
1318 	case SPDK_NVME_CC_AMS_VS:
1319 		if (SPDK_NVME_CAP_AMS_VS & ctrlr->cap.bits.ams) {
1320 			break;
1321 		}
1322 		return -EINVAL;
1323 	default:
1324 		return -EINVAL;
1325 	}
1326 
1327 	cc.bits.ams = ctrlr->opts.arb_mechanism;
1328 	ctrlr->process_init_cc.raw = cc.raw;
1329 
1330 	if (nvme_ctrlr_set_cc_async(ctrlr, cc.raw, nvme_ctrlr_set_cc_en_done, ctrlr)) {
1331 		NVME_CTRLR_ERRLOG(ctrlr, "set_cc() failed\n");
1332 		return -EIO;
1333 	}
1334 
1335 	return 0;
1336 }
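
/*
 * Worked example of the CC encoding above for a host using 4 KiB pages:
 *
 *   cc.bits.mps    = spdk_u32log2(4096) - 12 = 0    (page size = 2^(12 + 0))
 *   cc.bits.iosqes = 6                              (SQ entry = 2^6 = 64 bytes)
 *   cc.bits.iocqes = 4                              (CQ entry = 2^4 = 16 bytes)
 *   cc.bits.css    = the selected I/O command set (NVM unless IOCS/NOIO chosen)
 *
 * Writing CC with EN = 1 then moves the state machine to
 * ENABLE_WAIT_FOR_READY_1 via nvme_ctrlr_set_cc_en_done().
 */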
1337 
1338 static const char *
1339 nvme_ctrlr_state_string(enum nvme_ctrlr_state state)
1340 {
1341 	switch (state) {
1342 	case NVME_CTRLR_STATE_INIT_DELAY:
1343 		return "delay init";
1344 	case NVME_CTRLR_STATE_CONNECT_ADMINQ:
1345 		return "connect adminq";
1346 	case NVME_CTRLR_STATE_WAIT_FOR_CONNECT_ADMINQ:
1347 		return "wait for connect adminq";
1348 	case NVME_CTRLR_STATE_READ_VS:
1349 		return "read vs";
1350 	case NVME_CTRLR_STATE_READ_VS_WAIT_FOR_VS:
1351 		return "read vs wait for vs";
1352 	case NVME_CTRLR_STATE_READ_CAP:
1353 		return "read cap";
1354 	case NVME_CTRLR_STATE_READ_CAP_WAIT_FOR_CAP:
1355 		return "read cap wait for cap";
1356 	case NVME_CTRLR_STATE_CHECK_EN:
1357 		return "check en";
1358 	case NVME_CTRLR_STATE_CHECK_EN_WAIT_FOR_CC:
1359 		return "check en wait for cc";
1360 	case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1:
1361 		return "disable and wait for CSTS.RDY = 1";
1362 	case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1_WAIT_FOR_CSTS:
1363 		return "disable and wait for CSTS.RDY = 1 reg";
1364 	case NVME_CTRLR_STATE_SET_EN_0:
1365 		return "set CC.EN = 0";
1366 	case NVME_CTRLR_STATE_SET_EN_0_WAIT_FOR_CC:
1367 		return "set CC.EN = 0 wait for cc";
1368 	case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0:
1369 		return "disable and wait for CSTS.RDY = 0";
1370 	case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0_WAIT_FOR_CSTS:
1371 		return "disable and wait for CSTS.RDY = 0 reg";
1372 	case NVME_CTRLR_STATE_ENABLE:
1373 		return "enable controller by writing CC.EN = 1";
1374 	case NVME_CTRLR_STATE_ENABLE_WAIT_FOR_CC:
1375 		return "enable controller by writing CC.EN = 1 reg";
1376 	case NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1:
1377 		return "wait for CSTS.RDY = 1";
1378 	case NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1_WAIT_FOR_CSTS:
1379 		return "wait for CSTS.RDY = 1 reg";
1380 	case NVME_CTRLR_STATE_RESET_ADMIN_QUEUE:
1381 		return "reset admin queue";
1382 	case NVME_CTRLR_STATE_IDENTIFY:
1383 		return "identify controller";
1384 	case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY:
1385 		return "wait for identify controller";
1386 	case NVME_CTRLR_STATE_CONFIGURE_AER:
1387 		return "configure AER";
1388 	case NVME_CTRLR_STATE_WAIT_FOR_CONFIGURE_AER:
1389 		return "wait for configure aer";
1390 	case NVME_CTRLR_STATE_SET_KEEP_ALIVE_TIMEOUT:
1391 		return "set keep alive timeout";
1392 	case NVME_CTRLR_STATE_WAIT_FOR_KEEP_ALIVE_TIMEOUT:
1393 		return "wait for set keep alive timeout";
1394 	case NVME_CTRLR_STATE_IDENTIFY_IOCS_SPECIFIC:
1395 		return "identify controller iocs specific";
1396 	case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_IOCS_SPECIFIC:
1397 		return "wait for identify controller iocs specific";
1398 	case NVME_CTRLR_STATE_GET_ZNS_CMD_EFFECTS_LOG:
1399 		return "get zns cmd and effects log page";
1400 	case NVME_CTRLR_STATE_WAIT_FOR_GET_ZNS_CMD_EFFECTS_LOG:
1401 		return "wait for get zns cmd and effects log page";
1402 	case NVME_CTRLR_STATE_SET_NUM_QUEUES:
1403 		return "set number of queues";
1404 	case NVME_CTRLR_STATE_WAIT_FOR_SET_NUM_QUEUES:
1405 		return "wait for set number of queues";
1406 	case NVME_CTRLR_STATE_IDENTIFY_ACTIVE_NS:
1407 		return "identify active ns";
1408 	case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_ACTIVE_NS:
1409 		return "wait for identify active ns";
1410 	case NVME_CTRLR_STATE_IDENTIFY_NS:
1411 		return "identify ns";
1412 	case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_NS:
1413 		return "wait for identify ns";
1414 	case NVME_CTRLR_STATE_IDENTIFY_ID_DESCS:
1415 		return "identify namespace id descriptors";
1416 	case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_ID_DESCS:
1417 		return "wait for identify namespace id descriptors";
1418 	case NVME_CTRLR_STATE_IDENTIFY_NS_IOCS_SPECIFIC:
1419 		return "identify ns iocs specific";
1420 	case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_NS_IOCS_SPECIFIC:
1421 		return "wait for identify ns iocs specific";
1422 	case NVME_CTRLR_STATE_SET_SUPPORTED_LOG_PAGES:
1423 		return "set supported log pages";
1424 	case NVME_CTRLR_STATE_SET_SUPPORTED_INTEL_LOG_PAGES:
1425 		return "set supported INTEL log pages";
1426 	case NVME_CTRLR_STATE_WAIT_FOR_SUPPORTED_INTEL_LOG_PAGES:
1427 		return "wait for supported INTEL log pages";
1428 	case NVME_CTRLR_STATE_SET_SUPPORTED_FEATURES:
1429 		return "set supported features";
1430 	case NVME_CTRLR_STATE_SET_DB_BUF_CFG:
1431 		return "set doorbell buffer config";
1432 	case NVME_CTRLR_STATE_WAIT_FOR_DB_BUF_CFG:
1433 		return "wait for doorbell buffer config";
1434 	case NVME_CTRLR_STATE_SET_HOST_ID:
1435 		return "set host ID";
1436 	case NVME_CTRLR_STATE_WAIT_FOR_HOST_ID:
1437 		return "wait for set host ID";
1438 	case NVME_CTRLR_STATE_READY:
1439 		return "ready";
1440 	case NVME_CTRLR_STATE_ERROR:
1441 		return "error";
1442 	}
1443 	return "unknown";
1444 }
1445 
1446 static void
1447 _nvme_ctrlr_set_state(struct spdk_nvme_ctrlr *ctrlr, enum nvme_ctrlr_state state,
1448 		      uint64_t timeout_in_ms, bool quiet)
1449 {
1450 	uint64_t ticks_per_ms, timeout_in_ticks, now_ticks;
1451 
1452 	ctrlr->state = state;
1453 	if (timeout_in_ms == NVME_TIMEOUT_KEEP_EXISTING) {
1454 		if (!quiet) {
1455 			NVME_CTRLR_DEBUGLOG(ctrlr, "setting state to %s (keeping existing timeout)\n",
1456 					    nvme_ctrlr_state_string(ctrlr->state));
1457 		}
1458 		return;
1459 	}
1460 
1461 	if (timeout_in_ms == NVME_TIMEOUT_INFINITE) {
1462 		goto inf;
1463 	}
1464 
1465 	ticks_per_ms = spdk_get_ticks_hz() / 1000;
1466 	if (timeout_in_ms > UINT64_MAX / ticks_per_ms) {
1467 		NVME_CTRLR_ERRLOG(ctrlr,
1468 				  "Specified timeout would cause integer overflow. Defaulting to no timeout.\n");
1469 		goto inf;
1470 	}
1471 
1472 	now_ticks = spdk_get_ticks();
1473 	timeout_in_ticks = timeout_in_ms * ticks_per_ms;
1474 	if (timeout_in_ticks > UINT64_MAX - now_ticks) {
1475 		NVME_CTRLR_ERRLOG(ctrlr,
1476 				  "Specified timeout would cause integer overflow. Defaulting to no timeout.\n");
1477 		goto inf;
1478 	}
1479 
1480 	ctrlr->state_timeout_tsc = timeout_in_ticks + now_ticks;
1481 	if (!quiet) {
1482 		NVME_CTRLR_DEBUGLOG(ctrlr, "setting state to %s (timeout %" PRIu64 " ms)\n",
1483 				    nvme_ctrlr_state_string(ctrlr->state), timeout_in_ms);
1484 	}
1485 	return;
1486 inf:
1487 	if (!quiet) {
1488 		NVME_CTRLR_DEBUGLOG(ctrlr, "setting state to %s (no timeout)\n",
1489 				    nvme_ctrlr_state_string(ctrlr->state));
1490 	}
1491 	ctrlr->state_timeout_tsc = NVME_TIMEOUT_INFINITE;
1492 }
1493 
1494 static void
1495 nvme_ctrlr_set_state(struct spdk_nvme_ctrlr *ctrlr, enum nvme_ctrlr_state state,
1496 		     uint64_t timeout_in_ms)
1497 {
1498 	_nvme_ctrlr_set_state(ctrlr, state, timeout_in_ms, false);
1499 }
1500 
1501 static void
1502 nvme_ctrlr_set_state_quiet(struct spdk_nvme_ctrlr *ctrlr, enum nvme_ctrlr_state state,
1503 			   uint64_t timeout_in_ms)
1504 {
1505 	_nvme_ctrlr_set_state(ctrlr, state, timeout_in_ms, true);
1506 }
1507 
1508 static void
1509 nvme_ctrlr_free_zns_specific_data(struct spdk_nvme_ctrlr *ctrlr)
1510 {
1511 	spdk_free(ctrlr->cdata_zns);
1512 	ctrlr->cdata_zns = NULL;
1513 }
1514 
1515 static void
1516 nvme_ctrlr_free_iocs_specific_data(struct spdk_nvme_ctrlr *ctrlr)
1517 {
1518 	nvme_ctrlr_free_zns_specific_data(ctrlr);
1519 }
1520 
1521 static void
1522 nvme_ctrlr_free_doorbell_buffer(struct spdk_nvme_ctrlr *ctrlr)
1523 {
1524 	if (ctrlr->shadow_doorbell) {
1525 		spdk_free(ctrlr->shadow_doorbell);
1526 		ctrlr->shadow_doorbell = NULL;
1527 	}
1528 
1529 	if (ctrlr->eventidx) {
1530 		spdk_free(ctrlr->eventidx);
1531 		ctrlr->eventidx = NULL;
1532 	}
1533 }
1534 
1535 static void
1536 nvme_ctrlr_set_doorbell_buffer_config_done(void *arg, const struct spdk_nvme_cpl *cpl)
1537 {
1538 	struct spdk_nvme_ctrlr *ctrlr = (struct spdk_nvme_ctrlr *)arg;
1539 
1540 	if (spdk_nvme_cpl_is_error(cpl)) {
1541 		NVME_CTRLR_WARNLOG(ctrlr, "Doorbell buffer config failed\n");
1542 	} else {
1543 		NVME_CTRLR_INFOLOG(ctrlr, "Doorbell buffer config enabled\n");
1544 	}
1545 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_HOST_ID,
1546 			     ctrlr->opts.admin_timeout_ms);
1547 }
1548 
1549 static int
1550 nvme_ctrlr_set_doorbell_buffer_config(struct spdk_nvme_ctrlr *ctrlr)
1551 {
1552 	int rc = 0;
1553 	uint64_t prp1, prp2, len;
1554 
1555 	if (!ctrlr->cdata.oacs.doorbell_buffer_config) {
1556 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_HOST_ID,
1557 				     ctrlr->opts.admin_timeout_ms);
1558 		return 0;
1559 	}
1560 
1561 	if (ctrlr->trid.trtype != SPDK_NVME_TRANSPORT_PCIE) {
1562 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_HOST_ID,
1563 				     ctrlr->opts.admin_timeout_ms);
1564 		return 0;
1565 	}
1566 
1567 	/* Only one page is needed for each of the doorbell buffers */
1568 	ctrlr->shadow_doorbell = spdk_zmalloc(ctrlr->page_size, ctrlr->page_size,
1569 					      NULL, SPDK_ENV_LCORE_ID_ANY,
1570 					      SPDK_MALLOC_DMA | SPDK_MALLOC_SHARE);
1571 	if (ctrlr->shadow_doorbell == NULL) {
1572 		rc = -ENOMEM;
1573 		goto error;
1574 	}
1575 
1576 	len = ctrlr->page_size;
1577 	prp1 = spdk_vtophys(ctrlr->shadow_doorbell, &len);
1578 	if (prp1 == SPDK_VTOPHYS_ERROR || len != ctrlr->page_size) {
1579 		rc = -EFAULT;
1580 		goto error;
1581 	}
1582 
1583 	ctrlr->eventidx = spdk_zmalloc(ctrlr->page_size, ctrlr->page_size,
1584 				       NULL, SPDK_ENV_LCORE_ID_ANY,
1585 				       SPDK_MALLOC_DMA | SPDK_MALLOC_SHARE);
1586 	if (ctrlr->eventidx == NULL) {
1587 		rc = -ENOMEM;
1588 		goto error;
1589 	}
1590 
1591 	len = ctrlr->page_size;
1592 	prp2 = spdk_vtophys(ctrlr->eventidx, &len);
1593 	if (prp2 == SPDK_VTOPHYS_ERROR || len != ctrlr->page_size) {
1594 		rc = -EFAULT;
1595 		goto error;
1596 	}
1597 
1598 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_DB_BUF_CFG,
1599 			     ctrlr->opts.admin_timeout_ms);
1600 
1601 	rc = nvme_ctrlr_cmd_doorbell_buffer_config(ctrlr, prp1, prp2,
1602 			nvme_ctrlr_set_doorbell_buffer_config_done, ctrlr);
1603 	if (rc != 0) {
1604 		goto error;
1605 	}
1606 
1607 	return 0;
1608 
1609 error:
1610 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
1611 	nvme_ctrlr_free_doorbell_buffer(ctrlr);
1612 	return rc;
1613 }
1614 
1615 static void
1616 nvme_ctrlr_abort_queued_aborts(struct spdk_nvme_ctrlr *ctrlr)
1617 {
1618 	struct nvme_request	*req, *tmp;
1619 	struct spdk_nvme_cpl	cpl = {};
1620 
1621 	cpl.status.sc = SPDK_NVME_SC_ABORTED_SQ_DELETION;
1622 	cpl.status.sct = SPDK_NVME_SCT_GENERIC;
1623 
1624 	STAILQ_FOREACH_SAFE(req, &ctrlr->queued_aborts, stailq, tmp) {
1625 		STAILQ_REMOVE_HEAD(&ctrlr->queued_aborts, stailq);
1626 
1627 		nvme_complete_request(req->cb_fn, req->cb_arg, req->qpair, req, &cpl);
1628 		nvme_free_request(req);
1629 	}
1630 }
1631 
1632 int
1633 spdk_nvme_ctrlr_disconnect(struct spdk_nvme_ctrlr *ctrlr)
1634 {
1635 	struct spdk_nvme_qpair	*qpair;
1636 
1637 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
1638 	ctrlr->prepare_for_reset = false;
1639 
1640 	if (ctrlr->is_resetting || ctrlr->is_removed) {
1641 		/*
1642 		 * Controller is already resetting or has been removed. Return
1643 		 *  immediately since there is no need to kick off another
1644 		 *  reset in these cases.
1645 		 */
1646 		nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
1647 		return ctrlr->is_resetting ? -EBUSY : -ENXIO;
1648 	}
1649 
1650 	ctrlr->is_resetting = true;
1651 	ctrlr->is_failed = false;
1652 
1653 	NVME_CTRLR_NOTICELOG(ctrlr, "resetting controller\n");
1654 
1655 	/* Disable keep-alive, it'll be re-enabled as part of the init process */
1656 	ctrlr->keep_alive_interval_ticks = 0;
1657 
1658 	/* Abort all of the queued abort requests */
1659 	nvme_ctrlr_abort_queued_aborts(ctrlr);
1660 
1661 	nvme_transport_admin_qpair_abort_aers(ctrlr->adminq);
1662 
1663 	/* Disable all queues before disabling the controller hardware. */
1664 	TAILQ_FOREACH(qpair, &ctrlr->active_io_qpairs, tailq) {
1665 		qpair->transport_failure_reason = SPDK_NVME_QPAIR_FAILURE_LOCAL;
1666 	}
1667 
1668 	ctrlr->adminq->transport_failure_reason = SPDK_NVME_QPAIR_FAILURE_LOCAL;
1669 	nvme_transport_ctrlr_disconnect_qpair(ctrlr, ctrlr->adminq);
1670 
1671 	/* Doorbell buffer config is invalid during reset */
1672 	nvme_ctrlr_free_doorbell_buffer(ctrlr);
1673 
1674 	/* I/O Command Set Specific Identify Controller data is invalidated during reset */
1675 	nvme_ctrlr_free_iocs_specific_data(ctrlr);
1676 
1677 	spdk_bit_array_free(&ctrlr->free_io_qids);
1678 
1679 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
1680 	return 0;
1681 }
1682 
1683 void
1684 spdk_nvme_ctrlr_reconnect_async(struct spdk_nvme_ctrlr *ctrlr)
1685 {
1686 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
1687 
1688 	/* Set the state back to INIT to cause a full hardware reset. */
1689 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_INIT, NVME_TIMEOUT_INFINITE);
1690 
1691 	/* Return without releasing ctrlr_lock. ctrlr_lock will be released when
1692 	 * spdk_nvme_ctrlr_reconnect_poll_async() returns anything other than -EAGAIN.
1693 	 */
1694 }
1695 
1696 static int
1697 nvme_ctrlr_reset_pre(struct spdk_nvme_ctrlr *ctrlr)
1698 {
1699 	int rc;
1700 
1701 	rc = spdk_nvme_ctrlr_disconnect(ctrlr);
1702 	if (rc != 0) {
1703 		return rc;
1704 	}
1705 
1706 	spdk_nvme_ctrlr_reconnect_async(ctrlr);
1707 	return 0;
1708 }
1709 
1710 /**
1711  * This function will be called when the controller is being reinitialized.
1712  * Note: the ctrlr_lock must be held when calling this function.
1713  */
1714 int
1715 spdk_nvme_ctrlr_reconnect_poll_async(struct spdk_nvme_ctrlr *ctrlr)
1716 {
1717 	struct spdk_nvme_ns *ns, *tmp_ns;
1718 	struct spdk_nvme_qpair	*qpair;
1719 	int rc = 0, rc_tmp = 0;
1720 	bool async;
1721 
1722 	if (nvme_ctrlr_process_init(ctrlr) != 0) {
1723 		NVME_CTRLR_ERRLOG(ctrlr, "controller reinitialization failed\n");
1724 		rc = -1;
1725 	}
1726 	if (ctrlr->state != NVME_CTRLR_STATE_READY && rc != -1) {
1727 		return -EAGAIN;
1728 	}
1729 
1730 	/*
1731 	 * For non-fabrics controllers, the memory locations of the transport qpair
1732 	 * don't change when the controller is reset. They simply need to be
1733 	 * re-enabled with admin commands to the controller. For fabrics
1734 	 * controllers, we need to disconnect and reconnect the qpair on its
1735 	 * own thread outside of the context of the reset.
1736 	 */
1737 	if (rc == 0 && !spdk_nvme_ctrlr_is_fabrics(ctrlr)) {
1738 		/* Reinitialize qpairs */
1739 		TAILQ_FOREACH(qpair, &ctrlr->active_io_qpairs, tailq) {
1740 			assert(spdk_bit_array_get(ctrlr->free_io_qids, qpair->id));
1741 			spdk_bit_array_clear(ctrlr->free_io_qids, qpair->id);
1742 
1743 			/* Force a synchronous connect. We can't currently handle an asynchronous
1744 			 * operation here. */
1745 			async = qpair->async;
1746 			qpair->async = false;
1747 			rc_tmp = nvme_transport_ctrlr_connect_qpair(ctrlr, qpair);
1748 			qpair->async = async;
1749 
1750 			if (rc_tmp != 0) {
1751 				rc = rc_tmp;
1752 				qpair->transport_failure_reason = SPDK_NVME_QPAIR_FAILURE_LOCAL;
1753 				continue;
1754 			}
1755 		}
1756 	}
1757 
1758 	/*
1759 	 * Take this opportunity to remove inactive namespaces. During a reset namespace
1760 	 * handles can be invalidated.
1761 	 */
1762 	RB_FOREACH_SAFE(ns, nvme_ns_tree, &ctrlr->ns, tmp_ns) {
1763 		if (!ns->active) {
1764 			RB_REMOVE(nvme_ns_tree, &ctrlr->ns, ns);
1765 			spdk_free(ns);
1766 		}
1767 	}
1768 
1769 	if (rc) {
1770 		nvme_ctrlr_fail(ctrlr, false);
1771 	}
1772 	ctrlr->is_resetting = false;
1773 
1774 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
1775 
1776 	if (!ctrlr->cdata.oaes.ns_attribute_notices) {
1777 		/*
1778 		 * If the controller doesn't support namespace attribute change notices,
1779 		 * the namespace attributes (e.g. the number of namespaces) may have changed
1780 		 * across the reset, so update the I/O message handling here as well.
1781 		 */
1782 		nvme_io_msg_ctrlr_update(ctrlr);
1783 	}
1784 
1785 	return rc;
1786 }
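
/*
 * Illustrative sketch (not part of the upstream code): an application driving the
 * reset sequence manually with the three public calls above could, assuming "ctrlr"
 * is an attached controller handle, do roughly what nvme_ctrlr_reset_pre() and the
 * polling loop in spdk_nvme_ctrlr_reset() below do:
 *
 *	if (spdk_nvme_ctrlr_disconnect(ctrlr) == 0) {
 *		spdk_nvme_ctrlr_reconnect_async(ctrlr);
 *		while (spdk_nvme_ctrlr_reconnect_poll_async(ctrlr) == -EAGAIN) {
 *			;
 *		}
 *	}
 */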
1787 
1788 static void
1789 nvme_ctrlr_reset_ctx_init(struct spdk_nvme_ctrlr_reset_ctx *ctrlr_reset_ctx,
1790 			  struct spdk_nvme_ctrlr *ctrlr)
1791 {
1792 	ctrlr_reset_ctx->ctrlr = ctrlr;
1793 }
1794 
1795 static int
1796 nvme_ctrlr_reset_poll_async(struct spdk_nvme_ctrlr_reset_ctx *ctrlr_reset_ctx)
1797 {
1798 	struct spdk_nvme_ctrlr *ctrlr = ctrlr_reset_ctx->ctrlr;
1799 
1800 	return spdk_nvme_ctrlr_reconnect_poll_async(ctrlr);
1801 }
1802 
1803 int
1804 spdk_nvme_ctrlr_reset_poll_async(struct spdk_nvme_ctrlr_reset_ctx *ctrlr_reset_ctx)
1805 {
1806 	int rc;
1807 	if (!ctrlr_reset_ctx) {
1808 		return -EINVAL;
1809 	}
1810 	rc = nvme_ctrlr_reset_poll_async(ctrlr_reset_ctx);
1811 	if (rc == -EAGAIN) {
1812 		return rc;
1813 	}
1814 
1815 	free(ctrlr_reset_ctx);
1816 	return rc;
1817 }
1818 
1819 int
1820 spdk_nvme_ctrlr_reset_async(struct spdk_nvme_ctrlr *ctrlr,
1821 			    struct spdk_nvme_ctrlr_reset_ctx **reset_ctx)
1822 {
1823 	struct spdk_nvme_ctrlr_reset_ctx *ctrlr_reset_ctx;
1824 	int rc;
1825 
1826 	ctrlr_reset_ctx = calloc(1, sizeof(*ctrlr_reset_ctx));
1827 	if (!ctrlr_reset_ctx) {
1828 		return -ENOMEM;
1829 	}
1830 
1831 	rc = nvme_ctrlr_reset_pre(ctrlr);
1832 	if (rc != 0) {
1833 		free(ctrlr_reset_ctx);
1834 	} else {
1835 		nvme_ctrlr_reset_ctx_init(ctrlr_reset_ctx, ctrlr);
1836 		*reset_ctx = ctrlr_reset_ctx;
1837 	}
1838 
1839 	return rc;
1840 }
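
/*
 * Illustrative sketch (not part of the upstream code): the asynchronous variant is
 * used by allocating a reset context and polling it until something other than
 * -EAGAIN comes back, at which point the context has been freed; the synchronous
 * spdk_nvme_ctrlr_reset() below follows the same pattern internally:
 *
 *	struct spdk_nvme_ctrlr_reset_ctx *reset_ctx;
 *	int rc;
 *
 *	rc = spdk_nvme_ctrlr_reset_async(ctrlr, &reset_ctx);
 *	if (rc == 0) {
 *		do {
 *			rc = spdk_nvme_ctrlr_reset_poll_async(reset_ctx);
 *		} while (rc == -EAGAIN);
 *	}
 */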
1841 
1842 int
1843 spdk_nvme_ctrlr_reset(struct spdk_nvme_ctrlr *ctrlr)
1844 {
1845 	struct spdk_nvme_ctrlr_reset_ctx reset_ctx = {};
1846 	int rc;
1847 
1848 	rc = nvme_ctrlr_reset_pre(ctrlr);
1849 	if (rc != 0) {
1850 		if (rc == -EBUSY) {
1851 			rc = 0;
1852 		}
1853 		return rc;
1854 	}
1855 	nvme_ctrlr_reset_ctx_init(&reset_ctx, ctrlr);
1856 
1857 	while (true) {
1858 		rc = nvme_ctrlr_reset_poll_async(&reset_ctx);
1859 		if (rc != -EAGAIN) {
1860 			break;
1861 		}
1862 	}
1863 
1864 	return rc;
1865 }
1866 
1867 void
1868 spdk_nvme_ctrlr_prepare_for_reset(struct spdk_nvme_ctrlr *ctrlr)
1869 {
1870 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
1871 	ctrlr->prepare_for_reset = true;
1872 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
1873 }
1874 
1875 int
1876 spdk_nvme_ctrlr_reset_subsystem(struct spdk_nvme_ctrlr *ctrlr)
1877 {
1878 	union spdk_nvme_cap_register cap;
1879 	int rc = 0;
1880 
1881 	cap = spdk_nvme_ctrlr_get_regs_cap(ctrlr);
1882 	if (cap.bits.nssrs == 0) {
1883 		NVME_CTRLR_WARNLOG(ctrlr, "subsystem reset is not supported\n");
1884 		return -ENOTSUP;
1885 	}
1886 
1887 	NVME_CTRLR_NOTICELOG(ctrlr, "resetting subsystem\n");
1888 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
1889 	ctrlr->is_resetting = true;
1890 	rc = nvme_ctrlr_set_nssr(ctrlr, SPDK_NVME_NSSR_VALUE);
1891 	ctrlr->is_resetting = false;
1892 
1893 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
1894 	/*
1895 	 * No more cleanup at this point like in the ctrlr reset. A subsystem reset will cause
1896 	 * a hot remove for PCIe transport. The hot remove handling does all the necessary ctrlr cleanup.
1897 	 */
1898 	return rc;
1899 }
1900 
1901 int
1902 spdk_nvme_ctrlr_set_trid(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_transport_id *trid)
1903 {
1904 	int rc = 0;
1905 
1906 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
1907 
1908 	if (ctrlr->is_failed == false) {
1909 		rc = -EPERM;
1910 		goto out;
1911 	}
1912 
1913 	if (trid->trtype != ctrlr->trid.trtype) {
1914 		rc = -EINVAL;
1915 		goto out;
1916 	}
1917 
1918 	if (strncmp(trid->subnqn, ctrlr->trid.subnqn, SPDK_NVMF_NQN_MAX_LEN)) {
1919 		rc = -EINVAL;
1920 		goto out;
1921 	}
1922 
1923 	ctrlr->trid = *trid;
1924 
1925 out:
1926 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
1927 	return rc;
1928 }
1929 
1930 void
1931 spdk_nvme_ctrlr_set_remove_cb(struct spdk_nvme_ctrlr *ctrlr,
1932 			      spdk_nvme_remove_cb remove_cb, void *remove_ctx)
1933 {
1934 	if (!spdk_process_is_primary()) {
1935 		return;
1936 	}
1937 
1938 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
1939 	ctrlr->remove_cb = remove_cb;
1940 	ctrlr->cb_ctx = remove_ctx;
1941 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
1942 }
1943 
1944 static void
1945 nvme_ctrlr_identify_done(void *arg, const struct spdk_nvme_cpl *cpl)
1946 {
1947 	struct spdk_nvme_ctrlr *ctrlr = (struct spdk_nvme_ctrlr *)arg;
1948 
1949 	if (spdk_nvme_cpl_is_error(cpl)) {
1950 		NVME_CTRLR_ERRLOG(ctrlr, "nvme_identify_controller failed!\n");
1951 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
1952 		return;
1953 	}
1954 
1955 	/*
1956 	 * Use MDTS to ensure our default max_xfer_size doesn't exceed what the
1957 	 *  controller supports.
1958 	 */
1959 	ctrlr->max_xfer_size = nvme_transport_ctrlr_get_max_xfer_size(ctrlr);
1960 	NVME_CTRLR_DEBUGLOG(ctrlr, "transport max_xfer_size %u\n", ctrlr->max_xfer_size);
1961 	if (ctrlr->cdata.mdts > 0) {
1962 		ctrlr->max_xfer_size = spdk_min(ctrlr->max_xfer_size,
1963 						ctrlr->min_page_size * (1 << ctrlr->cdata.mdts));
1964 		NVME_CTRLR_DEBUGLOG(ctrlr, "MDTS max_xfer_size %u\n", ctrlr->max_xfer_size);
1965 	}
1966 
1967 	NVME_CTRLR_DEBUGLOG(ctrlr, "CNTLID 0x%04" PRIx16 "\n", ctrlr->cdata.cntlid);
1968 	if (ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) {
1969 		ctrlr->cntlid = ctrlr->cdata.cntlid;
1970 	} else {
1971 		/*
1972 		 * Fabrics controllers should already have CNTLID from the Connect command.
1973 		 *
1974 		 * If CNTLID from Connect doesn't match CNTLID in the Identify Controller data,
1975 		 * trust the one from Connect.
1976 		 */
1977 		if (ctrlr->cntlid != ctrlr->cdata.cntlid) {
1978 			NVME_CTRLR_DEBUGLOG(ctrlr, "Identify CNTLID 0x%04" PRIx16 " != Connect CNTLID 0x%04" PRIx16 "\n",
1979 					    ctrlr->cdata.cntlid, ctrlr->cntlid);
1980 		}
1981 	}
1982 
1983 	if (ctrlr->cdata.sgls.supported) {
1984 		assert(ctrlr->cdata.sgls.supported != 0x3);
1985 		ctrlr->flags |= SPDK_NVME_CTRLR_SGL_SUPPORTED;
1986 		if (ctrlr->cdata.sgls.supported == 0x2) {
1987 			ctrlr->flags |= SPDK_NVME_CTRLR_SGL_REQUIRES_DWORD_ALIGNMENT;
1988 		}
1989 
1990 		ctrlr->max_sges = nvme_transport_ctrlr_get_max_sges(ctrlr);
1991 		NVME_CTRLR_DEBUGLOG(ctrlr, "transport max_sges %u\n", ctrlr->max_sges);
1992 	}
1993 
1994 	if (ctrlr->cdata.oacs.security && !(ctrlr->quirks & NVME_QUIRK_OACS_SECURITY)) {
1995 		ctrlr->flags |= SPDK_NVME_CTRLR_SECURITY_SEND_RECV_SUPPORTED;
1996 	}
1997 
1998 	if (ctrlr->cdata.oacs.directives) {
1999 		ctrlr->flags |= SPDK_NVME_CTRLR_DIRECTIVES_SUPPORTED;
2000 	}
2001 
2002 	NVME_CTRLR_DEBUGLOG(ctrlr, "fuses compare and write: %d\n",
2003 			    ctrlr->cdata.fuses.compare_and_write);
2004 	if (ctrlr->cdata.fuses.compare_and_write) {
2005 		ctrlr->flags |= SPDK_NVME_CTRLR_COMPARE_AND_WRITE_SUPPORTED;
2006 	}
2007 
2008 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_CONFIGURE_AER,
2009 			     ctrlr->opts.admin_timeout_ms);
2010 }
2011 
2012 static int
2013 nvme_ctrlr_identify(struct spdk_nvme_ctrlr *ctrlr)
2014 {
2015 	int	rc;
2016 
2017 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY,
2018 			     ctrlr->opts.admin_timeout_ms);
2019 
2020 	rc = nvme_ctrlr_cmd_identify(ctrlr, SPDK_NVME_IDENTIFY_CTRLR, 0, 0, 0,
2021 				     &ctrlr->cdata, sizeof(ctrlr->cdata),
2022 				     nvme_ctrlr_identify_done, ctrlr);
2023 	if (rc != 0) {
2024 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
2025 		return rc;
2026 	}
2027 
2028 	return 0;
2029 }
2030 
2031 static void
2032 nvme_ctrlr_get_zns_cmd_and_effects_log_done(void *arg, const struct spdk_nvme_cpl *cpl)
2033 {
2034 	struct spdk_nvme_cmds_and_effect_log_page *log_page;
2035 	struct spdk_nvme_ctrlr *ctrlr = arg;
2036 
2037 	if (spdk_nvme_cpl_is_error(cpl)) {
2038 		NVME_CTRLR_ERRLOG(ctrlr, "nvme_ctrlr_get_zns_cmd_and_effects_log failed!\n");
2039 		spdk_free(ctrlr->tmp_ptr);
2040 		ctrlr->tmp_ptr = NULL;
2041 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
2042 		return;
2043 	}
2044 
2045 	log_page = ctrlr->tmp_ptr;
2046 
2047 	if (log_page->io_cmds_supported[SPDK_NVME_OPC_ZONE_APPEND].csupp) {
2048 		ctrlr->flags |= SPDK_NVME_CTRLR_ZONE_APPEND_SUPPORTED;
2049 	}
2050 	spdk_free(ctrlr->tmp_ptr);
2051 	ctrlr->tmp_ptr = NULL;
2052 
2053 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_NUM_QUEUES, ctrlr->opts.admin_timeout_ms);
2054 }
2055 
2056 static int
2057 nvme_ctrlr_get_zns_cmd_and_effects_log(struct spdk_nvme_ctrlr *ctrlr)
2058 {
2059 	int rc;
2060 
2061 	assert(!ctrlr->tmp_ptr);
2062 	ctrlr->tmp_ptr = spdk_zmalloc(sizeof(struct spdk_nvme_cmds_and_effect_log_page), 64, NULL,
2063 				      SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_SHARE | SPDK_MALLOC_DMA);
2064 	if (!ctrlr->tmp_ptr) {
2065 		rc = -ENOMEM;
2066 		goto error;
2067 	}
2068 
2069 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_GET_ZNS_CMD_EFFECTS_LOG,
2070 			     ctrlr->opts.admin_timeout_ms);
2071 
2072 	rc = spdk_nvme_ctrlr_cmd_get_log_page_ext(ctrlr, SPDK_NVME_LOG_COMMAND_EFFECTS_LOG,
2073 			0, ctrlr->tmp_ptr, sizeof(struct spdk_nvme_cmds_and_effect_log_page),
2074 			0, 0, 0, SPDK_NVME_CSI_ZNS << 24,
2075 			nvme_ctrlr_get_zns_cmd_and_effects_log_done, ctrlr);
2076 	if (rc != 0) {
2077 		goto error;
2078 	}
2079 
2080 	return 0;
2081 
2082 error:
2083 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
2084 	spdk_free(ctrlr->tmp_ptr);
2085 	ctrlr->tmp_ptr = NULL;
2086 	return rc;
2087 }
2088 
2089 static void
2090 nvme_ctrlr_identify_zns_specific_done(void *arg, const struct spdk_nvme_cpl *cpl)
2091 {
2092 	struct spdk_nvme_ctrlr *ctrlr = (struct spdk_nvme_ctrlr *)arg;
2093 
2094 	if (spdk_nvme_cpl_is_error(cpl)) {
2095 		/* no need to print an error, the controller simply does not support ZNS */
2096 		nvme_ctrlr_free_zns_specific_data(ctrlr);
2097 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_NUM_QUEUES,
2098 				     ctrlr->opts.admin_timeout_ms);
2099 		return;
2100 	}
2101 
2102 	/* A zero zasl value means use mdts */
2103 	if (ctrlr->cdata_zns->zasl) {
2104 		uint32_t max_append = ctrlr->min_page_size * (1 << ctrlr->cdata_zns->zasl);
2105 		ctrlr->max_zone_append_size = spdk_min(ctrlr->max_xfer_size, max_append);
2106 	} else {
2107 		ctrlr->max_zone_append_size = ctrlr->max_xfer_size;
2108 	}
2109 
2110 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_GET_ZNS_CMD_EFFECTS_LOG,
2111 			     ctrlr->opts.admin_timeout_ms);
2112 }
2113 
2114 /**
2115  * This function will try to fetch the I/O Command Set specific Identify Controller data
2116  * structure for each I/O Command Set supported by SPDK.
2117  *
2118  * If an I/O Command Set is not supported by the controller, "Invalid Field in Command"
2119  * will be returned. Since we are probing exploratively, getting an error back
2120  * from the controller should not be treated as fatal.
2121  *
2122  * I/O Command Sets not supported by SPDK will be skipped (e.g. Key Value Command Set).
2123  *
2124  * I/O Command Sets without an IOCS specific data structure (i.e. a zero-filled IOCS specific
2125  * data structure) will be skipped (e.g. NVM Command Set, Key Value Command Set).
2126  */
2127 static int
2128 nvme_ctrlr_identify_iocs_specific(struct spdk_nvme_ctrlr *ctrlr)
2129 {
2130 	int	rc;
2131 
2132 	if (!nvme_ctrlr_multi_iocs_enabled(ctrlr)) {
2133 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_NUM_QUEUES,
2134 				     ctrlr->opts.admin_timeout_ms);
2135 		return 0;
2136 	}
2137 
2138 	/*
2139 	 * Since SPDK currently only needs to fetch a single Command Set, keep the code here,
2140 	 * instead of creating multiple NVME_CTRLR_STATE_IDENTIFY_IOCS_SPECIFIC substates,
2141 	 * which would require additional functions and complexity for no good reason.
2142 	 */
2143 	assert(!ctrlr->cdata_zns);
2144 	ctrlr->cdata_zns = spdk_zmalloc(sizeof(*ctrlr->cdata_zns), 64, NULL, SPDK_ENV_SOCKET_ID_ANY,
2145 					SPDK_MALLOC_SHARE | SPDK_MALLOC_DMA);
2146 	if (!ctrlr->cdata_zns) {
2147 		rc = -ENOMEM;
2148 		goto error;
2149 	}
2150 
2151 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_IOCS_SPECIFIC,
2152 			     ctrlr->opts.admin_timeout_ms);
2153 
2154 	rc = nvme_ctrlr_cmd_identify(ctrlr, SPDK_NVME_IDENTIFY_CTRLR_IOCS, 0, 0, SPDK_NVME_CSI_ZNS,
2155 				     ctrlr->cdata_zns, sizeof(*ctrlr->cdata_zns),
2156 				     nvme_ctrlr_identify_zns_specific_done, ctrlr);
2157 	if (rc != 0) {
2158 		goto error;
2159 	}
2160 
2161 	return 0;
2162 
2163 error:
2164 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
2165 	nvme_ctrlr_free_zns_specific_data(ctrlr);
2166 	return rc;
2167 }
2168 
2169 enum nvme_active_ns_state {
2170 	NVME_ACTIVE_NS_STATE_IDLE,
2171 	NVME_ACTIVE_NS_STATE_PROCESSING,
2172 	NVME_ACTIVE_NS_STATE_DONE,
2173 	NVME_ACTIVE_NS_STATE_ERROR
2174 };
2175 
2176 typedef void (*nvme_active_ns_ctx_deleter)(struct nvme_active_ns_ctx *);
2177 
2178 struct nvme_active_ns_ctx {
2179 	struct spdk_nvme_ctrlr *ctrlr;
2180 	uint32_t page_count;
2181 	uint32_t next_nsid;
2182 	uint32_t *new_ns_list;
2183 	nvme_active_ns_ctx_deleter deleter;
2184 
2185 	enum nvme_active_ns_state state;
2186 };
2187 
2188 static struct nvme_active_ns_ctx *
2189 nvme_active_ns_ctx_create(struct spdk_nvme_ctrlr *ctrlr, nvme_active_ns_ctx_deleter deleter)
2190 {
2191 	struct nvme_active_ns_ctx *ctx;
2192 	uint32_t *new_ns_list = NULL;
2193 
2194 	ctx = calloc(1, sizeof(*ctx));
2195 	if (!ctx) {
2196 		NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate nvme_active_ns_ctx!\n");
2197 		return NULL;
2198 	}
2199 
2200 	new_ns_list = spdk_zmalloc(sizeof(struct spdk_nvme_ns_list), ctrlr->page_size,
2201 				   NULL, SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_SHARE);
2202 	if (!new_ns_list) {
2203 		NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate active_ns_list!\n");
2204 		free(ctx);
2205 		return NULL;
2206 	}
2207 
2208 	ctx->page_count = 1;
2209 	ctx->new_ns_list = new_ns_list;
2210 	ctx->ctrlr = ctrlr;
2211 	ctx->deleter = deleter;
2212 
2213 	return ctx;
2214 }
2215 
2216 static void
2217 nvme_active_ns_ctx_destroy(struct nvme_active_ns_ctx *ctx)
2218 {
2219 	spdk_free(ctx->new_ns_list);
2220 	free(ctx);
2221 }
2222 
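/*
 * Tear down the per-namespace data for the given NSID and mark it inactive.  The
 * spdk_nvme_ns object stays in the controller's namespace tree; inactive entries
 * are pruned elsewhere (e.g. while reconnecting after a reset).
 */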
2223 static int
2224 nvme_ctrlr_destruct_namespace(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid)
2225 {
2226 	struct spdk_nvme_ns tmp, *ns;
2227 
2228 	assert(ctrlr != NULL);
2229 
2230 	tmp.id = nsid;
2231 	ns = RB_FIND(nvme_ns_tree, &ctrlr->ns, &tmp);
2232 	if (ns == NULL) {
2233 		return -EINVAL;
2234 	}
2235 
2236 	nvme_ns_destruct(ns);
2237 	ns->active = false;
2238 
2239 	return 0;
2240 }
2241 
2242 static int
2243 nvme_ctrlr_construct_namespace(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid)
2244 {
2245 	struct spdk_nvme_ns *ns;
2246 
2247 	if (nsid < 1 || nsid > ctrlr->cdata.nn) {
2248 		return -EINVAL;
2249 	}
2250 
2251 	/* Namespaces are constructed on demand, so simply request the namespace. */
2252 	ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid);
2253 	if (ns == NULL) {
2254 		return -ENOMEM;
2255 	}
2256 
2257 	ns->active = true;
2258 
2259 	return 0;
2260 }
2261 
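/*
 * Reconcile the namespace tree against a freshly retrieved active namespace list:
 * namespaces missing from new_ns_list are destructed, and namespaces present in it
 * are constructed (a no-op if they already exist).  new_ns_list is a zero-terminated
 * array of NSIDs holding at most max_entries entries.
 */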
2262 static void
2263 nvme_ctrlr_identify_active_ns_swap(struct spdk_nvme_ctrlr *ctrlr, uint32_t *new_ns_list,
2264 				   size_t max_entries)
2265 {
2266 	uint32_t active_ns_count = 0;
2267 	size_t i;
2268 	uint32_t nsid;
2269 	struct spdk_nvme_ns *ns, *tmp_ns;
2270 	int rc;
2271 
2272 	/* First, remove namespaces that no longer exist */
2273 	RB_FOREACH_SAFE(ns, nvme_ns_tree, &ctrlr->ns, tmp_ns) {
2274 		nsid = new_ns_list[0];
2275 		active_ns_count = 0;
2276 		while (nsid != 0) {
2277 			if (nsid == ns->id) {
2278 				break;
2279 			}
2280 
2281 			nsid = new_ns_list[active_ns_count++];
2282 		}
2283 
2284 		if (nsid != ns->id) {
2285 			/* Did not find this namespace id in the new list. */
2286 			NVME_CTRLR_DEBUGLOG(ctrlr, "Namespace %u was removed\n", ns->id);
2287 			nvme_ctrlr_destruct_namespace(ctrlr, ns->id);
2288 		}
2289 	}
2290 
2291 	/* Next, add new namespaces */
2292 	active_ns_count = 0;
2293 	for (i = 0; i < max_entries; i++) {
2294 		nsid = new_ns_list[active_ns_count];
2295 
2296 		if (nsid == 0) {
2297 			break;
2298 		}
2299 
2300 		/* If the namespace already exists, this will not construct it a second time. */
2301 		rc = nvme_ctrlr_construct_namespace(ctrlr, nsid);
2302 		if (rc != 0) {
2303 			/* We can't easily handle a failure here, so just log it and move on. */
2304 			assert(false);
2305 			NVME_CTRLR_DEBUGLOG(ctrlr, "Failed to allocate a namespace object.\n");
2306 			continue;
2307 		}
2308 
2309 		active_ns_count++;
2310 	}
2311 
2312 	ctrlr->active_ns_count = active_ns_count;
2313 }
2314 
2315 static void
2316 nvme_ctrlr_identify_active_ns_async_done(void *arg, const struct spdk_nvme_cpl *cpl)
2317 {
2318 	struct nvme_active_ns_ctx *ctx = arg;
2319 	uint32_t *new_ns_list = NULL;
2320 
2321 	if (spdk_nvme_cpl_is_error(cpl)) {
2322 		ctx->state = NVME_ACTIVE_NS_STATE_ERROR;
2323 		goto out;
2324 	}
2325 
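	/*
	 * Each Identify Active Namespace ID List page holds 1024 NSIDs.  The last NSID
	 * returned is used as the starting point for the next page; zero means the list
	 * is complete.
	 */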
2326 	ctx->next_nsid = ctx->new_ns_list[1024 * ctx->page_count - 1];
2327 	if (ctx->next_nsid == 0) {
2328 		ctx->state = NVME_ACTIVE_NS_STATE_DONE;
2329 		goto out;
2330 	}
2331 
2332 	ctx->page_count++;
2333 	new_ns_list = spdk_realloc(ctx->new_ns_list,
2334 				   ctx->page_count * sizeof(struct spdk_nvme_ns_list),
2335 				   ctx->ctrlr->page_size);
2336 	if (!new_ns_list) {
2337 		SPDK_ERRLOG("Failed to reallocate active_ns_list!\n");
2338 		ctx->state = NVME_ACTIVE_NS_STATE_ERROR;
2339 		goto out;
2340 	}
2341 
2342 	ctx->new_ns_list = new_ns_list;
2343 	nvme_ctrlr_identify_active_ns_async(ctx);
2344 	return;
2345 
2346 out:
2347 	if (ctx->deleter) {
2348 		ctx->deleter(ctx);
2349 	}
2350 }
2351 
2352 static void
2353 nvme_ctrlr_identify_active_ns_async(struct nvme_active_ns_ctx *ctx)
2354 {
2355 	struct spdk_nvme_ctrlr *ctrlr = ctx->ctrlr;
2356 	uint32_t i;
2357 	int rc;
2358 
2359 	if (ctrlr->cdata.nn == 0) {
2360 		ctx->state = NVME_ACTIVE_NS_STATE_DONE;
2361 		goto out;
2362 	}
2363 
2364 	assert(ctx->new_ns_list != NULL);
2365 
2366 	/*
2367 	 * If the controller doesn't support the active ns list (CNS 0x02), dummy up
2368 	 * an active ns list locally, i.e. report all namespaces as active.
2369 	 */
2370 	if (ctrlr->vs.raw < SPDK_NVME_VERSION(1, 1, 0) || ctrlr->quirks & NVME_QUIRK_IDENTIFY_CNS) {
2371 		uint32_t *new_ns_list;
2372 
2373 		/*
2374 		 * The active NS list must always be terminated by a zero element,
2375 		 * so we allocate room for cdata.nn + 1 entries.
2376 		 */
2377 		ctx->page_count = spdk_divide_round_up(ctrlr->cdata.nn + 1,
2378 						       sizeof(struct spdk_nvme_ns_list) / sizeof(new_ns_list[0]));
2379 		new_ns_list = spdk_realloc(ctx->new_ns_list,
2380 					   ctx->page_count * sizeof(struct spdk_nvme_ns_list),
2381 					   ctx->ctrlr->page_size);
2382 		if (!new_ns_list) {
2383 			SPDK_ERRLOG("Failed to reallocate active_ns_list!\n");
2384 			ctx->state = NVME_ACTIVE_NS_STATE_ERROR;
2385 			goto out;
2386 		}
2387 
2388 		ctx->new_ns_list = new_ns_list;
2389 		ctx->new_ns_list[ctrlr->cdata.nn] = 0;
2390 		for (i = 0; i < ctrlr->cdata.nn; i++) {
2391 			ctx->new_ns_list[i] = i + 1;
2392 		}
2393 
2394 		ctx->state = NVME_ACTIVE_NS_STATE_DONE;
2395 		goto out;
2396 	}
2397 
2398 	ctx->state = NVME_ACTIVE_NS_STATE_PROCESSING;
2399 	rc = nvme_ctrlr_cmd_identify(ctrlr, SPDK_NVME_IDENTIFY_ACTIVE_NS_LIST, 0, ctx->next_nsid, 0,
2400 				     &ctx->new_ns_list[1024 * (ctx->page_count - 1)], sizeof(struct spdk_nvme_ns_list),
2401 				     nvme_ctrlr_identify_active_ns_async_done, ctx);
2402 	if (rc != 0) {
2403 		ctx->state = NVME_ACTIVE_NS_STATE_ERROR;
2404 		goto out;
2405 	}
2406 
2407 	return;
2408 
2409 out:
2410 	if (ctx->deleter) {
2411 		ctx->deleter(ctx);
2412 	}
2413 }
2414 
2415 static void
2416 _nvme_active_ns_ctx_deleter(struct nvme_active_ns_ctx *ctx)
2417 {
2418 	struct spdk_nvme_ctrlr *ctrlr = ctx->ctrlr;
2419 	struct spdk_nvme_ns *ns;
2420 
2421 	if (ctx->state == NVME_ACTIVE_NS_STATE_ERROR) {
2422 		nvme_active_ns_ctx_destroy(ctx);
2423 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
2424 		return;
2425 	}
2426 
2427 	assert(ctx->state == NVME_ACTIVE_NS_STATE_DONE);
2428 
2429 	RB_FOREACH(ns, nvme_ns_tree, &ctrlr->ns) {
2430 		nvme_ns_free_iocs_specific_data(ns);
2431 	}
2432 
2433 	nvme_ctrlr_identify_active_ns_swap(ctrlr, ctx->new_ns_list, ctx->page_count * 1024);
2434 	nvme_active_ns_ctx_destroy(ctx);
2435 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_NS, ctrlr->opts.admin_timeout_ms);
2436 }
2437 
2438 static void
2439 _nvme_ctrlr_identify_active_ns(struct spdk_nvme_ctrlr *ctrlr)
2440 {
2441 	struct nvme_active_ns_ctx *ctx;
2442 
2443 	ctx = nvme_active_ns_ctx_create(ctrlr, _nvme_active_ns_ctx_deleter);
2444 	if (!ctx) {
2445 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
2446 		return;
2447 	}
2448 
2449 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_ACTIVE_NS,
2450 			     ctrlr->opts.admin_timeout_ms);
2451 	nvme_ctrlr_identify_active_ns_async(ctx);
2452 }
2453 
2454 int
2455 nvme_ctrlr_identify_active_ns(struct spdk_nvme_ctrlr *ctrlr)
2456 {
2457 	struct nvme_active_ns_ctx *ctx;
2458 	int rc;
2459 
2460 	ctx = nvme_active_ns_ctx_create(ctrlr, NULL);
2461 	if (!ctx) {
2462 		return -ENOMEM;
2463 	}
2464 
2465 	nvme_ctrlr_identify_active_ns_async(ctx);
2466 	while (ctx->state == NVME_ACTIVE_NS_STATE_PROCESSING) {
2467 		rc = spdk_nvme_qpair_process_completions(ctrlr->adminq, 0);
2468 		if (rc < 0) {
2469 			ctx->state = NVME_ACTIVE_NS_STATE_ERROR;
2470 			break;
2471 		}
2472 	}
2473 
2474 	if (ctx->state == NVME_ACTIVE_NS_STATE_ERROR) {
2475 		nvme_active_ns_ctx_destroy(ctx);
2476 		return -ENXIO;
2477 	}
2478 
2479 	assert(ctx->state == NVME_ACTIVE_NS_STATE_DONE);
2480 	nvme_ctrlr_identify_active_ns_swap(ctrlr, ctx->new_ns_list, ctx->page_count * 1024);
2481 	nvme_active_ns_ctx_destroy(ctx);
2482 
2483 	return 0;
2484 }
2485 
2486 static void
2487 nvme_ctrlr_identify_ns_async_done(void *arg, const struct spdk_nvme_cpl *cpl)
2488 {
2489 	struct spdk_nvme_ns *ns = (struct spdk_nvme_ns *)arg;
2490 	struct spdk_nvme_ctrlr *ctrlr = ns->ctrlr;
2491 	uint32_t nsid;
2492 	int rc;
2493 
2494 	if (spdk_nvme_cpl_is_error(cpl)) {
2495 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
2496 		return;
2497 	}
2498 
2499 	nvme_ns_set_identify_data(ns);
2500 
2501 	/* move on to the next active NS */
2502 	nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, ns->id);
2503 	ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid);
2504 	if (ns == NULL) {
2505 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_ID_DESCS,
2506 				     ctrlr->opts.admin_timeout_ms);
2507 		return;
2508 	}
2509 	ns->ctrlr = ctrlr;
2510 	ns->id = nsid;
2511 
2512 	rc = nvme_ctrlr_identify_ns_async(ns);
2513 	if (rc) {
2514 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
2515 	}
2516 }
2517 
2518 static int
2519 nvme_ctrlr_identify_ns_async(struct spdk_nvme_ns *ns)
2520 {
2521 	struct spdk_nvme_ctrlr *ctrlr = ns->ctrlr;
2522 	struct spdk_nvme_ns_data *nsdata;
2523 
2524 	nsdata = &ns->nsdata;
2525 
2526 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_NS,
2527 			     ctrlr->opts.admin_timeout_ms);
2528 	return nvme_ctrlr_cmd_identify(ns->ctrlr, SPDK_NVME_IDENTIFY_NS, 0, ns->id, 0,
2529 				       nsdata, sizeof(*nsdata),
2530 				       nvme_ctrlr_identify_ns_async_done, ns);
2531 }
2532 
2533 static int
2534 nvme_ctrlr_identify_namespaces(struct spdk_nvme_ctrlr *ctrlr)
2535 {
2536 	uint32_t nsid;
2537 	struct spdk_nvme_ns *ns;
2538 	int rc;
2539 
2540 	nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr);
2541 	ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid);
2542 	if (ns == NULL) {
2543 		/* No active NS, move on to the next state */
2544 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_ID_DESCS,
2545 				     ctrlr->opts.admin_timeout_ms);
2546 		return 0;
2547 	}
2548 
2549 	ns->ctrlr = ctrlr;
2550 	ns->id = nsid;
2551 
2552 	rc = nvme_ctrlr_identify_ns_async(ns);
2553 	if (rc) {
2554 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
2555 	}
2556 
2557 	return rc;
2558 }
2559 
2560 static int
2561 nvme_ctrlr_identify_namespaces_iocs_specific_next(struct spdk_nvme_ctrlr *ctrlr, uint32_t prev_nsid)
2562 {
2563 	uint32_t nsid;
2564 	struct spdk_nvme_ns *ns;
2565 	int rc;
2566 
2567 	if (!prev_nsid) {
2568 		nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr);
2569 	} else {
2570 		/* move on to the next active NS */
2571 		nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, prev_nsid);
2572 	}
2573 
2574 	ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid);
2575 	if (ns == NULL) {
2576 		/* No first/next active NS, move on to the next state */
2577 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_SUPPORTED_LOG_PAGES,
2578 				     ctrlr->opts.admin_timeout_ms);
2579 		return 0;
2580 	}
2581 
2582 	/* loop until we find a ns which has (supported) iocs specific data */
2583 	while (!nvme_ns_has_supported_iocs_specific_data(ns)) {
2584 		nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, ns->id);
2585 		ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid);
2586 		if (ns == NULL) {
2587 			/* no namespace with (supported) iocs specific data found */
2588 			nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_SUPPORTED_LOG_PAGES,
2589 					     ctrlr->opts.admin_timeout_ms);
2590 			return 0;
2591 		}
2592 	}
2593 
2594 	rc = nvme_ctrlr_identify_ns_iocs_specific_async(ns);
2595 	if (rc) {
2596 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
2597 	}
2598 
2599 	return rc;
2600 }
2601 
2602 static void
2603 nvme_ctrlr_identify_ns_zns_specific_async_done(void *arg, const struct spdk_nvme_cpl *cpl)
2604 {
2605 	struct spdk_nvme_ns *ns = (struct spdk_nvme_ns *)arg;
2606 	struct spdk_nvme_ctrlr *ctrlr = ns->ctrlr;
2607 
2608 	if (spdk_nvme_cpl_is_error(cpl)) {
2609 		nvme_ns_free_zns_specific_data(ns);
2610 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
2611 		return;
2612 	}
2613 
2614 	nvme_ctrlr_identify_namespaces_iocs_specific_next(ctrlr, ns->id);
2615 }
2616 
2617 static int
2618 nvme_ctrlr_identify_ns_iocs_specific_async(struct spdk_nvme_ns *ns)
2619 {
2620 	struct spdk_nvme_ctrlr *ctrlr = ns->ctrlr;
2621 	int rc;
2622 
2623 	switch (ns->csi) {
2624 	case SPDK_NVME_CSI_ZNS:
2625 		break;
2626 	default:
2627 		/*
2628 		 * This switch must handle all cases for which
2629 		 * nvme_ns_has_supported_iocs_specific_data() returns true,
2630 		 * other cases should never happen.
2631 		 */
2632 		assert(0);
2633 	}
2634 
2635 	assert(!ns->nsdata_zns);
2636 	ns->nsdata_zns = spdk_zmalloc(sizeof(*ns->nsdata_zns), 64, NULL, SPDK_ENV_SOCKET_ID_ANY,
2637 				      SPDK_MALLOC_SHARE);
2638 	if (!ns->nsdata_zns) {
2639 		return -ENOMEM;
2640 	}
2641 
2642 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_NS_IOCS_SPECIFIC,
2643 			     ctrlr->opts.admin_timeout_ms);
2644 	rc = nvme_ctrlr_cmd_identify(ns->ctrlr, SPDK_NVME_IDENTIFY_NS_IOCS, 0, ns->id, ns->csi,
2645 				     ns->nsdata_zns, sizeof(*ns->nsdata_zns),
2646 				     nvme_ctrlr_identify_ns_zns_specific_async_done, ns);
2647 	if (rc) {
2648 		nvme_ns_free_zns_specific_data(ns);
2649 	}
2650 
2651 	return rc;
2652 }
2653 
2654 static int
2655 nvme_ctrlr_identify_namespaces_iocs_specific(struct spdk_nvme_ctrlr *ctrlr)
2656 {
2657 	if (!nvme_ctrlr_multi_iocs_enabled(ctrlr)) {
2658 		/* Multi IOCS not supported/enabled, move on to the next state */
2659 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_SUPPORTED_LOG_PAGES,
2660 				     ctrlr->opts.admin_timeout_ms);
2661 		return 0;
2662 	}
2663 
2664 	return nvme_ctrlr_identify_namespaces_iocs_specific_next(ctrlr, 0);
2665 }
2666 
2667 static void
2668 nvme_ctrlr_identify_id_desc_async_done(void *arg, const struct spdk_nvme_cpl *cpl)
2669 {
2670 	struct spdk_nvme_ns *ns = (struct spdk_nvme_ns *)arg;
2671 	struct spdk_nvme_ctrlr *ctrlr = ns->ctrlr;
2672 	uint32_t nsid;
2673 	int rc;
2674 
2675 	if (spdk_nvme_cpl_is_error(cpl)) {
2676 		/*
2677 		 * Many controllers claim to be compatible with NVMe 1.3, however,
2678 		 * they do not implement NS ID Desc List. Therefore, instead of setting
2679 		 * the state to NVME_CTRLR_STATE_ERROR, silently ignore the completion
2680 		 * error and move on to the next state.
2681 		 *
2682 		 * The proper way is to create a new quirk for controllers that violate
2683 		 * the NVMe 1.3 spec by not supporting NS ID Desc List.
2684 		 * (Re-using the NVME_QUIRK_IDENTIFY_CNS quirk is not possible, since
2685 		 * it is too generic and was added in order to handle controllers that
2686 		 * violate the NVMe 1.1 spec by not supporting ACTIVE LIST).
2687 		 */
2688 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_NS_IOCS_SPECIFIC,
2689 				     ctrlr->opts.admin_timeout_ms);
2690 		return;
2691 	}
2692 
2693 	nvme_ns_set_id_desc_list_data(ns);
2694 
2695 	/* move on to the next active NS */
2696 	nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, ns->id);
2697 	ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid);
2698 	if (ns == NULL) {
2699 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_NS_IOCS_SPECIFIC,
2700 				     ctrlr->opts.admin_timeout_ms);
2701 		return;
2702 	}
2703 
2704 	rc = nvme_ctrlr_identify_id_desc_async(ns);
2705 	if (rc) {
2706 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
2707 	}
2708 }
2709 
2710 static int
2711 nvme_ctrlr_identify_id_desc_async(struct spdk_nvme_ns *ns)
2712 {
2713 	struct spdk_nvme_ctrlr *ctrlr = ns->ctrlr;
2714 
2715 	memset(ns->id_desc_list, 0, sizeof(ns->id_desc_list));
2716 
2717 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_ID_DESCS,
2718 			     ctrlr->opts.admin_timeout_ms);
2719 	return nvme_ctrlr_cmd_identify(ns->ctrlr, SPDK_NVME_IDENTIFY_NS_ID_DESCRIPTOR_LIST,
2720 				       0, ns->id, 0, ns->id_desc_list, sizeof(ns->id_desc_list),
2721 				       nvme_ctrlr_identify_id_desc_async_done, ns);
2722 }
2723 
2724 static int
2725 nvme_ctrlr_identify_id_desc_namespaces(struct spdk_nvme_ctrlr *ctrlr)
2726 {
2727 	uint32_t nsid;
2728 	struct spdk_nvme_ns *ns;
2729 	int rc;
2730 
2731 	if ((ctrlr->vs.raw < SPDK_NVME_VERSION(1, 3, 0) &&
2732 	     !(ctrlr->cap.bits.css & SPDK_NVME_CAP_CSS_IOCS)) ||
2733 	    (ctrlr->quirks & NVME_QUIRK_IDENTIFY_CNS)) {
2734 		NVME_CTRLR_DEBUGLOG(ctrlr, "Version < 1.3; not attempting to retrieve NS ID Descriptor List\n");
2735 		/* NS ID Desc List not supported, move on to the next state */
2736 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_NS_IOCS_SPECIFIC,
2737 				     ctrlr->opts.admin_timeout_ms);
2738 		return 0;
2739 	}
2740 
2741 	nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr);
2742 	ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid);
2743 	if (ns == NULL) {
2744 		/* No active NS, move on to the next state */
2745 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_NS_IOCS_SPECIFIC,
2746 				     ctrlr->opts.admin_timeout_ms);
2747 		return 0;
2748 	}
2749 
2750 	rc = nvme_ctrlr_identify_id_desc_async(ns);
2751 	if (rc) {
2752 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
2753 	}
2754 
2755 	return rc;
2756 }
2757 
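/*
 * For fabrics transports, cache the I/O queue command capsule size and in-capsule
 * data offset reported by the controller.  IOCCSZ is given in 16-byte units, so
 * ioccsz_bytes ends up as the capsule space remaining after the 64-byte SQE.
 */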
2758 static void
2759 nvme_ctrlr_update_nvmf_ioccsz(struct spdk_nvme_ctrlr *ctrlr)
2760 {
2761 	if (ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_RDMA ||
2762 	    ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_TCP ||
2763 	    ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_FC) {
2764 		if (ctrlr->cdata.nvmf_specific.ioccsz < 4) {
2765 			NVME_CTRLR_ERRLOG(ctrlr, "Incorrect IOCCSZ %u, the minimum value should be 4\n",
2766 					  ctrlr->cdata.nvmf_specific.ioccsz);
2767 			ctrlr->cdata.nvmf_specific.ioccsz = 4;
2768 			assert(0);
2769 		}
2770 		ctrlr->ioccsz_bytes = ctrlr->cdata.nvmf_specific.ioccsz * 16 - sizeof(struct spdk_nvme_cmd);
2771 		ctrlr->icdoff = ctrlr->cdata.nvmf_specific.icdoff;
2772 	}
2773 }
2774 
2775 static void
2776 nvme_ctrlr_set_num_queues_done(void *arg, const struct spdk_nvme_cpl *cpl)
2777 {
2778 	uint32_t cq_allocated, sq_allocated, min_allocated, i;
2779 	struct spdk_nvme_ctrlr *ctrlr = (struct spdk_nvme_ctrlr *)arg;
2780 
2781 	if (spdk_nvme_cpl_is_error(cpl)) {
2782 		NVME_CTRLR_ERRLOG(ctrlr, "Set Features - Number of Queues failed!\n");
2783 		ctrlr->opts.num_io_queues = 0;
2784 	} else {
2785 		/*
2786 		 * Data in cdw0 is 0-based.
2787 		 * Lower 16-bits indicate number of submission queues allocated.
2788 		 * Upper 16-bits indicate number of completion queues allocated.
2789 		 */
2790 		sq_allocated = (cpl->cdw0 & 0xFFFF) + 1;
2791 		cq_allocated = (cpl->cdw0 >> 16) + 1;
2792 
2793 		/*
2794 		 * For 1:1 queue mapping, set number of allocated queues to be minimum of
2795 		 * submission and completion queues.
2796 		 */
2797 		min_allocated = spdk_min(sq_allocated, cq_allocated);
2798 
2799 		/* Set number of queues to be minimum of requested and actually allocated. */
2800 		ctrlr->opts.num_io_queues = spdk_min(min_allocated, ctrlr->opts.num_io_queues);
2801 	}
2802 
2803 	ctrlr->free_io_qids = spdk_bit_array_create(ctrlr->opts.num_io_queues + 1);
2804 	if (ctrlr->free_io_qids == NULL) {
2805 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
2806 		return;
2807 	}
2808 
2809 	/* Initialize list of free I/O queue IDs. QID 0 is the admin queue (implicitly allocated). */
2810 	for (i = 1; i <= ctrlr->opts.num_io_queues; i++) {
2811 		spdk_nvme_ctrlr_free_qid(ctrlr, i);
2812 	}
2813 
2814 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_ACTIVE_NS,
2815 			     ctrlr->opts.admin_timeout_ms);
2816 }
2817 
2818 static int
2819 nvme_ctrlr_set_num_queues(struct spdk_nvme_ctrlr *ctrlr)
2820 {
2821 	int rc;
2822 
2823 	if (ctrlr->opts.num_io_queues > SPDK_NVME_MAX_IO_QUEUES) {
2824 		NVME_CTRLR_NOTICELOG(ctrlr, "Limiting requested num_io_queues %u to max %d\n",
2825 				     ctrlr->opts.num_io_queues, SPDK_NVME_MAX_IO_QUEUES);
2826 		ctrlr->opts.num_io_queues = SPDK_NVME_MAX_IO_QUEUES;
2827 	} else if (ctrlr->opts.num_io_queues < 1) {
2828 		NVME_CTRLR_NOTICELOG(ctrlr, "Requested num_io_queues 0, increasing to 1\n");
2829 		ctrlr->opts.num_io_queues = 1;
2830 	}
2831 
2832 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_SET_NUM_QUEUES,
2833 			     ctrlr->opts.admin_timeout_ms);
2834 
2835 	rc = nvme_ctrlr_cmd_set_num_queues(ctrlr, ctrlr->opts.num_io_queues,
2836 					   nvme_ctrlr_set_num_queues_done, ctrlr);
2837 	if (rc != 0) {
2838 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
2839 		return rc;
2840 	}
2841 
2842 	return 0;
2843 }
2844 
2845 static void
2846 nvme_ctrlr_set_keep_alive_timeout_done(void *arg, const struct spdk_nvme_cpl *cpl)
2847 {
2848 	uint32_t keep_alive_interval_us;
2849 	struct spdk_nvme_ctrlr *ctrlr = (struct spdk_nvme_ctrlr *)arg;
2850 
2851 	if (spdk_nvme_cpl_is_error(cpl)) {
2852 		if ((cpl->status.sct == SPDK_NVME_SCT_GENERIC) &&
2853 		    (cpl->status.sc == SPDK_NVME_SC_INVALID_FIELD)) {
2854 			NVME_CTRLR_DEBUGLOG(ctrlr, "Keep alive timeout Get Feature is not supported\n");
2855 		} else {
2856 			NVME_CTRLR_ERRLOG(ctrlr, "Keep alive timeout Get Feature failed: SC %x SCT %x\n",
2857 					  cpl->status.sc, cpl->status.sct);
2858 			ctrlr->opts.keep_alive_timeout_ms = 0;
2859 			nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
2860 			return;
2861 		}
2862 	} else {
2863 		if (ctrlr->opts.keep_alive_timeout_ms != cpl->cdw0) {
2864 			NVME_CTRLR_DEBUGLOG(ctrlr, "Controller adjusted keep alive timeout to %u ms\n",
2865 					    cpl->cdw0);
2866 		}
2867 
2868 		ctrlr->opts.keep_alive_timeout_ms = cpl->cdw0;
2869 	}
2870 
2871 	if (ctrlr->opts.keep_alive_timeout_ms == 0) {
2872 		ctrlr->keep_alive_interval_ticks = 0;
2873 	} else {
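		/* Send keep alive commands at half of the (possibly controller-adjusted) timeout. */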
2874 		keep_alive_interval_us = ctrlr->opts.keep_alive_timeout_ms * 1000 / 2;
2875 
2876 		NVME_CTRLR_DEBUGLOG(ctrlr, "Sending keep alive every %u us\n", keep_alive_interval_us);
2877 
2878 		ctrlr->keep_alive_interval_ticks = (keep_alive_interval_us * spdk_get_ticks_hz()) /
2879 						   UINT64_C(1000000);
2880 
2881 		/* Schedule the first Keep Alive to be sent as soon as possible. */
2882 		ctrlr->next_keep_alive_tick = spdk_get_ticks();
2883 	}
2884 
2885 	if (spdk_nvme_ctrlr_is_discovery(ctrlr)) {
2886 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_READY, NVME_TIMEOUT_INFINITE);
2887 	} else {
2888 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_IOCS_SPECIFIC,
2889 				     ctrlr->opts.admin_timeout_ms);
2890 	}
2891 }
2892 
2893 static int
2894 nvme_ctrlr_set_keep_alive_timeout(struct spdk_nvme_ctrlr *ctrlr)
2895 {
2896 	int rc;
2897 
2898 	if (ctrlr->opts.keep_alive_timeout_ms == 0) {
2899 		if (spdk_nvme_ctrlr_is_discovery(ctrlr)) {
2900 			nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_READY, NVME_TIMEOUT_INFINITE);
2901 		} else {
2902 			nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_IOCS_SPECIFIC,
2903 					     ctrlr->opts.admin_timeout_ms);
2904 		}
2905 		return 0;
2906 	}
2907 
2908 	/* Note: Discovery controller identify data does not populate KAS according to spec. */
2909 	if (!spdk_nvme_ctrlr_is_discovery(ctrlr) && ctrlr->cdata.kas == 0) {
2910 		NVME_CTRLR_DEBUGLOG(ctrlr, "Controller KAS is 0 - not enabling Keep Alive\n");
2911 		ctrlr->opts.keep_alive_timeout_ms = 0;
2912 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_IOCS_SPECIFIC,
2913 				     ctrlr->opts.admin_timeout_ms);
2914 		return 0;
2915 	}
2916 
2917 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_KEEP_ALIVE_TIMEOUT,
2918 			     ctrlr->opts.admin_timeout_ms);
2919 
2920 	/* Retrieve actual keep alive timeout, since the controller may have adjusted it. */
2921 	rc = spdk_nvme_ctrlr_cmd_get_feature(ctrlr, SPDK_NVME_FEAT_KEEP_ALIVE_TIMER, 0, NULL, 0,
2922 					     nvme_ctrlr_set_keep_alive_timeout_done, ctrlr);
2923 	if (rc != 0) {
2924 		NVME_CTRLR_ERRLOG(ctrlr, "Keep alive timeout Get Feature failed: %d\n", rc);
2925 		ctrlr->opts.keep_alive_timeout_ms = 0;
2926 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
2927 		return rc;
2928 	}
2929 
2930 	return 0;
2931 }
2932 
2933 static void
2934 nvme_ctrlr_set_host_id_done(void *arg, const struct spdk_nvme_cpl *cpl)
2935 {
2936 	struct spdk_nvme_ctrlr *ctrlr = (struct spdk_nvme_ctrlr *)arg;
2937 
2938 	if (spdk_nvme_cpl_is_error(cpl)) {
2939 		/*
2940 		 * Treat Set Features - Host ID failure as non-fatal, since the Host ID feature
2941 		 * is optional.
2942 		 */
2943 		NVME_CTRLR_WARNLOG(ctrlr, "Set Features - Host ID failed: SC 0x%x SCT 0x%x\n",
2944 				   cpl->status.sc, cpl->status.sct);
2945 	} else {
2946 		NVME_CTRLR_DEBUGLOG(ctrlr, "Set Features - Host ID was successful\n");
2947 	}
2948 
2949 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_READY, NVME_TIMEOUT_INFINITE);
2950 }
2951 
2952 static int
2953 nvme_ctrlr_set_host_id(struct spdk_nvme_ctrlr *ctrlr)
2954 {
2955 	uint8_t *host_id;
2956 	uint32_t host_id_size;
2957 	int rc;
2958 
2959 	if (ctrlr->trid.trtype != SPDK_NVME_TRANSPORT_PCIE) {
2960 		/*
2961 		 * NVMe-oF sends the host ID during Connect and doesn't allow
2962 		 * Set Features - Host Identifier after Connect, so we don't need to do anything here.
2963 		 */
2964 		NVME_CTRLR_DEBUGLOG(ctrlr, "NVMe-oF transport - not sending Set Features - Host ID\n");
2965 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_READY, NVME_TIMEOUT_INFINITE);
2966 		return 0;
2967 	}
2968 
2969 	if (ctrlr->cdata.ctratt.host_id_exhid_supported) {
2970 		NVME_CTRLR_DEBUGLOG(ctrlr, "Using 128-bit extended host identifier\n");
2971 		host_id = ctrlr->opts.extended_host_id;
2972 		host_id_size = sizeof(ctrlr->opts.extended_host_id);
2973 	} else {
2974 		NVME_CTRLR_DEBUGLOG(ctrlr, "Using 64-bit host identifier\n");
2975 		host_id = ctrlr->opts.host_id;
2976 		host_id_size = sizeof(ctrlr->opts.host_id);
2977 	}
2978 
2979 	/* If the user specified an all-zeroes host identifier, don't send the command. */
2980 	if (spdk_mem_all_zero(host_id, host_id_size)) {
2981 		NVME_CTRLR_DEBUGLOG(ctrlr, "User did not specify host ID - not sending Set Features - Host ID\n");
2982 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_READY, NVME_TIMEOUT_INFINITE);
2983 		return 0;
2984 	}
2985 
2986 	SPDK_LOGDUMP(nvme, "host_id", host_id, host_id_size);
2987 
2988 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_HOST_ID,
2989 			     ctrlr->opts.admin_timeout_ms);
2990 
2991 	rc = nvme_ctrlr_cmd_set_host_id(ctrlr, host_id, host_id_size, nvme_ctrlr_set_host_id_done, ctrlr);
2992 	if (rc != 0) {
2993 		NVME_CTRLR_ERRLOG(ctrlr, "Set Features - Host ID failed: %d\n", rc);
2994 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
2995 		return rc;
2996 	}
2997 
2998 	return 0;
2999 }
3000 
3001 void
3002 nvme_ctrlr_update_namespaces(struct spdk_nvme_ctrlr *ctrlr)
3003 {
3004 	uint32_t nsid;
3005 	struct spdk_nvme_ns *ns;
3006 
3007 	for (nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr);
3008 	     nsid != 0; nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, nsid)) {
3009 		ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid);
3010 		nvme_ns_construct(ns, nsid, ctrlr);
3011 	}
3012 }
3013 
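/*
 * Read the Changed Namespace List log page after a namespace attribute change
 * notice; the read itself is what clears the event condition.  Only the overflow
 * marker (NSID 0xffffffff) in the first entry is checked here.
 */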
3014 static int
3015 nvme_ctrlr_clear_changed_ns_log(struct spdk_nvme_ctrlr *ctrlr)
3016 {
3017 	struct nvme_completion_poll_status	*status;
3018 	int		rc = -ENOMEM;
3019 	char		*buffer = NULL;
3020 	uint32_t	nsid;
3021 	size_t		buf_size = (SPDK_NVME_MAX_CHANGED_NAMESPACES * sizeof(uint32_t));
3022 
3023 	buffer = spdk_dma_zmalloc(buf_size, 4096, NULL);
3024 	if (!buffer) {
3025 		NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate buffer for getting "
3026 				  "changed ns log.\n");
3027 		return rc;
3028 	}
3029 
3030 	status = calloc(1, sizeof(*status));
3031 	if (!status) {
3032 		NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n");
3033 		goto free_buffer;
3034 	}
3035 
3036 	rc = spdk_nvme_ctrlr_cmd_get_log_page(ctrlr,
3037 					      SPDK_NVME_LOG_CHANGED_NS_LIST,
3038 					      SPDK_NVME_GLOBAL_NS_TAG,
3039 					      buffer, buf_size, 0,
3040 					      nvme_completion_poll_cb, status);
3041 
3042 	if (rc) {
3043 		NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_cmd_get_log_page() failed: rc=%d\n", rc);
3044 		free(status);
3045 		goto free_buffer;
3046 	}
3047 
3048 	rc = nvme_wait_for_completion_timeout(ctrlr->adminq, status,
3049 					      ctrlr->opts.admin_timeout_ms * 1000);
3050 	if (!status->timed_out) {
3051 		free(status);
3052 	}
3053 
3054 	if (rc) {
3055 		NVME_CTRLR_ERRLOG(ctrlr, "wait for spdk_nvme_ctrlr_cmd_get_log_page failed: rc=%d\n", rc);
3056 		goto free_buffer;
3057 	}
3058 
3059 	/* only check the case of overflow. */
3060 	nsid = from_le32(buffer);
3061 	if (nsid == 0xffffffffu) {
3062 		NVME_CTRLR_WARNLOG(ctrlr, "changed ns log overflowed.\n");
3063 	}
3064 
3065 free_buffer:
3066 	spdk_dma_free(buffer);
3067 	return rc;
3068 }
3069 
3070 void
3071 nvme_ctrlr_process_async_event(struct spdk_nvme_ctrlr *ctrlr,
3072 			       const struct spdk_nvme_cpl *cpl)
3073 {
3074 	union spdk_nvme_async_event_completion event;
3075 	struct spdk_nvme_ctrlr_process *active_proc;
3076 	int rc;
3077 
3078 	event.raw = cpl->cdw0;
3079 
3080 	if ((event.bits.async_event_type == SPDK_NVME_ASYNC_EVENT_TYPE_NOTICE) &&
3081 	    (event.bits.async_event_info == SPDK_NVME_ASYNC_EVENT_NS_ATTR_CHANGED)) {
3082 		nvme_ctrlr_clear_changed_ns_log(ctrlr);
3083 
3084 		rc = nvme_ctrlr_identify_active_ns(ctrlr);
3085 		if (rc) {
3086 			return;
3087 		}
3088 		nvme_ctrlr_update_namespaces(ctrlr);
3089 		nvme_io_msg_ctrlr_update(ctrlr);
3090 	}
3091 
3092 	if ((event.bits.async_event_type == SPDK_NVME_ASYNC_EVENT_TYPE_NOTICE) &&
3093 	    (event.bits.async_event_info == SPDK_NVME_ASYNC_EVENT_ANA_CHANGE)) {
3094 		if (!ctrlr->opts.disable_read_ana_log_page) {
3095 			rc = nvme_ctrlr_update_ana_log_page(ctrlr);
3096 			if (rc) {
3097 				return;
3098 			}
3099 			nvme_ctrlr_parse_ana_log_page(ctrlr, nvme_ctrlr_update_ns_ana_states,
3100 						      ctrlr);
3101 		}
3102 	}
3103 
3104 	active_proc = nvme_ctrlr_get_current_process(ctrlr);
3105 	if (active_proc && active_proc->aer_cb_fn) {
3106 		active_proc->aer_cb_fn(active_proc->aer_cb_arg, cpl);
3107 	}
3108 }
3109 
3110 static void
3111 nvme_ctrlr_queue_async_event(struct spdk_nvme_ctrlr *ctrlr,
3112 			     const struct spdk_nvme_cpl *cpl)
3113 {
3114 	struct spdk_nvme_ctrlr_aer_completion_list *nvme_event;
3115 	struct spdk_nvme_ctrlr_process *proc;
3116 
3117 	/* Add the async event to each process object's event list */
3118 	TAILQ_FOREACH(proc, &ctrlr->active_procs, tailq) {
3119 		/* Must be shared memory so other processes can access */
3120 		nvme_event = spdk_zmalloc(sizeof(*nvme_event), 0, NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_SHARE);
3121 		if (!nvme_event) {
3122 			NVME_CTRLR_ERRLOG(ctrlr, "Alloc nvme event failed, ignore the event\n");
3123 			return;
3124 		}
3125 		nvme_event->cpl = *cpl;
3126 
3127 		STAILQ_INSERT_TAIL(&proc->async_events, nvme_event, link);
3128 	}
3129 }
3130 
3131 void
3132 nvme_ctrlr_complete_queued_async_events(struct spdk_nvme_ctrlr *ctrlr)
3133 {
3134 	struct spdk_nvme_ctrlr_aer_completion_list *nvme_event, *nvme_event_tmp;
3135 	struct spdk_nvme_ctrlr_process	*active_proc;
3136 
3137 	active_proc = nvme_ctrlr_get_current_process(ctrlr);
3138 
3139 	STAILQ_FOREACH_SAFE(nvme_event, &active_proc->async_events, link, nvme_event_tmp) {
3140 		STAILQ_REMOVE(&active_proc->async_events, nvme_event,
3141 			      spdk_nvme_ctrlr_aer_completion_list, link);
3142 		nvme_ctrlr_process_async_event(ctrlr, &nvme_event->cpl);
3143 		spdk_free(nvme_event);
3144 
3145 	}
3146 }
3147 
3148 static void
3149 nvme_ctrlr_async_event_cb(void *arg, const struct spdk_nvme_cpl *cpl)
3150 {
3151 	struct nvme_async_event_request	*aer = arg;
3152 	struct spdk_nvme_ctrlr		*ctrlr = aer->ctrlr;
3153 
3154 	if (cpl->status.sct == SPDK_NVME_SCT_GENERIC &&
3155 	    cpl->status.sc == SPDK_NVME_SC_ABORTED_SQ_DELETION) {
3156 		/*
3157 		 *  This is simulated when the controller is being shut down, to
3158 		 *  effectively abort outstanding asynchronous event requests
3159 		 *  and make sure all memory is freed.  Do not repost the
3160 		 *  request in this case.
3161 		 */
3162 		return;
3163 	}
3164 
3165 	if (cpl->status.sct == SPDK_NVME_SCT_COMMAND_SPECIFIC &&
3166 	    cpl->status.sc == SPDK_NVME_SC_ASYNC_EVENT_REQUEST_LIMIT_EXCEEDED) {
3167 		/*
3168 		 *  SPDK will only send as many AERs as the device says it supports,
3169 		 *  so this status code indicates an out-of-spec device.  Do not repost
3170 		 *  the request in this case.
3171 		 */
3172 		NVME_CTRLR_ERRLOG(ctrlr, "Controller appears out-of-spec for asynchronous event request handling.\n"
3173 				  "Do not repost this AER.\n");
3174 		return;
3175 	}
3176 
3177 	/* Add the events to the list */
3178 	nvme_ctrlr_queue_async_event(ctrlr, cpl);
3179 
3180 	/* If the ctrlr was removed or in the destruct state, we should not send aer again */
3181 	if (ctrlr->is_removed || ctrlr->is_destructed) {
3182 		return;
3183 	}
3184 
3185 	/*
3186 	 * Repost another asynchronous event request to replace the one
3187 	 *  that just completed.
3188 	 */
3189 	if (nvme_ctrlr_construct_and_submit_aer(ctrlr, aer)) {
3190 		/*
3191 		 * We can't do anything to recover from a failure here,
3192 		 * so just print a warning message and leave the AER unsubmitted.
3193 		 */
3194 		NVME_CTRLR_ERRLOG(ctrlr, "resubmitting AER failed!\n");
3195 	}
3196 }
3197 
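/*
 * Allocate a data-less admin request for an Asynchronous Event Request command and
 * submit it.  Completions land in nvme_ctrlr_async_event_cb(), which normally
 * reposts the request.
 */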
3198 static int
3199 nvme_ctrlr_construct_and_submit_aer(struct spdk_nvme_ctrlr *ctrlr,
3200 				    struct nvme_async_event_request *aer)
3201 {
3202 	struct nvme_request *req;
3203 
3204 	aer->ctrlr = ctrlr;
3205 	req = nvme_allocate_request_null(ctrlr->adminq, nvme_ctrlr_async_event_cb, aer);
3206 	aer->req = req;
3207 	if (req == NULL) {
3208 		return -1;
3209 	}
3210 
3211 	req->cmd.opc = SPDK_NVME_OPC_ASYNC_EVENT_REQUEST;
3212 	return nvme_ctrlr_submit_admin_request(ctrlr, req);
3213 }
3214 
3215 static void
3216 nvme_ctrlr_configure_aer_done(void *arg, const struct spdk_nvme_cpl *cpl)
3217 {
3218 	struct nvme_async_event_request		*aer;
3219 	int					rc;
3220 	uint32_t				i;
3221 	struct spdk_nvme_ctrlr *ctrlr = (struct spdk_nvme_ctrlr *)arg;
3222 
3223 	if (spdk_nvme_cpl_is_error(cpl)) {
3224 		NVME_CTRLR_NOTICELOG(ctrlr, "nvme_ctrlr_configure_aer failed!\n");
3225 		ctrlr->num_aers = 0;
3226 	} else {
3227 		/* aerl is a zero-based value, so we need to add 1 here. */
3228 		ctrlr->num_aers = spdk_min(NVME_MAX_ASYNC_EVENTS, (ctrlr->cdata.aerl + 1));
3229 	}
3230 
3231 	for (i = 0; i < ctrlr->num_aers; i++) {
3232 		aer = &ctrlr->aer[i];
3233 		rc = nvme_ctrlr_construct_and_submit_aer(ctrlr, aer);
3234 		if (rc) {
3235 			NVME_CTRLR_ERRLOG(ctrlr, "nvme_ctrlr_construct_and_submit_aer failed!\n");
3236 			nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
3237 			return;
3238 		}
3239 	}
3240 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_KEEP_ALIVE_TIMEOUT, ctrlr->opts.admin_timeout_ms);
3241 }
3242 
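/*
 * Build the Asynchronous Event Configuration feature value from what the controller
 * advertises (or request discovery log change notices for discovery controllers)
 * and submit Set Features; the completion handler above then submits the AERs.
 */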
3243 static int
3244 nvme_ctrlr_configure_aer(struct spdk_nvme_ctrlr *ctrlr)
3245 {
3246 	union spdk_nvme_feat_async_event_configuration	config;
3247 	int						rc;
3248 
3249 	config.raw = 0;
3250 
3251 	if (spdk_nvme_ctrlr_is_discovery(ctrlr)) {
3252 		config.bits.discovery_log_change_notice = 1;
3253 	} else {
3254 		config.bits.crit_warn.bits.available_spare = 1;
3255 		config.bits.crit_warn.bits.temperature = 1;
3256 		config.bits.crit_warn.bits.device_reliability = 1;
3257 		config.bits.crit_warn.bits.read_only = 1;
3258 		config.bits.crit_warn.bits.volatile_memory_backup = 1;
3259 
3260 		if (ctrlr->vs.raw >= SPDK_NVME_VERSION(1, 2, 0)) {
3261 			if (ctrlr->cdata.oaes.ns_attribute_notices) {
3262 				config.bits.ns_attr_notice = 1;
3263 			}
3264 			if (ctrlr->cdata.oaes.fw_activation_notices) {
3265 				config.bits.fw_activation_notice = 1;
3266 			}
3267 			if (ctrlr->cdata.oaes.ana_change_notices) {
3268 				config.bits.ana_change_notice = 1;
3269 			}
3270 		}
3271 		if (ctrlr->vs.raw >= SPDK_NVME_VERSION(1, 3, 0) && ctrlr->cdata.lpa.telemetry) {
3272 			config.bits.telemetry_log_notice = 1;
3273 		}
3274 	}
3275 
3276 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_CONFIGURE_AER,
3277 			     ctrlr->opts.admin_timeout_ms);
3278 
3279 	rc = nvme_ctrlr_cmd_set_async_event_config(ctrlr, config,
3280 			nvme_ctrlr_configure_aer_done,
3281 			ctrlr);
3282 	if (rc != 0) {
3283 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
3284 		return rc;
3285 	}
3286 
3287 	return 0;
3288 }
3289 
3290 struct spdk_nvme_ctrlr_process *
3291 nvme_ctrlr_get_process(struct spdk_nvme_ctrlr *ctrlr, pid_t pid)
3292 {
3293 	struct spdk_nvme_ctrlr_process	*active_proc;
3294 
3295 	TAILQ_FOREACH(active_proc, &ctrlr->active_procs, tailq) {
3296 		if (active_proc->pid == pid) {
3297 			return active_proc;
3298 		}
3299 	}
3300 
3301 	return NULL;
3302 }
3303 
3304 struct spdk_nvme_ctrlr_process *
3305 nvme_ctrlr_get_current_process(struct spdk_nvme_ctrlr *ctrlr)
3306 {
3307 	return nvme_ctrlr_get_process(ctrlr, getpid());
3308 }
3309 
3310 /**
3311  * This function will be called when a process is using the controller.
3312  *  1. For the primary process, it is called when constructing the controller.
3313  *  2. For a secondary process, it is called when probing the controller.
3314  * Note: it checks whether the process has already been added before adding it again.
3315  */
3316 int
3317 nvme_ctrlr_add_process(struct spdk_nvme_ctrlr *ctrlr, void *devhandle)
3318 {
3319 	struct spdk_nvme_ctrlr_process	*ctrlr_proc;
3320 	pid_t				pid = getpid();
3321 
3322 	/* Check whether the process is already added or not */
3323 	if (nvme_ctrlr_get_process(ctrlr, pid)) {
3324 		return 0;
3325 	}
3326 
3327 	/* Initialize the per process properties for this ctrlr */
3328 	ctrlr_proc = spdk_zmalloc(sizeof(struct spdk_nvme_ctrlr_process),
3329 				  64, NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_SHARE);
3330 	if (ctrlr_proc == NULL) {
3331 		NVME_CTRLR_ERRLOG(ctrlr, "failed to allocate memory to track the process props\n");
3332 
3333 		return -1;
3334 	}
3335 
3336 	ctrlr_proc->is_primary = spdk_process_is_primary();
3337 	ctrlr_proc->pid = pid;
3338 	STAILQ_INIT(&ctrlr_proc->active_reqs);
3339 	ctrlr_proc->devhandle = devhandle;
3340 	ctrlr_proc->ref = 0;
3341 	TAILQ_INIT(&ctrlr_proc->allocated_io_qpairs);
3342 	STAILQ_INIT(&ctrlr_proc->async_events);
3343 
3344 	TAILQ_INSERT_TAIL(&ctrlr->active_procs, ctrlr_proc, tailq);
3345 
3346 	return 0;
3347 }
3348 
3349 /**
3350  * This function will be called when the process detaches the controller.
3351  * Note: the ctrlr_lock must be held when calling this function.
3352  */
3353 static void
3354 nvme_ctrlr_remove_process(struct spdk_nvme_ctrlr *ctrlr,
3355 			  struct spdk_nvme_ctrlr_process *proc)
3356 {
3357 	struct spdk_nvme_qpair	*qpair, *tmp_qpair;
3358 
3359 	assert(STAILQ_EMPTY(&proc->active_reqs));
3360 
3361 	TAILQ_FOREACH_SAFE(qpair, &proc->allocated_io_qpairs, per_process_tailq, tmp_qpair) {
3362 		spdk_nvme_ctrlr_free_io_qpair(qpair);
3363 	}
3364 
3365 	TAILQ_REMOVE(&ctrlr->active_procs, proc, tailq);
3366 
3367 	if (ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) {
3368 		spdk_pci_device_detach(proc->devhandle);
3369 	}
3370 
3371 	spdk_free(proc);
3372 }
3373 
3374 /**
3375  * This function will be called when a process has exited unexpectedly
3376  *  in order to free any incomplete nvme requests, allocated I/O qpairs,
3377  *  and allocated memory.
3378  * Note: the ctrlr_lock must be held when calling this function.
3379  */
3380 static void
3381 nvme_ctrlr_cleanup_process(struct spdk_nvme_ctrlr_process *proc)
3382 {
3383 	struct nvme_request	*req, *tmp_req;
3384 	struct spdk_nvme_qpair	*qpair, *tmp_qpair;
3385 	struct spdk_nvme_ctrlr_aer_completion_list *event;
3386 
3387 	STAILQ_FOREACH_SAFE(req, &proc->active_reqs, stailq, tmp_req) {
3388 		STAILQ_REMOVE(&proc->active_reqs, req, nvme_request, stailq);
3389 
3390 		assert(req->pid == proc->pid);
3391 
3392 		nvme_free_request(req);
3393 	}
3394 
3395 	/* Free any async events still queued on this process's event list */
3396 	while (!STAILQ_EMPTY(&proc->async_events)) {
3397 		event = STAILQ_FIRST(&proc->async_events);
3398 		STAILQ_REMOVE_HEAD(&proc->async_events, link);
3399 		spdk_free(event);
3400 	}
3401 
3402 	TAILQ_FOREACH_SAFE(qpair, &proc->allocated_io_qpairs, per_process_tailq, tmp_qpair) {
3403 		TAILQ_REMOVE(&proc->allocated_io_qpairs, qpair, per_process_tailq);
3404 
3405 		/*
3406 		 * The process may have been killed while some qpairs were in their
3407 		 *  completion context.  Clear that flag here to allow these IO
3408 		 *  qpairs to be deleted.
3409 		 */
3410 		qpair->in_completion_context = 0;
3411 
3412 		qpair->no_deletion_notification_needed = 1;
3413 
3414 		spdk_nvme_ctrlr_free_io_qpair(qpair);
3415 	}
3416 
3417 	spdk_free(proc);
3418 }
3419 
3420 /**
3421  * This function will be called when destructing the controller.
3422  *  1. There are no more admin requests on this controller.
3423  *  2. Clean up any resource allocations left behind by processes that are gone.
3424  */
3425 void
3426 nvme_ctrlr_free_processes(struct spdk_nvme_ctrlr *ctrlr)
3427 {
3428 	struct spdk_nvme_ctrlr_process	*active_proc, *tmp;
3429 
3430 	/* Free each process's properties and make sure there are no pending admin I/Os */
3431 	TAILQ_FOREACH_SAFE(active_proc, &ctrlr->active_procs, tailq, tmp) {
3432 		TAILQ_REMOVE(&ctrlr->active_procs, active_proc, tailq);
3433 
3434 		assert(STAILQ_EMPTY(&active_proc->active_reqs));
3435 
3436 		spdk_free(active_proc);
3437 	}
3438 }
3439 
3440 /**
3441  * This function will be called when any other process attaches or
3442  *  detaches the controller in order to clean up any unexpectedly
3443  *  terminated processes.
3444  * Note: the ctrlr_lock must be held when calling this function.
3445  */
3446 static int
3447 nvme_ctrlr_remove_inactive_proc(struct spdk_nvme_ctrlr *ctrlr)
3448 {
3449 	struct spdk_nvme_ctrlr_process	*active_proc, *tmp;
3450 	int				active_proc_count = 0;
3451 
3452 	TAILQ_FOREACH_SAFE(active_proc, &ctrlr->active_procs, tailq, tmp) {
3453 		if ((kill(active_proc->pid, 0) == -1) && (errno == ESRCH)) {
3454 			NVME_CTRLR_ERRLOG(ctrlr, "process %d terminated unexpectedly\n", active_proc->pid);
3455 
3456 			TAILQ_REMOVE(&ctrlr->active_procs, active_proc, tailq);
3457 
3458 			nvme_ctrlr_cleanup_process(active_proc);
3459 		} else {
3460 			active_proc_count++;
3461 		}
3462 	}
3463 
3464 	return active_proc_count;
3465 }
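
/*
 * The liveness test above relies on the POSIX idiom of sending signal 0 with
 * kill(): no signal is actually delivered, but existence and permission checks
 * are still performed.  A minimal standalone sketch of the same idiom, purely
 * illustrative and not part of this driver:
 *
 *	#include <errno.h>
 *	#include <signal.h>
 *	#include <stdbool.h>
 *	#include <sys/types.h>
 *
 *	static bool
 *	process_is_alive(pid_t pid)
 *	{
 *		if (kill(pid, 0) == 0) {
 *			return true;
 *		}
 *		// ESRCH: no such process; EPERM: it exists but belongs to another user.
 *		return errno == EPERM;
 *	}
 */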
3466 
3467 void
3468 nvme_ctrlr_proc_get_ref(struct spdk_nvme_ctrlr *ctrlr)
3469 {
3470 	struct spdk_nvme_ctrlr_process	*active_proc;
3471 
3472 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
3473 
3474 	nvme_ctrlr_remove_inactive_proc(ctrlr);
3475 
3476 	active_proc = nvme_ctrlr_get_current_process(ctrlr);
3477 	if (active_proc) {
3478 		active_proc->ref++;
3479 	}
3480 
3481 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
3482 }
3483 
3484 void
3485 nvme_ctrlr_proc_put_ref(struct spdk_nvme_ctrlr *ctrlr)
3486 {
3487 	struct spdk_nvme_ctrlr_process	*active_proc;
3488 	int				proc_count;
3489 
3490 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
3491 
3492 	proc_count = nvme_ctrlr_remove_inactive_proc(ctrlr);
3493 
3494 	active_proc = nvme_ctrlr_get_current_process(ctrlr);
3495 	if (active_proc) {
3496 		active_proc->ref--;
3497 		assert(active_proc->ref >= 0);
3498 
3499 		/*
3500 		 * The last active process will be removed at the end of
3501 		 * the destruction of the controller.
3502 		 */
3503 		if (active_proc->ref == 0 && proc_count != 1) {
3504 			nvme_ctrlr_remove_process(ctrlr, active_proc);
3505 		}
3506 	}
3507 
3508 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
3509 }
3510 
3511 int
3512 nvme_ctrlr_get_ref_count(struct spdk_nvme_ctrlr *ctrlr)
3513 {
3514 	struct spdk_nvme_ctrlr_process	*active_proc;
3515 	int				ref = 0;
3516 
3517 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
3518 
3519 	nvme_ctrlr_remove_inactive_proc(ctrlr);
3520 
3521 	TAILQ_FOREACH(active_proc, &ctrlr->active_procs, tailq) {
3522 		ref += active_proc->ref;
3523 	}
3524 
3525 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
3526 
3527 	return ref;
3528 }
3529 
3530 /**
3531  *  Get the PCI device handle which is only visible to its associated process.
3532  */
3533 struct spdk_pci_device *
3534 nvme_ctrlr_proc_get_devhandle(struct spdk_nvme_ctrlr *ctrlr)
3535 {
3536 	struct spdk_nvme_ctrlr_process	*active_proc;
3537 	struct spdk_pci_device		*devhandle = NULL;
3538 
3539 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
3540 
3541 	active_proc = nvme_ctrlr_get_current_process(ctrlr);
3542 	if (active_proc) {
3543 		devhandle = active_proc->devhandle;
3544 	}
3545 
3546 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
3547 
3548 	return devhandle;
3549 }
3550 
3551 static void
3552 nvme_ctrlr_process_init_vs_done(void *ctx, uint64_t value, const struct spdk_nvme_cpl *cpl)
3553 {
3554 	struct spdk_nvme_ctrlr *ctrlr = ctx;
3555 
3556 	if (spdk_nvme_cpl_is_error(cpl)) {
3557 		NVME_CTRLR_ERRLOG(ctrlr, "Failed to read the VS register\n");
3558 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
3559 		return;
3560 	}
3561 
3562 	assert(value <= UINT32_MAX);
3563 	ctrlr->vs.raw = (uint32_t)value;
3564 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_READ_CAP, NVME_TIMEOUT_INFINITE);
3565 }
3566 
3567 static void
3568 nvme_ctrlr_process_init_cap_done(void *ctx, uint64_t value, const struct spdk_nvme_cpl *cpl)
3569 {
3570 	struct spdk_nvme_ctrlr *ctrlr = ctx;
3571 
3572 	if (spdk_nvme_cpl_is_error(cpl)) {
3573 		NVME_CTRLR_ERRLOG(ctrlr, "Failed to read the CAP register\n");
3574 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
3575 		return;
3576 	}
3577 
3578 	ctrlr->cap.raw = value;
3579 	nvme_ctrlr_init_cap(ctrlr);
3580 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_CHECK_EN, NVME_TIMEOUT_INFINITE);
3581 }
3582 
3583 static void
3584 nvme_ctrlr_process_init_check_en(void *ctx, uint64_t value, const struct spdk_nvme_cpl *cpl)
3585 {
3586 	struct spdk_nvme_ctrlr *ctrlr = ctx;
3587 	enum nvme_ctrlr_state state;
3588 
3589 	if (spdk_nvme_cpl_is_error(cpl)) {
3590 		NVME_CTRLR_ERRLOG(ctrlr, "Failed to read the CC register\n");
3591 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
3592 		return;
3593 	}
3594 
3595 	assert(value <= UINT32_MAX);
3596 	ctrlr->process_init_cc.raw = (uint32_t)value;
3597 
3598 	if (ctrlr->process_init_cc.bits.en) {
3599 		NVME_CTRLR_DEBUGLOG(ctrlr, "CC.EN = 1\n");
3600 		state = NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1;
3601 	} else {
3602 		state = NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0;
3603 	}
3604 
3605 	nvme_ctrlr_set_state(ctrlr, state, nvme_ctrlr_get_ready_timeout(ctrlr));
3606 }
3607 
3608 static void
3609 nvme_ctrlr_process_init_set_en_0(void *ctx, uint64_t value, const struct spdk_nvme_cpl *cpl)
3610 {
3611 	struct spdk_nvme_ctrlr *ctrlr = ctx;
3612 
3613 	if (spdk_nvme_cpl_is_error(cpl)) {
3614 		NVME_CTRLR_ERRLOG(ctrlr, "Failed to write the CC register\n");
3615 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
3616 		return;
3617 	}
3618 
3619 	/*
3620 	 * Wait 2.5 seconds before accessing PCI registers.
3621 	 * Not using sleep() to avoid blocking other controllers' initialization.
3622 	 */
3623 	if (ctrlr->quirks & NVME_QUIRK_DELAY_BEFORE_CHK_RDY) {
3624 		NVME_CTRLR_DEBUGLOG(ctrlr, "Applying quirk: delay 2.5 seconds before reading registers\n");
3625 		ctrlr->sleep_timeout_tsc = spdk_get_ticks() + (2500 * spdk_get_ticks_hz() / 1000);
3626 	}
3627 
3628 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0,
3629 			     nvme_ctrlr_get_ready_timeout(ctrlr));
3630 }
3631 
3632 static void
3633 nvme_ctrlr_process_init_set_en_0_read_cc(void *ctx, uint64_t value, const struct spdk_nvme_cpl *cpl)
3634 {
3635 	struct spdk_nvme_ctrlr *ctrlr = ctx;
3636 	union spdk_nvme_cc_register cc;
3637 	int rc;
3638 
3639 	if (spdk_nvme_cpl_is_error(cpl)) {
3640 		NVME_CTRLR_ERRLOG(ctrlr, "Failed to read the CC register\n");
3641 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
3642 		return;
3643 	}
3644 
3645 	assert(value <= UINT32_MAX);
3646 	cc.raw = (uint32_t)value;
3647 	cc.bits.en = 0;
3648 	ctrlr->process_init_cc.raw = cc.raw;
3649 
3650 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_EN_0_WAIT_FOR_CC,
3651 			     nvme_ctrlr_get_ready_timeout(ctrlr));
3652 
3653 	rc = nvme_ctrlr_set_cc_async(ctrlr, cc.raw, nvme_ctrlr_process_init_set_en_0, ctrlr);
3654 	if (rc != 0) {
3655 		NVME_CTRLR_ERRLOG(ctrlr, "set_cc() failed\n");
3656 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
3657 	}
3658 }
3659 
3660 static void
3661 nvme_ctrlr_process_init_wait_for_ready_1(void *ctx, uint64_t value, const struct spdk_nvme_cpl *cpl)
3662 {
3663 	struct spdk_nvme_ctrlr *ctrlr = ctx;
3664 	union spdk_nvme_csts_register csts;
3665 
3666 	if (spdk_nvme_cpl_is_error(cpl)) {
3667 		/* While a device is resetting, it may be unable to service MMIO reads
3668 		 * temporarily. Allow for this case.
3669 		 */
3670 		if (!ctrlr->is_failed && ctrlr->state_timeout_tsc != NVME_TIMEOUT_INFINITE) {
3671 			NVME_CTRLR_DEBUGLOG(ctrlr, "Failed to read the CSTS register\n");
3672 			nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1,
3673 					     NVME_TIMEOUT_KEEP_EXISTING);
3674 		} else {
3675 			NVME_CTRLR_ERRLOG(ctrlr, "Failed to read the CSTS register\n");
3676 			nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
3677 		}
3678 
3679 		return;
3680 	}
3681 
3682 	assert(value <= UINT32_MAX);
3683 	csts.raw = (uint32_t)value;
3684 	if (csts.bits.rdy == 1) {
3685 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_EN_0,
3686 				     nvme_ctrlr_get_ready_timeout(ctrlr));
3687 	} else {
3688 		NVME_CTRLR_DEBUGLOG(ctrlr, "CC.EN = 1 && CSTS.RDY = 0 - waiting for reset to complete\n");
3689 		nvme_ctrlr_set_state_quiet(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1,
3690 					   NVME_TIMEOUT_KEEP_EXISTING);
3691 	}
3692 }
3693 
3694 static void
3695 nvme_ctrlr_process_init_wait_for_ready_0(void *ctx, uint64_t value, const struct spdk_nvme_cpl *cpl)
3696 {
3697 	struct spdk_nvme_ctrlr *ctrlr = ctx;
3698 	union spdk_nvme_csts_register csts;
3699 
3700 	if (spdk_nvme_cpl_is_error(cpl)) {
3701 		/* While a device is resetting, it may be unable to service MMIO reads
3702 		 * temporarily. Allow for this case.
3703 		 */
3704 		if (!ctrlr->is_failed && ctrlr->state_timeout_tsc != NVME_TIMEOUT_INFINITE) {
3705 			NVME_CTRLR_DEBUGLOG(ctrlr, "Failed to read the CSTS register\n");
3706 			nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0,
3707 					     NVME_TIMEOUT_KEEP_EXISTING);
3708 		} else {
3709 			NVME_CTRLR_ERRLOG(ctrlr, "Failed to read the CSTS register\n");
3710 			nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
3711 		}
3712 
3713 		return;
3714 	}
3715 
3716 	assert(value <= UINT32_MAX);
3717 	csts.raw = (uint32_t)value;
3718 	if (csts.bits.rdy == 0) {
3719 		NVME_CTRLR_DEBUGLOG(ctrlr, "CC.EN = 0 && CSTS.RDY = 0\n");
3720 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ENABLE,
3721 				     nvme_ctrlr_get_ready_timeout(ctrlr));
3722 		/*
3723 		 * Delay 100us before setting CC.EN = 1.  Some NVMe SSDs miss CC.EN getting
3724 		 *  set to 1 if it is too soon after CSTS.RDY is reported as 0.
3725 		 */
3726 		spdk_delay_us(100);
3727 	} else {
3728 		nvme_ctrlr_set_state_quiet(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0,
3729 					   NVME_TIMEOUT_KEEP_EXISTING);
3730 	}
3731 }
3732 
3733 static void
3734 nvme_ctrlr_process_init_enable_wait_for_ready_1(void *ctx, uint64_t value,
3735 		const struct spdk_nvme_cpl *cpl)
3736 {
3737 	struct spdk_nvme_ctrlr *ctrlr = ctx;
3738 	union spdk_nvme_csts_register csts;
3739 
3740 	if (spdk_nvme_cpl_is_error(cpl)) {
3741 		/* While a device is resetting, it may be unable to service MMIO reads
3742 		 * temporarily. Allow for this case.
3743 		 */
3744 		if (!ctrlr->is_failed && ctrlr->state_timeout_tsc != NVME_TIMEOUT_INFINITE) {
3745 			NVME_CTRLR_DEBUGLOG(ctrlr, "Failed to read the CSTS register\n");
3746 			nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1,
3747 					     NVME_TIMEOUT_KEEP_EXISTING);
3748 		} else {
3749 			NVME_CTRLR_ERRLOG(ctrlr, "Failed to read the CSTS register\n");
3750 			nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
3751 		}
3752 
3753 		return;
3754 	}
3755 
3756 	assert(value <= UINT32_MAX);
3757 	csts.raw = (uint32_t)value;
3758 	if (csts.bits.rdy == 1) {
3759 		NVME_CTRLR_DEBUGLOG(ctrlr, "CC.EN = 1 && CSTS.RDY = 1 - controller is ready\n");
3760 		/*
3761 		 * The controller has been enabled.
3762 		 *  Perform the rest of initialization serially.
3763 		 */
3764 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_RESET_ADMIN_QUEUE,
3765 				     ctrlr->opts.admin_timeout_ms);
3766 	} else {
3767 		nvme_ctrlr_set_state_quiet(ctrlr, NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1,
3768 					   NVME_TIMEOUT_KEEP_EXISTING);
3769 	}
3770 }
3771 
3772 /**
3773  * This function will be called repeatedly during initialization until the controller is ready.
3774  */
3775 int
3776 nvme_ctrlr_process_init(struct spdk_nvme_ctrlr *ctrlr)
3777 {
3778 	uint32_t ready_timeout_in_ms;
3779 	uint64_t ticks;
3780 	int rc = 0;
3781 
3782 	ticks = spdk_get_ticks();
3783 
3784 	/*
3785 	 * May need to avoid accessing any register on the target controller
3786 	 * for a while. Return early without touching the FSM.
3787 	 * The sleep_timeout_tsc > 0 check is needed to keep the unit tests working.
3788 	 */
3789 	if ((ctrlr->sleep_timeout_tsc > 0) &&
3790 	    (ticks <= ctrlr->sleep_timeout_tsc)) {
3791 		return 0;
3792 	}
3793 	ctrlr->sleep_timeout_tsc = 0;
3794 
3795 	ready_timeout_in_ms = nvme_ctrlr_get_ready_timeout(ctrlr);
3796 
3797 	/*
3798 	 * Check if the current initialization step is done or has timed out.
3799 	 */
3800 	switch (ctrlr->state) {
3801 	case NVME_CTRLR_STATE_INIT_DELAY:
3802 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_INIT, ready_timeout_in_ms);
3803 		if (ctrlr->quirks & NVME_QUIRK_DELAY_BEFORE_INIT) {
3804 			/*
3805 			 * Controller may need some delay before it's enabled.
3806 			 *
3807 			 * This is a workaround for an issue where the PCIe-attached NVMe controller
3808 			 * is not ready after VFIO reset. We delay the initialization rather than the
3809 			 * enabling itself, because this is required only for the very first enabling
3810 			 * - directly after a VFIO reset.
3811 			 */
3812 			NVME_CTRLR_DEBUGLOG(ctrlr, "Adding 2 second delay before initializing the controller\n");
3813 			ctrlr->sleep_timeout_tsc = ticks + (2000 * spdk_get_ticks_hz() / 1000);
3814 		}
3815 		break;
3816 
3817 	case NVME_CTRLR_STATE_CONNECT_ADMINQ: /* synonymous with NVME_CTRLR_STATE_INIT */
3818 		rc = nvme_transport_ctrlr_connect_qpair(ctrlr, ctrlr->adminq);
3819 		if (rc == 0) {
3820 			nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_CONNECT_ADMINQ,
3821 					     NVME_TIMEOUT_INFINITE);
3822 		} else {
3823 			nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
3824 		}
3825 		break;
3826 
3827 	case NVME_CTRLR_STATE_WAIT_FOR_CONNECT_ADMINQ:
3828 		spdk_nvme_qpair_process_completions(ctrlr->adminq, 0);
3829 
3830 		switch (nvme_qpair_get_state(ctrlr->adminq)) {
3831 		case NVME_QPAIR_CONNECTING:
3832 			break;
3833 		case NVME_QPAIR_CONNECTED:
3834 			nvme_qpair_set_state(ctrlr->adminq, NVME_QPAIR_ENABLED);
3835 		/* Fall through */
3836 		case NVME_QPAIR_ENABLED:
3837 			nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_READ_VS,
3838 					     NVME_TIMEOUT_INFINITE);
3839 			/* Abort any queued requests that were sent while the adminq was connecting
3840 			 * to avoid stalling the init process during a reset, as requests don't get
3841 			 * resubmitted while the controller is resetting and subsequent commands
3842 			 * would get queued too.
3843 			 */
3844 			nvme_qpair_abort_queued_reqs(ctrlr->adminq, 0);
3845 			break;
3846 		default:
3847 			nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
3848 			break;
3849 		}
3850 
3851 		break;
3852 
3853 	case NVME_CTRLR_STATE_READ_VS:
3854 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_READ_VS_WAIT_FOR_VS, NVME_TIMEOUT_INFINITE);
3855 		rc = nvme_ctrlr_get_vs_async(ctrlr, nvme_ctrlr_process_init_vs_done, ctrlr);
3856 		break;
3857 
3858 	case NVME_CTRLR_STATE_READ_CAP:
3859 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_READ_CAP_WAIT_FOR_CAP, NVME_TIMEOUT_INFINITE);
3860 		rc = nvme_ctrlr_get_cap_async(ctrlr, nvme_ctrlr_process_init_cap_done, ctrlr);
3861 		break;
3862 
3863 	case NVME_CTRLR_STATE_CHECK_EN:
3864 		/* Begin the hardware initialization by making sure the controller is disabled. */
3865 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_CHECK_EN_WAIT_FOR_CC, ready_timeout_in_ms);
3866 		rc = nvme_ctrlr_get_cc_async(ctrlr, nvme_ctrlr_process_init_check_en, ctrlr);
3867 		break;
3868 
3869 	case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1:
3870 		/*
3871 		 * Controller is currently enabled. We need to disable it to cause a reset.
3872 		 *
3873 		 * If CC.EN = 1 && CSTS.RDY = 0, the controller is in the process of becoming ready.
3874 		 *  Wait for the ready bit to be 1 before disabling the controller.
3875 		 */
3876 		nvme_ctrlr_set_state_quiet(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1_WAIT_FOR_CSTS,
3877 					   NVME_TIMEOUT_KEEP_EXISTING);
3878 		rc = nvme_ctrlr_get_csts_async(ctrlr, nvme_ctrlr_process_init_wait_for_ready_1, ctrlr);
3879 		break;
3880 
3881 	case NVME_CTRLR_STATE_SET_EN_0:
3882 		NVME_CTRLR_DEBUGLOG(ctrlr, "Setting CC.EN = 0\n");
3883 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_EN_0_WAIT_FOR_CC, ready_timeout_in_ms);
3884 		rc = nvme_ctrlr_get_cc_async(ctrlr, nvme_ctrlr_process_init_set_en_0_read_cc, ctrlr);
3885 		break;
3886 
3887 	case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0:
3888 		nvme_ctrlr_set_state_quiet(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0_WAIT_FOR_CSTS,
3889 					   NVME_TIMEOUT_KEEP_EXISTING);
3890 		rc = nvme_ctrlr_get_csts_async(ctrlr, nvme_ctrlr_process_init_wait_for_ready_0, ctrlr);
3891 		break;
3892 
3893 	case NVME_CTRLR_STATE_ENABLE:
3894 		NVME_CTRLR_DEBUGLOG(ctrlr, "Setting CC.EN = 1\n");
3895 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ENABLE_WAIT_FOR_CC, ready_timeout_in_ms);
3896 		rc = nvme_ctrlr_enable(ctrlr);
3897 		return rc;
3898 
3899 	case NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1:
3900 		nvme_ctrlr_set_state_quiet(ctrlr, NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1_WAIT_FOR_CSTS,
3901 					   NVME_TIMEOUT_KEEP_EXISTING);
3902 		rc = nvme_ctrlr_get_csts_async(ctrlr, nvme_ctrlr_process_init_enable_wait_for_ready_1,
3903 					       ctrlr);
3904 		break;
3905 
3906 	case NVME_CTRLR_STATE_RESET_ADMIN_QUEUE:
3907 		nvme_transport_qpair_reset(ctrlr->adminq);
3908 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY, NVME_TIMEOUT_INFINITE);
3909 		break;
3910 
3911 	case NVME_CTRLR_STATE_IDENTIFY:
3912 		rc = nvme_ctrlr_identify(ctrlr);
3913 		break;
3914 
3915 	case NVME_CTRLR_STATE_CONFIGURE_AER:
3916 		rc = nvme_ctrlr_configure_aer(ctrlr);
3917 		break;
3918 
3919 	case NVME_CTRLR_STATE_SET_KEEP_ALIVE_TIMEOUT:
3920 		rc = nvme_ctrlr_set_keep_alive_timeout(ctrlr);
3921 		break;
3922 
3923 	case NVME_CTRLR_STATE_IDENTIFY_IOCS_SPECIFIC:
3924 		rc = nvme_ctrlr_identify_iocs_specific(ctrlr);
3925 		break;
3926 
3927 	case NVME_CTRLR_STATE_GET_ZNS_CMD_EFFECTS_LOG:
3928 		rc = nvme_ctrlr_get_zns_cmd_and_effects_log(ctrlr);
3929 		break;
3930 
3931 	case NVME_CTRLR_STATE_SET_NUM_QUEUES:
3932 		nvme_ctrlr_update_nvmf_ioccsz(ctrlr);
3933 		rc = nvme_ctrlr_set_num_queues(ctrlr);
3934 		break;
3935 
3936 	case NVME_CTRLR_STATE_IDENTIFY_ACTIVE_NS:
3937 		_nvme_ctrlr_identify_active_ns(ctrlr);
3938 		break;
3939 
3940 	case NVME_CTRLR_STATE_IDENTIFY_NS:
3941 		rc = nvme_ctrlr_identify_namespaces(ctrlr);
3942 		break;
3943 
3944 	case NVME_CTRLR_STATE_IDENTIFY_ID_DESCS:
3945 		rc = nvme_ctrlr_identify_id_desc_namespaces(ctrlr);
3946 		break;
3947 
3948 	case NVME_CTRLR_STATE_IDENTIFY_NS_IOCS_SPECIFIC:
3949 		rc = nvme_ctrlr_identify_namespaces_iocs_specific(ctrlr);
3950 		break;
3951 
3952 	case NVME_CTRLR_STATE_SET_SUPPORTED_LOG_PAGES:
3953 		rc = nvme_ctrlr_set_supported_log_pages(ctrlr);
3954 		break;
3955 
3956 	case NVME_CTRLR_STATE_SET_SUPPORTED_INTEL_LOG_PAGES:
3957 		rc = nvme_ctrlr_set_intel_support_log_pages(ctrlr);
3958 		break;
3959 
3960 	case NVME_CTRLR_STATE_SET_SUPPORTED_FEATURES:
3961 		nvme_ctrlr_set_supported_features(ctrlr);
3962 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_DB_BUF_CFG,
3963 				     ctrlr->opts.admin_timeout_ms);
3964 		break;
3965 
3966 	case NVME_CTRLR_STATE_SET_DB_BUF_CFG:
3967 		rc = nvme_ctrlr_set_doorbell_buffer_config(ctrlr);
3968 		break;
3969 
3970 	case NVME_CTRLR_STATE_SET_HOST_ID:
3971 		rc = nvme_ctrlr_set_host_id(ctrlr);
3972 		break;
3973 
3974 	case NVME_CTRLR_STATE_READY:
3975 		NVME_CTRLR_DEBUGLOG(ctrlr, "Ctrlr already in ready state\n");
3976 		return 0;
3977 
3978 	case NVME_CTRLR_STATE_ERROR:
3979 		NVME_CTRLR_ERRLOG(ctrlr, "Ctrlr is in error state\n");
3980 		return -1;
3981 
3982 	case NVME_CTRLR_STATE_READ_VS_WAIT_FOR_VS:
3983 	case NVME_CTRLR_STATE_READ_CAP_WAIT_FOR_CAP:
3984 	case NVME_CTRLR_STATE_CHECK_EN_WAIT_FOR_CC:
3985 	case NVME_CTRLR_STATE_SET_EN_0_WAIT_FOR_CC:
3986 	case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1_WAIT_FOR_CSTS:
3987 	case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0_WAIT_FOR_CSTS:
3988 	case NVME_CTRLR_STATE_ENABLE_WAIT_FOR_CC:
3989 	case NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1_WAIT_FOR_CSTS:
3990 	case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY:
3991 	case NVME_CTRLR_STATE_WAIT_FOR_CONFIGURE_AER:
3992 	case NVME_CTRLR_STATE_WAIT_FOR_KEEP_ALIVE_TIMEOUT:
3993 	case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_IOCS_SPECIFIC:
3994 	case NVME_CTRLR_STATE_WAIT_FOR_GET_ZNS_CMD_EFFECTS_LOG:
3995 	case NVME_CTRLR_STATE_WAIT_FOR_SET_NUM_QUEUES:
3996 	case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_ACTIVE_NS:
3997 	case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_NS:
3998 	case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_ID_DESCS:
3999 	case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_NS_IOCS_SPECIFIC:
4000 	case NVME_CTRLR_STATE_WAIT_FOR_SUPPORTED_INTEL_LOG_PAGES:
4001 	case NVME_CTRLR_STATE_WAIT_FOR_DB_BUF_CFG:
4002 	case NVME_CTRLR_STATE_WAIT_FOR_HOST_ID:
4003 		spdk_nvme_qpair_process_completions(ctrlr->adminq, 0);
4004 		break;
4005 
4006 	default:
4007 		assert(0);
4008 		return -1;
4009 	}
4010 
4011 	/* Note: we use the ticks captured when we entered this function.
4012 	 * This covers environments where the SPDK process gets swapped out after
4013 	 * we tried to advance the state but before we check the timeout here.
4014 	 * It is not normal for this to happen, but harmless to handle it in this
4015 	 * way.
4016 	 */
4017 	if (ctrlr->state_timeout_tsc != NVME_TIMEOUT_INFINITE &&
4018 	    ticks > ctrlr->state_timeout_tsc) {
4019 		NVME_CTRLR_ERRLOG(ctrlr, "Initialization timed out in state %d (%s)\n",
4020 				  ctrlr->state, nvme_ctrlr_state_string(ctrlr->state));
4021 		return -1;
4022 	}
4023 
4024 	return rc;
4025 }
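
/*
 * Illustrative sketch of how this state machine is driven: nvme_ctrlr_process_init()
 * is polled until the controller either reaches the READY state or reports an
 * error.  This is only a sketch that assumes direct access to ctrlr->state, as
 * the rest of this file does; the actual polling loop lives in the probe/attach
 * path of the driver.
 *
 *	static int
 *	drive_ctrlr_init(struct spdk_nvme_ctrlr *ctrlr)
 *	{
 *		int rc;
 *
 *		do {
 *			rc = nvme_ctrlr_process_init(ctrlr);
 *			if (rc != 0) {
 *				return rc;	// initialization failed or timed out
 *			}
 *		} while (ctrlr->state != NVME_CTRLR_STATE_READY);
 *
 *		return 0;
 *	}
 */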
4026 
4027 int
4028 nvme_robust_mutex_init_recursive_shared(pthread_mutex_t *mtx)
4029 {
4030 	pthread_mutexattr_t attr;
4031 	int rc = 0;
4032 
4033 	if (pthread_mutexattr_init(&attr)) {
4034 		return -1;
4035 	}
4036 	if (pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE) ||
4037 #ifndef __FreeBSD__
4038 	    pthread_mutexattr_setrobust(&attr, PTHREAD_MUTEX_ROBUST) ||
4039 	    pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED) ||
4040 #endif
4041 	    pthread_mutex_init(mtx, &attr)) {
4042 		rc = -1;
4043 	}
4044 	pthread_mutexattr_destroy(&attr);
4045 	return rc;
4046 }
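
/*
 * A robust, process-shared mutex lets a surviving process recover the lock when
 * its owner dies while holding it: the next pthread_mutex_lock() returns
 * EOWNERDEAD and the new owner must mark the mutex consistent again.  A generic
 * POSIX sketch of that recovery pattern, shown only for illustration (the
 * driver's own nvme_robust_mutex_lock() helper is expected to handle this):
 *
 *	#include <errno.h>
 *	#include <pthread.h>
 *
 *	static int
 *	robust_lock(pthread_mutex_t *mtx)
 *	{
 *		int rc = pthread_mutex_lock(mtx);
 *
 *		if (rc == EOWNERDEAD) {
 *			// The previous owner died; shared state may need repair here.
 *			pthread_mutex_consistent(mtx);
 *			rc = 0;
 *		}
 *		return rc;
 *	}
 */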
4047 
4048 int
4049 nvme_ctrlr_construct(struct spdk_nvme_ctrlr *ctrlr)
4050 {
4051 	int rc;
4052 
4053 	if (ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) {
4054 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_INIT_DELAY, NVME_TIMEOUT_INFINITE);
4055 	} else {
4056 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_INIT, NVME_TIMEOUT_INFINITE);
4057 	}
4058 
4059 	if (ctrlr->opts.admin_queue_size > SPDK_NVME_ADMIN_QUEUE_MAX_ENTRIES) {
4060 		NVME_CTRLR_ERRLOG(ctrlr, "admin_queue_size %u exceeds the maximum defined by the NVMe spec, using the max value\n",
4061 				  ctrlr->opts.admin_queue_size);
4062 		ctrlr->opts.admin_queue_size = SPDK_NVME_ADMIN_QUEUE_MAX_ENTRIES;
4063 	}
4064 
4065 	if (ctrlr->opts.admin_queue_size < SPDK_NVME_ADMIN_QUEUE_MIN_ENTRIES) {
4066 		NVME_CTRLR_ERRLOG(ctrlr,
4067 				  "admin_queue_size %u is less than the minimum defined by the NVMe spec, using the min value\n",
4068 				  ctrlr->opts.admin_queue_size);
4069 		ctrlr->opts.admin_queue_size = SPDK_NVME_ADMIN_QUEUE_MIN_ENTRIES;
4070 	}
4071 
4072 	ctrlr->flags = 0;
4073 	ctrlr->free_io_qids = NULL;
4074 	ctrlr->is_resetting = false;
4075 	ctrlr->is_failed = false;
4076 	ctrlr->is_destructed = false;
4077 
4078 	TAILQ_INIT(&ctrlr->active_io_qpairs);
4079 	STAILQ_INIT(&ctrlr->queued_aborts);
4080 	ctrlr->outstanding_aborts = 0;
4081 
4082 	ctrlr->ana_log_page = NULL;
4083 	ctrlr->ana_log_page_size = 0;
4084 
4085 	rc = nvme_robust_mutex_init_recursive_shared(&ctrlr->ctrlr_lock);
4086 	if (rc != 0) {
4087 		return rc;
4088 	}
4089 
4090 	TAILQ_INIT(&ctrlr->active_procs);
4091 	STAILQ_INIT(&ctrlr->register_operations);
4092 
4093 	RB_INIT(&ctrlr->ns);
4094 
4095 	return rc;
4096 }
4097 
4098 static void
4099 nvme_ctrlr_init_cap(struct spdk_nvme_ctrlr *ctrlr)
4100 {
4101 	if (ctrlr->cap.bits.ams & SPDK_NVME_CAP_AMS_WRR) {
4102 		ctrlr->flags |= SPDK_NVME_CTRLR_WRR_SUPPORTED;
4103 	}
4104 
4105 	ctrlr->min_page_size = 1u << (12 + ctrlr->cap.bits.mpsmin);
4106 
4107 	/* For now, always select page_size == min_page_size. */
4108 	ctrlr->page_size = ctrlr->min_page_size;
4109 
4110 	ctrlr->opts.io_queue_size = spdk_max(ctrlr->opts.io_queue_size, SPDK_NVME_IO_QUEUE_MIN_ENTRIES);
4111 	ctrlr->opts.io_queue_size = spdk_min(ctrlr->opts.io_queue_size, MAX_IO_QUEUE_ENTRIES);
4112 	if (ctrlr->quirks & NVME_QUIRK_MINIMUM_IO_QUEUE_SIZE &&
4113 	    ctrlr->opts.io_queue_size == DEFAULT_IO_QUEUE_SIZE) {
4114 		/* If the user specifically set an IO queue size different than the
4115 		 * default, use that value.  Otherwise overwrite with the quirked value.
4116 		 * This allows this quirk to be overridden when necessary.
4117 		 * However, cap.mqes still needs to be respected.
4118 		 */
4119 		ctrlr->opts.io_queue_size = DEFAULT_IO_QUEUE_SIZE_FOR_QUIRK;
4120 	}
4121 	ctrlr->opts.io_queue_size = spdk_min(ctrlr->opts.io_queue_size, ctrlr->cap.bits.mqes + 1u);
4122 
4123 	ctrlr->opts.io_queue_requests = spdk_max(ctrlr->opts.io_queue_requests, ctrlr->opts.io_queue_size);
4124 }
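
/*
 * Worked example of the I/O queue size clamping above (illustrative numbers):
 * with opts.io_queue_size = 1024 and CAP.MQES = 255 (a zero-based field, so the
 * device supports 256 entries per queue), the value is first bounded to
 * [SPDK_NVME_IO_QUEUE_MIN_ENTRIES, MAX_IO_QUEUE_ENTRIES] and, assuming 1024
 * falls inside that range, is then reduced to spdk_min(1024, 255 + 1) = 256.
 * io_queue_requests is finally raised to at least that queue size.
 */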
4125 
4126 void
4127 nvme_ctrlr_destruct_finish(struct spdk_nvme_ctrlr *ctrlr)
4128 {
4129 	pthread_mutex_destroy(&ctrlr->ctrlr_lock);
4130 }
4131 
4132 void
4133 nvme_ctrlr_destruct_async(struct spdk_nvme_ctrlr *ctrlr,
4134 			  struct nvme_ctrlr_detach_ctx *ctx)
4135 {
4136 	struct spdk_nvme_qpair *qpair, *tmp;
4137 
4138 	NVME_CTRLR_DEBUGLOG(ctrlr, "Prepare to destruct SSD\n");
4139 
4140 	ctrlr->is_destructed = true;
4141 
4142 	spdk_nvme_qpair_process_completions(ctrlr->adminq, 0);
4143 
4144 	nvme_ctrlr_abort_queued_aborts(ctrlr);
4145 	nvme_transport_admin_qpair_abort_aers(ctrlr->adminq);
4146 
4147 	TAILQ_FOREACH_SAFE(qpair, &ctrlr->active_io_qpairs, tailq, tmp) {
4148 		spdk_nvme_ctrlr_free_io_qpair(qpair);
4149 	}
4150 
4151 	nvme_ctrlr_free_doorbell_buffer(ctrlr);
4152 	nvme_ctrlr_free_iocs_specific_data(ctrlr);
4153 
4154 	nvme_ctrlr_shutdown_async(ctrlr, ctx);
4155 }
4156 
4157 int
4158 nvme_ctrlr_destruct_poll_async(struct spdk_nvme_ctrlr *ctrlr,
4159 			       struct nvme_ctrlr_detach_ctx *ctx)
4160 {
4161 	struct spdk_nvme_ns *ns, *tmp_ns;
4162 	int rc = 0;
4163 
4164 	if (!ctx->shutdown_complete) {
4165 		rc = nvme_ctrlr_shutdown_poll_async(ctrlr, ctx);
4166 		if (rc == -EAGAIN) {
4167 			return -EAGAIN;
4168 		}
4169 		/* Destruct ctrlr forcefully for any other error. */
4170 	}
4171 
4172 	if (ctx->cb_fn) {
4173 		ctx->cb_fn(ctrlr);
4174 	}
4175 
4176 	nvme_transport_ctrlr_disconnect_qpair(ctrlr, ctrlr->adminq);
4177 
4178 	RB_FOREACH_SAFE(ns, nvme_ns_tree, &ctrlr->ns, tmp_ns) {
4179 		nvme_ctrlr_destruct_namespace(ctrlr, ns->id);
4180 		RB_REMOVE(nvme_ns_tree, &ctrlr->ns, ns);
4181 		spdk_free(ns);
4182 	}
4183 
4184 	ctrlr->active_ns_count = 0;
4185 
4186 	spdk_bit_array_free(&ctrlr->free_io_qids);
4187 
4188 	free(ctrlr->ana_log_page);
4189 	free(ctrlr->copied_ana_desc);
4190 	ctrlr->ana_log_page = NULL;
4191 	ctrlr->copied_ana_desc = NULL;
4192 	ctrlr->ana_log_page_size = 0;
4193 
4194 	nvme_transport_ctrlr_destruct(ctrlr);
4195 
4196 	return rc;
4197 }
4198 
4199 void
4200 nvme_ctrlr_destruct(struct spdk_nvme_ctrlr *ctrlr)
4201 {
4202 	struct nvme_ctrlr_detach_ctx ctx = { .ctrlr = ctrlr };
4203 	int rc;
4204 
4205 	nvme_ctrlr_destruct_async(ctrlr, &ctx);
4206 
4207 	while (1) {
4208 		rc = nvme_ctrlr_destruct_poll_async(ctrlr, &ctx);
4209 		if (rc != -EAGAIN) {
4210 			break;
4211 		}
4212 		nvme_delay(1000);
4213 	}
4214 }
4215 
4216 int
4217 nvme_ctrlr_submit_admin_request(struct spdk_nvme_ctrlr *ctrlr,
4218 				struct nvme_request *req)
4219 {
4220 	return nvme_qpair_submit_request(ctrlr->adminq, req);
4221 }
4222 
4223 static void
4224 nvme_keep_alive_completion(void *cb_ctx, const struct spdk_nvme_cpl *cpl)
4225 {
4226 	/* Do nothing */
4227 }
4228 
4229 /*
4230  * Check if we need to send a Keep Alive command.
4231  * Caller must hold ctrlr->ctrlr_lock.
4232  */
4233 static int
4234 nvme_ctrlr_keep_alive(struct spdk_nvme_ctrlr *ctrlr)
4235 {
4236 	uint64_t now;
4237 	struct nvme_request *req;
4238 	struct spdk_nvme_cmd *cmd;
4239 	int rc = 0;
4240 
4241 	now = spdk_get_ticks();
4242 	if (now < ctrlr->next_keep_alive_tick) {
4243 		return rc;
4244 	}
4245 
4246 	req = nvme_allocate_request_null(ctrlr->adminq, nvme_keep_alive_completion, NULL);
4247 	if (req == NULL) {
4248 		return rc;
4249 	}
4250 
4251 	cmd = &req->cmd;
4252 	cmd->opc = SPDK_NVME_OPC_KEEP_ALIVE;
4253 
4254 	rc = nvme_ctrlr_submit_admin_request(ctrlr, req);
4255 	if (rc != 0) {
4256 		NVME_CTRLR_ERRLOG(ctrlr, "Submitting Keep Alive failed\n");
4257 		rc = -ENXIO;
4258 	}
4259 
4260 	ctrlr->next_keep_alive_tick = now + ctrlr->keep_alive_interval_ticks;
4261 	return rc;
4262 }
4263 
4264 int32_t
4265 spdk_nvme_ctrlr_process_admin_completions(struct spdk_nvme_ctrlr *ctrlr)
4266 {
4267 	int32_t num_completions;
4268 	int32_t rc;
4269 	struct spdk_nvme_ctrlr_process	*active_proc;
4270 
4271 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
4272 
4273 	if (ctrlr->keep_alive_interval_ticks) {
4274 		rc = nvme_ctrlr_keep_alive(ctrlr);
4275 		if (rc) {
4276 			nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
4277 			return rc;
4278 		}
4279 	}
4280 
4281 	rc = nvme_io_msg_process(ctrlr);
4282 	if (rc < 0) {
4283 		nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
4284 		return rc;
4285 	}
4286 	num_completions = rc;
4287 
4288 	rc = spdk_nvme_qpair_process_completions(ctrlr->adminq, 0);
4289 
4290 	/* Each process has an async list, complete the ones for this process object */
4291 	active_proc = nvme_ctrlr_get_current_process(ctrlr);
4292 	if (active_proc) {
4293 		nvme_ctrlr_complete_queued_async_events(ctrlr);
4294 	}
4295 
4296 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
4297 
4298 	if (rc < 0) {
4299 		num_completions = rc;
4300 	} else {
4301 		num_completions += rc;
4302 	}
4303 
4304 	return num_completions;
4305 }
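
/*
 * Illustrative usage sketch, not part of this file: an application is expected
 * to poll the admin queue periodically, which also gives the driver a chance to
 * send Keep Alive commands when a keep-alive interval is configured.  Error
 * handling and the polling cadence are application specific.
 *
 *	for (;;) {
 *		int32_t rc = spdk_nvme_ctrlr_process_admin_completions(ctrlr);
 *
 *		if (rc < 0) {
 *			break;	// e.g. the controller has failed
 *		}
 *		// ... poll I/O qpairs, sleep, or do other work here ...
 *	}
 */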
4306 
4307 const struct spdk_nvme_ctrlr_data *
4308 spdk_nvme_ctrlr_get_data(struct spdk_nvme_ctrlr *ctrlr)
4309 {
4310 	return &ctrlr->cdata;
4311 }
4312 
4313 union spdk_nvme_csts_register spdk_nvme_ctrlr_get_regs_csts(struct spdk_nvme_ctrlr *ctrlr)
4314 {
4315 	union spdk_nvme_csts_register csts;
4316 
4317 	if (nvme_ctrlr_get_csts(ctrlr, &csts)) {
4318 		csts.raw = SPDK_NVME_INVALID_REGISTER_VALUE;
4319 	}
4320 	return csts;
4321 }
4322 
4323 union spdk_nvme_cc_register spdk_nvme_ctrlr_get_regs_cc(struct spdk_nvme_ctrlr *ctrlr)
4324 {
4325 	union spdk_nvme_cc_register cc;
4326 
4327 	if (nvme_ctrlr_get_cc(ctrlr, &cc)) {
4328 		cc.raw = SPDK_NVME_INVALID_REGISTER_VALUE;
4329 	}
4330 	return cc;
4331 }
4332 
4333 union spdk_nvme_cap_register spdk_nvme_ctrlr_get_regs_cap(struct spdk_nvme_ctrlr *ctrlr)
4334 {
4335 	return ctrlr->cap;
4336 }
4337 
4338 union spdk_nvme_vs_register spdk_nvme_ctrlr_get_regs_vs(struct spdk_nvme_ctrlr *ctrlr)
4339 {
4340 	return ctrlr->vs;
4341 }
4342 
4343 union spdk_nvme_cmbsz_register spdk_nvme_ctrlr_get_regs_cmbsz(struct spdk_nvme_ctrlr *ctrlr)
4344 {
4345 	union spdk_nvme_cmbsz_register cmbsz;
4346 
4347 	if (nvme_ctrlr_get_cmbsz(ctrlr, &cmbsz)) {
4348 		cmbsz.raw = 0;
4349 	}
4350 
4351 	return cmbsz;
4352 }
4353 
4354 union spdk_nvme_pmrcap_register spdk_nvme_ctrlr_get_regs_pmrcap(struct spdk_nvme_ctrlr *ctrlr)
4355 {
4356 	union spdk_nvme_pmrcap_register pmrcap;
4357 
4358 	if (nvme_ctrlr_get_pmrcap(ctrlr, &pmrcap)) {
4359 		pmrcap.raw = 0;
4360 	}
4361 
4362 	return pmrcap;
4363 }
4364 
4365 union spdk_nvme_bpinfo_register spdk_nvme_ctrlr_get_regs_bpinfo(struct spdk_nvme_ctrlr *ctrlr)
4366 {
4367 	union spdk_nvme_bpinfo_register bpinfo;
4368 
4369 	if (nvme_ctrlr_get_bpinfo(ctrlr, &bpinfo)) {
4370 		bpinfo.raw = 0;
4371 	}
4372 
4373 	return bpinfo;
4374 }
4375 
4376 uint64_t
4377 spdk_nvme_ctrlr_get_pmrsz(struct spdk_nvme_ctrlr *ctrlr)
4378 {
4379 	return ctrlr->pmr_size;
4380 }
4381 
4382 uint32_t
4383 spdk_nvme_ctrlr_get_num_ns(struct spdk_nvme_ctrlr *ctrlr)
4384 {
4385 	return ctrlr->cdata.nn;
4386 }
4387 
4388 bool
4389 spdk_nvme_ctrlr_is_active_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid)
4390 {
4391 	struct spdk_nvme_ns tmp, *ns;
4392 
4393 	tmp.id = nsid;
4394 	ns = RB_FIND(nvme_ns_tree, &ctrlr->ns, &tmp);
4395 
4396 	if (ns != NULL) {
4397 		return ns->active;
4398 	}
4399 
4400 	return false;
4401 }
4402 
4403 uint32_t
4404 spdk_nvme_ctrlr_get_first_active_ns(struct spdk_nvme_ctrlr *ctrlr)
4405 {
4406 	struct spdk_nvme_ns *ns;
4407 
4408 	ns = RB_MIN(nvme_ns_tree, &ctrlr->ns);
4409 	if (ns == NULL) {
4410 		return 0;
4411 	}
4412 
4413 	while (ns != NULL) {
4414 		if (ns->active) {
4415 			return ns->id;
4416 		}
4417 
4418 		ns = RB_NEXT(nvme_ns_tree, &ctrlr->ns, ns);
4419 	}
4420 
4421 	return 0;
4422 }
4423 
4424 uint32_t
4425 spdk_nvme_ctrlr_get_next_active_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t prev_nsid)
4426 {
4427 	struct spdk_nvme_ns tmp, *ns;
4428 
4429 	tmp.id = prev_nsid;
4430 	ns = RB_FIND(nvme_ns_tree, &ctrlr->ns, &tmp);
4431 	if (ns == NULL) {
4432 		return 0;
4433 	}
4434 
4435 	ns = RB_NEXT(nvme_ns_tree, &ctrlr->ns, ns);
4436 	while (ns != NULL) {
4437 		if (ns->active) {
4438 			return ns->id;
4439 		}
4440 
4441 		ns = RB_NEXT(nvme_ns_tree, &ctrlr->ns, ns);
4442 	}
4443 
4444 	return 0;
4445 }
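
/*
 * Illustrative sketch of the usual active-namespace iteration built on the two
 * functions above (not part of this file; error handling omitted):
 *
 *	uint32_t nsid;
 *	struct spdk_nvme_ns *ns;
 *
 *	for (nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr); nsid != 0;
 *	     nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, nsid)) {
 *		ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid);
 *		if (ns == NULL) {
 *			continue;
 *		}
 *		// ... use ns here ...
 *	}
 */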
4446 
4447 struct spdk_nvme_ns *
4448 spdk_nvme_ctrlr_get_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid)
4449 {
4450 	struct spdk_nvme_ns tmp;
4451 	struct spdk_nvme_ns *ns;
4452 
4453 	if (nsid < 1 || nsid > ctrlr->cdata.nn) {
4454 		return NULL;
4455 	}
4456 
4457 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
4458 
4459 	tmp.id = nsid;
4460 	ns = RB_FIND(nvme_ns_tree, &ctrlr->ns, &tmp);
4461 
4462 	if (ns == NULL) {
4463 		ns = spdk_zmalloc(sizeof(struct spdk_nvme_ns), 64, NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_SHARE);
4464 		if (ns == NULL) {
4465 			nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
4466 			return NULL;
4467 		}
4468 
4469 		NVME_CTRLR_DEBUGLOG(ctrlr, "Namespace %u was added\n", nsid);
4470 		ns->id = nsid;
4471 		RB_INSERT(nvme_ns_tree, &ctrlr->ns, ns);
4472 	}
4473 
4474 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
4475 
4476 	return ns;
4477 }
4478 
4479 struct spdk_pci_device *
4480 spdk_nvme_ctrlr_get_pci_device(struct spdk_nvme_ctrlr *ctrlr)
4481 {
4482 	if (ctrlr == NULL) {
4483 		return NULL;
4484 	}
4485 
4486 	if (ctrlr->trid.trtype != SPDK_NVME_TRANSPORT_PCIE) {
4487 		return NULL;
4488 	}
4489 
4490 	return nvme_ctrlr_proc_get_devhandle(ctrlr);
4491 }
4492 
4493 uint32_t
4494 spdk_nvme_ctrlr_get_max_xfer_size(const struct spdk_nvme_ctrlr *ctrlr)
4495 {
4496 	return ctrlr->max_xfer_size;
4497 }
4498 
4499 void
4500 spdk_nvme_ctrlr_register_aer_callback(struct spdk_nvme_ctrlr *ctrlr,
4501 				      spdk_nvme_aer_cb aer_cb_fn,
4502 				      void *aer_cb_arg)
4503 {
4504 	struct spdk_nvme_ctrlr_process *active_proc;
4505 
4506 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
4507 
4508 	active_proc = nvme_ctrlr_get_current_process(ctrlr);
4509 	if (active_proc) {
4510 		active_proc->aer_cb_fn = aer_cb_fn;
4511 		active_proc->aer_cb_arg = aer_cb_arg;
4512 	}
4513 
4514 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
4515 }
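
/*
 * Illustrative sketch of registering an AER handler, not part of this file.
 * It assumes the spdk_nvme_aer_cb prototype from spdk/nvme.h, which receives
 * the caller's context pointer and the completion of the async event request:
 *
 *	static void
 *	my_aer_handler(void *arg, const struct spdk_nvme_cpl *cpl)
 *	{
 *		if (!spdk_nvme_cpl_is_error(cpl)) {
 *			// cpl->cdw0 carries the asynchronous event information.
 *		}
 *	}
 *
 *	spdk_nvme_ctrlr_register_aer_callback(ctrlr, my_aer_handler, my_ctx);
 */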
4516 
4517 void
4518 spdk_nvme_ctrlr_register_timeout_callback(struct spdk_nvme_ctrlr *ctrlr,
4519 		uint64_t timeout_io_us, uint64_t timeout_admin_us,
4520 		spdk_nvme_timeout_cb cb_fn, void *cb_arg)
4521 {
4522 	struct spdk_nvme_ctrlr_process	*active_proc;
4523 
4524 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
4525 
4526 	active_proc = nvme_ctrlr_get_current_process(ctrlr);
4527 	if (active_proc) {
4528 		active_proc->timeout_io_ticks = timeout_io_us * spdk_get_ticks_hz() / 1000000ULL;
4529 		active_proc->timeout_admin_ticks = timeout_admin_us * spdk_get_ticks_hz() / 1000000ULL;
4530 		active_proc->timeout_cb_fn = cb_fn;
4531 		active_proc->timeout_cb_arg = cb_arg;
4532 	}
4533 
4534 	ctrlr->timeout_enabled = true;
4535 
4536 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
4537 }
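
/*
 * Illustrative sketch of enabling command timeouts, not part of this file.  It
 * assumes the spdk_nvme_timeout_cb prototype from spdk/nvme.h, which reports
 * the controller, the qpair (NULL for admin commands) and the command
 * identifier of the command that timed out:
 *
 *	static void
 *	my_timeout_handler(void *cb_arg, struct spdk_nvme_ctrlr *ctrlr,
 *			   struct spdk_nvme_qpair *qpair, uint16_t cid)
 *	{
 *		// A typical reaction is to abort the command or reset the controller.
 *	}
 *
 *	// 10 seconds for I/O commands, 30 seconds for admin commands.
 *	spdk_nvme_ctrlr_register_timeout_callback(ctrlr, 10 * 1000 * 1000,
 *						   30 * 1000 * 1000,
 *						   my_timeout_handler, my_ctx);
 */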
4538 
4539 bool
4540 spdk_nvme_ctrlr_is_log_page_supported(struct spdk_nvme_ctrlr *ctrlr, uint8_t log_page)
4541 {
4542 	/* No bounds check necessary, since log_page is uint8_t and log_page_supported has 256 entries */
4543 	SPDK_STATIC_ASSERT(sizeof(ctrlr->log_page_supported) == 256, "log_page_supported size mismatch");
4544 	return ctrlr->log_page_supported[log_page];
4545 }
4546 
4547 bool
4548 spdk_nvme_ctrlr_is_feature_supported(struct spdk_nvme_ctrlr *ctrlr, uint8_t feature_code)
4549 {
4550 	/* No bounds check necessary, since feature_code is uint8_t and feature_supported has 256 entries */
4551 	SPDK_STATIC_ASSERT(sizeof(ctrlr->feature_supported) == 256, "feature_supported size mismatch");
4552 	return ctrlr->feature_supported[feature_code];
4553 }
4554 
4555 int
4556 spdk_nvme_ctrlr_attach_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid,
4557 			  struct spdk_nvme_ctrlr_list *payload)
4558 {
4559 	struct nvme_completion_poll_status	*status;
4560 	struct spdk_nvme_ns			*ns;
4561 	int					res;
4562 
4563 	if (nsid == 0) {
4564 		return -EINVAL;
4565 	}
4566 
4567 	status = calloc(1, sizeof(*status));
4568 	if (!status) {
4569 		NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n");
4570 		return -ENOMEM;
4571 	}
4572 
4573 	res = nvme_ctrlr_cmd_attach_ns(ctrlr, nsid, payload,
4574 				       nvme_completion_poll_cb, status);
4575 	if (res) {
4576 		free(status);
4577 		return res;
4578 	}
4579 	if (nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock)) {
4580 		NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_attach_ns failed!\n");
4581 		if (!status->timed_out) {
4582 			free(status);
4583 		}
4584 		return -ENXIO;
4585 	}
4586 	free(status);
4587 
4588 	res = nvme_ctrlr_identify_active_ns(ctrlr);
4589 	if (res) {
4590 		return res;
4591 	}
4592 
4593 	ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid);
4594 	return nvme_ns_construct(ns, nsid, ctrlr);
4595 }
4596 
4597 int
4598 spdk_nvme_ctrlr_detach_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid,
4599 			  struct spdk_nvme_ctrlr_list *payload)
4600 {
4601 	struct nvme_completion_poll_status	*status;
4602 	int					res;
4603 
4604 	if (nsid == 0) {
4605 		return -EINVAL;
4606 	}
4607 
4608 	status = calloc(1, sizeof(*status));
4609 	if (!status) {
4610 		NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n");
4611 		return -ENOMEM;
4612 	}
4613 
4614 	res = nvme_ctrlr_cmd_detach_ns(ctrlr, nsid, payload,
4615 				       nvme_completion_poll_cb, status);
4616 	if (res) {
4617 		free(status);
4618 		return res;
4619 	}
4620 	if (nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock)) {
4621 		NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_detach_ns failed!\n");
4622 		if (!status->timed_out) {
4623 			free(status);
4624 		}
4625 		return -ENXIO;
4626 	}
4627 	free(status);
4628 
4629 	return nvme_ctrlr_identify_active_ns(ctrlr);
4630 }
4631 
4632 uint32_t
4633 spdk_nvme_ctrlr_create_ns(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_ns_data *payload)
4634 {
4635 	struct nvme_completion_poll_status	*status;
4636 	int					res;
4637 	uint32_t				nsid;
4638 
4639 	status = calloc(1, sizeof(*status));
4640 	if (!status) {
4641 		NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n");
4642 		return 0;
4643 	}
4644 
4645 	res = nvme_ctrlr_cmd_create_ns(ctrlr, payload, nvme_completion_poll_cb, status);
4646 	if (res) {
4647 		free(status);
4648 		return 0;
4649 	}
4650 	if (nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock)) {
4651 		NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_create_ns failed!\n");
4652 		if (!status->timed_out) {
4653 			free(status);
4654 		}
4655 		return 0;
4656 	}
4657 
4658 	nsid = status->cpl.cdw0;
4659 	free(status);
4660 
4661 	assert(nsid > 0);
4662 
4663 	/* Return the namespace ID that was created */
4664 	return nsid;
4665 }
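
/*
 * Illustrative sketch of namespace management using the calls above, not part
 * of this file.  It assumes the spdk_nvme_ctrlr_list layout from
 * spdk/nvme_spec.h (a controller count followed by controller IDs); sizes are
 * hypothetical and error handling is omitted:
 *
 *	struct spdk_nvme_ns_data ns_data = {};
 *	struct spdk_nvme_ctrlr_list ctrlr_list = {};
 *	uint32_t nsid;
 *
 *	ns_data.nsze = 1024 * 1024;	// size in logical blocks (hypothetical)
 *	ns_data.ncap = ns_data.nsze;
 *	// A real caller would also select an LBA format (flbas), etc.
 *
 *	nsid = spdk_nvme_ctrlr_create_ns(ctrlr, &ns_data);
 *
 *	ctrlr_list.ctrlr_count = 1;
 *	ctrlr_list.ctrlr_list[0] = spdk_nvme_ctrlr_get_data(ctrlr)->cntlid;
 *	spdk_nvme_ctrlr_attach_ns(ctrlr, nsid, &ctrlr_list);
 */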
4666 
4667 int
4668 spdk_nvme_ctrlr_delete_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid)
4669 {
4670 	struct nvme_completion_poll_status	*status;
4671 	int					res;
4672 
4673 	if (nsid == 0) {
4674 		return -EINVAL;
4675 	}
4676 
4677 	status = calloc(1, sizeof(*status));
4678 	if (!status) {
4679 		NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n");
4680 		return -ENOMEM;
4681 	}
4682 
4683 	res = nvme_ctrlr_cmd_delete_ns(ctrlr, nsid, nvme_completion_poll_cb, status);
4684 	if (res) {
4685 		free(status);
4686 		return res;
4687 	}
4688 	if (nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock)) {
4689 		NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_delete_ns failed!\n");
4690 		if (!status->timed_out) {
4691 			free(status);
4692 		}
4693 		return -ENXIO;
4694 	}
4695 	free(status);
4696 
4697 	return nvme_ctrlr_identify_active_ns(ctrlr);
4698 }
4699 
4700 int
4701 spdk_nvme_ctrlr_format(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid,
4702 		       struct spdk_nvme_format *format)
4703 {
4704 	struct nvme_completion_poll_status	*status;
4705 	int					res;
4706 
4707 	status = calloc(1, sizeof(*status));
4708 	if (!status) {
4709 		NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n");
4710 		return -ENOMEM;
4711 	}
4712 
4713 	res = nvme_ctrlr_cmd_format(ctrlr, nsid, format, nvme_completion_poll_cb,
4714 				    status);
4715 	if (res) {
4716 		free(status);
4717 		return res;
4718 	}
4719 	if (nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock)) {
4720 		NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_format failed!\n");
4721 		if (!status->timed_out) {
4722 			free(status);
4723 		}
4724 		return -ENXIO;
4725 	}
4726 	free(status);
4727 
4728 	return spdk_nvme_ctrlr_reset(ctrlr);
4729 }
4730 
4731 int
4732 spdk_nvme_ctrlr_update_firmware(struct spdk_nvme_ctrlr *ctrlr, void *payload, uint32_t size,
4733 				int slot, enum spdk_nvme_fw_commit_action commit_action, struct spdk_nvme_status *completion_status)
4734 {
4735 	struct spdk_nvme_fw_commit		fw_commit;
4736 	struct nvme_completion_poll_status	*status;
4737 	int					res;
4738 	unsigned int				size_remaining;
4739 	unsigned int				offset;
4740 	unsigned int				transfer;
4741 	void					*p;
4742 
4743 	if (!completion_status) {
4744 		return -EINVAL;
4745 	}
4746 	memset(completion_status, 0, sizeof(struct spdk_nvme_status));
4747 	if (size % 4) {
4748 		NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_update_firmware invalid size!\n");
4749 		return -1;
4750 	}
4751 
4752 	/* Currently only SPDK_NVME_FW_COMMIT_REPLACE_IMG
4753 	 * and SPDK_NVME_FW_COMMIT_REPLACE_AND_ENABLE_IMG are supported.
4754 	 */
4755 	if ((commit_action != SPDK_NVME_FW_COMMIT_REPLACE_IMG) &&
4756 	    (commit_action != SPDK_NVME_FW_COMMIT_REPLACE_AND_ENABLE_IMG)) {
4757 		NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_update_firmware invalid command!\n");
4758 		return -1;
4759 	}
4760 
4761 	status = calloc(1, sizeof(*status));
4762 	if (!status) {
4763 		NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n");
4764 		return -ENOMEM;
4765 	}
4766 
4767 	/* Firmware download */
4768 	size_remaining = size;
4769 	offset = 0;
4770 	p = payload;
4771 
4772 	while (size_remaining > 0) {
4773 		transfer = spdk_min(size_remaining, ctrlr->min_page_size);
4774 
4775 		memset(status, 0, sizeof(*status));
4776 		res = nvme_ctrlr_cmd_fw_image_download(ctrlr, transfer, offset, p,
4777 						       nvme_completion_poll_cb,
4778 						       status);
4779 		if (res) {
4780 			free(status);
4781 			return res;
4782 		}
4783 
4784 		if (nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock)) {
4785 			NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_fw_image_download failed!\n");
4786 			if (!status->timed_out) {
4787 				free(status);
4788 			}
4789 			return -ENXIO;
4790 		}
4791 		p += transfer;
4792 		offset += transfer;
4793 		size_remaining -= transfer;
4794 	}
4795 
4796 	/* Firmware commit */
4797 	memset(&fw_commit, 0, sizeof(struct spdk_nvme_fw_commit));
4798 	fw_commit.fs = slot;
4799 	fw_commit.ca = commit_action;
4800 
4801 	memset(status, 0, sizeof(*status));
4802 	res = nvme_ctrlr_cmd_fw_commit(ctrlr, &fw_commit, nvme_completion_poll_cb,
4803 				       status);
4804 	if (res) {
4805 		free(status);
4806 		return res;
4807 	}
4808 
4809 	res = nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock);
4810 
4811 	memcpy(completion_status, &status->cpl.status, sizeof(struct spdk_nvme_status));
4812 
4813 	if (!status->timed_out) {
4814 		free(status);
4815 	}
4816 
4817 	if (res) {
4818 		if (completion_status->sct != SPDK_NVME_SCT_COMMAND_SPECIFIC ||
4819 		    completion_status->sc != SPDK_NVME_SC_FIRMWARE_REQ_NVM_RESET) {
4820 			if (completion_status->sct == SPDK_NVME_SCT_COMMAND_SPECIFIC  &&
4821 			    completion_status->sc == SPDK_NVME_SC_FIRMWARE_REQ_CONVENTIONAL_RESET) {
4822 				NVME_CTRLR_NOTICELOG(ctrlr,
4823 						     "firmware activation requires a conventional reset to be performed!\n");
4824 			} else {
4825 				NVME_CTRLR_ERRLOG(ctrlr, "nvme_ctrlr_cmd_fw_commit failed!\n");
4826 			}
4827 			return -ENXIO;
4828 		}
4829 	}
4830 
4831 	return spdk_nvme_ctrlr_reset(ctrlr);
4832 }
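
/*
 * Illustrative sketch of a firmware update using the helper above, not part of
 * this file.  The image size must be a multiple of 4 bytes, and for PCIe
 * transports the payload is assumed to live in DMA-capable memory (e.g.
 * allocated with spdk_zmalloc()); image_size is the caller's image length and
 * error handling is omitted:
 *
 *	struct spdk_nvme_status fw_status;
 *	void *image = spdk_zmalloc(image_size, 4096, NULL,
 *				   SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
 *
 *	// ... read the firmware image file into 'image' ...
 *
 *	spdk_nvme_ctrlr_update_firmware(ctrlr, image, image_size, 0,
 *					SPDK_NVME_FW_COMMIT_REPLACE_AND_ENABLE_IMG,
 *					&fw_status);
 *	spdk_free(image);
 */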
4833 
4834 int
4835 spdk_nvme_ctrlr_reserve_cmb(struct spdk_nvme_ctrlr *ctrlr)
4836 {
4837 	int rc, size;
4838 	union spdk_nvme_cmbsz_register cmbsz;
4839 
4840 	cmbsz = spdk_nvme_ctrlr_get_regs_cmbsz(ctrlr);
4841 
4842 	if (cmbsz.bits.rds == 0 || cmbsz.bits.wds == 0) {
4843 		return -ENOTSUP;
4844 	}
4845 
4846 	size = cmbsz.bits.sz * (0x1000 << (cmbsz.bits.szu * 4));
4847 
4848 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
4849 	rc = nvme_transport_ctrlr_reserve_cmb(ctrlr);
4850 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
4851 
4852 	if (rc < 0) {
4853 		return rc;
4854 	}
4855 
4856 	return size;
4857 }
4858 
4859 void *
4860 spdk_nvme_ctrlr_map_cmb(struct spdk_nvme_ctrlr *ctrlr, size_t *size)
4861 {
4862 	void *buf;
4863 
4864 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
4865 	buf = nvme_transport_ctrlr_map_cmb(ctrlr, size);
4866 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
4867 
4868 	return buf;
4869 }
4870 
4871 void
4872 spdk_nvme_ctrlr_unmap_cmb(struct spdk_nvme_ctrlr *ctrlr)
4873 {
4874 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
4875 	nvme_transport_ctrlr_unmap_cmb(ctrlr);
4876 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
4877 }
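
/*
 * Illustrative sketch of using the controller memory buffer through the three
 * calls above, not part of this file.  spdk_nvme_ctrlr_reserve_cmb() returns
 * the CMB size in bytes computed from CMBSZ above (SZ multiplied by a unit of
 * 4 KiB * 16^SZU), or a negative errno; error handling is omitted:
 *
 *	size_t cmb_size;
 *	void *cmb;
 *
 *	if (spdk_nvme_ctrlr_reserve_cmb(ctrlr) >= 0) {
 *		cmb = spdk_nvme_ctrlr_map_cmb(ctrlr, &cmb_size);
 *		if (cmb != NULL) {
 *			// ... place data buffers within [cmb, cmb + cmb_size) ...
 *			spdk_nvme_ctrlr_unmap_cmb(ctrlr);
 *		}
 *	}
 */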
4878 
4879 int
4880 spdk_nvme_ctrlr_enable_pmr(struct spdk_nvme_ctrlr *ctrlr)
4881 {
4882 	int rc;
4883 
4884 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
4885 	rc = nvme_transport_ctrlr_enable_pmr(ctrlr);
4886 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
4887 
4888 	return rc;
4889 }
4890 
4891 int
4892 spdk_nvme_ctrlr_disable_pmr(struct spdk_nvme_ctrlr *ctrlr)
4893 {
4894 	int rc;
4895 
4896 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
4897 	rc = nvme_transport_ctrlr_disable_pmr(ctrlr);
4898 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
4899 
4900 	return rc;
4901 }
4902 
4903 void *
4904 spdk_nvme_ctrlr_map_pmr(struct spdk_nvme_ctrlr *ctrlr, size_t *size)
4905 {
4906 	void *buf;
4907 
4908 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
4909 	buf = nvme_transport_ctrlr_map_pmr(ctrlr, size);
4910 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
4911 
4912 	return buf;
4913 }
4914 
4915 int
4916 spdk_nvme_ctrlr_unmap_pmr(struct spdk_nvme_ctrlr *ctrlr)
4917 {
4918 	int rc;
4919 
4920 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
4921 	rc = nvme_transport_ctrlr_unmap_pmr(ctrlr);
4922 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
4923 
4924 	return rc;
4925 }
4926 
4927 int spdk_nvme_ctrlr_read_boot_partition_start(struct spdk_nvme_ctrlr *ctrlr, void *payload,
4928 		uint32_t bprsz, uint32_t bprof, uint32_t bpid)
4929 {
4930 	union spdk_nvme_bprsel_register bprsel;
4931 	union spdk_nvme_bpinfo_register bpinfo;
4932 	uint64_t bpmbl, bpmb_size;
4933 
4934 	if (ctrlr->cap.bits.bps == 0) {
4935 		return -ENOTSUP;
4936 	}
4937 
4938 	if (nvme_ctrlr_get_bpinfo(ctrlr, &bpinfo)) {
4939 		NVME_CTRLR_ERRLOG(ctrlr, "get bpinfo failed\n");
4940 		return -EIO;
4941 	}
4942 
4943 	if (bpinfo.bits.brs == SPDK_NVME_BRS_READ_IN_PROGRESS) {
4944 		NVME_CTRLR_ERRLOG(ctrlr, "Boot Partition read already initiated\n");
4945 		return -EALREADY;
4946 	}
4947 
4948 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
4949 
4950 	bpmb_size = bprsz * 4096;
4951 	bpmbl = spdk_vtophys(payload, &bpmb_size);
4952 	if (bpmbl == SPDK_VTOPHYS_ERROR) {
4953 		NVME_CTRLR_ERRLOG(ctrlr, "spdk_vtophys of bpmbl failed\n");
4954 		nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
4955 		return -EFAULT;
4956 	}
4957 
4958 	if (bpmb_size != bprsz * 4096) {
4959 		NVME_CTRLR_ERRLOG(ctrlr, "Boot Partition buffer is not physically contiguous\n");
4960 		nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
4961 		return -EFAULT;
4962 	}
4963 
4964 	if (nvme_ctrlr_set_bpmbl(ctrlr, bpmbl)) {
4965 		NVME_CTRLR_ERRLOG(ctrlr, "set_bpmbl() failed\n");
4966 		nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
4967 		return -EIO;
4968 	}
4969 
4970 	bprsel.bits.bpid = bpid;
4971 	bprsel.bits.bprof = bprof;
4972 	bprsel.bits.bprsz = bprsz;
4973 
4974 	if (nvme_ctrlr_set_bprsel(ctrlr, &bprsel)) {
4975 		NVME_CTRLR_ERRLOG(ctrlr, "set_bprsel() failed\n");
4976 		nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
4977 		return -EIO;
4978 	}
4979 
4980 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
4981 	return 0;
4982 }
4983 
4984 int spdk_nvme_ctrlr_read_boot_partition_poll(struct spdk_nvme_ctrlr *ctrlr)
4985 {
4986 	int rc = 0;
4987 	union spdk_nvme_bpinfo_register bpinfo;
4988 
4989 	if (nvme_ctrlr_get_bpinfo(ctrlr, &bpinfo)) {
4990 		NVME_CTRLR_ERRLOG(ctrlr, "get bpinfo failed\n");
4991 		return -EIO;
4992 	}
4993 
4994 	switch (bpinfo.bits.brs) {
4995 	case SPDK_NVME_BRS_NO_READ:
4996 		NVME_CTRLR_ERRLOG(ctrlr, "Boot Partition read not initiated\n");
4997 		rc = -EINVAL;
4998 		break;
4999 	case SPDK_NVME_BRS_READ_IN_PROGRESS:
5000 		NVME_CTRLR_DEBUGLOG(ctrlr, "Boot Partition read in progress\n");
5001 		rc = -EAGAIN;
5002 		break;
5003 	case SPDK_NVME_BRS_READ_ERROR:
5004 		NVME_CTRLR_ERRLOG(ctrlr, "Error completing Boot Partition read\n");
5005 		rc = -EIO;
5006 		break;
5007 	case SPDK_NVME_BRS_READ_SUCCESS:
5008 		NVME_CTRLR_INFOLOG(ctrlr, "Boot Partition read completed successfully\n");
5009 		break;
5010 	default:
5011 		NVME_CTRLR_ERRLOG(ctrlr, "Invalid Boot Partition read status\n");
5012 		rc = -EINVAL;
5013 	}
5014 
5015 	return rc;
5016 }
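
/*
 * Illustrative sketch of reading a boot partition with the two calls above, not
 * part of this file.  The destination buffer is assumed to come from DMA-capable,
 * physically contiguous memory (e.g. spdk_zmalloc()) and bprsz is expressed in
 * 4 KiB units; error handling is omitted:
 *
 *	// Read the first 16 * 4 KiB of boot partition 0.
 *	spdk_nvme_ctrlr_read_boot_partition_start(ctrlr, buf, 16, 0, 0);
 *
 *	while (spdk_nvme_ctrlr_read_boot_partition_poll(ctrlr) == -EAGAIN) {
 *		// wait a little and poll again
 *	}
 */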
5017 
5018 static void
5019 nvme_write_boot_partition_cb(void *arg, const struct spdk_nvme_cpl *cpl)
5020 {
5021 	int res;
5022 	struct spdk_nvme_ctrlr *ctrlr = arg;
5023 	struct spdk_nvme_fw_commit fw_commit;
5024 	struct spdk_nvme_cpl err_cpl =
5025 	{.status = {.sct = SPDK_NVME_SCT_GENERIC, .sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR }};
5026 
5027 	if (spdk_nvme_cpl_is_error(cpl)) {
5028 		NVME_CTRLR_ERRLOG(ctrlr, "Write Boot Partition failed\n");
5029 		ctrlr->bp_write_cb_fn(ctrlr->bp_write_cb_arg, cpl);
5030 		return;
5031 	}
5032 
5033 	if (ctrlr->bp_ws == SPDK_NVME_BP_WS_DOWNLOADING) {
5034 		NVME_CTRLR_DEBUGLOG(ctrlr, "Boot Partition Downloading at Offset %d Success\n", ctrlr->fw_offset);
5035 		ctrlr->fw_payload += ctrlr->fw_transfer_size;
5036 		ctrlr->fw_offset += ctrlr->fw_transfer_size;
5037 		ctrlr->fw_size_remaining -= ctrlr->fw_transfer_size;
5038 		ctrlr->fw_transfer_size = spdk_min(ctrlr->fw_size_remaining, ctrlr->min_page_size);
5039 		res = nvme_ctrlr_cmd_fw_image_download(ctrlr, ctrlr->fw_transfer_size, ctrlr->fw_offset,
5040 						       ctrlr->fw_payload, nvme_write_boot_partition_cb, ctrlr);
5041 		if (res) {
5042 			NVME_CTRLR_ERRLOG(ctrlr, "nvme_ctrlr_cmd_fw_image_download failed!\n");
5043 			ctrlr->bp_write_cb_fn(ctrlr->bp_write_cb_arg, &err_cpl);
5044 			return;
5045 		}
5046 
5047 		if (ctrlr->fw_transfer_size < ctrlr->min_page_size) {
5048 			ctrlr->bp_ws = SPDK_NVME_BP_WS_DOWNLOADED;
5049 		}
5050 	} else if (ctrlr->bp_ws == SPDK_NVME_BP_WS_DOWNLOADED) {
5051 		NVME_CTRLR_DEBUGLOG(ctrlr, "Boot Partition Download Success\n");
5052 		memset(&fw_commit, 0, sizeof(struct spdk_nvme_fw_commit));
5053 		fw_commit.bpid = ctrlr->bpid;
5054 		fw_commit.ca = SPDK_NVME_FW_COMMIT_REPLACE_BOOT_PARTITION;
5055 		res = nvme_ctrlr_cmd_fw_commit(ctrlr, &fw_commit,
5056 					       nvme_write_boot_partition_cb, ctrlr);
5057 		if (res) {
5058 			NVME_CTRLR_ERRLOG(ctrlr, "nvme_ctrlr_cmd_fw_commit failed!\n");
5059 			NVME_CTRLR_ERRLOG(ctrlr, "commit action: %d\n", fw_commit.ca);
5060 			ctrlr->bp_write_cb_fn(ctrlr->bp_write_cb_arg, &err_cpl);
5061 			return;
5062 		}
5063 
5064 		ctrlr->bp_ws = SPDK_NVME_BP_WS_REPLACE;
5065 	} else if (ctrlr->bp_ws == SPDK_NVME_BP_WS_REPLACE) {
5066 		NVME_CTRLR_DEBUGLOG(ctrlr, "Boot Partition Replacement Success\n");
5067 		memset(&fw_commit, 0, sizeof(struct spdk_nvme_fw_commit));
5068 		fw_commit.bpid = ctrlr->bpid;
5069 		fw_commit.ca = SPDK_NVME_FW_COMMIT_ACTIVATE_BOOT_PARTITION;
5070 		res = nvme_ctrlr_cmd_fw_commit(ctrlr, &fw_commit,
5071 					       nvme_write_boot_partition_cb, ctrlr);
5072 		if (res) {
5073 			NVME_CTRLR_ERRLOG(ctrlr, "nvme_ctrlr_cmd_fw_commit failed!\n");
5074 			NVME_CTRLR_ERRLOG(ctrlr, "commit action: %d\n", fw_commit.ca);
5075 			ctrlr->bp_write_cb_fn(ctrlr->bp_write_cb_arg, &err_cpl);
5076 			return;
5077 		}
5078 
5079 		ctrlr->bp_ws = SPDK_NVME_BP_WS_ACTIVATE;
5080 	} else if (ctrlr->bp_ws == SPDK_NVME_BP_WS_ACTIVATE) {
5081 		NVME_CTRLR_DEBUGLOG(ctrlr, "Boot Partition Activation Success\n");
5082 		ctrlr->bp_write_cb_fn(ctrlr->bp_write_cb_arg, cpl);
5083 	} else {
5084 		NVME_CTRLR_ERRLOG(ctrlr, "Invalid Boot Partition write state\n");
5085 		ctrlr->bp_write_cb_fn(ctrlr->bp_write_cb_arg, &err_cpl);
5086 		return;
5087 	}
5088 }
5089 
5090 int spdk_nvme_ctrlr_write_boot_partition(struct spdk_nvme_ctrlr *ctrlr,
5091 		void *payload, uint32_t size, uint32_t bpid,
5092 		spdk_nvme_cmd_cb cb_fn, void *cb_arg)
5093 {
5094 	int res;
5095 
5096 	if (ctrlr->cap.bits.bps == 0) {
5097 		return -ENOTSUP;
5098 	}
5099 
5100 	ctrlr->bp_ws = SPDK_NVME_BP_WS_DOWNLOADING;
5101 	ctrlr->bpid = bpid;
5102 	ctrlr->bp_write_cb_fn = cb_fn;
5103 	ctrlr->bp_write_cb_arg = cb_arg;
5104 	ctrlr->fw_offset = 0;
5105 	ctrlr->fw_size_remaining = size;
5106 	ctrlr->fw_payload = payload;
5107 	ctrlr->fw_transfer_size = spdk_min(ctrlr->fw_size_remaining, ctrlr->min_page_size);
5108 
5109 	res = nvme_ctrlr_cmd_fw_image_download(ctrlr, ctrlr->fw_transfer_size, ctrlr->fw_offset,
5110 					       ctrlr->fw_payload, nvme_write_boot_partition_cb, ctrlr);
5111 
5112 	return res;
5113 }
5114 
5115 bool
5116 spdk_nvme_ctrlr_is_discovery(struct spdk_nvme_ctrlr *ctrlr)
5117 {
5118 	assert(ctrlr);
5119 
5120 	return !strncmp(ctrlr->trid.subnqn, SPDK_NVMF_DISCOVERY_NQN,
5121 			strlen(SPDK_NVMF_DISCOVERY_NQN));
5122 }
5123 
5124 bool
5125 spdk_nvme_ctrlr_is_fabrics(struct spdk_nvme_ctrlr *ctrlr)
5126 {
5127 	assert(ctrlr);
5128 
5129 	return spdk_nvme_trtype_is_fabrics(ctrlr->trid.trtype);
5130 }
5131 
5132 int
5133 spdk_nvme_ctrlr_security_receive(struct spdk_nvme_ctrlr *ctrlr, uint8_t secp,
5134 				 uint16_t spsp, uint8_t nssf, void *payload, size_t size)
5135 {
5136 	struct nvme_completion_poll_status	*status;
5137 	int					res;
5138 
5139 	status = calloc(1, sizeof(*status));
5140 	if (!status) {
5141 		NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n");
5142 		return -ENOMEM;
5143 	}
5144 
5145 	res = spdk_nvme_ctrlr_cmd_security_receive(ctrlr, secp, spsp, nssf, payload, size,
5146 			nvme_completion_poll_cb, status);
5147 	if (res) {
5148 		free(status);
5149 		return res;
5150 	}
5151 	if (nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock)) {
5152 		NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_cmd_security_receive failed!\n");
5153 		if (!status->timed_out) {
5154 			free(status);
5155 		}
5156 		return -ENXIO;
5157 	}
5158 	free(status);
5159 
5160 	return 0;
5161 }
5162 
5163 int
5164 spdk_nvme_ctrlr_security_send(struct spdk_nvme_ctrlr *ctrlr, uint8_t secp,
5165 			      uint16_t spsp, uint8_t nssf, void *payload, size_t size)
5166 {
5167 	struct nvme_completion_poll_status	*status;
5168 	int					res;
5169 
5170 	status = calloc(1, sizeof(*status));
5171 	if (!status) {
5172 		NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n");
5173 		return -ENOMEM;
5174 	}
5175 
5176 	res = spdk_nvme_ctrlr_cmd_security_send(ctrlr, secp, spsp, nssf, payload, size,
5177 						nvme_completion_poll_cb,
5178 						status);
5179 	if (res) {
5180 		free(status);
5181 		return res;
5182 	}
5183 	if (nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock)) {
5184 		NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_cmd_security_send failed!\n");
5185 		if (!status->timed_out) {
5186 			free(status);
5187 		}
5188 		return -ENXIO;
5189 	}
5190 
5191 	free(status);
5192 
5193 	return 0;
5194 }
5195 
5196 uint64_t
5197 spdk_nvme_ctrlr_get_flags(struct spdk_nvme_ctrlr *ctrlr)
5198 {
5199 	return ctrlr->flags;
5200 }
5201 
5202 const struct spdk_nvme_transport_id *
5203 spdk_nvme_ctrlr_get_transport_id(struct spdk_nvme_ctrlr *ctrlr)
5204 {
5205 	return &ctrlr->trid;
5206 }
5207 
5208 int32_t
5209 spdk_nvme_ctrlr_alloc_qid(struct spdk_nvme_ctrlr *ctrlr)
5210 {
5211 	uint32_t qid;
5212 
5213 	assert(ctrlr->free_io_qids);
5214 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
5215 	qid = spdk_bit_array_find_first_set(ctrlr->free_io_qids, 1);
5216 	if (qid > ctrlr->opts.num_io_queues) {
5217 		NVME_CTRLR_ERRLOG(ctrlr, "No free I/O queue IDs\n");
5218 		nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
5219 		return -1;
5220 	}
5221 
5222 	spdk_bit_array_clear(ctrlr->free_io_qids, qid);
5223 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
5224 	return qid;
5225 }
5226 
5227 void
5228 spdk_nvme_ctrlr_free_qid(struct spdk_nvme_ctrlr *ctrlr, uint16_t qid)
5229 {
5230 	assert(qid <= ctrlr->opts.num_io_queues);
5231 
5232 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
5233 
5234 	if (spdk_likely(ctrlr->free_io_qids)) {
5235 		spdk_bit_array_set(ctrlr->free_io_qids, qid);
5236 	}
5237 
5238 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
5239 }
5240 
5241 int
5242 spdk_nvme_ctrlr_get_memory_domains(const struct spdk_nvme_ctrlr *ctrlr,
5243 				   struct spdk_memory_domain **domains, int array_size)
5244 {
5245 	return nvme_transport_ctrlr_get_memory_domains(ctrlr, domains, array_size);
5246 }
5247