xref: /spdk/lib/nvme/nvme_ctrlr.c (revision 07d28d02f73bbcd7732a5421bcaebfb067b46ca0)
1 /*   SPDX-License-Identifier: BSD-3-Clause
2  *   Copyright (C) 2015 Intel Corporation. All rights reserved.
3  *   Copyright (c) 2019-2021 Mellanox Technologies LTD. All rights reserved.
4  *   Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
5  */
6 
7 #include "spdk/stdinc.h"
8 
9 #include "nvme_internal.h"
10 #include "nvme_io_msg.h"
11 
12 #include "spdk/env.h"
13 #include "spdk/string.h"
14 #include "spdk/endian.h"
15 
16 struct nvme_active_ns_ctx;
17 
18 static int nvme_ctrlr_construct_and_submit_aer(struct spdk_nvme_ctrlr *ctrlr,
19 		struct nvme_async_event_request *aer);
20 static void nvme_ctrlr_identify_active_ns_async(struct nvme_active_ns_ctx *ctx);
21 static int nvme_ctrlr_identify_ns_async(struct spdk_nvme_ns *ns);
22 static int nvme_ctrlr_identify_ns_iocs_specific_async(struct spdk_nvme_ns *ns);
23 static int nvme_ctrlr_identify_id_desc_async(struct spdk_nvme_ns *ns);
24 static void nvme_ctrlr_init_cap(struct spdk_nvme_ctrlr *ctrlr);
25 static void nvme_ctrlr_set_state(struct spdk_nvme_ctrlr *ctrlr, enum nvme_ctrlr_state state,
26 				 uint64_t timeout_in_ms);
27 
28 static int
29 nvme_ns_cmp(struct spdk_nvme_ns *ns1, struct spdk_nvme_ns *ns2)
30 {
31 	if (ns1->id < ns2->id) {
32 		return -1;
33 	} else if (ns1->id > ns2->id) {
34 		return 1;
35 	} else {
36 		return 0;
37 	}
38 }
39 
40 RB_GENERATE_STATIC(nvme_ns_tree, spdk_nvme_ns, node, nvme_ns_cmp);
41 
42 #define CTRLR_STRING(ctrlr) \
43 	((ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_TCP || ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_RDMA) ? \
44 	ctrlr->trid.subnqn : ctrlr->trid.traddr)
45 
46 #define NVME_CTRLR_ERRLOG(ctrlr, format, ...) \
47 	SPDK_ERRLOG("[%s] " format, CTRLR_STRING(ctrlr), ##__VA_ARGS__);
48 
49 #define NVME_CTRLR_WARNLOG(ctrlr, format, ...) \
50 	SPDK_WARNLOG("[%s] " format, CTRLR_STRING(ctrlr), ##__VA_ARGS__);
51 
52 #define NVME_CTRLR_NOTICELOG(ctrlr, format, ...) \
53 	SPDK_NOTICELOG("[%s] " format, CTRLR_STRING(ctrlr), ##__VA_ARGS__);
54 
55 #define NVME_CTRLR_INFOLOG(ctrlr, format, ...) \
56 	SPDK_INFOLOG(nvme, "[%s] " format, CTRLR_STRING(ctrlr), ##__VA_ARGS__);
57 
58 #ifdef DEBUG
59 #define NVME_CTRLR_DEBUGLOG(ctrlr, format, ...) \
60 	SPDK_DEBUGLOG(nvme, "[%s] " format, CTRLR_STRING(ctrlr), ##__VA_ARGS__);
61 #else
62 #define NVME_CTRLR_DEBUGLOG(ctrlr, ...) do { } while (0)
63 #endif
64 
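/*
 * Convenience wrappers around the transport's asynchronous register accessors.
 * Each macro expands to the 4- or 8-byte get/set helper for the named register,
 * using its offset within struct spdk_nvme_registers.
 */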
65 #define nvme_ctrlr_get_reg_async(ctrlr, reg, sz, cb_fn, cb_arg) \
66 	nvme_transport_ctrlr_get_reg_ ## sz ## _async(ctrlr, \
67 		offsetof(struct spdk_nvme_registers, reg), cb_fn, cb_arg)
68 
69 #define nvme_ctrlr_set_reg_async(ctrlr, reg, sz, val, cb_fn, cb_arg) \
70 	nvme_transport_ctrlr_set_reg_ ## sz ## _async(ctrlr, \
71 		offsetof(struct spdk_nvme_registers, reg), val, cb_fn, cb_arg)
72 
73 #define nvme_ctrlr_get_cc_async(ctrlr, cb_fn, cb_arg) \
74 	nvme_ctrlr_get_reg_async(ctrlr, cc, 4, cb_fn, cb_arg)
75 
76 #define nvme_ctrlr_get_csts_async(ctrlr, cb_fn, cb_arg) \
77 	nvme_ctrlr_get_reg_async(ctrlr, csts, 4, cb_fn, cb_arg)
78 
79 #define nvme_ctrlr_get_cap_async(ctrlr, cb_fn, cb_arg) \
80 	nvme_ctrlr_get_reg_async(ctrlr, cap, 8, cb_fn, cb_arg)
81 
82 #define nvme_ctrlr_get_vs_async(ctrlr, cb_fn, cb_arg) \
83 	nvme_ctrlr_get_reg_async(ctrlr, vs, 4, cb_fn, cb_arg)
84 
85 #define nvme_ctrlr_set_cc_async(ctrlr, value, cb_fn, cb_arg) \
86 	nvme_ctrlr_set_reg_async(ctrlr, cc, 4, value, cb_fn, cb_arg)
87 
88 static int
89 nvme_ctrlr_get_cc(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_cc_register *cc)
90 {
91 	return nvme_transport_ctrlr_get_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, cc.raw),
92 					      &cc->raw);
93 }
94 
95 static int
96 nvme_ctrlr_get_csts(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_csts_register *csts)
97 {
98 	return nvme_transport_ctrlr_get_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, csts.raw),
99 					      &csts->raw);
100 }
101 
102 int
103 nvme_ctrlr_get_cap(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_cap_register *cap)
104 {
105 	return nvme_transport_ctrlr_get_reg_8(ctrlr, offsetof(struct spdk_nvme_registers, cap.raw),
106 					      &cap->raw);
107 }
108 
109 int
110 nvme_ctrlr_get_vs(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_vs_register *vs)
111 {
112 	return nvme_transport_ctrlr_get_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, vs.raw),
113 					      &vs->raw);
114 }
115 
116 int
117 nvme_ctrlr_get_cmbsz(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_cmbsz_register *cmbsz)
118 {
119 	return nvme_transport_ctrlr_get_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, cmbsz.raw),
120 					      &cmbsz->raw);
121 }
122 
123 int
124 nvme_ctrlr_get_pmrcap(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_pmrcap_register *pmrcap)
125 {
126 	return nvme_transport_ctrlr_get_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, pmrcap.raw),
127 					      &pmrcap->raw);
128 }
129 
130 int
131 nvme_ctrlr_get_bpinfo(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_bpinfo_register *bpinfo)
132 {
133 	return nvme_transport_ctrlr_get_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, bpinfo.raw),
134 					      &bpinfo->raw);
135 }
136 
137 int
138 nvme_ctrlr_set_bprsel(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_bprsel_register *bprsel)
139 {
140 	return nvme_transport_ctrlr_set_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, bprsel.raw),
141 					      bprsel->raw);
142 }
143 
144 int
145 nvme_ctrlr_set_bpmbl(struct spdk_nvme_ctrlr *ctrlr, uint64_t bpmbl_value)
146 {
147 	return nvme_transport_ctrlr_set_reg_8(ctrlr, offsetof(struct spdk_nvme_registers, bpmbl),
148 					      bpmbl_value);
149 }
150 
151 static int
152 nvme_ctrlr_set_nssr(struct spdk_nvme_ctrlr *ctrlr, uint32_t nssr_value)
153 {
154 	return nvme_transport_ctrlr_set_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, nssr),
155 					      nssr_value);
156 }
157 
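/*
 * True when the controller advertises support for I/O command sets (CAP.CSS.IOCS)
 * and the host options also selected CC.CSS = IOCS.
 */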
158 bool
159 nvme_ctrlr_multi_iocs_enabled(struct spdk_nvme_ctrlr *ctrlr)
160 {
161 	return ctrlr->cap.bits.css & SPDK_NVME_CAP_CSS_IOCS &&
162 	       ctrlr->opts.command_set == SPDK_NVME_CC_CSS_IOCS;
163 }
164 
165 /* When a field in spdk_nvme_ctrlr_opts is changed and you change this function, please
166  * also update the nvme_ctrl_opts_init function in nvme_ctrlr.c.
167  */
168 void
169 spdk_nvme_ctrlr_get_default_ctrlr_opts(struct spdk_nvme_ctrlr_opts *opts, size_t opts_size)
170 {
171 	char host_id_str[SPDK_UUID_STRING_LEN];
172 
173 	assert(opts);
174 
175 	opts->opts_size = opts_size;
176 
177 #define FIELD_OK(field) \
178 	offsetof(struct spdk_nvme_ctrlr_opts, field) + sizeof(opts->field) <= opts_size
179 
180 #define SET_FIELD(field, value) \
181 	if (offsetof(struct spdk_nvme_ctrlr_opts, field) + sizeof(opts->field) <= opts_size) { \
182 		opts->field = value; \
183 	} \
184 
185 	SET_FIELD(num_io_queues, DEFAULT_MAX_IO_QUEUES);
186 	SET_FIELD(use_cmb_sqs, false);
187 	SET_FIELD(no_shn_notification, false);
188 	SET_FIELD(arb_mechanism, SPDK_NVME_CC_AMS_RR);
189 	SET_FIELD(arbitration_burst, 0);
190 	SET_FIELD(low_priority_weight, 0);
191 	SET_FIELD(medium_priority_weight, 0);
192 	SET_FIELD(high_priority_weight, 0);
193 	SET_FIELD(keep_alive_timeout_ms, MIN_KEEP_ALIVE_TIMEOUT_IN_MS);
194 	SET_FIELD(transport_retry_count, SPDK_NVME_DEFAULT_RETRY_COUNT);
195 	SET_FIELD(io_queue_size, DEFAULT_IO_QUEUE_SIZE);
196 
197 	if (nvme_driver_init() == 0) {
198 		if (FIELD_OK(hostnqn)) {
199 			spdk_uuid_fmt_lower(host_id_str, sizeof(host_id_str),
200 					    &g_spdk_nvme_driver->default_extended_host_id);
201 			snprintf(opts->hostnqn, sizeof(opts->hostnqn),
202 				 "nqn.2014-08.org.nvmexpress:uuid:%s", host_id_str);
203 		}
204 
205 		if (FIELD_OK(extended_host_id)) {
206 			memcpy(opts->extended_host_id, &g_spdk_nvme_driver->default_extended_host_id,
207 			       sizeof(opts->extended_host_id));
208 		}
209 
210 	}
211 
212 	SET_FIELD(io_queue_requests, DEFAULT_IO_QUEUE_REQUESTS);
213 
214 	if (FIELD_OK(src_addr)) {
215 		memset(opts->src_addr, 0, sizeof(opts->src_addr));
216 	}
217 
218 	if (FIELD_OK(src_svcid)) {
219 		memset(opts->src_svcid, 0, sizeof(opts->src_svcid));
220 	}
221 
222 	if (FIELD_OK(host_id)) {
223 		memset(opts->host_id, 0, sizeof(opts->host_id));
224 	}
225 
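	/* CHAR_BIT (8) cannot fit in the 3-bit CC.CSS field, so it acts as a
	 * "not explicitly set" marker; nvme_ctrlr_enable() later replaces it with
	 * a command set actually supported by the controller. */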
226 	SET_FIELD(command_set, CHAR_BIT);
227 	SET_FIELD(admin_timeout_ms, NVME_MAX_ADMIN_TIMEOUT_IN_SECS * 1000);
228 	SET_FIELD(header_digest, false);
229 	SET_FIELD(data_digest, false);
230 	SET_FIELD(disable_error_logging, false);
231 	SET_FIELD(transport_ack_timeout, SPDK_NVME_DEFAULT_TRANSPORT_ACK_TIMEOUT);
232 	SET_FIELD(admin_queue_size, DEFAULT_ADMIN_QUEUE_SIZE);
233 	SET_FIELD(fabrics_connect_timeout_us, NVME_FABRIC_CONNECT_COMMAND_TIMEOUT);
234 	SET_FIELD(disable_read_ana_log_page, false);
235 	SET_FIELD(disable_read_changed_ns_list_log_page, false);
236 
237 	if (FIELD_OK(psk)) {
238 		memset(opts->psk, 0, sizeof(opts->psk));
239 	}
240 
241 #undef FIELD_OK
242 #undef SET_FIELD
243 }
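
/*
 * Typical caller-side usage (illustrative sketch, not part of this file):
 * size the structure from the caller's view of it, then override selected
 * fields before connecting to a controller.
 *
 *	struct spdk_nvme_ctrlr_opts opts;
 *
 *	spdk_nvme_ctrlr_get_default_ctrlr_opts(&opts, sizeof(opts));
 *	opts.keep_alive_timeout_ms = 30 * 1000;
 */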
244 
245 const struct spdk_nvme_ctrlr_opts *
246 spdk_nvme_ctrlr_get_opts(struct spdk_nvme_ctrlr *ctrlr)
247 {
248 	return &ctrlr->opts;
249 }
250 
251 /**
252  * This function will be called when the process allocates the IO qpair.
253  * Note: the ctrlr_lock must be held when calling this function.
254  */
255 static void
256 nvme_ctrlr_proc_add_io_qpair(struct spdk_nvme_qpair *qpair)
257 {
258 	struct spdk_nvme_ctrlr_process	*active_proc;
259 	struct spdk_nvme_ctrlr		*ctrlr = qpair->ctrlr;
260 
261 	active_proc = nvme_ctrlr_get_current_process(ctrlr);
262 	if (active_proc) {
263 		TAILQ_INSERT_TAIL(&active_proc->allocated_io_qpairs, qpair, per_process_tailq);
264 		qpair->active_proc = active_proc;
265 	}
266 }
267 
268 /**
269  * This function will be called when the process frees the IO qpair.
270  * Note: the ctrlr_lock must be held when calling this function.
271  */
272 static void
273 nvme_ctrlr_proc_remove_io_qpair(struct spdk_nvme_qpair *qpair)
274 {
275 	struct spdk_nvme_ctrlr_process	*active_proc;
276 	struct spdk_nvme_ctrlr		*ctrlr = qpair->ctrlr;
277 	struct spdk_nvme_qpair          *active_qpair, *tmp_qpair;
278 
279 	active_proc = nvme_ctrlr_get_current_process(ctrlr);
280 	if (!active_proc) {
281 		return;
282 	}
283 
284 	TAILQ_FOREACH_SAFE(active_qpair, &active_proc->allocated_io_qpairs,
285 			   per_process_tailq, tmp_qpair) {
286 		if (active_qpair == qpair) {
287 			TAILQ_REMOVE(&active_proc->allocated_io_qpairs,
288 				     active_qpair, per_process_tailq);
289 
290 			break;
291 		}
292 	}
293 }
294 
295 void
296 spdk_nvme_ctrlr_get_default_io_qpair_opts(struct spdk_nvme_ctrlr *ctrlr,
297 		struct spdk_nvme_io_qpair_opts *opts,
298 		size_t opts_size)
299 {
300 	assert(ctrlr);
301 
302 	assert(opts);
303 
304 	memset(opts, 0, opts_size);
305 
306 #define FIELD_OK(field) \
307 	offsetof(struct spdk_nvme_io_qpair_opts, field) + sizeof(opts->field) <= opts_size
308 
309 	if (FIELD_OK(qprio)) {
310 		opts->qprio = SPDK_NVME_QPRIO_URGENT;
311 	}
312 
313 	if (FIELD_OK(io_queue_size)) {
314 		opts->io_queue_size = ctrlr->opts.io_queue_size;
315 	}
316 
317 	if (FIELD_OK(io_queue_requests)) {
318 		opts->io_queue_requests = ctrlr->opts.io_queue_requests;
319 	}
320 
321 	if (FIELD_OK(delay_cmd_submit)) {
322 		opts->delay_cmd_submit = false;
323 	}
324 
325 	if (FIELD_OK(sq.vaddr)) {
326 		opts->sq.vaddr = NULL;
327 	}
328 
329 	if (FIELD_OK(sq.paddr)) {
330 		opts->sq.paddr = 0;
331 	}
332 
333 	if (FIELD_OK(sq.buffer_size)) {
334 		opts->sq.buffer_size = 0;
335 	}
336 
337 	if (FIELD_OK(cq.vaddr)) {
338 		opts->cq.vaddr = NULL;
339 	}
340 
341 	if (FIELD_OK(cq.paddr)) {
342 		opts->cq.paddr = 0;
343 	}
344 
345 	if (FIELD_OK(cq.buffer_size)) {
346 		opts->cq.buffer_size = 0;
347 	}
348 
349 	if (FIELD_OK(create_only)) {
350 		opts->create_only = false;
351 	}
352 
353 	if (FIELD_OK(async_mode)) {
354 		opts->async_mode = false;
355 	}
356 
357 #undef FIELD_OK
358 }
359 
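/*
 * Allocate a queue ID, create the transport-specific I/O qpair, and register it
 * with both the controller and the calling process. Takes ctrlr_lock internally,
 * so the caller must not already hold it.
 */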
360 static struct spdk_nvme_qpair *
361 nvme_ctrlr_create_io_qpair(struct spdk_nvme_ctrlr *ctrlr,
362 			   const struct spdk_nvme_io_qpair_opts *opts)
363 {
364 	int32_t					qid;
365 	struct spdk_nvme_qpair			*qpair;
366 	union spdk_nvme_cc_register		cc;
367 
368 	if (!ctrlr) {
369 		return NULL;
370 	}
371 
372 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
373 	cc.raw = ctrlr->process_init_cc.raw;
374 
375 	if (opts->qprio & ~SPDK_NVME_CREATE_IO_SQ_QPRIO_MASK) {
376 		nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
377 		return NULL;
378 	}
379 
380 	/*
381 	 * Only the value SPDK_NVME_QPRIO_URGENT (0) is valid for the
382 	 * default round robin arbitration method.
383 	 */
384 	if ((cc.bits.ams == SPDK_NVME_CC_AMS_RR) && (opts->qprio != SPDK_NVME_QPRIO_URGENT)) {
385 		NVME_CTRLR_ERRLOG(ctrlr, "invalid queue priority for default round robin arbitration method\n");
386 		nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
387 		return NULL;
388 	}
389 
390 	qid = spdk_nvme_ctrlr_alloc_qid(ctrlr);
391 	if (qid < 0) {
392 		nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
393 		return NULL;
394 	}
395 
396 	qpair = nvme_transport_ctrlr_create_io_qpair(ctrlr, qid, opts);
397 	if (qpair == NULL) {
398 		NVME_CTRLR_ERRLOG(ctrlr, "nvme_transport_ctrlr_create_io_qpair() failed\n");
399 		spdk_nvme_ctrlr_free_qid(ctrlr, qid);
400 		nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
401 		return NULL;
402 	}
403 
404 	TAILQ_INSERT_TAIL(&ctrlr->active_io_qpairs, qpair, tailq);
405 
406 	nvme_ctrlr_proc_add_io_qpair(qpair);
407 
408 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
409 
410 	return qpair;
411 }
412 
413 int
414 spdk_nvme_ctrlr_connect_io_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair)
415 {
416 	int rc;
417 
418 	if (nvme_qpair_get_state(qpair) != NVME_QPAIR_DISCONNECTED) {
419 		return -EISCONN;
420 	}
421 
422 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
423 	rc = nvme_transport_ctrlr_connect_qpair(ctrlr, qpair);
424 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
425 
426 	if (ctrlr->quirks & NVME_QUIRK_DELAY_AFTER_QUEUE_ALLOC) {
427 		spdk_delay_us(100);
428 	}
429 
430 	return rc;
431 }
432 
433 void
434 spdk_nvme_ctrlr_disconnect_io_qpair(struct spdk_nvme_qpair *qpair)
435 {
436 	struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr;
437 
438 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
439 	nvme_transport_ctrlr_disconnect_qpair(ctrlr, qpair);
440 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
441 }
442 
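/*
 * Allocate an I/O qpair and, unless opts.create_only is set, connect it as well.
 * A minimal usage sketch (illustrative only):
 *
 *	struct spdk_nvme_qpair *qpair;
 *
 *	qpair = spdk_nvme_ctrlr_alloc_io_qpair(ctrlr, NULL, 0);
 *	if (qpair == NULL) {
 *		return -1;
 *	}
 *	... submit I/O on the qpair ...
 *	spdk_nvme_qpair_process_completions(qpair, 0);
 *	spdk_nvme_ctrlr_free_io_qpair(qpair);
 */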
443 struct spdk_nvme_qpair *
444 spdk_nvme_ctrlr_alloc_io_qpair(struct spdk_nvme_ctrlr *ctrlr,
445 			       const struct spdk_nvme_io_qpair_opts *user_opts,
446 			       size_t opts_size)
447 {
448 
449 	struct spdk_nvme_qpair		*qpair;
450 	struct spdk_nvme_io_qpair_opts	opts;
451 	int				rc;
452 
453 	if (spdk_unlikely(ctrlr->state != NVME_CTRLR_STATE_READY)) {
454 		/* When the controller is resetting or initializing, free_io_qids has been deleted or not yet created.
455 		 * We can't create an I/O qpair in that case. */
456 		return NULL;
457 	}
458 
459 	/*
460 	 * Get the default options, then overwrite them with the user-provided options
461 	 * up to opts_size.
462 	 *
463 	 * This allows for extensions of the opts structure without breaking
464 	 * ABI compatibility.
465 	 */
466 	spdk_nvme_ctrlr_get_default_io_qpair_opts(ctrlr, &opts, sizeof(opts));
467 	if (user_opts) {
468 		memcpy(&opts, user_opts, spdk_min(sizeof(opts), opts_size));
469 
470 		/* If the user passes buffers, make sure they're big enough for the requested queue size. */
471 		if (opts.sq.vaddr) {
472 			if (opts.sq.buffer_size < (opts.io_queue_size * sizeof(struct spdk_nvme_cmd))) {
473 				NVME_CTRLR_ERRLOG(ctrlr, "sq buffer size %" PRIx64 " is too small for sq size %zx\n",
474 						  opts.sq.buffer_size, (opts.io_queue_size * sizeof(struct spdk_nvme_cmd)));
475 				return NULL;
476 			}
477 		}
478 		if (opts.cq.vaddr) {
479 			if (opts.cq.buffer_size < (opts.io_queue_size * sizeof(struct spdk_nvme_cpl))) {
480 				NVME_CTRLR_ERRLOG(ctrlr, "cq buffer size %" PRIx64 " is too small for cq size %zx\n",
481 						  opts.cq.buffer_size, (opts.io_queue_size * sizeof(struct spdk_nvme_cpl)));
482 				return NULL;
483 			}
484 		}
485 	}
486 
487 	qpair = nvme_ctrlr_create_io_qpair(ctrlr, &opts);
488 
489 	if (qpair == NULL || opts.create_only == true) {
490 		return qpair;
491 	}
492 
493 	rc = spdk_nvme_ctrlr_connect_io_qpair(ctrlr, qpair);
494 	if (rc != 0) {
495 		NVME_CTRLR_ERRLOG(ctrlr, "nvme_transport_ctrlr_connect_io_qpair() failed\n");
496 		nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
497 		nvme_ctrlr_proc_remove_io_qpair(qpair);
498 		TAILQ_REMOVE(&ctrlr->active_io_qpairs, qpair, tailq);
499 		spdk_bit_array_set(ctrlr->free_io_qids, qpair->id);
500 		nvme_transport_ctrlr_delete_io_qpair(ctrlr, qpair);
501 		nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
502 		return NULL;
503 	}
504 
505 	return qpair;
506 }
507 
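/*
 * Try to transition a disconnected I/O qpair back to the connected state.
 * Returns 0 on success (or if the qpair is already connected), -EAGAIN if the
 * controller is resetting, the qpair is still disconnecting, or the reconnect
 * attempt fails, -ENODEV if the controller was removed, and -ENXIO if the
 * controller is failed or the qpair is being destroyed.
 */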
508 int
509 spdk_nvme_ctrlr_reconnect_io_qpair(struct spdk_nvme_qpair *qpair)
510 {
511 	struct spdk_nvme_ctrlr *ctrlr;
512 	enum nvme_qpair_state qpair_state;
513 	int rc;
514 
515 	assert(qpair != NULL);
516 	assert(nvme_qpair_is_admin_queue(qpair) == false);
517 	assert(qpair->ctrlr != NULL);
518 
519 	ctrlr = qpair->ctrlr;
520 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
521 	qpair_state = nvme_qpair_get_state(qpair);
522 
523 	if (ctrlr->is_removed) {
524 		rc = -ENODEV;
525 		goto out;
526 	}
527 
528 	if (ctrlr->is_resetting || qpair_state == NVME_QPAIR_DISCONNECTING) {
529 		rc = -EAGAIN;
530 		goto out;
531 	}
532 
533 	if (ctrlr->is_failed || qpair_state == NVME_QPAIR_DESTROYING) {
534 		rc = -ENXIO;
535 		goto out;
536 	}
537 
538 	if (qpair_state != NVME_QPAIR_DISCONNECTED) {
539 		rc = 0;
540 		goto out;
541 	}
542 
543 	rc = nvme_transport_ctrlr_connect_qpair(ctrlr, qpair);
544 	if (rc) {
545 		rc = -EAGAIN;
546 		goto out;
547 	}
548 
549 out:
550 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
551 	return rc;
552 }
553 
554 spdk_nvme_qp_failure_reason
555 spdk_nvme_ctrlr_get_admin_qp_failure_reason(struct spdk_nvme_ctrlr *ctrlr)
556 {
557 	return ctrlr->adminq->transport_failure_reason;
558 }
559 
560 /*
561  * This internal function will attempt to take the controller
562  * lock before calling disconnect on a controller qpair.
563  * Functions already holding the controller lock should
564  * call nvme_transport_ctrlr_disconnect_qpair directly.
565  */
566 void
567 nvme_ctrlr_disconnect_qpair(struct spdk_nvme_qpair *qpair)
568 {
569 	struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr;
570 
571 	assert(ctrlr != NULL);
572 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
573 	nvme_transport_ctrlr_disconnect_qpair(ctrlr, qpair);
574 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
575 }
576 
577 int
578 spdk_nvme_ctrlr_free_io_qpair(struct spdk_nvme_qpair *qpair)
579 {
580 	struct spdk_nvme_ctrlr *ctrlr;
581 
582 	if (qpair == NULL) {
583 		return 0;
584 	}
585 
586 	ctrlr = qpair->ctrlr;
587 
588 	if (qpair->in_completion_context) {
589 		/*
590 		 * There are many cases where it is convenient to delete an io qpair in the context
591 		 *  of that qpair's completion routine.  To handle this properly, set a flag here
592 		 *  so that the completion routine will perform an actual delete after the context
593 		 *  unwinds.
594 		 */
595 		qpair->delete_after_completion_context = 1;
596 		return 0;
597 	}
598 
599 	qpair->destroy_in_progress = 1;
600 
601 	nvme_transport_ctrlr_disconnect_qpair(ctrlr, qpair);
602 
603 	if (qpair->poll_group && (qpair->active_proc == nvme_ctrlr_get_current_process(ctrlr))) {
604 		spdk_nvme_poll_group_remove(qpair->poll_group->group, qpair);
605 	}
606 
607 	/* Do not retry. */
608 	nvme_qpair_set_state(qpair, NVME_QPAIR_DESTROYING);
609 
610 	/* In the multi-process case, a process may call this function on a foreign
611 	 * I/O qpair (i.e. one that this process did not create) when that qpair's process
612 	 * exits unexpectedly.  In that case, we must not try to abort any reqs associated
613 	 * with that qpair, since the callbacks will also be foreign to this process.
614 	 */
615 	if (qpair->active_proc == nvme_ctrlr_get_current_process(ctrlr)) {
616 		nvme_qpair_abort_all_queued_reqs(qpair);
617 	}
618 
619 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
620 
621 	nvme_ctrlr_proc_remove_io_qpair(qpair);
622 
623 	TAILQ_REMOVE(&ctrlr->active_io_qpairs, qpair, tailq);
624 	spdk_nvme_ctrlr_free_qid(ctrlr, qpair->id);
625 
626 	nvme_transport_ctrlr_delete_io_qpair(ctrlr, qpair);
627 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
628 	return 0;
629 }
630 
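/*
 * Populate log_page_supported[] from an Intel vendor-specific log page directory.
 * The quirk flags force-enable the latency log pages on known devices that do not
 * report them in the directory.
 */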
631 static void
632 nvme_ctrlr_construct_intel_support_log_page_list(struct spdk_nvme_ctrlr *ctrlr,
633 		struct spdk_nvme_intel_log_page_directory *log_page_directory)
634 {
635 	if (log_page_directory == NULL) {
636 		return;
637 	}
638 
639 	assert(ctrlr->cdata.vid == SPDK_PCI_VID_INTEL);
640 
641 	ctrlr->log_page_supported[SPDK_NVME_INTEL_LOG_PAGE_DIRECTORY] = true;
642 
643 	if (log_page_directory->read_latency_log_len ||
644 	    (ctrlr->quirks & NVME_INTEL_QUIRK_READ_LATENCY)) {
645 		ctrlr->log_page_supported[SPDK_NVME_INTEL_LOG_READ_CMD_LATENCY] = true;
646 	}
647 	if (log_page_directory->write_latency_log_len ||
648 	    (ctrlr->quirks & NVME_INTEL_QUIRK_WRITE_LATENCY)) {
649 		ctrlr->log_page_supported[SPDK_NVME_INTEL_LOG_WRITE_CMD_LATENCY] = true;
650 	}
651 	if (log_page_directory->temperature_statistics_log_len) {
652 		ctrlr->log_page_supported[SPDK_NVME_INTEL_LOG_TEMPERATURE] = true;
653 	}
654 	if (log_page_directory->smart_log_len) {
655 		ctrlr->log_page_supported[SPDK_NVME_INTEL_LOG_SMART] = true;
656 	}
657 	if (log_page_directory->marketing_description_log_len) {
658 		ctrlr->log_page_supported[SPDK_NVME_INTEL_MARKETING_DESCRIPTION] = true;
659 	}
660 }
661 
662 struct intel_log_pages_ctx {
663 	struct spdk_nvme_intel_log_page_directory log_page_directory;
664 	struct spdk_nvme_ctrlr *ctrlr;
665 };
666 
667 static void
668 nvme_ctrlr_set_intel_support_log_pages_done(void *arg, const struct spdk_nvme_cpl *cpl)
669 {
670 	struct intel_log_pages_ctx *ctx = arg;
671 	struct spdk_nvme_ctrlr *ctrlr = ctx->ctrlr;
672 
673 	if (!spdk_nvme_cpl_is_error(cpl)) {
674 		nvme_ctrlr_construct_intel_support_log_page_list(ctrlr, &ctx->log_page_directory);
675 	}
676 
677 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_SUPPORTED_FEATURES,
678 			     ctrlr->opts.admin_timeout_ms);
679 	free(ctx);
680 }
681 
682 static int
683 nvme_ctrlr_set_intel_support_log_pages(struct spdk_nvme_ctrlr *ctrlr)
684 {
685 	int rc = 0;
686 	struct intel_log_pages_ctx *ctx;
687 
688 	ctx = calloc(1, sizeof(*ctx));
689 	if (!ctx) {
690 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_SUPPORTED_FEATURES,
691 				     ctrlr->opts.admin_timeout_ms);
692 		return 0;
693 	}
694 
695 	ctx->ctrlr = ctrlr;
696 
697 	rc = spdk_nvme_ctrlr_cmd_get_log_page(ctrlr, SPDK_NVME_INTEL_LOG_PAGE_DIRECTORY,
698 					      SPDK_NVME_GLOBAL_NS_TAG, &ctx->log_page_directory,
699 					      sizeof(struct spdk_nvme_intel_log_page_directory),
700 					      0, nvme_ctrlr_set_intel_support_log_pages_done, ctx);
701 	if (rc != 0) {
702 		free(ctx);
703 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_SUPPORTED_FEATURES,
704 				     ctrlr->opts.admin_timeout_ms);
705 		return 0;
706 	}
707 
708 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_SUPPORTED_INTEL_LOG_PAGES,
709 			     ctrlr->opts.admin_timeout_ms);
710 
711 	return 0;
712 }
713 
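/*
 * (Re)allocate the cached ANA log page buffers. The required size is
 * sizeof(header) + nanagrpid * sizeof(group descriptor) + active_ns_count *
 * sizeof(uint32_t) for the NSID lists, so it may grow as namespaces are added.
 */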
714 static int
715 nvme_ctrlr_alloc_ana_log_page(struct spdk_nvme_ctrlr *ctrlr)
716 {
717 	uint32_t ana_log_page_size;
718 
719 	ana_log_page_size = sizeof(struct spdk_nvme_ana_page) + ctrlr->cdata.nanagrpid *
720 			    sizeof(struct spdk_nvme_ana_group_descriptor) + ctrlr->active_ns_count *
721 			    sizeof(uint32_t);
722 
723 	/* The number of active namespaces may have changed.
724 	 * Check whether the ANA log page still fits into the existing buffer.
725 	 */
726 	if (ana_log_page_size > ctrlr->ana_log_page_size) {
727 		void *new_buffer;
728 
729 		if (ctrlr->ana_log_page) {
730 			new_buffer = realloc(ctrlr->ana_log_page, ana_log_page_size);
731 		} else {
732 			new_buffer = calloc(1, ana_log_page_size);
733 		}
734 
735 		if (!new_buffer) {
736 			NVME_CTRLR_ERRLOG(ctrlr, "could not allocate ANA log page buffer, size %u\n",
737 					  ana_log_page_size);
738 			return -ENXIO;
739 		}
740 
741 		ctrlr->ana_log_page = new_buffer;
742 		if (ctrlr->copied_ana_desc) {
743 			new_buffer = realloc(ctrlr->copied_ana_desc, ana_log_page_size);
744 		} else {
745 			new_buffer = calloc(1, ana_log_page_size);
746 		}
747 
748 		if (!new_buffer) {
749 			NVME_CTRLR_ERRLOG(ctrlr, "could not allocate a buffer to parse ANA descriptor, size %u\n",
750 					  ana_log_page_size);
751 			return -ENOMEM;
752 		}
753 
754 		ctrlr->copied_ana_desc = new_buffer;
755 		ctrlr->ana_log_page_size = ana_log_page_size;
756 	}
757 
758 	return 0;
759 }
760 
761 static int
762 nvme_ctrlr_update_ana_log_page(struct spdk_nvme_ctrlr *ctrlr)
763 {
764 	struct nvme_completion_poll_status *status;
765 	int rc;
766 
767 	rc = nvme_ctrlr_alloc_ana_log_page(ctrlr);
768 	if (rc != 0) {
769 		return rc;
770 	}
771 
772 	status = calloc(1, sizeof(*status));
773 	if (status == NULL) {
774 		NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n");
775 		return -ENOMEM;
776 	}
777 
778 	rc = spdk_nvme_ctrlr_cmd_get_log_page(ctrlr, SPDK_NVME_LOG_ASYMMETRIC_NAMESPACE_ACCESS,
779 					      SPDK_NVME_GLOBAL_NS_TAG, ctrlr->ana_log_page,
780 					      ctrlr->ana_log_page_size, 0,
781 					      nvme_completion_poll_cb, status);
782 	if (rc != 0) {
783 		free(status);
784 		return rc;
785 	}
786 
787 	if (nvme_wait_for_completion_robust_lock_timeout(ctrlr->adminq, status, &ctrlr->ctrlr_lock,
788 			ctrlr->opts.admin_timeout_ms * 1000)) {
789 		if (!status->timed_out) {
790 			free(status);
791 		}
792 		return -EIO;
793 	}
794 
795 	free(status);
796 	return 0;
797 }
798 
799 static int
800 nvme_ctrlr_update_ns_ana_states(const struct spdk_nvme_ana_group_descriptor *desc,
801 				void *cb_arg)
802 {
803 	struct spdk_nvme_ctrlr *ctrlr = cb_arg;
804 	struct spdk_nvme_ns *ns;
805 	uint32_t i, nsid;
806 
807 	for (i = 0; i < desc->num_of_nsid; i++) {
808 		nsid = desc->nsid[i];
809 		if (nsid == 0 || nsid > ctrlr->cdata.nn) {
810 			continue;
811 		}
812 
813 		ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid);
814 		assert(ns != NULL);
815 
816 		ns->ana_group_id = desc->ana_group_id;
817 		ns->ana_state = desc->ana_state;
818 	}
819 
820 	return 0;
821 }
822 
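/*
 * Iterate over the ANA group descriptors in the cached log page. The remaining
 * data is memcpy'd into the separately allocated copied_ana_desc buffer before
 * each callback, so the callback sees a properly aligned descriptor even though
 * descriptors in the raw log page are packed back to back.
 */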
823 int
824 nvme_ctrlr_parse_ana_log_page(struct spdk_nvme_ctrlr *ctrlr,
825 			      spdk_nvme_parse_ana_log_page_cb cb_fn, void *cb_arg)
826 {
827 	struct spdk_nvme_ana_group_descriptor *copied_desc;
828 	uint8_t *orig_desc;
829 	uint32_t i, desc_size, copy_len;
830 	int rc = 0;
831 
832 	if (ctrlr->ana_log_page == NULL) {
833 		return -EINVAL;
834 	}
835 
836 	copied_desc = ctrlr->copied_ana_desc;
837 
838 	orig_desc = (uint8_t *)ctrlr->ana_log_page + sizeof(struct spdk_nvme_ana_page);
839 	copy_len = ctrlr->ana_log_page_size - sizeof(struct spdk_nvme_ana_page);
840 
841 	for (i = 0; i < ctrlr->ana_log_page->num_ana_group_desc; i++) {
842 		memcpy(copied_desc, orig_desc, copy_len);
843 
844 		rc = cb_fn(copied_desc, cb_arg);
845 		if (rc != 0) {
846 			break;
847 		}
848 
849 		desc_size = sizeof(struct spdk_nvme_ana_group_descriptor) +
850 			    copied_desc->num_of_nsid * sizeof(uint32_t);
851 		orig_desc += desc_size;
852 		copy_len -= desc_size;
853 	}
854 
855 	return rc;
856 }
857 
858 static int
859 nvme_ctrlr_set_supported_log_pages(struct spdk_nvme_ctrlr *ctrlr)
860 {
861 	int	rc = 0;
862 
863 	memset(ctrlr->log_page_supported, 0, sizeof(ctrlr->log_page_supported));
864 	/* Mandatory pages */
865 	ctrlr->log_page_supported[SPDK_NVME_LOG_ERROR] = true;
866 	ctrlr->log_page_supported[SPDK_NVME_LOG_HEALTH_INFORMATION] = true;
867 	ctrlr->log_page_supported[SPDK_NVME_LOG_FIRMWARE_SLOT] = true;
868 	if (ctrlr->cdata.lpa.celp) {
869 		ctrlr->log_page_supported[SPDK_NVME_LOG_COMMAND_EFFECTS_LOG] = true;
870 	}
871 
872 	if (ctrlr->cdata.cmic.ana_reporting) {
873 		ctrlr->log_page_supported[SPDK_NVME_LOG_ASYMMETRIC_NAMESPACE_ACCESS] = true;
874 		if (!ctrlr->opts.disable_read_ana_log_page) {
875 			rc = nvme_ctrlr_update_ana_log_page(ctrlr);
876 			if (rc == 0) {
877 				nvme_ctrlr_parse_ana_log_page(ctrlr, nvme_ctrlr_update_ns_ana_states,
878 							      ctrlr);
879 			}
880 		}
881 	}
882 
883 	if (ctrlr->cdata.ctratt.fdps) {
884 		ctrlr->log_page_supported[SPDK_NVME_LOG_FDP_CONFIGURATIONS] = true;
885 		ctrlr->log_page_supported[SPDK_NVME_LOG_RECLAIM_UNIT_HANDLE_USAGE] = true;
886 		ctrlr->log_page_supported[SPDK_NVME_LOG_FDP_STATISTICS] = true;
887 		ctrlr->log_page_supported[SPDK_NVME_LOG_FDP_EVENTS] = true;
888 	}
889 
890 	if (ctrlr->cdata.vid == SPDK_PCI_VID_INTEL &&
891 	    ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE &&
892 	    !(ctrlr->quirks & NVME_INTEL_QUIRK_NO_LOG_PAGES)) {
893 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_SUPPORTED_INTEL_LOG_PAGES,
894 				     ctrlr->opts.admin_timeout_ms);
895 
896 	} else {
897 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_SUPPORTED_FEATURES,
898 				     ctrlr->opts.admin_timeout_ms);
899 
900 	}
901 
902 	return rc;
903 }
904 
905 static void
906 nvme_ctrlr_set_intel_supported_features(struct spdk_nvme_ctrlr *ctrlr)
907 {
908 	ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_MAX_LBA] = true;
909 	ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_NATIVE_MAX_LBA] = true;
910 	ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_POWER_GOVERNOR_SETTING] = true;
911 	ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_SMBUS_ADDRESS] = true;
912 	ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_LED_PATTERN] = true;
913 	ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_RESET_TIMED_WORKLOAD_COUNTERS] = true;
914 	ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_LATENCY_TRACKING] = true;
915 }
916 
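/*
 * Program the Arbitration feature (synchronously) from the controller opts.
 * cdw11 layout: Arbitration Burst in bits 2:0 and, when weighted round robin
 * is supported, LPW in bits 15:8, MPW in bits 23:16 and HPW in bits 31:24.
 */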
917 static void
918 nvme_ctrlr_set_arbitration_feature(struct spdk_nvme_ctrlr *ctrlr)
919 {
920 	uint32_t cdw11;
921 	struct nvme_completion_poll_status *status;
922 
923 	if (ctrlr->opts.arbitration_burst == 0) {
924 		return;
925 	}
926 
927 	if (ctrlr->opts.arbitration_burst > 7) {
928 		NVME_CTRLR_WARNLOG(ctrlr, "Valid arbitration burst values are 0-7\n");
929 		return;
930 	}
931 
932 	status = calloc(1, sizeof(*status));
933 	if (!status) {
934 		NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n");
935 		return;
936 	}
937 
938 	cdw11 = ctrlr->opts.arbitration_burst;
939 
940 	if (spdk_nvme_ctrlr_get_flags(ctrlr) & SPDK_NVME_CTRLR_WRR_SUPPORTED) {
941 		cdw11 |= (uint32_t)ctrlr->opts.low_priority_weight << 8;
942 		cdw11 |= (uint32_t)ctrlr->opts.medium_priority_weight << 16;
943 		cdw11 |= (uint32_t)ctrlr->opts.high_priority_weight << 24;
944 	}
945 
946 	if (spdk_nvme_ctrlr_cmd_set_feature(ctrlr, SPDK_NVME_FEAT_ARBITRATION,
947 					    cdw11, 0, NULL, 0,
948 					    nvme_completion_poll_cb, status) < 0) {
949 		NVME_CTRLR_ERRLOG(ctrlr, "Set arbitration feature failed\n");
950 		free(status);
951 		return;
952 	}
953 
954 	if (nvme_wait_for_completion_timeout(ctrlr->adminq, status,
955 					     ctrlr->opts.admin_timeout_ms * 1000)) {
956 		NVME_CTRLR_ERRLOG(ctrlr, "Timed out setting the arbitration feature\n");
957 	}
958 
959 	if (!status->timed_out) {
960 		free(status);
961 	}
962 }
963 
964 static void
965 nvme_ctrlr_set_supported_features(struct spdk_nvme_ctrlr *ctrlr)
966 {
967 	memset(ctrlr->feature_supported, 0, sizeof(ctrlr->feature_supported));
968 	/* Mandatory features */
969 	ctrlr->feature_supported[SPDK_NVME_FEAT_ARBITRATION] = true;
970 	ctrlr->feature_supported[SPDK_NVME_FEAT_POWER_MANAGEMENT] = true;
971 	ctrlr->feature_supported[SPDK_NVME_FEAT_TEMPERATURE_THRESHOLD] = true;
972 	ctrlr->feature_supported[SPDK_NVME_FEAT_ERROR_RECOVERY] = true;
973 	ctrlr->feature_supported[SPDK_NVME_FEAT_NUMBER_OF_QUEUES] = true;
974 	ctrlr->feature_supported[SPDK_NVME_FEAT_INTERRUPT_COALESCING] = true;
975 	ctrlr->feature_supported[SPDK_NVME_FEAT_INTERRUPT_VECTOR_CONFIGURATION] = true;
976 	ctrlr->feature_supported[SPDK_NVME_FEAT_WRITE_ATOMICITY] = true;
977 	ctrlr->feature_supported[SPDK_NVME_FEAT_ASYNC_EVENT_CONFIGURATION] = true;
978 	/* Optional features */
979 	if (ctrlr->cdata.vwc.present) {
980 		ctrlr->feature_supported[SPDK_NVME_FEAT_VOLATILE_WRITE_CACHE] = true;
981 	}
982 	if (ctrlr->cdata.apsta.supported) {
983 		ctrlr->feature_supported[SPDK_NVME_FEAT_AUTONOMOUS_POWER_STATE_TRANSITION] = true;
984 	}
985 	if (ctrlr->cdata.hmpre) {
986 		ctrlr->feature_supported[SPDK_NVME_FEAT_HOST_MEM_BUFFER] = true;
987 	}
988 	if (ctrlr->cdata.vid == SPDK_PCI_VID_INTEL) {
989 		nvme_ctrlr_set_intel_supported_features(ctrlr);
990 	}
991 
992 	nvme_ctrlr_set_arbitration_feature(ctrlr);
993 }
994 
995 bool
996 spdk_nvme_ctrlr_is_failed(struct spdk_nvme_ctrlr *ctrlr)
997 {
998 	return ctrlr->is_failed;
999 }
1000 
1001 void
1002 nvme_ctrlr_fail(struct spdk_nvme_ctrlr *ctrlr, bool hot_remove)
1003 {
1004 	/*
1005 	 * Set the flag here and leave the actual failing of the qpairs to
1006 	 * spdk_nvme_qpair_process_completions().
1007 	 */
1008 	if (hot_remove) {
1009 		ctrlr->is_removed = true;
1010 	}
1011 
1012 	if (ctrlr->is_failed) {
1013 		NVME_CTRLR_NOTICELOG(ctrlr, "already in failed state\n");
1014 		return;
1015 	}
1016 
1017 	if (ctrlr->is_disconnecting) {
1018 		NVME_CTRLR_DEBUGLOG(ctrlr, "already disconnecting\n");
1019 		return;
1020 	}
1021 
1022 	ctrlr->is_failed = true;
1023 	nvme_transport_ctrlr_disconnect_qpair(ctrlr, ctrlr->adminq);
1024 	NVME_CTRLR_ERRLOG(ctrlr, "in failed state.\n");
1025 }
1026 
1027 /**
1028  * This public API function will try to take the controller lock.
1029  * Any private functions being called from a thread already holding
1030  * the ctrlr lock should call nvme_ctrlr_fail directly.
1031  */
1032 void
1033 spdk_nvme_ctrlr_fail(struct spdk_nvme_ctrlr *ctrlr)
1034 {
1035 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
1036 	nvme_ctrlr_fail(ctrlr, false);
1037 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
1038 }
1039 
1040 static void
1041 nvme_ctrlr_shutdown_set_cc_done(void *_ctx, uint64_t value, const struct spdk_nvme_cpl *cpl)
1042 {
1043 	struct nvme_ctrlr_detach_ctx *ctx = _ctx;
1044 	struct spdk_nvme_ctrlr *ctrlr = ctx->ctrlr;
1045 
1046 	if (spdk_nvme_cpl_is_error(cpl)) {
1047 		NVME_CTRLR_ERRLOG(ctrlr, "Failed to write CC.SHN\n");
1048 		ctx->shutdown_complete = true;
1049 		return;
1050 	}
1051 
1052 	if (ctrlr->opts.no_shn_notification) {
1053 		ctx->shutdown_complete = true;
1054 		return;
1055 	}
1056 
1057 	/*
1058 	 * The NVMe specification defines RTD3E as the time from
1059 	 *  setting SHN = 1 until the controller sets SHST = 10b.
1060 	 * If the device doesn't report RTD3 entry latency, or if it
1061 	 *  reports RTD3 entry latency less than 10 seconds, pick
1062 	 *  10 seconds as a reasonable amount of time to
1063 	 *  wait before proceeding.
1064 	 */
1065 	NVME_CTRLR_DEBUGLOG(ctrlr, "RTD3E = %" PRIu32 " us\n", ctrlr->cdata.rtd3e);
1066 	ctx->shutdown_timeout_ms = SPDK_CEIL_DIV(ctrlr->cdata.rtd3e, 1000);
1067 	ctx->shutdown_timeout_ms = spdk_max(ctx->shutdown_timeout_ms, 10000);
1068 	NVME_CTRLR_DEBUGLOG(ctrlr, "shutdown timeout = %" PRIu32 " ms\n", ctx->shutdown_timeout_ms);
1069 
1070 	ctx->shutdown_start_tsc = spdk_get_ticks();
1071 	ctx->state = NVME_CTRLR_DETACH_CHECK_CSTS;
1072 }
1073 
1074 static void
1075 nvme_ctrlr_shutdown_get_cc_done(void *_ctx, uint64_t value, const struct spdk_nvme_cpl *cpl)
1076 {
1077 	struct nvme_ctrlr_detach_ctx *ctx = _ctx;
1078 	struct spdk_nvme_ctrlr *ctrlr = ctx->ctrlr;
1079 	union spdk_nvme_cc_register cc;
1080 	int rc;
1081 
1082 	if (spdk_nvme_cpl_is_error(cpl)) {
1083 		NVME_CTRLR_ERRLOG(ctrlr, "Failed to read the CC register\n");
1084 		ctx->shutdown_complete = true;
1085 		return;
1086 	}
1087 
1088 	assert(value <= UINT32_MAX);
1089 	cc.raw = (uint32_t)value;
1090 
1091 	if (ctrlr->opts.no_shn_notification) {
1092 		NVME_CTRLR_INFOLOG(ctrlr, "Disable SSD without shutdown notification\n");
1093 		if (cc.bits.en == 0) {
1094 			ctx->shutdown_complete = true;
1095 			return;
1096 		}
1097 
1098 		cc.bits.en = 0;
1099 	} else {
1100 		cc.bits.shn = SPDK_NVME_SHN_NORMAL;
1101 	}
1102 
1103 	rc = nvme_ctrlr_set_cc_async(ctrlr, cc.raw, nvme_ctrlr_shutdown_set_cc_done, ctx);
1104 	if (rc != 0) {
1105 		NVME_CTRLR_ERRLOG(ctrlr, "Failed to write CC.SHN\n");
1106 		ctx->shutdown_complete = true;
1107 	}
1108 }
1109 
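/*
 * Start an asynchronous controller shutdown: read CC, then either clear CC.EN
 * (when no_shn_notification is set) or request a normal shutdown via CC.SHN.
 * Progress is driven by nvme_ctrlr_shutdown_poll_async() until the context
 * reports completion.
 */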
1110 static void
1111 nvme_ctrlr_shutdown_async(struct spdk_nvme_ctrlr *ctrlr,
1112 			  struct nvme_ctrlr_detach_ctx *ctx)
1113 {
1114 	int rc;
1115 
1116 	if (ctrlr->is_removed) {
1117 		ctx->shutdown_complete = true;
1118 		return;
1119 	}
1120 
1121 	if (ctrlr->adminq == NULL ||
1122 	    ctrlr->adminq->transport_failure_reason != SPDK_NVME_QPAIR_FAILURE_NONE) {
1123 		NVME_CTRLR_INFOLOG(ctrlr, "Adminq is not connected.\n");
1124 		ctx->shutdown_complete = true;
1125 		return;
1126 	}
1127 
1128 	ctx->state = NVME_CTRLR_DETACH_SET_CC;
1129 	rc = nvme_ctrlr_get_cc_async(ctrlr, nvme_ctrlr_shutdown_get_cc_done, ctx);
1130 	if (rc != 0) {
1131 		NVME_CTRLR_ERRLOG(ctrlr, "Failed to read the CC register\n");
1132 		ctx->shutdown_complete = true;
1133 	}
1134 }
1135 
1136 static void
1137 nvme_ctrlr_shutdown_get_csts_done(void *_ctx, uint64_t value, const struct spdk_nvme_cpl *cpl)
1138 {
1139 	struct nvme_ctrlr_detach_ctx *ctx = _ctx;
1140 
1141 	if (spdk_nvme_cpl_is_error(cpl)) {
1142 		NVME_CTRLR_ERRLOG(ctx->ctrlr, "Failed to read the CSTS register\n");
1143 		ctx->shutdown_complete = true;
1144 		return;
1145 	}
1146 
1147 	assert(value <= UINT32_MAX);
1148 	ctx->csts.raw = (uint32_t)value;
1149 	ctx->state = NVME_CTRLR_DETACH_GET_CSTS_DONE;
1150 }
1151 
1152 static int
1153 nvme_ctrlr_shutdown_poll_async(struct spdk_nvme_ctrlr *ctrlr,
1154 			       struct nvme_ctrlr_detach_ctx *ctx)
1155 {
1156 	union spdk_nvme_csts_register	csts;
1157 	uint32_t			ms_waited;
1158 
1159 	switch (ctx->state) {
1160 	case NVME_CTRLR_DETACH_SET_CC:
1161 	case NVME_CTRLR_DETACH_GET_CSTS:
1162 		/* We're still waiting for the register operation to complete */
1163 		spdk_nvme_qpair_process_completions(ctrlr->adminq, 0);
1164 		return -EAGAIN;
1165 
1166 	case NVME_CTRLR_DETACH_CHECK_CSTS:
1167 		ctx->state = NVME_CTRLR_DETACH_GET_CSTS;
1168 		if (nvme_ctrlr_get_csts_async(ctrlr, nvme_ctrlr_shutdown_get_csts_done, ctx)) {
1169 			NVME_CTRLR_ERRLOG(ctrlr, "Failed to read the CSTS register\n");
1170 			return -EIO;
1171 		}
1172 		return -EAGAIN;
1173 
1174 	case NVME_CTRLR_DETACH_GET_CSTS_DONE:
1175 		ctx->state = NVME_CTRLR_DETACH_CHECK_CSTS;
1176 		break;
1177 
1178 	default:
1179 		assert(0 && "Should never happen");
1180 		return -EINVAL;
1181 	}
1182 
1183 	ms_waited = (spdk_get_ticks() - ctx->shutdown_start_tsc) * 1000 / spdk_get_ticks_hz();
1184 	csts.raw = ctx->csts.raw;
1185 
1186 	if (csts.bits.shst == SPDK_NVME_SHST_COMPLETE) {
1187 		NVME_CTRLR_DEBUGLOG(ctrlr, "shutdown complete in %u milliseconds\n", ms_waited);
1188 		return 0;
1189 	}
1190 
1191 	if (ms_waited < ctx->shutdown_timeout_ms) {
1192 		return -EAGAIN;
1193 	}
1194 
1195 	NVME_CTRLR_ERRLOG(ctrlr, "did not shutdown within %u milliseconds\n",
1196 			  ctx->shutdown_timeout_ms);
1197 	if (ctrlr->quirks & NVME_QUIRK_SHST_COMPLETE) {
1198 		NVME_CTRLR_ERRLOG(ctrlr, "likely due to shutdown handling in the VMWare emulated NVMe SSD\n");
1199 	}
1200 
1201 	return 0;
1202 }
1203 
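/* CAP.TO is expressed in 500 ms units; convert it to milliseconds. */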
1204 static inline uint64_t
1205 nvme_ctrlr_get_ready_timeout(struct spdk_nvme_ctrlr *ctrlr)
1206 {
1207 	return ctrlr->cap.bits.to * 500;
1208 }
1209 
1210 static void
1211 nvme_ctrlr_set_cc_en_done(void *ctx, uint64_t value, const struct spdk_nvme_cpl *cpl)
1212 {
1213 	struct spdk_nvme_ctrlr *ctrlr = ctx;
1214 
1215 	if (spdk_nvme_cpl_is_error(cpl)) {
1216 		NVME_CTRLR_ERRLOG(ctrlr, "Failed to set the CC register\n");
1217 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
1218 		return;
1219 	}
1220 
1221 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1,
1222 			     nvme_ctrlr_get_ready_timeout(ctrlr));
1223 }
1224 
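/*
 * Enable the controller: pick an I/O command set that CAP.CSS actually supports,
 * program the queue entry sizes, memory page size and arbitration mechanism into
 * CC, then write CC with EN = 1 asynchronously. The completion callback advances
 * the state machine to wait for CSTS.RDY = 1.
 */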
1225 static int
1226 nvme_ctrlr_enable(struct spdk_nvme_ctrlr *ctrlr)
1227 {
1228 	union spdk_nvme_cc_register	cc;
1229 	int				rc;
1230 
1231 	rc = nvme_transport_ctrlr_enable(ctrlr);
1232 	if (rc != 0) {
1233 		NVME_CTRLR_ERRLOG(ctrlr, "transport ctrlr_enable failed\n");
1234 		return rc;
1235 	}
1236 
1237 	cc.raw = ctrlr->process_init_cc.raw;
1238 	if (cc.bits.en != 0) {
1239 		NVME_CTRLR_ERRLOG(ctrlr, "called with CC.EN = 1\n");
1240 		return -EINVAL;
1241 	}
1242 
1243 	cc.bits.en = 1;
1244 	cc.bits.css = 0;
1245 	cc.bits.shn = 0;
1246 	cc.bits.iosqes = 6; /* SQ entry size == 64 == 2^6 */
1247 	cc.bits.iocqes = 4; /* CQ entry size == 16 == 2^4 */
1248 
1249 	/* Page size is 2 ^ (12 + mps). */
1250 	cc.bits.mps = spdk_u32log2(ctrlr->page_size) - 12;
1251 
1252 	/*
1253 	 * Since NVMe 1.0, a controller should have at least one bit set in CAP.CSS.
1254 	 * A controller that does not have any bit set in CAP.CSS is not spec compliant.
1255 	 * Try to support such a controller regardless.
1256 	 */
1257 	if (ctrlr->cap.bits.css == 0) {
1258 		NVME_CTRLR_INFOLOG(ctrlr, "Drive reports no command sets supported. Assuming NVM is supported.\n");
1259 		ctrlr->cap.bits.css = SPDK_NVME_CAP_CSS_NVM;
1260 	}
1261 
1262 	/*
1263 	 * If the user did not explicitly request a command set, or supplied a value larger than
1264 	 * what can be saved in CC.CSS, use the most reasonable default.
1265 	 */
1266 	if (ctrlr->opts.command_set >= CHAR_BIT) {
1267 		if (ctrlr->cap.bits.css & SPDK_NVME_CAP_CSS_IOCS) {
1268 			ctrlr->opts.command_set = SPDK_NVME_CC_CSS_IOCS;
1269 		} else if (ctrlr->cap.bits.css & SPDK_NVME_CAP_CSS_NVM) {
1270 			ctrlr->opts.command_set = SPDK_NVME_CC_CSS_NVM;
1271 		} else if (ctrlr->cap.bits.css & SPDK_NVME_CAP_CSS_NOIO) {
1272 			ctrlr->opts.command_set = SPDK_NVME_CC_CSS_NOIO;
1273 		} else {
1274 			/* Invalid supported bits detected, falling back to NVM. */
1275 			ctrlr->opts.command_set = SPDK_NVME_CC_CSS_NVM;
1276 		}
1277 	}
1278 
1279 	/* Verify that the selected command set is supported by the controller. */
1280 	if (!(ctrlr->cap.bits.css & (1u << ctrlr->opts.command_set))) {
1281 		NVME_CTRLR_DEBUGLOG(ctrlr, "Requested I/O command set %u but supported mask is 0x%x\n",
1282 				    ctrlr->opts.command_set, ctrlr->cap.bits.css);
1283 		NVME_CTRLR_DEBUGLOG(ctrlr, "Falling back to NVM. Assuming NVM is supported.\n");
1284 		ctrlr->opts.command_set = SPDK_NVME_CC_CSS_NVM;
1285 	}
1286 
1287 	cc.bits.css = ctrlr->opts.command_set;
1288 
1289 	switch (ctrlr->opts.arb_mechanism) {
1290 	case SPDK_NVME_CC_AMS_RR:
1291 		break;
1292 	case SPDK_NVME_CC_AMS_WRR:
1293 		if (SPDK_NVME_CAP_AMS_WRR & ctrlr->cap.bits.ams) {
1294 			break;
1295 		}
1296 		return -EINVAL;
1297 	case SPDK_NVME_CC_AMS_VS:
1298 		if (SPDK_NVME_CAP_AMS_VS & ctrlr->cap.bits.ams) {
1299 			break;
1300 		}
1301 		return -EINVAL;
1302 	default:
1303 		return -EINVAL;
1304 	}
1305 
1306 	cc.bits.ams = ctrlr->opts.arb_mechanism;
1307 	ctrlr->process_init_cc.raw = cc.raw;
1308 
1309 	if (nvme_ctrlr_set_cc_async(ctrlr, cc.raw, nvme_ctrlr_set_cc_en_done, ctrlr)) {
1310 		NVME_CTRLR_ERRLOG(ctrlr, "set_cc() failed\n");
1311 		return -EIO;
1312 	}
1313 
1314 	return 0;
1315 }
1316 
1317 static const char *
1318 nvme_ctrlr_state_string(enum nvme_ctrlr_state state)
1319 {
1320 	switch (state) {
1321 	case NVME_CTRLR_STATE_INIT_DELAY:
1322 		return "delay init";
1323 	case NVME_CTRLR_STATE_CONNECT_ADMINQ:
1324 		return "connect adminq";
1325 	case NVME_CTRLR_STATE_WAIT_FOR_CONNECT_ADMINQ:
1326 		return "wait for connect adminq";
1327 	case NVME_CTRLR_STATE_READ_VS:
1328 		return "read vs";
1329 	case NVME_CTRLR_STATE_READ_VS_WAIT_FOR_VS:
1330 		return "read vs wait for vs";
1331 	case NVME_CTRLR_STATE_READ_CAP:
1332 		return "read cap";
1333 	case NVME_CTRLR_STATE_READ_CAP_WAIT_FOR_CAP:
1334 		return "read cap wait for cap";
1335 	case NVME_CTRLR_STATE_CHECK_EN:
1336 		return "check en";
1337 	case NVME_CTRLR_STATE_CHECK_EN_WAIT_FOR_CC:
1338 		return "check en wait for cc";
1339 	case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1:
1340 		return "disable and wait for CSTS.RDY = 1";
1341 	case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1_WAIT_FOR_CSTS:
1342 		return "disable and wait for CSTS.RDY = 1 reg";
1343 	case NVME_CTRLR_STATE_SET_EN_0:
1344 		return "set CC.EN = 0";
1345 	case NVME_CTRLR_STATE_SET_EN_0_WAIT_FOR_CC:
1346 		return "set CC.EN = 0 wait for cc";
1347 	case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0:
1348 		return "disable and wait for CSTS.RDY = 0";
1349 	case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0_WAIT_FOR_CSTS:
1350 		return "disable and wait for CSTS.RDY = 0 reg";
1351 	case NVME_CTRLR_STATE_DISABLED:
1352 		return "controller is disabled";
1353 	case NVME_CTRLR_STATE_ENABLE:
1354 		return "enable controller by writing CC.EN = 1";
1355 	case NVME_CTRLR_STATE_ENABLE_WAIT_FOR_CC:
1356 		return "enable controller by writing CC.EN = 1 reg";
1357 	case NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1:
1358 		return "wait for CSTS.RDY = 1";
1359 	case NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1_WAIT_FOR_CSTS:
1360 		return "wait for CSTS.RDY = 1 reg";
1361 	case NVME_CTRLR_STATE_RESET_ADMIN_QUEUE:
1362 		return "reset admin queue";
1363 	case NVME_CTRLR_STATE_IDENTIFY:
1364 		return "identify controller";
1365 	case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY:
1366 		return "wait for identify controller";
1367 	case NVME_CTRLR_STATE_CONFIGURE_AER:
1368 		return "configure AER";
1369 	case NVME_CTRLR_STATE_WAIT_FOR_CONFIGURE_AER:
1370 		return "wait for configure aer";
1371 	case NVME_CTRLR_STATE_SET_KEEP_ALIVE_TIMEOUT:
1372 		return "set keep alive timeout";
1373 	case NVME_CTRLR_STATE_WAIT_FOR_KEEP_ALIVE_TIMEOUT:
1374 		return "wait for set keep alive timeout";
1375 	case NVME_CTRLR_STATE_IDENTIFY_IOCS_SPECIFIC:
1376 		return "identify controller iocs specific";
1377 	case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_IOCS_SPECIFIC:
1378 		return "wait for identify controller iocs specific";
1379 	case NVME_CTRLR_STATE_GET_ZNS_CMD_EFFECTS_LOG:
1380 		return "get zns cmd and effects log page";
1381 	case NVME_CTRLR_STATE_WAIT_FOR_GET_ZNS_CMD_EFFECTS_LOG:
1382 		return "wait for get zns cmd and effects log page";
1383 	case NVME_CTRLR_STATE_SET_NUM_QUEUES:
1384 		return "set number of queues";
1385 	case NVME_CTRLR_STATE_WAIT_FOR_SET_NUM_QUEUES:
1386 		return "wait for set number of queues";
1387 	case NVME_CTRLR_STATE_IDENTIFY_ACTIVE_NS:
1388 		return "identify active ns";
1389 	case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_ACTIVE_NS:
1390 		return "wait for identify active ns";
1391 	case NVME_CTRLR_STATE_IDENTIFY_NS:
1392 		return "identify ns";
1393 	case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_NS:
1394 		return "wait for identify ns";
1395 	case NVME_CTRLR_STATE_IDENTIFY_ID_DESCS:
1396 		return "identify namespace id descriptors";
1397 	case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_ID_DESCS:
1398 		return "wait for identify namespace id descriptors";
1399 	case NVME_CTRLR_STATE_IDENTIFY_NS_IOCS_SPECIFIC:
1400 		return "identify ns iocs specific";
1401 	case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_NS_IOCS_SPECIFIC:
1402 		return "wait for identify ns iocs specific";
1403 	case NVME_CTRLR_STATE_SET_SUPPORTED_LOG_PAGES:
1404 		return "set supported log pages";
1405 	case NVME_CTRLR_STATE_SET_SUPPORTED_INTEL_LOG_PAGES:
1406 		return "set supported INTEL log pages";
1407 	case NVME_CTRLR_STATE_WAIT_FOR_SUPPORTED_INTEL_LOG_PAGES:
1408 		return "wait for supported INTEL log pages";
1409 	case NVME_CTRLR_STATE_SET_SUPPORTED_FEATURES:
1410 		return "set supported features";
1411 	case NVME_CTRLR_STATE_SET_DB_BUF_CFG:
1412 		return "set doorbell buffer config";
1413 	case NVME_CTRLR_STATE_WAIT_FOR_DB_BUF_CFG:
1414 		return "wait for doorbell buffer config";
1415 	case NVME_CTRLR_STATE_SET_HOST_ID:
1416 		return "set host ID";
1417 	case NVME_CTRLR_STATE_WAIT_FOR_HOST_ID:
1418 		return "wait for set host ID";
1419 	case NVME_CTRLR_STATE_TRANSPORT_READY:
1420 		return "transport ready";
1421 	case NVME_CTRLR_STATE_READY:
1422 		return "ready";
1423 	case NVME_CTRLR_STATE_ERROR:
1424 		return "error";
1425 	}
1426 	return "unknown";
1427 }
1428 
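/*
 * Set the controller state and arm its timeout. NVME_TIMEOUT_KEEP_EXISTING keeps
 * the current deadline, NVME_TIMEOUT_INFINITE disables it, and any other value is
 * converted from milliseconds to a tick-based deadline (falling back to no timeout
 * on overflow).
 */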
1429 static void
1430 _nvme_ctrlr_set_state(struct spdk_nvme_ctrlr *ctrlr, enum nvme_ctrlr_state state,
1431 		      uint64_t timeout_in_ms, bool quiet)
1432 {
1433 	uint64_t ticks_per_ms, timeout_in_ticks, now_ticks;
1434 
1435 	ctrlr->state = state;
1436 	if (timeout_in_ms == NVME_TIMEOUT_KEEP_EXISTING) {
1437 		if (!quiet) {
1438 			NVME_CTRLR_DEBUGLOG(ctrlr, "setting state to %s (keeping existing timeout)\n",
1439 					    nvme_ctrlr_state_string(ctrlr->state));
1440 		}
1441 		return;
1442 	}
1443 
1444 	if (timeout_in_ms == NVME_TIMEOUT_INFINITE) {
1445 		goto inf;
1446 	}
1447 
1448 	ticks_per_ms = spdk_get_ticks_hz() / 1000;
1449 	if (timeout_in_ms > UINT64_MAX / ticks_per_ms) {
1450 		NVME_CTRLR_ERRLOG(ctrlr,
1451 				  "Specified timeout would cause integer overflow. Defaulting to no timeout.\n");
1452 		goto inf;
1453 	}
1454 
1455 	now_ticks = spdk_get_ticks();
1456 	timeout_in_ticks = timeout_in_ms * ticks_per_ms;
1457 	if (timeout_in_ticks > UINT64_MAX - now_ticks) {
1458 		NVME_CTRLR_ERRLOG(ctrlr,
1459 				  "Specified timeout would cause integer overflow. Defaulting to no timeout.\n");
1460 		goto inf;
1461 	}
1462 
1463 	ctrlr->state_timeout_tsc = timeout_in_ticks + now_ticks;
1464 	if (!quiet) {
1465 		NVME_CTRLR_DEBUGLOG(ctrlr, "setting state to %s (timeout %" PRIu64 " ms)\n",
1466 				    nvme_ctrlr_state_string(ctrlr->state), timeout_in_ms);
1467 	}
1468 	return;
1469 inf:
1470 	if (!quiet) {
1471 		NVME_CTRLR_DEBUGLOG(ctrlr, "setting state to %s (no timeout)\n",
1472 				    nvme_ctrlr_state_string(ctrlr->state));
1473 	}
1474 	ctrlr->state_timeout_tsc = NVME_TIMEOUT_INFINITE;
1475 }
1476 
1477 static void
1478 nvme_ctrlr_set_state(struct spdk_nvme_ctrlr *ctrlr, enum nvme_ctrlr_state state,
1479 		     uint64_t timeout_in_ms)
1480 {
1481 	_nvme_ctrlr_set_state(ctrlr, state, timeout_in_ms, false);
1482 }
1483 
1484 static void
1485 nvme_ctrlr_set_state_quiet(struct spdk_nvme_ctrlr *ctrlr, enum nvme_ctrlr_state state,
1486 			   uint64_t timeout_in_ms)
1487 {
1488 	_nvme_ctrlr_set_state(ctrlr, state, timeout_in_ms, true);
1489 }
1490 
1491 static void
1492 nvme_ctrlr_free_zns_specific_data(struct spdk_nvme_ctrlr *ctrlr)
1493 {
1494 	spdk_free(ctrlr->cdata_zns);
1495 	ctrlr->cdata_zns = NULL;
1496 }
1497 
1498 static void
1499 nvme_ctrlr_free_iocs_specific_data(struct spdk_nvme_ctrlr *ctrlr)
1500 {
1501 	nvme_ctrlr_free_zns_specific_data(ctrlr);
1502 }
1503 
1504 static void
1505 nvme_ctrlr_free_doorbell_buffer(struct spdk_nvme_ctrlr *ctrlr)
1506 {
1507 	if (ctrlr->shadow_doorbell) {
1508 		spdk_free(ctrlr->shadow_doorbell);
1509 		ctrlr->shadow_doorbell = NULL;
1510 	}
1511 
1512 	if (ctrlr->eventidx) {
1513 		spdk_free(ctrlr->eventidx);
1514 		ctrlr->eventidx = NULL;
1515 	}
1516 }
1517 
1518 static void
1519 nvme_ctrlr_set_doorbell_buffer_config_done(void *arg, const struct spdk_nvme_cpl *cpl)
1520 {
1521 	struct spdk_nvme_ctrlr *ctrlr = (struct spdk_nvme_ctrlr *)arg;
1522 
1523 	if (spdk_nvme_cpl_is_error(cpl)) {
1524 		NVME_CTRLR_WARNLOG(ctrlr, "Doorbell buffer config failed\n");
1525 	} else {
1526 		NVME_CTRLR_INFOLOG(ctrlr, "Doorbell buffer config enabled\n");
1527 	}
1528 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_HOST_ID,
1529 			     ctrlr->opts.admin_timeout_ms);
1530 }
1531 
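/*
 * For PCIe controllers that advertise OACS.doorbell_buffer_config, allocate one
 * page each for the shadow doorbell and EventIdx buffers and pass their physical
 * addresses as PRP1/PRP2 of the Doorbell Buffer Config admin command.
 */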
1532 static int
1533 nvme_ctrlr_set_doorbell_buffer_config(struct spdk_nvme_ctrlr *ctrlr)
1534 {
1535 	int rc = 0;
1536 	uint64_t prp1, prp2, len;
1537 
1538 	if (!ctrlr->cdata.oacs.doorbell_buffer_config) {
1539 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_HOST_ID,
1540 				     ctrlr->opts.admin_timeout_ms);
1541 		return 0;
1542 	}
1543 
1544 	if (ctrlr->trid.trtype != SPDK_NVME_TRANSPORT_PCIE) {
1545 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_HOST_ID,
1546 				     ctrlr->opts.admin_timeout_ms);
1547 		return 0;
1548 	}
1549 
1550 	/* only 1 page size for doorbell buffer */
1551 	ctrlr->shadow_doorbell = spdk_zmalloc(ctrlr->page_size, ctrlr->page_size,
1552 					      NULL, SPDK_ENV_LCORE_ID_ANY,
1553 					      SPDK_MALLOC_DMA | SPDK_MALLOC_SHARE);
1554 	if (ctrlr->shadow_doorbell == NULL) {
1555 		rc = -ENOMEM;
1556 		goto error;
1557 	}
1558 
1559 	len = ctrlr->page_size;
1560 	prp1 = spdk_vtophys(ctrlr->shadow_doorbell, &len);
1561 	if (prp1 == SPDK_VTOPHYS_ERROR || len != ctrlr->page_size) {
1562 		rc = -EFAULT;
1563 		goto error;
1564 	}
1565 
1566 	ctrlr->eventidx = spdk_zmalloc(ctrlr->page_size, ctrlr->page_size,
1567 				       NULL, SPDK_ENV_LCORE_ID_ANY,
1568 				       SPDK_MALLOC_DMA | SPDK_MALLOC_SHARE);
1569 	if (ctrlr->eventidx == NULL) {
1570 		rc = -ENOMEM;
1571 		goto error;
1572 	}
1573 
1574 	len = ctrlr->page_size;
1575 	prp2 = spdk_vtophys(ctrlr->eventidx, &len);
1576 	if (prp2 == SPDK_VTOPHYS_ERROR || len != ctrlr->page_size) {
1577 		rc = -EFAULT;
1578 		goto error;
1579 	}
1580 
1581 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_DB_BUF_CFG,
1582 			     ctrlr->opts.admin_timeout_ms);
1583 
1584 	rc = nvme_ctrlr_cmd_doorbell_buffer_config(ctrlr, prp1, prp2,
1585 			nvme_ctrlr_set_doorbell_buffer_config_done, ctrlr);
1586 	if (rc != 0) {
1587 		goto error;
1588 	}
1589 
1590 	return 0;
1591 
1592 error:
1593 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
1594 	nvme_ctrlr_free_doorbell_buffer(ctrlr);
1595 	return rc;
1596 }
1597 
1598 void
1599 nvme_ctrlr_abort_queued_aborts(struct spdk_nvme_ctrlr *ctrlr)
1600 {
1601 	struct nvme_request	*req, *tmp;
1602 	struct spdk_nvme_cpl	cpl = {};
1603 
1604 	cpl.status.sc = SPDK_NVME_SC_ABORTED_SQ_DELETION;
1605 	cpl.status.sct = SPDK_NVME_SCT_GENERIC;
1606 
1607 	STAILQ_FOREACH_SAFE(req, &ctrlr->queued_aborts, stailq, tmp) {
1608 		STAILQ_REMOVE_HEAD(&ctrlr->queued_aborts, stailq);
1609 		ctrlr->outstanding_aborts++;
1610 
1611 		nvme_complete_request(req->cb_fn, req->cb_arg, req->qpair, req, &cpl);
1612 		nvme_free_request(req);
1613 	}
1614 }
1615 
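/*
 * Begin disconnecting the controller as the first phase of a reset: mark it as
 * resetting/disconnecting, complete queued aborts and outstanding AERs, and
 * disconnect the admin qpair. Returns -EBUSY if a reset is already in progress
 * and -ENXIO if the controller has been removed.
 */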
1616 static int
1617 nvme_ctrlr_disconnect(struct spdk_nvme_ctrlr *ctrlr)
1618 {
1619 	if (ctrlr->is_resetting || ctrlr->is_removed) {
1620 		/*
1621 		 * Controller is already resetting or has been removed. Return
1622 		 *  immediately since there is no need to kick off another
1623 		 *  reset in these cases.
1624 		 */
1625 		return ctrlr->is_resetting ? -EBUSY : -ENXIO;
1626 	}
1627 
1628 	ctrlr->is_resetting = true;
1629 	ctrlr->is_failed = false;
1630 	ctrlr->is_disconnecting = true;
1631 	ctrlr->prepare_for_reset = true;
1632 
1633 	NVME_CTRLR_NOTICELOG(ctrlr, "resetting controller\n");
1634 
1635 	/* Disable keep-alive; it will be re-enabled as part of the init process */
1636 	ctrlr->keep_alive_interval_ticks = 0;
1637 
1638 	/* Abort all of the queued abort requests */
1639 	nvme_ctrlr_abort_queued_aborts(ctrlr);
1640 
1641 	nvme_transport_admin_qpair_abort_aers(ctrlr->adminq);
1642 
1643 	ctrlr->adminq->transport_failure_reason = SPDK_NVME_QPAIR_FAILURE_LOCAL;
1644 	nvme_transport_ctrlr_disconnect_qpair(ctrlr, ctrlr->adminq);
1645 
1646 	return 0;
1647 }
1648 
1649 static void
1650 nvme_ctrlr_disconnect_done(struct spdk_nvme_ctrlr *ctrlr)
1651 {
1652 	assert(ctrlr->is_failed == false);
1653 	ctrlr->is_disconnecting = false;
1654 
1655 	/* Doorbell buffer config is invalid during reset */
1656 	nvme_ctrlr_free_doorbell_buffer(ctrlr);
1657 
1658 	/* I/O Command Set Specific Identify Controller data is invalidated during reset */
1659 	nvme_ctrlr_free_iocs_specific_data(ctrlr);
1660 
1661 	spdk_bit_array_free(&ctrlr->free_io_qids);
1662 }
1663 
1664 int
1665 spdk_nvme_ctrlr_disconnect(struct spdk_nvme_ctrlr *ctrlr)
1666 {
1667 	int rc;
1668 
1669 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
1670 	rc = nvme_ctrlr_disconnect(ctrlr);
1671 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
1672 
1673 	return rc;
1674 }
1675 
1676 void
1677 spdk_nvme_ctrlr_reconnect_async(struct spdk_nvme_ctrlr *ctrlr)
1678 {
1679 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
1680 
1681 	ctrlr->prepare_for_reset = false;
1682 
1683 	/* Set the state back to INIT to cause a full hardware reset. */
1684 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_INIT, NVME_TIMEOUT_INFINITE);
1685 
1686 	/* Return without releasing ctrlr_lock. ctrlr_lock will be released when
1687 	 * spdk_nvme_ctrlr_reset_poll_async() returns 0.
1688 	 */
1689 }
1690 
1691 /**
1692  * This function will be called when the controller is being reinitialized.
1693  * Note: the ctrlr_lock must be held when calling this function.
1694  */
1695 int
1696 spdk_nvme_ctrlr_reconnect_poll_async(struct spdk_nvme_ctrlr *ctrlr)
1697 {
1698 	struct spdk_nvme_ns *ns, *tmp_ns;
1699 	struct spdk_nvme_qpair	*qpair;
1700 	int rc = 0, rc_tmp = 0;
1701 	bool async;
1702 
1703 	if (nvme_ctrlr_process_init(ctrlr) != 0) {
1704 		NVME_CTRLR_ERRLOG(ctrlr, "controller reinitialization failed\n");
1705 		rc = -1;
1706 	}
1707 	if (ctrlr->state != NVME_CTRLR_STATE_READY && rc != -1) {
1708 		return -EAGAIN;
1709 	}
1710 
1711 	/*
1712 	 * For non-fabrics controllers, the memory locations of the transport qpair
1713 	 * don't change when the controller is reset. They simply need to be
1714 	 * re-enabled with admin commands to the controller. For fabric
1715 	 * controllers we need to disconnect and reconnect the qpair on its
1716 	 * own thread outside of the context of the reset.
1717 	 */
1718 	if (rc == 0 && !spdk_nvme_ctrlr_is_fabrics(ctrlr)) {
1719 		/* Reinitialize qpairs */
1720 		TAILQ_FOREACH(qpair, &ctrlr->active_io_qpairs, tailq) {
1721 			assert(spdk_bit_array_get(ctrlr->free_io_qids, qpair->id));
1722 			spdk_bit_array_clear(ctrlr->free_io_qids, qpair->id);
1723 
1724 			/* Force a synchronous connect. We can't currently handle an asynchronous
1725 			 * operation here. */
1726 			async = qpair->async;
1727 			qpair->async = false;
1728 			rc_tmp = nvme_transport_ctrlr_connect_qpair(ctrlr, qpair);
1729 			qpair->async = async;
1730 
1731 			if (rc_tmp != 0) {
1732 				rc = rc_tmp;
1733 				qpair->transport_failure_reason = SPDK_NVME_QPAIR_FAILURE_LOCAL;
1734 				continue;
1735 			}
1736 		}
1737 	}
1738 
1739 	/*
1740 	 * Take this opportunity to remove inactive namespaces. During a reset namespace
1741 	 * handles can be invalidated.
1742 	 */
1743 	RB_FOREACH_SAFE(ns, nvme_ns_tree, &ctrlr->ns, tmp_ns) {
1744 		if (!ns->active) {
1745 			RB_REMOVE(nvme_ns_tree, &ctrlr->ns, ns);
1746 			spdk_free(ns);
1747 		}
1748 	}
1749 
1750 	if (rc) {
1751 		nvme_ctrlr_fail(ctrlr, false);
1752 	}
1753 	ctrlr->is_resetting = false;
1754 
1755 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
1756 
1757 	if (!ctrlr->cdata.oaes.ns_attribute_notices) {
1758 		/*
1759 		 * If the controller doesn't support Namespace Attribute Notices, the
1760 		 * namespace attributes (e.g. the number of namespaces) may have changed
1761 		 * without notification, so update the system while handling the reset.
1762 		 */
1763 		nvme_io_msg_ctrlr_update(ctrlr);
1764 	}
1765 
1766 	return rc;
1767 }
1768 
1769 /*
1770  * For PCIe transport, spdk_nvme_ctrlr_disconnect() will do a Controller Level Reset
1771  * (Change CC.EN from 1 to 0) as an operation to disconnect the admin qpair.
1772  * The following two functions are added to do a Controller Level Reset. They have
1773  * to be called under the nvme controller's lock.
1774  */
1775 void
1776 nvme_ctrlr_disable(struct spdk_nvme_ctrlr *ctrlr)
1777 {
1778 	assert(ctrlr->is_disconnecting == true);
1779 
1780 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_CHECK_EN, NVME_TIMEOUT_INFINITE);
1781 }
1782 
1783 int
1784 nvme_ctrlr_disable_poll(struct spdk_nvme_ctrlr *ctrlr)
1785 {
1786 	int rc = 0;
1787 
1788 	if (nvme_ctrlr_process_init(ctrlr) != 0) {
1789 		NVME_CTRLR_ERRLOG(ctrlr, "failed to disable controller\n");
1790 		rc = -1;
1791 	}
1792 
1793 	if (ctrlr->state != NVME_CTRLR_STATE_DISABLED && rc != -1) {
1794 		return -EAGAIN;
1795 	}
1796 
1797 	return rc;
1798 }
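
/*
 * Illustrative sketch (not part of the driver): a caller that already holds
 * ctrlr_lock and has the controller in the disconnecting phase could drive the
 * two-step disable by polling until nvme_ctrlr_disable_poll() stops reporting
 * -EAGAIN (disable still in progress):
 *
 *	nvme_ctrlr_disable(ctrlr);
 *	do {
 *		rc = nvme_ctrlr_disable_poll(ctrlr);
 *	} while (rc == -EAGAIN);
 */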
1799 
1800 static void
1801 nvme_ctrlr_fail_io_qpairs(struct spdk_nvme_ctrlr *ctrlr)
1802 {
1803 	struct spdk_nvme_qpair	*qpair;
1804 
1805 	TAILQ_FOREACH(qpair, &ctrlr->active_io_qpairs, tailq) {
1806 		qpair->transport_failure_reason = SPDK_NVME_QPAIR_FAILURE_LOCAL;
1807 	}
1808 }
1809 
1810 int
1811 spdk_nvme_ctrlr_reset(struct spdk_nvme_ctrlr *ctrlr)
1812 {
1813 	int rc;
1814 
1815 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
1816 
1817 	rc = nvme_ctrlr_disconnect(ctrlr);
1818 	if (rc == 0) {
1819 		nvme_ctrlr_fail_io_qpairs(ctrlr);
1820 	}
1821 
1822 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
1823 
1824 	if (rc != 0) {
1825 		if (rc == -EBUSY) {
1826 			rc = 0;
1827 		}
1828 		return rc;
1829 	}
1830 
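	/*
	 * Drain the admin queue: keep processing completions until the transport
	 * reports -ENXIO, i.e. the admin qpair has finished disconnecting and no
	 * further completions will arrive.
	 */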
1831 	while (1) {
1832 		rc = spdk_nvme_ctrlr_process_admin_completions(ctrlr);
1833 		if (rc == -ENXIO) {
1834 			break;
1835 		}
1836 	}
1837 
1838 	spdk_nvme_ctrlr_reconnect_async(ctrlr);
1839 
1840 	while (true) {
1841 		rc = spdk_nvme_ctrlr_reconnect_poll_async(ctrlr);
1842 		if (rc != -EAGAIN) {
1843 			break;
1844 		}
1845 	}
1846 
1847 	return rc;
1848 }
1849 
1850 int
1851 spdk_nvme_ctrlr_reset_subsystem(struct spdk_nvme_ctrlr *ctrlr)
1852 {
1853 	union spdk_nvme_cap_register cap;
1854 	int rc = 0;
1855 
1856 	cap = spdk_nvme_ctrlr_get_regs_cap(ctrlr);
1857 	if (cap.bits.nssrs == 0) {
1858 		NVME_CTRLR_WARNLOG(ctrlr, "subsystem reset is not supported\n");
1859 		return -ENOTSUP;
1860 	}
1861 
1862 	NVME_CTRLR_NOTICELOG(ctrlr, "resetting subsystem\n");
1863 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
1864 	ctrlr->is_resetting = true;
1865 	rc = nvme_ctrlr_set_nssr(ctrlr, SPDK_NVME_NSSR_VALUE);
1866 	ctrlr->is_resetting = false;
1867 
1868 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
1869 	/*
1870 	 * No more cleanup at this point like in the ctrlr reset. A subsystem reset will cause
1871 	 * a hot remove for PCIe transport. The hot remove handling does all the necessary ctrlr cleanup.
1872 	 */
1873 	return rc;
1874 }
1875 
1876 int
1877 spdk_nvme_ctrlr_set_trid(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_transport_id *trid)
1878 {
1879 	int rc = 0;
1880 
1881 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
1882 
1883 	if (ctrlr->is_failed == false) {
1884 		rc = -EPERM;
1885 		goto out;
1886 	}
1887 
1888 	if (trid->trtype != ctrlr->trid.trtype) {
1889 		rc = -EINVAL;
1890 		goto out;
1891 	}
1892 
1893 	if (strncmp(trid->subnqn, ctrlr->trid.subnqn, SPDK_NVMF_NQN_MAX_LEN)) {
1894 		rc = -EINVAL;
1895 		goto out;
1896 	}
1897 
1898 	ctrlr->trid = *trid;
1899 
1900 out:
1901 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
1902 	return rc;
1903 }
1904 
1905 void
1906 spdk_nvme_ctrlr_set_remove_cb(struct spdk_nvme_ctrlr *ctrlr,
1907 			      spdk_nvme_remove_cb remove_cb, void *remove_ctx)
1908 {
1909 	if (!spdk_process_is_primary()) {
1910 		return;
1911 	}
1912 
1913 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
1914 	ctrlr->remove_cb = remove_cb;
1915 	ctrlr->cb_ctx = remove_ctx;
1916 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
1917 }
1918 
1919 static void
1920 nvme_ctrlr_identify_done(void *arg, const struct spdk_nvme_cpl *cpl)
1921 {
1922 	struct spdk_nvme_ctrlr *ctrlr = (struct spdk_nvme_ctrlr *)arg;
1923 
1924 	if (spdk_nvme_cpl_is_error(cpl)) {
1925 		NVME_CTRLR_ERRLOG(ctrlr, "nvme_identify_controller failed!\n");
1926 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
1927 		return;
1928 	}
1929 
1930 	/*
1931 	 * Use MDTS to ensure our default max_xfer_size doesn't exceed what the
1932 	 *  controller supports.
1933 	 */
1934 	ctrlr->max_xfer_size = nvme_transport_ctrlr_get_max_xfer_size(ctrlr);
1935 	NVME_CTRLR_DEBUGLOG(ctrlr, "transport max_xfer_size %u\n", ctrlr->max_xfer_size);
1936 	if (ctrlr->cdata.mdts > 0) {
1937 		ctrlr->max_xfer_size = spdk_min(ctrlr->max_xfer_size,
1938 						ctrlr->min_page_size * (1 << ctrlr->cdata.mdts));
1939 		NVME_CTRLR_DEBUGLOG(ctrlr, "MDTS max_xfer_size %u\n", ctrlr->max_xfer_size);
1940 	}
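
	/*
	 * Worked example (illustrative numbers): with a 4096-byte min_page_size and
	 * cdata.mdts == 5, the MDTS limit is 4096 * (1 << 5) = 131072 bytes, so
	 * max_xfer_size is capped at 128 KiB even if the transport allows more.
	 */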
1941 
1942 	NVME_CTRLR_DEBUGLOG(ctrlr, "CNTLID 0x%04" PRIx16 "\n", ctrlr->cdata.cntlid);
1943 	if (ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) {
1944 		ctrlr->cntlid = ctrlr->cdata.cntlid;
1945 	} else {
1946 		/*
1947 		 * Fabrics controllers should already have CNTLID from the Connect command.
1948 		 *
1949 		 * If CNTLID from Connect doesn't match CNTLID in the Identify Controller data,
1950 		 * trust the one from Connect.
1951 		 */
1952 		if (ctrlr->cntlid != ctrlr->cdata.cntlid) {
1953 			NVME_CTRLR_DEBUGLOG(ctrlr, "Identify CNTLID 0x%04" PRIx16 " != Connect CNTLID 0x%04" PRIx16 "\n",
1954 					    ctrlr->cdata.cntlid, ctrlr->cntlid);
1955 		}
1956 	}
1957 
1958 	if (ctrlr->cdata.sgls.supported && !(ctrlr->quirks & NVME_QUIRK_NOT_USE_SGL)) {
1959 		assert(ctrlr->cdata.sgls.supported != 0x3);
1960 		ctrlr->flags |= SPDK_NVME_CTRLR_SGL_SUPPORTED;
1961 		if (ctrlr->cdata.sgls.supported == 0x2) {
1962 			ctrlr->flags |= SPDK_NVME_CTRLR_SGL_REQUIRES_DWORD_ALIGNMENT;
1963 		}
1964 
1965 		ctrlr->max_sges = nvme_transport_ctrlr_get_max_sges(ctrlr);
1966 		NVME_CTRLR_DEBUGLOG(ctrlr, "transport max_sges %u\n", ctrlr->max_sges);
1967 	}
1968 
1969 	if (ctrlr->cdata.sgls.metadata_address && !(ctrlr->quirks & NVME_QUIRK_NOT_USE_SGL)) {
1970 		ctrlr->flags |= SPDK_NVME_CTRLR_MPTR_SGL_SUPPORTED;
1971 	}
1972 
1973 	if (ctrlr->cdata.oacs.security && !(ctrlr->quirks & NVME_QUIRK_OACS_SECURITY)) {
1974 		ctrlr->flags |= SPDK_NVME_CTRLR_SECURITY_SEND_RECV_SUPPORTED;
1975 	}
1976 
1977 	if (ctrlr->cdata.oacs.directives) {
1978 		ctrlr->flags |= SPDK_NVME_CTRLR_DIRECTIVES_SUPPORTED;
1979 	}
1980 
1981 	NVME_CTRLR_DEBUGLOG(ctrlr, "fuses compare and write: %d\n",
1982 			    ctrlr->cdata.fuses.compare_and_write);
1983 	if (ctrlr->cdata.fuses.compare_and_write) {
1984 		ctrlr->flags |= SPDK_NVME_CTRLR_COMPARE_AND_WRITE_SUPPORTED;
1985 	}
1986 
1987 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_CONFIGURE_AER,
1988 			     ctrlr->opts.admin_timeout_ms);
1989 }
1990 
1991 static int
1992 nvme_ctrlr_identify(struct spdk_nvme_ctrlr *ctrlr)
1993 {
1994 	int	rc;
1995 
1996 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY,
1997 			     ctrlr->opts.admin_timeout_ms);
1998 
1999 	rc = nvme_ctrlr_cmd_identify(ctrlr, SPDK_NVME_IDENTIFY_CTRLR, 0, 0, 0,
2000 				     &ctrlr->cdata, sizeof(ctrlr->cdata),
2001 				     nvme_ctrlr_identify_done, ctrlr);
2002 	if (rc != 0) {
2003 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
2004 		return rc;
2005 	}
2006 
2007 	return 0;
2008 }
2009 
2010 static void
2011 nvme_ctrlr_get_zns_cmd_and_effects_log_done(void *arg, const struct spdk_nvme_cpl *cpl)
2012 {
2013 	struct spdk_nvme_cmds_and_effect_log_page *log_page;
2014 	struct spdk_nvme_ctrlr *ctrlr = arg;
2015 
2016 	if (spdk_nvme_cpl_is_error(cpl)) {
2017 		NVME_CTRLR_ERRLOG(ctrlr, "nvme_ctrlr_get_zns_cmd_and_effects_log failed!\n");
2018 		spdk_free(ctrlr->tmp_ptr);
2019 		ctrlr->tmp_ptr = NULL;
2020 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
2021 		return;
2022 	}
2023 
2024 	log_page = ctrlr->tmp_ptr;
2025 
2026 	if (log_page->io_cmds_supported[SPDK_NVME_OPC_ZONE_APPEND].csupp) {
2027 		ctrlr->flags |= SPDK_NVME_CTRLR_ZONE_APPEND_SUPPORTED;
2028 	}
2029 	spdk_free(ctrlr->tmp_ptr);
2030 	ctrlr->tmp_ptr = NULL;
2031 
2032 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_NUM_QUEUES, ctrlr->opts.admin_timeout_ms);
2033 }
2034 
2035 static int
2036 nvme_ctrlr_get_zns_cmd_and_effects_log(struct spdk_nvme_ctrlr *ctrlr)
2037 {
2038 	int rc;
2039 
2040 	assert(!ctrlr->tmp_ptr);
2041 	ctrlr->tmp_ptr = spdk_zmalloc(sizeof(struct spdk_nvme_cmds_and_effect_log_page), 64, NULL,
2042 				      SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_SHARE | SPDK_MALLOC_DMA);
2043 	if (!ctrlr->tmp_ptr) {
2044 		rc = -ENOMEM;
2045 		goto error;
2046 	}
2047 
2048 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_GET_ZNS_CMD_EFFECTS_LOG,
2049 			     ctrlr->opts.admin_timeout_ms);
2050 
2051 	rc = spdk_nvme_ctrlr_cmd_get_log_page_ext(ctrlr, SPDK_NVME_LOG_COMMAND_EFFECTS_LOG,
2052 			0, ctrlr->tmp_ptr, sizeof(struct spdk_nvme_cmds_and_effect_log_page),
2053 			0, 0, 0, SPDK_NVME_CSI_ZNS << 24,
2054 			nvme_ctrlr_get_zns_cmd_and_effects_log_done, ctrlr);
2055 	if (rc != 0) {
2056 		goto error;
2057 	}
2058 
2059 	return 0;
2060 
2061 error:
2062 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
2063 	spdk_free(ctrlr->tmp_ptr);
2064 	ctrlr->tmp_ptr = NULL;
2065 	return rc;
2066 }
2067 
2068 static void
2069 nvme_ctrlr_identify_zns_specific_done(void *arg, const struct spdk_nvme_cpl *cpl)
2070 {
2071 	struct spdk_nvme_ctrlr *ctrlr = (struct spdk_nvme_ctrlr *)arg;
2072 
2073 	if (spdk_nvme_cpl_is_error(cpl)) {
2074 		/* no need to print an error, the controller simply does not support ZNS */
2075 		nvme_ctrlr_free_zns_specific_data(ctrlr);
2076 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_NUM_QUEUES,
2077 				     ctrlr->opts.admin_timeout_ms);
2078 		return;
2079 	}
2080 
2081 	/* A zero zasl value means use mdts */
2082 	if (ctrlr->cdata_zns->zasl) {
2083 		uint32_t max_append = ctrlr->min_page_size * (1 << ctrlr->cdata_zns->zasl);
2084 		ctrlr->max_zone_append_size = spdk_min(ctrlr->max_xfer_size, max_append);
2085 	} else {
2086 		ctrlr->max_zone_append_size = ctrlr->max_xfer_size;
2087 	}
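
	/*
	 * Worked example (illustrative numbers): with a 4096-byte min_page_size and
	 * cdata_zns->zasl == 4, max_append is 4096 * (1 << 4) = 65536 bytes, so
	 * max_zone_append_size becomes min(max_xfer_size, 64 KiB).
	 */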
2088 
2089 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_GET_ZNS_CMD_EFFECTS_LOG,
2090 			     ctrlr->opts.admin_timeout_ms);
2091 }
2092 
2093 /**
2094  * This function will try to fetch the I/O Command Specific Controller data structure for
2095  * each I/O Command Set supported by SPDK.
2096  *
2097  * If an I/O Command Set is not supported by the controller, "Invalid Field in Command"
2098  * will be returned. Since we are probing exploratively, getting an error back
2099  * from the controller should not be treated as fatal.
2100  *
2101  * I/O Command Sets not supported by SPDK will be skipped (e.g. Key Value Command Set).
2102  *
2103  * I/O Command Sets without an IOCS specific data structure (i.e. a zero-filled IOCS specific
2104  * data structure) will be skipped (e.g. NVM Command Set, Key Value Command Set).
2105  */
2106 static int
2107 nvme_ctrlr_identify_iocs_specific(struct spdk_nvme_ctrlr *ctrlr)
2108 {
2109 	int	rc;
2110 
2111 	if (!nvme_ctrlr_multi_iocs_enabled(ctrlr)) {
2112 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_NUM_QUEUES,
2113 				     ctrlr->opts.admin_timeout_ms);
2114 		return 0;
2115 	}
2116 
2117 	/*
2118 	 * Since SPDK currently only needs to fetch a single Command Set, keep the code here,
2119 	 * instead of creating multiple NVME_CTRLR_STATE_IDENTIFY_IOCS_SPECIFIC substates,
2120 	 * which would require additional functions and complexity for no good reason.
2121 	 */
2122 	assert(!ctrlr->cdata_zns);
2123 	ctrlr->cdata_zns = spdk_zmalloc(sizeof(*ctrlr->cdata_zns), 64, NULL, SPDK_ENV_SOCKET_ID_ANY,
2124 					SPDK_MALLOC_SHARE | SPDK_MALLOC_DMA);
2125 	if (!ctrlr->cdata_zns) {
2126 		rc = -ENOMEM;
2127 		goto error;
2128 	}
2129 
2130 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_IOCS_SPECIFIC,
2131 			     ctrlr->opts.admin_timeout_ms);
2132 
2133 	rc = nvme_ctrlr_cmd_identify(ctrlr, SPDK_NVME_IDENTIFY_CTRLR_IOCS, 0, 0, SPDK_NVME_CSI_ZNS,
2134 				     ctrlr->cdata_zns, sizeof(*ctrlr->cdata_zns),
2135 				     nvme_ctrlr_identify_zns_specific_done, ctrlr);
2136 	if (rc != 0) {
2137 		goto error;
2138 	}
2139 
2140 	return 0;
2141 
2142 error:
2143 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
2144 	nvme_ctrlr_free_zns_specific_data(ctrlr);
2145 	return rc;
2146 }
2147 
2148 enum nvme_active_ns_state {
2149 	NVME_ACTIVE_NS_STATE_IDLE,
2150 	NVME_ACTIVE_NS_STATE_PROCESSING,
2151 	NVME_ACTIVE_NS_STATE_DONE,
2152 	NVME_ACTIVE_NS_STATE_ERROR
2153 };
2154 
2155 typedef void (*nvme_active_ns_ctx_deleter)(struct nvme_active_ns_ctx *);
2156 
2157 struct nvme_active_ns_ctx {
2158 	struct spdk_nvme_ctrlr *ctrlr;
2159 	uint32_t page_count;
2160 	uint32_t next_nsid;
2161 	uint32_t *new_ns_list;
2162 	nvme_active_ns_ctx_deleter deleter;
2163 
2164 	enum nvme_active_ns_state state;
2165 };
2166 
2167 static struct nvme_active_ns_ctx *
2168 nvme_active_ns_ctx_create(struct spdk_nvme_ctrlr *ctrlr, nvme_active_ns_ctx_deleter deleter)
2169 {
2170 	struct nvme_active_ns_ctx *ctx;
2171 	uint32_t *new_ns_list = NULL;
2172 
2173 	ctx = calloc(1, sizeof(*ctx));
2174 	if (!ctx) {
2175 		NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate nvme_active_ns_ctx!\n");
2176 		return NULL;
2177 	}
2178 
2179 	new_ns_list = spdk_zmalloc(sizeof(struct spdk_nvme_ns_list), ctrlr->page_size,
2180 				   NULL, SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_SHARE);
2181 	if (!new_ns_list) {
2182 		NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate active_ns_list!\n");
2183 		free(ctx);
2184 		return NULL;
2185 	}
2186 
2187 	ctx->page_count = 1;
2188 	ctx->new_ns_list = new_ns_list;
2189 	ctx->ctrlr = ctrlr;
2190 	ctx->deleter = deleter;
2191 
2192 	return ctx;
2193 }
2194 
2195 static void
2196 nvme_active_ns_ctx_destroy(struct nvme_active_ns_ctx *ctx)
2197 {
2198 	spdk_free(ctx->new_ns_list);
2199 	free(ctx);
2200 }
2201 
2202 static int
2203 nvme_ctrlr_destruct_namespace(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid)
2204 {
2205 	struct spdk_nvme_ns tmp, *ns;
2206 
2207 	assert(ctrlr != NULL);
2208 
2209 	tmp.id = nsid;
2210 	ns = RB_FIND(nvme_ns_tree, &ctrlr->ns, &tmp);
2211 	if (ns == NULL) {
2212 		return -EINVAL;
2213 	}
2214 
2215 	nvme_ns_destruct(ns);
2216 	ns->active = false;
2217 
2218 	return 0;
2219 }
2220 
2221 static int
2222 nvme_ctrlr_construct_namespace(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid)
2223 {
2224 	struct spdk_nvme_ns *ns;
2225 
2226 	if (nsid < 1 || nsid > ctrlr->cdata.nn) {
2227 		return -EINVAL;
2228 	}
2229 
2230 	/* Namespaces are constructed on demand, so simply looking the namespace up constructs it. */
2231 	ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid);
2232 	if (ns == NULL) {
2233 		return -ENOMEM;
2234 	}
2235 
2236 	ns->active = true;
2237 
2238 	return 0;
2239 }
2240 
2241 static void
2242 nvme_ctrlr_identify_active_ns_swap(struct spdk_nvme_ctrlr *ctrlr, uint32_t *new_ns_list,
2243 				   size_t max_entries)
2244 {
2245 	uint32_t active_ns_count = 0;
2246 	size_t i;
2247 	uint32_t nsid;
2248 	struct spdk_nvme_ns *ns, *tmp_ns;
2249 	int rc;
2250 
2251 	/* First, remove namespaces that no longer exist */
2252 	RB_FOREACH_SAFE(ns, nvme_ns_tree, &ctrlr->ns, tmp_ns) {
2253 		nsid = new_ns_list[0];
2254 		active_ns_count = 0;
2255 		while (nsid != 0) {
2256 			if (nsid == ns->id) {
2257 				break;
2258 			}
2259 
2260 			nsid = new_ns_list[active_ns_count++];
2261 		}
2262 
2263 		if (nsid != ns->id) {
2264 			/* Did not find this namespace id in the new list. */
2265 			NVME_CTRLR_DEBUGLOG(ctrlr, "Namespace %u was removed\n", ns->id);
2266 			nvme_ctrlr_destruct_namespace(ctrlr, ns->id);
2267 		}
2268 	}
2269 
2270 	/* Next, add new namespaces */
2271 	active_ns_count = 0;
2272 	for (i = 0; i < max_entries; i++) {
2273 		nsid = new_ns_list[active_ns_count];
2274 
2275 		if (nsid == 0) {
2276 			break;
2277 		}
2278 
2279 		/* If the namespace already exists, this will not construct it a second time. */
2280 		rc = nvme_ctrlr_construct_namespace(ctrlr, nsid);
2281 		if (rc != 0) {
2282 			/* We can't easily handle a failure here, so just move on. */
2283 			assert(false);
2284 			NVME_CTRLR_DEBUGLOG(ctrlr, "Failed to allocate a namespace object.\n");
2285 			continue;
2286 		}
2287 
2288 		active_ns_count++;
2289 	}
2290 
2291 	ctrlr->active_ns_count = active_ns_count;
2292 }
2293 
2294 static void
2295 nvme_ctrlr_identify_active_ns_async_done(void *arg, const struct spdk_nvme_cpl *cpl)
2296 {
2297 	struct nvme_active_ns_ctx *ctx = arg;
2298 	uint32_t *new_ns_list = NULL;
2299 
2300 	if (spdk_nvme_cpl_is_error(cpl)) {
2301 		ctx->state = NVME_ACTIVE_NS_STATE_ERROR;
2302 		goto out;
2303 	}
2304 
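	/*
	 * Each Identify Active Namespace ID list page holds 1024 NSIDs
	 * (sizeof(struct spdk_nvme_ns_list) / sizeof(uint32_t)). The last entry of
	 * the page just fetched is either 0, meaning the list is complete, or the
	 * NSID after which the next page of the list is requested.
	 */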
2305 	ctx->next_nsid = ctx->new_ns_list[1024 * ctx->page_count - 1];
2306 	if (ctx->next_nsid == 0) {
2307 		ctx->state = NVME_ACTIVE_NS_STATE_DONE;
2308 		goto out;
2309 	}
2310 
2311 	ctx->page_count++;
2312 	new_ns_list = spdk_realloc(ctx->new_ns_list,
2313 				   ctx->page_count * sizeof(struct spdk_nvme_ns_list),
2314 				   ctx->ctrlr->page_size);
2315 	if (!new_ns_list) {
2316 		SPDK_ERRLOG("Failed to reallocate active_ns_list!\n");
2317 		ctx->state = NVME_ACTIVE_NS_STATE_ERROR;
2318 		goto out;
2319 	}
2320 
2321 	ctx->new_ns_list = new_ns_list;
2322 	nvme_ctrlr_identify_active_ns_async(ctx);
2323 	return;
2324 
2325 out:
2326 	if (ctx->deleter) {
2327 		ctx->deleter(ctx);
2328 	}
2329 }
2330 
2331 static void
2332 nvme_ctrlr_identify_active_ns_async(struct nvme_active_ns_ctx *ctx)
2333 {
2334 	struct spdk_nvme_ctrlr *ctrlr = ctx->ctrlr;
2335 	uint32_t i;
2336 	int rc;
2337 
2338 	if (ctrlr->cdata.nn == 0) {
2339 		ctx->state = NVME_ACTIVE_NS_STATE_DONE;
2340 		goto out;
2341 	}
2342 
2343 	assert(ctx->new_ns_list != NULL);
2344 
2345 	/*
2346 	 * If the controller doesn't support the Active Namespace ID list (CNS 0x02),
2347 	 * dummy up an active ns list, i.e. report all namespaces as active.
2348 	 */
2349 	if (ctrlr->vs.raw < SPDK_NVME_VERSION(1, 1, 0) || ctrlr->quirks & NVME_QUIRK_IDENTIFY_CNS) {
2350 		uint32_t *new_ns_list;
2351 
2352 		/*
2353 		 * The active NS list must always end with a zero element,
2354 		 * so we allocate room for cdata.nn + 1 entries.
2355 		 */
2356 		ctx->page_count = spdk_divide_round_up(ctrlr->cdata.nn + 1,
2357 						       sizeof(struct spdk_nvme_ns_list) / sizeof(new_ns_list[0]));
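		/*
		 * Example (illustrative): with cdata.nn == 1500, this rounds
		 * 1501 / 1024 up to 2 pages of NSIDs.
		 */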
2358 		new_ns_list = spdk_realloc(ctx->new_ns_list,
2359 					   ctx->page_count * sizeof(struct spdk_nvme_ns_list),
2360 					   ctx->ctrlr->page_size);
2361 		if (!new_ns_list) {
2362 			SPDK_ERRLOG("Failed to reallocate active_ns_list!\n");
2363 			ctx->state = NVME_ACTIVE_NS_STATE_ERROR;
2364 			goto out;
2365 		}
2366 
2367 		ctx->new_ns_list = new_ns_list;
2368 		ctx->new_ns_list[ctrlr->cdata.nn] = 0;
2369 		for (i = 0; i < ctrlr->cdata.nn; i++) {
2370 			ctx->new_ns_list[i] = i + 1;
2371 		}
2372 
2373 		ctx->state = NVME_ACTIVE_NS_STATE_DONE;
2374 		goto out;
2375 	}
2376 
2377 	ctx->state = NVME_ACTIVE_NS_STATE_PROCESSING;
2378 	rc = nvme_ctrlr_cmd_identify(ctrlr, SPDK_NVME_IDENTIFY_ACTIVE_NS_LIST, 0, ctx->next_nsid, 0,
2379 				     &ctx->new_ns_list[1024 * (ctx->page_count - 1)], sizeof(struct spdk_nvme_ns_list),
2380 				     nvme_ctrlr_identify_active_ns_async_done, ctx);
2381 	if (rc != 0) {
2382 		ctx->state = NVME_ACTIVE_NS_STATE_ERROR;
2383 		goto out;
2384 	}
2385 
2386 	return;
2387 
2388 out:
2389 	if (ctx->deleter) {
2390 		ctx->deleter(ctx);
2391 	}
2392 }
2393 
2394 static void
2395 _nvme_active_ns_ctx_deleter(struct nvme_active_ns_ctx *ctx)
2396 {
2397 	struct spdk_nvme_ctrlr *ctrlr = ctx->ctrlr;
2398 	struct spdk_nvme_ns *ns;
2399 
2400 	if (ctx->state == NVME_ACTIVE_NS_STATE_ERROR) {
2401 		nvme_active_ns_ctx_destroy(ctx);
2402 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
2403 		return;
2404 	}
2405 
2406 	assert(ctx->state == NVME_ACTIVE_NS_STATE_DONE);
2407 
2408 	RB_FOREACH(ns, nvme_ns_tree, &ctrlr->ns) {
2409 		nvme_ns_free_iocs_specific_data(ns);
2410 	}
2411 
2412 	nvme_ctrlr_identify_active_ns_swap(ctrlr, ctx->new_ns_list, ctx->page_count * 1024);
2413 	nvme_active_ns_ctx_destroy(ctx);
2414 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_NS, ctrlr->opts.admin_timeout_ms);
2415 }
2416 
2417 static void
2418 _nvme_ctrlr_identify_active_ns(struct spdk_nvme_ctrlr *ctrlr)
2419 {
2420 	struct nvme_active_ns_ctx *ctx;
2421 
2422 	ctx = nvme_active_ns_ctx_create(ctrlr, _nvme_active_ns_ctx_deleter);
2423 	if (!ctx) {
2424 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
2425 		return;
2426 	}
2427 
2428 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_ACTIVE_NS,
2429 			     ctrlr->opts.admin_timeout_ms);
2430 	nvme_ctrlr_identify_active_ns_async(ctx);
2431 }
2432 
2433 int
2434 nvme_ctrlr_identify_active_ns(struct spdk_nvme_ctrlr *ctrlr)
2435 {
2436 	struct nvme_active_ns_ctx *ctx;
2437 	int rc;
2438 
2439 	ctx = nvme_active_ns_ctx_create(ctrlr, NULL);
2440 	if (!ctx) {
2441 		return -ENOMEM;
2442 	}
2443 
2444 	nvme_ctrlr_identify_active_ns_async(ctx);
2445 	while (ctx->state == NVME_ACTIVE_NS_STATE_PROCESSING) {
2446 		rc = spdk_nvme_qpair_process_completions(ctrlr->adminq, 0);
2447 		if (rc < 0) {
2448 			ctx->state = NVME_ACTIVE_NS_STATE_ERROR;
2449 			break;
2450 		}
2451 	}
2452 
2453 	if (ctx->state == NVME_ACTIVE_NS_STATE_ERROR) {
2454 		nvme_active_ns_ctx_destroy(ctx);
2455 		return -ENXIO;
2456 	}
2457 
2458 	assert(ctx->state == NVME_ACTIVE_NS_STATE_DONE);
2459 	nvme_ctrlr_identify_active_ns_swap(ctrlr, ctx->new_ns_list, ctx->page_count * 1024);
2460 	nvme_active_ns_ctx_destroy(ctx);
2461 
2462 	return 0;
2463 }
2464 
2465 static void
2466 nvme_ctrlr_identify_ns_async_done(void *arg, const struct spdk_nvme_cpl *cpl)
2467 {
2468 	struct spdk_nvme_ns *ns = (struct spdk_nvme_ns *)arg;
2469 	struct spdk_nvme_ctrlr *ctrlr = ns->ctrlr;
2470 	uint32_t nsid;
2471 	int rc;
2472 
2473 	if (spdk_nvme_cpl_is_error(cpl)) {
2474 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
2475 		return;
2476 	}
2477 
2478 	nvme_ns_set_identify_data(ns);
2479 
2480 	/* move on to the next active NS */
2481 	nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, ns->id);
2482 	ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid);
2483 	if (ns == NULL) {
2484 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_ID_DESCS,
2485 				     ctrlr->opts.admin_timeout_ms);
2486 		return;
2487 	}
2488 	ns->ctrlr = ctrlr;
2489 	ns->id = nsid;
2490 
2491 	rc = nvme_ctrlr_identify_ns_async(ns);
2492 	if (rc) {
2493 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
2494 	}
2495 }
2496 
2497 static int
2498 nvme_ctrlr_identify_ns_async(struct spdk_nvme_ns *ns)
2499 {
2500 	struct spdk_nvme_ctrlr *ctrlr = ns->ctrlr;
2501 	struct spdk_nvme_ns_data *nsdata;
2502 
2503 	nsdata = &ns->nsdata;
2504 
2505 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_NS,
2506 			     ctrlr->opts.admin_timeout_ms);
2507 	return nvme_ctrlr_cmd_identify(ns->ctrlr, SPDK_NVME_IDENTIFY_NS, 0, ns->id, 0,
2508 				       nsdata, sizeof(*nsdata),
2509 				       nvme_ctrlr_identify_ns_async_done, ns);
2510 }
2511 
2512 static int
2513 nvme_ctrlr_identify_namespaces(struct spdk_nvme_ctrlr *ctrlr)
2514 {
2515 	uint32_t nsid;
2516 	struct spdk_nvme_ns *ns;
2517 	int rc;
2518 
2519 	nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr);
2520 	ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid);
2521 	if (ns == NULL) {
2522 		/* No active NS, move on to the next state */
2523 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_ID_DESCS,
2524 				     ctrlr->opts.admin_timeout_ms);
2525 		return 0;
2526 	}
2527 
2528 	ns->ctrlr = ctrlr;
2529 	ns->id = nsid;
2530 
2531 	rc = nvme_ctrlr_identify_ns_async(ns);
2532 	if (rc) {
2533 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
2534 	}
2535 
2536 	return rc;
2537 }
2538 
2539 static int
2540 nvme_ctrlr_identify_namespaces_iocs_specific_next(struct spdk_nvme_ctrlr *ctrlr, uint32_t prev_nsid)
2541 {
2542 	uint32_t nsid;
2543 	struct spdk_nvme_ns *ns;
2544 	int rc;
2545 
2546 	if (!prev_nsid) {
2547 		nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr);
2548 	} else {
2549 		/* move on to the next active NS */
2550 		nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, prev_nsid);
2551 	}
2552 
2553 	ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid);
2554 	if (ns == NULL) {
2555 		/* No first/next active NS, move on to the next state */
2556 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_SUPPORTED_LOG_PAGES,
2557 				     ctrlr->opts.admin_timeout_ms);
2558 		return 0;
2559 	}
2560 
2561 	/* loop until we find a ns which has (supported) iocs specific data */
2562 	while (!nvme_ns_has_supported_iocs_specific_data(ns)) {
2563 		nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, ns->id);
2564 		ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid);
2565 		if (ns == NULL) {
2566 			/* no namespace with (supported) iocs specific data found */
2567 			nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_SUPPORTED_LOG_PAGES,
2568 					     ctrlr->opts.admin_timeout_ms);
2569 			return 0;
2570 		}
2571 	}
2572 
2573 	rc = nvme_ctrlr_identify_ns_iocs_specific_async(ns);
2574 	if (rc) {
2575 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
2576 	}
2577 
2578 	return rc;
2579 }
2580 
2581 static void
2582 nvme_ctrlr_identify_ns_zns_specific_async_done(void *arg, const struct spdk_nvme_cpl *cpl)
2583 {
2584 	struct spdk_nvme_ns *ns = (struct spdk_nvme_ns *)arg;
2585 	struct spdk_nvme_ctrlr *ctrlr = ns->ctrlr;
2586 
2587 	if (spdk_nvme_cpl_is_error(cpl)) {
2588 		nvme_ns_free_zns_specific_data(ns);
2589 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
2590 		return;
2591 	}
2592 
2593 	nvme_ctrlr_identify_namespaces_iocs_specific_next(ctrlr, ns->id);
2594 }
2595 
2596 static int
2597 nvme_ctrlr_identify_ns_iocs_specific_async(struct spdk_nvme_ns *ns)
2598 {
2599 	struct spdk_nvme_ctrlr *ctrlr = ns->ctrlr;
2600 	int rc;
2601 
2602 	switch (ns->csi) {
2603 	case SPDK_NVME_CSI_ZNS:
2604 		break;
2605 	default:
2606 		/*
2607 		 * This switch must handle all cases for which
2608 		 * nvme_ns_has_supported_iocs_specific_data() returns true;
2609 		 * other cases should never happen.
2610 		 */
2611 		assert(0);
2612 	}
2613 
2614 	assert(!ns->nsdata_zns);
2615 	ns->nsdata_zns = spdk_zmalloc(sizeof(*ns->nsdata_zns), 64, NULL, SPDK_ENV_SOCKET_ID_ANY,
2616 				      SPDK_MALLOC_SHARE);
2617 	if (!ns->nsdata_zns) {
2618 		return -ENOMEM;
2619 	}
2620 
2621 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_NS_IOCS_SPECIFIC,
2622 			     ctrlr->opts.admin_timeout_ms);
2623 	rc = nvme_ctrlr_cmd_identify(ns->ctrlr, SPDK_NVME_IDENTIFY_NS_IOCS, 0, ns->id, ns->csi,
2624 				     ns->nsdata_zns, sizeof(*ns->nsdata_zns),
2625 				     nvme_ctrlr_identify_ns_zns_specific_async_done, ns);
2626 	if (rc) {
2627 		nvme_ns_free_zns_specific_data(ns);
2628 	}
2629 
2630 	return rc;
2631 }
2632 
2633 static int
2634 nvme_ctrlr_identify_namespaces_iocs_specific(struct spdk_nvme_ctrlr *ctrlr)
2635 {
2636 	if (!nvme_ctrlr_multi_iocs_enabled(ctrlr)) {
2637 		/* Multi IOCS not supported/enabled, move on to the next state */
2638 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_SUPPORTED_LOG_PAGES,
2639 				     ctrlr->opts.admin_timeout_ms);
2640 		return 0;
2641 	}
2642 
2643 	return nvme_ctrlr_identify_namespaces_iocs_specific_next(ctrlr, 0);
2644 }
2645 
2646 static void
2647 nvme_ctrlr_identify_id_desc_async_done(void *arg, const struct spdk_nvme_cpl *cpl)
2648 {
2649 	struct spdk_nvme_ns *ns = (struct spdk_nvme_ns *)arg;
2650 	struct spdk_nvme_ctrlr *ctrlr = ns->ctrlr;
2651 	uint32_t nsid;
2652 	int rc;
2653 
2654 	if (spdk_nvme_cpl_is_error(cpl)) {
2655 		/*
2656 		 * Many controllers claim to be compatible with NVMe 1.3; however,
2657 		 * they do not implement NS ID Desc List. Therefore, instead of setting
2658 		 * the state to NVME_CTRLR_STATE_ERROR, silently ignore the completion
2659 		 * error and move on to the next state.
2660 		 *
2661 		 * The proper way is to create a new quirk for controllers that violate
2662 		 * the NVMe 1.3 spec by not supporting NS ID Desc List.
2663 		 * (Re-using the NVME_QUIRK_IDENTIFY_CNS quirk is not possible, since
2664 		 * it is too generic and was added in order to handle controllers that
2665 		 * violate the NVMe 1.1 spec by not supporting ACTIVE LIST).
2666 		 */
2667 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_NS_IOCS_SPECIFIC,
2668 				     ctrlr->opts.admin_timeout_ms);
2669 		return;
2670 	}
2671 
2672 	nvme_ns_set_id_desc_list_data(ns);
2673 
2674 	/* move on to the next active NS */
2675 	nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, ns->id);
2676 	ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid);
2677 	if (ns == NULL) {
2678 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_NS_IOCS_SPECIFIC,
2679 				     ctrlr->opts.admin_timeout_ms);
2680 		return;
2681 	}
2682 
2683 	rc = nvme_ctrlr_identify_id_desc_async(ns);
2684 	if (rc) {
2685 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
2686 	}
2687 }
2688 
2689 static int
2690 nvme_ctrlr_identify_id_desc_async(struct spdk_nvme_ns *ns)
2691 {
2692 	struct spdk_nvme_ctrlr *ctrlr = ns->ctrlr;
2693 
2694 	memset(ns->id_desc_list, 0, sizeof(ns->id_desc_list));
2695 
2696 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_ID_DESCS,
2697 			     ctrlr->opts.admin_timeout_ms);
2698 	return nvme_ctrlr_cmd_identify(ns->ctrlr, SPDK_NVME_IDENTIFY_NS_ID_DESCRIPTOR_LIST,
2699 				       0, ns->id, 0, ns->id_desc_list, sizeof(ns->id_desc_list),
2700 				       nvme_ctrlr_identify_id_desc_async_done, ns);
2701 }
2702 
2703 static int
2704 nvme_ctrlr_identify_id_desc_namespaces(struct spdk_nvme_ctrlr *ctrlr)
2705 {
2706 	uint32_t nsid;
2707 	struct spdk_nvme_ns *ns;
2708 	int rc;
2709 
2710 	if ((ctrlr->vs.raw < SPDK_NVME_VERSION(1, 3, 0) &&
2711 	     !(ctrlr->cap.bits.css & SPDK_NVME_CAP_CSS_IOCS)) ||
2712 	    (ctrlr->quirks & NVME_QUIRK_IDENTIFY_CNS)) {
2713 		NVME_CTRLR_DEBUGLOG(ctrlr, "Version < 1.3; not attempting to retrieve NS ID Descriptor List\n");
2714 		/* NS ID Desc List not supported, move on to the next state */
2715 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_NS_IOCS_SPECIFIC,
2716 				     ctrlr->opts.admin_timeout_ms);
2717 		return 0;
2718 	}
2719 
2720 	nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr);
2721 	ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid);
2722 	if (ns == NULL) {
2723 		/* No active NS, move on to the next state */
2724 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_NS_IOCS_SPECIFIC,
2725 				     ctrlr->opts.admin_timeout_ms);
2726 		return 0;
2727 	}
2728 
2729 	rc = nvme_ctrlr_identify_id_desc_async(ns);
2730 	if (rc) {
2731 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
2732 	}
2733 
2734 	return rc;
2735 }
2736 
2737 static void
2738 nvme_ctrlr_update_nvmf_ioccsz(struct spdk_nvme_ctrlr *ctrlr)
2739 {
2740 	if (spdk_nvme_ctrlr_is_fabrics(ctrlr)) {
2741 		if (ctrlr->cdata.nvmf_specific.ioccsz < 4) {
2742 			NVME_CTRLR_ERRLOG(ctrlr, "Incorrect IOCCSZ %u, the minimum value should be 4\n",
2743 					  ctrlr->cdata.nvmf_specific.ioccsz);
2744 			ctrlr->cdata.nvmf_specific.ioccsz = 4;
2745 			assert(0);
2746 		}
2747 		ctrlr->ioccsz_bytes = ctrlr->cdata.nvmf_specific.ioccsz * 16 - sizeof(struct spdk_nvme_cmd);
2748 		ctrlr->icdoff = ctrlr->cdata.nvmf_specific.icdoff;
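		/*
		 * Example (illustrative): a controller reporting ioccsz == 260 allows
		 * 260 * 16 = 4160-byte capsules; subtracting the 64-byte SQE leaves
		 * 4096 bytes of in-capsule data.
		 */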
2749 	}
2750 }
2751 
2752 static void
2753 nvme_ctrlr_set_num_queues_done(void *arg, const struct spdk_nvme_cpl *cpl)
2754 {
2755 	uint32_t cq_allocated, sq_allocated, min_allocated, i;
2756 	struct spdk_nvme_ctrlr *ctrlr = (struct spdk_nvme_ctrlr *)arg;
2757 
2758 	if (spdk_nvme_cpl_is_error(cpl)) {
2759 		NVME_CTRLR_ERRLOG(ctrlr, "Set Features - Number of Queues failed!\n");
2760 		ctrlr->opts.num_io_queues = 0;
2761 	} else {
2762 		/*
2763 		 * Data in cdw0 is 0-based.
2764 		 * Lower 16-bits indicate number of submission queues allocated.
2765 		 * Upper 16-bits indicate number of completion queues allocated.
2766 		 */
2767 		sq_allocated = (cpl->cdw0 & 0xFFFF) + 1;
2768 		cq_allocated = (cpl->cdw0 >> 16) + 1;
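
		/*
		 * Worked example (illustrative): cdw0 == 0x003F001F decodes to
		 * sq_allocated = 0x1F + 1 = 32 and cq_allocated = 0x3F + 1 = 64.
		 */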
2769 
2770 		/*
2771 		 * For 1:1 queue mapping, set number of allocated queues to be minimum of
2772 		 * submission and completion queues.
2773 		 */
2774 		min_allocated = spdk_min(sq_allocated, cq_allocated);
2775 
2776 		/* Set number of queues to be minimum of requested and actually allocated. */
2777 		ctrlr->opts.num_io_queues = spdk_min(min_allocated, ctrlr->opts.num_io_queues);
2778 	}
2779 
2780 	ctrlr->free_io_qids = spdk_bit_array_create(ctrlr->opts.num_io_queues + 1);
2781 	if (ctrlr->free_io_qids == NULL) {
2782 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
2783 		return;
2784 	}
2785 
2786 	/* Initialize list of free I/O queue IDs. QID 0 is the admin queue (implicitly allocated). */
2787 	for (i = 1; i <= ctrlr->opts.num_io_queues; i++) {
2788 		spdk_nvme_ctrlr_free_qid(ctrlr, i);
2789 	}
2790 
2791 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_ACTIVE_NS,
2792 			     ctrlr->opts.admin_timeout_ms);
2793 }
2794 
2795 static int
2796 nvme_ctrlr_set_num_queues(struct spdk_nvme_ctrlr *ctrlr)
2797 {
2798 	int rc;
2799 
2800 	if (ctrlr->opts.num_io_queues > SPDK_NVME_MAX_IO_QUEUES) {
2801 		NVME_CTRLR_NOTICELOG(ctrlr, "Limiting requested num_io_queues %u to max %d\n",
2802 				     ctrlr->opts.num_io_queues, SPDK_NVME_MAX_IO_QUEUES);
2803 		ctrlr->opts.num_io_queues = SPDK_NVME_MAX_IO_QUEUES;
2804 	} else if (ctrlr->opts.num_io_queues < 1) {
2805 		NVME_CTRLR_NOTICELOG(ctrlr, "Requested num_io_queues 0, increasing to 1\n");
2806 		ctrlr->opts.num_io_queues = 1;
2807 	}
2808 
2809 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_SET_NUM_QUEUES,
2810 			     ctrlr->opts.admin_timeout_ms);
2811 
2812 	rc = nvme_ctrlr_cmd_set_num_queues(ctrlr, ctrlr->opts.num_io_queues,
2813 					   nvme_ctrlr_set_num_queues_done, ctrlr);
2814 	if (rc != 0) {
2815 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
2816 		return rc;
2817 	}
2818 
2819 	return 0;
2820 }
2821 
2822 static void
2823 nvme_ctrlr_set_keep_alive_timeout_done(void *arg, const struct spdk_nvme_cpl *cpl)
2824 {
2825 	uint32_t keep_alive_interval_us;
2826 	struct spdk_nvme_ctrlr *ctrlr = (struct spdk_nvme_ctrlr *)arg;
2827 
2828 	if (spdk_nvme_cpl_is_error(cpl)) {
2829 		if ((cpl->status.sct == SPDK_NVME_SCT_GENERIC) &&
2830 		    (cpl->status.sc == SPDK_NVME_SC_INVALID_FIELD)) {
2831 			NVME_CTRLR_DEBUGLOG(ctrlr, "Keep alive timeout Get Feature is not supported\n");
2832 		} else {
2833 			NVME_CTRLR_ERRLOG(ctrlr, "Keep alive timeout Get Feature failed: SC %x SCT %x\n",
2834 					  cpl->status.sc, cpl->status.sct);
2835 			ctrlr->opts.keep_alive_timeout_ms = 0;
2836 			nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
2837 			return;
2838 		}
2839 	} else {
2840 		if (ctrlr->opts.keep_alive_timeout_ms != cpl->cdw0) {
2841 			NVME_CTRLR_DEBUGLOG(ctrlr, "Controller adjusted keep alive timeout to %u ms\n",
2842 					    cpl->cdw0);
2843 		}
2844 
2845 		ctrlr->opts.keep_alive_timeout_ms = cpl->cdw0;
2846 	}
2847 
2848 	if (ctrlr->opts.keep_alive_timeout_ms == 0) {
2849 		ctrlr->keep_alive_interval_ticks = 0;
2850 	} else {
2851 		keep_alive_interval_us = ctrlr->opts.keep_alive_timeout_ms * 1000 / 2;
2852 
2853 		NVME_CTRLR_DEBUGLOG(ctrlr, "Sending keep alive every %u us\n", keep_alive_interval_us);
2854 
2855 		ctrlr->keep_alive_interval_ticks = (keep_alive_interval_us * spdk_get_ticks_hz()) /
2856 						   UINT64_C(1000000);
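
		/*
		 * Example (illustrative numbers): with keep_alive_timeout_ms == 10000
		 * and a 2 GHz tick rate, a keep alive is sent every 5,000,000 us,
		 * i.e. every 10,000,000,000 ticks (5 seconds).
		 */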
2857 
2858 		/* Schedule the first Keep Alive to be sent as soon as possible. */
2859 		ctrlr->next_keep_alive_tick = spdk_get_ticks();
2860 	}
2861 
2862 	if (spdk_nvme_ctrlr_is_discovery(ctrlr)) {
2863 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_READY, NVME_TIMEOUT_INFINITE);
2864 	} else {
2865 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_IOCS_SPECIFIC,
2866 				     ctrlr->opts.admin_timeout_ms);
2867 	}
2868 }
2869 
2870 static int
2871 nvme_ctrlr_set_keep_alive_timeout(struct spdk_nvme_ctrlr *ctrlr)
2872 {
2873 	int rc;
2874 
2875 	if (ctrlr->opts.keep_alive_timeout_ms == 0) {
2876 		if (spdk_nvme_ctrlr_is_discovery(ctrlr)) {
2877 			nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_READY, NVME_TIMEOUT_INFINITE);
2878 		} else {
2879 			nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_IOCS_SPECIFIC,
2880 					     ctrlr->opts.admin_timeout_ms);
2881 		}
2882 		return 0;
2883 	}
2884 
2885 	/* Note: Discovery controller identify data does not populate KAS according to spec. */
2886 	if (!spdk_nvme_ctrlr_is_discovery(ctrlr) && ctrlr->cdata.kas == 0) {
2887 		NVME_CTRLR_DEBUGLOG(ctrlr, "Controller KAS is 0 - not enabling Keep Alive\n");
2888 		ctrlr->opts.keep_alive_timeout_ms = 0;
2889 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_IOCS_SPECIFIC,
2890 				     ctrlr->opts.admin_timeout_ms);
2891 		return 0;
2892 	}
2893 
2894 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_KEEP_ALIVE_TIMEOUT,
2895 			     ctrlr->opts.admin_timeout_ms);
2896 
2897 	/* Retrieve actual keep alive timeout, since the controller may have adjusted it. */
2898 	rc = spdk_nvme_ctrlr_cmd_get_feature(ctrlr, SPDK_NVME_FEAT_KEEP_ALIVE_TIMER, 0, NULL, 0,
2899 					     nvme_ctrlr_set_keep_alive_timeout_done, ctrlr);
2900 	if (rc != 0) {
2901 		NVME_CTRLR_ERRLOG(ctrlr, "Keep alive timeout Get Feature failed: %d\n", rc);
2902 		ctrlr->opts.keep_alive_timeout_ms = 0;
2903 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
2904 		return rc;
2905 	}
2906 
2907 	return 0;
2908 }
2909 
2910 static void
2911 nvme_ctrlr_set_host_id_done(void *arg, const struct spdk_nvme_cpl *cpl)
2912 {
2913 	struct spdk_nvme_ctrlr *ctrlr = (struct spdk_nvme_ctrlr *)arg;
2914 
2915 	if (spdk_nvme_cpl_is_error(cpl)) {
2916 		/*
2917 		 * Treat Set Features - Host ID failure as non-fatal, since the Host ID feature
2918 		 * is optional.
2919 		 */
2920 		NVME_CTRLR_WARNLOG(ctrlr, "Set Features - Host ID failed: SC 0x%x SCT 0x%x\n",
2921 				   cpl->status.sc, cpl->status.sct);
2922 	} else {
2923 		NVME_CTRLR_DEBUGLOG(ctrlr, "Set Features - Host ID was successful\n");
2924 	}
2925 
2926 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_TRANSPORT_READY, ctrlr->opts.admin_timeout_ms);
2927 }
2928 
2929 static int
2930 nvme_ctrlr_set_host_id(struct spdk_nvme_ctrlr *ctrlr)
2931 {
2932 	uint8_t *host_id;
2933 	uint32_t host_id_size;
2934 	int rc;
2935 
2936 	if (ctrlr->trid.trtype != SPDK_NVME_TRANSPORT_PCIE) {
2937 		/*
2938 		 * NVMe-oF sends the host ID during Connect and doesn't allow
2939 		 * Set Features - Host Identifier after Connect, so we don't need to do anything here.
2940 		 */
2941 		NVME_CTRLR_DEBUGLOG(ctrlr, "NVMe-oF transport - not sending Set Features - Host ID\n");
2942 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_TRANSPORT_READY, ctrlr->opts.admin_timeout_ms);
2943 		return 0;
2944 	}
2945 
2946 	if (ctrlr->cdata.ctratt.host_id_exhid_supported) {
2947 		NVME_CTRLR_DEBUGLOG(ctrlr, "Using 128-bit extended host identifier\n");
2948 		host_id = ctrlr->opts.extended_host_id;
2949 		host_id_size = sizeof(ctrlr->opts.extended_host_id);
2950 	} else {
2951 		NVME_CTRLR_DEBUGLOG(ctrlr, "Using 64-bit host identifier\n");
2952 		host_id = ctrlr->opts.host_id;
2953 		host_id_size = sizeof(ctrlr->opts.host_id);
2954 	}
2955 
2956 	/* If the user specified an all-zeroes host identifier, don't send the command. */
2957 	if (spdk_mem_all_zero(host_id, host_id_size)) {
2958 		NVME_CTRLR_DEBUGLOG(ctrlr, "User did not specify host ID - not sending Set Features - Host ID\n");
2959 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_TRANSPORT_READY, ctrlr->opts.admin_timeout_ms);
2960 		return 0;
2961 	}
2962 
2963 	SPDK_LOGDUMP(nvme, "host_id", host_id, host_id_size);
2964 
2965 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_HOST_ID,
2966 			     ctrlr->opts.admin_timeout_ms);
2967 
2968 	rc = nvme_ctrlr_cmd_set_host_id(ctrlr, host_id, host_id_size, nvme_ctrlr_set_host_id_done, ctrlr);
2969 	if (rc != 0) {
2970 		NVME_CTRLR_ERRLOG(ctrlr, "Set Features - Host ID failed: %d\n", rc);
2971 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
2972 		return rc;
2973 	}
2974 
2975 	return 0;
2976 }
2977 
2978 void
2979 nvme_ctrlr_update_namespaces(struct spdk_nvme_ctrlr *ctrlr)
2980 {
2981 	uint32_t nsid;
2982 	struct spdk_nvme_ns *ns;
2983 
2984 	for (nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr);
2985 	     nsid != 0; nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, nsid)) {
2986 		ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid);
2987 		nvme_ns_destruct(ns);
2988 		nvme_ns_construct(ns, nsid, ctrlr);
2989 	}
2990 }
2991 
2992 static int
2993 nvme_ctrlr_clear_changed_ns_log(struct spdk_nvme_ctrlr *ctrlr)
2994 {
2995 	struct nvme_completion_poll_status	*status;
2996 	int		rc = -ENOMEM;
2997 	char		*buffer = NULL;
2998 	uint32_t	nsid;
2999 	size_t		buf_size = (SPDK_NVME_MAX_CHANGED_NAMESPACES * sizeof(uint32_t));
3000 
3001 	if (ctrlr->opts.disable_read_changed_ns_list_log_page) {
3002 		return 0;
3003 	}
3004 
3005 	buffer = spdk_dma_zmalloc(buf_size, 4096, NULL);
3006 	if (!buffer) {
3007 		NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate buffer for getting "
3008 				  "changed ns log.\n");
3009 		return rc;
3010 	}
3011 
3012 	status = calloc(1, sizeof(*status));
3013 	if (!status) {
3014 		NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n");
3015 		goto free_buffer;
3016 	}
3017 
3018 	rc = spdk_nvme_ctrlr_cmd_get_log_page(ctrlr,
3019 					      SPDK_NVME_LOG_CHANGED_NS_LIST,
3020 					      SPDK_NVME_GLOBAL_NS_TAG,
3021 					      buffer, buf_size, 0,
3022 					      nvme_completion_poll_cb, status);
3023 
3024 	if (rc) {
3025 		NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_cmd_get_log_page() failed: rc=%d\n", rc);
3026 		free(status);
3027 		goto free_buffer;
3028 	}
3029 
3030 	rc = nvme_wait_for_completion_timeout(ctrlr->adminq, status,
3031 					      ctrlr->opts.admin_timeout_ms * 1000);
3032 	if (!status->timed_out) {
3033 		free(status);
3034 	}
3035 
3036 	if (rc) {
3037 		NVME_CTRLR_ERRLOG(ctrlr, "wait for spdk_nvme_ctrlr_cmd_get_log_page failed: rc=%d\n", rc);
3038 		goto free_buffer;
3039 	}
3040 
3041 	/* The log page was read only to clear the AEN condition; just check whether the list overflowed. */
3042 	nsid = from_le32(buffer);
3043 	if (nsid == 0xffffffffu) {
3044 		NVME_CTRLR_WARNLOG(ctrlr, "changed ns log overflowed.\n");
3045 	}
3046 
3047 free_buffer:
3048 	spdk_dma_free(buffer);
3049 	return rc;
3050 }
3051 
3052 void
3053 nvme_ctrlr_process_async_event(struct spdk_nvme_ctrlr *ctrlr,
3054 			       const struct spdk_nvme_cpl *cpl)
3055 {
3056 	union spdk_nvme_async_event_completion event;
3057 	struct spdk_nvme_ctrlr_process *active_proc;
3058 	int rc;
3059 
3060 	event.raw = cpl->cdw0;
3061 
3062 	if ((event.bits.async_event_type == SPDK_NVME_ASYNC_EVENT_TYPE_NOTICE) &&
3063 	    (event.bits.async_event_info == SPDK_NVME_ASYNC_EVENT_NS_ATTR_CHANGED)) {
3064 		nvme_ctrlr_clear_changed_ns_log(ctrlr);
3065 
3066 		rc = nvme_ctrlr_identify_active_ns(ctrlr);
3067 		if (rc) {
3068 			return;
3069 		}
3070 		nvme_ctrlr_update_namespaces(ctrlr);
3071 		nvme_io_msg_ctrlr_update(ctrlr);
3072 	}
3073 
3074 	if ((event.bits.async_event_type == SPDK_NVME_ASYNC_EVENT_TYPE_NOTICE) &&
3075 	    (event.bits.async_event_info == SPDK_NVME_ASYNC_EVENT_ANA_CHANGE)) {
3076 		if (!ctrlr->opts.disable_read_ana_log_page) {
3077 			rc = nvme_ctrlr_update_ana_log_page(ctrlr);
3078 			if (rc) {
3079 				return;
3080 			}
3081 			nvme_ctrlr_parse_ana_log_page(ctrlr, nvme_ctrlr_update_ns_ana_states,
3082 						      ctrlr);
3083 		}
3084 	}
3085 
3086 	active_proc = nvme_ctrlr_get_current_process(ctrlr);
3087 	if (active_proc && active_proc->aer_cb_fn) {
3088 		active_proc->aer_cb_fn(active_proc->aer_cb_arg, cpl);
3089 	}
3090 }
3091 
3092 static void
3093 nvme_ctrlr_queue_async_event(struct spdk_nvme_ctrlr *ctrlr,
3094 			     const struct spdk_nvme_cpl *cpl)
3095 {
3096 	struct  spdk_nvme_ctrlr_aer_completion_list *nvme_event;
3097 	struct spdk_nvme_ctrlr_process *proc;
3098 
3099 	/* Add the async event to each process object's event list */
3100 	TAILQ_FOREACH(proc, &ctrlr->active_procs, tailq) {
3101 		/* Must be shared memory so other processes can access */
3102 		nvme_event = spdk_zmalloc(sizeof(*nvme_event), 0, NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_SHARE);
3103 		if (!nvme_event) {
3104 			NVME_CTRLR_ERRLOG(ctrlr, "Alloc nvme event failed, ignore the event\n");
3105 			return;
3106 		}
3107 		nvme_event->cpl = *cpl;
3108 
3109 		STAILQ_INSERT_TAIL(&proc->async_events, nvme_event, link);
3110 	}
3111 }
3112 
3113 void
3114 nvme_ctrlr_complete_queued_async_events(struct spdk_nvme_ctrlr *ctrlr)
3115 {
3116 	struct  spdk_nvme_ctrlr_aer_completion_list  *nvme_event, *nvme_event_tmp;
3117 	struct spdk_nvme_ctrlr_process	*active_proc;
3118 
3119 	active_proc = nvme_ctrlr_get_current_process(ctrlr);
3120 
3121 	STAILQ_FOREACH_SAFE(nvme_event, &active_proc->async_events, link, nvme_event_tmp) {
3122 		STAILQ_REMOVE(&active_proc->async_events, nvme_event,
3123 			      spdk_nvme_ctrlr_aer_completion_list, link);
3124 		nvme_ctrlr_process_async_event(ctrlr, &nvme_event->cpl);
3125 		spdk_free(nvme_event);
3126 
3127 	}
3128 }
3129 
3130 static void
3131 nvme_ctrlr_async_event_cb(void *arg, const struct spdk_nvme_cpl *cpl)
3132 {
3133 	struct nvme_async_event_request	*aer = arg;
3134 	struct spdk_nvme_ctrlr		*ctrlr = aer->ctrlr;
3135 
3136 	if (cpl->status.sct == SPDK_NVME_SCT_GENERIC &&
3137 	    cpl->status.sc == SPDK_NVME_SC_ABORTED_SQ_DELETION) {
3138 		/*
3139 		 *  This completion is simulated when the controller is being shut down, to
3140 		 *  effectively abort outstanding asynchronous event requests
3141 		 *  and make sure all memory is freed.  Do not repost the
3142 		 *  request in this case.
3143 		 */
3144 		return;
3145 	}
3146 
3147 	if (cpl->status.sct == SPDK_NVME_SCT_COMMAND_SPECIFIC &&
3148 	    cpl->status.sc == SPDK_NVME_SC_ASYNC_EVENT_REQUEST_LIMIT_EXCEEDED) {
3149 		/*
3150 		 *  SPDK will only send as many AERs as the device says it supports,
3151 		 *  so this status code indicates an out-of-spec device.  Do not repost
3152 		 *  the request in this case.
3153 		 */
3154 		NVME_CTRLR_ERRLOG(ctrlr, "Controller appears out-of-spec for asynchronous event request\n"
3155 				  "handling.  Do not repost this AER.\n");
3156 		return;
3157 	}
3158 
3159 	/* Add the events to the list */
3160 	nvme_ctrlr_queue_async_event(ctrlr, cpl);
3161 
3162 	/* If the ctrlr was removed or in the destruct state, we should not send aer again */
3163 	if (ctrlr->is_removed || ctrlr->is_destructed) {
3164 		return;
3165 	}
3166 
3167 	/*
3168 	 * Repost another asynchronous event request to replace the one
3169 	 *  that just completed.
3170 	 */
3171 	if (nvme_ctrlr_construct_and_submit_aer(ctrlr, aer)) {
3172 		/*
3173 		 * We can't do anything to recover from a failure here,
3174 		 * so just print a warning message and leave the AER unsubmitted.
3175 		 */
3176 		NVME_CTRLR_ERRLOG(ctrlr, "resubmitting AER failed!\n");
3177 	}
3178 }
3179 
3180 static int
3181 nvme_ctrlr_construct_and_submit_aer(struct spdk_nvme_ctrlr *ctrlr,
3182 				    struct nvme_async_event_request *aer)
3183 {
3184 	struct nvme_request *req;
3185 
3186 	aer->ctrlr = ctrlr;
3187 	req = nvme_allocate_request_null(ctrlr->adminq, nvme_ctrlr_async_event_cb, aer);
3188 	aer->req = req;
3189 	if (req == NULL) {
3190 		return -1;
3191 	}
3192 
3193 	req->cmd.opc = SPDK_NVME_OPC_ASYNC_EVENT_REQUEST;
3194 	return nvme_ctrlr_submit_admin_request(ctrlr, req);
3195 }
3196 
3197 static void
3198 nvme_ctrlr_configure_aer_done(void *arg, const struct spdk_nvme_cpl *cpl)
3199 {
3200 	struct nvme_async_event_request		*aer;
3201 	int					rc;
3202 	uint32_t				i;
3203 	struct spdk_nvme_ctrlr *ctrlr =	(struct spdk_nvme_ctrlr *)arg;
3204 
3205 	if (spdk_nvme_cpl_is_error(cpl)) {
3206 		NVME_CTRLR_NOTICELOG(ctrlr, "nvme_ctrlr_configure_aer failed!\n");
3207 		ctrlr->num_aers = 0;
3208 	} else {
3209 		/* aerl is a zero-based value, so we need to add 1 here. */
3210 		ctrlr->num_aers = spdk_min(NVME_MAX_ASYNC_EVENTS, (ctrlr->cdata.aerl + 1));
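		/*
		 * Example (illustrative): cdata.aerl == 3 means the controller supports
		 * 4 outstanding AERs, so num_aers is min(NVME_MAX_ASYNC_EVENTS, 4).
		 */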
3211 	}
3212 
3213 	for (i = 0; i < ctrlr->num_aers; i++) {
3214 		aer = &ctrlr->aer[i];
3215 		rc = nvme_ctrlr_construct_and_submit_aer(ctrlr, aer);
3216 		if (rc) {
3217 			NVME_CTRLR_ERRLOG(ctrlr, "nvme_ctrlr_construct_and_submit_aer failed!\n");
3218 			nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
3219 			return;
3220 		}
3221 	}
3222 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_KEEP_ALIVE_TIMEOUT, ctrlr->opts.admin_timeout_ms);
3223 }
3224 
3225 static int
3226 nvme_ctrlr_configure_aer(struct spdk_nvme_ctrlr *ctrlr)
3227 {
3228 	union spdk_nvme_feat_async_event_configuration	config;
3229 	int						rc;
3230 
3231 	config.raw = 0;
3232 
3233 	if (spdk_nvme_ctrlr_is_discovery(ctrlr)) {
3234 		config.bits.discovery_log_change_notice = 1;
3235 	} else {
3236 		config.bits.crit_warn.bits.available_spare = 1;
3237 		config.bits.crit_warn.bits.temperature = 1;
3238 		config.bits.crit_warn.bits.device_reliability = 1;
3239 		config.bits.crit_warn.bits.read_only = 1;
3240 		config.bits.crit_warn.bits.volatile_memory_backup = 1;
3241 
3242 		if (ctrlr->vs.raw >= SPDK_NVME_VERSION(1, 2, 0)) {
3243 			if (ctrlr->cdata.oaes.ns_attribute_notices) {
3244 				config.bits.ns_attr_notice = 1;
3245 			}
3246 			if (ctrlr->cdata.oaes.fw_activation_notices) {
3247 				config.bits.fw_activation_notice = 1;
3248 			}
3249 			if (ctrlr->cdata.oaes.ana_change_notices) {
3250 				config.bits.ana_change_notice = 1;
3251 			}
3252 		}
3253 		if (ctrlr->vs.raw >= SPDK_NVME_VERSION(1, 3, 0) && ctrlr->cdata.lpa.telemetry) {
3254 			config.bits.telemetry_log_notice = 1;
3255 		}
3256 	}
3257 
3258 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_CONFIGURE_AER,
3259 			     ctrlr->opts.admin_timeout_ms);
3260 
3261 	rc = nvme_ctrlr_cmd_set_async_event_config(ctrlr, config,
3262 			nvme_ctrlr_configure_aer_done,
3263 			ctrlr);
3264 	if (rc != 0) {
3265 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
3266 		return rc;
3267 	}
3268 
3269 	return 0;
3270 }
3271 
3272 struct spdk_nvme_ctrlr_process *
3273 nvme_ctrlr_get_process(struct spdk_nvme_ctrlr *ctrlr, pid_t pid)
3274 {
3275 	struct spdk_nvme_ctrlr_process	*active_proc;
3276 
3277 	TAILQ_FOREACH(active_proc, &ctrlr->active_procs, tailq) {
3278 		if (active_proc->pid == pid) {
3279 			return active_proc;
3280 		}
3281 	}
3282 
3283 	return NULL;
3284 }
3285 
3286 struct spdk_nvme_ctrlr_process *
3287 nvme_ctrlr_get_current_process(struct spdk_nvme_ctrlr *ctrlr)
3288 {
3289 	return nvme_ctrlr_get_process(ctrlr, getpid());
3290 }
3291 
3292 /**
3293  * This function will be called when a process is using the controller.
3294  *  1. For the primary process, it is called when constructing the controller.
3295  *  2. For a secondary process, it is called when probing the controller.
3296  * Note: it checks whether the process has already been added before adding it again.
3297  */
3298 int
3299 nvme_ctrlr_add_process(struct spdk_nvme_ctrlr *ctrlr, void *devhandle)
3300 {
3301 	struct spdk_nvme_ctrlr_process	*ctrlr_proc;
3302 	pid_t				pid = getpid();
3303 
3304 	/* Check whether the process is already added or not */
3305 	if (nvme_ctrlr_get_process(ctrlr, pid)) {
3306 		return 0;
3307 	}
3308 
3309 	/* Initialize the per process properties for this ctrlr */
3310 	ctrlr_proc = spdk_zmalloc(sizeof(struct spdk_nvme_ctrlr_process),
3311 				  64, NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_SHARE);
3312 	if (ctrlr_proc == NULL) {
3313 		NVME_CTRLR_ERRLOG(ctrlr, "failed to allocate memory to track the process props\n");
3314 
3315 		return -1;
3316 	}
3317 
3318 	ctrlr_proc->is_primary = spdk_process_is_primary();
3319 	ctrlr_proc->pid = pid;
3320 	STAILQ_INIT(&ctrlr_proc->active_reqs);
3321 	ctrlr_proc->devhandle = devhandle;
3322 	ctrlr_proc->ref = 0;
3323 	TAILQ_INIT(&ctrlr_proc->allocated_io_qpairs);
3324 	STAILQ_INIT(&ctrlr_proc->async_events);
3325 
3326 	TAILQ_INSERT_TAIL(&ctrlr->active_procs, ctrlr_proc, tailq);
3327 
3328 	return 0;
3329 }
3330 
3331 /**
3332  * This function will be called when the process detaches the controller.
3333  * Note: the ctrlr_lock must be held when calling this function.
3334  */
3335 static void
3336 nvme_ctrlr_remove_process(struct spdk_nvme_ctrlr *ctrlr,
3337 			  struct spdk_nvme_ctrlr_process *proc)
3338 {
3339 	struct spdk_nvme_qpair	*qpair, *tmp_qpair;
3340 
3341 	assert(STAILQ_EMPTY(&proc->active_reqs));
3342 
3343 	TAILQ_FOREACH_SAFE(qpair, &proc->allocated_io_qpairs, per_process_tailq, tmp_qpair) {
3344 		spdk_nvme_ctrlr_free_io_qpair(qpair);
3345 	}
3346 
3347 	TAILQ_REMOVE(&ctrlr->active_procs, proc, tailq);
3348 
3349 	if (ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) {
3350 		spdk_pci_device_detach(proc->devhandle);
3351 	}
3352 
3353 	spdk_free(proc);
3354 }
3355 
3356 /**
3357  * This function will be called when a process has exited unexpectedly,
3358  *  in order to free any incomplete nvme requests, allocated IO qpairs
3359  *  and allocated memory.
3360  * Note: the ctrlr_lock must be held when calling this function.
3361  */
3362 static void
3363 nvme_ctrlr_cleanup_process(struct spdk_nvme_ctrlr_process *proc)
3364 {
3365 	struct nvme_request	*req, *tmp_req;
3366 	struct spdk_nvme_qpair	*qpair, *tmp_qpair;
3367 	struct spdk_nvme_ctrlr_aer_completion_list *event;
3368 
3369 	STAILQ_FOREACH_SAFE(req, &proc->active_reqs, stailq, tmp_req) {
3370 		STAILQ_REMOVE(&proc->active_reqs, req, nvme_request, stailq);
3371 
3372 		assert(req->pid == proc->pid);
3373 		if (req->user_buffer && req->payload_size) {
3374 			spdk_free(req->payload.contig_or_cb_arg);
3375 		}
3376 		nvme_free_request(req);
3377 	}
3378 
3379 	/* Free any async events still queued on this process object's event list */
3380 	while (!STAILQ_EMPTY(&proc->async_events)) {
3381 		event = STAILQ_FIRST(&proc->async_events);
3382 		STAILQ_REMOVE_HEAD(&proc->async_events, link);
3383 		spdk_free(event);
3384 	}
3385 
3386 	TAILQ_FOREACH_SAFE(qpair, &proc->allocated_io_qpairs, per_process_tailq, tmp_qpair) {
3387 		TAILQ_REMOVE(&proc->allocated_io_qpairs, qpair, per_process_tailq);
3388 
3389 		/*
3390 		 * The process may have been killed while some qpairs were in their
3391 		 *  completion context.  Clear that flag here to allow these IO
3392 		 *  qpairs to be deleted.
3393 		 */
3394 		qpair->in_completion_context = 0;
3395 
3396 		qpair->no_deletion_notification_needed = 1;
3397 
3398 		spdk_nvme_ctrlr_free_io_qpair(qpair);
3399 	}
3400 
3401 	spdk_free(proc);
3402 }
3403 
3404 /**
3405  * This function will be called when destructing the controller.
3406  *  1. There are no more admin requests outstanding on this controller.
3407  *  2. Clean up any resources left allocated by a process that is gone.
3408  */
3409 void
3410 nvme_ctrlr_free_processes(struct spdk_nvme_ctrlr *ctrlr)
3411 {
3412 	struct spdk_nvme_ctrlr_process	*active_proc, *tmp;
3413 
3414 	/* Free all the processes' properties and make sure no pending admin IOs */
3415 	TAILQ_FOREACH_SAFE(active_proc, &ctrlr->active_procs, tailq, tmp) {
3416 		TAILQ_REMOVE(&ctrlr->active_procs, active_proc, tailq);
3417 
3418 		assert(STAILQ_EMPTY(&active_proc->active_reqs));
3419 
3420 		spdk_free(active_proc);
3421 	}
3422 }
3423 
3424 /**
3425  * This function will be called when any other process attaches or
3426  *  detaches the controller, in order to clean up processes that
3427  *  terminated unexpectedly.
3428  * Note: the ctrlr_lock must be held when calling this function.
3429  */
3430 static int
3431 nvme_ctrlr_remove_inactive_proc(struct spdk_nvme_ctrlr *ctrlr)
3432 {
3433 	struct spdk_nvme_ctrlr_process	*active_proc, *tmp;
3434 	int				active_proc_count = 0;
3435 
3436 	TAILQ_FOREACH_SAFE(active_proc, &ctrlr->active_procs, tailq, tmp) {
3437 		if ((kill(active_proc->pid, 0) == -1) && (errno == ESRCH)) {
3438 			NVME_CTRLR_ERRLOG(ctrlr, "process %d terminated unexpectedly\n", active_proc->pid);
3439 
3440 			TAILQ_REMOVE(&ctrlr->active_procs, active_proc, tailq);
3441 
3442 			nvme_ctrlr_cleanup_process(active_proc);
3443 		} else {
3444 			active_proc_count++;
3445 		}
3446 	}
3447 
3448 	return active_proc_count;
3449 }
3450 
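/*
 * Per-process reference counting: nvme_ctrlr_proc_get_ref() and
 * nvme_ctrlr_proc_put_ref() bump and drop the calling process' reference on the
 * controller.  Both also sweep away any processes that terminated without
 * detaching.  When a process drops its last reference and it is not the only
 * process still attached, its per-process state is removed immediately; the
 * final process is instead cleaned up during controller destruction.
 */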
3451 void
3452 nvme_ctrlr_proc_get_ref(struct spdk_nvme_ctrlr *ctrlr)
3453 {
3454 	struct spdk_nvme_ctrlr_process	*active_proc;
3455 
3456 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
3457 
3458 	nvme_ctrlr_remove_inactive_proc(ctrlr);
3459 
3460 	active_proc = nvme_ctrlr_get_current_process(ctrlr);
3461 	if (active_proc) {
3462 		active_proc->ref++;
3463 	}
3464 
3465 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
3466 }
3467 
3468 void
3469 nvme_ctrlr_proc_put_ref(struct spdk_nvme_ctrlr *ctrlr)
3470 {
3471 	struct spdk_nvme_ctrlr_process	*active_proc;
3472 	int				proc_count;
3473 
3474 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
3475 
3476 	proc_count = nvme_ctrlr_remove_inactive_proc(ctrlr);
3477 
3478 	active_proc = nvme_ctrlr_get_current_process(ctrlr);
3479 	if (active_proc) {
3480 		active_proc->ref--;
3481 		assert(active_proc->ref >= 0);
3482 
3483 		/*
3484 		 * The last active process will be removed at the end of
3485 		 * the destruction of the controller.
3486 		 */
3487 		if (active_proc->ref == 0 && proc_count != 1) {
3488 			nvme_ctrlr_remove_process(ctrlr, active_proc);
3489 		}
3490 	}
3491 
3492 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
3493 }
3494 
3495 int
3496 nvme_ctrlr_get_ref_count(struct spdk_nvme_ctrlr *ctrlr)
3497 {
3498 	struct spdk_nvme_ctrlr_process	*active_proc;
3499 	int				ref = 0;
3500 
3501 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
3502 
3503 	nvme_ctrlr_remove_inactive_proc(ctrlr);
3504 
3505 	TAILQ_FOREACH(active_proc, &ctrlr->active_procs, tailq) {
3506 		ref += active_proc->ref;
3507 	}
3508 
3509 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
3510 
3511 	return ref;
3512 }
3513 
3514 /**
3515  *  Get the PCI device handle which is only visible to its associated process.
3516  */
3517 struct spdk_pci_device *
3518 nvme_ctrlr_proc_get_devhandle(struct spdk_nvme_ctrlr *ctrlr)
3519 {
3520 	struct spdk_nvme_ctrlr_process	*active_proc;
3521 	struct spdk_pci_device		*devhandle = NULL;
3522 
3523 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
3524 
3525 	active_proc = nvme_ctrlr_get_current_process(ctrlr);
3526 	if (active_proc) {
3527 		devhandle = active_proc->devhandle;
3528 	}
3529 
3530 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
3531 
3532 	return devhandle;
3533 }
3534 
3535 static void
3536 nvme_ctrlr_process_init_vs_done(void *ctx, uint64_t value, const struct spdk_nvme_cpl *cpl)
3537 {
3538 	struct spdk_nvme_ctrlr *ctrlr = ctx;
3539 
3540 	if (spdk_nvme_cpl_is_error(cpl)) {
3541 		NVME_CTRLR_ERRLOG(ctrlr, "Failed to read the VS register\n");
3542 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
3543 		return;
3544 	}
3545 
3546 	assert(value <= UINT32_MAX);
3547 	ctrlr->vs.raw = (uint32_t)value;
3548 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_READ_CAP, NVME_TIMEOUT_INFINITE);
3549 }
3550 
3551 static void
3552 nvme_ctrlr_process_init_cap_done(void *ctx, uint64_t value, const struct spdk_nvme_cpl *cpl)
3553 {
3554 	struct spdk_nvme_ctrlr *ctrlr = ctx;
3555 
3556 	if (spdk_nvme_cpl_is_error(cpl)) {
3557 		NVME_CTRLR_ERRLOG(ctrlr, "Failed to read the CAP register\n");
3558 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
3559 		return;
3560 	}
3561 
3562 	ctrlr->cap.raw = value;
3563 	nvme_ctrlr_init_cap(ctrlr);
3564 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_CHECK_EN, NVME_TIMEOUT_INFINITE);
3565 }
3566 
3567 static void
3568 nvme_ctrlr_process_init_check_en(void *ctx, uint64_t value, const struct spdk_nvme_cpl *cpl)
3569 {
3570 	struct spdk_nvme_ctrlr *ctrlr = ctx;
3571 	enum nvme_ctrlr_state state;
3572 
3573 	if (spdk_nvme_cpl_is_error(cpl)) {
3574 		NVME_CTRLR_ERRLOG(ctrlr, "Failed to read the CC register\n");
3575 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
3576 		return;
3577 	}
3578 
3579 	assert(value <= UINT32_MAX);
3580 	ctrlr->process_init_cc.raw = (uint32_t)value;
3581 
3582 	if (ctrlr->process_init_cc.bits.en) {
3583 		NVME_CTRLR_DEBUGLOG(ctrlr, "CC.EN = 1\n");
3584 		state = NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1;
3585 	} else {
3586 		state = NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0;
3587 	}
3588 
3589 	nvme_ctrlr_set_state(ctrlr, state, nvme_ctrlr_get_ready_timeout(ctrlr));
3590 }
3591 
3592 static void
3593 nvme_ctrlr_process_init_set_en_0(void *ctx, uint64_t value, const struct spdk_nvme_cpl *cpl)
3594 {
3595 	struct spdk_nvme_ctrlr *ctrlr = ctx;
3596 
3597 	if (spdk_nvme_cpl_is_error(cpl)) {
3598 		NVME_CTRLR_ERRLOG(ctrlr, "Failed to write the CC register\n");
3599 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
3600 		return;
3601 	}
3602 
3603 	/*
3604 	 * Wait 2.5 seconds before accessing PCI registers.
3605 	 * Not using sleep() to avoid blocking other controllers' initialization.
3606 	 */
3607 	if (ctrlr->quirks & NVME_QUIRK_DELAY_BEFORE_CHK_RDY) {
3608 		NVME_CTRLR_DEBUGLOG(ctrlr, "Applying quirk: delay 2.5 seconds before reading registers\n");
3609 		ctrlr->sleep_timeout_tsc = spdk_get_ticks() + (2500 * spdk_get_ticks_hz() / 1000);
3610 	}
3611 
3612 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0,
3613 			     nvme_ctrlr_get_ready_timeout(ctrlr));
3614 }
3615 
3616 static void
3617 nvme_ctrlr_process_init_set_en_0_read_cc(void *ctx, uint64_t value, const struct spdk_nvme_cpl *cpl)
3618 {
3619 	struct spdk_nvme_ctrlr *ctrlr = ctx;
3620 	union spdk_nvme_cc_register cc;
3621 	int rc;
3622 
3623 	if (spdk_nvme_cpl_is_error(cpl)) {
3624 		NVME_CTRLR_ERRLOG(ctrlr, "Failed to read the CC register\n");
3625 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
3626 		return;
3627 	}
3628 
3629 	assert(value <= UINT32_MAX);
3630 	cc.raw = (uint32_t)value;
3631 	cc.bits.en = 0;
3632 	ctrlr->process_init_cc.raw = cc.raw;
3633 
3634 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_EN_0_WAIT_FOR_CC,
3635 			     nvme_ctrlr_get_ready_timeout(ctrlr));
3636 
3637 	rc = nvme_ctrlr_set_cc_async(ctrlr, cc.raw, nvme_ctrlr_process_init_set_en_0, ctrlr);
3638 	if (rc != 0) {
3639 		NVME_CTRLR_ERRLOG(ctrlr, "set_cc() failed\n");
3640 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
3641 	}
3642 }
3643 
3644 static void
3645 nvme_ctrlr_process_init_wait_for_ready_1(void *ctx, uint64_t value, const struct spdk_nvme_cpl *cpl)
3646 {
3647 	struct spdk_nvme_ctrlr *ctrlr = ctx;
3648 	union spdk_nvme_csts_register csts;
3649 
3650 	if (spdk_nvme_cpl_is_error(cpl)) {
3651 		/* While a device is resetting, it may be unable to service MMIO reads
3652 		 * temporarily. Allow for this case.
3653 		 */
3654 		if (!ctrlr->is_failed && ctrlr->state_timeout_tsc != NVME_TIMEOUT_INFINITE) {
3655 			NVME_CTRLR_DEBUGLOG(ctrlr, "Failed to read the CSTS register\n");
3656 			nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1,
3657 					     NVME_TIMEOUT_KEEP_EXISTING);
3658 		} else {
3659 			NVME_CTRLR_ERRLOG(ctrlr, "Failed to read the CSTS register\n");
3660 			nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
3661 		}
3662 
3663 		return;
3664 	}
3665 
3666 	assert(value <= UINT32_MAX);
3667 	csts.raw = (uint32_t)value;
3668 	if (csts.bits.rdy == 1 || csts.bits.cfs == 1) {
3669 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_EN_0,
3670 				     nvme_ctrlr_get_ready_timeout(ctrlr));
3671 	} else {
3672 		NVME_CTRLR_DEBUGLOG(ctrlr, "CC.EN = 1 && CSTS.RDY = 0 - waiting for reset to complete\n");
3673 		nvme_ctrlr_set_state_quiet(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1,
3674 					   NVME_TIMEOUT_KEEP_EXISTING);
3675 	}
3676 }
3677 
3678 static void
3679 nvme_ctrlr_process_init_wait_for_ready_0(void *ctx, uint64_t value, const struct spdk_nvme_cpl *cpl)
3680 {
3681 	struct spdk_nvme_ctrlr *ctrlr = ctx;
3682 	union spdk_nvme_csts_register csts;
3683 
3684 	if (spdk_nvme_cpl_is_error(cpl)) {
3685 		/* While a device is resetting, it may be unable to service MMIO reads
3686 		 * temporarily. Allow for this case.
3687 		 */
3688 		if (!ctrlr->is_failed && ctrlr->state_timeout_tsc != NVME_TIMEOUT_INFINITE) {
3689 			NVME_CTRLR_DEBUGLOG(ctrlr, "Failed to read the CSTS register\n");
3690 			nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0,
3691 					     NVME_TIMEOUT_KEEP_EXISTING);
3692 		} else {
3693 			NVME_CTRLR_ERRLOG(ctrlr, "Failed to read the CSTS register\n");
3694 			nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
3695 		}
3696 
3697 		return;
3698 	}
3699 
3700 	assert(value <= UINT32_MAX);
3701 	csts.raw = (uint32_t)value;
3702 	if (csts.bits.rdy == 0) {
3703 		NVME_CTRLR_DEBUGLOG(ctrlr, "CC.EN = 0 && CSTS.RDY = 0\n");
3704 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_DISABLED,
3705 				     nvme_ctrlr_get_ready_timeout(ctrlr));
3706 	} else {
3707 		nvme_ctrlr_set_state_quiet(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0,
3708 					   NVME_TIMEOUT_KEEP_EXISTING);
3709 	}
3710 }
3711 
3712 static void
3713 nvme_ctrlr_process_init_enable_wait_for_ready_1(void *ctx, uint64_t value,
3714 		const struct spdk_nvme_cpl *cpl)
3715 {
3716 	struct spdk_nvme_ctrlr *ctrlr = ctx;
3717 	union spdk_nvme_csts_register csts;
3718 
3719 	if (spdk_nvme_cpl_is_error(cpl)) {
3720 		/* While a device is resetting, it may be unable to service MMIO reads
3721 		 * temporarily. Allow for this case.
3722 		 */
3723 		if (!ctrlr->is_failed && ctrlr->state_timeout_tsc != NVME_TIMEOUT_INFINITE) {
3724 			NVME_CTRLR_DEBUGLOG(ctrlr, "Failed to read the CSTS register\n");
3725 			nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1,
3726 					     NVME_TIMEOUT_KEEP_EXISTING);
3727 		} else {
3728 			NVME_CTRLR_ERRLOG(ctrlr, "Failed to read the CSTS register\n");
3729 			nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
3730 		}
3731 
3732 		return;
3733 	}
3734 
3735 	assert(value <= UINT32_MAX);
3736 	csts.raw = (uint32_t)value;
3737 	if (csts.bits.rdy == 1) {
3738 		NVME_CTRLR_DEBUGLOG(ctrlr, "CC.EN = 1 && CSTS.RDY = 1 - controller is ready\n");
3739 		/*
3740 		 * The controller has been enabled.
3741 		 *  Perform the rest of initialization serially.
3742 		 */
3743 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_RESET_ADMIN_QUEUE,
3744 				     ctrlr->opts.admin_timeout_ms);
3745 	} else {
3746 		nvme_ctrlr_set_state_quiet(ctrlr, NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1,
3747 					   NVME_TIMEOUT_KEEP_EXISTING);
3748 	}
3749 }
3750 
3751 /**
3752  * This function will be called repeatedly during initialization until the controller is ready.
3753  */
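/*
 * Illustrative only: callers drive this state machine by polling it.  A minimal
 * sketch (the real probe/attach path adds timeout handling and interleaves other
 * work between polls):
 *
 *	do {
 *		rc = nvme_ctrlr_process_init(ctrlr);
 *	} while (rc == 0 && ctrlr->state != NVME_CTRLR_STATE_READY);
 *
 * A negative return value or a transition to NVME_CTRLR_STATE_ERROR means
 * initialization failed.
 */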
3754 int
3755 nvme_ctrlr_process_init(struct spdk_nvme_ctrlr *ctrlr)
3756 {
3757 	uint32_t ready_timeout_in_ms;
3758 	uint64_t ticks;
3759 	int rc = 0;
3760 
3761 	ticks = spdk_get_ticks();
3762 
3763 	/*
3764 	 * May need to avoid accessing any register on the target controller
3765 	 * for a while. Return early without touching the FSM.
3766 	 * Check sleep_timeout_tsc > 0 for unit test.
3767 	 */
3768 	if ((ctrlr->sleep_timeout_tsc > 0) &&
3769 	    (ticks <= ctrlr->sleep_timeout_tsc)) {
3770 		return 0;
3771 	}
3772 	ctrlr->sleep_timeout_tsc = 0;
3773 
3774 	ready_timeout_in_ms = nvme_ctrlr_get_ready_timeout(ctrlr);
3775 
3776 	/*
3777 	 * Check if the current initialization step is done or has timed out.
3778 	 */
3779 	switch (ctrlr->state) {
3780 	case NVME_CTRLR_STATE_INIT_DELAY:
3781 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_INIT, ready_timeout_in_ms);
3782 		if (ctrlr->quirks & NVME_QUIRK_DELAY_BEFORE_INIT) {
3783 			/*
3784 			 * Controller may need some delay before it's enabled.
3785 			 *
3786 			 * This is a workaround for an issue where the PCIe-attached NVMe controller
3787 			 * is not ready after VFIO reset. We delay the initialization rather than the
3788 			 * enabling itself, because this is required only for the very first enabling
3789 			 * - directly after a VFIO reset.
3790 			 */
3791 			NVME_CTRLR_DEBUGLOG(ctrlr, "Adding 2 second delay before initializing the controller\n");
3792 			ctrlr->sleep_timeout_tsc = ticks + (2000 * spdk_get_ticks_hz() / 1000);
3793 		}
3794 		break;
3795 
3796 	case NVME_CTRLR_STATE_CONNECT_ADMINQ: /* synonymous with NVME_CTRLR_STATE_INIT */
3797 		rc = nvme_transport_ctrlr_connect_qpair(ctrlr, ctrlr->adminq);
3798 		if (rc == 0) {
3799 			nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_CONNECT_ADMINQ,
3800 					     NVME_TIMEOUT_INFINITE);
3801 		} else {
3802 			nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
3803 		}
3804 		break;
3805 
3806 	case NVME_CTRLR_STATE_WAIT_FOR_CONNECT_ADMINQ:
3807 		spdk_nvme_qpair_process_completions(ctrlr->adminq, 0);
3808 
3809 		switch (nvme_qpair_get_state(ctrlr->adminq)) {
3810 		case NVME_QPAIR_CONNECTING:
3811 			break;
3812 		case NVME_QPAIR_CONNECTED:
3813 			nvme_qpair_set_state(ctrlr->adminq, NVME_QPAIR_ENABLED);
3814 		/* Fall through */
3815 		case NVME_QPAIR_ENABLED:
3816 			nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_READ_VS,
3817 					     NVME_TIMEOUT_INFINITE);
3818 			/* Abort any queued requests that were sent while the adminq was connecting
3819 			 * to avoid stalling the init process during a reset, as requests don't get
3820 			 * resubmitted while the controller is resetting and subsequent commands
3821 			 * would get queued too.
3822 			 */
3823 			nvme_qpair_abort_queued_reqs(ctrlr->adminq);
3824 			break;
3825 		case NVME_QPAIR_DISCONNECTING:
3826 			assert(ctrlr->adminq->async == true);
3827 			break;
3828 		case NVME_QPAIR_DISCONNECTED:
3829 		/* fallthrough */
3830 		default:
3831 			nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
3832 			break;
3833 		}
3834 
3835 		break;
3836 
3837 	case NVME_CTRLR_STATE_READ_VS:
3838 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_READ_VS_WAIT_FOR_VS, NVME_TIMEOUT_INFINITE);
3839 		rc = nvme_ctrlr_get_vs_async(ctrlr, nvme_ctrlr_process_init_vs_done, ctrlr);
3840 		break;
3841 
3842 	case NVME_CTRLR_STATE_READ_CAP:
3843 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_READ_CAP_WAIT_FOR_CAP, NVME_TIMEOUT_INFINITE);
3844 		rc = nvme_ctrlr_get_cap_async(ctrlr, nvme_ctrlr_process_init_cap_done, ctrlr);
3845 		break;
3846 
3847 	case NVME_CTRLR_STATE_CHECK_EN:
3848 		/* Begin the hardware initialization by making sure the controller is disabled. */
3849 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_CHECK_EN_WAIT_FOR_CC, ready_timeout_in_ms);
3850 		rc = nvme_ctrlr_get_cc_async(ctrlr, nvme_ctrlr_process_init_check_en, ctrlr);
3851 		break;
3852 
3853 	case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1:
3854 		/*
3855 		 * Controller is currently enabled. We need to disable it to cause a reset.
3856 		 *
3857 		 * If CC.EN = 1 && CSTS.RDY = 0, the controller is in the process of becoming ready.
3858 		 *  Wait for the ready bit to be 1 before disabling the controller.
3859 		 */
3860 		nvme_ctrlr_set_state_quiet(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1_WAIT_FOR_CSTS,
3861 					   NVME_TIMEOUT_KEEP_EXISTING);
3862 		rc = nvme_ctrlr_get_csts_async(ctrlr, nvme_ctrlr_process_init_wait_for_ready_1, ctrlr);
3863 		break;
3864 
3865 	case NVME_CTRLR_STATE_SET_EN_0:
3866 		NVME_CTRLR_DEBUGLOG(ctrlr, "Setting CC.EN = 0\n");
3867 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_EN_0_WAIT_FOR_CC, ready_timeout_in_ms);
3868 		rc = nvme_ctrlr_get_cc_async(ctrlr, nvme_ctrlr_process_init_set_en_0_read_cc, ctrlr);
3869 		break;
3870 
3871 	case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0:
3872 		nvme_ctrlr_set_state_quiet(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0_WAIT_FOR_CSTS,
3873 					   NVME_TIMEOUT_KEEP_EXISTING);
3874 		rc = nvme_ctrlr_get_csts_async(ctrlr, nvme_ctrlr_process_init_wait_for_ready_0, ctrlr);
3875 		break;
3876 
3877 	case NVME_CTRLR_STATE_DISABLED:
3878 		if (ctrlr->is_disconnecting) {
3879 			NVME_CTRLR_DEBUGLOG(ctrlr, "Ctrlr was disabled.\n");
3880 		} else {
3881 			nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ENABLE, ready_timeout_in_ms);
3882 
3883 			/*
3884 			 * Delay 100us before setting CC.EN = 1.  Some NVMe SSDs miss CC.EN getting
3885 			 *  set to 1 if it is too soon after CSTS.RDY is reported as 0.
3886 			 */
3887 			spdk_delay_us(100);
3888 		}
3889 		break;
3890 
3891 	case NVME_CTRLR_STATE_ENABLE:
3892 		NVME_CTRLR_DEBUGLOG(ctrlr, "Setting CC.EN = 1\n");
3893 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ENABLE_WAIT_FOR_CC, ready_timeout_in_ms);
3894 		rc = nvme_ctrlr_enable(ctrlr);
3895 		if (rc) {
3896 			NVME_CTRLR_ERRLOG(ctrlr, "Ctrlr enable failed with error: %d\n", rc);
3897 		}
3898 		return rc;
3899 
3900 	case NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1:
3901 		nvme_ctrlr_set_state_quiet(ctrlr, NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1_WAIT_FOR_CSTS,
3902 					   NVME_TIMEOUT_KEEP_EXISTING);
3903 		rc = nvme_ctrlr_get_csts_async(ctrlr, nvme_ctrlr_process_init_enable_wait_for_ready_1,
3904 					       ctrlr);
3905 		break;
3906 
3907 	case NVME_CTRLR_STATE_RESET_ADMIN_QUEUE:
3908 		nvme_transport_qpair_reset(ctrlr->adminq);
3909 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY, NVME_TIMEOUT_INFINITE);
3910 		break;
3911 
3912 	case NVME_CTRLR_STATE_IDENTIFY:
3913 		rc = nvme_ctrlr_identify(ctrlr);
3914 		break;
3915 
3916 	case NVME_CTRLR_STATE_CONFIGURE_AER:
3917 		rc = nvme_ctrlr_configure_aer(ctrlr);
3918 		break;
3919 
3920 	case NVME_CTRLR_STATE_SET_KEEP_ALIVE_TIMEOUT:
3921 		rc = nvme_ctrlr_set_keep_alive_timeout(ctrlr);
3922 		break;
3923 
3924 	case NVME_CTRLR_STATE_IDENTIFY_IOCS_SPECIFIC:
3925 		rc = nvme_ctrlr_identify_iocs_specific(ctrlr);
3926 		break;
3927 
3928 	case NVME_CTRLR_STATE_GET_ZNS_CMD_EFFECTS_LOG:
3929 		rc = nvme_ctrlr_get_zns_cmd_and_effects_log(ctrlr);
3930 		break;
3931 
3932 	case NVME_CTRLR_STATE_SET_NUM_QUEUES:
3933 		nvme_ctrlr_update_nvmf_ioccsz(ctrlr);
3934 		rc = nvme_ctrlr_set_num_queues(ctrlr);
3935 		break;
3936 
3937 	case NVME_CTRLR_STATE_IDENTIFY_ACTIVE_NS:
3938 		_nvme_ctrlr_identify_active_ns(ctrlr);
3939 		break;
3940 
3941 	case NVME_CTRLR_STATE_IDENTIFY_NS:
3942 		rc = nvme_ctrlr_identify_namespaces(ctrlr);
3943 		break;
3944 
3945 	case NVME_CTRLR_STATE_IDENTIFY_ID_DESCS:
3946 		rc = nvme_ctrlr_identify_id_desc_namespaces(ctrlr);
3947 		break;
3948 
3949 	case NVME_CTRLR_STATE_IDENTIFY_NS_IOCS_SPECIFIC:
3950 		rc = nvme_ctrlr_identify_namespaces_iocs_specific(ctrlr);
3951 		break;
3952 
3953 	case NVME_CTRLR_STATE_SET_SUPPORTED_LOG_PAGES:
3954 		rc = nvme_ctrlr_set_supported_log_pages(ctrlr);
3955 		break;
3956 
3957 	case NVME_CTRLR_STATE_SET_SUPPORTED_INTEL_LOG_PAGES:
3958 		rc = nvme_ctrlr_set_intel_support_log_pages(ctrlr);
3959 		break;
3960 
3961 	case NVME_CTRLR_STATE_SET_SUPPORTED_FEATURES:
3962 		nvme_ctrlr_set_supported_features(ctrlr);
3963 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_DB_BUF_CFG,
3964 				     ctrlr->opts.admin_timeout_ms);
3965 		break;
3966 
3967 	case NVME_CTRLR_STATE_SET_DB_BUF_CFG:
3968 		rc = nvme_ctrlr_set_doorbell_buffer_config(ctrlr);
3969 		break;
3970 
3971 	case NVME_CTRLR_STATE_SET_HOST_ID:
3972 		rc = nvme_ctrlr_set_host_id(ctrlr);
3973 		break;
3974 
3975 	case NVME_CTRLR_STATE_TRANSPORT_READY:
3976 		rc = nvme_transport_ctrlr_ready(ctrlr);
3977 		if (rc) {
3978 			NVME_CTRLR_ERRLOG(ctrlr, "Transport controller ready step failed: rc %d\n", rc);
3979 			nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
3980 		} else {
3981 			nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_READY, NVME_TIMEOUT_INFINITE);
3982 		}
3983 		break;
3984 
3985 	case NVME_CTRLR_STATE_READY:
3986 		NVME_CTRLR_DEBUGLOG(ctrlr, "Ctrlr already in ready state\n");
3987 		return 0;
3988 
3989 	case NVME_CTRLR_STATE_ERROR:
3990 		NVME_CTRLR_ERRLOG(ctrlr, "Ctrlr is in error state\n");
3991 		return -1;
3992 
3993 	case NVME_CTRLR_STATE_READ_VS_WAIT_FOR_VS:
3994 	case NVME_CTRLR_STATE_READ_CAP_WAIT_FOR_CAP:
3995 	case NVME_CTRLR_STATE_CHECK_EN_WAIT_FOR_CC:
3996 	case NVME_CTRLR_STATE_SET_EN_0_WAIT_FOR_CC:
3997 	case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1_WAIT_FOR_CSTS:
3998 	case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0_WAIT_FOR_CSTS:
3999 	case NVME_CTRLR_STATE_ENABLE_WAIT_FOR_CC:
4000 	case NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1_WAIT_FOR_CSTS:
4001 	case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY:
4002 	case NVME_CTRLR_STATE_WAIT_FOR_CONFIGURE_AER:
4003 	case NVME_CTRLR_STATE_WAIT_FOR_KEEP_ALIVE_TIMEOUT:
4004 	case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_IOCS_SPECIFIC:
4005 	case NVME_CTRLR_STATE_WAIT_FOR_GET_ZNS_CMD_EFFECTS_LOG:
4006 	case NVME_CTRLR_STATE_WAIT_FOR_SET_NUM_QUEUES:
4007 	case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_ACTIVE_NS:
4008 	case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_NS:
4009 	case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_ID_DESCS:
4010 	case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_NS_IOCS_SPECIFIC:
4011 	case NVME_CTRLR_STATE_WAIT_FOR_SUPPORTED_INTEL_LOG_PAGES:
4012 	case NVME_CTRLR_STATE_WAIT_FOR_DB_BUF_CFG:
4013 	case NVME_CTRLR_STATE_WAIT_FOR_HOST_ID:
4014 		/*
4015 		 * nvme_ctrlr_process_init() may be called from the completion context
4016 		 * for the admin qpair. Avoid recursive calls for this case.
4017 		 */
4018 		if (!ctrlr->adminq->in_completion_context) {
4019 			spdk_nvme_qpair_process_completions(ctrlr->adminq, 0);
4020 		}
4021 		break;
4022 
4023 	default:
4024 		assert(0);
4025 		return -1;
4026 	}
4027 
4028 	if (rc) {
4029 		NVME_CTRLR_ERRLOG(ctrlr, "Ctrlr operation failed with error: %d, ctrlr state: %d (%s)\n",
4030 				  rc, ctrlr->state, nvme_ctrlr_state_string(ctrlr->state));
4031 	}
4032 
4033 	/* Note: we use the ticks captured when we entered this function.
4034 	 * This covers environments where the SPDK process gets swapped out after
4035 	 * we tried to advance the state but before we check the timeout here.
4036 	 * It is not normal for this to happen, but harmless to handle it in this
4037 	 * way.
4038 	 */
4039 	if (ctrlr->state_timeout_tsc != NVME_TIMEOUT_INFINITE &&
4040 	    ticks > ctrlr->state_timeout_tsc) {
4041 		NVME_CTRLR_ERRLOG(ctrlr, "Initialization timed out in state %d (%s)\n",
4042 				  ctrlr->state, nvme_ctrlr_state_string(ctrlr->state));
4043 		return -1;
4044 	}
4045 
4046 	return rc;
4047 }
4048 
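/*
 * Initialize a mutex suitable for the controller lock: recursive, robust and
 * process-shared, so that secondary processes attached to the same controller
 * can take the same lock.  On FreeBSD the robust and process-shared attributes
 * are not available and are skipped.
 */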
4049 int
4050 nvme_robust_mutex_init_recursive_shared(pthread_mutex_t *mtx)
4051 {
4052 	pthread_mutexattr_t attr;
4053 	int rc = 0;
4054 
4055 	if (pthread_mutexattr_init(&attr)) {
4056 		return -1;
4057 	}
4058 	if (pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE) ||
4059 #ifndef __FreeBSD__
4060 	    pthread_mutexattr_setrobust(&attr, PTHREAD_MUTEX_ROBUST) ||
4061 	    pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED) ||
4062 #endif
4063 	    pthread_mutex_init(mtx, &attr)) {
4064 		rc = -1;
4065 	}
4066 	pthread_mutexattr_destroy(&attr);
4067 	return rc;
4068 }
4069 
4070 int
4071 nvme_ctrlr_construct(struct spdk_nvme_ctrlr *ctrlr)
4072 {
4073 	int rc;
4074 
4075 	if (ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) {
4076 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_INIT_DELAY, NVME_TIMEOUT_INFINITE);
4077 	} else {
4078 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_INIT, NVME_TIMEOUT_INFINITE);
4079 	}
4080 
4081 	if (ctrlr->opts.admin_queue_size > SPDK_NVME_ADMIN_QUEUE_MAX_ENTRIES) {
4082 		NVME_CTRLR_ERRLOG(ctrlr, "admin_queue_size %u exceeds the max defined by the NVMe spec, using the max value\n",
4083 				  ctrlr->opts.admin_queue_size);
4084 		ctrlr->opts.admin_queue_size = SPDK_NVME_ADMIN_QUEUE_MAX_ENTRIES;
4085 	}
4086 
4087 	if (ctrlr->quirks & NVME_QUIRK_MINIMUM_ADMIN_QUEUE_SIZE &&
4088 	    (ctrlr->opts.admin_queue_size % SPDK_NVME_ADMIN_QUEUE_QUIRK_ENTRIES_MULTIPLE) != 0) {
4089 		NVME_CTRLR_ERRLOG(ctrlr,
4090 				  "admin_queue_size %u is invalid for this NVMe device, adjusting to the next multiple\n",
4091 				  ctrlr->opts.admin_queue_size);
4092 		ctrlr->opts.admin_queue_size = SPDK_ALIGN_CEIL(ctrlr->opts.admin_queue_size,
4093 					       SPDK_NVME_ADMIN_QUEUE_QUIRK_ENTRIES_MULTIPLE);
4094 	}
4095 
4096 	if (ctrlr->opts.admin_queue_size < SPDK_NVME_ADMIN_QUEUE_MIN_ENTRIES) {
4097 		NVME_CTRLR_ERRLOG(ctrlr,
4098 				  "admin_queue_size %u is less than the minimum defined by the NVMe spec, using the min value\n",
4099 				  ctrlr->opts.admin_queue_size);
4100 		ctrlr->opts.admin_queue_size = SPDK_NVME_ADMIN_QUEUE_MIN_ENTRIES;
4101 	}
4102 
4103 	ctrlr->flags = 0;
4104 	ctrlr->free_io_qids = NULL;
4105 	ctrlr->is_resetting = false;
4106 	ctrlr->is_failed = false;
4107 	ctrlr->is_destructed = false;
4108 
4109 	TAILQ_INIT(&ctrlr->active_io_qpairs);
4110 	STAILQ_INIT(&ctrlr->queued_aborts);
4111 	ctrlr->outstanding_aborts = 0;
4112 
4113 	ctrlr->ana_log_page = NULL;
4114 	ctrlr->ana_log_page_size = 0;
4115 
4116 	rc = nvme_robust_mutex_init_recursive_shared(&ctrlr->ctrlr_lock);
4117 	if (rc != 0) {
4118 		return rc;
4119 	}
4120 
4121 	TAILQ_INIT(&ctrlr->active_procs);
4122 	STAILQ_INIT(&ctrlr->register_operations);
4123 
4124 	RB_INIT(&ctrlr->ns);
4125 
4126 	return rc;
4127 }
4128 
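/*
 * Derive controller limits from the CAP register.  For example, with
 * CAP.MPSMIN = 0 the minimum page size is 1 << (12 + 0) = 4096 bytes, and the
 * I/O queue size is clamped to CAP.MQES + 1 entries (MQES is zero-based).
 */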
4129 static void
4130 nvme_ctrlr_init_cap(struct spdk_nvme_ctrlr *ctrlr)
4131 {
4132 	if (ctrlr->cap.bits.ams & SPDK_NVME_CAP_AMS_WRR) {
4133 		ctrlr->flags |= SPDK_NVME_CTRLR_WRR_SUPPORTED;
4134 	}
4135 
4136 	ctrlr->min_page_size = 1u << (12 + ctrlr->cap.bits.mpsmin);
4137 
4138 	/* For now, always select page_size == min_page_size. */
4139 	ctrlr->page_size = ctrlr->min_page_size;
4140 
4141 	ctrlr->opts.io_queue_size = spdk_max(ctrlr->opts.io_queue_size, SPDK_NVME_IO_QUEUE_MIN_ENTRIES);
4142 	ctrlr->opts.io_queue_size = spdk_min(ctrlr->opts.io_queue_size, MAX_IO_QUEUE_ENTRIES);
4143 	if (ctrlr->quirks & NVME_QUIRK_MINIMUM_IO_QUEUE_SIZE &&
4144 	    ctrlr->opts.io_queue_size == DEFAULT_IO_QUEUE_SIZE) {
4145 		/* If the user specifically set an IO queue size different from the
4146 		 * default, use that value.  Otherwise overwrite with the quirked value.
4147 		 * This allows this quirk to be overridden when necessary.
4148 		 * However, cap.mqes still needs to be respected.
4149 		 */
4150 		ctrlr->opts.io_queue_size = DEFAULT_IO_QUEUE_SIZE_FOR_QUIRK;
4151 	}
4152 	ctrlr->opts.io_queue_size = spdk_min(ctrlr->opts.io_queue_size, ctrlr->cap.bits.mqes + 1u);
4153 
4154 	ctrlr->opts.io_queue_requests = spdk_max(ctrlr->opts.io_queue_requests, ctrlr->opts.io_queue_size);
4155 }
4156 
4157 void
4158 nvme_ctrlr_destruct_finish(struct spdk_nvme_ctrlr *ctrlr)
4159 {
4160 	pthread_mutex_destroy(&ctrlr->ctrlr_lock);
4161 }
4162 
4163 void
4164 nvme_ctrlr_destruct_async(struct spdk_nvme_ctrlr *ctrlr,
4165 			  struct nvme_ctrlr_detach_ctx *ctx)
4166 {
4167 	struct spdk_nvme_qpair *qpair, *tmp;
4168 
4169 	NVME_CTRLR_DEBUGLOG(ctrlr, "Prepare to destruct SSD\n");
4170 
4171 	ctrlr->prepare_for_reset = false;
4172 	ctrlr->is_destructed = true;
4173 
4174 	spdk_nvme_qpair_process_completions(ctrlr->adminq, 0);
4175 
4176 	nvme_ctrlr_abort_queued_aborts(ctrlr);
4177 	nvme_transport_admin_qpair_abort_aers(ctrlr->adminq);
4178 
4179 	TAILQ_FOREACH_SAFE(qpair, &ctrlr->active_io_qpairs, tailq, tmp) {
4180 		spdk_nvme_ctrlr_free_io_qpair(qpair);
4181 	}
4182 
4183 	nvme_ctrlr_free_doorbell_buffer(ctrlr);
4184 	nvme_ctrlr_free_iocs_specific_data(ctrlr);
4185 
4186 	nvme_ctrlr_shutdown_async(ctrlr, ctx);
4187 }
4188 
4189 int
4190 nvme_ctrlr_destruct_poll_async(struct spdk_nvme_ctrlr *ctrlr,
4191 			       struct nvme_ctrlr_detach_ctx *ctx)
4192 {
4193 	struct spdk_nvme_ns *ns, *tmp_ns;
4194 	int rc = 0;
4195 
4196 	if (!ctx->shutdown_complete) {
4197 		rc = nvme_ctrlr_shutdown_poll_async(ctrlr, ctx);
4198 		if (rc == -EAGAIN) {
4199 			return -EAGAIN;
4200 		}
4201 		/* Destruct ctrlr forcefully for any other error. */
4202 	}
4203 
4204 	if (ctx->cb_fn) {
4205 		ctx->cb_fn(ctrlr);
4206 	}
4207 
4208 	nvme_transport_ctrlr_disconnect_qpair(ctrlr, ctrlr->adminq);
4209 
4210 	RB_FOREACH_SAFE(ns, nvme_ns_tree, &ctrlr->ns, tmp_ns) {
4211 		nvme_ctrlr_destruct_namespace(ctrlr, ns->id);
4212 		RB_REMOVE(nvme_ns_tree, &ctrlr->ns, ns);
4213 		spdk_free(ns);
4214 	}
4215 
4216 	ctrlr->active_ns_count = 0;
4217 
4218 	spdk_bit_array_free(&ctrlr->free_io_qids);
4219 
4220 	free(ctrlr->ana_log_page);
4221 	free(ctrlr->copied_ana_desc);
4222 	ctrlr->ana_log_page = NULL;
4223 	ctrlr->copied_ana_desc = NULL;
4224 	ctrlr->ana_log_page_size = 0;
4225 
4226 	nvme_transport_ctrlr_destruct(ctrlr);
4227 
4228 	return rc;
4229 }
4230 
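/*
 * Synchronous wrapper around nvme_ctrlr_destruct_async() and
 * nvme_ctrlr_destruct_poll_async(): polls with a short delay until the
 * controller shutdown completes.
 */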
4231 void
4232 nvme_ctrlr_destruct(struct spdk_nvme_ctrlr *ctrlr)
4233 {
4234 	struct nvme_ctrlr_detach_ctx ctx = { .ctrlr = ctrlr };
4235 	int rc;
4236 
4237 	nvme_ctrlr_destruct_async(ctrlr, &ctx);
4238 
4239 	while (1) {
4240 		rc = nvme_ctrlr_destruct_poll_async(ctrlr, &ctx);
4241 		if (rc != -EAGAIN) {
4242 			break;
4243 		}
4244 		nvme_delay(1000);
4245 	}
4246 }
4247 
4248 int
4249 nvme_ctrlr_submit_admin_request(struct spdk_nvme_ctrlr *ctrlr,
4250 				struct nvme_request *req)
4251 {
4252 	return nvme_qpair_submit_request(ctrlr->adminq, req);
4253 }
4254 
4255 static void
4256 nvme_keep_alive_completion(void *cb_ctx, const struct spdk_nvme_cpl *cpl)
4257 {
4258 	/* Do nothing */
4259 }
4260 
4261 /*
4262  * Check if we need to send a Keep Alive command.
4263  * Caller must hold ctrlr->ctrlr_lock.
4264  */
4265 static int
4266 nvme_ctrlr_keep_alive(struct spdk_nvme_ctrlr *ctrlr)
4267 {
4268 	uint64_t now;
4269 	struct nvme_request *req;
4270 	struct spdk_nvme_cmd *cmd;
4271 	int rc = 0;
4272 
4273 	now = spdk_get_ticks();
4274 	if (now < ctrlr->next_keep_alive_tick) {
4275 		return rc;
4276 	}
4277 
4278 	req = nvme_allocate_request_null(ctrlr->adminq, nvme_keep_alive_completion, NULL);
4279 	if (req == NULL) {
4280 		return rc;
4281 	}
4282 
4283 	cmd = &req->cmd;
4284 	cmd->opc = SPDK_NVME_OPC_KEEP_ALIVE;
4285 
4286 	rc = nvme_ctrlr_submit_admin_request(ctrlr, req);
4287 	if (rc != 0) {
4288 		NVME_CTRLR_ERRLOG(ctrlr, "Submitting Keep Alive failed\n");
4289 		rc = -ENXIO;
4290 	}
4291 
4292 	ctrlr->next_keep_alive_tick = now + ctrlr->keep_alive_interval_ticks;
4293 	return rc;
4294 }
4295 
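/*
 * Applications are expected to call this periodically to make forward progress
 * on admin commands, keep-alive and asynchronous event handling.  Illustrative
 * usage in an application poll loop (error handling is application-specific):
 *
 *	int32_t rc = spdk_nvme_ctrlr_process_admin_completions(ctrlr);
 *	if (rc < 0) {
 *		// transport-level failure, e.g. trigger a controller reset
 *	}
 */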
4296 int32_t
4297 spdk_nvme_ctrlr_process_admin_completions(struct spdk_nvme_ctrlr *ctrlr)
4298 {
4299 	int32_t num_completions;
4300 	int32_t rc;
4301 	struct spdk_nvme_ctrlr_process	*active_proc;
4302 
4303 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
4304 
4305 	if (ctrlr->keep_alive_interval_ticks) {
4306 		rc = nvme_ctrlr_keep_alive(ctrlr);
4307 		if (rc) {
4308 			nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
4309 			return rc;
4310 		}
4311 	}
4312 
4313 	rc = nvme_io_msg_process(ctrlr);
4314 	if (rc < 0) {
4315 		nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
4316 		return rc;
4317 	}
4318 	num_completions = rc;
4319 
4320 	rc = spdk_nvme_qpair_process_completions(ctrlr->adminq, 0);
4321 
4322 	/* Each process has an async list, complete the ones for this process object */
4323 	active_proc = nvme_ctrlr_get_current_process(ctrlr);
4324 	if (active_proc) {
4325 		nvme_ctrlr_complete_queued_async_events(ctrlr);
4326 	}
4327 
4328 	if (rc == -ENXIO && ctrlr->is_disconnecting) {
4329 		nvme_ctrlr_disconnect_done(ctrlr);
4330 	}
4331 
4332 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
4333 
4334 	if (rc < 0) {
4335 		num_completions = rc;
4336 	} else {
4337 		num_completions += rc;
4338 	}
4339 
4340 	return num_completions;
4341 }
4342 
4343 const struct spdk_nvme_ctrlr_data *
4344 spdk_nvme_ctrlr_get_data(struct spdk_nvme_ctrlr *ctrlr)
4345 {
4346 	return &ctrlr->cdata;
4347 }
4348 
4349 union spdk_nvme_csts_register spdk_nvme_ctrlr_get_regs_csts(struct spdk_nvme_ctrlr *ctrlr)
4350 {
4351 	union spdk_nvme_csts_register csts;
4352 
4353 	if (nvme_ctrlr_get_csts(ctrlr, &csts)) {
4354 		csts.raw = SPDK_NVME_INVALID_REGISTER_VALUE;
4355 	}
4356 	return csts;
4357 }
4358 
4359 union spdk_nvme_cc_register spdk_nvme_ctrlr_get_regs_cc(struct spdk_nvme_ctrlr *ctrlr)
4360 {
4361 	union spdk_nvme_cc_register cc;
4362 
4363 	if (nvme_ctrlr_get_cc(ctrlr, &cc)) {
4364 		cc.raw = SPDK_NVME_INVALID_REGISTER_VALUE;
4365 	}
4366 	return cc;
4367 }
4368 
4369 union spdk_nvme_cap_register spdk_nvme_ctrlr_get_regs_cap(struct spdk_nvme_ctrlr *ctrlr)
4370 {
4371 	return ctrlr->cap;
4372 }
4373 
4374 union spdk_nvme_vs_register spdk_nvme_ctrlr_get_regs_vs(struct spdk_nvme_ctrlr *ctrlr)
4375 {
4376 	return ctrlr->vs;
4377 }
4378 
4379 union spdk_nvme_cmbsz_register spdk_nvme_ctrlr_get_regs_cmbsz(struct spdk_nvme_ctrlr *ctrlr)
4380 {
4381 	union spdk_nvme_cmbsz_register cmbsz;
4382 
4383 	if (nvme_ctrlr_get_cmbsz(ctrlr, &cmbsz)) {
4384 		cmbsz.raw = 0;
4385 	}
4386 
4387 	return cmbsz;
4388 }
4389 
4390 union spdk_nvme_pmrcap_register spdk_nvme_ctrlr_get_regs_pmrcap(struct spdk_nvme_ctrlr *ctrlr)
4391 {
4392 	union spdk_nvme_pmrcap_register pmrcap;
4393 
4394 	if (nvme_ctrlr_get_pmrcap(ctrlr, &pmrcap)) {
4395 		pmrcap.raw = 0;
4396 	}
4397 
4398 	return pmrcap;
4399 }
4400 
4401 union spdk_nvme_bpinfo_register spdk_nvme_ctrlr_get_regs_bpinfo(struct spdk_nvme_ctrlr *ctrlr)
4402 {
4403 	union spdk_nvme_bpinfo_register bpinfo;
4404 
4405 	if (nvme_ctrlr_get_bpinfo(ctrlr, &bpinfo)) {
4406 		bpinfo.raw = 0;
4407 	}
4408 
4409 	return bpinfo;
4410 }
4411 
4412 uint64_t
4413 spdk_nvme_ctrlr_get_pmrsz(struct spdk_nvme_ctrlr *ctrlr)
4414 {
4415 	return ctrlr->pmr_size;
4416 }
4417 
4418 uint32_t
4419 spdk_nvme_ctrlr_get_num_ns(struct spdk_nvme_ctrlr *ctrlr)
4420 {
4421 	return ctrlr->cdata.nn;
4422 }
4423 
4424 bool
4425 spdk_nvme_ctrlr_is_active_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid)
4426 {
4427 	struct spdk_nvme_ns tmp, *ns;
4428 
4429 	tmp.id = nsid;
4430 	ns = RB_FIND(nvme_ns_tree, &ctrlr->ns, &tmp);
4431 
4432 	if (ns != NULL) {
4433 		return ns->active;
4434 	}
4435 
4436 	return false;
4437 }
4438 
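/*
 * spdk_nvme_ctrlr_get_first_active_ns() and spdk_nvme_ctrlr_get_next_active_ns()
 * together allow iterating over all active namespaces, e.g.:
 *
 *	for (nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr);
 *	     nsid != 0;
 *	     nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, nsid)) {
 *		ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid);
 *		...
 *	}
 */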
4439 uint32_t
4440 spdk_nvme_ctrlr_get_first_active_ns(struct spdk_nvme_ctrlr *ctrlr)
4441 {
4442 	struct spdk_nvme_ns *ns;
4443 
4444 	ns = RB_MIN(nvme_ns_tree, &ctrlr->ns);
4445 	if (ns == NULL) {
4446 		return 0;
4447 	}
4448 
4449 	while (ns != NULL) {
4450 		if (ns->active) {
4451 			return ns->id;
4452 		}
4453 
4454 		ns = RB_NEXT(nvme_ns_tree, &ctrlr->ns, ns);
4455 	}
4456 
4457 	return 0;
4458 }
4459 
4460 uint32_t
4461 spdk_nvme_ctrlr_get_next_active_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t prev_nsid)
4462 {
4463 	struct spdk_nvme_ns tmp, *ns;
4464 
4465 	tmp.id = prev_nsid;
4466 	ns = RB_FIND(nvme_ns_tree, &ctrlr->ns, &tmp);
4467 	if (ns == NULL) {
4468 		return 0;
4469 	}
4470 
4471 	ns = RB_NEXT(nvme_ns_tree, &ctrlr->ns, ns);
4472 	while (ns != NULL) {
4473 		if (ns->active) {
4474 			return ns->id;
4475 		}
4476 
4477 		ns = RB_NEXT(nvme_ns_tree, &ctrlr->ns, ns);
4478 	}
4479 
4480 	return 0;
4481 }
4482 
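/*
 * Namespace objects are allocated lazily: if the requested NSID is within the
 * controller's namespace count but has not been looked up before, a new entry
 * is inserted into the RB-tree and returned (it may still be inactive).
 */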
4483 struct spdk_nvme_ns *
4484 spdk_nvme_ctrlr_get_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid)
4485 {
4486 	struct spdk_nvme_ns tmp;
4487 	struct spdk_nvme_ns *ns;
4488 
4489 	if (nsid < 1 || nsid > ctrlr->cdata.nn) {
4490 		return NULL;
4491 	}
4492 
4493 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
4494 
4495 	tmp.id = nsid;
4496 	ns = RB_FIND(nvme_ns_tree, &ctrlr->ns, &tmp);
4497 
4498 	if (ns == NULL) {
4499 		ns = spdk_zmalloc(sizeof(struct spdk_nvme_ns), 64, NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_SHARE);
4500 		if (ns == NULL) {
4501 			nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
4502 			return NULL;
4503 		}
4504 
4505 		NVME_CTRLR_DEBUGLOG(ctrlr, "Namespace %u was added\n", nsid);
4506 		ns->id = nsid;
4507 		RB_INSERT(nvme_ns_tree, &ctrlr->ns, ns);
4508 	}
4509 
4510 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
4511 
4512 	return ns;
4513 }
4514 
4515 struct spdk_pci_device *
4516 spdk_nvme_ctrlr_get_pci_device(struct spdk_nvme_ctrlr *ctrlr)
4517 {
4518 	if (ctrlr == NULL) {
4519 		return NULL;
4520 	}
4521 
4522 	if (ctrlr->trid.trtype != SPDK_NVME_TRANSPORT_PCIE) {
4523 		return NULL;
4524 	}
4525 
4526 	return nvme_ctrlr_proc_get_devhandle(ctrlr);
4527 }
4528 
4529 uint32_t
4530 spdk_nvme_ctrlr_get_max_xfer_size(const struct spdk_nvme_ctrlr *ctrlr)
4531 {
4532 	return ctrlr->max_xfer_size;
4533 }
4534 
4535 void
4536 spdk_nvme_ctrlr_register_aer_callback(struct spdk_nvme_ctrlr *ctrlr,
4537 				      spdk_nvme_aer_cb aer_cb_fn,
4538 				      void *aer_cb_arg)
4539 {
4540 	struct spdk_nvme_ctrlr_process *active_proc;
4541 
4542 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
4543 
4544 	active_proc = nvme_ctrlr_get_current_process(ctrlr);
4545 	if (active_proc) {
4546 		active_proc->aer_cb_fn = aer_cb_fn;
4547 		active_proc->aer_cb_arg = aer_cb_arg;
4548 	}
4549 
4550 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
4551 }
4552 
4553 void
4554 spdk_nvme_ctrlr_disable_read_changed_ns_list_log_page(struct spdk_nvme_ctrlr *ctrlr)
4555 {
4556 	ctrlr->opts.disable_read_changed_ns_list_log_page = true;
4557 }
4558 
4559 void
4560 spdk_nvme_ctrlr_register_timeout_callback(struct spdk_nvme_ctrlr *ctrlr,
4561 		uint64_t timeout_io_us, uint64_t timeout_admin_us,
4562 		spdk_nvme_timeout_cb cb_fn, void *cb_arg)
4563 {
4564 	struct spdk_nvme_ctrlr_process	*active_proc;
4565 
4566 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
4567 
4568 	active_proc = nvme_ctrlr_get_current_process(ctrlr);
4569 	if (active_proc) {
4570 		active_proc->timeout_io_ticks = timeout_io_us * spdk_get_ticks_hz() / 1000000ULL;
4571 		active_proc->timeout_admin_ticks = timeout_admin_us * spdk_get_ticks_hz() / 1000000ULL;
4572 		active_proc->timeout_cb_fn = cb_fn;
4573 		active_proc->timeout_cb_arg = cb_arg;
4574 	}
4575 
4576 	ctrlr->timeout_enabled = true;
4577 
4578 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
4579 }
4580 
4581 bool
4582 spdk_nvme_ctrlr_is_log_page_supported(struct spdk_nvme_ctrlr *ctrlr, uint8_t log_page)
4583 {
4584 	/* No bounds check necessary, since log_page is uint8_t and log_page_supported has 256 entries */
4585 	SPDK_STATIC_ASSERT(sizeof(ctrlr->log_page_supported) == 256, "log_page_supported size mismatch");
4586 	return ctrlr->log_page_supported[log_page];
4587 }
4588 
4589 bool
4590 spdk_nvme_ctrlr_is_feature_supported(struct spdk_nvme_ctrlr *ctrlr, uint8_t feature_code)
4591 {
4592 	/* No bounds check necessary, since feature_code is uint8_t and feature_supported has 256 entries */
4593 	SPDK_STATIC_ASSERT(sizeof(ctrlr->feature_supported) == 256, "feature_supported size mismatch");
4594 	return ctrlr->feature_supported[feature_code];
4595 }
4596 
4597 int
4598 spdk_nvme_ctrlr_attach_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid,
4599 			  struct spdk_nvme_ctrlr_list *payload)
4600 {
4601 	struct nvme_completion_poll_status	*status;
4602 	struct spdk_nvme_ns			*ns;
4603 	int					res;
4604 
4605 	if (nsid == 0) {
4606 		return -EINVAL;
4607 	}
4608 
4609 	status = calloc(1, sizeof(*status));
4610 	if (!status) {
4611 		NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n");
4612 		return -ENOMEM;
4613 	}
4614 
4615 	res = nvme_ctrlr_cmd_attach_ns(ctrlr, nsid, payload,
4616 				       nvme_completion_poll_cb, status);
4617 	if (res) {
4618 		free(status);
4619 		return res;
4620 	}
4621 	if (nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock)) {
4622 		NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_attach_ns failed!\n");
4623 		if (!status->timed_out) {
4624 			free(status);
4625 		}
4626 		return -ENXIO;
4627 	}
4628 	free(status);
4629 
4630 	res = nvme_ctrlr_identify_active_ns(ctrlr);
4631 	if (res) {
4632 		return res;
4633 	}
4634 
4635 	ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid);
4636 	if (ns == NULL) {
4637 		NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_get_ns failed!\n");
4638 		return -ENXIO;
4639 	}
4640 
4641 	return nvme_ns_construct(ns, nsid, ctrlr);
4642 }
4643 
4644 int
4645 spdk_nvme_ctrlr_detach_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid,
4646 			  struct spdk_nvme_ctrlr_list *payload)
4647 {
4648 	struct nvme_completion_poll_status	*status;
4649 	int					res;
4650 
4651 	if (nsid == 0) {
4652 		return -EINVAL;
4653 	}
4654 
4655 	status = calloc(1, sizeof(*status));
4656 	if (!status) {
4657 		NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n");
4658 		return -ENOMEM;
4659 	}
4660 
4661 	res = nvme_ctrlr_cmd_detach_ns(ctrlr, nsid, payload,
4662 				       nvme_completion_poll_cb, status);
4663 	if (res) {
4664 		free(status);
4665 		return res;
4666 	}
4667 	if (nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock)) {
4668 		NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_detach_ns failed!\n");
4669 		if (!status->timed_out) {
4670 			free(status);
4671 		}
4672 		return -ENXIO;
4673 	}
4674 	free(status);
4675 
4676 	return nvme_ctrlr_identify_active_ns(ctrlr);
4677 }
4678 
4679 uint32_t
4680 spdk_nvme_ctrlr_create_ns(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_ns_data *payload)
4681 {
4682 	struct nvme_completion_poll_status	*status;
4683 	int					res;
4684 	uint32_t				nsid;
4685 
4686 	status = calloc(1, sizeof(*status));
4687 	if (!status) {
4688 		NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n");
4689 		return 0;
4690 	}
4691 
4692 	res = nvme_ctrlr_cmd_create_ns(ctrlr, payload, nvme_completion_poll_cb, status);
4693 	if (res) {
4694 		free(status);
4695 		return 0;
4696 	}
4697 	if (nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock)) {
4698 		NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_create_ns failed!\n");
4699 		if (!status->timed_out) {
4700 			free(status);
4701 		}
4702 		return 0;
4703 	}
4704 
4705 	nsid = status->cpl.cdw0;
4706 	free(status);
4707 
4708 	assert(nsid > 0);
4709 
4710 	/* Return the namespace ID that was created */
4711 	return nsid;
4712 }
4713 
4714 int
4715 spdk_nvme_ctrlr_delete_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid)
4716 {
4717 	struct nvme_completion_poll_status	*status;
4718 	int					res;
4719 
4720 	if (nsid == 0) {
4721 		return -EINVAL;
4722 	}
4723 
4724 	status = calloc(1, sizeof(*status));
4725 	if (!status) {
4726 		NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n");
4727 		return -ENOMEM;
4728 	}
4729 
4730 	res = nvme_ctrlr_cmd_delete_ns(ctrlr, nsid, nvme_completion_poll_cb, status);
4731 	if (res) {
4732 		free(status);
4733 		return res;
4734 	}
4735 	if (nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock)) {
4736 		NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_delete_ns failed!\n");
4737 		if (!status->timed_out) {
4738 			free(status);
4739 		}
4740 		return -ENXIO;
4741 	}
4742 	free(status);
4743 
4744 	return nvme_ctrlr_identify_active_ns(ctrlr);
4745 }
4746 
4747 int
4748 spdk_nvme_ctrlr_format(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid,
4749 		       struct spdk_nvme_format *format)
4750 {
4751 	struct nvme_completion_poll_status	*status;
4752 	int					res;
4753 
4754 	status = calloc(1, sizeof(*status));
4755 	if (!status) {
4756 		NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n");
4757 		return -ENOMEM;
4758 	}
4759 
4760 	res = nvme_ctrlr_cmd_format(ctrlr, nsid, format, nvme_completion_poll_cb,
4761 				    status);
4762 	if (res) {
4763 		free(status);
4764 		return res;
4765 	}
4766 	if (nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock)) {
4767 		NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_format failed!\n");
4768 		if (!status->timed_out) {
4769 			free(status);
4770 		}
4771 		return -ENXIO;
4772 	}
4773 	free(status);
4774 
4775 	return spdk_nvme_ctrlr_reset(ctrlr);
4776 }
4777 
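/*
 * Firmware update is performed in two phases: the image is downloaded in
 * min_page_size-sized chunks via Firmware Image Download, then committed to the
 * requested slot with the given commit action.  The controller is reset at the
 * end to activate the new image, unless the commit status indicates that a
 * different kind of reset is required.
 */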
4778 int
4779 spdk_nvme_ctrlr_update_firmware(struct spdk_nvme_ctrlr *ctrlr, void *payload, uint32_t size,
4780 				int slot, enum spdk_nvme_fw_commit_action commit_action, struct spdk_nvme_status *completion_status)
4781 {
4782 	struct spdk_nvme_fw_commit		fw_commit;
4783 	struct nvme_completion_poll_status	*status;
4784 	int					res;
4785 	unsigned int				size_remaining;
4786 	unsigned int				offset;
4787 	unsigned int				transfer;
4788 	void					*p;
4789 
4790 	if (!completion_status) {
4791 		return -EINVAL;
4792 	}
4793 	memset(completion_status, 0, sizeof(struct spdk_nvme_status));
4794 	if (size % 4) {
4795 		NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_update_firmware invalid size!\n");
4796 		return -1;
4797 	}
4798 
4799 	/* Currently only SPDK_NVME_FW_COMMIT_REPLACE_IMG
4800 	 * and SPDK_NVME_FW_COMMIT_REPLACE_AND_ENABLE_IMG are supported.
4801 	 */
4802 	if ((commit_action != SPDK_NVME_FW_COMMIT_REPLACE_IMG) &&
4803 	    (commit_action != SPDK_NVME_FW_COMMIT_REPLACE_AND_ENABLE_IMG)) {
4804 		NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_update_firmware invalid command!\n");
4805 		return -1;
4806 	}
4807 
4808 	status = calloc(1, sizeof(*status));
4809 	if (!status) {
4810 		NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n");
4811 		return -ENOMEM;
4812 	}
4813 
4814 	/* Firmware download */
4815 	size_remaining = size;
4816 	offset = 0;
4817 	p = payload;
4818 
4819 	while (size_remaining > 0) {
4820 		transfer = spdk_min(size_remaining, ctrlr->min_page_size);
4821 
4822 		memset(status, 0, sizeof(*status));
4823 		res = nvme_ctrlr_cmd_fw_image_download(ctrlr, transfer, offset, p,
4824 						       nvme_completion_poll_cb,
4825 						       status);
4826 		if (res) {
4827 			free(status);
4828 			return res;
4829 		}
4830 
4831 		if (nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock)) {
4832 			NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_fw_image_download failed!\n");
4833 			if (!status->timed_out) {
4834 				free(status);
4835 			}
4836 			return -ENXIO;
4837 		}
4838 		p += transfer;
4839 		offset += transfer;
4840 		size_remaining -= transfer;
4841 	}
4842 
4843 	/* Firmware commit */
4844 	memset(&fw_commit, 0, sizeof(struct spdk_nvme_fw_commit));
4845 	fw_commit.fs = slot;
4846 	fw_commit.ca = commit_action;
4847 
4848 	memset(status, 0, sizeof(*status));
4849 	res = nvme_ctrlr_cmd_fw_commit(ctrlr, &fw_commit, nvme_completion_poll_cb,
4850 				       status);
4851 	if (res) {
4852 		free(status);
4853 		return res;
4854 	}
4855 
4856 	res = nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock);
4857 
4858 	memcpy(completion_status, &status->cpl.status, sizeof(struct spdk_nvme_status));
4859 
4860 	if (!status->timed_out) {
4861 		free(status);
4862 	}
4863 
4864 	if (res) {
4865 		if (completion_status->sct != SPDK_NVME_SCT_COMMAND_SPECIFIC ||
4866 		    completion_status->sc != SPDK_NVME_SC_FIRMWARE_REQ_NVM_RESET) {
4867 			if (completion_status->sct == SPDK_NVME_SCT_COMMAND_SPECIFIC  &&
4868 			    completion_status->sc == SPDK_NVME_SC_FIRMWARE_REQ_CONVENTIONAL_RESET) {
4869 				NVME_CTRLR_NOTICELOG(ctrlr,
4870 						     "firmware activation requires a conventional reset to be performed.\n");
4871 			} else {
4872 				NVME_CTRLR_ERRLOG(ctrlr, "nvme_ctrlr_cmd_fw_commit failed!\n");
4873 			}
4874 			return -ENXIO;
4875 		}
4876 	}
4877 
4878 	return spdk_nvme_ctrlr_reset(ctrlr);
4879 }
4880 
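/*
 * The available CMB size is computed from the CMBSZ register as
 * SZ * (4 KiB << (4 * SZU)), e.g. SZU = 0 means SZ is in units of 4 KiB.
 * Illustrative usage:
 *
 *	size_t sz;
 *	if (spdk_nvme_ctrlr_reserve_cmb(ctrlr) >= 0) {
 *		void *cmb = spdk_nvme_ctrlr_map_cmb(ctrlr, &sz);
 *		...
 *	}
 */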
4881 int
4882 spdk_nvme_ctrlr_reserve_cmb(struct spdk_nvme_ctrlr *ctrlr)
4883 {
4884 	int rc, size;
4885 	union spdk_nvme_cmbsz_register cmbsz;
4886 
4887 	cmbsz = spdk_nvme_ctrlr_get_regs_cmbsz(ctrlr);
4888 
4889 	if (cmbsz.bits.rds == 0 || cmbsz.bits.wds == 0) {
4890 		return -ENOTSUP;
4891 	}
4892 
4893 	size = cmbsz.bits.sz * (0x1000 << (cmbsz.bits.szu * 4));
4894 
4895 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
4896 	rc = nvme_transport_ctrlr_reserve_cmb(ctrlr);
4897 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
4898 
4899 	if (rc < 0) {
4900 		return rc;
4901 	}
4902 
4903 	return size;
4904 }
4905 
4906 void *
4907 spdk_nvme_ctrlr_map_cmb(struct spdk_nvme_ctrlr *ctrlr, size_t *size)
4908 {
4909 	void *buf;
4910 
4911 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
4912 	buf = nvme_transport_ctrlr_map_cmb(ctrlr, size);
4913 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
4914 
4915 	return buf;
4916 }
4917 
4918 void
4919 spdk_nvme_ctrlr_unmap_cmb(struct spdk_nvme_ctrlr *ctrlr)
4920 {
4921 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
4922 	nvme_transport_ctrlr_unmap_cmb(ctrlr);
4923 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
4924 }
4925 
4926 int
4927 spdk_nvme_ctrlr_enable_pmr(struct spdk_nvme_ctrlr *ctrlr)
4928 {
4929 	int rc;
4930 
4931 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
4932 	rc = nvme_transport_ctrlr_enable_pmr(ctrlr);
4933 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
4934 
4935 	return rc;
4936 }
4937 
4938 int
4939 spdk_nvme_ctrlr_disable_pmr(struct spdk_nvme_ctrlr *ctrlr)
4940 {
4941 	int rc;
4942 
4943 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
4944 	rc = nvme_transport_ctrlr_disable_pmr(ctrlr);
4945 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
4946 
4947 	return rc;
4948 }
4949 
4950 void *
4951 spdk_nvme_ctrlr_map_pmr(struct spdk_nvme_ctrlr *ctrlr, size_t *size)
4952 {
4953 	void *buf;
4954 
4955 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
4956 	buf = nvme_transport_ctrlr_map_pmr(ctrlr, size);
4957 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
4958 
4959 	return buf;
4960 }
4961 
4962 int
4963 spdk_nvme_ctrlr_unmap_pmr(struct spdk_nvme_ctrlr *ctrlr)
4964 {
4965 	int rc;
4966 
4967 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
4968 	rc = nvme_transport_ctrlr_unmap_pmr(ctrlr);
4969 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
4970 
4971 	return rc;
4972 }
4973 
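/*
 * Start a Boot Partition read.  The payload buffer must be physically
 * contiguous and at least bprsz * 4 KiB bytes long; completion is detected by
 * polling spdk_nvme_ctrlr_read_boot_partition_poll() until it stops returning
 * -EAGAIN.
 */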
4974 int
4975 spdk_nvme_ctrlr_read_boot_partition_start(struct spdk_nvme_ctrlr *ctrlr, void *payload,
4976 		uint32_t bprsz, uint32_t bprof, uint32_t bpid)
4977 {
4978 	union spdk_nvme_bprsel_register bprsel;
4979 	union spdk_nvme_bpinfo_register bpinfo;
4980 	uint64_t bpmbl, bpmb_size;
4981 
4982 	if (ctrlr->cap.bits.bps == 0) {
4983 		return -ENOTSUP;
4984 	}
4985 
4986 	if (nvme_ctrlr_get_bpinfo(ctrlr, &bpinfo)) {
4987 		NVME_CTRLR_ERRLOG(ctrlr, "get bpinfo failed\n");
4988 		return -EIO;
4989 	}
4990 
4991 	if (bpinfo.bits.brs == SPDK_NVME_BRS_READ_IN_PROGRESS) {
4992 		NVME_CTRLR_ERRLOG(ctrlr, "Boot Partition read already initiated\n");
4993 		return -EALREADY;
4994 	}
4995 
4996 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
4997 
4998 	bpmb_size = bprsz * 4096;
4999 	bpmbl = spdk_vtophys(payload, &bpmb_size);
5000 	if (bpmbl == SPDK_VTOPHYS_ERROR) {
5001 		NVME_CTRLR_ERRLOG(ctrlr, "spdk_vtophys of bpmbl failed\n");
5002 		nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
5003 		return -EFAULT;
5004 	}
5005 
5006 	if (bpmb_size != bprsz * 4096) {
5007 		NVME_CTRLR_ERRLOG(ctrlr, "Boot Partition buffer is not physically contiguous\n");
5008 		nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
5009 		return -EFAULT;
5010 	}
5011 
5012 	if (nvme_ctrlr_set_bpmbl(ctrlr, bpmbl)) {
5013 		NVME_CTRLR_ERRLOG(ctrlr, "set_bpmbl() failed\n");
5014 		nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
5015 		return -EIO;
5016 	}
5017 
5018 	bprsel.bits.bpid = bpid;
5019 	bprsel.bits.bprof = bprof;
5020 	bprsel.bits.bprsz = bprsz;
5021 
5022 	if (nvme_ctrlr_set_bprsel(ctrlr, &bprsel)) {
5023 		NVME_CTRLR_ERRLOG(ctrlr, "set_bprsel() failed\n");
5024 		nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
5025 		return -EIO;
5026 	}
5027 
5028 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
5029 	return 0;
5030 }
5031 
5032 int
5033 spdk_nvme_ctrlr_read_boot_partition_poll(struct spdk_nvme_ctrlr *ctrlr)
5034 {
5035 	int rc = 0;
5036 	union spdk_nvme_bpinfo_register bpinfo;
5037 
5038 	if (nvme_ctrlr_get_bpinfo(ctrlr, &bpinfo)) {
5039 		NVME_CTRLR_ERRLOG(ctrlr, "get bpinfo failed\n");
5040 		return -EIO;
5041 	}
5042 
5043 	switch (bpinfo.bits.brs) {
5044 	case SPDK_NVME_BRS_NO_READ:
5045 		NVME_CTRLR_ERRLOG(ctrlr, "Boot Partition read not initiated\n");
5046 		rc = -EINVAL;
5047 		break;
5048 	case SPDK_NVME_BRS_READ_IN_PROGRESS:
5049 		NVME_CTRLR_DEBUGLOG(ctrlr, "Boot Partition read in progress\n");
5050 		rc = -EAGAIN;
5051 		break;
5052 	case SPDK_NVME_BRS_READ_ERROR:
5053 		NVME_CTRLR_ERRLOG(ctrlr, "Error completing Boot Partition read\n");
5054 		rc = -EIO;
5055 		break;
5056 	case SPDK_NVME_BRS_READ_SUCCESS:
5057 		NVME_CTRLR_INFOLOG(ctrlr, "Boot Partition read completed successfully\n");
5058 		break;
5059 	default:
5060 		NVME_CTRLR_ERRLOG(ctrlr, "Invalid Boot Partition read status\n");
5061 		rc = -EINVAL;
5062 	}
5063 
5064 	return rc;
5065 }
5066 
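/*
 * Completion callback driving the Boot Partition write state machine:
 * DOWNLOADING (image chunks) -> DOWNLOADED -> REPLACE (commit) -> ACTIVATE.
 * The user callback registered via spdk_nvme_ctrlr_write_boot_partition() is
 * invoked once activation completes, or on the first error.
 */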
5067 static void
5068 nvme_write_boot_partition_cb(void *arg, const struct spdk_nvme_cpl *cpl)
5069 {
5070 	int res;
5071 	struct spdk_nvme_ctrlr *ctrlr = arg;
5072 	struct spdk_nvme_fw_commit fw_commit;
5073 	struct spdk_nvme_cpl err_cpl =
5074 	{.status = {.sct = SPDK_NVME_SCT_GENERIC, .sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR }};
5075 
5076 	if (spdk_nvme_cpl_is_error(cpl)) {
5077 		NVME_CTRLR_ERRLOG(ctrlr, "Write Boot Partition failed\n");
5078 		ctrlr->bp_write_cb_fn(ctrlr->bp_write_cb_arg, cpl);
5079 		return;
5080 	}
5081 
5082 	if (ctrlr->bp_ws == SPDK_NVME_BP_WS_DOWNLOADING) {
5083 		NVME_CTRLR_DEBUGLOG(ctrlr, "Boot Partition download at offset %d succeeded\n", ctrlr->fw_offset);
5084 		ctrlr->fw_payload += ctrlr->fw_transfer_size;
5085 		ctrlr->fw_offset += ctrlr->fw_transfer_size;
5086 		ctrlr->fw_size_remaining -= ctrlr->fw_transfer_size;
5087 		ctrlr->fw_transfer_size = spdk_min(ctrlr->fw_size_remaining, ctrlr->min_page_size);
5088 		res = nvme_ctrlr_cmd_fw_image_download(ctrlr, ctrlr->fw_transfer_size, ctrlr->fw_offset,
5089 						       ctrlr->fw_payload, nvme_write_boot_partition_cb, ctrlr);
5090 		if (res) {
5091 			NVME_CTRLR_ERRLOG(ctrlr, "nvme_ctrlr_cmd_fw_image_download failed!\n");
5092 			ctrlr->bp_write_cb_fn(ctrlr->bp_write_cb_arg, &err_cpl);
5093 			return;
5094 		}
5095 
5096 		if (ctrlr->fw_transfer_size < ctrlr->min_page_size) {
5097 			ctrlr->bp_ws = SPDK_NVME_BP_WS_DOWNLOADED;
5098 		}
5099 	} else if (ctrlr->bp_ws == SPDK_NVME_BP_WS_DOWNLOADED) {
5100 		NVME_CTRLR_DEBUGLOG(ctrlr, "Boot Partition Download Success\n");
5101 		memset(&fw_commit, 0, sizeof(struct spdk_nvme_fw_commit));
5102 		fw_commit.bpid = ctrlr->bpid;
5103 		fw_commit.ca = SPDK_NVME_FW_COMMIT_REPLACE_BOOT_PARTITION;
5104 		res = nvme_ctrlr_cmd_fw_commit(ctrlr, &fw_commit,
5105 					       nvme_write_boot_partition_cb, ctrlr);
5106 		if (res) {
5107 			NVME_CTRLR_ERRLOG(ctrlr, "nvme_ctrlr_cmd_fw_commit failed!\n");
5108 			NVME_CTRLR_ERRLOG(ctrlr, "commit action: %d\n", fw_commit.ca);
5109 			ctrlr->bp_write_cb_fn(ctrlr->bp_write_cb_arg, &err_cpl);
5110 			return;
5111 		}
5112 
5113 		ctrlr->bp_ws = SPDK_NVME_BP_WS_REPLACE;
5114 	} else if (ctrlr->bp_ws == SPDK_NVME_BP_WS_REPLACE) {
5115 		NVME_CTRLR_DEBUGLOG(ctrlr, "Boot Partition Replacement Success\n");
5116 		memset(&fw_commit, 0, sizeof(struct spdk_nvme_fw_commit));
5117 		fw_commit.bpid = ctrlr->bpid;
5118 		fw_commit.ca = SPDK_NVME_FW_COMMIT_ACTIVATE_BOOT_PARTITION;
5119 		res = nvme_ctrlr_cmd_fw_commit(ctrlr, &fw_commit,
5120 					       nvme_write_boot_partition_cb, ctrlr);
5121 		if (res) {
5122 			NVME_CTRLR_ERRLOG(ctrlr, "nvme_ctrlr_cmd_fw_commit failed!\n");
5123 			NVME_CTRLR_ERRLOG(ctrlr, "commit action: %d\n", fw_commit.ca);
5124 			ctrlr->bp_write_cb_fn(ctrlr->bp_write_cb_arg, &err_cpl);
5125 			return;
5126 		}
5127 
5128 		ctrlr->bp_ws = SPDK_NVME_BP_WS_ACTIVATE;
5129 	} else if (ctrlr->bp_ws == SPDK_NVME_BP_WS_ACTIVATE) {
5130 		NVME_CTRLR_DEBUGLOG(ctrlr, "Boot Partition Activation Success\n");
5131 		ctrlr->bp_write_cb_fn(ctrlr->bp_write_cb_arg, cpl);
5132 	} else {
5133 		NVME_CTRLR_ERRLOG(ctrlr, "Invalid Boot Partition write state\n");
5134 		ctrlr->bp_write_cb_fn(ctrlr->bp_write_cb_arg, &err_cpl);
5135 		return;
5136 	}
5137 }
5138 
5139 int
5140 spdk_nvme_ctrlr_write_boot_partition(struct spdk_nvme_ctrlr *ctrlr,
5141 				     void *payload, uint32_t size, uint32_t bpid,
5142 				     spdk_nvme_cmd_cb cb_fn, void *cb_arg)
5143 {
5144 	int res;
5145 
5146 	if (ctrlr->cap.bits.bps == 0) {
5147 		return -ENOTSUP;
5148 	}
5149 
5150 	ctrlr->bp_ws = SPDK_NVME_BP_WS_DOWNLOADING;
5151 	ctrlr->bpid = bpid;
5152 	ctrlr->bp_write_cb_fn = cb_fn;
5153 	ctrlr->bp_write_cb_arg = cb_arg;
5154 	ctrlr->fw_offset = 0;
5155 	ctrlr->fw_size_remaining = size;
5156 	ctrlr->fw_payload = payload;
5157 	ctrlr->fw_transfer_size = spdk_min(ctrlr->fw_size_remaining, ctrlr->min_page_size);
5158 
5159 	res = nvme_ctrlr_cmd_fw_image_download(ctrlr, ctrlr->fw_transfer_size, ctrlr->fw_offset,
5160 					       ctrlr->fw_payload, nvme_write_boot_partition_cb, ctrlr);
5161 
5162 	return res;
5163 }
5164 
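/*
 * A controller is a discovery controller if its subsystem NQN is the
 * well-known discovery service NQN (SPDK_NVMF_DISCOVERY_NQN,
 * "nqn.2014-08.org.nvmexpress.discovery").
 */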
5165 bool
5166 spdk_nvme_ctrlr_is_discovery(struct spdk_nvme_ctrlr *ctrlr)
5167 {
5168 	assert(ctrlr);
5169 
5170 	return !strncmp(ctrlr->trid.subnqn, SPDK_NVMF_DISCOVERY_NQN,
5171 			strlen(SPDK_NVMF_DISCOVERY_NQN));
5172 }
5173 
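/*
 * True when the controller is attached over an NVMe over Fabrics transport
 * (e.g. RDMA or TCP) rather than a local transport such as PCIe.
 */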
5174 bool
5175 spdk_nvme_ctrlr_is_fabrics(struct spdk_nvme_ctrlr *ctrlr)
5176 {
5177 	assert(ctrlr);
5178 
5179 	return spdk_nvme_trtype_is_fabrics(ctrlr->trid.trtype);
5180 }
5181 
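/*
 * Synchronous wrapper for the Security Receive admin command: allocates a
 * completion tracker, submits spdk_nvme_ctrlr_cmd_security_receive(), and
 * polls the admin queue (serialized with the controller lock) until the
 * command completes.  Returns 0 on success, -ENOMEM if the tracker cannot be
 * allocated, the submission error if queueing fails, or -ENXIO on command
 * failure or timeout.  On timeout the tracker is deliberately not freed here;
 * it is released later when the outstanding command finally completes.
 *
 * Illustrative call -- SECP/SPSP values depend on the security protocol in
 * use (e.g. SECP 0x01 is commonly used for TCG storage protocols):
 *
 *	uint8_t buf[512];
 *	int rc = spdk_nvme_ctrlr_security_receive(ctrlr, 0x01, 0x0001, 0, buf, sizeof(buf));
 */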
5182 int
5183 spdk_nvme_ctrlr_security_receive(struct spdk_nvme_ctrlr *ctrlr, uint8_t secp,
5184 				 uint16_t spsp, uint8_t nssf, void *payload, size_t size)
5185 {
5186 	struct nvme_completion_poll_status	*status;
5187 	int					res;
5188 
5189 	status = calloc(1, sizeof(*status));
5190 	if (!status) {
5191 		NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n");
5192 		return -ENOMEM;
5193 	}
5194 
5195 	res = spdk_nvme_ctrlr_cmd_security_receive(ctrlr, secp, spsp, nssf, payload, size,
5196 			nvme_completion_poll_cb, status);
5197 	if (res) {
5198 		free(status);
5199 		return res;
5200 	}
5201 	if (nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock)) {
5202 		NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_cmd_security_receive failed!\n");
5203 		if (!status->timed_out) {
5204 			free(status);
5205 		}
5206 		return -ENXIO;
5207 	}
5208 	free(status);
5209 
5210 	return 0;
5211 }
5212 
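/*
 * Synchronous counterpart to spdk_nvme_ctrlr_security_receive() for the
 * Security Send admin command; same allocation, locking, polling and error
 * semantics, with `payload` carrying the security protocol data sent to the
 * controller.
 */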
5213 int
5214 spdk_nvme_ctrlr_security_send(struct spdk_nvme_ctrlr *ctrlr, uint8_t secp,
5215 			      uint16_t spsp, uint8_t nssf, void *payload, size_t size)
5216 {
5217 	struct nvme_completion_poll_status	*status;
5218 	int					res;
5219 
5220 	status = calloc(1, sizeof(*status));
5221 	if (!status) {
5222 		NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n");
5223 		return -ENOMEM;
5224 	}
5225 
5226 	res = spdk_nvme_ctrlr_cmd_security_send(ctrlr, secp, spsp, nssf, payload, size,
5227 						nvme_completion_poll_cb,
5228 						status);
5229 	if (res) {
5230 		free(status);
5231 		return res;
5232 	}
5233 	if (nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock)) {
5234 		NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_cmd_security_send failed!\n");
5235 		if (!status->timed_out) {
5236 			free(status);
5237 		}
5238 		return -ENXIO;
5239 	}
5240 
5241 	free(status);
5242 
5243 	return 0;
5244 }
5245 
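/*
 * Returns the SPDK_NVME_CTRLR_* feature flag bits discovered during
 * controller initialization, e.g. SGL or Security Send/Receive support.
 */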
5246 uint64_t
5247 spdk_nvme_ctrlr_get_flags(struct spdk_nvme_ctrlr *ctrlr)
5248 {
5249 	return ctrlr->flags;
5250 }
5251 
5252 const struct spdk_nvme_transport_id *
5253 spdk_nvme_ctrlr_get_transport_id(struct spdk_nvme_ctrlr *ctrlr)
5254 {
5255 	return &ctrlr->trid;
5256 }
5257 
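/*
 * Reserve a free I/O queue ID from the controller's free_io_qids bit array,
 * returning a qid in [1, num_io_queues] or -1 if none are available.  The
 * bit array is manipulated under the controller lock, and every successful
 * allocation must eventually be balanced by spdk_nvme_ctrlr_free_qid().
 *
 * Illustrative pairing (sketch only):
 *
 *	int32_t qid = spdk_nvme_ctrlr_alloc_qid(ctrlr);
 *	if (qid > 0) {
 *		... create/use a queue pair with this qid ...
 *		spdk_nvme_ctrlr_free_qid(ctrlr, (uint16_t)qid);
 *	}
 */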
5258 int32_t
5259 spdk_nvme_ctrlr_alloc_qid(struct spdk_nvme_ctrlr *ctrlr)
5260 {
5261 	uint32_t qid;
5262 
5263 	assert(ctrlr->free_io_qids);
5264 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
5265 	qid = spdk_bit_array_find_first_set(ctrlr->free_io_qids, 1);
5266 	if (qid > ctrlr->opts.num_io_queues) {
5267 		NVME_CTRLR_ERRLOG(ctrlr, "No free I/O queue IDs\n");
5268 		nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
5269 		return -1;
5270 	}
5271 
5272 	spdk_bit_array_clear(ctrlr->free_io_qids, qid);
5273 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
5274 	return qid;
5275 }
5276 
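/*
 * Return a queue ID previously reserved with spdk_nvme_ctrlr_alloc_qid().
 * Safe to call while the controller is being torn down: if the free_io_qids
 * bit array has already been released, the call is a no-op.
 */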
5277 void
5278 spdk_nvme_ctrlr_free_qid(struct spdk_nvme_ctrlr *ctrlr, uint16_t qid)
5279 {
5280 	assert(qid <= ctrlr->opts.num_io_queues);
5281 
5282 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
5283 
5284 	if (spdk_likely(ctrlr->free_io_qids)) {
5285 		spdk_bit_array_set(ctrlr->free_io_qids, qid);
5286 	}
5287 
5288 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
5289 }
5290 
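/*
 * Query the memory domains used by the controller's transport.  Returns the
 * number of memory domains (or a negative errno) and fills `domains` with up
 * to `array_size` entries; transports that do not use dedicated memory
 * domains report 0.  Calling with NULL/0 is a common way to query the
 * required array size before allocating it.
 */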
5291 int
5292 spdk_nvme_ctrlr_get_memory_domains(const struct spdk_nvme_ctrlr *ctrlr,
5293 				   struct spdk_memory_domain **domains, int array_size)
5294 {
5295 	return nvme_transport_ctrlr_get_memory_domains(ctrlr, domains, array_size);
5296 }
5297