xref: /spdk/lib/nvme/nvme_ctrlr.c (revision da2fd6651a9cd4732b0910d30291821e77f4d643)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright (c) Intel Corporation. All rights reserved.
5  *   Copyright (c) 2019-2021 Mellanox Technologies LTD. All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #include "spdk/stdinc.h"
35 
36 #include "nvme_internal.h"
37 #include "nvme_io_msg.h"
38 
39 #include "spdk/env.h"
40 #include "spdk/string.h"
41 #include "spdk/endian.h"
42 
43 struct nvme_active_ns_ctx;
44 
45 static void nvme_ctrlr_destruct_namespaces(struct spdk_nvme_ctrlr *ctrlr);
46 static int nvme_ctrlr_construct_and_submit_aer(struct spdk_nvme_ctrlr *ctrlr,
47 		struct nvme_async_event_request *aer);
48 static void nvme_ctrlr_identify_active_ns_async(struct nvme_active_ns_ctx *ctx);
49 static int nvme_ctrlr_identify_ns_async(struct spdk_nvme_ns *ns);
50 static int nvme_ctrlr_identify_ns_iocs_specific_async(struct spdk_nvme_ns *ns);
51 static int nvme_ctrlr_identify_id_desc_async(struct spdk_nvme_ns *ns);
52 static void nvme_ctrlr_init_cap(struct spdk_nvme_ctrlr *ctrlr);
53 
54 #define CTRLR_STRING(ctrlr) \
55 	((ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_TCP || ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_RDMA) ? \
56 	ctrlr->trid.subnqn : ctrlr->trid.traddr)
57 
58 #define NVME_CTRLR_ERRLOG(ctrlr, format, ...) \
59 	SPDK_ERRLOG("[%s] " format, CTRLR_STRING(ctrlr), ##__VA_ARGS__);
60 
61 #define NVME_CTRLR_WARNLOG(ctrlr, format, ...) \
62 	SPDK_WARNLOG("[%s] " format, CTRLR_STRING(ctrlr), ##__VA_ARGS__);
63 
64 #define NVME_CTRLR_NOTICELOG(ctrlr, format, ...) \
65 	SPDK_NOTICELOG("[%s] " format, CTRLR_STRING(ctrlr), ##__VA_ARGS__);
66 
67 #define NVME_CTRLR_INFOLOG(ctrlr, format, ...) \
68 	SPDK_INFOLOG(nvme, "[%s] " format, CTRLR_STRING(ctrlr), ##__VA_ARGS__);
69 
70 #ifdef DEBUG
71 #define NVME_CTRLR_DEBUGLOG(ctrlr, format, ...) \
72 	SPDK_DEBUGLOG(nvme, "[%s] " format, CTRLR_STRING(ctrlr), ##__VA_ARGS__);
73 #else
74 #define NVME_CTRLR_DEBUGLOG(ctrlr, ...) do { } while (0)
75 #endif
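
/*
 * Illustrative note on the logging macros above: for TCP and RDMA controllers
 * CTRLR_STRING() resolves to the subsystem NQN, so a call such as
 *
 *	NVME_CTRLR_ERRLOG(ctrlr, "get_cc failed\n");
 *
 * expands to SPDK_ERRLOG("[%s] get_cc failed\n", ctrlr->trid.subnqn). For other
 * transports (e.g. PCIe) the bracketed prefix is the transport address instead.
 */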
76 
77 static int
78 nvme_ctrlr_get_cc(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_cc_register *cc)
79 {
80 	return nvme_transport_ctrlr_get_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, cc.raw),
81 					      &cc->raw);
82 }
83 
84 static int
85 nvme_ctrlr_get_csts(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_csts_register *csts)
86 {
87 	return nvme_transport_ctrlr_get_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, csts.raw),
88 					      &csts->raw);
89 }
90 
91 int
92 nvme_ctrlr_get_cap(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_cap_register *cap)
93 {
94 	return nvme_transport_ctrlr_get_reg_8(ctrlr, offsetof(struct spdk_nvme_registers, cap.raw),
95 					      &cap->raw);
96 }
97 
98 int
99 nvme_ctrlr_get_vs(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_vs_register *vs)
100 {
101 	return nvme_transport_ctrlr_get_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, vs.raw),
102 					      &vs->raw);
103 }
104 
105 static int
106 nvme_ctrlr_set_cc(struct spdk_nvme_ctrlr *ctrlr, const union spdk_nvme_cc_register *cc)
107 {
108 	return nvme_transport_ctrlr_set_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, cc.raw),
109 					      cc->raw);
110 }
111 
112 int
113 nvme_ctrlr_get_cmbsz(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_cmbsz_register *cmbsz)
114 {
115 	return nvme_transport_ctrlr_get_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, cmbsz.raw),
116 					      &cmbsz->raw);
117 }
118 
119 int
120 nvme_ctrlr_get_pmrcap(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_pmrcap_register *pmrcap)
121 {
122 	return nvme_transport_ctrlr_get_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, pmrcap.raw),
123 					      &pmrcap->raw);
124 }
125 
126 static int
127 nvme_ctrlr_set_nssr(struct spdk_nvme_ctrlr *ctrlr, uint32_t nssr_value)
128 {
129 	return nvme_transport_ctrlr_set_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, nssr),
130 					      nssr_value);
131 }
132 
133 bool
134 nvme_ctrlr_multi_iocs_enabled(struct spdk_nvme_ctrlr *ctrlr)
135 {
136 	return ctrlr->cap.bits.css & SPDK_NVME_CAP_CSS_IOCS &&
137 	       ctrlr->opts.command_set == SPDK_NVME_CC_CSS_IOCS;
138 }
139 
140 /* When fields in spdk_nvme_ctrlr_opts are changed and you change this function, please
141  * also update the nvme_ctrl_opts_init function in nvme_ctrlr.c.
142  */
143 void
144 spdk_nvme_ctrlr_get_default_ctrlr_opts(struct spdk_nvme_ctrlr_opts *opts, size_t opts_size)
145 {
146 	char host_id_str[SPDK_UUID_STRING_LEN];
147 
148 	assert(opts);
149 
150 	opts->opts_size = opts_size;
151 
152 #define FIELD_OK(field) \
153 	offsetof(struct spdk_nvme_ctrlr_opts, field) + sizeof(opts->field) <= opts_size
154 
155 #define SET_FIELD(field, value) \
156 	if (offsetof(struct spdk_nvme_ctrlr_opts, field) + sizeof(opts->field) <= opts_size) { \
157 		opts->field = value; \
158 	} \
159 
160 	SET_FIELD(num_io_queues, DEFAULT_MAX_IO_QUEUES);
161 	SET_FIELD(use_cmb_sqs, false);
162 	SET_FIELD(no_shn_notification, false);
163 	SET_FIELD(arb_mechanism, SPDK_NVME_CC_AMS_RR);
164 	SET_FIELD(arbitration_burst, 0);
165 	SET_FIELD(low_priority_weight, 0);
166 	SET_FIELD(medium_priority_weight, 0);
167 	SET_FIELD(high_priority_weight, 0);
168 	SET_FIELD(keep_alive_timeout_ms, MIN_KEEP_ALIVE_TIMEOUT_IN_MS);
169 	SET_FIELD(transport_retry_count, SPDK_NVME_DEFAULT_RETRY_COUNT);
170 	SET_FIELD(io_queue_size, DEFAULT_IO_QUEUE_SIZE);
171 
172 	if (nvme_driver_init() == 0) {
173 		if (FIELD_OK(hostnqn)) {
174 			spdk_uuid_fmt_lower(host_id_str, sizeof(host_id_str),
175 					    &g_spdk_nvme_driver->default_extended_host_id);
176 			snprintf(opts->hostnqn, sizeof(opts->hostnqn),
177 				 "nqn.2014-08.org.nvmexpress:uuid:%s", host_id_str);
178 		}
179 
180 		if (FIELD_OK(extended_host_id)) {
181 			memcpy(opts->extended_host_id, &g_spdk_nvme_driver->default_extended_host_id,
182 			       sizeof(opts->extended_host_id));
183 		}
184 
185 	}
186 
187 	SET_FIELD(io_queue_requests, DEFAULT_IO_QUEUE_REQUESTS);
188 
189 	if (FIELD_OK(src_addr)) {
190 		memset(opts->src_addr, 0, sizeof(opts->src_addr));
191 	}
192 
193 	if (FIELD_OK(src_svcid)) {
194 		memset(opts->src_svcid, 0, sizeof(opts->src_svcid));
195 	}
196 
197 	if (FIELD_OK(host_id)) {
198 		memset(opts->host_id, 0, sizeof(opts->host_id));
199 	}
200 
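	/*
	 * CHAR_BIT (8) is an out-of-range sentinel: nvme_ctrlr_enable() treats any
	 * command_set value >= CHAR_BIT as "not explicitly requested" and picks a
	 * default based on CAP.CSS.
	 */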
201 	SET_FIELD(command_set, CHAR_BIT);
202 	SET_FIELD(admin_timeout_ms, NVME_MAX_ADMIN_TIMEOUT_IN_SECS * 1000);
203 	SET_FIELD(header_digest, false);
204 	SET_FIELD(data_digest, false);
205 	SET_FIELD(disable_error_logging, false);
206 	SET_FIELD(transport_ack_timeout, SPDK_NVME_DEFAULT_TRANSPORT_ACK_TIMEOUT);
207 	SET_FIELD(admin_queue_size, DEFAULT_ADMIN_QUEUE_SIZE);
208 	SET_FIELD(fabrics_connect_timeout_us, NVME_FABRIC_CONNECT_COMMAND_TIMEOUT);
209 	SET_FIELD(disable_read_ana_log_page, false);
210 
211 #undef FIELD_OK
212 #undef SET_FIELD
213 }
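
/*
 * Illustrative sketch of the intended calling pattern (hypothetical caller,
 * with trid assumed to be a populated struct spdk_nvme_transport_id): fetch
 * the defaults sized to the caller's compiled view of the struct, override
 * selected fields, and pass the same size back to the connect path. This is
 * what makes the opts_size/FIELD_OK scheme safe across ABI revisions.
 *
 *	struct spdk_nvme_ctrlr_opts opts;
 *
 *	spdk_nvme_ctrlr_get_default_ctrlr_opts(&opts, sizeof(opts));
 *	opts.keep_alive_timeout_ms = 30 * 1000;
 *	ctrlr = spdk_nvme_connect(&trid, &opts, sizeof(opts));
 */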
214 
215 /**
216  * This function will be called when the process allocates the IO qpair.
217  * Note: the ctrlr_lock must be held when calling this function.
218  */
219 static void
220 nvme_ctrlr_proc_add_io_qpair(struct spdk_nvme_qpair *qpair)
221 {
222 	struct spdk_nvme_ctrlr_process	*active_proc;
223 	struct spdk_nvme_ctrlr		*ctrlr = qpair->ctrlr;
224 
225 	active_proc = nvme_ctrlr_get_current_process(ctrlr);
226 	if (active_proc) {
227 		TAILQ_INSERT_TAIL(&active_proc->allocated_io_qpairs, qpair, per_process_tailq);
228 		qpair->active_proc = active_proc;
229 	}
230 }
231 
232 /**
233  * This function will be called when the process frees the IO qpair.
234  * Note: the ctrlr_lock must be held when calling this function.
235  */
236 static void
237 nvme_ctrlr_proc_remove_io_qpair(struct spdk_nvme_qpair *qpair)
238 {
239 	struct spdk_nvme_ctrlr_process	*active_proc;
240 	struct spdk_nvme_ctrlr		*ctrlr = qpair->ctrlr;
241 	struct spdk_nvme_qpair          *active_qpair, *tmp_qpair;
242 
243 	active_proc = nvme_ctrlr_get_current_process(ctrlr);
244 	if (!active_proc) {
245 		return;
246 	}
247 
248 	TAILQ_FOREACH_SAFE(active_qpair, &active_proc->allocated_io_qpairs,
249 			   per_process_tailq, tmp_qpair) {
250 		if (active_qpair == qpair) {
251 			TAILQ_REMOVE(&active_proc->allocated_io_qpairs,
252 				     active_qpair, per_process_tailq);
253 
254 			break;
255 		}
256 	}
257 }
258 
259 void
260 spdk_nvme_ctrlr_get_default_io_qpair_opts(struct spdk_nvme_ctrlr *ctrlr,
261 		struct spdk_nvme_io_qpair_opts *opts,
262 		size_t opts_size)
263 {
264 	assert(ctrlr);
265 
266 	assert(opts);
267 
268 	memset(opts, 0, opts_size);
269 
270 #define FIELD_OK(field) \
271 	offsetof(struct spdk_nvme_io_qpair_opts, field) + sizeof(opts->field) <= opts_size
272 
273 	if (FIELD_OK(qprio)) {
274 		opts->qprio = SPDK_NVME_QPRIO_URGENT;
275 	}
276 
277 	if (FIELD_OK(io_queue_size)) {
278 		opts->io_queue_size = ctrlr->opts.io_queue_size;
279 	}
280 
281 	if (FIELD_OK(io_queue_requests)) {
282 		opts->io_queue_requests = ctrlr->opts.io_queue_requests;
283 	}
284 
285 	if (FIELD_OK(delay_cmd_submit)) {
286 		opts->delay_cmd_submit = false;
287 	}
288 
289 	if (FIELD_OK(sq.vaddr)) {
290 		opts->sq.vaddr = NULL;
291 	}
292 
293 	if (FIELD_OK(sq.paddr)) {
294 		opts->sq.paddr = 0;
295 	}
296 
297 	if (FIELD_OK(sq.buffer_size)) {
298 		opts->sq.buffer_size = 0;
299 	}
300 
301 	if (FIELD_OK(cq.vaddr)) {
302 		opts->cq.vaddr = NULL;
303 	}
304 
305 	if (FIELD_OK(cq.paddr)) {
306 		opts->cq.paddr = 0;
307 	}
308 
309 	if (FIELD_OK(cq.buffer_size)) {
310 		opts->cq.buffer_size = 0;
311 	}
312 
313 	if (FIELD_OK(create_only)) {
314 		opts->create_only = false;
315 	}
316 
317 #undef FIELD_OK
318 }
319 
320 static struct spdk_nvme_qpair *
321 nvme_ctrlr_create_io_qpair(struct spdk_nvme_ctrlr *ctrlr,
322 			   const struct spdk_nvme_io_qpair_opts *opts)
323 {
324 	int32_t					qid;
325 	struct spdk_nvme_qpair			*qpair;
326 	union spdk_nvme_cc_register		cc;
327 
328 	if (!ctrlr) {
329 		return NULL;
330 	}
331 
332 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
333 	if (nvme_ctrlr_get_cc(ctrlr, &cc)) {
334 		NVME_CTRLR_ERRLOG(ctrlr, "get_cc failed\n");
335 		nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
336 		return NULL;
337 	}
338 
339 	if (opts->qprio & ~SPDK_NVME_CREATE_IO_SQ_QPRIO_MASK) {
340 		nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
341 		return NULL;
342 	}
343 
344 	/*
345 	 * Only the value SPDK_NVME_QPRIO_URGENT (0) is valid for the
346 	 * default round robin arbitration method.
347 	 */
348 	if ((cc.bits.ams == SPDK_NVME_CC_AMS_RR) && (opts->qprio != SPDK_NVME_QPRIO_URGENT)) {
349 		NVME_CTRLR_ERRLOG(ctrlr, "invalid queue priority for default round robin arbitration method\n");
350 		nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
351 		return NULL;
352 	}
353 
354 	qid = spdk_nvme_ctrlr_alloc_qid(ctrlr);
355 	if (qid < 0) {
356 		nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
357 		return NULL;
358 	}
359 
360 	qpair = nvme_transport_ctrlr_create_io_qpair(ctrlr, qid, opts);
361 	if (qpair == NULL) {
362 		NVME_CTRLR_ERRLOG(ctrlr, "nvme_transport_ctrlr_create_io_qpair() failed\n");
363 		spdk_nvme_ctrlr_free_qid(ctrlr, qid);
364 		nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
365 		return NULL;
366 	}
367 
368 	TAILQ_INSERT_TAIL(&ctrlr->active_io_qpairs, qpair, tailq);
369 
370 	nvme_ctrlr_proc_add_io_qpair(qpair);
371 
372 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
373 
374 	return qpair;
375 }
376 
377 int
378 spdk_nvme_ctrlr_connect_io_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair)
379 {
380 	int rc;
381 
382 	if (nvme_qpair_get_state(qpair) != NVME_QPAIR_DISCONNECTED) {
383 		return -EISCONN;
384 	}
385 
386 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
387 	rc = nvme_transport_ctrlr_connect_qpair(ctrlr, qpair);
388 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
389 
390 	if (ctrlr->quirks & NVME_QUIRK_DELAY_AFTER_QUEUE_ALLOC) {
391 		spdk_delay_us(100);
392 	}
393 
394 	return rc;
395 }
396 
397 void
398 spdk_nvme_ctrlr_disconnect_io_qpair(struct spdk_nvme_qpair *qpair)
399 {
400 	struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr;
401 
402 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
403 	nvme_transport_ctrlr_disconnect_qpair(ctrlr, qpair);
404 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
405 }
406 
407 struct spdk_nvme_qpair *
408 spdk_nvme_ctrlr_alloc_io_qpair(struct spdk_nvme_ctrlr *ctrlr,
409 			       const struct spdk_nvme_io_qpair_opts *user_opts,
410 			       size_t opts_size)
411 {
413 	struct spdk_nvme_qpair		*qpair;
414 	struct spdk_nvme_io_qpair_opts	opts;
415 	int				rc;
416 
417 	/*
418 	 * Get the default options, then overwrite them with the user-provided options
419 	 * up to opts_size.
420 	 *
421 	 * This allows for extensions of the opts structure without breaking
422 	 * ABI compatibility.
423 	 */
424 	spdk_nvme_ctrlr_get_default_io_qpair_opts(ctrlr, &opts, sizeof(opts));
425 	if (user_opts) {
426 		memcpy(&opts, user_opts, spdk_min(sizeof(opts), opts_size));
427 
428 		/* If the user passes buffers, make sure they're big enough for the requested queue size */
429 		if (opts.sq.vaddr) {
430 			if (opts.sq.buffer_size < (opts.io_queue_size * sizeof(struct spdk_nvme_cmd))) {
431 				NVME_CTRLR_ERRLOG(ctrlr, "sq buffer size %" PRIx64 " is too small for sq size %zx\n",
432 						  opts.sq.buffer_size, (opts.io_queue_size * sizeof(struct spdk_nvme_cmd)));
433 				return NULL;
434 			}
435 		}
436 		if (opts.cq.vaddr) {
437 			if (opts.cq.buffer_size < (opts.io_queue_size * sizeof(struct spdk_nvme_cpl))) {
438 				NVME_CTRLR_ERRLOG(ctrlr, "cq buffer size %" PRIx64 " is too small for cq size %zx\n",
439 						  opts.cq.buffer_size, (opts.io_queue_size * sizeof(struct spdk_nvme_cpl)));
440 				return NULL;
441 			}
442 		}
443 	}
444 
445 	qpair = nvme_ctrlr_create_io_qpair(ctrlr, &opts);
446 
447 	if (qpair == NULL || opts.create_only == true) {
448 		return qpair;
449 	}
450 
451 	rc = spdk_nvme_ctrlr_connect_io_qpair(ctrlr, qpair);
452 	if (rc != 0) {
453 		NVME_CTRLR_ERRLOG(ctrlr, "nvme_transport_ctrlr_connect_io_qpair() failed\n");
454 		nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
455 		nvme_ctrlr_proc_remove_io_qpair(qpair);
456 		TAILQ_REMOVE(&ctrlr->active_io_qpairs, qpair, tailq);
457 		spdk_bit_array_set(ctrlr->free_io_qids, qpair->id);
458 		nvme_transport_ctrlr_delete_io_qpair(ctrlr, qpair);
459 		nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
460 		return NULL;
461 	}
462 
463 	return qpair;
464 }
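
/*
 * Illustrative sketch (hypothetical caller): create_only splits allocation
 * from connection, e.g. so the qpair can be added to a poll group before it
 * is connected.
 *
 *	struct spdk_nvme_io_qpair_opts qopts;
 *	struct spdk_nvme_qpair *qpair;
 *
 *	spdk_nvme_ctrlr_get_default_io_qpair_opts(ctrlr, &qopts, sizeof(qopts));
 *	qopts.create_only = true;
 *	qpair = spdk_nvme_ctrlr_alloc_io_qpair(ctrlr, &qopts, sizeof(qopts));
 *	if (qpair && spdk_nvme_ctrlr_connect_io_qpair(ctrlr, qpair) != 0) {
 *		spdk_nvme_ctrlr_free_io_qpair(qpair);
 *		qpair = NULL;
 *	}
 */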
465 
466 int
467 spdk_nvme_ctrlr_reconnect_io_qpair(struct spdk_nvme_qpair *qpair)
468 {
469 	struct spdk_nvme_ctrlr *ctrlr;
470 	enum nvme_qpair_state qpair_state;
471 	int rc;
472 
473 	assert(qpair != NULL);
474 	assert(nvme_qpair_is_admin_queue(qpair) == false);
475 	assert(qpair->ctrlr != NULL);
476 
477 	ctrlr = qpair->ctrlr;
478 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
479 	qpair_state = nvme_qpair_get_state(qpair);
480 
481 	if (ctrlr->is_removed) {
482 		rc = -ENODEV;
483 		goto out;
484 	}
485 
486 	if (ctrlr->is_resetting || qpair_state == NVME_QPAIR_DISCONNECTING) {
487 		rc = -EAGAIN;
488 		goto out;
489 	}
490 
491 	if (ctrlr->is_failed || qpair_state == NVME_QPAIR_DESTROYING) {
492 		rc = -ENXIO;
493 		goto out;
494 	}
495 
496 	if (qpair_state != NVME_QPAIR_DISCONNECTED) {
497 		rc = 0;
498 		goto out;
499 	}
500 
501 	rc = nvme_transport_ctrlr_connect_qpair(ctrlr, qpair);
502 	if (rc) {
503 		rc = -EAGAIN;
504 		goto out;
505 	}
506 
507 out:
508 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
509 	return rc;
510 }
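
/*
 * Illustrative sketch (hypothetical caller) of how the return codes above are
 * usually interpreted:
 *
 *	rc = spdk_nvme_ctrlr_reconnect_io_qpair(qpair);
 *	if (rc == 0) {
 *		// reconnected (or already connected); resume submitting I/O
 *	} else if (rc == -EAGAIN) {
 *		// controller resetting or qpair still disconnecting; retry later
 *	} else {
 *		// -ENODEV or -ENXIO: the qpair cannot be recovered
 *	}
 */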
511 
512 spdk_nvme_qp_failure_reason
513 spdk_nvme_ctrlr_get_admin_qp_failure_reason(struct spdk_nvme_ctrlr *ctrlr)
514 {
515 	return ctrlr->adminq->transport_failure_reason;
516 }
517 
518 /*
519  * This internal function will attempt to take the controller
520  * lock before calling disconnect on a controller qpair.
521  * Functions already holding the controller lock should
522  * call nvme_transport_ctrlr_disconnect_qpair directly.
523  */
524 void
525 nvme_ctrlr_disconnect_qpair(struct spdk_nvme_qpair *qpair)
526 {
527 	struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr;
528 
529 	assert(ctrlr != NULL);
530 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
531 	nvme_transport_ctrlr_disconnect_qpair(ctrlr, qpair);
532 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
533 }
534 
535 int
536 spdk_nvme_ctrlr_free_io_qpair(struct spdk_nvme_qpair *qpair)
537 {
538 	struct spdk_nvme_ctrlr *ctrlr;
539 
540 	if (qpair == NULL) {
541 		return 0;
542 	}
543 
544 	ctrlr = qpair->ctrlr;
545 
546 	if (qpair->in_completion_context) {
547 		/*
548 		 * There are many cases where it is convenient to delete an io qpair in the context
549 		 *  of that qpair's completion routine.  To handle this properly, set a flag here
550 		 *  so that the completion routine will perform an actual delete after the context
551 		 *  unwinds.
552 		 */
553 		qpair->delete_after_completion_context = 1;
554 		return 0;
555 	}
556 
557 	if (qpair->poll_group && qpair->poll_group->in_completion_context) {
558 		/* Same as above, but in a poll group. */
559 		qpair->poll_group->num_qpairs_to_delete++;
560 		qpair->delete_after_completion_context = 1;
561 		return 0;
562 	}
563 
564 	if (qpair->poll_group) {
565 		spdk_nvme_poll_group_remove(qpair->poll_group->group, qpair);
566 	}
567 
568 	/* Do not retry. */
569 	nvme_qpair_set_state(qpair, NVME_QPAIR_DESTROYING);
570 
571 	/* In the multi-process case, a process may call this function on a foreign
572 	 * I/O qpair (i.e. one that this process did not create) when that qpair's process
573 	 * exits unexpectedly.  In that case, we must not try to abort any reqs associated
574 	 * with that qpair, since the callbacks will also be foreign to this process.
575 	 */
576 	if (qpair->active_proc == nvme_ctrlr_get_current_process(ctrlr)) {
577 		nvme_qpair_abort_reqs(qpair, 1);
578 	}
579 
580 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
581 
582 	nvme_ctrlr_proc_remove_io_qpair(qpair);
583 
584 	TAILQ_REMOVE(&ctrlr->active_io_qpairs, qpair, tailq);
585 	spdk_nvme_ctrlr_free_qid(ctrlr, qpair->id);
586 
587 	nvme_transport_ctrlr_delete_io_qpair(ctrlr, qpair);
588 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
589 	return 0;
590 }
591 
592 static void
593 nvme_ctrlr_construct_intel_support_log_page_list(struct spdk_nvme_ctrlr *ctrlr,
594 		struct spdk_nvme_intel_log_page_directory *log_page_directory)
595 {
596 	if (log_page_directory == NULL) {
597 		return;
598 	}
599 
600 	if (ctrlr->cdata.vid != SPDK_PCI_VID_INTEL) {
601 		return;
602 	}
603 
604 	ctrlr->log_page_supported[SPDK_NVME_INTEL_LOG_PAGE_DIRECTORY] = true;
605 
606 	if (log_page_directory->read_latency_log_len ||
607 	    (ctrlr->quirks & NVME_INTEL_QUIRK_READ_LATENCY)) {
608 		ctrlr->log_page_supported[SPDK_NVME_INTEL_LOG_READ_CMD_LATENCY] = true;
609 	}
610 	if (log_page_directory->write_latency_log_len ||
611 	    (ctrlr->quirks & NVME_INTEL_QUIRK_WRITE_LATENCY)) {
612 		ctrlr->log_page_supported[SPDK_NVME_INTEL_LOG_WRITE_CMD_LATENCY] = true;
613 	}
614 	if (log_page_directory->temperature_statistics_log_len) {
615 		ctrlr->log_page_supported[SPDK_NVME_INTEL_LOG_TEMPERATURE] = true;
616 	}
617 	if (log_page_directory->smart_log_len) {
618 		ctrlr->log_page_supported[SPDK_NVME_INTEL_LOG_SMART] = true;
619 	}
620 	if (log_page_directory->marketing_description_log_len) {
621 		ctrlr->log_page_supported[SPDK_NVME_INTEL_MARKETING_DESCRIPTION] = true;
622 	}
623 }
624 
625 static int
nvme_ctrlr_set_intel_support_log_pages(struct spdk_nvme_ctrlr *ctrlr)
626 {
627 	int rc = 0;
628 	struct nvme_completion_poll_status	*status;
629 	struct spdk_nvme_intel_log_page_directory *log_page_directory;
630 
631 	log_page_directory = spdk_zmalloc(sizeof(struct spdk_nvme_intel_log_page_directory),
632 					  64, NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
633 	if (log_page_directory == NULL) {
634 		NVME_CTRLR_ERRLOG(ctrlr, "could not allocate log_page_directory\n");
635 		return -ENXIO;
636 	}
637 
638 	status = calloc(1, sizeof(*status));
639 	if (!status) {
640 		NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n");
641 		spdk_free(log_page_directory);
642 		return -ENOMEM;
643 	}
644 
645 	rc = spdk_nvme_ctrlr_cmd_get_log_page(ctrlr, SPDK_NVME_INTEL_LOG_PAGE_DIRECTORY,
646 					      SPDK_NVME_GLOBAL_NS_TAG, log_page_directory,
647 					      sizeof(struct spdk_nvme_intel_log_page_directory),
648 					      0, nvme_completion_poll_cb, status);
649 	if (rc != 0) {
650 		spdk_free(log_page_directory);
651 		free(status);
652 		return rc;
653 	}
654 
655 	if (nvme_wait_for_completion_timeout(ctrlr->adminq, status,
656 					     ctrlr->opts.admin_timeout_ms * 1000)) {
657 		spdk_free(log_page_directory);
658 		NVME_CTRLR_WARNLOG(ctrlr, "Intel log pages not supported on Intel drive!\n");
659 		if (!status->timed_out) {
660 			free(status);
661 		}
662 		return 0;
663 	}
664 
665 	nvme_ctrlr_construct_intel_support_log_page_list(ctrlr, log_page_directory);
666 	spdk_free(log_page_directory);
667 	free(status);
668 	return 0;
669 }
670 
671 static int
672 nvme_ctrlr_update_ana_log_page(struct spdk_nvme_ctrlr *ctrlr)
673 {
674 	struct nvme_completion_poll_status *status;
675 	int rc;
676 
677 	status = calloc(1, sizeof(*status));
678 	if (status == NULL) {
679 		NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n");
680 		return -ENOMEM;
681 	}
682 
683 	rc = spdk_nvme_ctrlr_cmd_get_log_page(ctrlr, SPDK_NVME_LOG_ASYMMETRIC_NAMESPACE_ACCESS,
684 					      SPDK_NVME_GLOBAL_NS_TAG, ctrlr->ana_log_page,
685 					      ctrlr->ana_log_page_size, 0,
686 					      nvme_completion_poll_cb, status);
687 	if (rc != 0) {
688 		free(status);
689 		return rc;
690 	}
691 
692 	if (nvme_wait_for_completion_robust_lock_timeout(ctrlr->adminq, status, &ctrlr->ctrlr_lock,
693 			ctrlr->opts.admin_timeout_ms * 1000)) {
694 		if (!status->timed_out) {
695 			free(status);
696 		}
697 		return -EIO;
698 	}
699 
700 	free(status);
701 	return 0;
702 }
703 
704 static int
705 nvme_ctrlr_init_ana_log_page(struct spdk_nvme_ctrlr *ctrlr)
706 {
707 	uint32_t ana_log_page_size;
708 
709 	ana_log_page_size = sizeof(struct spdk_nvme_ana_page) + ctrlr->cdata.nanagrpid *
710 			    sizeof(struct spdk_nvme_ana_group_descriptor) + ctrlr->cdata.nn *
711 			    sizeof(uint32_t);
712 
713 	ctrlr->ana_log_page = spdk_zmalloc(ana_log_page_size, 64, NULL, SPDK_ENV_SOCKET_ID_ANY,
714 					   SPDK_MALLOC_DMA);
715 	if (ctrlr->ana_log_page == NULL) {
716 		NVME_CTRLR_ERRLOG(ctrlr, "could not allocate ANA log page buffer\n");
717 		return -ENXIO;
718 	}
719 	ctrlr->copied_ana_desc = calloc(1, ana_log_page_size);
720 	if (ctrlr->copied_ana_desc == NULL) {
721 		NVME_CTRLR_ERRLOG(ctrlr, "could not allocate a buffer to parse ANA descriptor\n");
722 		return -ENOMEM;
723 	}
724 	ctrlr->ana_log_page_size = ana_log_page_size;
725 
727 	return nvme_ctrlr_update_ana_log_page(ctrlr);
728 }
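
/*
 * Worked example of the sizing above (illustrative values only): with
 * cdata.nanagrpid = 4 and cdata.nn = 8, the buffer covers the fixed ANA log
 * header, up to 4 group descriptors, and up to 8 NSID entries:
 *
 *	ana_log_page_size = sizeof(struct spdk_nvme_ana_page)
 *			    + 4 * sizeof(struct spdk_nvme_ana_group_descriptor)
 *			    + 8 * sizeof(uint32_t);
 *
 * i.e. the worst case where every allowed group descriptor and every possible
 * namespace ID appears in the log page.
 */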
729 
730 static int
731 nvme_ctrlr_update_ns_ana_states(const struct spdk_nvme_ana_group_descriptor *desc,
732 				void *cb_arg)
733 {
734 	struct spdk_nvme_ctrlr *ctrlr = cb_arg;
735 	struct spdk_nvme_ns *ns;
736 	uint32_t i, nsid;
737 
738 	for (i = 0; i < desc->num_of_nsid; i++) {
739 		nsid = desc->nsid[i];
740 		if (nsid == 0 || nsid > ctrlr->cdata.nn) {
741 			continue;
742 		}
743 
744 		ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid);
745 		assert(ns != NULL);
746 
747 		ns->ana_group_id = desc->ana_group_id;
748 		ns->ana_state = desc->ana_state;
749 	}
750 
751 	return 0;
752 }
753 
754 int
755 nvme_ctrlr_parse_ana_log_page(struct spdk_nvme_ctrlr *ctrlr,
756 			      spdk_nvme_parse_ana_log_page_cb cb_fn, void *cb_arg)
757 {
758 	struct spdk_nvme_ana_group_descriptor *copied_desc;
759 	uint8_t *orig_desc;
760 	uint32_t i, desc_size, copy_len;
761 	int rc = 0;
762 
763 	if (ctrlr->ana_log_page == NULL) {
764 		return -EINVAL;
765 	}
766 
767 	copied_desc = ctrlr->copied_ana_desc;
768 
769 	orig_desc = (uint8_t *)ctrlr->ana_log_page + sizeof(struct spdk_nvme_ana_page);
770 	copy_len = ctrlr->ana_log_page_size - sizeof(struct spdk_nvme_ana_page);
771 
772 	for (i = 0; i < ctrlr->ana_log_page->num_ana_group_desc; i++) {
773 		memcpy(copied_desc, orig_desc, copy_len);
774 
775 		rc = cb_fn(copied_desc, cb_arg);
776 		if (rc != 0) {
777 			break;
778 		}
779 
780 		desc_size = sizeof(struct spdk_nvme_ana_group_descriptor) +
781 			    copied_desc->num_of_nsid * sizeof(uint32_t);
782 		orig_desc += desc_size;
783 		copy_len -= desc_size;
784 	}
785 
786 	return rc;
787 }
788 
789 static int
790 nvme_ctrlr_set_supported_log_pages(struct spdk_nvme_ctrlr *ctrlr)
791 {
792 	int	rc = 0;
793 
794 	memset(ctrlr->log_page_supported, 0, sizeof(ctrlr->log_page_supported));
795 	/* Mandatory pages */
796 	ctrlr->log_page_supported[SPDK_NVME_LOG_ERROR] = true;
797 	ctrlr->log_page_supported[SPDK_NVME_LOG_HEALTH_INFORMATION] = true;
798 	ctrlr->log_page_supported[SPDK_NVME_LOG_FIRMWARE_SLOT] = true;
799 	if (ctrlr->cdata.lpa.celp) {
800 		ctrlr->log_page_supported[SPDK_NVME_LOG_COMMAND_EFFECTS_LOG] = true;
801 	}
802 	if (ctrlr->cdata.vid == SPDK_PCI_VID_INTEL && !(ctrlr->quirks & NVME_INTEL_QUIRK_NO_LOG_PAGES)) {
803 		rc = nvme_ctrlr_set_intel_support_log_pages(ctrlr);
804 		if (rc != 0) {
805 			goto out;
806 		}
807 	}
808 	if (ctrlr->cdata.cmic.ana_reporting) {
809 		ctrlr->log_page_supported[SPDK_NVME_LOG_ASYMMETRIC_NAMESPACE_ACCESS] = true;
810 		if (!ctrlr->opts.disable_read_ana_log_page) {
811 			rc = nvme_ctrlr_init_ana_log_page(ctrlr);
812 			if (rc == 0) {
813 				nvme_ctrlr_parse_ana_log_page(ctrlr, nvme_ctrlr_update_ns_ana_states,
814 							      ctrlr);
815 			}
816 		}
817 	}
818 
819 out:
820 	return rc;
821 }
822 
823 static void
824 nvme_ctrlr_set_intel_supported_features(struct spdk_nvme_ctrlr *ctrlr)
825 {
826 	ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_MAX_LBA] = true;
827 	ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_NATIVE_MAX_LBA] = true;
828 	ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_POWER_GOVERNOR_SETTING] = true;
829 	ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_SMBUS_ADDRESS] = true;
830 	ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_LED_PATTERN] = true;
831 	ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_RESET_TIMED_WORKLOAD_COUNTERS] = true;
832 	ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_LATENCY_TRACKING] = true;
833 }
834 
835 static void
836 nvme_ctrlr_set_arbitration_feature(struct spdk_nvme_ctrlr *ctrlr)
837 {
838 	uint32_t cdw11;
839 	struct nvme_completion_poll_status *status;
840 
841 	if (ctrlr->opts.arbitration_burst == 0) {
842 		return;
843 	}
844 
845 	if (ctrlr->opts.arbitration_burst > 7) {
846 		NVME_CTRLR_WARNLOG(ctrlr, "Valid arbitration burst values are from 0-7\n");
847 		return;
848 	}
849 
850 	status = calloc(1, sizeof(*status));
851 	if (!status) {
852 		NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n");
853 		return;
854 	}
855 
856 	cdw11 = ctrlr->opts.arbitration_burst;
857 
858 	if (spdk_nvme_ctrlr_get_flags(ctrlr) & SPDK_NVME_CTRLR_WRR_SUPPORTED) {
859 		cdw11 |= (uint32_t)ctrlr->opts.low_priority_weight << 8;
860 		cdw11 |= (uint32_t)ctrlr->opts.medium_priority_weight << 16;
861 		cdw11 |= (uint32_t)ctrlr->opts.high_priority_weight << 24;
862 	}
863 
864 	if (spdk_nvme_ctrlr_cmd_set_feature(ctrlr, SPDK_NVME_FEAT_ARBITRATION,
865 					    cdw11, 0, NULL, 0,
866 					    nvme_completion_poll_cb, status) < 0) {
867 		NVME_CTRLR_ERRLOG(ctrlr, "Set arbitration feature failed\n");
868 		free(status);
869 		return;
870 	}
871 
872 	if (nvme_wait_for_completion_timeout(ctrlr->adminq, status,
873 					     ctrlr->opts.admin_timeout_ms * 1000)) {
874 		NVME_CTRLR_ERRLOG(ctrlr, "Timeout to set arbitration feature\n");
875 	}
876 
877 	if (!status->timed_out) {
878 		free(status);
879 	}
880 }
881 
882 static void
883 nvme_ctrlr_set_supported_features(struct spdk_nvme_ctrlr *ctrlr)
884 {
885 	memset(ctrlr->feature_supported, 0, sizeof(ctrlr->feature_supported));
886 	/* Mandatory features */
887 	ctrlr->feature_supported[SPDK_NVME_FEAT_ARBITRATION] = true;
888 	ctrlr->feature_supported[SPDK_NVME_FEAT_POWER_MANAGEMENT] = true;
889 	ctrlr->feature_supported[SPDK_NVME_FEAT_TEMPERATURE_THRESHOLD] = true;
890 	ctrlr->feature_supported[SPDK_NVME_FEAT_ERROR_RECOVERY] = true;
891 	ctrlr->feature_supported[SPDK_NVME_FEAT_NUMBER_OF_QUEUES] = true;
892 	ctrlr->feature_supported[SPDK_NVME_FEAT_INTERRUPT_COALESCING] = true;
893 	ctrlr->feature_supported[SPDK_NVME_FEAT_INTERRUPT_VECTOR_CONFIGURATION] = true;
894 	ctrlr->feature_supported[SPDK_NVME_FEAT_WRITE_ATOMICITY] = true;
895 	ctrlr->feature_supported[SPDK_NVME_FEAT_ASYNC_EVENT_CONFIGURATION] = true;
896 	/* Optional features */
897 	if (ctrlr->cdata.vwc.present) {
898 		ctrlr->feature_supported[SPDK_NVME_FEAT_VOLATILE_WRITE_CACHE] = true;
899 	}
900 	if (ctrlr->cdata.apsta.supported) {
901 		ctrlr->feature_supported[SPDK_NVME_FEAT_AUTONOMOUS_POWER_STATE_TRANSITION] = true;
902 	}
903 	if (ctrlr->cdata.hmpre) {
904 		ctrlr->feature_supported[SPDK_NVME_FEAT_HOST_MEM_BUFFER] = true;
905 	}
906 	if (ctrlr->cdata.vid == SPDK_PCI_VID_INTEL) {
907 		nvme_ctrlr_set_intel_supported_features(ctrlr);
908 	}
909 
910 	nvme_ctrlr_set_arbitration_feature(ctrlr);
911 }
912 
913 bool
914 spdk_nvme_ctrlr_is_failed(struct spdk_nvme_ctrlr *ctrlr)
915 {
916 	return ctrlr->is_failed;
917 }
918 
919 void
920 nvme_ctrlr_fail(struct spdk_nvme_ctrlr *ctrlr, bool hot_remove)
921 {
922 	/*
923 	 * Set the flag here and leave the failing of the qpairs to
924 	 * spdk_nvme_qpair_process_completions().
925 	 */
926 	if (hot_remove) {
927 		ctrlr->is_removed = true;
928 	}
929 
930 	if (ctrlr->is_failed) {
931 		NVME_CTRLR_NOTICELOG(ctrlr, "already in failed state\n");
932 		return;
933 	}
934 
935 	ctrlr->is_failed = true;
936 	nvme_transport_ctrlr_disconnect_qpair(ctrlr, ctrlr->adminq);
937 	NVME_CTRLR_ERRLOG(ctrlr, "in failed state.\n");
938 }
939 
940 /**
941  * This public API function will try to take the controller lock.
942  * Any private functions being called from a thread already holding
943  * the ctrlr lock should call nvme_ctrlr_fail directly.
944  */
945 void
946 spdk_nvme_ctrlr_fail(struct spdk_nvme_ctrlr *ctrlr)
947 {
948 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
949 	nvme_ctrlr_fail(ctrlr, false);
950 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
951 }
952 
953 static void
954 nvme_ctrlr_shutdown_async(struct spdk_nvme_ctrlr *ctrlr,
955 			  struct nvme_ctrlr_detach_ctx *ctx)
956 {
957 	union spdk_nvme_cc_register	cc;
958 
959 	if (ctrlr->is_removed) {
960 		ctx->shutdown_complete = true;
961 		return;
962 	}
963 
964 	if (nvme_ctrlr_get_cc(ctrlr, &cc)) {
965 		NVME_CTRLR_ERRLOG(ctrlr, "get_cc() failed\n");
966 		ctx->shutdown_complete = true;
967 		return;
968 	}
969 
970 	cc.bits.shn = SPDK_NVME_SHN_NORMAL;
971 
972 	if (nvme_ctrlr_set_cc(ctrlr, &cc)) {
973 		NVME_CTRLR_ERRLOG(ctrlr, "set_cc() failed\n");
974 		ctx->shutdown_complete = true;
975 		return;
976 	}
977 
978 	/*
979 	 * The NVMe specification defines RTD3E to be the time from
980 	 *  setting SHN = 1 until the controller sets SHST = 10b.
981 	 * If the device doesn't report RTD3 entry latency, or if it
982 	 *  reports RTD3 entry latency less than 10 seconds, pick
983 	 *  10 seconds as a reasonable amount of time to
984 	 *  wait before proceeding.
985 	 */
986 	NVME_CTRLR_DEBUGLOG(ctrlr, "RTD3E = %" PRIu32 " us\n", ctrlr->cdata.rtd3e);
987 	ctx->shutdown_timeout_ms = SPDK_CEIL_DIV(ctrlr->cdata.rtd3e, 1000);
988 	ctx->shutdown_timeout_ms = spdk_max(ctx->shutdown_timeout_ms, 10000);
989 	NVME_CTRLR_DEBUGLOG(ctrlr, "shutdown timeout = %" PRIu32 " ms\n", ctx->shutdown_timeout_ms);
990 
991 	ctx->shutdown_start_tsc = spdk_get_ticks();
992 }
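
/*
 * Worked example of the timeout math above (illustrative values): RTD3E is
 * reported in microseconds, so a device reporting rtd3e = 8,000,000 yields
 * SPDK_CEIL_DIV(8000000, 1000) = 8000 ms, which spdk_max() then raises to the
 * 10000 ms floor. A device reporting rtd3e = 25,000,000 keeps its 25000 ms.
 */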
993 
994 static int
995 nvme_ctrlr_shutdown_poll_async(struct spdk_nvme_ctrlr *ctrlr,
996 			       struct nvme_ctrlr_detach_ctx *ctx)
997 {
998 	union spdk_nvme_csts_register	csts;
999 	uint32_t			ms_waited;
1000 
1001 	ms_waited = (spdk_get_ticks() - ctx->shutdown_start_tsc) * 1000 / spdk_get_ticks_hz();
1002 
1003 	if (nvme_ctrlr_get_csts(ctrlr, &csts)) {
1004 		NVME_CTRLR_ERRLOG(ctrlr, "get_csts() failed\n");
1005 		return -EIO;
1006 	}
1007 
1008 	if (csts.bits.shst == SPDK_NVME_SHST_COMPLETE) {
1009 		NVME_CTRLR_DEBUGLOG(ctrlr, "shutdown complete in %u milliseconds\n", ms_waited);
1010 		return 0;
1011 	}
1012 
1013 	if (ms_waited < ctx->shutdown_timeout_ms) {
1014 		return -EAGAIN;
1015 	}
1016 
1017 	NVME_CTRLR_ERRLOG(ctrlr, "did not shutdown within %u milliseconds\n",
1018 			  ctx->shutdown_timeout_ms);
1019 	if (ctrlr->quirks & NVME_QUIRK_SHST_COMPLETE) {
1020 		NVME_CTRLR_ERRLOG(ctrlr, "likely due to shutdown handling in the VMWare emulated NVMe SSD\n");
1021 		NVME_CTRLR_ERRLOG(ctrlr, "likely due to shutdown handling in the VMware emulated NVMe SSD\n");
1022 
1023 	return 0;
1024 }
1025 
1026 static int
1027 nvme_ctrlr_enable(struct spdk_nvme_ctrlr *ctrlr)
1028 {
1029 	union spdk_nvme_cc_register	cc;
1030 	int				rc;
1031 
1032 	rc = nvme_transport_ctrlr_enable(ctrlr);
1033 	if (rc != 0) {
1034 		NVME_CTRLR_ERRLOG(ctrlr, "transport ctrlr_enable failed\n");
1035 		return rc;
1036 	}
1037 
1038 	if (nvme_ctrlr_get_cc(ctrlr, &cc)) {
1039 		NVME_CTRLR_ERRLOG(ctrlr, "get_cc() failed\n");
1040 		return -EIO;
1041 	}
1042 
1043 	if (cc.bits.en != 0) {
1044 		NVME_CTRLR_ERRLOG(ctrlr, "called with CC.EN = 1\n");
1045 		return -EINVAL;
1046 	}
1047 
1048 	cc.bits.en = 1;
1049 	cc.bits.css = 0;
1050 	cc.bits.shn = 0;
1051 	cc.bits.iosqes = 6; /* SQ entry size == 64 == 2^6 */
1052 	cc.bits.iocqes = 4; /* CQ entry size == 16 == 2^4 */
1053 
1054 	/* Page size is 2 ^ (12 + mps). */
1055 	cc.bits.mps = spdk_u32log2(ctrlr->page_size) - 12;
1056 
1057 	/*
1058 	 * Since NVMe 1.0, a controller should have at least one bit set in CAP.CSS.
1059 	 * A controller that does not have any bit set in CAP.CSS is not spec compliant.
1060 	 * Try to support such a controller regardless.
1061 	 */
1062 	if (ctrlr->cap.bits.css == 0) {
1063 		NVME_CTRLR_INFOLOG(ctrlr, "Drive reports no command sets supported. Assuming NVM is supported.\n");
1064 		ctrlr->cap.bits.css = SPDK_NVME_CAP_CSS_NVM;
1065 	}
1066 
1067 	/*
1068 	 * If the user did not explicitly request a command set, or supplied a value larger than
1069 	 * what can be saved in CC.CSS, use the most reasonable default.
1070 	 */
1071 	if (ctrlr->opts.command_set >= CHAR_BIT) {
1072 		if (ctrlr->cap.bits.css & SPDK_NVME_CAP_CSS_IOCS) {
1073 			ctrlr->opts.command_set = SPDK_NVME_CC_CSS_IOCS;
1074 		} else if (ctrlr->cap.bits.css & SPDK_NVME_CAP_CSS_NVM) {
1075 			ctrlr->opts.command_set = SPDK_NVME_CC_CSS_NVM;
1076 		} else if (ctrlr->cap.bits.css & SPDK_NVME_CAP_CSS_NOIO) {
1077 			ctrlr->opts.command_set = SPDK_NVME_CC_CSS_NOIO;
1078 		} else {
1079 			/* Invalid supported bits detected, falling back to NVM. */
1080 			ctrlr->opts.command_set = SPDK_NVME_CC_CSS_NVM;
1081 		}
1082 	}
1083 
1084 	/* Verify that the selected command set is supported by the controller. */
1085 	if (!(ctrlr->cap.bits.css & (1u << ctrlr->opts.command_set))) {
1086 		NVME_CTRLR_DEBUGLOG(ctrlr, "Requested I/O command set %u but supported mask is 0x%x\n",
1087 				    ctrlr->opts.command_set, ctrlr->cap.bits.css);
1088 		NVME_CTRLR_DEBUGLOG(ctrlr, "Falling back to NVM. Assuming NVM is supported.\n");
1089 		ctrlr->opts.command_set = SPDK_NVME_CC_CSS_NVM;
1090 	}
1091 
1092 	cc.bits.css = ctrlr->opts.command_set;
1093 
1094 	switch (ctrlr->opts.arb_mechanism) {
1095 	case SPDK_NVME_CC_AMS_RR:
1096 		break;
1097 	case SPDK_NVME_CC_AMS_WRR:
1098 		if (SPDK_NVME_CAP_AMS_WRR & ctrlr->cap.bits.ams) {
1099 			break;
1100 		}
1101 		return -EINVAL;
1102 	case SPDK_NVME_CC_AMS_VS:
1103 		if (SPDK_NVME_CAP_AMS_VS & ctrlr->cap.bits.ams) {
1104 			break;
1105 		}
1106 		return -EINVAL;
1107 	default:
1108 		return -EINVAL;
1109 	}
1110 
1111 	cc.bits.ams = ctrlr->opts.arb_mechanism;
1112 
1113 	if (nvme_ctrlr_set_cc(ctrlr, &cc)) {
1114 		NVME_CTRLR_ERRLOG(ctrlr, "set_cc() failed\n");
1115 		return -EIO;
1116 	}
1117 
1118 	return 0;
1119 }
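
/*
 * Worked example of the CC programming above (illustrative values): with a
 * 4 KiB controller page size, spdk_u32log2(4096) = 12, so cc.bits.mps = 0
 * (page size = 2^(12 + 0) = 4096). If CAP.CSS advertises both the NVM bit and
 * the IOCS bit and opts.command_set was left at its CHAR_BIT sentinel, the
 * selection above picks SPDK_NVME_CC_CSS_IOCS, and the (1u << command_set)
 * check passes because the corresponding CAP.CSS bit is set.
 */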
1120 
1121 static int
1122 nvme_ctrlr_disable(struct spdk_nvme_ctrlr *ctrlr)
1123 {
1124 	union spdk_nvme_cc_register	cc;
1125 
1126 	if (nvme_ctrlr_get_cc(ctrlr, &cc)) {
1127 		NVME_CTRLR_ERRLOG(ctrlr, "get_cc() failed\n");
1128 		return -EIO;
1129 	}
1130 
1131 	if (cc.bits.en == 0) {
1132 		return 0;
1133 	}
1134 
1135 	cc.bits.en = 0;
1136 
1137 	if (nvme_ctrlr_set_cc(ctrlr, &cc)) {
1138 		NVME_CTRLR_ERRLOG(ctrlr, "set_cc() failed\n");
1139 		return -EIO;
1140 	}
1141 
1142 	return 0;
1143 }
1144 
1145 #ifdef DEBUG
1146 static const char *
1147 nvme_ctrlr_state_string(enum nvme_ctrlr_state state)
1148 {
1149 	switch (state) {
1150 	case NVME_CTRLR_STATE_INIT_DELAY:
1151 		return "delay init";
1152 	case NVME_CTRLR_STATE_CONNECT_ADMINQ:
1153 		return "connect adminq";
1154 	case NVME_CTRLR_STATE_READ_VS:
1155 		return "read vs";
1156 	case NVME_CTRLR_STATE_READ_CAP:
1157 		return "read cap";
1158 	case NVME_CTRLR_STATE_CHECK_EN:
1159 		return "check en";
1160 	case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1:
1161 		return "disable and wait for CSTS.RDY = 1";
1162 	case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0:
1163 		return "disable and wait for CSTS.RDY = 0";
1164 	case NVME_CTRLR_STATE_ENABLE:
1165 		return "enable controller by writing CC.EN = 1";
1166 	case NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1:
1167 		return "wait for CSTS.RDY = 1";
1168 	case NVME_CTRLR_STATE_RESET_ADMIN_QUEUE:
1169 		return "reset admin queue";
1170 	case NVME_CTRLR_STATE_IDENTIFY:
1171 		return "identify controller";
1172 	case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY:
1173 		return "wait for identify controller";
1174 	case NVME_CTRLR_STATE_IDENTIFY_IOCS_SPECIFIC:
1175 		return "identify controller iocs specific";
1176 	case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_IOCS_SPECIFIC:
1177 		return "wait for identify controller iocs specific";
1178 	case NVME_CTRLR_STATE_GET_ZNS_CMD_EFFECTS_LOG:
1179 		return "get zns cmd and effects log page";
1180 	case NVME_CTRLR_STATE_WAIT_FOR_GET_ZNS_CMD_EFFECTS_LOG:
1181 		return "wait for get zns cmd and effects log page";
1182 	case NVME_CTRLR_STATE_SET_NUM_QUEUES:
1183 		return "set number of queues";
1184 	case NVME_CTRLR_STATE_WAIT_FOR_SET_NUM_QUEUES:
1185 		return "wait for set number of queues";
1186 	case NVME_CTRLR_STATE_IDENTIFY_ACTIVE_NS:
1187 		return "identify active ns";
1188 	case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_ACTIVE_NS:
1189 		return "wait for identify active ns";
1190 	case NVME_CTRLR_STATE_CONSTRUCT_NS:
1191 		return "construct namespaces";
1192 	case NVME_CTRLR_STATE_IDENTIFY_NS:
1193 		return "identify ns";
1194 	case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_NS:
1195 		return "wait for identify ns";
1196 	case NVME_CTRLR_STATE_IDENTIFY_ID_DESCS:
1197 		return "identify namespace id descriptors";
1198 	case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_ID_DESCS:
1199 		return "wait for identify namespace id descriptors";
1200 	case NVME_CTRLR_STATE_IDENTIFY_NS_IOCS_SPECIFIC:
1201 		return "identify ns iocs specific";
1202 	case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_NS_IOCS_SPECIFIC:
1203 		return "wait for identify ns iocs specific";
1204 	case NVME_CTRLR_STATE_CONFIGURE_AER:
1205 		return "configure AER";
1206 	case NVME_CTRLR_STATE_WAIT_FOR_CONFIGURE_AER:
1207 		return "wait for configure aer";
1208 	case NVME_CTRLR_STATE_SET_SUPPORTED_LOG_PAGES:
1209 		return "set supported log pages";
1210 	case NVME_CTRLR_STATE_SET_SUPPORTED_FEATURES:
1211 		return "set supported features";
1212 	case NVME_CTRLR_STATE_SET_DB_BUF_CFG:
1213 		return "set doorbell buffer config";
1214 	case NVME_CTRLR_STATE_WAIT_FOR_DB_BUF_CFG:
1215 		return "wait for doorbell buffer config";
1216 	case NVME_CTRLR_STATE_SET_KEEP_ALIVE_TIMEOUT:
1217 		return "set keep alive timeout";
1218 	case NVME_CTRLR_STATE_WAIT_FOR_KEEP_ALIVE_TIMEOUT:
1219 		return "wait for set keep alive timeout";
1220 	case NVME_CTRLR_STATE_SET_HOST_ID:
1221 		return "set host ID";
1222 	case NVME_CTRLR_STATE_WAIT_FOR_HOST_ID:
1223 		return "wait for set host ID";
1224 	case NVME_CTRLR_STATE_READY:
1225 		return "ready";
1226 	case NVME_CTRLR_STATE_ERROR:
1227 		return "error";
1228 	}
1229 	return "unknown";
1230 }
1231 #endif /* DEBUG */
1232 
1233 static void
1234 nvme_ctrlr_set_state(struct spdk_nvme_ctrlr *ctrlr, enum nvme_ctrlr_state state,
1235 		     uint64_t timeout_in_ms)
1236 {
1237 	uint64_t ticks_per_ms, timeout_in_ticks, now_ticks;
1238 
1239 	ctrlr->state = state;
1240 	if (timeout_in_ms == NVME_TIMEOUT_INFINITE) {
1241 		goto inf;
1242 	}
1243 
1244 	ticks_per_ms = spdk_get_ticks_hz() / 1000;
1245 	if (timeout_in_ms > UINT64_MAX / ticks_per_ms) {
1246 		NVME_CTRLR_ERRLOG(ctrlr,
1247 				  "Specified timeout would cause integer overflow. Defaulting to no timeout.\n");
1248 		goto inf;
1249 	}
1250 
1251 	now_ticks = spdk_get_ticks();
1252 	timeout_in_ticks = timeout_in_ms * ticks_per_ms;
1253 	if (timeout_in_ticks > UINT64_MAX - now_ticks) {
1254 		NVME_CTRLR_ERRLOG(ctrlr,
1255 				  "Specified timeout would cause integer overflow. Defaulting to no timeout.\n");
1256 		goto inf;
1257 	}
1258 
1259 	ctrlr->state_timeout_tsc = timeout_in_ticks + now_ticks;
1260 	NVME_CTRLR_DEBUGLOG(ctrlr, "setting state to %s (timeout %" PRIu64 " ms)\n",
1261 			    nvme_ctrlr_state_string(ctrlr->state), timeout_in_ms);
1262 	return;
1263 inf:
1264 	NVME_CTRLR_DEBUGLOG(ctrlr, "setting state to %s (no timeout)\n",
1265 			    nvme_ctrlr_state_string(ctrlr->state));
1266 	ctrlr->state_timeout_tsc = NVME_TIMEOUT_INFINITE;
1267 }
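
/*
 * Worked example of the deadline math above (illustrative values): on a host
 * where spdk_get_ticks_hz() returns 2,000,000,000 (a 2 GHz tick source),
 * ticks_per_ms is 2,000,000, so a 5000 ms admin timeout adds 10,000,000,000
 * ticks to the current tick count. The overflow checks only fall back to
 * "no timeout" for absurdly large timeout values.
 */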
1268 
1269 static void
1270 nvme_ctrlr_free_zns_specific_data(struct spdk_nvme_ctrlr *ctrlr)
1271 {
1272 	spdk_free(ctrlr->cdata_zns);
1273 	ctrlr->cdata_zns = NULL;
1274 }
1275 
1276 static void
1277 nvme_ctrlr_free_iocs_specific_data(struct spdk_nvme_ctrlr *ctrlr)
1278 {
1279 	nvme_ctrlr_free_zns_specific_data(ctrlr);
1280 }
1281 
1282 static void
1283 nvme_ctrlr_free_doorbell_buffer(struct spdk_nvme_ctrlr *ctrlr)
1284 {
1285 	if (ctrlr->shadow_doorbell) {
1286 		spdk_free(ctrlr->shadow_doorbell);
1287 		ctrlr->shadow_doorbell = NULL;
1288 	}
1289 
1290 	if (ctrlr->eventidx) {
1291 		spdk_free(ctrlr->eventidx);
1292 		ctrlr->eventidx = NULL;
1293 	}
1294 }
1295 
1296 static void
1297 nvme_ctrlr_set_doorbell_buffer_config_done(void *arg, const struct spdk_nvme_cpl *cpl)
1298 {
1299 	struct spdk_nvme_ctrlr *ctrlr = (struct spdk_nvme_ctrlr *)arg;
1300 
1301 	if (spdk_nvme_cpl_is_error(cpl)) {
1302 		NVME_CTRLR_WARNLOG(ctrlr, "Doorbell buffer config failed\n");
1303 	} else {
1304 		NVME_CTRLR_INFOLOG(ctrlr, "Doorbell buffer config enabled\n");
1305 	}
1306 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_KEEP_ALIVE_TIMEOUT,
1307 			     ctrlr->opts.admin_timeout_ms);
1308 }
1309 
1310 static int
1311 nvme_ctrlr_set_doorbell_buffer_config(struct spdk_nvme_ctrlr *ctrlr)
1312 {
1313 	int rc = 0;
1314 	uint64_t prp1, prp2, len;
1315 
1316 	if (!ctrlr->cdata.oacs.doorbell_buffer_config) {
1317 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_KEEP_ALIVE_TIMEOUT,
1318 				     ctrlr->opts.admin_timeout_ms);
1319 		return 0;
1320 	}
1321 
1322 	if (ctrlr->trid.trtype != SPDK_NVME_TRANSPORT_PCIE) {
1323 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_KEEP_ALIVE_TIMEOUT,
1324 				     ctrlr->opts.admin_timeout_ms);
1325 		return 0;
1326 	}
1327 
1328 	/* Only one page is needed for each doorbell buffer (shadow doorbell and eventidx). */
1329 	ctrlr->shadow_doorbell = spdk_zmalloc(ctrlr->page_size, ctrlr->page_size,
1330 					      NULL, SPDK_ENV_LCORE_ID_ANY,
1331 					      SPDK_MALLOC_DMA | SPDK_MALLOC_SHARE);
1332 	if (ctrlr->shadow_doorbell == NULL) {
1333 		rc = -ENOMEM;
1334 		goto error;
1335 	}
1336 
1337 	len = ctrlr->page_size;
1338 	prp1 = spdk_vtophys(ctrlr->shadow_doorbell, &len);
1339 	if (prp1 == SPDK_VTOPHYS_ERROR || len != ctrlr->page_size) {
1340 		rc = -EFAULT;
1341 		goto error;
1342 	}
1343 
1344 	ctrlr->eventidx = spdk_zmalloc(ctrlr->page_size, ctrlr->page_size,
1345 				       NULL, SPDK_ENV_LCORE_ID_ANY,
1346 				       SPDK_MALLOC_DMA | SPDK_MALLOC_SHARE);
1347 	if (ctrlr->eventidx == NULL) {
1348 		rc = -ENOMEM;
1349 		goto error;
1350 	}
1351 
1352 	len = ctrlr->page_size;
1353 	prp2 = spdk_vtophys(ctrlr->eventidx, &len);
1354 	if (prp2 == SPDK_VTOPHYS_ERROR || len != ctrlr->page_size) {
1355 		rc = -EFAULT;
1356 		goto error;
1357 	}
1358 
1359 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_DB_BUF_CFG,
1360 			     ctrlr->opts.admin_timeout_ms);
1361 
1362 	rc = nvme_ctrlr_cmd_doorbell_buffer_config(ctrlr, prp1, prp2,
1363 			nvme_ctrlr_set_doorbell_buffer_config_done, ctrlr);
1364 	if (rc != 0) {
1365 		goto error;
1366 	}
1367 
1368 	return 0;
1369 
1370 error:
1371 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
1372 	nvme_ctrlr_free_doorbell_buffer(ctrlr);
1373 	return rc;
1374 }
1375 
1376 static void
1377 nvme_ctrlr_abort_queued_aborts(struct spdk_nvme_ctrlr *ctrlr)
1378 {
1379 	struct nvme_request	*req, *tmp;
1380 	struct spdk_nvme_cpl	cpl = {};
1381 
1382 	cpl.status.sc = SPDK_NVME_SC_ABORTED_SQ_DELETION;
1383 	cpl.status.sct = SPDK_NVME_SCT_GENERIC;
1384 
1385 	STAILQ_FOREACH_SAFE(req, &ctrlr->queued_aborts, stailq, tmp) {
1386 		STAILQ_REMOVE_HEAD(&ctrlr->queued_aborts, stailq);
1387 
1388 		nvme_complete_request(req->cb_fn, req->cb_arg, req->qpair, req, &cpl);
1389 		nvme_free_request(req);
1390 	}
1391 }
1392 
1393 int
1394 spdk_nvme_ctrlr_reset(struct spdk_nvme_ctrlr *ctrlr)
1395 {
1396 	int rc = 0, rc_tmp = 0;
1397 	struct spdk_nvme_qpair	*qpair;
1398 
1399 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
1400 
1401 	if (ctrlr->is_resetting || ctrlr->is_removed) {
1402 		/*
1403 		 * Controller is already resetting or has been removed. Return
1404 		 *  immediately since there is no need to kick off another
1405 		 *  reset in these cases.
1406 		 */
1407 		nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
1408 		return ctrlr->is_resetting ? 0 : -ENXIO;
1409 	}
1410 
1411 	ctrlr->is_resetting = true;
1412 	ctrlr->is_failed = false;
1413 
1414 	NVME_CTRLR_NOTICELOG(ctrlr, "resetting controller\n");
1415 
1416 	/* Abort all of the queued abort requests */
1417 	nvme_ctrlr_abort_queued_aborts(ctrlr);
1418 
1419 	nvme_transport_admin_qpair_abort_aers(ctrlr->adminq);
1420 
1421 	/* Disable all queues before disabling the controller hardware. */
1422 	TAILQ_FOREACH(qpair, &ctrlr->active_io_qpairs, tailq) {
1423 		qpair->transport_failure_reason = SPDK_NVME_QPAIR_FAILURE_LOCAL;
1424 	}
1425 
1426 	ctrlr->adminq->transport_failure_reason = SPDK_NVME_QPAIR_FAILURE_LOCAL;
1427 	nvme_transport_ctrlr_disconnect_qpair(ctrlr, ctrlr->adminq);
1428 
1429 	/* Doorbell buffer config is invalid during reset */
1430 	nvme_ctrlr_free_doorbell_buffer(ctrlr);
1431 
1432 	/* I/O Command Set Specific Identify Controller data is invalidated during reset */
1433 	nvme_ctrlr_free_iocs_specific_data(ctrlr);
1434 
1435 	spdk_bit_array_free(&ctrlr->free_io_qids);
1436 
1437 	/* Set the state back to INIT to cause a full hardware reset. */
1438 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_INIT, NVME_TIMEOUT_INFINITE);
1439 
1440 	while (ctrlr->state != NVME_CTRLR_STATE_READY) {
1441 		if (nvme_ctrlr_process_init(ctrlr) != 0) {
1442 			NVME_CTRLR_ERRLOG(ctrlr, "controller reinitialization failed\n");
1443 			rc = -1;
1444 			break;
1445 		}
1446 	}
1447 
1448 	/*
1449 	 * For non-fabrics controllers, the memory locations of the transport qpair
1450 	 * don't change when the controller is reset. They simply need to be
1451 	 * re-enabled with admin commands to the controller. For fabric
1452 	 * controllers we need to disconnect and reconnect the qpair on its
1453 	 * own thread outside of the context of the reset.
1454 	 */
1455 	if (rc == 0 && !spdk_nvme_ctrlr_is_fabrics(ctrlr)) {
1456 		/* Reinitialize qpairs */
1457 		TAILQ_FOREACH(qpair, &ctrlr->active_io_qpairs, tailq) {
1458 			assert(spdk_bit_array_get(ctrlr->free_io_qids, qpair->id));
1459 			spdk_bit_array_clear(ctrlr->free_io_qids, qpair->id);
1460 			rc_tmp = nvme_transport_ctrlr_connect_qpair(ctrlr, qpair);
1461 			if (rc_tmp != 0) {
1462 				rc = rc_tmp;
1463 				qpair->transport_failure_reason = SPDK_NVME_QPAIR_FAILURE_LOCAL;
1464 				continue;
1465 			}
1466 		}
1467 	}
1468 
1469 	if (rc) {
1470 		nvme_ctrlr_fail(ctrlr, false);
1471 	}
1472 	ctrlr->is_resetting = false;
1473 
1474 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
1475 
1476 	if (!ctrlr->cdata.oaes.ns_attribute_notices) {
1477 		/*
1478 	 * If the controller doesn't support namespace attribute notices and the
1479 	 * namespace attributes may have changed (e.g. the number of namespaces),
1480 	 * we need to update the system as part of handling the device reset.
1481 		 */
1482 		nvme_io_msg_ctrlr_update(ctrlr);
1483 	}
1484 
1485 	return rc;
1486 }
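
/*
 * Illustrative sketch (hypothetical caller): an application typically drives
 * the reset from the thread that owns the controller and treats a non-zero
 * return as an unrecoverable controller.
 *
 *	if (spdk_nvme_ctrlr_reset(ctrlr) != 0) {
 *		// ctrlr was marked failed above; detach and clean up
 *		spdk_nvme_detach(ctrlr);
 *	}
 */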
1487 
1488 int
1489 spdk_nvme_ctrlr_reset_subsystem(struct spdk_nvme_ctrlr *ctrlr)
1490 {
1491 	union spdk_nvme_cap_register cap;
1492 	int rc = 0;
1493 
1494 	cap = spdk_nvme_ctrlr_get_regs_cap(ctrlr);
1495 	if (cap.bits.nssrs == 0) {
1496 		NVME_CTRLR_WARNLOG(ctrlr, "subsystem reset is not supported\n");
1497 		return -ENOTSUP;
1498 	}
1499 
1500 	NVME_CTRLR_NOTICELOG(ctrlr, "resetting subsystem\n");
1501 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
1502 	ctrlr->is_resetting = true;
1503 	rc = nvme_ctrlr_set_nssr(ctrlr, SPDK_NVME_NSSR_VALUE);
1504 	ctrlr->is_resetting = false;
1505 
1506 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
1507 	/*
1508 	 * Unlike a controller reset, no further cleanup is done here. A subsystem reset causes
1509 	 * a hot remove on the PCIe transport, and the hot remove handling does all the necessary ctrlr cleanup.
1510 	 */
1511 	return rc;
1512 }
1513 
1514 int
1515 spdk_nvme_ctrlr_set_trid(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_transport_id *trid)
1516 {
1517 	int rc = 0;
1518 
1519 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
1520 
1521 	if (ctrlr->is_failed == false) {
1522 		rc = -EPERM;
1523 		goto out;
1524 	}
1525 
1526 	if (trid->trtype != ctrlr->trid.trtype) {
1527 		rc = -EINVAL;
1528 		goto out;
1529 	}
1530 
1531 	if (strncmp(trid->subnqn, ctrlr->trid.subnqn, SPDK_NVMF_NQN_MAX_LEN)) {
1532 		rc = -EINVAL;
1533 		goto out;
1534 	}
1535 
1536 	ctrlr->trid = *trid;
1537 
1538 out:
1539 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
1540 	return rc;
1541 }
1542 
1543 void
1544 spdk_nvme_ctrlr_set_remove_cb(struct spdk_nvme_ctrlr *ctrlr,
1545 			      spdk_nvme_remove_cb remove_cb, void *remove_ctx)
1546 {
1547 	if (!spdk_process_is_primary()) {
1548 		return;
1549 	}
1550 
1551 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
1552 	ctrlr->remove_cb = remove_cb;
1553 	ctrlr->cb_ctx = remove_ctx;
1554 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
1555 }
1556 
1557 static void
1558 nvme_ctrlr_identify_done(void *arg, const struct spdk_nvme_cpl *cpl)
1559 {
1560 	struct spdk_nvme_ctrlr *ctrlr = (struct spdk_nvme_ctrlr *)arg;
1561 
1562 	if (spdk_nvme_cpl_is_error(cpl)) {
1563 		NVME_CTRLR_ERRLOG(ctrlr, "nvme_identify_controller failed!\n");
1564 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
1565 		return;
1566 	}
1567 
1568 	/*
1569 	 * Use MDTS to ensure our default max_xfer_size doesn't exceed what the
1570 	 *  controller supports.
1571 	 */
1572 	ctrlr->max_xfer_size = nvme_transport_ctrlr_get_max_xfer_size(ctrlr);
1573 	NVME_CTRLR_DEBUGLOG(ctrlr, "transport max_xfer_size %u\n", ctrlr->max_xfer_size);
1574 	if (ctrlr->cdata.mdts > 0) {
1575 		ctrlr->max_xfer_size = spdk_min(ctrlr->max_xfer_size,
1576 						ctrlr->min_page_size * (1 << ctrlr->cdata.mdts));
1577 		NVME_CTRLR_DEBUGLOG(ctrlr, "MDTS max_xfer_size %u\n", ctrlr->max_xfer_size);
1578 	}
1579 
1580 	NVME_CTRLR_DEBUGLOG(ctrlr, "CNTLID 0x%04" PRIx16 "\n", ctrlr->cdata.cntlid);
1581 	if (ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) {
1582 		ctrlr->cntlid = ctrlr->cdata.cntlid;
1583 	} else {
1584 		/*
1585 		 * Fabrics controllers should already have CNTLID from the Connect command.
1586 		 *
1587 		 * If CNTLID from Connect doesn't match CNTLID in the Identify Controller data,
1588 		 * trust the one from Connect.
1589 		 */
1590 		if (ctrlr->cntlid != ctrlr->cdata.cntlid) {
1591 			NVME_CTRLR_DEBUGLOG(ctrlr, "Identify CNTLID 0x%04" PRIx16 " != Connect CNTLID 0x%04" PRIx16 "\n",
1592 					    ctrlr->cdata.cntlid, ctrlr->cntlid);
1593 		}
1594 	}
1595 
1596 	if (ctrlr->cdata.sgls.supported) {
1597 		assert(ctrlr->cdata.sgls.supported != 0x3);
1598 		ctrlr->flags |= SPDK_NVME_CTRLR_SGL_SUPPORTED;
1599 		if (ctrlr->cdata.sgls.supported == 0x2) {
1600 			ctrlr->flags |= SPDK_NVME_CTRLR_SGL_REQUIRES_DWORD_ALIGNMENT;
1601 		}
1602 		/*
1603 		 * Use MSDBD to ensure our max_sges doesn't exceed what the
1604 		 *  controller supports.
1605 		 */
1606 		ctrlr->max_sges = nvme_transport_ctrlr_get_max_sges(ctrlr);
1607 		if (ctrlr->cdata.nvmf_specific.msdbd != 0) {
1608 			ctrlr->max_sges = spdk_min(ctrlr->cdata.nvmf_specific.msdbd, ctrlr->max_sges);
1609 		} else {
1610 			/* A value 0 indicates no limit. */
1611 		}
1612 		NVME_CTRLR_DEBUGLOG(ctrlr, "transport max_sges %u\n", ctrlr->max_sges);
1613 	}
1614 
1615 	if (ctrlr->cdata.oacs.security && !(ctrlr->quirks & NVME_QUIRK_OACS_SECURITY)) {
1616 		ctrlr->flags |= SPDK_NVME_CTRLR_SECURITY_SEND_RECV_SUPPORTED;
1617 	}
1618 
1619 	if (ctrlr->cdata.oacs.directives) {
1620 		ctrlr->flags |= SPDK_NVME_CTRLR_DIRECTIVES_SUPPORTED;
1621 	}
1622 
1623 	NVME_CTRLR_DEBUGLOG(ctrlr, "fuses compare and write: %d\n",
1624 			    ctrlr->cdata.fuses.compare_and_write);
1625 	if (ctrlr->cdata.fuses.compare_and_write) {
1626 		ctrlr->flags |= SPDK_NVME_CTRLR_COMPARE_AND_WRITE_SUPPORTED;
1627 	}
1628 
1629 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_IOCS_SPECIFIC,
1630 			     ctrlr->opts.admin_timeout_ms);
1631 }
1632 
1633 static int
1634 nvme_ctrlr_identify(struct spdk_nvme_ctrlr *ctrlr)
1635 {
1636 	int	rc;
1637 
1638 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY,
1639 			     ctrlr->opts.admin_timeout_ms);
1640 
1641 	rc = nvme_ctrlr_cmd_identify(ctrlr, SPDK_NVME_IDENTIFY_CTRLR, 0, 0, 0,
1642 				     &ctrlr->cdata, sizeof(ctrlr->cdata),
1643 				     nvme_ctrlr_identify_done, ctrlr);
1644 	if (rc != 0) {
1645 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
1646 		return rc;
1647 	}
1648 
1649 	return 0;
1650 }
1651 
1652 static void
1653 nvme_ctrlr_get_zns_cmd_and_effects_log_done(void *arg, const struct spdk_nvme_cpl *cpl)
1654 {
1655 	struct spdk_nvme_cmds_and_effect_log_page *log_page;
1656 	struct spdk_nvme_ctrlr *ctrlr = arg;
1657 
1658 	if (spdk_nvme_cpl_is_error(cpl)) {
1659 		NVME_CTRLR_ERRLOG(ctrlr, "nvme_ctrlr_get_zns_cmd_and_effects_log failed!\n");
1660 		spdk_free(ctrlr->tmp_ptr);
1661 		ctrlr->tmp_ptr = NULL;
1662 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
1663 		return;
1664 	}
1665 
1666 	log_page = ctrlr->tmp_ptr;
1667 
1668 	if (log_page->io_cmds_supported[SPDK_NVME_OPC_ZONE_APPEND].csupp) {
1669 		ctrlr->flags |= SPDK_NVME_CTRLR_ZONE_APPEND_SUPPORTED;
1670 	}
1671 	spdk_free(ctrlr->tmp_ptr);
1672 	ctrlr->tmp_ptr = NULL;
1673 
1674 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_NUM_QUEUES, ctrlr->opts.admin_timeout_ms);
1675 }
1676 
1677 static int
1678 nvme_ctrlr_get_zns_cmd_and_effects_log(struct spdk_nvme_ctrlr *ctrlr)
1679 {
1680 	int rc;
1681 
1682 	assert(!ctrlr->tmp_ptr);
1683 	ctrlr->tmp_ptr = spdk_zmalloc(sizeof(struct spdk_nvme_cmds_and_effect_log_page), 64, NULL,
1684 				      SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_SHARE | SPDK_MALLOC_DMA);
1685 	if (!ctrlr->tmp_ptr) {
1686 		rc = -ENOMEM;
1687 		goto error;
1688 	}
1689 
1690 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_GET_ZNS_CMD_EFFECTS_LOG,
1691 			     ctrlr->opts.admin_timeout_ms);
1692 
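	/*
	 * For Get Log Page, the Command Set Identifier is carried in bits 31:24 of CDW14;
	 * select the ZNS command set here.
	 */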
1693 	rc = spdk_nvme_ctrlr_cmd_get_log_page_ext(ctrlr, SPDK_NVME_LOG_COMMAND_EFFECTS_LOG,
1694 			0, ctrlr->tmp_ptr, sizeof(struct spdk_nvme_cmds_and_effect_log_page),
1695 			0, 0, 0, SPDK_NVME_CSI_ZNS << 24,
1696 			nvme_ctrlr_get_zns_cmd_and_effects_log_done, ctrlr);
1697 	if (rc != 0) {
1698 		goto error;
1699 	}
1700 
1701 	return 0;
1702 
1703 error:
1704 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
1705 	spdk_free(ctrlr->tmp_ptr);
1706 	ctrlr->tmp_ptr = NULL;
1707 	return rc;
1708 }
1709 
1710 static void
1711 nvme_ctrlr_identify_zns_specific_done(void *arg, const struct spdk_nvme_cpl *cpl)
1712 {
1713 	struct spdk_nvme_ctrlr *ctrlr = (struct spdk_nvme_ctrlr *)arg;
1714 
1715 	if (spdk_nvme_cpl_is_error(cpl)) {
1716 		/* no need to print an error, the controller simply does not support ZNS */
1717 		nvme_ctrlr_free_zns_specific_data(ctrlr);
1718 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_NUM_QUEUES,
1719 				     ctrlr->opts.admin_timeout_ms);
1720 		return;
1721 	}
1722 
1723 	/* A zero ZASL value means the Zone Append size limit is the same as MDTS. */
1724 	if (ctrlr->cdata_zns->zasl) {
1725 		uint32_t max_append = ctrlr->min_page_size * (1 << ctrlr->cdata_zns->zasl);
1726 		ctrlr->max_zone_append_size = spdk_min(ctrlr->max_xfer_size, max_append);
1727 	} else {
1728 		ctrlr->max_zone_append_size = ctrlr->max_xfer_size;
1729 	}
1730 
1731 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_GET_ZNS_CMD_EFFECTS_LOG,
1732 			     ctrlr->opts.admin_timeout_ms);
1733 }
1734 
1735 /**
1736  * This function will try to fetch the I/O Command Specific Controller data structure for
1737  * each I/O Command Set supported by SPDK.
1738  *
1739  * If an I/O Command Set is not supported by the controller, "Invalid Field in Command"
1740  * will be returned. Since we are fetching in an exploratory way, getting an error back
1741  * from the controller should not be treated as fatal.
1742  *
1743  * I/O Command Sets not supported by SPDK will be skipped (e.g. Key Value Command Set).
1744  *
1745  * I/O Command Sets without an IOCS specific data structure (i.e. a zero-filled IOCS specific
1746  * data structure) will be skipped (e.g. NVM Command Set, Key Value Command Set).
1747  */
1748 static int
1749 nvme_ctrlr_identify_iocs_specific(struct spdk_nvme_ctrlr *ctrlr)
1750 {
1751 	int	rc;
1752 
1753 	if (!nvme_ctrlr_multi_iocs_enabled(ctrlr)) {
1754 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_NUM_QUEUES,
1755 				     ctrlr->opts.admin_timeout_ms);
1756 		return 0;
1757 	}
1758 
1759 	/*
1760 	 * Since SPDK currently only needs to fetch a single Command Set, keep the code here,
1761 	 * instead of creating multiple NVME_CTRLR_STATE_IDENTIFY_IOCS_SPECIFIC substates,
1762 	 * which would require additional functions and complexity for no good reason.
1763 	 */
1764 	assert(!ctrlr->cdata_zns);
1765 	ctrlr->cdata_zns = spdk_zmalloc(sizeof(*ctrlr->cdata_zns), 64, NULL, SPDK_ENV_SOCKET_ID_ANY,
1766 					SPDK_MALLOC_SHARE | SPDK_MALLOC_DMA);
1767 	if (!ctrlr->cdata_zns) {
1768 		rc = -ENOMEM;
1769 		goto error;
1770 	}
1771 
1772 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_IOCS_SPECIFIC,
1773 			     ctrlr->opts.admin_timeout_ms);
1774 
1775 	rc = nvme_ctrlr_cmd_identify(ctrlr, SPDK_NVME_IDENTIFY_CTRLR_IOCS, 0, 0, SPDK_NVME_CSI_ZNS,
1776 				     ctrlr->cdata_zns, sizeof(*ctrlr->cdata_zns),
1777 				     nvme_ctrlr_identify_zns_specific_done, ctrlr);
1778 	if (rc != 0) {
1779 		goto error;
1780 	}
1781 
1782 	return 0;
1783 
1784 error:
1785 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
1786 	nvme_ctrlr_free_zns_specific_data(ctrlr);
1787 	return rc;
1788 }
1789 
1790 enum nvme_active_ns_state {
1791 	NVME_ACTIVE_NS_STATE_IDLE,
1792 	NVME_ACTIVE_NS_STATE_PROCESSING,
1793 	NVME_ACTIVE_NS_STATE_DONE,
1794 	NVME_ACTIVE_NS_STATE_ERROR
1795 };
1796 
1797 typedef void (*nvme_active_ns_ctx_deleter)(struct nvme_active_ns_ctx *);
1798 
1799 struct nvme_active_ns_ctx {
1800 	struct spdk_nvme_ctrlr *ctrlr;
1801 	uint32_t page;
1802 	uint32_t next_nsid;
1803 	uint32_t *new_ns_list;
1804 	nvme_active_ns_ctx_deleter deleter;
1805 
1806 	enum nvme_active_ns_state state;
1807 };
1808 
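/*
 * Allocate the context used to drive the (possibly multi-page) retrieval of the
 * Active Namespace ID list.  The list buffer is allocated from shared memory since,
 * on success, it is installed as ctrlr->active_ns_list.
 */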
1809 static struct nvme_active_ns_ctx *
1810 nvme_active_ns_ctx_create(struct spdk_nvme_ctrlr *ctrlr, nvme_active_ns_ctx_deleter deleter)
1811 {
1812 	struct nvme_active_ns_ctx *ctx;
1813 	uint32_t *new_ns_list = NULL;
1814 
1815 	ctx = calloc(1, sizeof(*ctx));
1816 	if (!ctx) {
1817 		NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate nvme_active_ns_ctx!\n");
1818 		return NULL;
1819 	}
1820 
1821 	new_ns_list = spdk_zmalloc(sizeof(struct spdk_nvme_ns_list), ctrlr->page_size,
1822 				   NULL, SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_SHARE);
1823 	if (!new_ns_list) {
1824 		NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate active_ns_list!\n");
1825 		free(ctx);
1826 		return NULL;
1827 	}
1828 
1829 	ctx->new_ns_list = new_ns_list;
1830 	ctx->ctrlr = ctrlr;
1831 	ctx->deleter = deleter;
1832 
1833 	return ctx;
1834 }
1835 
1836 static void
1837 nvme_active_ns_ctx_destroy(struct nvme_active_ns_ctx *ctx)
1838 {
1839 	spdk_free(ctx->new_ns_list);
1840 	free(ctx);
1841 }
1842 
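/*
 * Install the newly retrieved Active Namespace ID list on the controller, freeing the
 * previous list.  Ownership of the buffer is transferred, so the caller's pointer is
 * cleared.
 */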
1843 static void
1844 nvme_ctrlr_identify_active_ns_swap(struct spdk_nvme_ctrlr *ctrlr, uint32_t **new_ns_list)
1845 {
1846 	uint32_t max_active_ns_idx = 0;
1847 
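	/*
	 * Count the entries in the zero-terminated list; because of the post-increment,
	 * the terminating zero is included in max_active_ns_idx.
	 */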
1848 	while ((*new_ns_list)[max_active_ns_idx++]);
1849 	spdk_free(ctrlr->active_ns_list);
1850 	ctrlr->active_ns_list = *new_ns_list;
1851 	ctrlr->max_active_ns_idx = max_active_ns_idx;
1852 	*new_ns_list = NULL;
1853 }
1854 
1855 static void
1856 nvme_ctrlr_identify_active_ns_async_done(void *arg, const struct spdk_nvme_cpl *cpl)
1857 {
1858 	struct nvme_active_ns_ctx *ctx = arg;
1859 	uint32_t *new_ns_list = NULL;
1860 
1861 	if (spdk_nvme_cpl_is_error(cpl)) {
1862 		ctx->state = NVME_ACTIVE_NS_STATE_ERROR;
1863 		goto out;
1864 	}
1865 
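	/*
	 * Each Active Namespace ID list page holds 1024 NSIDs.  If the last entry of the
	 * page just returned is non-zero, the list continues and the next page must be
	 * requested starting after that NSID.
	 */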
1866 	ctx->next_nsid = ctx->new_ns_list[1024 * ctx->page + 1023];
1867 	if (ctx->next_nsid == 0) {
1868 		ctx->state = NVME_ACTIVE_NS_STATE_DONE;
1869 		goto out;
1870 	}
1871 
1872 	ctx->page++;
1873 	new_ns_list = spdk_realloc(ctx->new_ns_list,
1874 				   (ctx->page + 1) * sizeof(struct spdk_nvme_ns_list),
1875 				   ctx->ctrlr->page_size);
1876 	if (!new_ns_list) {
1877 		SPDK_ERRLOG("Failed to reallocate active_ns_list!\n");
1878 		ctx->state = NVME_ACTIVE_NS_STATE_ERROR;
1879 		goto out;
1880 	}
1881 
1882 	ctx->new_ns_list = new_ns_list;
1883 	nvme_ctrlr_identify_active_ns_async(ctx);
1884 	return;
1885 
1886 out:
1887 	if (ctx->deleter) {
1888 		ctx->deleter(ctx);
1889 	}
1890 }
1891 
1892 static void
1893 nvme_ctrlr_identify_active_ns_async(struct nvme_active_ns_ctx *ctx)
1894 {
1895 	struct spdk_nvme_ctrlr *ctrlr = ctx->ctrlr;
1896 	uint32_t i;
1897 	int rc;
1898 
1899 	if (ctrlr->cdata.nn == 0) {
1900 		ctx->state = NVME_ACTIVE_NS_STATE_DONE;
1901 		goto out;
1902 	}
1903 
1904 	assert(ctx->new_ns_list != NULL);
1905 
1906 	/*
1907 	 * If the controller doesn't support the Active Namespace ID list (CNS 0x02),
1908 	 * dummy up an active NS list, i.e. report all namespaces as active.
1909 	 */
1910 	if (ctrlr->vs.raw < SPDK_NVME_VERSION(1, 1, 0) || ctrlr->quirks & NVME_QUIRK_IDENTIFY_CNS) {
1911 		uint32_t *new_ns_list;
1912 		uint32_t num_pages;
1913 
1914 		/*
1915 		 * The active NS list must always end with a zero element,
1916 		 * so we allocate room for cdata.nn + 1 entries.
1917 		 */
1918 		num_pages = spdk_divide_round_up(ctrlr->cdata.nn + 1,
1919 						 sizeof(struct spdk_nvme_ns_list) / sizeof(new_ns_list[0]));
1920 		new_ns_list = spdk_realloc(ctx->new_ns_list,
1921 					   num_pages * sizeof(struct spdk_nvme_ns_list),
1922 					   ctx->ctrlr->page_size);
1923 		if (!new_ns_list) {
1924 			SPDK_ERRLOG("Failed to reallocate active_ns_list!\n");
1925 			ctx->state = NVME_ACTIVE_NS_STATE_ERROR;
1926 			goto out;
1927 		}
1928 
1929 		ctx->new_ns_list = new_ns_list;
1930 		ctx->new_ns_list[ctrlr->cdata.nn] = 0;
1931 		for (i = 0; i < ctrlr->cdata.nn; i++) {
1932 			ctx->new_ns_list[i] = i + 1;
1933 		}
1934 
1935 		ctx->state = NVME_ACTIVE_NS_STATE_DONE;
1936 		goto out;
1937 	}
1938 
1939 	ctx->state = NVME_ACTIVE_NS_STATE_PROCESSING;
1940 	rc = nvme_ctrlr_cmd_identify(ctrlr, SPDK_NVME_IDENTIFY_ACTIVE_NS_LIST, 0, ctx->next_nsid, 0,
1941 				     &ctx->new_ns_list[1024 * ctx->page], sizeof(struct spdk_nvme_ns_list),
1942 				     nvme_ctrlr_identify_active_ns_async_done, ctx);
1943 	if (rc != 0) {
1944 		ctx->state = NVME_ACTIVE_NS_STATE_ERROR;
1945 		goto out;
1946 	}
1947 
1948 	return;
1949 
1950 out:
1951 	if (ctx->deleter) {
1952 		ctx->deleter(ctx);
1953 	}
1954 }
1955 
1956 static void
1957 _nvme_active_ns_ctx_deleter(struct nvme_active_ns_ctx *ctx)
1958 {
1959 	struct spdk_nvme_ctrlr *ctrlr = ctx->ctrlr;
1960 
1961 	if (ctx->state == NVME_ACTIVE_NS_STATE_ERROR) {
1962 		nvme_ctrlr_destruct_namespaces(ctrlr);
1963 		nvme_active_ns_ctx_destroy(ctx);
1964 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
1965 		return;
1966 	}
1967 
1968 	assert(ctx->state == NVME_ACTIVE_NS_STATE_DONE);
1969 	nvme_ctrlr_identify_active_ns_swap(ctrlr, &ctx->new_ns_list);
1970 	nvme_active_ns_ctx_destroy(ctx);
1971 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_CONSTRUCT_NS, ctrlr->opts.admin_timeout_ms);
1972 }
1973 
1974 static void
1975 _nvme_ctrlr_identify_active_ns(struct spdk_nvme_ctrlr *ctrlr)
1976 {
1977 	struct nvme_active_ns_ctx *ctx;
1978 
1979 	ctx = nvme_active_ns_ctx_create(ctrlr, _nvme_active_ns_ctx_deleter);
1980 	if (!ctx) {
1981 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
1982 		return;
1983 	}
1984 
1985 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_ACTIVE_NS,
1986 			     ctrlr->opts.admin_timeout_ms);
1987 	nvme_ctrlr_identify_active_ns_async(ctx);
1988 }
1989 
1990 int
1991 nvme_ctrlr_identify_active_ns(struct spdk_nvme_ctrlr *ctrlr)
1992 {
1993 	struct nvme_active_ns_ctx *ctx;
1994 	int rc;
1995 
1996 	ctx = nvme_active_ns_ctx_create(ctrlr, NULL);
1997 	if (!ctx) {
1998 		return -ENOMEM;
1999 	}
2000 
2001 	nvme_ctrlr_identify_active_ns_async(ctx);
2002 	while (ctx->state == NVME_ACTIVE_NS_STATE_PROCESSING) {
2003 		rc = spdk_nvme_qpair_process_completions(ctrlr->adminq, 0);
2004 		if (rc < 0) {
2005 			ctx->state = NVME_ACTIVE_NS_STATE_ERROR;
2006 			break;
2007 		}
2008 	}
2009 
2010 	if (ctx->state == NVME_ACTIVE_NS_STATE_ERROR) {
2011 		nvme_active_ns_ctx_destroy(ctx);
2012 		return -ENXIO;
2013 	}
2014 
2015 	assert(ctx->state == NVME_ACTIVE_NS_STATE_DONE);
2016 	nvme_ctrlr_identify_active_ns_swap(ctrlr, &ctx->new_ns_list);
2017 	nvme_active_ns_ctx_destroy(ctx);
2018 
2019 	return 0;
2020 }
2021 
2022 static void
2023 nvme_ctrlr_identify_ns_async_done(void *arg, const struct spdk_nvme_cpl *cpl)
2024 {
2025 	struct spdk_nvme_ns *ns = (struct spdk_nvme_ns *)arg;
2026 	struct spdk_nvme_ctrlr *ctrlr = ns->ctrlr;
2027 	uint32_t nsid;
2028 	int rc;
2029 
2030 	if (spdk_nvme_cpl_is_error(cpl)) {
2031 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
2032 		return;
2033 	}
2034 
2035 	nvme_ns_set_identify_data(ns);
2036 
2037 	/* move on to the next active NS */
2038 	nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, ns->id);
2039 	ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid);
2040 	if (ns == NULL) {
2041 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_ID_DESCS,
2042 				     ctrlr->opts.admin_timeout_ms);
2043 		return;
2044 	}
2045 	ns->ctrlr = ctrlr;
2046 	ns->id = nsid;
2047 
2048 	rc = nvme_ctrlr_identify_ns_async(ns);
2049 	if (rc) {
2050 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
2051 	}
2052 }
2053 
2054 static int
2055 nvme_ctrlr_identify_ns_async(struct spdk_nvme_ns *ns)
2056 {
2057 	struct spdk_nvme_ctrlr *ctrlr = ns->ctrlr;
2058 	struct spdk_nvme_ns_data *nsdata;
2059 
2060 	nsdata = &ns->nsdata;
2061 
2062 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_NS,
2063 			     ctrlr->opts.admin_timeout_ms);
2064 	return nvme_ctrlr_cmd_identify(ns->ctrlr, SPDK_NVME_IDENTIFY_NS, 0, ns->id, 0,
2065 				       nsdata, sizeof(*nsdata),
2066 				       nvme_ctrlr_identify_ns_async_done, ns);
2067 }
2068 
2069 static int
2070 nvme_ctrlr_identify_namespaces(struct spdk_nvme_ctrlr *ctrlr)
2071 {
2072 	uint32_t nsid;
2073 	struct spdk_nvme_ns *ns;
2074 	int rc;
2075 
2076 	nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr);
2077 	ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid);
2078 	if (ns == NULL) {
2079 		/* No active NS, move on to the next state */
2080 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_ID_DESCS,
2081 				     ctrlr->opts.admin_timeout_ms);
2082 		return 0;
2083 	}
2084 
2085 	ns->ctrlr = ctrlr;
2086 	ns->id = nsid;
2087 
2088 	rc = nvme_ctrlr_identify_ns_async(ns);
2089 	if (rc) {
2090 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
2091 	}
2092 
2093 	return rc;
2094 }
2095 
2096 static int
2097 nvme_ctrlr_identify_namespaces_iocs_specific_next(struct spdk_nvme_ctrlr *ctrlr, uint32_t prev_nsid)
2098 {
2099 	uint32_t nsid;
2100 	struct spdk_nvme_ns *ns;
2101 	int rc;
2102 
2103 	if (!prev_nsid) {
2104 		nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr);
2105 	} else {
2106 		/* move on to the next active NS */
2107 		nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, prev_nsid);
2108 	}
2109 
2110 	ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid);
2111 	if (ns == NULL) {
2112 		/* No first/next active NS, move on to the next state */
2113 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_CONFIGURE_AER,
2114 				     ctrlr->opts.admin_timeout_ms);
2115 		return 0;
2116 	}
2117 
2118 	/* Loop until we find a namespace which has (supported) IOCS specific data. */
2119 	while (!nvme_ns_has_supported_iocs_specific_data(ns)) {
2120 		nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, ns->id);
2121 		ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid);
2122 		if (ns == NULL) {
2123 			/* no namespace with (supported) iocs specific data found */
2124 			nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_CONFIGURE_AER,
2125 					     ctrlr->opts.admin_timeout_ms);
2126 			return 0;
2127 		}
2128 	}
2129 
2130 	rc = nvme_ctrlr_identify_ns_iocs_specific_async(ns);
2131 	if (rc) {
2132 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
2133 	}
2134 
2135 	return rc;
2136 }
2137 
2138 static void
2139 nvme_ctrlr_identify_ns_zns_specific_async_done(void *arg, const struct spdk_nvme_cpl *cpl)
2140 {
2141 	struct spdk_nvme_ns *ns = (struct spdk_nvme_ns *)arg;
2142 	struct spdk_nvme_ctrlr *ctrlr = ns->ctrlr;
2143 
2144 	if (spdk_nvme_cpl_is_error(cpl)) {
2145 		nvme_ns_free_zns_specific_data(ns);
2146 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
2147 		return;
2148 	}
2149 
2150 	nvme_ctrlr_identify_namespaces_iocs_specific_next(ctrlr, ns->id);
2151 }
2152 
2153 static int
2154 nvme_ctrlr_identify_ns_iocs_specific_async(struct spdk_nvme_ns *ns)
2155 {
2156 	struct spdk_nvme_ctrlr *ctrlr = ns->ctrlr;
2157 	int rc;
2158 
2159 	switch (ns->csi) {
2160 	case SPDK_NVME_CSI_ZNS:
2161 		break;
2162 	default:
2163 		/*
2164 		 * This switch must handle all cases for which
2165 		 * nvme_ns_has_supported_iocs_specific_data() returns true;
2166 		 * other cases should never happen.
2167 		 */
2168 		assert(0);
2169 	}
2170 
2171 	assert(!ns->nsdata_zns);
2172 	ns->nsdata_zns = spdk_zmalloc(sizeof(*ns->nsdata_zns), 64, NULL, SPDK_ENV_SOCKET_ID_ANY,
2173 				      SPDK_MALLOC_SHARE);
2174 	if (!ns->nsdata_zns) {
2175 		return -ENOMEM;
2176 	}
2177 
2178 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_NS_IOCS_SPECIFIC,
2179 			     ctrlr->opts.admin_timeout_ms);
2180 	rc = nvme_ctrlr_cmd_identify(ns->ctrlr, SPDK_NVME_IDENTIFY_NS_IOCS, 0, ns->id, ns->csi,
2181 				     ns->nsdata_zns, sizeof(*ns->nsdata_zns),
2182 				     nvme_ctrlr_identify_ns_zns_specific_async_done, ns);
2183 	if (rc) {
2184 		nvme_ns_free_zns_specific_data(ns);
2185 	}
2186 
2187 	return rc;
2188 }
2189 
2190 static int
2191 nvme_ctrlr_identify_namespaces_iocs_specific(struct spdk_nvme_ctrlr *ctrlr)
2192 {
2193 	if (!nvme_ctrlr_multi_iocs_enabled(ctrlr)) {
2194 		/* Multi IOCS not supported/enabled, move on to the next state */
2195 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_CONFIGURE_AER,
2196 				     ctrlr->opts.admin_timeout_ms);
2197 		return 0;
2198 	}
2199 
2200 	return nvme_ctrlr_identify_namespaces_iocs_specific_next(ctrlr, 0);
2201 }
2202 
2203 static void
2204 nvme_ctrlr_identify_id_desc_async_done(void *arg, const struct spdk_nvme_cpl *cpl)
2205 {
2206 	struct spdk_nvme_ns *ns = (struct spdk_nvme_ns *)arg;
2207 	struct spdk_nvme_ctrlr *ctrlr = ns->ctrlr;
2208 	uint32_t nsid;
2209 	int rc;
2210 
2211 	if (spdk_nvme_cpl_is_error(cpl)) {
2212 		/*
2213 		 * Many controllers claim to be compatible with NVMe 1.3; however,
2214 		 * they do not implement the NS ID Descriptor List. Therefore, instead of setting
2215 		 * the state to NVME_CTRLR_STATE_ERROR, silently ignore the completion
2216 		 * error and move on to the next state.
2217 		 *
2218 		 * The proper way is to create a new quirk for controllers that violate
2219 		 * the NVMe 1.3 spec by not supporting NS ID Desc List.
2220 		 * (Re-using the NVME_QUIRK_IDENTIFY_CNS quirk is not possible, since
2221 		 * it is too generic and was added in order to handle controllers that
2222 		 * violate the NVMe 1.1 spec by not supporting ACTIVE LIST).
2223 		 */
2224 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_NS_IOCS_SPECIFIC,
2225 				     ctrlr->opts.admin_timeout_ms);
2226 		return;
2227 	}
2228 
2229 	nvme_ns_set_id_desc_list_data(ns);
2230 
2231 	/* move on to the next active NS */
2232 	nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, ns->id);
2233 	ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid);
2234 	if (ns == NULL) {
2235 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_NS_IOCS_SPECIFIC,
2236 				     ctrlr->opts.admin_timeout_ms);
2237 		return;
2238 	}
2239 
2240 	rc = nvme_ctrlr_identify_id_desc_async(ns);
2241 	if (rc) {
2242 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
2243 	}
2244 }
2245 
2246 static int
2247 nvme_ctrlr_identify_id_desc_async(struct spdk_nvme_ns *ns)
2248 {
2249 	struct spdk_nvme_ctrlr *ctrlr = ns->ctrlr;
2250 
2251 	memset(ns->id_desc_list, 0, sizeof(ns->id_desc_list));
2252 
2253 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_ID_DESCS,
2254 			     ctrlr->opts.admin_timeout_ms);
2255 	return nvme_ctrlr_cmd_identify(ns->ctrlr, SPDK_NVME_IDENTIFY_NS_ID_DESCRIPTOR_LIST,
2256 				       0, ns->id, 0, ns->id_desc_list, sizeof(ns->id_desc_list),
2257 				       nvme_ctrlr_identify_id_desc_async_done, ns);
2258 }
2259 
2260 static int
2261 nvme_ctrlr_identify_id_desc_namespaces(struct spdk_nvme_ctrlr *ctrlr)
2262 {
2263 	uint32_t nsid;
2264 	struct spdk_nvme_ns *ns;
2265 	int rc;
2266 
2267 	if ((ctrlr->vs.raw < SPDK_NVME_VERSION(1, 3, 0) &&
2268 	     !(ctrlr->cap.bits.css & SPDK_NVME_CAP_CSS_IOCS)) ||
2269 	    (ctrlr->quirks & NVME_QUIRK_IDENTIFY_CNS)) {
2270 		NVME_CTRLR_DEBUGLOG(ctrlr, "NS ID Descriptor List not supported; not attempting to retrieve it\n");
2271 		/* NS ID Desc List not supported, move on to the next state */
2272 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_NS_IOCS_SPECIFIC,
2273 				     ctrlr->opts.admin_timeout_ms);
2274 		return 0;
2275 	}
2276 
2277 	nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr);
2278 	ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid);
2279 	if (ns == NULL) {
2280 		/* No active NS, move on to the next state */
2281 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_NS_IOCS_SPECIFIC,
2282 				     ctrlr->opts.admin_timeout_ms);
2283 		return 0;
2284 	}
2285 
2286 	rc = nvme_ctrlr_identify_id_desc_async(ns);
2287 	if (rc) {
2288 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
2289 	}
2290 
2291 	return rc;
2292 }
2293 
2294 static void
2295 nvme_ctrlr_update_nvmf_ioccsz(struct spdk_nvme_ctrlr *ctrlr)
2296 {
2297 	if (ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_RDMA ||
2298 	    ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_TCP ||
2299 	    ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_FC) {
2300 		if (ctrlr->cdata.nvmf_specific.ioccsz < 4) {
2301 			NVME_CTRLR_ERRLOG(ctrlr, "Incorrect IOCCSZ %u, the minimum value should be 4\n",
2302 					  ctrlr->cdata.nvmf_specific.ioccsz);
2303 			ctrlr->cdata.nvmf_specific.ioccsz = 4;
2304 			assert(0);
2305 		}
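		/*
		 * IOCCSZ is reported in units of 16 bytes; the usable in-capsule data size is
		 * the capsule size minus the 64-byte submission queue entry.
		 */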
2306 		ctrlr->ioccsz_bytes = ctrlr->cdata.nvmf_specific.ioccsz * 16 - sizeof(struct spdk_nvme_cmd);
2307 		ctrlr->icdoff = ctrlr->cdata.nvmf_specific.icdoff;
2308 	}
2309 }
2310 
2311 static void
2312 nvme_ctrlr_set_num_queues_done(void *arg, const struct spdk_nvme_cpl *cpl)
2313 {
2314 	uint32_t cq_allocated, sq_allocated, min_allocated, i;
2315 	struct spdk_nvme_ctrlr *ctrlr = (struct spdk_nvme_ctrlr *)arg;
2316 
2317 	if (spdk_nvme_cpl_is_error(cpl)) {
2318 		NVME_CTRLR_ERRLOG(ctrlr, "Set Features - Number of Queues failed!\n");
2319 		ctrlr->opts.num_io_queues = 0;
2320 	} else {
2321 		/*
2322 		 * Data in cdw0 is 0-based.
2323 		 * The lower 16 bits indicate the number of submission queues allocated.
2324 		 * The upper 16 bits indicate the number of completion queues allocated.
2325 		 */
2326 		sq_allocated = (cpl->cdw0 & 0xFFFF) + 1;
2327 		cq_allocated = (cpl->cdw0 >> 16) + 1;
2328 
2329 		/*
2330 		 * For 1:1 queue mapping, set number of allocated queues to be minimum of
2331 		 * submission and completion queues.
2332 		 */
2333 		min_allocated = spdk_min(sq_allocated, cq_allocated);
2334 
2335 		/* Set number of queues to be minimum of requested and actually allocated. */
2336 		ctrlr->opts.num_io_queues = spdk_min(min_allocated, ctrlr->opts.num_io_queues);
2337 	}
2338 
2339 	ctrlr->free_io_qids = spdk_bit_array_create(ctrlr->opts.num_io_queues + 1);
2340 	if (ctrlr->free_io_qids == NULL) {
2341 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
2342 		return;
2343 	}
2344 
2345 	/* Initialize list of free I/O queue IDs. QID 0 is the admin queue (implicitly allocated). */
2346 	for (i = 1; i <= ctrlr->opts.num_io_queues; i++) {
2347 		spdk_nvme_ctrlr_free_qid(ctrlr, i);
2348 	}
2349 
2350 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_ACTIVE_NS,
2351 			     ctrlr->opts.admin_timeout_ms);
2352 }
2353 
2354 static int
2355 nvme_ctrlr_set_num_queues(struct spdk_nvme_ctrlr *ctrlr)
2356 {
2357 	int rc;
2358 
2359 	if (ctrlr->opts.num_io_queues > SPDK_NVME_MAX_IO_QUEUES) {
2360 		NVME_CTRLR_NOTICELOG(ctrlr, "Limiting requested num_io_queues %u to max %d\n",
2361 				     ctrlr->opts.num_io_queues, SPDK_NVME_MAX_IO_QUEUES);
2362 		ctrlr->opts.num_io_queues = SPDK_NVME_MAX_IO_QUEUES;
2363 	} else if (ctrlr->opts.num_io_queues < 1) {
2364 		NVME_CTRLR_NOTICELOG(ctrlr, "Requested num_io_queues 0, increasing to 1\n");
2365 		ctrlr->opts.num_io_queues = 1;
2366 	}
2367 
2368 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_SET_NUM_QUEUES,
2369 			     ctrlr->opts.admin_timeout_ms);
2370 
2371 	rc = nvme_ctrlr_cmd_set_num_queues(ctrlr, ctrlr->opts.num_io_queues,
2372 					   nvme_ctrlr_set_num_queues_done, ctrlr);
2373 	if (rc != 0) {
2374 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
2375 		return rc;
2376 	}
2377 
2378 	return 0;
2379 }
2380 
2381 static void
2382 nvme_ctrlr_set_keep_alive_timeout_done(void *arg, const struct spdk_nvme_cpl *cpl)
2383 {
2384 	uint32_t keep_alive_interval_us;
2385 	struct spdk_nvme_ctrlr *ctrlr = (struct spdk_nvme_ctrlr *)arg;
2386 
2387 	if (spdk_nvme_cpl_is_error(cpl)) {
2388 		if ((cpl->status.sct == SPDK_NVME_SCT_GENERIC) &&
2389 		    (cpl->status.sc == SPDK_NVME_SC_INVALID_FIELD)) {
2390 			NVME_CTRLR_DEBUGLOG(ctrlr, "Keep alive timeout Get Feature is not supported\n");
2391 		} else {
2392 			NVME_CTRLR_ERRLOG(ctrlr, "Keep alive timeout Get Feature failed: SC %x SCT %x\n",
2393 					  cpl->status.sc, cpl->status.sct);
2394 			ctrlr->opts.keep_alive_timeout_ms = 0;
2395 			nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
2396 			return;
2397 		}
2398 	} else {
2399 		if (ctrlr->opts.keep_alive_timeout_ms != cpl->cdw0) {
2400 			NVME_CTRLR_DEBUGLOG(ctrlr, "Controller adjusted keep alive timeout to %u ms\n",
2401 					    cpl->cdw0);
2402 		}
2403 
2404 		ctrlr->opts.keep_alive_timeout_ms = cpl->cdw0;
2405 	}
2406 
2407 	if (ctrlr->opts.keep_alive_timeout_ms == 0) {
2408 		ctrlr->keep_alive_interval_ticks = 0;
2409 	} else {
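		/*
		 * Send keep alives at half of the negotiated timeout, so the controller sees
		 * one well before the timeout can expire.
		 */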
2410 		keep_alive_interval_us = ctrlr->opts.keep_alive_timeout_ms * 1000 / 2;
2411 
2412 		NVME_CTRLR_DEBUGLOG(ctrlr, "Sending keep alive every %u us\n", keep_alive_interval_us);
2413 
2414 		ctrlr->keep_alive_interval_ticks = (keep_alive_interval_us * spdk_get_ticks_hz()) /
2415 						   UINT64_C(1000000);
2416 
2417 		/* Schedule the first Keep Alive to be sent as soon as possible. */
2418 		ctrlr->next_keep_alive_tick = spdk_get_ticks();
2419 	}
2420 
2421 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_HOST_ID,
2422 			     ctrlr->opts.admin_timeout_ms);
2423 }
2424 
2425 static int
2426 nvme_ctrlr_set_keep_alive_timeout(struct spdk_nvme_ctrlr *ctrlr)
2427 {
2428 	int rc;
2429 
2430 	if (ctrlr->opts.keep_alive_timeout_ms == 0) {
2431 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_HOST_ID,
2432 				     ctrlr->opts.admin_timeout_ms);
2433 		return 0;
2434 	}
2435 
2436 	if (ctrlr->cdata.kas == 0) {
2437 		NVME_CTRLR_DEBUGLOG(ctrlr, "Controller KAS is 0 - not enabling Keep Alive\n");
2438 		ctrlr->opts.keep_alive_timeout_ms = 0;
2439 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_HOST_ID,
2440 				     ctrlr->opts.admin_timeout_ms);
2441 		return 0;
2442 	}
2443 
2444 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_KEEP_ALIVE_TIMEOUT,
2445 			     ctrlr->opts.admin_timeout_ms);
2446 
2447 	/* Retrieve actual keep alive timeout, since the controller may have adjusted it. */
2448 	rc = spdk_nvme_ctrlr_cmd_get_feature(ctrlr, SPDK_NVME_FEAT_KEEP_ALIVE_TIMER, 0, NULL, 0,
2449 					     nvme_ctrlr_set_keep_alive_timeout_done, ctrlr);
2450 	if (rc != 0) {
2451 		NVME_CTRLR_ERRLOG(ctrlr, "Keep alive timeout Get Feature failed: %d\n", rc);
2452 		ctrlr->opts.keep_alive_timeout_ms = 0;
2453 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
2454 		return rc;
2455 	}
2456 
2457 	return 0;
2458 }
2459 
2460 static void
2461 nvme_ctrlr_set_host_id_done(void *arg, const struct spdk_nvme_cpl *cpl)
2462 {
2463 	struct spdk_nvme_ctrlr *ctrlr = (struct spdk_nvme_ctrlr *)arg;
2464 
2465 	if (spdk_nvme_cpl_is_error(cpl)) {
2466 		/*
2467 		 * Treat Set Features - Host ID failure as non-fatal, since the Host ID feature
2468 		 * is optional.
2469 		 */
2470 		NVME_CTRLR_WARNLOG(ctrlr, "Set Features - Host ID failed: SC 0x%x SCT 0x%x\n",
2471 				   cpl->status.sc, cpl->status.sct);
2472 	} else {
2473 		NVME_CTRLR_DEBUGLOG(ctrlr, "Set Features - Host ID was successful\n");
2474 	}
2475 
2476 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_READY, NVME_TIMEOUT_INFINITE);
2477 }
2478 
2479 static int
2480 nvme_ctrlr_set_host_id(struct spdk_nvme_ctrlr *ctrlr)
2481 {
2482 	uint8_t *host_id;
2483 	uint32_t host_id_size;
2484 	int rc;
2485 
2486 	if (ctrlr->trid.trtype != SPDK_NVME_TRANSPORT_PCIE) {
2487 		/*
2488 		 * NVMe-oF sends the host ID during Connect and doesn't allow
2489 		 * Set Features - Host Identifier after Connect, so we don't need to do anything here.
2490 		 */
2491 		NVME_CTRLR_DEBUGLOG(ctrlr, "NVMe-oF transport - not sending Set Features - Host ID\n");
2492 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_READY, NVME_TIMEOUT_INFINITE);
2493 		return 0;
2494 	}
2495 
2496 	if (ctrlr->cdata.ctratt.host_id_exhid_supported) {
2497 		NVME_CTRLR_DEBUGLOG(ctrlr, "Using 128-bit extended host identifier\n");
2498 		host_id = ctrlr->opts.extended_host_id;
2499 		host_id_size = sizeof(ctrlr->opts.extended_host_id);
2500 	} else {
2501 		NVME_CTRLR_DEBUGLOG(ctrlr, "Using 64-bit host identifier\n");
2502 		host_id = ctrlr->opts.host_id;
2503 		host_id_size = sizeof(ctrlr->opts.host_id);
2504 	}
2505 
2506 	/* If the user specified an all-zeroes host identifier, don't send the command. */
2507 	if (spdk_mem_all_zero(host_id, host_id_size)) {
2508 		NVME_CTRLR_DEBUGLOG(ctrlr, "User did not specify host ID - not sending Set Features - Host ID\n");
2509 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_READY, NVME_TIMEOUT_INFINITE);
2510 		return 0;
2511 	}
2512 
2513 	SPDK_LOGDUMP(nvme, "host_id", host_id, host_id_size);
2514 
2515 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_HOST_ID,
2516 			     ctrlr->opts.admin_timeout_ms);
2517 
2518 	rc = nvme_ctrlr_cmd_set_host_id(ctrlr, host_id, host_id_size, nvme_ctrlr_set_host_id_done, ctrlr);
2519 	if (rc != 0) {
2520 		NVME_CTRLR_ERRLOG(ctrlr, "Set Features - Host ID failed: %d\n", rc);
2521 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
2522 		return rc;
2523 	}
2524 
2525 	return 0;
2526 }
2527 
2528 static void
2529 nvme_ctrlr_destruct_namespaces(struct spdk_nvme_ctrlr *ctrlr)
2530 {
2531 	if (ctrlr->ns) {
2532 		uint32_t i, num_ns = ctrlr->num_ns;
2533 
2534 		for (i = 0; i < num_ns; i++) {
2535 			nvme_ns_destruct(&ctrlr->ns[i]);
2536 		}
2537 
2538 		spdk_free(ctrlr->ns);
2539 		ctrlr->ns = NULL;
2540 		ctrlr->num_ns = 0;
2541 	}
2542 }
2543 
2544 void
2545 nvme_ctrlr_update_namespaces(struct spdk_nvme_ctrlr *ctrlr)
2546 {
2547 	uint32_t i, nn = ctrlr->cdata.nn;
2548 	struct spdk_nvme_ns_data *nsdata;
2549 	bool ns_is_active;
2550 
2551 	for (i = 0; i < nn; i++) {
2552 		uint32_t		nsid = i + 1;
2553 		struct spdk_nvme_ns	*ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid);
2554 
2555 		assert(ns != NULL);
2556 		nsdata = &ns->nsdata;
2557 		ns_is_active = spdk_nvme_ctrlr_is_active_ns(ctrlr, nsid);
2558 
2559 		if (nsdata->ncap && ns_is_active) {
2560 			NVME_CTRLR_DEBUGLOG(ctrlr, "Namespace %u was updated\n", nsid);
2561 			if (nvme_ns_update(ns) != 0) {
2562 				NVME_CTRLR_ERRLOG(ctrlr, "Failed to update active NS %u\n", nsid);
2563 				continue;
2564 			}
2565 		}
2566 
2567 		if ((nsdata->ncap == 0) && ns_is_active) {
2568 			NVME_CTRLR_DEBUGLOG(ctrlr, "Namespace %u was added\n", nsid);
2569 			if (nvme_ns_construct(ns, nsid, ctrlr) != 0) {
2570 				continue;
2571 			}
2572 		}
2573 
2574 		if (nsdata->ncap && !ns_is_active) {
2575 			NVME_CTRLR_DEBUGLOG(ctrlr, "Namespace %u was removed\n", nsid);
2576 			nvme_ns_destruct(ns);
2577 		}
2578 	}
2579 }
2580 
2581 static int
2582 nvme_ctrlr_construct_namespaces(struct spdk_nvme_ctrlr *ctrlr)
2583 {
2584 	int rc = 0;
2585 	uint32_t i, nn = ctrlr->cdata.nn;
2586 
2587 	/* ctrlr->num_ns may be 0 (startup) or a different number of namespaces (reset),
2588 	 * so check if we need to reallocate.
2589 	 */
2590 	if (nn != ctrlr->num_ns) {
2591 		nvme_ctrlr_destruct_namespaces(ctrlr);
2592 
2593 		if (nn == 0) {
2594 			NVME_CTRLR_WARNLOG(ctrlr, "controller has 0 namespaces\n");
2595 			return 0;
2596 		}
2597 
2598 		ctrlr->ns = spdk_zmalloc(nn * sizeof(struct spdk_nvme_ns), 64, NULL,
2599 					 SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_SHARE);
2600 		if (ctrlr->ns == NULL) {
2601 			rc = -ENOMEM;
2602 			goto fail;
2603 		}
2604 
2605 		ctrlr->num_ns = nn;
2606 	} else {
2607 		/*
2608 		 * The controller could have been reset with the same number of namespaces.
2609 		 * If so, we still need to free the iocs specific data, to get a clean slate.
2610 		 */
2611 		for (i = 0; i < ctrlr->num_ns; i++) {
2612 			nvme_ns_free_iocs_specific_data(&ctrlr->ns[i]);
2613 		}
2614 	}
2615 
2616 	return 0;
2617 
2618 fail:
2619 	nvme_ctrlr_destruct_namespaces(ctrlr);
2620 	NVME_CTRLR_ERRLOG(ctrlr, "Failed to construct namespaces, err %d\n", rc);
2621 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
2622 	return rc;
2623 }
2624 
2625 static int
2626 nvme_ctrlr_clear_changed_ns_log(struct spdk_nvme_ctrlr *ctrlr)
2627 {
2628 	struct nvme_completion_poll_status	*status;
2629 	int		rc = -ENOMEM;
2630 	char		*buffer = NULL;
2631 	uint32_t	nsid;
2632 	size_t		buf_size = (SPDK_NVME_MAX_CHANGED_NAMESPACES * sizeof(uint32_t));
2633 
2634 	buffer = spdk_dma_zmalloc(buf_size, 4096, NULL);
2635 	if (!buffer) {
2636 		NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate buffer for getting "
2637 				  "changed ns log.\n");
2638 		return rc;
2639 	}
2640 
2641 	status = calloc(1, sizeof(*status));
2642 	if (!status) {
2643 		NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n");
2644 		goto free_buffer;
2645 	}
2646 
2647 	rc = spdk_nvme_ctrlr_cmd_get_log_page(ctrlr,
2648 					      SPDK_NVME_LOG_CHANGED_NS_LIST,
2649 					      SPDK_NVME_GLOBAL_NS_TAG,
2650 					      buffer, buf_size, 0,
2651 					      nvme_completion_poll_cb, status);
2652 
2653 	if (rc) {
2654 		NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_cmd_get_log_page() failed: rc=%d\n", rc);
2655 		free(status);
2656 		goto free_buffer;
2657 	}
2658 
2659 	rc = nvme_wait_for_completion_timeout(ctrlr->adminq, status,
2660 					      ctrlr->opts.admin_timeout_ms * 1000);
2661 	if (!status->timed_out) {
2662 		free(status);
2663 	}
2664 
2665 	if (rc) {
2666 		NVME_CTRLR_ERRLOG(ctrlr, "wait for spdk_nvme_ctrlr_cmd_get_log_page failed: rc=%d\n", rc);
2667 		goto free_buffer;
2668 	}
2669 
2670 	/* Only check for the overflow case. */
2671 	nsid = from_le32(buffer);
2672 	if (nsid == 0xffffffffu) {
2673 		NVME_CTRLR_WARNLOG(ctrlr, "changed ns log overflowed.\n");
2674 	}
2675 
2676 free_buffer:
2677 	spdk_dma_free(buffer);
2678 	return rc;
2679 }
2680 
2681 void
2682 nvme_ctrlr_process_async_event(struct spdk_nvme_ctrlr *ctrlr,
2683 			       const struct spdk_nvme_cpl *cpl)
2684 {
2685 	union spdk_nvme_async_event_completion event;
2686 	struct spdk_nvme_ctrlr_process *active_proc;
2687 	int rc;
2688 
2689 	event.raw = cpl->cdw0;
2690 
2691 	if ((event.bits.async_event_type == SPDK_NVME_ASYNC_EVENT_TYPE_NOTICE) &&
2692 	    (event.bits.async_event_info == SPDK_NVME_ASYNC_EVENT_NS_ATTR_CHANGED)) {
2693 		nvme_ctrlr_clear_changed_ns_log(ctrlr);
2694 
2695 		rc = nvme_ctrlr_identify_active_ns(ctrlr);
2696 		if (rc) {
2697 			return;
2698 		}
2699 		nvme_ctrlr_update_namespaces(ctrlr);
2700 		nvme_io_msg_ctrlr_update(ctrlr);
2701 	}
2702 
2703 	if ((event.bits.async_event_type == SPDK_NVME_ASYNC_EVENT_TYPE_NOTICE) &&
2704 	    (event.bits.async_event_info == SPDK_NVME_ASYNC_EVENT_ANA_CHANGE)) {
2705 		if (!ctrlr->opts.disable_read_ana_log_page) {
2706 			rc = nvme_ctrlr_update_ana_log_page(ctrlr);
2707 			if (rc) {
2708 				return;
2709 			}
2710 			nvme_ctrlr_parse_ana_log_page(ctrlr, nvme_ctrlr_update_ns_ana_states,
2711 						      ctrlr);
2712 		}
2713 	}
2714 
2715 	active_proc = nvme_ctrlr_get_current_process(ctrlr);
2716 	if (active_proc && active_proc->aer_cb_fn) {
2717 		active_proc->aer_cb_fn(active_proc->aer_cb_arg, cpl);
2718 	}
2719 }
2720 
2721 static void
2722 nvme_ctrlr_queue_async_event(struct spdk_nvme_ctrlr *ctrlr,
2723 			     const struct spdk_nvme_cpl *cpl)
2724 {
2725 	struct  spdk_nvme_ctrlr_aer_completion_list *nvme_event;
2726 
2727 	nvme_event = calloc(1, sizeof(*nvme_event));
2728 	if (!nvme_event) {
2729 		NVME_CTRLR_ERRLOG(ctrlr, "Alloc nvme event failed, ignore the event\n");
2730 		return;
2731 	}
2732 
2733 	nvme_event->cpl = *cpl;
2734 	STAILQ_INSERT_TAIL(&ctrlr->async_events, nvme_event, link);
2735 }
2736 
2737 void
2738 nvme_ctrlr_complete_queued_async_events(struct spdk_nvme_ctrlr *ctrlr)
2739 {
2740 	struct  spdk_nvme_ctrlr_aer_completion_list  *nvme_event, *nvme_event_tmp;
2741 
2742 	STAILQ_FOREACH_SAFE(nvme_event, &ctrlr->async_events, link, nvme_event_tmp) {
2743 		STAILQ_REMOVE(&ctrlr->async_events, nvme_event,
2744 			      spdk_nvme_ctrlr_aer_completion_list, link);
2745 		nvme_ctrlr_process_async_event(ctrlr, &nvme_event->cpl);
2746 		free(nvme_event);
2747 	}
2748 }
2749 
2750 static void
2751 nvme_ctrlr_async_event_cb(void *arg, const struct spdk_nvme_cpl *cpl)
2752 {
2753 	struct nvme_async_event_request	*aer = arg;
2754 	struct spdk_nvme_ctrlr		*ctrlr = aer->ctrlr;
2755 
2756 	if (cpl->status.sct == SPDK_NVME_SCT_GENERIC &&
2757 	    cpl->status.sc == SPDK_NVME_SC_ABORTED_SQ_DELETION) {
2758 		/*
2759 		 *  This is simulated when the controller is being shut down, to
2760 		 *  effectively abort outstanding asynchronous event requests
2761 		 *  and make sure all memory is freed.  Do not repost the
2762 		 *  request in this case.
2763 		 */
2764 		return;
2765 	}
2766 
2767 	if (cpl->status.sct == SPDK_NVME_SCT_COMMAND_SPECIFIC &&
2768 	    cpl->status.sc == SPDK_NVME_SC_ASYNC_EVENT_REQUEST_LIMIT_EXCEEDED) {
2769 		/*
2770 		 *  SPDK will only send as many AERs as the device says it supports,
2771 		 *  so this status code indicates an out-of-spec device.  Do not repost
2772 		 *  the request in this case.
2773 		 */
2774 		NVME_CTRLR_ERRLOG(ctrlr, "Controller appears out-of-spec for asynchronous event request\n"
2775 				  "handling.  Do not repost this AER.\n");
2776 		return;
2777 	}
2778 
2779 	/* Add the events to the list */
2780 	nvme_ctrlr_queue_async_event(ctrlr, cpl);
2781 
2782 	/* If the ctrlr was removed or in the destruct state, we should not send aer again */
2783 	if (ctrlr->is_removed || ctrlr->is_destructed) {
2784 		return;
2785 	}
2786 
2787 	/*
2788 	 * Repost another asynchronous event request to replace the one
2789 	 *  that just completed.
2790 	 */
2791 	if (nvme_ctrlr_construct_and_submit_aer(ctrlr, aer)) {
2792 		/*
2793 		 * We can't do anything to recover from a failure here,
2794 		 * so just print a warning message and leave the AER unsubmitted.
2795 		 */
2796 		NVME_CTRLR_ERRLOG(ctrlr, "resubmitting AER failed!\n");
2797 	}
2798 }
2799 
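/*
 * Build an Asynchronous Event Request (a command with no payload) on the admin queue
 * and submit it; the completion is delivered to nvme_ctrlr_async_event_cb().
 */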
2800 static int
2801 nvme_ctrlr_construct_and_submit_aer(struct spdk_nvme_ctrlr *ctrlr,
2802 				    struct nvme_async_event_request *aer)
2803 {
2804 	struct nvme_request *req;
2805 
2806 	aer->ctrlr = ctrlr;
2807 	req = nvme_allocate_request_null(ctrlr->adminq, nvme_ctrlr_async_event_cb, aer);
2808 	aer->req = req;
2809 	if (req == NULL) {
2810 		return -1;
2811 	}
2812 
2813 	req->cmd.opc = SPDK_NVME_OPC_ASYNC_EVENT_REQUEST;
2814 	return nvme_ctrlr_submit_admin_request(ctrlr, req);
2815 }
2816 
2817 static void
2818 nvme_ctrlr_configure_aer_done(void *arg, const struct spdk_nvme_cpl *cpl)
2819 {
2820 	struct nvme_async_event_request		*aer;
2821 	int					rc;
2822 	uint32_t				i;
2823 	struct spdk_nvme_ctrlr *ctrlr =	(struct spdk_nvme_ctrlr *)arg;
2824 
2825 	if (spdk_nvme_cpl_is_error(cpl)) {
2826 		NVME_CTRLR_NOTICELOG(ctrlr, "nvme_ctrlr_configure_aer failed!\n");
2827 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_SUPPORTED_LOG_PAGES,
2828 				     ctrlr->opts.admin_timeout_ms);
2829 		return;
2830 	}
2831 
2832 	/* aerl is a zero-based value, so we need to add 1 here. */
2833 	ctrlr->num_aers = spdk_min(NVME_MAX_ASYNC_EVENTS, (ctrlr->cdata.aerl + 1));
2834 
2835 	for (i = 0; i < ctrlr->num_aers; i++) {
2836 		aer = &ctrlr->aer[i];
2837 		rc = nvme_ctrlr_construct_and_submit_aer(ctrlr, aer);
2838 		if (rc) {
2839 			NVME_CTRLR_ERRLOG(ctrlr, "nvme_ctrlr_construct_and_submit_aer failed!\n");
2840 			nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
2841 			return;
2842 		}
2843 	}
2844 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_SUPPORTED_LOG_PAGES,
2845 			     ctrlr->opts.admin_timeout_ms);
2846 }
2847 
2848 static int
2849 nvme_ctrlr_configure_aer(struct spdk_nvme_ctrlr *ctrlr)
2850 {
2851 	union spdk_nvme_feat_async_event_configuration	config;
2852 	int						rc;
2853 
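	/* Enable notifications for all critical warnings the controller may report. */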
2854 	config.raw = 0;
2855 	config.bits.crit_warn.bits.available_spare = 1;
2856 	config.bits.crit_warn.bits.temperature = 1;
2857 	config.bits.crit_warn.bits.device_reliability = 1;
2858 	config.bits.crit_warn.bits.read_only = 1;
2859 	config.bits.crit_warn.bits.volatile_memory_backup = 1;
2860 
2861 	if (ctrlr->vs.raw >= SPDK_NVME_VERSION(1, 2, 0)) {
2862 		if (ctrlr->cdata.oaes.ns_attribute_notices) {
2863 			config.bits.ns_attr_notice = 1;
2864 		}
2865 		if (ctrlr->cdata.oaes.fw_activation_notices) {
2866 			config.bits.fw_activation_notice = 1;
2867 		}
2868 		if (ctrlr->cdata.oaes.ana_change_notices) {
2869 			config.bits.ana_change_notice = 1;
2870 		}
2871 	}
2872 	if (ctrlr->vs.raw >= SPDK_NVME_VERSION(1, 3, 0) && ctrlr->cdata.lpa.telemetry) {
2873 		config.bits.telemetry_log_notice = 1;
2874 	}
2875 
2876 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_CONFIGURE_AER,
2877 			     ctrlr->opts.admin_timeout_ms);
2878 
2879 	rc = nvme_ctrlr_cmd_set_async_event_config(ctrlr, config,
2880 			nvme_ctrlr_configure_aer_done,
2881 			ctrlr);
2882 	if (rc != 0) {
2883 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
2884 		return rc;
2885 	}
2886 
2887 	return 0;
2888 }
2889 
2890 struct spdk_nvme_ctrlr_process *
2891 nvme_ctrlr_get_process(struct spdk_nvme_ctrlr *ctrlr, pid_t pid)
2892 {
2893 	struct spdk_nvme_ctrlr_process	*active_proc;
2894 
2895 	TAILQ_FOREACH(active_proc, &ctrlr->active_procs, tailq) {
2896 		if (active_proc->pid == pid) {
2897 			return active_proc;
2898 		}
2899 	}
2900 
2901 	return NULL;
2902 }
2903 
2904 struct spdk_nvme_ctrlr_process *
2905 nvme_ctrlr_get_current_process(struct spdk_nvme_ctrlr *ctrlr)
2906 {
2907 	return nvme_ctrlr_get_process(ctrlr, getpid());
2908 }
2909 
2910 /**
2911  * This function will be called when a process is using the controller.
2912  *  1. For the primary process, it is called when constructing the controller.
2913  *  2. For a secondary process, it is called when probing the controller.
2914  * Note: it checks whether the calling process has already been added and, if so, returns early.
2915  */
2916 int
2917 nvme_ctrlr_add_process(struct spdk_nvme_ctrlr *ctrlr, void *devhandle)
2918 {
2919 	struct spdk_nvme_ctrlr_process	*ctrlr_proc;
2920 	pid_t				pid = getpid();
2921 
2922 	/* Check whether the process is already added or not */
2923 	if (nvme_ctrlr_get_process(ctrlr, pid)) {
2924 		return 0;
2925 	}
2926 
2927 	/* Initialize the per process properties for this ctrlr */
2928 	ctrlr_proc = spdk_zmalloc(sizeof(struct spdk_nvme_ctrlr_process),
2929 				  64, NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_SHARE);
2930 	if (ctrlr_proc == NULL) {
2931 		NVME_CTRLR_ERRLOG(ctrlr, "failed to allocate memory to track the process props\n");
2932 
2933 		return -1;
2934 	}
2935 
2936 	ctrlr_proc->is_primary = spdk_process_is_primary();
2937 	ctrlr_proc->pid = pid;
2938 	STAILQ_INIT(&ctrlr_proc->active_reqs);
2939 	ctrlr_proc->devhandle = devhandle;
2940 	ctrlr_proc->ref = 0;
2941 	TAILQ_INIT(&ctrlr_proc->allocated_io_qpairs);
2942 
2943 	TAILQ_INSERT_TAIL(&ctrlr->active_procs, ctrlr_proc, tailq);
2944 
2945 	return 0;
2946 }
2947 
2948 /**
2949  * This function will be called when the process detaches the controller.
2950  * Note: the ctrlr_lock must be held when calling this function.
2951  */
2952 static void
2953 nvme_ctrlr_remove_process(struct spdk_nvme_ctrlr *ctrlr,
2954 			  struct spdk_nvme_ctrlr_process *proc)
2955 {
2956 	struct spdk_nvme_qpair	*qpair, *tmp_qpair;
2957 
2958 	assert(STAILQ_EMPTY(&proc->active_reqs));
2959 
2960 	TAILQ_FOREACH_SAFE(qpair, &proc->allocated_io_qpairs, per_process_tailq, tmp_qpair) {
2961 		spdk_nvme_ctrlr_free_io_qpair(qpair);
2962 	}
2963 
2964 	TAILQ_REMOVE(&ctrlr->active_procs, proc, tailq);
2965 
2966 	if (ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) {
2967 		spdk_pci_device_detach(proc->devhandle);
2968 	}
2969 
2970 	spdk_free(proc);
2971 }
2972 
2973 /**
2974  * This function will be called when a process has exited unexpectedly,
2975  *  in order to free any incomplete nvme requests, allocated I/O qpairs
2976  *  and allocated memory.
2977  * Note: the ctrlr_lock must be held when calling this function.
2978  */
2979 static void
2980 nvme_ctrlr_cleanup_process(struct spdk_nvme_ctrlr_process *proc)
2981 {
2982 	struct nvme_request	*req, *tmp_req;
2983 	struct spdk_nvme_qpair	*qpair, *tmp_qpair;
2984 
2985 	STAILQ_FOREACH_SAFE(req, &proc->active_reqs, stailq, tmp_req) {
2986 		STAILQ_REMOVE(&proc->active_reqs, req, nvme_request, stailq);
2987 
2988 		assert(req->pid == proc->pid);
2989 
2990 		nvme_free_request(req);
2991 	}
2992 
2993 	TAILQ_FOREACH_SAFE(qpair, &proc->allocated_io_qpairs, per_process_tailq, tmp_qpair) {
2994 		TAILQ_REMOVE(&proc->allocated_io_qpairs, qpair, per_process_tailq);
2995 
2996 		/*
2997 		 * The process may have been killed while some qpairs were in their
2998 		 *  completion context.  Clear that flag here to allow these IO
2999 		 *  qpairs to be deleted.
3000 		 */
3001 		qpair->in_completion_context = 0;
3002 
3003 		qpair->no_deletion_notification_needed = 1;
3004 
3005 		spdk_nvme_ctrlr_free_io_qpair(qpair);
3006 	}
3007 
3008 	spdk_free(proc);
3009 }
3010 
3011 /**
3012  * This function will be called when destructing the controller.
3013  *  1. There are no more outstanding admin requests on this controller.
3014  *  2. Clean up any resources left allocated by processes that are gone.
3015  */
3016 void
3017 nvme_ctrlr_free_processes(struct spdk_nvme_ctrlr *ctrlr)
3018 {
3019 	struct spdk_nvme_ctrlr_process	*active_proc, *tmp;
3020 
3021 	/* Free all the processes' properties and make sure there are no pending admin I/Os */
3022 	TAILQ_FOREACH_SAFE(active_proc, &ctrlr->active_procs, tailq, tmp) {
3023 		TAILQ_REMOVE(&ctrlr->active_procs, active_proc, tailq);
3024 
3025 		assert(STAILQ_EMPTY(&active_proc->active_reqs));
3026 
3027 		spdk_free(active_proc);
3028 	}
3029 }
3030 
3031 /**
3032  * This function will be called when any other process attaches or
3033  *  detaches the controller in order to clean up any unexpectedly
3034  *  terminated processes.
3035  * Note: the ctrlr_lock must be held when calling this function.
3036  */
3037 static int
3038 nvme_ctrlr_remove_inactive_proc(struct spdk_nvme_ctrlr *ctrlr)
3039 {
3040 	struct spdk_nvme_ctrlr_process	*active_proc, *tmp;
3041 	int				active_proc_count = 0;
3042 
3043 	TAILQ_FOREACH_SAFE(active_proc, &ctrlr->active_procs, tailq, tmp) {
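		/* kill() with signal 0 performs error checking only; ESRCH means the process no longer exists. */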
3044 		if ((kill(active_proc->pid, 0) == -1) && (errno == ESRCH)) {
3045 			NVME_CTRLR_ERRLOG(ctrlr, "process %d terminated unexpectedly\n", active_proc->pid);
3046 
3047 			TAILQ_REMOVE(&ctrlr->active_procs, active_proc, tailq);
3048 
3049 			nvme_ctrlr_cleanup_process(active_proc);
3050 		} else {
3051 			active_proc_count++;
3052 		}
3053 	}
3054 
3055 	return active_proc_count;
3056 }
3057 
3058 void
3059 nvme_ctrlr_proc_get_ref(struct spdk_nvme_ctrlr *ctrlr)
3060 {
3061 	struct spdk_nvme_ctrlr_process	*active_proc;
3062 
3063 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
3064 
3065 	nvme_ctrlr_remove_inactive_proc(ctrlr);
3066 
3067 	active_proc = nvme_ctrlr_get_current_process(ctrlr);
3068 	if (active_proc) {
3069 		active_proc->ref++;
3070 	}
3071 
3072 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
3073 }
3074 
3075 void
3076 nvme_ctrlr_proc_put_ref(struct spdk_nvme_ctrlr *ctrlr)
3077 {
3078 	struct spdk_nvme_ctrlr_process	*active_proc;
3079 	int				proc_count;
3080 
3081 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
3082 
3083 	proc_count = nvme_ctrlr_remove_inactive_proc(ctrlr);
3084 
3085 	active_proc = nvme_ctrlr_get_current_process(ctrlr);
3086 	if (active_proc) {
3087 		active_proc->ref--;
3088 		assert(active_proc->ref >= 0);
3089 
3090 		/*
3091 		 * The last active process will be removed at the end of
3092 		 * the destruction of the controller.
3093 		 */
3094 		if (active_proc->ref == 0 && proc_count != 1) {
3095 			nvme_ctrlr_remove_process(ctrlr, active_proc);
3096 		}
3097 	}
3098 
3099 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
3100 }
3101 
3102 int
3103 nvme_ctrlr_get_ref_count(struct spdk_nvme_ctrlr *ctrlr)
3104 {
3105 	struct spdk_nvme_ctrlr_process	*active_proc;
3106 	int				ref = 0;
3107 
3108 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
3109 
3110 	nvme_ctrlr_remove_inactive_proc(ctrlr);
3111 
3112 	TAILQ_FOREACH(active_proc, &ctrlr->active_procs, tailq) {
3113 		ref += active_proc->ref;
3114 	}
3115 
3116 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
3117 
3118 	return ref;
3119 }
3120 
3121 /**
3122  *  Get the PCI device handle which is only visible to its associated process.
3123  */
3124 struct spdk_pci_device *
3125 nvme_ctrlr_proc_get_devhandle(struct spdk_nvme_ctrlr *ctrlr)
3126 {
3127 	struct spdk_nvme_ctrlr_process	*active_proc;
3128 	struct spdk_pci_device		*devhandle = NULL;
3129 
3130 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
3131 
3132 	active_proc = nvme_ctrlr_get_current_process(ctrlr);
3133 	if (active_proc) {
3134 		devhandle = active_proc->devhandle;
3135 	}
3136 
3137 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
3138 
3139 	return devhandle;
3140 }
3141 
3142 /**
3143  * This function will be called repeatedly during initialization until the controller is ready.
3144  */
3145 int
3146 nvme_ctrlr_process_init(struct spdk_nvme_ctrlr *ctrlr)
3147 {
3148 	union spdk_nvme_cc_register cc;
3149 	union spdk_nvme_csts_register csts;
3150 	uint32_t ready_timeout_in_ms;
3151 	uint64_t ticks;
3152 	int rc = 0;
3153 
3154 	ticks = spdk_get_ticks();
3155 
3156 	/*
3157 	 * We may need to avoid accessing any register on the target controller
3158 	 * for a while. Return early without touching the state machine.
3159 	 * The sleep_timeout_tsc > 0 check is needed for the unit tests.
3160 	 */
3161 	if ((ctrlr->sleep_timeout_tsc > 0) &&
3162 	    (ticks <= ctrlr->sleep_timeout_tsc)) {
3163 		return 0;
3164 	}
3165 	ctrlr->sleep_timeout_tsc = 0;
3166 
3167 	if (ctrlr->state > NVME_CTRLR_STATE_CONNECT_ADMINQ &&
3168 	    (nvme_ctrlr_get_cc(ctrlr, &cc) || nvme_ctrlr_get_csts(ctrlr, &csts))) {
3169 		if (!ctrlr->is_failed && ctrlr->state_timeout_tsc != NVME_TIMEOUT_INFINITE) {
3170 			/* While a device is resetting, it may be unable to service MMIO reads
3171 			 * temporarily. Allow for this case.
3172 			 */
3173 			NVME_CTRLR_DEBUGLOG(ctrlr, "Get registers failed while waiting for CSTS.RDY == 0\n");
3174 			goto init_timeout;
3175 		}
3176 		NVME_CTRLR_ERRLOG(ctrlr, "Failed to read CC and CSTS in state %d\n", ctrlr->state);
3177 		return -EIO;
3178 	}
3179 
3180 	ready_timeout_in_ms = 500 * ctrlr->cap.bits.to;
3181 
3182 	/*
3183 	 * Check if the current initialization step is done or has timed out.
3184 	 */
3185 	switch (ctrlr->state) {
3186 	case NVME_CTRLR_STATE_INIT_DELAY:
3187 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_INIT, ready_timeout_in_ms);
3188 		if (ctrlr->quirks & NVME_QUIRK_DELAY_BEFORE_INIT) {
3189 			/*
3190 			 * Controller may need some delay before it's enabled.
3191 			 *
3192 			 * This is a workaround for an issue where the PCIe-attached NVMe controller
3193 			 * is not ready after VFIO reset. We delay the initialization rather than the
3194 			 * enabling itself, because this is required only for the very first enabling
3195 			 * - directly after a VFIO reset.
3196 			 */
3197 			NVME_CTRLR_DEBUGLOG(ctrlr, "Adding 2 second delay before initializing the controller\n");
3198 			ctrlr->sleep_timeout_tsc = ticks + (2000 * spdk_get_ticks_hz() / 1000);
3199 		}
3200 		break;
3201 
3202 	case NVME_CTRLR_STATE_CONNECT_ADMINQ: /* synonymous with NVME_CTRLR_STATE_INIT */
3203 		rc = nvme_transport_ctrlr_connect_qpair(ctrlr, ctrlr->adminq);
3204 		if (rc == 0) {
3205 			nvme_qpair_set_state(ctrlr->adminq, NVME_QPAIR_ENABLED);
3206 			nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_READ_VS, NVME_TIMEOUT_INFINITE);
3207 		} else {
3208 			nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
3209 		}
3210 		break;
3211 
3212 	case NVME_CTRLR_STATE_READ_VS:
3213 		nvme_ctrlr_get_vs(ctrlr, &ctrlr->vs);
3214 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_READ_CAP, NVME_TIMEOUT_INFINITE);
3215 		break;
3216 
3217 	case NVME_CTRLR_STATE_READ_CAP:
3218 		nvme_ctrlr_init_cap(ctrlr);
3219 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_CHECK_EN, NVME_TIMEOUT_INFINITE);
3220 		break;
3221 
3222 	case NVME_CTRLR_STATE_CHECK_EN:
3223 		/* Begin the hardware initialization by making sure the controller is disabled. */
3224 		if (cc.bits.en) {
3225 			NVME_CTRLR_DEBUGLOG(ctrlr, "CC.EN = 1\n");
3226 			/*
3227 			 * Controller is currently enabled. We need to disable it to cause a reset.
3228 			 *
3229 			 * If CC.EN = 1 && CSTS.RDY = 0, the controller is in the process of becoming ready.
3230 			 *  Wait for the ready bit to be 1 before disabling the controller.
3231 			 */
3232 			if (csts.bits.rdy == 0) {
3233 				NVME_CTRLR_DEBUGLOG(ctrlr, "CC.EN = 1 && CSTS.RDY = 0 - waiting for reset to complete\n");
3234 				nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1, ready_timeout_in_ms);
3235 				return 0;
3236 			}
3237 
3238 			/* CC.EN = 1 && CSTS.RDY == 1, so we can immediately disable the controller. */
3239 			NVME_CTRLR_DEBUGLOG(ctrlr, "Setting CC.EN = 0\n");
3240 			cc.bits.en = 0;
3241 			if (nvme_ctrlr_set_cc(ctrlr, &cc)) {
3242 				NVME_CTRLR_ERRLOG(ctrlr, "set_cc() failed\n");
3243 				return -EIO;
3244 			}
3245 			nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0, ready_timeout_in_ms);
3246 
3247 			/*
3248 			 * Wait 2.5 seconds before accessing PCI registers.
3249 			 * Not using sleep() to avoid blocking other controllers' initialization.
3250 			 */
3251 			if (ctrlr->quirks & NVME_QUIRK_DELAY_BEFORE_CHK_RDY) {
3252 				NVME_CTRLR_DEBUGLOG(ctrlr, "Applying quirk: delay 2.5 seconds before reading registers\n");
3253 				ctrlr->sleep_timeout_tsc = ticks + (2500 * spdk_get_ticks_hz() / 1000);
3254 			}
3255 			return 0;
3256 		} else {
3257 			if (csts.bits.rdy == 1) {
3258 				NVME_CTRLR_DEBUGLOG(ctrlr, "CC.EN = 0 && CSTS.RDY = 1 - waiting for shutdown to complete\n");
3259 			}
3260 
3261 			nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0, ready_timeout_in_ms);
3262 			return 0;
3263 		}
3264 		break;
3265 
3266 	case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1:
3267 		if (csts.bits.rdy == 1) {
3268 			NVME_CTRLR_DEBUGLOG(ctrlr, "CC.EN = 1 && CSTS.RDY = 1 - disabling controller\n");
3269 			/* CC.EN = 1 && CSTS.RDY = 1, so we can set CC.EN = 0 now. */
3270 			NVME_CTRLR_DEBUGLOG(ctrlr, "Setting CC.EN = 0\n");
3271 			cc.bits.en = 0;
3272 			if (nvme_ctrlr_set_cc(ctrlr, &cc)) {
3273 				NVME_CTRLR_ERRLOG(ctrlr, "set_cc() failed\n");
3274 				return -EIO;
3275 			}
3276 			nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0, ready_timeout_in_ms);
3277 			return 0;
3278 		}
3279 		break;
3280 
3281 	case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0:
3282 		if (csts.bits.rdy == 0) {
3283 			NVME_CTRLR_DEBUGLOG(ctrlr, "CC.EN = 0 && CSTS.RDY = 0\n");
3284 			nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ENABLE, ready_timeout_in_ms);
3285 			/*
3286 			 * Delay 100us before setting CC.EN = 1.  Some NVMe SSDs miss CC.EN getting
3287 			 *  set to 1 if it is too soon after CSTS.RDY is reported as 0.
3288 			 */
3289 			spdk_delay_us(100);
3290 			return 0;
3291 		}
3292 		break;
3293 
3294 	case NVME_CTRLR_STATE_ENABLE:
3295 		NVME_CTRLR_DEBUGLOG(ctrlr, "Setting CC.EN = 1\n");
3296 		rc = nvme_ctrlr_enable(ctrlr);
3297 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1, ready_timeout_in_ms);
3298 		return rc;
3299 
3300 	case NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1:
3301 		if (csts.bits.rdy == 1) {
3302 			NVME_CTRLR_DEBUGLOG(ctrlr, "CC.EN = 1 && CSTS.RDY = 1 - controller is ready\n");
3303 			/*
3304 			 * The controller has been enabled.
3305 			 *  Perform the rest of initialization serially.
3306 			 */
3307 			nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_RESET_ADMIN_QUEUE,
3308 					     ctrlr->opts.admin_timeout_ms);
3309 			return 0;
3310 		}
3311 		break;
3312 
3313 	case NVME_CTRLR_STATE_RESET_ADMIN_QUEUE:
3314 		nvme_transport_qpair_reset(ctrlr->adminq);
3315 		if (spdk_nvme_ctrlr_is_discovery(ctrlr)) {
3316 			nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_READY, NVME_TIMEOUT_INFINITE);
3317 		} else {
3318 			nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY, ctrlr->opts.admin_timeout_ms);
3319 		}
3320 		break;
3321 
3322 	case NVME_CTRLR_STATE_IDENTIFY:
3323 		rc = nvme_ctrlr_identify(ctrlr);
3324 		break;
3325 
3326 	case NVME_CTRLR_STATE_IDENTIFY_IOCS_SPECIFIC:
3327 		rc = nvme_ctrlr_identify_iocs_specific(ctrlr);
3328 		break;
3329 
3330 	case NVME_CTRLR_STATE_GET_ZNS_CMD_EFFECTS_LOG:
3331 		rc = nvme_ctrlr_get_zns_cmd_and_effects_log(ctrlr);
3332 		break;
3333 
3334 	case NVME_CTRLR_STATE_SET_NUM_QUEUES:
3335 		nvme_ctrlr_update_nvmf_ioccsz(ctrlr);
3336 		rc = nvme_ctrlr_set_num_queues(ctrlr);
3337 		break;
3338 
3339 	case NVME_CTRLR_STATE_IDENTIFY_ACTIVE_NS:
3340 		_nvme_ctrlr_identify_active_ns(ctrlr);
3341 		break;
3342 
3343 	case NVME_CTRLR_STATE_CONSTRUCT_NS:
3344 		rc = nvme_ctrlr_construct_namespaces(ctrlr);
3345 		if (!rc) {
3346 			nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_NS,
3347 					     ctrlr->opts.admin_timeout_ms);
3348 		}
3349 		break;
3350 
3351 	case NVME_CTRLR_STATE_IDENTIFY_NS:
3352 		rc = nvme_ctrlr_identify_namespaces(ctrlr);
3353 		break;
3354 
3355 	case NVME_CTRLR_STATE_IDENTIFY_ID_DESCS:
3356 		rc = nvme_ctrlr_identify_id_desc_namespaces(ctrlr);
3357 		break;
3358 
3359 	case NVME_CTRLR_STATE_IDENTIFY_NS_IOCS_SPECIFIC:
3360 		rc = nvme_ctrlr_identify_namespaces_iocs_specific(ctrlr);
3361 		break;
3362 
3363 	case NVME_CTRLR_STATE_CONFIGURE_AER:
3364 		rc = nvme_ctrlr_configure_aer(ctrlr);
3365 		break;
3366 
3367 	case NVME_CTRLR_STATE_SET_SUPPORTED_LOG_PAGES:
3368 		rc = nvme_ctrlr_set_supported_log_pages(ctrlr);
3369 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_SUPPORTED_FEATURES,
3370 				     ctrlr->opts.admin_timeout_ms);
3371 		break;
3372 
3373 	case NVME_CTRLR_STATE_SET_SUPPORTED_FEATURES:
3374 		nvme_ctrlr_set_supported_features(ctrlr);
3375 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_DB_BUF_CFG,
3376 				     ctrlr->opts.admin_timeout_ms);
3377 		break;
3378 
3379 	case NVME_CTRLR_STATE_SET_DB_BUF_CFG:
3380 		rc = nvme_ctrlr_set_doorbell_buffer_config(ctrlr);
3381 		break;
3382 
3383 	case NVME_CTRLR_STATE_SET_KEEP_ALIVE_TIMEOUT:
3384 		rc = nvme_ctrlr_set_keep_alive_timeout(ctrlr);
3385 		break;
3386 
3387 	case NVME_CTRLR_STATE_SET_HOST_ID:
3388 		rc = nvme_ctrlr_set_host_id(ctrlr);
3389 		break;
3390 
3391 	case NVME_CTRLR_STATE_READY:
3392 		NVME_CTRLR_DEBUGLOG(ctrlr, "Ctrlr already in ready state\n");
3393 		return 0;
3394 
3395 	case NVME_CTRLR_STATE_ERROR:
3396 		NVME_CTRLR_ERRLOG(ctrlr, "Ctrlr is in error state\n");
3397 		return -1;
3398 
3399 	case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY:
3400 	case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_IOCS_SPECIFIC:
3401 	case NVME_CTRLR_STATE_WAIT_FOR_GET_ZNS_CMD_EFFECTS_LOG:
3402 	case NVME_CTRLR_STATE_WAIT_FOR_SET_NUM_QUEUES:
3403 	case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_ACTIVE_NS:
3404 	case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_NS:
3405 	case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_ID_DESCS:
3406 	case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_NS_IOCS_SPECIFIC:
3407 	case NVME_CTRLR_STATE_WAIT_FOR_CONFIGURE_AER:
3408 	case NVME_CTRLR_STATE_WAIT_FOR_DB_BUF_CFG:
3409 	case NVME_CTRLR_STATE_WAIT_FOR_KEEP_ALIVE_TIMEOUT:
3410 	case NVME_CTRLR_STATE_WAIT_FOR_HOST_ID:
3411 		spdk_nvme_qpair_process_completions(ctrlr->adminq, 0);
3412 		break;
3413 
3414 	default:
3415 		assert(0);
3416 		return -1;
3417 	}
3418 
3419 init_timeout:
3420 	/* Note: we use the ticks captured when we entered this function.
3421 	 * This covers environments where the SPDK process gets swapped out after
3422 	 * we tried to advance the state but before we check the timeout here.
3423 	 * It is not normal for this to happen, but harmless to handle it in this
3424 	 * way.
3425 	 */
3426 	if (ctrlr->state_timeout_tsc != NVME_TIMEOUT_INFINITE &&
3427 	    ticks > ctrlr->state_timeout_tsc) {
3428 		NVME_CTRLR_ERRLOG(ctrlr, "Initialization timed out in state %d\n", ctrlr->state);
3429 		return -1;
3430 	}
3431 
3432 	return rc;
3433 }
3434 
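/*
 * Initialize a recursive mutex that can also be shared between processes.
 * On non-FreeBSD platforms the mutex is additionally marked robust and
 * process-shared, so a crashed secondary process does not leave the
 * controller lock permanently held.
 */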
3435 int
3436 nvme_robust_mutex_init_recursive_shared(pthread_mutex_t *mtx)
3437 {
3438 	pthread_mutexattr_t attr;
3439 	int rc = 0;
3440 
3441 	if (pthread_mutexattr_init(&attr)) {
3442 		return -1;
3443 	}
3444 	if (pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE) ||
3445 #ifndef __FreeBSD__
3446 	    pthread_mutexattr_setrobust(&attr, PTHREAD_MUTEX_ROBUST) ||
3447 	    pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED) ||
3448 #endif
3449 	    pthread_mutex_init(mtx, &attr)) {
3450 		rc = -1;
3451 	}
3452 	pthread_mutexattr_destroy(&attr);
3453 	return rc;
3454 }
3455 
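/*
 * Early, transport-independent construction of the controller: pick the
 * initial state for the init state machine (PCIe controllers get an extra
 * INIT_DELAY state), clamp the admin queue size to the limits defined by the
 * NVMe spec, and initialize the qpair lists, async event queues and the
 * process-shared controller lock.
 */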
3456 int
3457 nvme_ctrlr_construct(struct spdk_nvme_ctrlr *ctrlr)
3458 {
3459 	int rc;
3460 
3461 	if (ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) {
3462 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_INIT_DELAY, NVME_TIMEOUT_INFINITE);
3463 	} else {
3464 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_INIT, NVME_TIMEOUT_INFINITE);
3465 	}
3466 
3467 	if (ctrlr->opts.admin_queue_size > SPDK_NVME_ADMIN_QUEUE_MAX_ENTRIES) {
3468 		NVME_CTRLR_ERRLOG(ctrlr, "admin_queue_size %u exceeds the maximum defined by the NVMe spec, using the max value\n",
3469 				  ctrlr->opts.admin_queue_size);
3470 		ctrlr->opts.admin_queue_size = SPDK_NVME_ADMIN_QUEUE_MAX_ENTRIES;
3471 	}
3472 
3473 	if (ctrlr->opts.admin_queue_size < SPDK_NVME_ADMIN_QUEUE_MIN_ENTRIES) {
3474 		NVME_CTRLR_ERRLOG(ctrlr,
3475 				  "admin_queue_size %u is less than the minimum defined by the NVMe spec, using the min value\n",
3476 				  ctrlr->opts.admin_queue_size);
3477 		ctrlr->opts.admin_queue_size = SPDK_NVME_ADMIN_QUEUE_MIN_ENTRIES;
3478 	}
3479 
3480 	ctrlr->flags = 0;
3481 	ctrlr->free_io_qids = NULL;
3482 	ctrlr->is_resetting = false;
3483 	ctrlr->is_failed = false;
3484 	ctrlr->is_destructed = false;
3485 
3486 	TAILQ_INIT(&ctrlr->active_io_qpairs);
3487 	STAILQ_INIT(&ctrlr->queued_aborts);
3488 	STAILQ_INIT(&ctrlr->async_events);
3489 	ctrlr->outstanding_aborts = 0;
3490 
3491 	ctrlr->ana_log_page = NULL;
3492 	ctrlr->ana_log_page_size = 0;
3493 
3494 	rc = nvme_robust_mutex_init_recursive_shared(&ctrlr->ctrlr_lock);
3495 	if (rc != 0) {
3496 		return rc;
3497 	}
3498 
3499 	TAILQ_INIT(&ctrlr->active_procs);
3500 
3501 	return rc;
3502 }
3503 
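/*
 * Cache the CAP register and derive the values that depend on it: weighted
 * round robin support, the minimum (and currently the active) memory page
 * size, and the I/O queue size clamped to [SPDK_NVME_IO_QUEUE_MIN_ENTRIES,
 * MAX_IO_QUEUE_ENTRIES] and to CAP.MQES + 1, subject to the
 * NVME_QUIRK_MINIMUM_IO_QUEUE_SIZE quirk.
 */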
3504 static void
3505 nvme_ctrlr_init_cap(struct spdk_nvme_ctrlr *ctrlr)
3506 {
3507 	nvme_ctrlr_get_cap(ctrlr, &ctrlr->cap);
3508 
3509 	if (ctrlr->cap.bits.ams & SPDK_NVME_CAP_AMS_WRR) {
3510 		ctrlr->flags |= SPDK_NVME_CTRLR_WRR_SUPPORTED;
3511 	}
3512 
3513 	ctrlr->min_page_size = 1u << (12 + ctrlr->cap.bits.mpsmin);
3514 
3515 	/* For now, always select page_size == min_page_size. */
3516 	ctrlr->page_size = ctrlr->min_page_size;
3517 
3518 	ctrlr->opts.io_queue_size = spdk_max(ctrlr->opts.io_queue_size, SPDK_NVME_IO_QUEUE_MIN_ENTRIES);
3519 	ctrlr->opts.io_queue_size = spdk_min(ctrlr->opts.io_queue_size, MAX_IO_QUEUE_ENTRIES);
3520 	if (ctrlr->quirks & NVME_QUIRK_MINIMUM_IO_QUEUE_SIZE &&
3521 	    ctrlr->opts.io_queue_size == DEFAULT_IO_QUEUE_SIZE) {
3522 		/* If the user specifically set an I/O queue size different from the
3523 		 * default, use that value.  Otherwise overwrite it with the quirked value.
3524 		 * This allows the quirk to be overridden when necessary.
3525 		 * However, cap.mqes still needs to be respected.
3526 		 */
3527 		ctrlr->opts.io_queue_size = DEFAULT_IO_QUEUE_SIZE_FOR_QUIRK;
3528 	}
3529 	ctrlr->opts.io_queue_size = spdk_min(ctrlr->opts.io_queue_size, ctrlr->cap.bits.mqes + 1u);
3530 
3531 	ctrlr->opts.io_queue_requests = spdk_max(ctrlr->opts.io_queue_requests, ctrlr->opts.io_queue_size);
3532 }
3533 
3534 void
3535 nvme_ctrlr_destruct_finish(struct spdk_nvme_ctrlr *ctrlr)
3536 {
3537 	pthread_mutex_destroy(&ctrlr->ctrlr_lock);
3538 }
3539 
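/*
 * First half of controller destruction: abort outstanding admin activity,
 * drop queued async events, free all active I/O qpairs and per-controller
 * buffers, then either disable the controller directly (when shutdown
 * notification is disabled) or start an asynchronous shutdown.
 */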
3540 void
3541 nvme_ctrlr_destruct_async(struct spdk_nvme_ctrlr *ctrlr,
3542 			  struct nvme_ctrlr_detach_ctx *ctx)
3543 {
3544 	struct spdk_nvme_qpair *qpair, *tmp;
3545 	struct spdk_nvme_ctrlr_aer_completion_list *event;
3546 
3547 	NVME_CTRLR_DEBUGLOG(ctrlr, "Preparing to destruct SSD\n");
3548 
3549 	ctrlr->is_destructed = true;
3550 
3551 	spdk_nvme_qpair_process_completions(ctrlr->adminq, 0);
3552 
3553 	nvme_ctrlr_abort_queued_aborts(ctrlr);
3554 	nvme_transport_admin_qpair_abort_aers(ctrlr->adminq);
3555 
3556 	while (!STAILQ_EMPTY(&ctrlr->async_events)) {
3557 		event = STAILQ_FIRST(&ctrlr->async_events);
3558 		STAILQ_REMOVE_HEAD(&ctrlr->async_events, link);
3559 		free(event);
3560 	}
3561 
3562 	TAILQ_FOREACH_SAFE(qpair, &ctrlr->active_io_qpairs, tailq, tmp) {
3563 		spdk_nvme_ctrlr_free_io_qpair(qpair);
3564 	}
3565 
3566 	nvme_ctrlr_free_doorbell_buffer(ctrlr);
3567 	nvme_ctrlr_free_iocs_specific_data(ctrlr);
3568 
3569 	if (ctrlr->opts.no_shn_notification) {
3570 		NVME_CTRLR_INFOLOG(ctrlr, "Disabling SSD without shutdown notification\n");
3571 		nvme_ctrlr_disable(ctrlr);
3572 		ctx->shutdown_complete = true;
3573 	} else {
3574 		nvme_ctrlr_shutdown_async(ctrlr, ctx);
3575 	}
3576 }
3577 
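/*
 * Second half of controller destruction: poll the asynchronous shutdown
 * until it completes (or fails), then release namespaces, the active
 * namespace list, the free qid bit array and the ANA log page before handing
 * the controller to the transport for final teardown.  Returns -EAGAIN while
 * the shutdown is still in progress.
 */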
3578 int
3579 nvme_ctrlr_destruct_poll_async(struct spdk_nvme_ctrlr *ctrlr,
3580 			       struct nvme_ctrlr_detach_ctx *ctx)
3581 {
3582 	int rc = 0;
3583 
3584 	if (!ctx->shutdown_complete) {
3585 		rc = nvme_ctrlr_shutdown_poll_async(ctrlr, ctx);
3586 		if (rc == -EAGAIN) {
3587 			return -EAGAIN;
3588 		}
3589 		/* Destruct ctrlr forcefully for any other error. */
3590 	}
3591 
3592 	if (ctx->cb_fn) {
3593 		ctx->cb_fn(ctrlr);
3594 	}
3595 
3596 	nvme_ctrlr_destruct_namespaces(ctrlr);
3597 	spdk_free(ctrlr->active_ns_list);
3598 	ctrlr->active_ns_list = NULL;
3599 	ctrlr->max_active_ns_idx = 0;
3600 
3601 	spdk_bit_array_free(&ctrlr->free_io_qids);
3602 
3603 	spdk_free(ctrlr->ana_log_page);
3604 	free(ctrlr->copied_ana_desc);
3605 	ctrlr->ana_log_page = NULL;
3606 	ctrlr->copied_ana_desc = NULL;
3607 	ctrlr->ana_log_page_size = 0;
3608 
3609 	nvme_transport_ctrlr_destruct(ctrlr);
3610 
3611 	return rc;
3612 }
3613 
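/*
 * Synchronous wrapper around the asynchronous destruct path: start
 * destruction and keep polling, with a short delay between attempts, until
 * it no longer returns -EAGAIN.
 */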
3614 void
3615 nvme_ctrlr_destruct(struct spdk_nvme_ctrlr *ctrlr)
3616 {
3617 	struct nvme_ctrlr_detach_ctx ctx = {};
3618 	int rc;
3619 
3620 	nvme_ctrlr_destruct_async(ctrlr, &ctx);
3621 
3622 	while (1) {
3623 		rc = nvme_ctrlr_destruct_poll_async(ctrlr, &ctx);
3624 		if (rc != -EAGAIN) {
3625 			break;
3626 		}
3627 		nvme_delay(1000);
3628 	}
3629 }
3630 
3631 int
3632 nvme_ctrlr_submit_admin_request(struct spdk_nvme_ctrlr *ctrlr,
3633 				struct nvme_request *req)
3634 {
3635 	return nvme_qpair_submit_request(ctrlr->adminq, req);
3636 }
3637 
3638 static void
3639 nvme_keep_alive_completion(void *cb_ctx, const struct spdk_nvme_cpl *cpl)
3640 {
3641 	/* Do nothing */
3642 }
3643 
3644 /*
3645  * Check if we need to send a Keep Alive command.
3646  * Caller must hold ctrlr->ctrlr_lock.
3647  */
3648 static int
3649 nvme_ctrlr_keep_alive(struct spdk_nvme_ctrlr *ctrlr)
3650 {
3651 	uint64_t now;
3652 	struct nvme_request *req;
3653 	struct spdk_nvme_cmd *cmd;
3654 	int rc = 0;
3655 
3656 	now = spdk_get_ticks();
3657 	if (now < ctrlr->next_keep_alive_tick) {
3658 		return rc;
3659 	}
3660 
3661 	req = nvme_allocate_request_null(ctrlr->adminq, nvme_keep_alive_completion, NULL);
3662 	if (req == NULL) {
3663 		return rc;
3664 	}
3665 
3666 	cmd = &req->cmd;
3667 	cmd->opc = SPDK_NVME_OPC_KEEP_ALIVE;
3668 
3669 	rc = nvme_ctrlr_submit_admin_request(ctrlr, req);
3670 	if (rc != 0) {
3671 		NVME_CTRLR_ERRLOG(ctrlr, "Submitting Keep Alive failed\n");
3672 		rc = -ENXIO;
3673 	}
3674 
3675 	ctrlr->next_keep_alive_tick = now + ctrlr->keep_alive_interval_ticks;
3676 	return rc;
3677 }
3678 
3679 int32_t
3680 spdk_nvme_ctrlr_process_admin_completions(struct spdk_nvme_ctrlr *ctrlr)
3681 {
3682 	int32_t num_completions;
3683 	int32_t rc;
3684 
3685 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
3686 
3687 	if (ctrlr->keep_alive_interval_ticks) {
3688 		rc = nvme_ctrlr_keep_alive(ctrlr);
3689 		if (rc) {
3690 			nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
3691 			return rc;
3692 		}
3693 	}
3694 
3695 	rc = nvme_io_msg_process(ctrlr);
3696 	if (rc < 0) {
3697 		nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
3698 		return rc;
3699 	}
3700 	num_completions = rc;
3701 
3702 	rc = spdk_nvme_qpair_process_completions(ctrlr->adminq, 0);
3703 
3704 	nvme_ctrlr_complete_queued_async_events(ctrlr);
3705 
3706 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
3707 
3708 	if (rc < 0) {
3709 		num_completions = rc;
3710 	} else {
3711 		num_completions += rc;
3712 	}
3713 
3714 	return num_completions;
3715 }
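
/*
 * A minimal usage sketch, not part of the library: applications are expected
 * to call spdk_nvme_ctrlr_process_admin_completions() periodically from the
 * thread that owns the controller so that keep alives, asynchronous events
 * and other admin completions make progress.  The poll_admin() name below is
 * purely illustrative.
 *
 *	static void
 *	poll_admin(struct spdk_nvme_ctrlr *ctrlr)
 *	{
 *		int32_t rc = spdk_nvme_ctrlr_process_admin_completions(ctrlr);
 *
 *		if (rc < 0) {
 *			SPDK_ERRLOG("admin queue processing failed: %d\n", rc);
 *		}
 *	}
 */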
3716 
3717 const struct spdk_nvme_ctrlr_data *
3718 spdk_nvme_ctrlr_get_data(struct spdk_nvme_ctrlr *ctrlr)
3719 {
3720 	return &ctrlr->cdata;
3721 }
3722 
3723 union spdk_nvme_csts_register spdk_nvme_ctrlr_get_regs_csts(struct spdk_nvme_ctrlr *ctrlr)
3724 {
3725 	union spdk_nvme_csts_register csts;
3726 
3727 	if (nvme_ctrlr_get_csts(ctrlr, &csts)) {
3728 		csts.raw = SPDK_NVME_INVALID_REGISTER_VALUE;
3729 	}
3730 	return csts;
3731 }
3732 
3733 union spdk_nvme_cap_register spdk_nvme_ctrlr_get_regs_cap(struct spdk_nvme_ctrlr *ctrlr)
3734 {
3735 	return ctrlr->cap;
3736 }
3737 
3738 union spdk_nvme_vs_register spdk_nvme_ctrlr_get_regs_vs(struct spdk_nvme_ctrlr *ctrlr)
3739 {
3740 	return ctrlr->vs;
3741 }
3742 
3743 union spdk_nvme_cmbsz_register spdk_nvme_ctrlr_get_regs_cmbsz(struct spdk_nvme_ctrlr *ctrlr)
3744 {
3745 	union spdk_nvme_cmbsz_register cmbsz;
3746 
3747 	if (nvme_ctrlr_get_cmbsz(ctrlr, &cmbsz)) {
3748 		cmbsz.raw = 0;
3749 	}
3750 
3751 	return cmbsz;
3752 }
3753 
3754 union spdk_nvme_pmrcap_register spdk_nvme_ctrlr_get_regs_pmrcap(struct spdk_nvme_ctrlr *ctrlr)
3755 {
3756 	union spdk_nvme_pmrcap_register pmrcap;
3757 
3758 	if (nvme_ctrlr_get_pmrcap(ctrlr, &pmrcap)) {
3759 		pmrcap.raw = 0;
3760 	}
3761 
3762 	return pmrcap;
3763 }
3764 
3765 uint64_t
3766 spdk_nvme_ctrlr_get_pmrsz(struct spdk_nvme_ctrlr *ctrlr)
3767 {
3768 	return ctrlr->pmr_size;
3769 }
3770 
3771 uint32_t
3772 spdk_nvme_ctrlr_get_num_ns(struct spdk_nvme_ctrlr *ctrlr)
3773 {
3774 	return ctrlr->num_ns;
3775 }
3776 
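/*
 * Binary search for nsid in the sorted active namespace list.  Returns the
 * index of the namespace in active_ns_list, or -1 if the namespace is not
 * active (or the list has not been populated yet).
 */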
3777 static int32_t
3778 nvme_ctrlr_active_ns_idx(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid)
3779 {
3780 	int32_t result = -1;
3781 
3782 	if (ctrlr->active_ns_list == NULL || nsid == 0 || nsid > ctrlr->cdata.nn) {
3783 		return result;
3784 	}
3785 
3786 	int32_t lower = 0;
3787 	int32_t upper = ctrlr->max_active_ns_idx;
3788 	int32_t mid;
3789 
3790 	while (lower <= upper) {
3791 		mid = lower + (upper - lower) / 2;
3792 		if (ctrlr->active_ns_list[mid] == nsid) {
3793 			result = mid;
3794 			break;
3795 		} else {
3796 			if (ctrlr->active_ns_list[mid] != 0 && ctrlr->active_ns_list[mid] < nsid) {
3797 				lower = mid + 1;
3798 			} else {
3799 				upper = mid - 1;
3800 			}
3801 
3802 		}
3803 	}
3804 
3805 	return result;
3806 }
3807 
3808 bool
3809 spdk_nvme_ctrlr_is_active_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid)
3810 {
3811 	return nvme_ctrlr_active_ns_idx(ctrlr, nsid) != -1;
3812 }
3813 
3814 uint32_t
3815 spdk_nvme_ctrlr_get_first_active_ns(struct spdk_nvme_ctrlr *ctrlr)
3816 {
3817 	return ctrlr->active_ns_list ? ctrlr->active_ns_list[0] : 0;
3818 }
3819 
3820 uint32_t
3821 spdk_nvme_ctrlr_get_next_active_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t prev_nsid)
3822 {
3823 	int32_t nsid_idx = nvme_ctrlr_active_ns_idx(ctrlr, prev_nsid);
3824 	if (nsid_idx >= 0 && (uint32_t)nsid_idx < ctrlr->max_active_ns_idx) {
3825 		return ctrlr->active_ns_list[nsid_idx + 1];
3826 	}
3827 	return 0;
3828 }
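
/*
 * Illustrative iteration sketch (local variable names are not part of the
 * library): walk the active namespaces with the first/next helpers above and
 * resolve each NSID to its namespace handle.
 *
 *	uint32_t nsid;
 *	struct spdk_nvme_ns *ns;
 *
 *	for (nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr); nsid != 0;
 *	     nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, nsid)) {
 *		ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid);
 *		if (ns == NULL) {
 *			continue;
 *		}
 *		... use ns, e.g. spdk_nvme_ns_get_sector_size(ns) ...
 *	}
 */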
3829 
3830 struct spdk_nvme_ns *
3831 spdk_nvme_ctrlr_get_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid)
3832 {
3833 	if (nsid < 1 || nsid > ctrlr->num_ns) {
3834 		return NULL;
3835 	}
3836 
3837 	return &ctrlr->ns[nsid - 1];
3838 }
3839 
3840 struct spdk_pci_device *
3841 spdk_nvme_ctrlr_get_pci_device(struct spdk_nvme_ctrlr *ctrlr)
3842 {
3843 	if (ctrlr == NULL) {
3844 		return NULL;
3845 	}
3846 
3847 	if (ctrlr->trid.trtype != SPDK_NVME_TRANSPORT_PCIE) {
3848 		return NULL;
3849 	}
3850 
3851 	return nvme_ctrlr_proc_get_devhandle(ctrlr);
3852 }
3853 
3854 uint32_t
3855 spdk_nvme_ctrlr_get_max_xfer_size(const struct spdk_nvme_ctrlr *ctrlr)
3856 {
3857 	return ctrlr->max_xfer_size;
3858 }
3859 
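/*
 * Register the asynchronous event callback for the current process.  The
 * callback and its argument are stored per attached process, so each process
 * sharing a controller registers its own handler.
 */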
3860 void
3861 spdk_nvme_ctrlr_register_aer_callback(struct spdk_nvme_ctrlr *ctrlr,
3862 				      spdk_nvme_aer_cb aer_cb_fn,
3863 				      void *aer_cb_arg)
3864 {
3865 	struct spdk_nvme_ctrlr_process *active_proc;
3866 
3867 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
3868 
3869 	active_proc = nvme_ctrlr_get_current_process(ctrlr);
3870 	if (active_proc) {
3871 		active_proc->aer_cb_fn = aer_cb_fn;
3872 		active_proc->aer_cb_arg = aer_cb_arg;
3873 	}
3874 
3875 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
3876 }
3877 
3878 void
3879 spdk_nvme_ctrlr_register_timeout_callback(struct spdk_nvme_ctrlr *ctrlr,
3880 		uint64_t timeout_io_us, uint64_t timeout_admin_us,
3881 		spdk_nvme_timeout_cb cb_fn, void *cb_arg)
3882 {
3883 	struct spdk_nvme_ctrlr_process	*active_proc;
3884 
3885 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
3886 
3887 	active_proc = nvme_ctrlr_get_current_process(ctrlr);
3888 	if (active_proc) {
3889 		active_proc->timeout_io_ticks = timeout_io_us * spdk_get_ticks_hz() / 1000000ULL;
3890 		active_proc->timeout_admin_ticks = timeout_admin_us * spdk_get_ticks_hz() / 1000000ULL;
3891 		active_proc->timeout_cb_fn = cb_fn;
3892 		active_proc->timeout_cb_arg = cb_arg;
3893 	}
3894 
3895 	ctrlr->timeout_enabled = true;
3896 
3897 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
3898 }
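
/*
 * Hedged usage sketch; timeout_cb below is illustrative and assumes the
 * spdk_nvme_timeout_cb signature declared in include/spdk/nvme.h, where
 * qpair is NULL for admin commands.  Timeouts are given in microseconds.
 *
 *	static void
 *	timeout_cb(void *cb_arg, struct spdk_nvme_ctrlr *ctrlr,
 *		   struct spdk_nvme_qpair *qpair, uint16_t cid)
 *	{
 *		SPDK_WARNLOG("command %u timed out on the %s queue\n", cid,
 *			     qpair == NULL ? "admin" : "I/O");
 *	}
 *
 *	spdk_nvme_ctrlr_register_timeout_callback(ctrlr, 5 * 1000 * 1000,
 *						   10 * 1000 * 1000, timeout_cb, NULL);
 */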
3899 
3900 bool
3901 spdk_nvme_ctrlr_is_log_page_supported(struct spdk_nvme_ctrlr *ctrlr, uint8_t log_page)
3902 {
3903 	/* No bounds check necessary, since log_page is uint8_t and log_page_supported has 256 entries */
3904 	SPDK_STATIC_ASSERT(sizeof(ctrlr->log_page_supported) == 256, "log_page_supported size mismatch");
3905 	return ctrlr->log_page_supported[log_page];
3906 }
3907 
3908 bool
3909 spdk_nvme_ctrlr_is_feature_supported(struct spdk_nvme_ctrlr *ctrlr, uint8_t feature_code)
3910 {
3911 	/* No bounds check necessary, since feature_code is uint8_t and feature_supported has 256 entries */
3912 	SPDK_STATIC_ASSERT(sizeof(ctrlr->feature_supported) == 256, "feature_supported size mismatch");
3913 	return ctrlr->feature_supported[feature_code];
3914 }
3915 
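/*
 * Attach the namespace identified by nsid to the controllers in the payload
 * list.  The command is issued synchronously; on success the active
 * namespace list is refreshed and the local namespace object is
 * (re)constructed.
 */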
3916 int
3917 spdk_nvme_ctrlr_attach_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid,
3918 			  struct spdk_nvme_ctrlr_list *payload)
3919 {
3920 	struct nvme_completion_poll_status	*status;
3921 	int					res;
3922 	struct spdk_nvme_ns			*ns;
3923 
3924 	if (nsid == 0) {
3925 		return -EINVAL;
3926 	}
3927 
3928 	status = calloc(1, sizeof(*status));
3929 	if (!status) {
3930 		NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n");
3931 		return -ENOMEM;
3932 	}
3933 
3934 	res = nvme_ctrlr_cmd_attach_ns(ctrlr, nsid, payload,
3935 				       nvme_completion_poll_cb, status);
3936 	if (res) {
3937 		free(status);
3938 		return res;
3939 	}
3940 	if (nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock)) {
3941 		NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_attach_ns failed!\n");
3942 		if (!status->timed_out) {
3943 			free(status);
3944 		}
3945 		return -ENXIO;
3946 	}
3947 	free(status);
3948 
3949 	res = nvme_ctrlr_identify_active_ns(ctrlr);
3950 	if (res) {
3951 		return res;
3952 	}
3953 
3954 	ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid);
3955 	assert(ns != NULL);
3956 	return nvme_ns_construct(ns, nsid, ctrlr);
3957 }
3958 
3959 int
3960 spdk_nvme_ctrlr_detach_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid,
3961 			  struct spdk_nvme_ctrlr_list *payload)
3962 {
3963 	struct nvme_completion_poll_status	*status;
3964 	int					res;
3965 	struct spdk_nvme_ns			*ns;
3966 
3967 	if (nsid == 0) {
3968 		return -EINVAL;
3969 	}
3970 
3971 	status = calloc(1, sizeof(*status));
3972 	if (!status) {
3973 		NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n");
3974 		return -ENOMEM;
3975 	}
3976 
3977 	res = nvme_ctrlr_cmd_detach_ns(ctrlr, nsid, payload,
3978 				       nvme_completion_poll_cb, status);
3979 	if (res) {
3980 		free(status);
3981 		return res;
3982 	}
3983 	if (nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock)) {
3984 		NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_detach_ns failed!\n");
3985 		if (!status->timed_out) {
3986 			free(status);
3987 		}
3988 		return -ENXIO;
3989 	}
3990 	free(status);
3991 
3992 	res = nvme_ctrlr_identify_active_ns(ctrlr);
3993 	if (res) {
3994 		return res;
3995 	}
3996 
3997 	ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid);
3998 	assert(ns != NULL);
3999 	/* The namespace is no longer attached to this controller, so tear down the local NS object. */
4000 	nvme_ns_destruct(ns);
4001 
4002 	return 0;
4003 }
4004 
4005 uint32_t
4006 spdk_nvme_ctrlr_create_ns(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_ns_data *payload)
4007 {
4008 	struct nvme_completion_poll_status	*status;
4009 	int					res;
4010 	uint32_t				nsid;
4011 	struct spdk_nvme_ns			*ns;
4012 
4013 	status = calloc(1, sizeof(*status));
4014 	if (!status) {
4015 		NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n");
4016 		return 0;
4017 	}
4018 
4019 	res = nvme_ctrlr_cmd_create_ns(ctrlr, payload, nvme_completion_poll_cb, status);
4020 	if (res) {
4021 		free(status);
4022 		return 0;
4023 	}
4024 	if (nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock)) {
4025 		NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_create_ns failed!\n");
4026 		if (!status->timed_out) {
4027 			free(status);
4028 		}
4029 		return 0;
4030 	}
4031 
4032 	nsid = status->cpl.cdw0;
4033 	free(status);
4034 
4035 	assert(nsid > 0);
4036 
4037 	ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid);
4038 	assert(ns != NULL);
4039 	/* The new namespace is not attached yet, so construct it as an inactive NS. */
4040 	res = nvme_ns_construct(ns, nsid, ctrlr);
4041 	if (res) {
4042 		return 0;
4043 	}
4044 
4045 	/* Return the namespace ID that was created */
4046 	return nsid;
4047 }
4048 
4049 int
4050 spdk_nvme_ctrlr_delete_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid)
4051 {
4052 	struct nvme_completion_poll_status	*status;
4053 	int					res;
4054 	struct spdk_nvme_ns			*ns;
4055 
4056 	if (nsid == 0) {
4057 		return -EINVAL;
4058 	}
4059 
4060 	status = calloc(1, sizeof(*status));
4061 	if (!status) {
4062 		NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n");
4063 		return -ENOMEM;
4064 	}
4065 
4066 	res = nvme_ctrlr_cmd_delete_ns(ctrlr, nsid, nvme_completion_poll_cb, status);
4067 	if (res) {
4068 		free(status);
4069 		return res;
4070 	}
4071 	if (nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock)) {
4072 		NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_delete_ns failed!\n");
4073 		if (!status->timed_out) {
4074 			free(status);
4075 		}
4076 		return -ENXIO;
4077 	}
4078 	free(status);
4079 
4080 	res = nvme_ctrlr_identify_active_ns(ctrlr);
4081 	if (res) {
4082 		return res;
4083 	}
4084 
4085 	ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid);
4086 	assert(ns != NULL);
4087 	nvme_ns_destruct(ns);
4088 
4089 	return 0;
4090 }
4091 
4092 int
4093 spdk_nvme_ctrlr_format(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid,
4094 		       struct spdk_nvme_format *format)
4095 {
4096 	struct nvme_completion_poll_status	*status;
4097 	int					res;
4098 
4099 	status = calloc(1, sizeof(*status));
4100 	if (!status) {
4101 		NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n");
4102 		return -ENOMEM;
4103 	}
4104 
4105 	res = nvme_ctrlr_cmd_format(ctrlr, nsid, format, nvme_completion_poll_cb,
4106 				    status);
4107 	if (res) {
4108 		free(status);
4109 		return res;
4110 	}
4111 	if (nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock)) {
4112 		NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_format failed!\n");
4113 		if (!status->timed_out) {
4114 			free(status);
4115 		}
4116 		return -ENXIO;
4117 	}
4118 	free(status);
4119 
4120 	return spdk_nvme_ctrlr_reset(ctrlr);
4121 }
4122 
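/*
 * Synchronous firmware update: the image is pushed with a series of Firmware
 * Image Download commands, each transferring at most min_page_size bytes,
 * followed by a Firmware Commit for the requested slot and commit action and
 * a controller reset.  The NVMe status of the commit is reported through
 * completion_status.
 */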
4123 int
4124 spdk_nvme_ctrlr_update_firmware(struct spdk_nvme_ctrlr *ctrlr, void *payload, uint32_t size,
4125 				int slot, enum spdk_nvme_fw_commit_action commit_action, struct spdk_nvme_status *completion_status)
4126 {
4127 	struct spdk_nvme_fw_commit		fw_commit;
4128 	struct nvme_completion_poll_status	*status;
4129 	int					res;
4130 	unsigned int				size_remaining;
4131 	unsigned int				offset;
4132 	unsigned int				transfer;
4133 	void					*p;
4134 
4135 	if (!completion_status) {
4136 		return -EINVAL;
4137 	}
4138 	memset(completion_status, 0, sizeof(struct spdk_nvme_status));
4139 	if (size % 4) {
4140 		NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_update_firmware invalid size - must be a multiple of 4 bytes!\n");
4141 		return -1;
4142 	}
4143 
4144 	/* Currently only SPDK_NVME_FW_COMMIT_REPLACE_IMG and
4145 	 * SPDK_NVME_FW_COMMIT_REPLACE_AND_ENABLE_IMG are supported.
4146 	 */
4147 	if ((commit_action != SPDK_NVME_FW_COMMIT_REPLACE_IMG) &&
4148 	    (commit_action != SPDK_NVME_FW_COMMIT_REPLACE_AND_ENABLE_IMG)) {
4149 		NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_update_firmware invalid command!\n");
4150 		NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_update_firmware unsupported commit action!\n");
4151 	}
4152 
4153 	status = calloc(1, sizeof(*status));
4154 	if (!status) {
4155 		NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n");
4156 		return -ENOMEM;
4157 	}
4158 
4159 	/* Firmware download */
4160 	size_remaining = size;
4161 	offset = 0;
4162 	p = payload;
4163 
4164 	while (size_remaining > 0) {
4165 		transfer = spdk_min(size_remaining, ctrlr->min_page_size);
4166 
4167 		memset(status, 0, sizeof(*status));
4168 		res = nvme_ctrlr_cmd_fw_image_download(ctrlr, transfer, offset, p,
4169 						       nvme_completion_poll_cb,
4170 						       status);
4171 		if (res) {
4172 			free(status);
4173 			return res;
4174 		}
4175 
4176 		if (nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock)) {
4177 			NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_fw_image_download failed!\n");
4178 			if (!status->timed_out) {
4179 				free(status);
4180 			}
4181 			return -ENXIO;
4182 		}
4183 		p += transfer;
4184 		offset += transfer;
4185 		size_remaining -= transfer;
4186 	}
4187 
4188 	/* Firmware commit */
4189 	memset(&fw_commit, 0, sizeof(struct spdk_nvme_fw_commit));
4190 	fw_commit.fs = slot;
4191 	fw_commit.ca = commit_action;
4192 
4193 	memset(status, 0, sizeof(*status));
4194 	res = nvme_ctrlr_cmd_fw_commit(ctrlr, &fw_commit, nvme_completion_poll_cb,
4195 				       status);
4196 	if (res) {
4197 		free(status);
4198 		return res;
4199 	}
4200 
4201 	res = nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock);
4202 
4203 	memcpy(completion_status, &status->cpl.status, sizeof(struct spdk_nvme_status));
4204 
4205 	if (!status->timed_out) {
4206 		free(status);
4207 	}
4208 
4209 	if (res) {
4210 		if (completion_status->sct != SPDK_NVME_SCT_COMMAND_SPECIFIC ||
4211 		    completion_status->sc != SPDK_NVME_SC_FIRMWARE_REQ_NVM_RESET) {
4212 			if (completion_status->sct == SPDK_NVME_SCT_COMMAND_SPECIFIC  &&
4213 			    completion_status->sc == SPDK_NVME_SC_FIRMWARE_REQ_CONVENTIONAL_RESET) {
4214 				NVME_CTRLR_NOTICELOG(ctrlr,
4215 						     "firmware activation requires a conventional reset to be performed!\n");
4216 			} else {
4217 				NVME_CTRLR_ERRLOG(ctrlr, "nvme_ctrlr_cmd_fw_commit failed!\n");
4218 			}
4219 			return -ENXIO;
4220 		}
4221 	}
4222 
4223 	return spdk_nvme_ctrlr_reset(ctrlr);
4224 }
4225 
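/*
 * Reserve the controller memory buffer for data transfers.  Returns -ENOTSUP
 * if the CMB does not support both read and write data, otherwise the usable
 * size in bytes (or a negative errno from the transport).  The size is
 * computed from CMBSZ: SZ is expressed in units of 4 KiB << (4 * SZU),
 * i.e. 4 KiB, 64 KiB, 1 MiB, ... per the NVMe spec.
 */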
4226 int
4227 spdk_nvme_ctrlr_reserve_cmb(struct spdk_nvme_ctrlr *ctrlr)
4228 {
4229 	int rc, size;
4230 	union spdk_nvme_cmbsz_register cmbsz;
4231 
4232 	cmbsz = spdk_nvme_ctrlr_get_regs_cmbsz(ctrlr);
4233 
4234 	if (cmbsz.bits.rds == 0 || cmbsz.bits.wds == 0) {
4235 		return -ENOTSUP;
4236 	}
4237 
4238 	size = cmbsz.bits.sz * (0x1000 << (cmbsz.bits.szu * 4));
4239 
4240 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
4241 	rc = nvme_transport_ctrlr_reserve_cmb(ctrlr);
4242 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
4243 
4244 	if (rc < 0) {
4245 		return rc;
4246 	}
4247 
4248 	return size;
4249 }
4250 
4251 void *
4252 spdk_nvme_ctrlr_map_cmb(struct spdk_nvme_ctrlr *ctrlr, size_t *size)
4253 {
4254 	void *buf;
4255 
4256 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
4257 	buf = nvme_transport_ctrlr_map_cmb(ctrlr, size);
4258 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
4259 
4260 	return buf;
4261 }
4262 
4263 void
4264 spdk_nvme_ctrlr_unmap_cmb(struct spdk_nvme_ctrlr *ctrlr)
4265 {
4266 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
4267 	nvme_transport_ctrlr_unmap_cmb(ctrlr);
4268 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
4269 }
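
/*
 * Minimal sketch of the expected CMB call sequence (error handling trimmed,
 * local names are illustrative): reserve the buffer, map it into the
 * process, use it, then unmap it before detaching.
 *
 *	size_t cmb_size;
 *	void *cmb;
 *
 *	if (spdk_nvme_ctrlr_reserve_cmb(ctrlr) >= 0) {
 *		cmb = spdk_nvme_ctrlr_map_cmb(ctrlr, &cmb_size);
 *		if (cmb != NULL) {
 *			... use up to cmb_size bytes at cmb ...
 *			spdk_nvme_ctrlr_unmap_cmb(ctrlr);
 *		}
 *	}
 */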
4270 
4271 int
4272 spdk_nvme_ctrlr_enable_pmr(struct spdk_nvme_ctrlr *ctrlr)
4273 {
4274 	int rc;
4275 
4276 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
4277 	rc = nvme_transport_ctrlr_enable_pmr(ctrlr);
4278 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
4279 
4280 	return rc;
4281 }
4282 
4283 int
4284 spdk_nvme_ctrlr_disable_pmr(struct spdk_nvme_ctrlr *ctrlr)
4285 {
4286 	int rc;
4287 
4288 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
4289 	rc = nvme_transport_ctrlr_disable_pmr(ctrlr);
4290 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
4291 
4292 	return rc;
4293 }
4294 
4295 void *
4296 spdk_nvme_ctrlr_map_pmr(struct spdk_nvme_ctrlr *ctrlr, size_t *size)
4297 {
4298 	void *buf;
4299 
4300 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
4301 	buf = nvme_transport_ctrlr_map_pmr(ctrlr, size);
4302 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
4303 
4304 	return buf;
4305 }
4306 
4307 int
4308 spdk_nvme_ctrlr_unmap_pmr(struct spdk_nvme_ctrlr *ctrlr)
4309 {
4310 	int rc;
4311 
4312 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
4313 	rc = nvme_transport_ctrlr_unmap_pmr(ctrlr);
4314 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
4315 
4316 	return rc;
4317 }
4318 
4319 bool
4320 spdk_nvme_ctrlr_is_discovery(struct spdk_nvme_ctrlr *ctrlr)
4321 {
4322 	assert(ctrlr);
4323 
4324 	return !strncmp(ctrlr->trid.subnqn, SPDK_NVMF_DISCOVERY_NQN,
4325 			strlen(SPDK_NVMF_DISCOVERY_NQN));
4326 }
4327 
4328 bool
4329 spdk_nvme_ctrlr_is_fabrics(struct spdk_nvme_ctrlr *ctrlr)
4330 {
4331 	assert(ctrlr);
4332 
4333 	/* We always define non-fabrics trtypes outside of the 8-bit range
4334 	 * of NVMe-oF trtype.
4335 	 */
4336 	return ctrlr->trid.trtype < UINT8_MAX;
4337 }
4338 
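/*
 * Security Receive / Security Send helpers: issue the corresponding admin
 * command synchronously for the given security protocol (secp), protocol
 * specific field (spsp) and NVMe security specific field (nssf), polling the
 * admin queue until the command completes.
 */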
4339 int
4340 spdk_nvme_ctrlr_security_receive(struct spdk_nvme_ctrlr *ctrlr, uint8_t secp,
4341 				 uint16_t spsp, uint8_t nssf, void *payload, size_t size)
4342 {
4343 	struct nvme_completion_poll_status	*status;
4344 	int					res;
4345 
4346 	status = calloc(1, sizeof(*status));
4347 	if (!status) {
4348 		NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n");
4349 		return -ENOMEM;
4350 	}
4351 
4352 	res = spdk_nvme_ctrlr_cmd_security_receive(ctrlr, secp, spsp, nssf, payload, size,
4353 			nvme_completion_poll_cb, status);
4354 	if (res) {
4355 		free(status);
4356 		return res;
4357 	}
4358 	if (nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock)) {
4359 		NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_cmd_security_receive failed!\n");
4360 		if (!status->timed_out) {
4361 			free(status);
4362 		}
4363 		return -ENXIO;
4364 	}
4365 	free(status);
4366 
4367 	return 0;
4368 }
4369 
4370 int
4371 spdk_nvme_ctrlr_security_send(struct spdk_nvme_ctrlr *ctrlr, uint8_t secp,
4372 			      uint16_t spsp, uint8_t nssf, void *payload, size_t size)
4373 {
4374 	struct nvme_completion_poll_status	*status;
4375 	int					res;
4376 
4377 	status = calloc(1, sizeof(*status));
4378 	if (!status) {
4379 		NVME_CTRLR_ERRLOG(ctrlr, "Failed to allocate status tracker\n");
4380 		return -ENOMEM;
4381 	}
4382 
4383 	res = spdk_nvme_ctrlr_cmd_security_send(ctrlr, secp, spsp, nssf, payload, size,
4384 						nvme_completion_poll_cb,
4385 						status);
4386 	if (res) {
4387 		free(status);
4388 		return res;
4389 	}
4390 	if (nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock)) {
4391 		NVME_CTRLR_ERRLOG(ctrlr, "spdk_nvme_ctrlr_cmd_security_send failed!\n");
4392 		if (!status->timed_out) {
4393 			free(status);
4394 		}
4395 		return -ENXIO;
4396 	}
4397 
4398 	free(status);
4399 
4400 	return 0;
4401 }
4402 
4403 uint64_t
4404 spdk_nvme_ctrlr_get_flags(struct spdk_nvme_ctrlr *ctrlr)
4405 {
4406 	return ctrlr->flags;
4407 }
4408 
4409 const struct spdk_nvme_transport_id *
4410 spdk_nvme_ctrlr_get_transport_id(struct spdk_nvme_ctrlr *ctrlr)
4411 {
4412 	return &ctrlr->trid;
4413 }
4414 
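/*
 * Allocate an I/O queue ID from the controller's free_io_qids bit array.
 * Returns the queue ID on success or -1 if no free I/O queue ID is left; the
 * ID must later be returned with spdk_nvme_ctrlr_free_qid().
 */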
4415 int32_t
4416 spdk_nvme_ctrlr_alloc_qid(struct spdk_nvme_ctrlr *ctrlr)
4417 {
4418 	uint32_t qid;
4419 
4420 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
4421 	qid = spdk_bit_array_find_first_set(ctrlr->free_io_qids, 1);
4422 	if (qid > ctrlr->opts.num_io_queues) {
4423 		NVME_CTRLR_ERRLOG(ctrlr, "No free I/O queue IDs\n");
4424 		nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
4425 		return -1;
4426 	}
4427 
4428 	spdk_bit_array_clear(ctrlr->free_io_qids, qid);
4429 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
4430 	return qid;
4431 }
4432 
4433 void
4434 spdk_nvme_ctrlr_free_qid(struct spdk_nvme_ctrlr *ctrlr, uint16_t qid)
4435 {
4436 	assert(qid <= ctrlr->opts.num_io_queues);
4437 
4438 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
4439 	spdk_bit_array_set(ctrlr->free_io_qids, qid);
4440 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
4441 }
4442