xref: /spdk/lib/nvme/nvme_ctrlr.c (revision 3aa204fb3138c43e63b868e488277f13b098cef1)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright (c) Intel Corporation.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #include "spdk/stdinc.h"
35 
36 #include "nvme_internal.h"
37 
38 #include "spdk/env.h"
39 #include "spdk/string.h"
40 
41 static int nvme_ctrlr_construct_and_submit_aer(struct spdk_nvme_ctrlr *ctrlr,
42 		struct nvme_async_event_request *aer);
43 static int nvme_ctrlr_identify_ns_async(struct spdk_nvme_ns *ns);
44 static int nvme_ctrlr_identify_id_desc_async(struct spdk_nvme_ns *ns);
45 
46 static int
47 nvme_ctrlr_get_cc(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_cc_register *cc)
48 {
49 	return nvme_transport_ctrlr_get_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, cc.raw),
50 					      &cc->raw);
51 }
52 
53 static int
54 nvme_ctrlr_get_csts(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_csts_register *csts)
55 {
56 	return nvme_transport_ctrlr_get_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, csts.raw),
57 					      &csts->raw);
58 }
59 
60 int
61 nvme_ctrlr_get_cap(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_cap_register *cap)
62 {
63 	return nvme_transport_ctrlr_get_reg_8(ctrlr, offsetof(struct spdk_nvme_registers, cap.raw),
64 					      &cap->raw);
65 }
66 
67 int
68 nvme_ctrlr_get_vs(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_vs_register *vs)
69 {
70 	return nvme_transport_ctrlr_get_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, vs.raw),
71 					      &vs->raw);
72 }
73 
74 static int
75 nvme_ctrlr_set_cc(struct spdk_nvme_ctrlr *ctrlr, const union spdk_nvme_cc_register *cc)
76 {
77 	return nvme_transport_ctrlr_set_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, cc.raw),
78 					      cc->raw);
79 }
80 
81 int
82 nvme_ctrlr_get_cmbsz(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_cmbsz_register *cmbsz)
83 {
84 	return nvme_transport_ctrlr_get_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, cmbsz.raw),
85 					      &cmbsz->raw);
86 }
87 
88 void
89 spdk_nvme_ctrlr_get_default_ctrlr_opts(struct spdk_nvme_ctrlr_opts *opts, size_t opts_size)
90 {
91 	char host_id_str[SPDK_UUID_STRING_LEN];
92 
93 	assert(opts);
94 
95 	memset(opts, 0, opts_size);
96 
97 #define FIELD_OK(field) \
98 	offsetof(struct spdk_nvme_ctrlr_opts, field) + sizeof(opts->field) <= opts_size
99 
100 	if (FIELD_OK(num_io_queues)) {
101 		opts->num_io_queues = DEFAULT_MAX_IO_QUEUES;
102 	}
103 
104 	if (FIELD_OK(use_cmb_sqs)) {
105 		opts->use_cmb_sqs = true;
106 	}
107 
108 	if (FIELD_OK(arb_mechanism)) {
109 		opts->arb_mechanism = SPDK_NVME_CC_AMS_RR;
110 	}
111 
112 	if (FIELD_OK(keep_alive_timeout_ms)) {
113 		opts->keep_alive_timeout_ms = MIN_KEEP_ALIVE_TIMEOUT_IN_MS;
114 	}
115 
116 	if (FIELD_OK(io_queue_size)) {
117 		opts->io_queue_size = DEFAULT_IO_QUEUE_SIZE;
118 	}
119 
120 	if (FIELD_OK(io_queue_requests)) {
121 		opts->io_queue_requests = DEFAULT_IO_QUEUE_REQUESTS;
122 	}
123 
124 	if (FIELD_OK(host_id)) {
125 		memset(opts->host_id, 0, sizeof(opts->host_id));
126 	}
127 
128 	if (nvme_driver_init() == 0) {
129 		if (FIELD_OK(extended_host_id)) {
130 			memcpy(opts->extended_host_id, &g_spdk_nvme_driver->default_extended_host_id,
131 			       sizeof(opts->extended_host_id));
132 		}
133 
134 		if (FIELD_OK(hostnqn)) {
135 			spdk_uuid_fmt_lower(host_id_str, sizeof(host_id_str),
136 					    &g_spdk_nvme_driver->default_extended_host_id);
137 			snprintf(opts->hostnqn, sizeof(opts->hostnqn), "2014-08.org.nvmexpress:uuid:%s", host_id_str);
138 		}
139 	}
140 
141 	if (FIELD_OK(src_addr)) {
142 		memset(opts->src_addr, 0, sizeof(opts->src_addr));
143 	}
144 
145 	if (FIELD_OK(src_svcid)) {
146 		memset(opts->src_svcid, 0, sizeof(opts->src_svcid));
147 	}
148 
149 	if (FIELD_OK(command_set)) {
150 		opts->command_set = SPDK_NVME_CC_CSS_NVM;
151 	}
152 
153 	if (FIELD_OK(admin_timeout_ms)) {
154 		opts->admin_timeout_ms = NVME_MAX_TIMEOUT_PERIOD * 1000;
155 	}
156 
157 	if (FIELD_OK(header_digest)) {
158 		opts->header_digest = false;
159 	}
160 
161 	if (FIELD_OK(data_digest)) {
162 		opts->data_digest = false;
163 	}
164 #undef FIELD_OK
165 }
166 
167 /**
168  * This function will be called when the process allocates the IO qpair.
169  * Note: the ctrlr_lock must be held when calling this function.
170  */
171 static void
172 nvme_ctrlr_proc_add_io_qpair(struct spdk_nvme_qpair *qpair)
173 {
174 	struct spdk_nvme_ctrlr_process	*active_proc;
175 	struct spdk_nvme_ctrlr		*ctrlr = qpair->ctrlr;
176 
177 	active_proc = spdk_nvme_ctrlr_get_current_process(ctrlr);
178 	if (active_proc) {
179 		TAILQ_INSERT_TAIL(&active_proc->allocated_io_qpairs, qpair, per_process_tailq);
180 		qpair->active_proc = active_proc;
181 	}
182 }
183 
184 /**
185  * This function will be called when the process frees the IO qpair.
186  * Note: the ctrlr_lock must be held when calling this function.
187  */
188 static void
189 nvme_ctrlr_proc_remove_io_qpair(struct spdk_nvme_qpair *qpair)
190 {
191 	struct spdk_nvme_ctrlr_process	*active_proc;
192 	struct spdk_nvme_ctrlr		*ctrlr = qpair->ctrlr;
193 	struct spdk_nvme_qpair          *active_qpair, *tmp_qpair;
194 
195 	active_proc = spdk_nvme_ctrlr_get_current_process(ctrlr);
196 	if (!active_proc) {
197 		return;
198 	}
199 
200 	TAILQ_FOREACH_SAFE(active_qpair, &active_proc->allocated_io_qpairs,
201 			   per_process_tailq, tmp_qpair) {
202 		if (active_qpair == qpair) {
203 			TAILQ_REMOVE(&active_proc->allocated_io_qpairs,
204 				     active_qpair, per_process_tailq);
205 
206 			break;
207 		}
208 	}
209 }
210 
211 void
212 spdk_nvme_ctrlr_get_default_io_qpair_opts(struct spdk_nvme_ctrlr *ctrlr,
213 		struct spdk_nvme_io_qpair_opts *opts,
214 		size_t opts_size)
215 {
216 	assert(ctrlr);
217 
218 	assert(opts);
219 
220 	memset(opts, 0, opts_size);
221 
222 #define FIELD_OK(field) \
223 	offsetof(struct spdk_nvme_io_qpair_opts, field) + sizeof(opts->field) <= opts_size
224 
225 	if (FIELD_OK(qprio)) {
226 		opts->qprio = SPDK_NVME_QPRIO_URGENT;
227 	}
228 
229 	if (FIELD_OK(io_queue_size)) {
230 		opts->io_queue_size = ctrlr->opts.io_queue_size;
231 	}
232 
233 	if (FIELD_OK(io_queue_requests)) {
234 		opts->io_queue_requests = ctrlr->opts.io_queue_requests;
235 	}
236 
237 	if (FIELD_OK(delay_pcie_doorbell)) {
238 		opts->delay_pcie_doorbell = false;
239 	}
240 
241 #undef FIELD_OK
242 }
243 
244 struct spdk_nvme_qpair *
245 spdk_nvme_ctrlr_alloc_io_qpair(struct spdk_nvme_ctrlr *ctrlr,
246 			       const struct spdk_nvme_io_qpair_opts *user_opts,
247 			       size_t opts_size)
248 {
249 	uint32_t				qid;
250 	struct spdk_nvme_qpair			*qpair;
251 	union spdk_nvme_cc_register		cc;
252 	struct spdk_nvme_io_qpair_opts		opts;
253 
254 	if (!ctrlr) {
255 		return NULL;
256 	}
257 
258 	/*
259 	 * Get the default options, then overwrite them with the user-provided options
260 	 * up to opts_size.
261 	 *
262 	 * This allows for extensions of the opts structure without breaking
263 	 * ABI compatibility.
264 	 */
265 	spdk_nvme_ctrlr_get_default_io_qpair_opts(ctrlr, &opts, sizeof(opts));
266 	if (user_opts) {
267 		memcpy(&opts, user_opts, spdk_min(sizeof(opts), opts_size));
268 	}
269 
270 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
271 	if (nvme_ctrlr_get_cc(ctrlr, &cc)) {
272 		SPDK_ERRLOG("get_cc failed\n");
273 		nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
274 		return NULL;
275 	}
276 
277 	/* Only the low 2 bits (values 0, 1, 2, 3) of QPRIO are valid. */
278 	if ((opts.qprio & 3) != opts.qprio) {
279 		nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
280 		return NULL;
281 	}
282 
283 	/*
284 	 * Only value SPDK_NVME_QPRIO_URGENT(0) is valid for the
285 	 * default round robin arbitration method.
286 	 */
287 	if ((cc.bits.ams == SPDK_NVME_CC_AMS_RR) && (opts.qprio != SPDK_NVME_QPRIO_URGENT)) {
288 		SPDK_ERRLOG("invalid queue priority for default round robin arbitration method\n");
289 		nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
290 		return NULL;
291 	}
292 
293 	/*
294 	 * Get the first available I/O queue ID.
295 	 */
296 	qid = spdk_bit_array_find_first_set(ctrlr->free_io_qids, 1);
297 	if (qid > ctrlr->opts.num_io_queues) {
298 		SPDK_ERRLOG("No free I/O queue IDs\n");
299 		nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
300 		return NULL;
301 	}
302 
303 	qpair = nvme_transport_ctrlr_create_io_qpair(ctrlr, qid, &opts);
304 	if (qpair == NULL) {
305 		SPDK_ERRLOG("nvme_transport_ctrlr_create_io_qpair() failed\n");
306 		nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
307 		return NULL;
308 	}
309 	spdk_bit_array_clear(ctrlr->free_io_qids, qid);
310 	TAILQ_INSERT_TAIL(&ctrlr->active_io_qpairs, qpair, tailq);
311 
312 	nvme_ctrlr_proc_add_io_qpair(qpair);
313 
314 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
315 
316 	if (ctrlr->quirks & NVME_QUIRK_DELAY_AFTER_QUEUE_ALLOC) {
317 		spdk_delay_us(100);
318 	}
319 
320 	return qpair;
321 }
322 
323 int
324 spdk_nvme_ctrlr_free_io_qpair(struct spdk_nvme_qpair *qpair)
325 {
326 	struct spdk_nvme_ctrlr *ctrlr;
327 
328 	if (qpair == NULL) {
329 		return 0;
330 	}
331 
332 	ctrlr = qpair->ctrlr;
333 
334 	if (qpair->in_completion_context) {
335 		/*
336 		 * There are many cases where it is convenient to delete an io qpair in the context
337 		 *  of that qpair's completion routine.  To handle this properly, set a flag here
338 		 *  so that the completion routine will perform an actual delete after the context
339 		 *  unwinds.
340 		 */
341 		qpair->delete_after_completion_context = 1;
342 		return 0;
343 	}
344 
345 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
346 
347 	nvme_ctrlr_proc_remove_io_qpair(qpair);
348 
349 	TAILQ_REMOVE(&ctrlr->active_io_qpairs, qpair, tailq);
350 	spdk_bit_array_set(ctrlr->free_io_qids, qpair->id);
351 
352 	if (nvme_transport_ctrlr_delete_io_qpair(ctrlr, qpair)) {
353 		nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
354 		return -1;
355 	}
356 
357 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
358 	return 0;
359 }
360 
361 static void
362 nvme_ctrlr_construct_intel_support_log_page_list(struct spdk_nvme_ctrlr *ctrlr,
363 		struct spdk_nvme_intel_log_page_directory *log_page_directory)
364 {
365 	if (log_page_directory == NULL) {
366 		return;
367 	}
368 
369 	if (ctrlr->cdata.vid != SPDK_PCI_VID_INTEL) {
370 		return;
371 	}
372 
373 	ctrlr->log_page_supported[SPDK_NVME_INTEL_LOG_PAGE_DIRECTORY] = true;
374 
375 	if (log_page_directory->read_latency_log_len ||
376 	    (ctrlr->quirks & NVME_INTEL_QUIRK_READ_LATENCY)) {
377 		ctrlr->log_page_supported[SPDK_NVME_INTEL_LOG_READ_CMD_LATENCY] = true;
378 	}
379 	if (log_page_directory->write_latency_log_len ||
380 	    (ctrlr->quirks & NVME_INTEL_QUIRK_WRITE_LATENCY)) {
381 		ctrlr->log_page_supported[SPDK_NVME_INTEL_LOG_WRITE_CMD_LATENCY] = true;
382 	}
383 	if (log_page_directory->temperature_statistics_log_len) {
384 		ctrlr->log_page_supported[SPDK_NVME_INTEL_LOG_TEMPERATURE] = true;
385 	}
386 	if (log_page_directory->smart_log_len) {
387 		ctrlr->log_page_supported[SPDK_NVME_INTEL_LOG_SMART] = true;
388 	}
389 	if (log_page_directory->marketing_description_log_len) {
390 		ctrlr->log_page_supported[SPDK_NVME_INTEL_MARKETING_DESCRIPTION] = true;
391 	}
392 }
393 
394 static int nvme_ctrlr_set_intel_support_log_pages(struct spdk_nvme_ctrlr *ctrlr)
395 {
396 	int rc = 0;
397 	struct nvme_completion_poll_status	status;
398 	struct spdk_nvme_intel_log_page_directory *log_page_directory;
399 
400 	log_page_directory = spdk_zmalloc(sizeof(struct spdk_nvme_intel_log_page_directory),
401 					  64, NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
402 	if (log_page_directory == NULL) {
403 		SPDK_ERRLOG("could not allocate log_page_directory\n");
404 		return -ENXIO;
405 	}
406 
407 	rc = spdk_nvme_ctrlr_cmd_get_log_page(ctrlr, SPDK_NVME_INTEL_LOG_PAGE_DIRECTORY,
408 					      SPDK_NVME_GLOBAL_NS_TAG, log_page_directory,
409 					      sizeof(struct spdk_nvme_intel_log_page_directory),
410 					      0, nvme_completion_poll_cb, &status);
411 	if (rc != 0) {
412 		spdk_free(log_page_directory);
413 		return rc;
414 	}
415 
416 	if (spdk_nvme_wait_for_completion_timeout(ctrlr->adminq, &status,
417 			ctrlr->opts.admin_timeout_ms / 1000)) {
418 		spdk_free(log_page_directory);
419 		SPDK_WARNLOG("Intel log pages not supported on Intel drive!\n");
420 		return 0;
421 	}
422 
423 	nvme_ctrlr_construct_intel_support_log_page_list(ctrlr, log_page_directory);
424 	spdk_free(log_page_directory);
425 	return 0;
426 }
427 
428 static int
429 nvme_ctrlr_set_supported_log_pages(struct spdk_nvme_ctrlr *ctrlr)
430 {
431 	int	rc = 0;
432 
433 	memset(ctrlr->log_page_supported, 0, sizeof(ctrlr->log_page_supported));
434 	/* Mandatory pages */
435 	ctrlr->log_page_supported[SPDK_NVME_LOG_ERROR] = true;
436 	ctrlr->log_page_supported[SPDK_NVME_LOG_HEALTH_INFORMATION] = true;
437 	ctrlr->log_page_supported[SPDK_NVME_LOG_FIRMWARE_SLOT] = true;
438 	if (ctrlr->cdata.lpa.celp) {
439 		ctrlr->log_page_supported[SPDK_NVME_LOG_COMMAND_EFFECTS_LOG] = true;
440 	}
441 	if (ctrlr->cdata.vid == SPDK_PCI_VID_INTEL && !(ctrlr->quirks & NVME_INTEL_QUIRK_NO_LOG_PAGES)) {
442 		rc = nvme_ctrlr_set_intel_support_log_pages(ctrlr);
443 	}
444 
445 	return rc;
446 }
447 
448 static void
449 nvme_ctrlr_set_intel_supported_features(struct spdk_nvme_ctrlr *ctrlr)
450 {
451 	ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_MAX_LBA] = true;
452 	ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_NATIVE_MAX_LBA] = true;
453 	ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_POWER_GOVERNOR_SETTING] = true;
454 	ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_SMBUS_ADDRESS] = true;
455 	ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_LED_PATTERN] = true;
456 	ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_RESET_TIMED_WORKLOAD_COUNTERS] = true;
457 	ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_LATENCY_TRACKING] = true;
458 }
459 
460 static void
461 nvme_ctrlr_set_supported_features(struct spdk_nvme_ctrlr *ctrlr)
462 {
463 	memset(ctrlr->feature_supported, 0, sizeof(ctrlr->feature_supported));
464 	/* Mandatory features */
465 	ctrlr->feature_supported[SPDK_NVME_FEAT_ARBITRATION] = true;
466 	ctrlr->feature_supported[SPDK_NVME_FEAT_POWER_MANAGEMENT] = true;
467 	ctrlr->feature_supported[SPDK_NVME_FEAT_TEMPERATURE_THRESHOLD] = true;
468 	ctrlr->feature_supported[SPDK_NVME_FEAT_ERROR_RECOVERY] = true;
469 	ctrlr->feature_supported[SPDK_NVME_FEAT_NUMBER_OF_QUEUES] = true;
470 	ctrlr->feature_supported[SPDK_NVME_FEAT_INTERRUPT_COALESCING] = true;
471 	ctrlr->feature_supported[SPDK_NVME_FEAT_INTERRUPT_VECTOR_CONFIGURATION] = true;
472 	ctrlr->feature_supported[SPDK_NVME_FEAT_WRITE_ATOMICITY] = true;
473 	ctrlr->feature_supported[SPDK_NVME_FEAT_ASYNC_EVENT_CONFIGURATION] = true;
474 	/* Optional features */
475 	if (ctrlr->cdata.vwc.present) {
476 		ctrlr->feature_supported[SPDK_NVME_FEAT_VOLATILE_WRITE_CACHE] = true;
477 	}
478 	if (ctrlr->cdata.apsta.supported) {
479 		ctrlr->feature_supported[SPDK_NVME_FEAT_AUTONOMOUS_POWER_STATE_TRANSITION] = true;
480 	}
481 	if (ctrlr->cdata.hmpre) {
482 		ctrlr->feature_supported[SPDK_NVME_FEAT_HOST_MEM_BUFFER] = true;
483 	}
484 	if (ctrlr->cdata.vid == SPDK_PCI_VID_INTEL) {
485 		nvme_ctrlr_set_intel_supported_features(ctrlr);
486 	}
487 }
488 
489 void
490 nvme_ctrlr_fail(struct spdk_nvme_ctrlr *ctrlr, bool hot_remove)
491 {
492 	/*
493 	 * Set the flag here and leave the work failure of qpairs to
494 	 * spdk_nvme_qpair_process_completions().
495 	 */
496 	if (hot_remove) {
497 		ctrlr->is_removed = true;
498 	}
499 	ctrlr->is_failed = true;
500 	SPDK_ERRLOG("ctrlr %s in failed state.\n", ctrlr->trid.traddr);
501 }
502 
503 static void
504 nvme_ctrlr_shutdown(struct spdk_nvme_ctrlr *ctrlr)
505 {
506 	union spdk_nvme_cc_register	cc;
507 	union spdk_nvme_csts_register	csts;
508 	uint32_t			ms_waited = 0;
509 	uint32_t			shutdown_timeout_ms;
510 
511 	if (ctrlr->is_removed) {
512 		return;
513 	}
514 
515 	if (nvme_ctrlr_get_cc(ctrlr, &cc)) {
516 		SPDK_ERRLOG("get_cc() failed\n");
517 		return;
518 	}
519 
520 	cc.bits.shn = SPDK_NVME_SHN_NORMAL;
521 
522 	if (nvme_ctrlr_set_cc(ctrlr, &cc)) {
523 		SPDK_ERRLOG("set_cc() failed\n");
524 		return;
525 	}
526 
527 	/*
528 	 * The NVMe specification defines RTD3E to be the time between
529 	 *  setting SHN = 1 until the controller will set SHST = 10b.
530 	 * If the device doesn't report RTD3 entry latency, or if it
531 	 *  reports RTD3 entry latency less than 10 seconds, pick
532 	 *  10 seconds as a reasonable amount of time to
533 	 *  wait before proceeding.
534 	 */
535 	SPDK_DEBUGLOG(SPDK_LOG_NVME, "RTD3E = %" PRIu32 " us\n", ctrlr->cdata.rtd3e);
536 	shutdown_timeout_ms = (ctrlr->cdata.rtd3e + 999) / 1000;
537 	shutdown_timeout_ms = spdk_max(shutdown_timeout_ms, 10000);
538 	SPDK_DEBUGLOG(SPDK_LOG_NVME, "shutdown timeout = %" PRIu32 " ms\n", shutdown_timeout_ms);
539 
540 	do {
541 		if (nvme_ctrlr_get_csts(ctrlr, &csts)) {
542 			SPDK_ERRLOG("get_csts() failed\n");
543 			return;
544 		}
545 
546 		if (csts.bits.shst == SPDK_NVME_SHST_COMPLETE) {
547 			SPDK_DEBUGLOG(SPDK_LOG_NVME, "shutdown complete in %u milliseconds\n",
548 				      ms_waited);
549 			ctrlr->is_shutdown = true;
550 			return;
551 		}
552 
553 		nvme_delay(1000);
554 		ms_waited++;
555 	} while (ms_waited < shutdown_timeout_ms);
556 
557 	SPDK_ERRLOG("did not shutdown within %u milliseconds\n", shutdown_timeout_ms);
558 	if (ctrlr->quirks & NVME_QUIRK_SHST_COMPLETE) {
559 		SPDK_ERRLOG("likely due to shutdown handling in the VMWare emulated NVMe SSD\n");
560 	}
561 }
562 
563 static int
564 nvme_ctrlr_enable(struct spdk_nvme_ctrlr *ctrlr)
565 {
566 	union spdk_nvme_cc_register	cc;
567 	int				rc;
568 
569 	rc = nvme_transport_ctrlr_enable(ctrlr);
570 	if (rc != 0) {
571 		SPDK_ERRLOG("transport ctrlr_enable failed\n");
572 		return rc;
573 	}
574 
575 	if (nvme_ctrlr_get_cc(ctrlr, &cc)) {
576 		SPDK_ERRLOG("get_cc() failed\n");
577 		return -EIO;
578 	}
579 
580 	if (cc.bits.en != 0) {
581 		SPDK_ERRLOG("%s called with CC.EN = 1\n", __func__);
582 		return -EINVAL;
583 	}
584 
585 	cc.bits.en = 1;
586 	cc.bits.css = 0;
587 	cc.bits.shn = 0;
588 	cc.bits.iosqes = 6; /* SQ entry size == 64 == 2^6 */
589 	cc.bits.iocqes = 4; /* CQ entry size == 16 == 2^4 */
590 
591 	/* Page size is 2 ^ (12 + mps). */
592 	cc.bits.mps = spdk_u32log2(ctrlr->page_size) - 12;
593 
594 	if (ctrlr->cap.bits.css == 0) {
595 		SPDK_INFOLOG(SPDK_LOG_NVME,
596 			     "Drive reports no command sets supported. Assuming NVM is supported.\n");
597 		ctrlr->cap.bits.css = SPDK_NVME_CAP_CSS_NVM;
598 	}
599 
600 	if (!(ctrlr->cap.bits.css & (1u << ctrlr->opts.command_set))) {
601 		SPDK_DEBUGLOG(SPDK_LOG_NVME, "Requested I/O command set %u but supported mask is 0x%x\n",
602 			      ctrlr->opts.command_set, ctrlr->cap.bits.css);
603 		return -EINVAL;
604 	}
605 
606 	cc.bits.css = ctrlr->opts.command_set;
607 
608 	switch (ctrlr->opts.arb_mechanism) {
609 	case SPDK_NVME_CC_AMS_RR:
610 		break;
611 	case SPDK_NVME_CC_AMS_WRR:
612 		if (SPDK_NVME_CAP_AMS_WRR & ctrlr->cap.bits.ams) {
613 			break;
614 		}
615 		return -EINVAL;
616 	case SPDK_NVME_CC_AMS_VS:
617 		if (SPDK_NVME_CAP_AMS_VS & ctrlr->cap.bits.ams) {
618 			break;
619 		}
620 		return -EINVAL;
621 	default:
622 		return -EINVAL;
623 	}
624 
625 	cc.bits.ams = ctrlr->opts.arb_mechanism;
626 
627 	if (nvme_ctrlr_set_cc(ctrlr, &cc)) {
628 		SPDK_ERRLOG("set_cc() failed\n");
629 		return -EIO;
630 	}
631 
632 	return 0;
633 }
634 
635 #ifdef DEBUG
636 static const char *
637 nvme_ctrlr_state_string(enum nvme_ctrlr_state state)
638 {
639 	switch (state) {
640 	case NVME_CTRLR_STATE_INIT_DELAY:
641 		return "delay init";
642 	case NVME_CTRLR_STATE_INIT:
643 		return "init";
644 	case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1:
645 		return "disable and wait for CSTS.RDY = 1";
646 	case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0:
647 		return "disable and wait for CSTS.RDY = 0";
648 	case NVME_CTRLR_STATE_ENABLE:
649 		return "enable controller by writing CC.EN = 1";
650 	case NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1:
651 		return "wait for CSTS.RDY = 1";
652 	case NVME_CTRLR_STATE_ENABLE_ADMIN_QUEUE:
653 		return "enable admin queue";
654 	case NVME_CTRLR_STATE_IDENTIFY:
655 		return "identify controller";
656 	case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY:
657 		return "wait for identify controller";
658 	case NVME_CTRLR_STATE_SET_NUM_QUEUES:
659 		return "set number of queues";
660 	case NVME_CTRLR_STATE_WAIT_FOR_SET_NUM_QUEUES:
661 		return "wait for set number of queues";
662 	case NVME_CTRLR_STATE_GET_NUM_QUEUES:
663 		return "get number of queues";
664 	case NVME_CTRLR_STATE_WAIT_FOR_GET_NUM_QUEUES:
665 		return "wait for get number of queues";
666 	case NVME_CTRLR_STATE_CONSTRUCT_NS:
667 		return "construct namespaces";
668 	case NVME_CTRLR_STATE_IDENTIFY_ACTIVE_NS:
669 		return "identify active ns";
670 	case NVME_CTRLR_STATE_IDENTIFY_NS:
671 		return "identify ns";
672 	case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_NS:
673 		return "wait for identify ns";
674 	case NVME_CTRLR_STATE_IDENTIFY_ID_DESCS:
675 		return "identify namespace id descriptors";
676 	case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_ID_DESCS:
677 		return "wait for identify namespace id descriptors";
678 	case NVME_CTRLR_STATE_CONFIGURE_AER:
679 		return "configure AER";
680 	case NVME_CTRLR_STATE_WAIT_FOR_CONFIGURE_AER:
681 		return "wait for configure aer";
682 	case NVME_CTRLR_STATE_SET_SUPPORTED_LOG_PAGES:
683 		return "set supported log pages";
684 	case NVME_CTRLR_STATE_SET_SUPPORTED_FEATURES:
685 		return "set supported features";
686 	case NVME_CTRLR_STATE_SET_DB_BUF_CFG:
687 		return "set doorbell buffer config";
688 	case NVME_CTRLR_STATE_WAIT_FOR_DB_BUF_CFG:
689 		return "wait for doorbell buffer config";
690 	case NVME_CTRLR_STATE_SET_KEEP_ALIVE_TIMEOUT:
691 		return "set keep alive timeout";
692 	case NVME_CTRLR_STATE_WAIT_FOR_KEEP_ALIVE_TIMEOUT:
693 		return "wait for set keep alive timeout";
694 	case NVME_CTRLR_STATE_SET_HOST_ID:
695 		return "set host ID";
696 	case NVME_CTRLR_STATE_WAIT_FOR_HOST_ID:
697 		return "wait for set host ID";
698 	case NVME_CTRLR_STATE_READY:
699 		return "ready";
700 	case NVME_CTRLR_STATE_ERROR:
701 		return "error";
702 	}
703 	return "unknown";
704 };
705 #endif /* DEBUG */
706 
707 static void
708 nvme_ctrlr_set_state(struct spdk_nvme_ctrlr *ctrlr, enum nvme_ctrlr_state state,
709 		     uint64_t timeout_in_ms)
710 {
711 	ctrlr->state = state;
712 	if (timeout_in_ms == 0) {
713 		SPDK_DEBUGLOG(SPDK_LOG_NVME, "setting state to %s (no timeout)\n",
714 			      nvme_ctrlr_state_string(ctrlr->state));
715 		ctrlr->state_timeout_tsc = NVME_TIMEOUT_INFINITE;
716 	} else {
717 		SPDK_DEBUGLOG(SPDK_LOG_NVME, "setting state to %s (timeout %" PRIu64 " ms)\n",
718 			      nvme_ctrlr_state_string(ctrlr->state), timeout_in_ms);
719 		ctrlr->state_timeout_tsc = spdk_get_ticks() + (timeout_in_ms * spdk_get_ticks_hz()) / 1000;
720 	}
721 }
722 
723 static void
724 nvme_ctrlr_free_doorbell_buffer(struct spdk_nvme_ctrlr *ctrlr)
725 {
726 	if (ctrlr->shadow_doorbell) {
727 		spdk_dma_free(ctrlr->shadow_doorbell);
728 		ctrlr->shadow_doorbell = NULL;
729 	}
730 
731 	if (ctrlr->eventidx) {
732 		spdk_dma_free(ctrlr->eventidx);
733 		ctrlr->eventidx = NULL;
734 	}
735 }
736 
737 static void
738 nvme_ctrlr_set_doorbell_buffer_config_done(void *arg, const struct spdk_nvme_cpl *cpl)
739 {
740 	struct spdk_nvme_ctrlr *ctrlr = (struct spdk_nvme_ctrlr *)arg;
741 
742 	if (spdk_nvme_cpl_is_error(cpl)) {
743 		SPDK_WARNLOG("Doorbell buffer config failed\n");
744 	} else {
745 		SPDK_INFOLOG(SPDK_LOG_NVME, "NVMe controller: %s doorbell buffer config enabled\n",
746 			     ctrlr->trid.traddr);
747 	}
748 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_KEEP_ALIVE_TIMEOUT,
749 			     ctrlr->opts.admin_timeout_ms);
750 }
751 
752 static int
753 nvme_ctrlr_set_doorbell_buffer_config(struct spdk_nvme_ctrlr *ctrlr)
754 {
755 	int rc = 0;
756 	uint64_t prp1, prp2, len;
757 
758 	if (!ctrlr->cdata.oacs.doorbell_buffer_config) {
759 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_KEEP_ALIVE_TIMEOUT,
760 				     ctrlr->opts.admin_timeout_ms);
761 		return 0;
762 	}
763 
764 	if (ctrlr->trid.trtype != SPDK_NVME_TRANSPORT_PCIE) {
765 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_KEEP_ALIVE_TIMEOUT,
766 				     ctrlr->opts.admin_timeout_ms);
767 		return 0;
768 	}
769 
770 	/* only 1 page size for doorbell buffer */
771 	ctrlr->shadow_doorbell = spdk_dma_zmalloc(ctrlr->page_size, ctrlr->page_size,
772 				 NULL);
773 	if (ctrlr->shadow_doorbell == NULL) {
774 		rc = -ENOMEM;
775 		goto error;
776 	}
777 
778 	len = ctrlr->page_size;
779 	prp1 = spdk_vtophys(ctrlr->shadow_doorbell, &len);
780 	if (prp1 == SPDK_VTOPHYS_ERROR || len != ctrlr->page_size) {
781 		rc = -EFAULT;
782 		goto error;
783 	}
784 
785 	ctrlr->eventidx = spdk_dma_zmalloc(ctrlr->page_size, ctrlr->page_size, NULL);
786 	if (ctrlr->eventidx == NULL) {
787 		rc = -ENOMEM;
788 		goto error;
789 	}
790 
791 	len = ctrlr->page_size;
792 	prp2 = spdk_vtophys(ctrlr->eventidx, &len);
793 	if (prp2 == SPDK_VTOPHYS_ERROR || len != ctrlr->page_size) {
794 		rc = -EFAULT;
795 		goto error;
796 	}
797 
798 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_DB_BUF_CFG,
799 			     ctrlr->opts.admin_timeout_ms);
800 
801 	rc = nvme_ctrlr_cmd_doorbell_buffer_config(ctrlr, prp1, prp2,
802 			nvme_ctrlr_set_doorbell_buffer_config_done, ctrlr);
803 	if (rc != 0) {
804 		goto error;
805 	}
806 
807 	return 0;
808 
809 error:
810 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
811 	nvme_ctrlr_free_doorbell_buffer(ctrlr);
812 	return rc;
813 }
814 
815 int
816 spdk_nvme_ctrlr_reset(struct spdk_nvme_ctrlr *ctrlr)
817 {
818 	int rc = 0;
819 	struct spdk_nvme_qpair	*qpair;
820 	struct nvme_request	*req, *tmp;
821 
822 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
823 
824 	if (ctrlr->is_resetting || ctrlr->is_failed) {
825 		/*
826 		 * Controller is already resetting or has failed.  Return
827 		 *  immediately since there is no need to kick off another
828 		 *  reset in these cases.
829 		 */
830 		nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
831 		return 0;
832 	}
833 
834 	ctrlr->is_resetting = true;
835 
836 	SPDK_NOTICELOG("resetting controller\n");
837 
838 	/* Free all of the queued abort requests */
839 	STAILQ_FOREACH_SAFE(req, &ctrlr->queued_aborts, stailq, tmp) {
840 		STAILQ_REMOVE_HEAD(&ctrlr->queued_aborts, stailq);
841 		nvme_free_request(req);
842 		ctrlr->outstanding_aborts--;
843 	}
844 
845 	/* Disable all queues before disabling the controller hardware. */
846 	nvme_qpair_disable(ctrlr->adminq);
847 	TAILQ_FOREACH(qpair, &ctrlr->active_io_qpairs, tailq) {
848 		nvme_qpair_disable(qpair);
849 	}
850 
851 	/* Doorbell buffer config is invalid during reset */
852 	nvme_ctrlr_free_doorbell_buffer(ctrlr);
853 
854 	/* Set the state back to INIT to cause a full hardware reset. */
855 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_INIT, NVME_TIMEOUT_INFINITE);
856 
857 	while (ctrlr->state != NVME_CTRLR_STATE_READY) {
858 		if (nvme_ctrlr_process_init(ctrlr) != 0) {
859 			SPDK_ERRLOG("%s: controller reinitialization failed\n", __func__);
860 			nvme_ctrlr_fail(ctrlr, false);
861 			rc = -1;
862 			break;
863 		}
864 	}
865 
866 	if (!ctrlr->is_failed) {
867 		/* Reinitialize qpairs */
868 		TAILQ_FOREACH(qpair, &ctrlr->active_io_qpairs, tailq) {
869 			if (nvme_transport_ctrlr_reinit_io_qpair(ctrlr, qpair) != 0) {
870 				nvme_ctrlr_fail(ctrlr, false);
871 				rc = -1;
872 			}
873 		}
874 	}
875 
876 	ctrlr->is_resetting = false;
877 
878 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
879 
880 	return rc;
881 }
882 
883 static void
884 nvme_ctrlr_identify_done(void *arg, const struct spdk_nvme_cpl *cpl)
885 {
886 	struct spdk_nvme_ctrlr *ctrlr = (struct spdk_nvme_ctrlr *)arg;
887 
888 	if (spdk_nvme_cpl_is_error(cpl)) {
889 		SPDK_ERRLOG("nvme_identify_controller failed!\n");
890 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
891 		return;
892 	}
893 
894 	/*
895 	 * Use MDTS to ensure our default max_xfer_size doesn't exceed what the
896 	 *  controller supports.
897 	 */
898 	ctrlr->max_xfer_size = nvme_transport_ctrlr_get_max_xfer_size(ctrlr);
899 	SPDK_DEBUGLOG(SPDK_LOG_NVME, "transport max_xfer_size %u\n", ctrlr->max_xfer_size);
900 	if (ctrlr->cdata.mdts > 0) {
901 		ctrlr->max_xfer_size = spdk_min(ctrlr->max_xfer_size,
902 						ctrlr->min_page_size * (1 << (ctrlr->cdata.mdts)));
903 		SPDK_DEBUGLOG(SPDK_LOG_NVME, "MDTS max_xfer_size %u\n", ctrlr->max_xfer_size);
904 	}
905 
906 	SPDK_DEBUGLOG(SPDK_LOG_NVME, "CNTLID 0x%04" PRIx16 "\n", ctrlr->cdata.cntlid);
907 	if (ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) {
908 		ctrlr->cntlid = ctrlr->cdata.cntlid;
909 	} else {
910 		/*
911 		 * Fabrics controllers should already have CNTLID from the Connect command.
912 		 *
913 		 * If CNTLID from Connect doesn't match CNTLID in the Identify Controller data,
914 		 * trust the one from Connect.
915 		 */
916 		if (ctrlr->cntlid != ctrlr->cdata.cntlid) {
917 			SPDK_DEBUGLOG(SPDK_LOG_NVME,
918 				      "Identify CNTLID 0x%04" PRIx16 " != Connect CNTLID 0x%04" PRIx16 "\n",
919 				      ctrlr->cdata.cntlid, ctrlr->cntlid);
920 		}
921 	}
922 
923 	if (ctrlr->cdata.sgls.supported) {
924 		ctrlr->flags |= SPDK_NVME_CTRLR_SGL_SUPPORTED;
925 		ctrlr->max_sges = nvme_transport_ctrlr_get_max_sges(ctrlr);
926 	}
927 
928 	if (ctrlr->cdata.oacs.security) {
929 		ctrlr->flags |= SPDK_NVME_CTRLR_SECURITY_SEND_RECV_SUPPORTED;
930 	}
931 
932 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_NUM_QUEUES,
933 			     ctrlr->opts.admin_timeout_ms);
934 }
935 
936 static int
937 nvme_ctrlr_identify(struct spdk_nvme_ctrlr *ctrlr)
938 {
939 	int	rc;
940 
941 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY,
942 			     ctrlr->opts.admin_timeout_ms);
943 
944 	rc = nvme_ctrlr_cmd_identify(ctrlr, SPDK_NVME_IDENTIFY_CTRLR, 0, 0,
945 				     &ctrlr->cdata, sizeof(ctrlr->cdata),
946 				     nvme_ctrlr_identify_done, ctrlr);
947 	if (rc != 0) {
948 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
949 		return rc;
950 	}
951 
952 	return 0;
953 }
954 
955 int
956 nvme_ctrlr_identify_active_ns(struct spdk_nvme_ctrlr *ctrlr)
957 {
958 	struct nvme_completion_poll_status	status;
959 	int					rc;
960 	uint32_t				i;
961 	uint32_t				num_pages;
962 	uint32_t				next_nsid = 0;
963 	uint32_t				*new_ns_list = NULL;
964 
965 	if (ctrlr->num_ns == 0) {
966 		spdk_dma_free(ctrlr->active_ns_list);
967 		ctrlr->active_ns_list = NULL;
968 
969 		return 0;
970 	}
971 
972 	/*
973 	 * The allocated size must be a multiple of sizeof(struct spdk_nvme_ns_list)
974 	 */
975 	num_pages = (ctrlr->num_ns * sizeof(new_ns_list[0]) - 1) / sizeof(struct spdk_nvme_ns_list) + 1;
976 	new_ns_list = spdk_dma_zmalloc(num_pages * sizeof(struct spdk_nvme_ns_list), ctrlr->page_size,
977 				       NULL);
978 	if (!new_ns_list) {
979 		SPDK_ERRLOG("Failed to allocate active_ns_list!\n");
980 		return -ENOMEM;
981 	}
982 
983 	if (ctrlr->vs.raw >= SPDK_NVME_VERSION(1, 1, 0) && !(ctrlr->quirks & NVME_QUIRK_IDENTIFY_CNS)) {
984 		/*
985 		 * Iterate through the pages and fetch each chunk of 1024 namespaces until
986 		 * there are no more active namespaces
987 		 */
988 		for (i = 0; i < num_pages; i++) {
989 			rc = nvme_ctrlr_cmd_identify(ctrlr, SPDK_NVME_IDENTIFY_ACTIVE_NS_LIST, 0, next_nsid,
990 						     &new_ns_list[1024 * i], sizeof(struct spdk_nvme_ns_list),
991 						     nvme_completion_poll_cb, &status);
992 			if (rc != 0) {
993 				goto fail;
994 			}
995 			if (spdk_nvme_wait_for_completion(ctrlr->adminq, &status)) {
996 				SPDK_ERRLOG("nvme_ctrlr_cmd_identify_active_ns_list failed!\n");
997 				rc = -ENXIO;
998 				goto fail;
999 			}
1000 			next_nsid = new_ns_list[1024 * i + 1023];
1001 			if (next_nsid == 0) {
1002 				/*
1003 				 * No more active namespaces found, no need to fetch additional chunks
1004 				 */
1005 				break;
1006 			}
1007 		}
1008 
1009 	} else {
1010 		/*
1011 		 * Controller doesn't support active ns list CNS 0x02 so dummy up
1012 		 * an active ns list
1013 		 */
1014 		for (i = 0; i < ctrlr->num_ns; i++) {
1015 			new_ns_list[i] = i + 1;
1016 		}
1017 	}
1018 
1019 	/*
1020 	 * Now that that the list is properly setup, we can swap it in to the ctrlr and
1021 	 * free up the previous one.
1022 	 */
1023 	spdk_dma_free(ctrlr->active_ns_list);
1024 	ctrlr->active_ns_list = new_ns_list;
1025 
1026 	return 0;
1027 fail:
1028 	spdk_dma_free(new_ns_list);
1029 	return rc;
1030 }
1031 
1032 static void
1033 nvme_ctrlr_identify_ns_async_done(void *arg, const struct spdk_nvme_cpl *cpl)
1034 {
1035 	struct spdk_nvme_ns *ns = (struct spdk_nvme_ns *)arg;
1036 	struct spdk_nvme_ctrlr *ctrlr = ns->ctrlr;
1037 	uint32_t nsid;
1038 	int rc;
1039 
1040 	if (spdk_nvme_cpl_is_error(cpl)) {
1041 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
1042 		return;
1043 	} else {
1044 		nvme_ns_set_identify_data(ns);
1045 	}
1046 
1047 	/* move on to the next active NS */
1048 	nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, ns->id);
1049 	ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid);
1050 	if (ns == NULL) {
1051 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_ID_DESCS,
1052 				     ctrlr->opts.admin_timeout_ms);
1053 		return;
1054 	}
1055 	ns->ctrlr = ctrlr;
1056 	ns->id = nsid;
1057 
1058 	rc = nvme_ctrlr_identify_ns_async(ns);
1059 	if (rc) {
1060 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
1061 	}
1062 }
1063 
1064 static int
1065 nvme_ctrlr_identify_ns_async(struct spdk_nvme_ns *ns)
1066 {
1067 	struct spdk_nvme_ctrlr *ctrlr = ns->ctrlr;
1068 	struct spdk_nvme_ns_data *nsdata;
1069 
1070 	nsdata = &ctrlr->nsdata[ns->id - 1];
1071 
1072 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_NS,
1073 			     ctrlr->opts.admin_timeout_ms);
1074 	return nvme_ctrlr_cmd_identify(ns->ctrlr, SPDK_NVME_IDENTIFY_NS, 0, ns->id,
1075 				       nsdata, sizeof(*nsdata),
1076 				       nvme_ctrlr_identify_ns_async_done, ns);
1077 }
1078 
1079 static int
1080 nvme_ctrlr_identify_namespaces(struct spdk_nvme_ctrlr *ctrlr)
1081 {
1082 	uint32_t nsid;
1083 	struct spdk_nvme_ns *ns;
1084 	int rc;
1085 
1086 	nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr);
1087 	ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid);
1088 	if (ns == NULL) {
1089 		/* No active NS, move on to the next state */
1090 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_CONFIGURE_AER,
1091 				     ctrlr->opts.admin_timeout_ms);
1092 		return 0;
1093 	}
1094 
1095 	ns->ctrlr = ctrlr;
1096 	ns->id = nsid;
1097 
1098 	rc = nvme_ctrlr_identify_ns_async(ns);
1099 	if (rc) {
1100 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
1101 	}
1102 
1103 	return rc;
1104 }
1105 
1106 static void
1107 nvme_ctrlr_identify_id_desc_async_done(void *arg, const struct spdk_nvme_cpl *cpl)
1108 {
1109 	struct spdk_nvme_ns *ns = (struct spdk_nvme_ns *)arg;
1110 	struct spdk_nvme_ctrlr *ctrlr = ns->ctrlr;
1111 	uint32_t nsid;
1112 	int rc;
1113 
1114 	if (spdk_nvme_cpl_is_error(cpl)) {
1115 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_CONFIGURE_AER,
1116 				     ctrlr->opts.admin_timeout_ms);
1117 		return;
1118 	}
1119 
1120 	/* move on to the next active NS */
1121 	nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, ns->id);
1122 	ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid);
1123 	if (ns == NULL) {
1124 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_CONFIGURE_AER,
1125 				     ctrlr->opts.admin_timeout_ms);
1126 		return;
1127 	}
1128 
1129 	rc = nvme_ctrlr_identify_id_desc_async(ns);
1130 	if (rc) {
1131 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
1132 	}
1133 }
1134 
1135 static int
1136 nvme_ctrlr_identify_id_desc_async(struct spdk_nvme_ns *ns)
1137 {
1138 	struct spdk_nvme_ctrlr *ctrlr = ns->ctrlr;
1139 
1140 	memset(ns->id_desc_list, 0, sizeof(ns->id_desc_list));
1141 
1142 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_ID_DESCS,
1143 			     ctrlr->opts.admin_timeout_ms);
1144 	return nvme_ctrlr_cmd_identify(ns->ctrlr, SPDK_NVME_IDENTIFY_NS_ID_DESCRIPTOR_LIST,
1145 				       0, ns->id, ns->id_desc_list, sizeof(ns->id_desc_list),
1146 				       nvme_ctrlr_identify_id_desc_async_done, ns);
1147 }
1148 
1149 static int
1150 nvme_ctrlr_identify_id_desc_namespaces(struct spdk_nvme_ctrlr *ctrlr)
1151 {
1152 	uint32_t nsid;
1153 	struct spdk_nvme_ns *ns;
1154 	int rc;
1155 
1156 	if (ctrlr->vs.raw < SPDK_NVME_VERSION(1, 3, 0) ||
1157 	    (ctrlr->quirks & NVME_QUIRK_IDENTIFY_CNS)) {
1158 		SPDK_DEBUGLOG(SPDK_LOG_NVME, "Version < 1.3; not attempting to retrieve NS ID Descriptor List\n");
1159 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_CONFIGURE_AER,
1160 				     ctrlr->opts.admin_timeout_ms);
1161 		return 0;
1162 	}
1163 
1164 	nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr);
1165 	ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid);
1166 	if (ns == NULL) {
1167 		/* No active NS, move on to the next state */
1168 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_CONFIGURE_AER,
1169 				     ctrlr->opts.admin_timeout_ms);
1170 		return 0;
1171 	}
1172 
1173 	rc = nvme_ctrlr_identify_id_desc_async(ns);
1174 	if (rc) {
1175 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
1176 	}
1177 
1178 	return rc;
1179 }
1180 
1181 static void
1182 nvme_ctrlr_set_num_queues_done(void *arg, const struct spdk_nvme_cpl *cpl)
1183 {
1184 	struct spdk_nvme_ctrlr *ctrlr = (struct spdk_nvme_ctrlr *)arg;
1185 
1186 	if (spdk_nvme_cpl_is_error(cpl)) {
1187 		SPDK_ERRLOG("Set Features - Number of Queues failed!\n");
1188 	}
1189 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_GET_NUM_QUEUES,
1190 			     ctrlr->opts.admin_timeout_ms);
1191 }
1192 
1193 static int
1194 nvme_ctrlr_set_num_queues(struct spdk_nvme_ctrlr *ctrlr)
1195 {
1196 	int rc;
1197 
1198 	if (ctrlr->opts.num_io_queues > SPDK_NVME_MAX_IO_QUEUES) {
1199 		SPDK_NOTICELOG("Limiting requested num_io_queues %u to max %d\n",
1200 			       ctrlr->opts.num_io_queues, SPDK_NVME_MAX_IO_QUEUES);
1201 		ctrlr->opts.num_io_queues = SPDK_NVME_MAX_IO_QUEUES;
1202 	} else if (ctrlr->opts.num_io_queues < 1) {
1203 		SPDK_NOTICELOG("Requested num_io_queues 0, increasing to 1\n");
1204 		ctrlr->opts.num_io_queues = 1;
1205 	}
1206 
1207 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_SET_NUM_QUEUES,
1208 			     ctrlr->opts.admin_timeout_ms);
1209 
1210 	rc = nvme_ctrlr_cmd_set_num_queues(ctrlr, ctrlr->opts.num_io_queues,
1211 					   nvme_ctrlr_set_num_queues_done, ctrlr);
1212 	if (rc != 0) {
1213 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
1214 		return rc;
1215 	}
1216 
1217 	return 0;
1218 }
1219 
1220 static void
1221 nvme_ctrlr_get_num_queues_done(void *arg, const struct spdk_nvme_cpl *cpl)
1222 {
1223 	uint32_t cq_allocated, sq_allocated, min_allocated, i;
1224 	struct spdk_nvme_ctrlr *ctrlr = (struct spdk_nvme_ctrlr *)arg;
1225 
1226 	if (spdk_nvme_cpl_is_error(cpl)) {
1227 		SPDK_ERRLOG("Get Features - Number of Queues failed!\n");
1228 		ctrlr->opts.num_io_queues = 0;
1229 	} else {
1230 		/*
1231 		 * Data in cdw0 is 0-based.
1232 		 * Lower 16-bits indicate number of submission queues allocated.
1233 		 * Upper 16-bits indicate number of completion queues allocated.
1234 		 */
1235 		sq_allocated = (cpl->cdw0 & 0xFFFF) + 1;
1236 		cq_allocated = (cpl->cdw0 >> 16) + 1;
1237 
1238 		/*
1239 		 * For 1:1 queue mapping, set number of allocated queues to be minimum of
1240 		 * submission and completion queues.
1241 		 */
1242 		min_allocated = spdk_min(sq_allocated, cq_allocated);
1243 
1244 		/* Set number of queues to be minimum of requested and actually allocated. */
1245 		ctrlr->opts.num_io_queues = spdk_min(min_allocated, ctrlr->opts.num_io_queues);
1246 	}
1247 
1248 	ctrlr->free_io_qids = spdk_bit_array_create(ctrlr->opts.num_io_queues + 1);
1249 	if (ctrlr->free_io_qids == NULL) {
1250 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
1251 		return;
1252 	}
1253 
1254 	/* Initialize list of free I/O queue IDs. QID 0 is the admin queue. */
1255 	spdk_bit_array_clear(ctrlr->free_io_qids, 0);
1256 	for (i = 1; i <= ctrlr->opts.num_io_queues; i++) {
1257 		spdk_bit_array_set(ctrlr->free_io_qids, i);
1258 	}
1259 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_CONSTRUCT_NS,
1260 			     ctrlr->opts.admin_timeout_ms);
1261 }
1262 
1263 static int
1264 nvme_ctrlr_get_num_queues(struct spdk_nvme_ctrlr *ctrlr)
1265 {
1266 	int rc;
1267 
1268 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_GET_NUM_QUEUES,
1269 			     ctrlr->opts.admin_timeout_ms);
1270 
1271 	/* Obtain the number of queues allocated using Get Features. */
1272 	rc = nvme_ctrlr_cmd_get_num_queues(ctrlr, nvme_ctrlr_get_num_queues_done, ctrlr);
1273 	if (rc != 0) {
1274 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
1275 		return rc;
1276 	}
1277 
1278 	return 0;
1279 }
1280 
1281 static void
1282 nvme_ctrlr_set_keep_alive_timeout_done(void *arg, const struct spdk_nvme_cpl *cpl)
1283 {
1284 	uint32_t keep_alive_interval_ms;
1285 	struct spdk_nvme_ctrlr *ctrlr = (struct spdk_nvme_ctrlr *)arg;
1286 
1287 	if (spdk_nvme_cpl_is_error(cpl)) {
1288 		SPDK_ERRLOG("Keep alive timeout Get Feature failed: SC %x SCT %x\n",
1289 			    cpl->status.sc, cpl->status.sct);
1290 		ctrlr->opts.keep_alive_timeout_ms = 0;
1291 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
1292 		return;
1293 	}
1294 
1295 	if (ctrlr->opts.keep_alive_timeout_ms != cpl->cdw0) {
1296 		SPDK_DEBUGLOG(SPDK_LOG_NVME, "Controller adjusted keep alive timeout to %u ms\n",
1297 			      cpl->cdw0);
1298 	}
1299 
1300 	ctrlr->opts.keep_alive_timeout_ms = cpl->cdw0;
1301 
1302 	keep_alive_interval_ms = ctrlr->opts.keep_alive_timeout_ms / 2;
1303 	if (keep_alive_interval_ms == 0) {
1304 		keep_alive_interval_ms = 1;
1305 	}
1306 	SPDK_DEBUGLOG(SPDK_LOG_NVME, "Sending keep alive every %u ms\n", keep_alive_interval_ms);
1307 
1308 	ctrlr->keep_alive_interval_ticks = (keep_alive_interval_ms * spdk_get_ticks_hz()) / UINT64_C(1000);
1309 
1310 	/* Schedule the first Keep Alive to be sent as soon as possible. */
1311 	ctrlr->next_keep_alive_tick = spdk_get_ticks();
1312 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_HOST_ID,
1313 			     ctrlr->opts.admin_timeout_ms);
1314 }
1315 
1316 static int
1317 nvme_ctrlr_set_keep_alive_timeout(struct spdk_nvme_ctrlr *ctrlr)
1318 {
1319 	int rc;
1320 
1321 	if (ctrlr->opts.keep_alive_timeout_ms == 0) {
1322 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_HOST_ID,
1323 				     ctrlr->opts.admin_timeout_ms);
1324 		return 0;
1325 	}
1326 
1327 	if (ctrlr->cdata.kas == 0) {
1328 		SPDK_DEBUGLOG(SPDK_LOG_NVME, "Controller KAS is 0 - not enabling Keep Alive\n");
1329 		ctrlr->opts.keep_alive_timeout_ms = 0;
1330 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_HOST_ID,
1331 				     ctrlr->opts.admin_timeout_ms);
1332 		return 0;
1333 	}
1334 
1335 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_KEEP_ALIVE_TIMEOUT,
1336 			     ctrlr->opts.admin_timeout_ms);
1337 
1338 	/* Retrieve actual keep alive timeout, since the controller may have adjusted it. */
1339 	rc = spdk_nvme_ctrlr_cmd_get_feature(ctrlr, SPDK_NVME_FEAT_KEEP_ALIVE_TIMER, 0, NULL, 0,
1340 					     nvme_ctrlr_set_keep_alive_timeout_done, ctrlr);
1341 	if (rc != 0) {
1342 		SPDK_ERRLOG("Keep alive timeout Get Feature failed: %d\n", rc);
1343 		ctrlr->opts.keep_alive_timeout_ms = 0;
1344 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
1345 		return rc;
1346 	}
1347 
1348 	return 0;
1349 }
1350 
1351 static void
1352 nvme_ctrlr_set_host_id_done(void *arg, const struct spdk_nvme_cpl *cpl)
1353 {
1354 	struct spdk_nvme_ctrlr *ctrlr = (struct spdk_nvme_ctrlr *)arg;
1355 
1356 	if (spdk_nvme_cpl_is_error(cpl)) {
1357 		/*
1358 		 * Treat Set Features - Host ID failure as non-fatal, since the Host ID feature
1359 		 * is optional.
1360 		 */
1361 		SPDK_WARNLOG("Set Features - Host ID failed: SC 0x%x SCT 0x%x\n",
1362 			     cpl->status.sc, cpl->status.sct);
1363 	} else {
1364 		SPDK_DEBUGLOG(SPDK_LOG_NVME, "Set Features - Host ID was successful\n");
1365 	}
1366 
1367 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_READY, NVME_TIMEOUT_INFINITE);
1368 }
1369 
1370 static int
1371 nvme_ctrlr_set_host_id(struct spdk_nvme_ctrlr *ctrlr)
1372 {
1373 	uint8_t *host_id;
1374 	uint32_t host_id_size;
1375 	int rc;
1376 
1377 	if (ctrlr->trid.trtype != SPDK_NVME_TRANSPORT_PCIE) {
1378 		/*
1379 		 * NVMe-oF sends the host ID during Connect and doesn't allow
1380 		 * Set Features - Host Identifier after Connect, so we don't need to do anything here.
1381 		 */
1382 		SPDK_DEBUGLOG(SPDK_LOG_NVME, "NVMe-oF transport - not sending Set Features - Host ID\n");
1383 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_READY, NVME_TIMEOUT_INFINITE);
1384 		return 0;
1385 	}
1386 
1387 	if (ctrlr->cdata.ctratt.host_id_exhid_supported) {
1388 		SPDK_DEBUGLOG(SPDK_LOG_NVME, "Using 128-bit extended host identifier\n");
1389 		host_id = ctrlr->opts.extended_host_id;
1390 		host_id_size = sizeof(ctrlr->opts.extended_host_id);
1391 	} else {
1392 		SPDK_DEBUGLOG(SPDK_LOG_NVME, "Using 64-bit host identifier\n");
1393 		host_id = ctrlr->opts.host_id;
1394 		host_id_size = sizeof(ctrlr->opts.host_id);
1395 	}
1396 
1397 	/* If the user specified an all-zeroes host identifier, don't send the command. */
1398 	if (spdk_mem_all_zero(host_id, host_id_size)) {
1399 		SPDK_DEBUGLOG(SPDK_LOG_NVME,
1400 			      "User did not specify host ID - not sending Set Features - Host ID\n");
1401 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_READY, NVME_TIMEOUT_INFINITE);
1402 		return 0;
1403 	}
1404 
1405 	SPDK_LOGDUMP(SPDK_LOG_NVME, "host_id", host_id, host_id_size);
1406 
1407 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_HOST_ID,
1408 			     ctrlr->opts.admin_timeout_ms);
1409 
1410 	rc = nvme_ctrlr_cmd_set_host_id(ctrlr, host_id, host_id_size, nvme_ctrlr_set_host_id_done, ctrlr);
1411 	if (rc != 0) {
1412 		SPDK_ERRLOG("Set Features - Host ID failed: %d\n", rc);
1413 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
1414 		return rc;
1415 	}
1416 
1417 	return 0;
1418 }
1419 
1420 static void
1421 nvme_ctrlr_destruct_namespaces(struct spdk_nvme_ctrlr *ctrlr)
1422 {
1423 	if (ctrlr->ns) {
1424 		uint32_t i, num_ns = ctrlr->num_ns;
1425 
1426 		for (i = 0; i < num_ns; i++) {
1427 			nvme_ns_destruct(&ctrlr->ns[i]);
1428 		}
1429 
1430 		spdk_free(ctrlr->ns);
1431 		ctrlr->ns = NULL;
1432 		ctrlr->num_ns = 0;
1433 	}
1434 
1435 	if (ctrlr->nsdata) {
1436 		spdk_free(ctrlr->nsdata);
1437 		ctrlr->nsdata = NULL;
1438 	}
1439 
1440 	spdk_dma_free(ctrlr->active_ns_list);
1441 	ctrlr->active_ns_list = NULL;
1442 }
1443 
1444 static void
1445 nvme_ctrlr_update_namespaces(struct spdk_nvme_ctrlr *ctrlr)
1446 {
1447 	uint32_t i, nn = ctrlr->cdata.nn;
1448 	struct spdk_nvme_ns_data *nsdata;
1449 
1450 	for (i = 0; i < nn; i++) {
1451 		struct spdk_nvme_ns	*ns = &ctrlr->ns[i];
1452 		uint32_t		nsid = i + 1;
1453 
1454 		nsdata = &ctrlr->nsdata[nsid - 1];
1455 
1456 		if ((nsdata->ncap == 0) && spdk_nvme_ctrlr_is_active_ns(ctrlr, nsid)) {
1457 			if (nvme_ns_construct(ns, nsid, ctrlr) != 0) {
1458 				continue;
1459 			}
1460 		}
1461 
1462 		if (nsdata->ncap && !spdk_nvme_ctrlr_is_active_ns(ctrlr, nsid)) {
1463 			nvme_ns_destruct(ns);
1464 		}
1465 	}
1466 }
1467 
1468 static int
1469 nvme_ctrlr_construct_namespaces(struct spdk_nvme_ctrlr *ctrlr)
1470 {
1471 	int rc = 0;
1472 	uint32_t nn = ctrlr->cdata.nn;
1473 
1474 	/* ctrlr->num_ns may be 0 (startup) or a different number of namespaces (reset),
1475 	 * so check if we need to reallocate.
1476 	 */
1477 	if (nn != ctrlr->num_ns) {
1478 		nvme_ctrlr_destruct_namespaces(ctrlr);
1479 
1480 		if (nn == 0) {
1481 			SPDK_WARNLOG("controller has 0 namespaces\n");
1482 			return 0;
1483 		}
1484 
1485 		ctrlr->ns = spdk_zmalloc(nn * sizeof(struct spdk_nvme_ns), 64, NULL,
1486 					 SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_SHARE);
1487 		if (ctrlr->ns == NULL) {
1488 			rc = -ENOMEM;
1489 			goto fail;
1490 		}
1491 
1492 		ctrlr->nsdata = spdk_zmalloc(nn * sizeof(struct spdk_nvme_ns_data), 64,
1493 					     NULL, SPDK_ENV_SOCKET_ID_ANY,
1494 					     SPDK_MALLOC_SHARE | SPDK_MALLOC_DMA);
1495 		if (ctrlr->nsdata == NULL) {
1496 			rc = -ENOMEM;
1497 			goto fail;
1498 		}
1499 
1500 		ctrlr->num_ns = nn;
1501 	}
1502 
1503 	return 0;
1504 
1505 fail:
1506 	nvme_ctrlr_destruct_namespaces(ctrlr);
1507 	return rc;
1508 }
1509 
1510 static void
1511 nvme_ctrlr_async_event_cb(void *arg, const struct spdk_nvme_cpl *cpl)
1512 {
1513 	struct nvme_async_event_request	*aer = arg;
1514 	struct spdk_nvme_ctrlr		*ctrlr = aer->ctrlr;
1515 	struct spdk_nvme_ctrlr_process	*active_proc;
1516 	union spdk_nvme_async_event_completion	event;
1517 	int					rc;
1518 
1519 	if (cpl->status.sct == SPDK_NVME_SCT_GENERIC &&
1520 	    cpl->status.sc == SPDK_NVME_SC_ABORTED_SQ_DELETION) {
1521 		/*
1522 		 *  This is simulated when controller is being shut down, to
1523 		 *  effectively abort outstanding asynchronous event requests
1524 		 *  and make sure all memory is freed.  Do not repost the
1525 		 *  request in this case.
1526 		 */
1527 		return;
1528 	}
1529 
1530 	if (cpl->status.sct == SPDK_NVME_SCT_COMMAND_SPECIFIC &&
1531 	    cpl->status.sc == SPDK_NVME_SC_ASYNC_EVENT_REQUEST_LIMIT_EXCEEDED) {
1532 		/*
1533 		 *  SPDK will only send as many AERs as the device says it supports,
1534 		 *  so this status code indicates an out-of-spec device.  Do not repost
1535 		 *  the request in this case.
1536 		 */
1537 		SPDK_ERRLOG("Controller appears out-of-spec for asynchronous event request\n"
1538 			    "handling.  Do not repost this AER.\n");
1539 		return;
1540 	}
1541 
1542 	event.raw = cpl->cdw0;
1543 	if ((event.bits.async_event_type == SPDK_NVME_ASYNC_EVENT_TYPE_NOTICE) &&
1544 	    (event.bits.async_event_info == SPDK_NVME_ASYNC_EVENT_NS_ATTR_CHANGED)) {
1545 		rc = nvme_ctrlr_identify_active_ns(ctrlr);
1546 		if (rc) {
1547 			return;
1548 		}
1549 		nvme_ctrlr_update_namespaces(ctrlr);
1550 	}
1551 
1552 	active_proc = spdk_nvme_ctrlr_get_current_process(ctrlr);
1553 	if (active_proc && active_proc->aer_cb_fn) {
1554 		active_proc->aer_cb_fn(active_proc->aer_cb_arg, cpl);
1555 	}
1556 
1557 	/* If the ctrlr is already shutdown, we should not send aer again */
1558 	if (ctrlr->is_shutdown) {
1559 		return;
1560 	}
1561 
1562 	/*
1563 	 * Repost another asynchronous event request to replace the one
1564 	 *  that just completed.
1565 	 */
1566 	if (nvme_ctrlr_construct_and_submit_aer(ctrlr, aer)) {
1567 		/*
1568 		 * We can't do anything to recover from a failure here,
1569 		 * so just print a warning message and leave the AER unsubmitted.
1570 		 */
1571 		SPDK_ERRLOG("resubmitting AER failed!\n");
1572 	}
1573 }
1574 
1575 static int
1576 nvme_ctrlr_construct_and_submit_aer(struct spdk_nvme_ctrlr *ctrlr,
1577 				    struct nvme_async_event_request *aer)
1578 {
1579 	struct nvme_request *req;
1580 
1581 	aer->ctrlr = ctrlr;
1582 	req = nvme_allocate_request_null(ctrlr->adminq, nvme_ctrlr_async_event_cb, aer);
1583 	aer->req = req;
1584 	if (req == NULL) {
1585 		return -1;
1586 	}
1587 
1588 	req->cmd.opc = SPDK_NVME_OPC_ASYNC_EVENT_REQUEST;
1589 	return nvme_ctrlr_submit_admin_request(ctrlr, req);
1590 }
1591 
1592 static void
1593 nvme_ctrlr_configure_aer_done(void *arg, const struct spdk_nvme_cpl *cpl)
1594 {
1595 	struct nvme_async_event_request		*aer;
1596 	int					rc;
1597 	uint32_t				i;
1598 	struct spdk_nvme_ctrlr *ctrlr =	(struct spdk_nvme_ctrlr *)arg;
1599 
1600 	if (spdk_nvme_cpl_is_error(cpl)) {
1601 		SPDK_NOTICELOG("nvme_ctrlr_configure_aer failed!\n");
1602 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_SUPPORTED_LOG_PAGES,
1603 				     ctrlr->opts.admin_timeout_ms);
1604 		return;
1605 	}
1606 
1607 	/* aerl is a zero-based value, so we need to add 1 here. */
1608 	ctrlr->num_aers = spdk_min(NVME_MAX_ASYNC_EVENTS, (ctrlr->cdata.aerl + 1));
1609 
1610 	for (i = 0; i < ctrlr->num_aers; i++) {
1611 		aer = &ctrlr->aer[i];
1612 		rc = nvme_ctrlr_construct_and_submit_aer(ctrlr, aer);
1613 		if (rc) {
1614 			SPDK_ERRLOG("nvme_ctrlr_construct_and_submit_aer failed!\n");
1615 			nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
1616 			return;
1617 		}
1618 	}
1619 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_SUPPORTED_LOG_PAGES,
1620 			     ctrlr->opts.admin_timeout_ms);
1621 }
1622 
1623 static int
1624 nvme_ctrlr_configure_aer(struct spdk_nvme_ctrlr *ctrlr)
1625 {
1626 	union spdk_nvme_feat_async_event_configuration	config;
1627 	int						rc;
1628 
1629 	config.raw = 0;
1630 	config.bits.crit_warn.bits.available_spare = 1;
1631 	config.bits.crit_warn.bits.temperature = 1;
1632 	config.bits.crit_warn.bits.device_reliability = 1;
1633 	config.bits.crit_warn.bits.read_only = 1;
1634 	config.bits.crit_warn.bits.volatile_memory_backup = 1;
1635 
1636 	if (ctrlr->vs.raw >= SPDK_NVME_VERSION(1, 2, 0)) {
1637 		if (ctrlr->cdata.oaes.ns_attribute_notices) {
1638 			config.bits.ns_attr_notice = 1;
1639 		}
1640 		if (ctrlr->cdata.oaes.fw_activation_notices) {
1641 			config.bits.fw_activation_notice = 1;
1642 		}
1643 	}
1644 	if (ctrlr->vs.raw >= SPDK_NVME_VERSION(1, 3, 0) && ctrlr->cdata.lpa.telemetry) {
1645 		config.bits.telemetry_log_notice = 1;
1646 	}
1647 
1648 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_CONFIGURE_AER,
1649 			     ctrlr->opts.admin_timeout_ms);
1650 
1651 	rc = nvme_ctrlr_cmd_set_async_event_config(ctrlr, config,
1652 			nvme_ctrlr_configure_aer_done,
1653 			ctrlr);
1654 	if (rc != 0) {
1655 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
1656 		return rc;
1657 	}
1658 
1659 	return 0;
1660 }
1661 
1662 struct spdk_nvme_ctrlr_process *
1663 spdk_nvme_ctrlr_get_process(struct spdk_nvme_ctrlr *ctrlr, pid_t pid)
1664 {
1665 	struct spdk_nvme_ctrlr_process	*active_proc;
1666 
1667 	TAILQ_FOREACH(active_proc, &ctrlr->active_procs, tailq) {
1668 		if (active_proc->pid == pid) {
1669 			return active_proc;
1670 		}
1671 	}
1672 
1673 	return NULL;
1674 }
1675 
1676 struct spdk_nvme_ctrlr_process *
1677 spdk_nvme_ctrlr_get_current_process(struct spdk_nvme_ctrlr *ctrlr)
1678 {
1679 	return spdk_nvme_ctrlr_get_process(ctrlr, getpid());
1680 }
1681 
1682 /**
1683  * This function will be called when a process is using the controller.
1684  *  1. For the primary process, it is called when constructing the controller.
1685  *  2. For the secondary process, it is called at probing the controller.
1686  * Note: will check whether the process is already added for the same process.
1687  */
1688 int
1689 nvme_ctrlr_add_process(struct spdk_nvme_ctrlr *ctrlr, void *devhandle)
1690 {
1691 	struct spdk_nvme_ctrlr_process	*ctrlr_proc;
1692 	pid_t				pid = getpid();
1693 
1694 	/* Check whether the process is already added or not */
1695 	if (spdk_nvme_ctrlr_get_process(ctrlr, pid)) {
1696 		return 0;
1697 	}
1698 
1699 	/* Initialize the per process properties for this ctrlr */
1700 	ctrlr_proc = spdk_zmalloc(sizeof(struct spdk_nvme_ctrlr_process),
1701 				  64, NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_SHARE);
1702 	if (ctrlr_proc == NULL) {
1703 		SPDK_ERRLOG("failed to allocate memory to track the process props\n");
1704 
1705 		return -1;
1706 	}
1707 
1708 	ctrlr_proc->is_primary = spdk_process_is_primary();
1709 	ctrlr_proc->pid = pid;
1710 	STAILQ_INIT(&ctrlr_proc->active_reqs);
1711 	ctrlr_proc->devhandle = devhandle;
1712 	ctrlr_proc->ref = 0;
1713 	TAILQ_INIT(&ctrlr_proc->allocated_io_qpairs);
1714 
1715 	TAILQ_INSERT_TAIL(&ctrlr->active_procs, ctrlr_proc, tailq);
1716 
1717 	return 0;
1718 }
1719 
1720 /**
1721  * This function will be called when the process detaches the controller.
1722  * Note: the ctrlr_lock must be held when calling this function.
1723  */
1724 static void
1725 nvme_ctrlr_remove_process(struct spdk_nvme_ctrlr *ctrlr,
1726 			  struct spdk_nvme_ctrlr_process *proc)
1727 {
1728 	struct spdk_nvme_qpair	*qpair, *tmp_qpair;
1729 
1730 	assert(STAILQ_EMPTY(&proc->active_reqs));
1731 
1732 	TAILQ_FOREACH_SAFE(qpair, &proc->allocated_io_qpairs, per_process_tailq, tmp_qpair) {
1733 		spdk_nvme_ctrlr_free_io_qpair(qpair);
1734 	}
1735 
1736 	TAILQ_REMOVE(&ctrlr->active_procs, proc, tailq);
1737 
1738 	if (ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) {
1739 		spdk_pci_device_detach(proc->devhandle);
1740 	}
1741 
1742 	spdk_dma_free(proc);
1743 }
1744 
1745 /**
1746  * This function will be called when the process exited unexpectedly
1747  *  in order to free any incomplete nvme request, allocated IO qpairs
1748  *  and allocated memory.
1749  * Note: the ctrlr_lock must be held when calling this function.
1750  */
1751 static void
1752 nvme_ctrlr_cleanup_process(struct spdk_nvme_ctrlr_process *proc)
1753 {
1754 	struct nvme_request	*req, *tmp_req;
1755 	struct spdk_nvme_qpair	*qpair, *tmp_qpair;
1756 
1757 	STAILQ_FOREACH_SAFE(req, &proc->active_reqs, stailq, tmp_req) {
1758 		STAILQ_REMOVE(&proc->active_reqs, req, nvme_request, stailq);
1759 
1760 		assert(req->pid == proc->pid);
1761 
1762 		nvme_free_request(req);
1763 	}
1764 
1765 	TAILQ_FOREACH_SAFE(qpair, &proc->allocated_io_qpairs, per_process_tailq, tmp_qpair) {
1766 		TAILQ_REMOVE(&proc->allocated_io_qpairs, qpair, per_process_tailq);
1767 
1768 		/*
1769 		 * The process may have been killed while some qpairs were in their
1770 		 *  completion context.  Clear that flag here to allow these IO
1771 		 *  qpairs to be deleted.
1772 		 */
1773 		qpair->in_completion_context = 0;
1774 
1775 		qpair->no_deletion_notification_needed = 1;
1776 
1777 		spdk_nvme_ctrlr_free_io_qpair(qpair);
1778 	}
1779 
1780 	spdk_dma_free(proc);
1781 }
1782 
1783 /**
1784  * This function will be called when destructing the controller.
1785  *  1. There is no more admin request on this controller.
1786  *  2. Clean up any left resource allocation when its associated process is gone.
1787  */
1788 void
1789 nvme_ctrlr_free_processes(struct spdk_nvme_ctrlr *ctrlr)
1790 {
1791 	struct spdk_nvme_ctrlr_process	*active_proc, *tmp;
1792 
1793 	/* Free all the processes' properties and make sure no pending admin IOs */
1794 	TAILQ_FOREACH_SAFE(active_proc, &ctrlr->active_procs, tailq, tmp) {
1795 		TAILQ_REMOVE(&ctrlr->active_procs, active_proc, tailq);
1796 
1797 		assert(STAILQ_EMPTY(&active_proc->active_reqs));
1798 
1799 		spdk_free(active_proc);
1800 	}
1801 }
1802 
1803 /**
1804  * This function will be called when any other process attaches or
1805  *  detaches the controller in order to cleanup those unexpectedly
1806  *  terminated processes.
1807  * Note: the ctrlr_lock must be held when calling this function.
1808  */
1809 static int
1810 nvme_ctrlr_remove_inactive_proc(struct spdk_nvme_ctrlr *ctrlr)
1811 {
1812 	struct spdk_nvme_ctrlr_process	*active_proc, *tmp;
1813 	int				active_proc_count = 0;
1814 
1815 	TAILQ_FOREACH_SAFE(active_proc, &ctrlr->active_procs, tailq, tmp) {
1816 		if ((kill(active_proc->pid, 0) == -1) && (errno == ESRCH)) {
1817 			SPDK_ERRLOG("process %d terminated unexpected\n", active_proc->pid);
1818 
1819 			TAILQ_REMOVE(&ctrlr->active_procs, active_proc, tailq);
1820 
1821 			nvme_ctrlr_cleanup_process(active_proc);
1822 		} else {
1823 			active_proc_count++;
1824 		}
1825 	}
1826 
1827 	return active_proc_count;
1828 }
1829 
1830 void
1831 nvme_ctrlr_proc_get_ref(struct spdk_nvme_ctrlr *ctrlr)
1832 {
1833 	struct spdk_nvme_ctrlr_process	*active_proc;
1834 
1835 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
1836 
1837 	nvme_ctrlr_remove_inactive_proc(ctrlr);
1838 
1839 	active_proc = spdk_nvme_ctrlr_get_current_process(ctrlr);
1840 	if (active_proc) {
1841 		active_proc->ref++;
1842 	}
1843 
1844 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
1845 }
1846 
1847 void
1848 nvme_ctrlr_proc_put_ref(struct spdk_nvme_ctrlr *ctrlr)
1849 {
1850 	struct spdk_nvme_ctrlr_process	*active_proc;
1851 	int				proc_count;
1852 
1853 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
1854 
1855 	proc_count = nvme_ctrlr_remove_inactive_proc(ctrlr);
1856 
1857 	active_proc = spdk_nvme_ctrlr_get_current_process(ctrlr);
1858 	if (active_proc) {
1859 		active_proc->ref--;
1860 		assert(active_proc->ref >= 0);
1861 
1862 		/*
1863 		 * The last active process will be removed at the end of
1864 		 * the destruction of the controller.
1865 		 */
1866 		if (active_proc->ref == 0 && proc_count != 1) {
1867 			nvme_ctrlr_remove_process(ctrlr, active_proc);
1868 		}
1869 	}
1870 
1871 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
1872 }
1873 
1874 int
1875 nvme_ctrlr_get_ref_count(struct spdk_nvme_ctrlr *ctrlr)
1876 {
1877 	struct spdk_nvme_ctrlr_process	*active_proc;
1878 	int				ref = 0;
1879 
1880 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
1881 
1882 	nvme_ctrlr_remove_inactive_proc(ctrlr);
1883 
1884 	TAILQ_FOREACH(active_proc, &ctrlr->active_procs, tailq) {
1885 		ref += active_proc->ref;
1886 	}
1887 
1888 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
1889 
1890 	return ref;
1891 }
1892 
1893 /**
1894  *  Get the PCI device handle which is only visible to its associated process.
1895  */
1896 struct spdk_pci_device *
1897 nvme_ctrlr_proc_get_devhandle(struct spdk_nvme_ctrlr *ctrlr)
1898 {
1899 	struct spdk_nvme_ctrlr_process	*active_proc;
1900 	struct spdk_pci_device		*devhandle = NULL;
1901 
1902 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
1903 
1904 	active_proc = spdk_nvme_ctrlr_get_current_process(ctrlr);
1905 	if (active_proc) {
1906 		devhandle = active_proc->devhandle;
1907 	}
1908 
1909 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
1910 
1911 	return devhandle;
1912 }
1913 
1914 static void
1915 nvme_ctrlr_enable_admin_queue(struct spdk_nvme_ctrlr *ctrlr)
1916 {
1917 	nvme_transport_qpair_reset(ctrlr->adminq);
1918 	nvme_qpair_enable(ctrlr->adminq);
1919 }
1920 
1921 /**
1922  * This function will be called repeatedly during initialization until the controller is ready.
1923  */
1924 int
1925 nvme_ctrlr_process_init(struct spdk_nvme_ctrlr *ctrlr)
1926 {
1927 	union spdk_nvme_cc_register cc;
1928 	union spdk_nvme_csts_register csts;
1929 	uint32_t ready_timeout_in_ms;
1930 	int rc = 0;
1931 
1932 	/*
1933 	 * May need to avoid accessing any register on the target controller
1934 	 * for a while. Return early without touching the FSM.
1935 	 * Check sleep_timeout_tsc > 0 for unit test.
1936 	 */
1937 	if ((ctrlr->sleep_timeout_tsc > 0) &&
1938 	    (spdk_get_ticks() <= ctrlr->sleep_timeout_tsc)) {
1939 		return 0;
1940 	}
1941 	ctrlr->sleep_timeout_tsc = 0;
1942 
1943 	if (nvme_ctrlr_get_cc(ctrlr, &cc) ||
1944 	    nvme_ctrlr_get_csts(ctrlr, &csts)) {
1945 		if (ctrlr->state_timeout_tsc != NVME_TIMEOUT_INFINITE) {
1946 			/* While a device is resetting, it may be unable to service MMIO reads
1947 			 * temporarily. Allow for this case.
1948 			 */
1949 			SPDK_ERRLOG("Get registers failed while waiting for CSTS.RDY == 0\n");
1950 			goto init_timeout;
1951 		}
1952 		SPDK_ERRLOG("Failed to read CC and CSTS in state %d\n", ctrlr->state);
1953 		nvme_ctrlr_fail(ctrlr, false);
1954 		return -EIO;
1955 	}
1956 
1957 	ready_timeout_in_ms = 500 * ctrlr->cap.bits.to;
1958 
1959 	/*
1960 	 * Check if the current initialization step is done or has timed out.
1961 	 */
1962 	switch (ctrlr->state) {
1963 	case NVME_CTRLR_STATE_INIT_DELAY:
1964 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_INIT, ready_timeout_in_ms);
1965 		/*
1966 		 * Controller may need some delay before it's enabled.
1967 		 *
1968 		 * This is a workaround for an issue where the PCIe-attached NVMe controller
1969 		 * is not ready after VFIO reset. We delay the initialization rather than the
1970 		 * enabling itself, because this is required only for the very first enabling
1971 		 * - directly after a VFIO reset.
1972 		 *
1973 		 * TODO: Figure out what is actually going wrong.
1974 		 */
1975 		SPDK_DEBUGLOG(SPDK_LOG_NVME, "Adding 2 second delay before initializing the controller\n");
1976 		ctrlr->sleep_timeout_tsc = spdk_get_ticks() + (2000 * spdk_get_ticks_hz() / 1000);
1977 		break;
1978 
1979 	case NVME_CTRLR_STATE_INIT:
1980 		/* Begin the hardware initialization by making sure the controller is disabled. */
1981 		if (cc.bits.en) {
1982 			SPDK_DEBUGLOG(SPDK_LOG_NVME, "CC.EN = 1\n");
1983 			/*
1984 			 * Controller is currently enabled. We need to disable it to cause a reset.
1985 			 *
1986 			 * If CC.EN = 1 && CSTS.RDY = 0, the controller is in the process of becoming ready.
1987 			 *  Wait for the ready bit to be 1 before disabling the controller.
1988 			 */
1989 			if (csts.bits.rdy == 0) {
1990 				SPDK_DEBUGLOG(SPDK_LOG_NVME, "CC.EN = 1 && CSTS.RDY = 0 - waiting for reset to complete\n");
1991 				nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1, ready_timeout_in_ms);
1992 				return 0;
1993 			}
1994 
1995 			/* CC.EN = 1 && CSTS.RDY == 1, so we can immediately disable the controller. */
1996 			SPDK_DEBUGLOG(SPDK_LOG_NVME, "Setting CC.EN = 0\n");
1997 			cc.bits.en = 0;
1998 			if (nvme_ctrlr_set_cc(ctrlr, &cc)) {
1999 				SPDK_ERRLOG("set_cc() failed\n");
2000 				nvme_ctrlr_fail(ctrlr, false);
2001 				return -EIO;
2002 			}
2003 			nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0, ready_timeout_in_ms);
2004 
2005 			/*
2006 			 * Wait 2.5 seconds before accessing PCI registers.
2007 			 * Not using sleep() to avoid blocking other controller's initialization.
2008 			 */
2009 			if (ctrlr->quirks & NVME_QUIRK_DELAY_BEFORE_CHK_RDY) {
2010 				SPDK_DEBUGLOG(SPDK_LOG_NVME, "Applying quirk: delay 2.5 seconds before reading registers\n");
2011 				ctrlr->sleep_timeout_tsc = spdk_get_ticks() + (2500 * spdk_get_ticks_hz() / 1000);
2012 			}
2013 			return 0;
2014 		} else {
2015 			if (csts.bits.rdy == 1) {
2016 				SPDK_DEBUGLOG(SPDK_LOG_NVME, "CC.EN = 0 && CSTS.RDY = 1 - waiting for shutdown to complete\n");
2017 			}
2018 
2019 			nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0, ready_timeout_in_ms);
2020 			return 0;
2021 		}
2022 		break;
2023 
2024 	case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1:
2025 		if (csts.bits.rdy == 1) {
2026 			SPDK_DEBUGLOG(SPDK_LOG_NVME, "CC.EN = 1 && CSTS.RDY = 1 - disabling controller\n");
2027 			/* CC.EN = 1 && CSTS.RDY = 1, so we can set CC.EN = 0 now. */
2028 			SPDK_DEBUGLOG(SPDK_LOG_NVME, "Setting CC.EN = 0\n");
2029 			cc.bits.en = 0;
2030 			if (nvme_ctrlr_set_cc(ctrlr, &cc)) {
2031 				SPDK_ERRLOG("set_cc() failed\n");
2032 				nvme_ctrlr_fail(ctrlr, false);
2033 				return -EIO;
2034 			}
2035 			nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0, ready_timeout_in_ms);
2036 			return 0;
2037 		}
2038 		break;
2039 
2040 	case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0:
2041 		if (csts.bits.rdy == 0) {
2042 			SPDK_DEBUGLOG(SPDK_LOG_NVME, "CC.EN = 0 && CSTS.RDY = 0\n");
2043 			nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ENABLE, ready_timeout_in_ms);
2044 			/*
2045 			 * Delay 100us before setting CC.EN = 1.  Some NVMe SSDs miss CC.EN getting
2046 			 *  set to 1 if it is too soon after CSTS.RDY is reported as 0.
2047 			 */
2048 			spdk_delay_us(100);
2049 			return 0;
2050 		}
2051 		break;
2052 
2053 	case NVME_CTRLR_STATE_ENABLE:
2054 		SPDK_DEBUGLOG(SPDK_LOG_NVME, "Setting CC.EN = 1\n");
2055 		rc = nvme_ctrlr_enable(ctrlr);
2056 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1, ready_timeout_in_ms);
2057 		return rc;
2058 
2059 	case NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1:
2060 		if (csts.bits.rdy == 1) {
2061 			SPDK_DEBUGLOG(SPDK_LOG_NVME, "CC.EN = 1 && CSTS.RDY = 1 - controller is ready\n");
2062 			/*
2063 			 * The controller has been enabled.
2064 			 *  Perform the rest of initialization serially.
2065 			 */
2066 			nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ENABLE_ADMIN_QUEUE,
2067 					     ctrlr->opts.admin_timeout_ms);
2068 			return 0;
2069 		}
2070 		break;
2071 
2072 	case NVME_CTRLR_STATE_ENABLE_ADMIN_QUEUE:
2073 		nvme_ctrlr_enable_admin_queue(ctrlr);
2074 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY,
2075 				     ctrlr->opts.admin_timeout_ms);
2076 		break;
2077 
2078 	case NVME_CTRLR_STATE_IDENTIFY:
2079 		rc = nvme_ctrlr_identify(ctrlr);
2080 		break;
2081 
2082 	case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY:
2083 		spdk_nvme_qpair_process_completions(ctrlr->adminq, 0);
2084 		break;
2085 
2086 	case NVME_CTRLR_STATE_SET_NUM_QUEUES:
2087 		rc = nvme_ctrlr_set_num_queues(ctrlr);
2088 		break;
2089 
2090 	case NVME_CTRLR_STATE_WAIT_FOR_SET_NUM_QUEUES:
2091 		spdk_nvme_qpair_process_completions(ctrlr->adminq, 0);
2092 		break;
2093 
2094 	case NVME_CTRLR_STATE_GET_NUM_QUEUES:
2095 		rc = nvme_ctrlr_get_num_queues(ctrlr);
2096 		break;
2097 
2098 	case NVME_CTRLR_STATE_WAIT_FOR_GET_NUM_QUEUES:
2099 		spdk_nvme_qpair_process_completions(ctrlr->adminq, 0);
2100 		break;
2101 
2102 	case NVME_CTRLR_STATE_CONSTRUCT_NS:
2103 		rc = nvme_ctrlr_construct_namespaces(ctrlr);
2104 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_ACTIVE_NS,
2105 				     ctrlr->opts.admin_timeout_ms);
2106 		break;
2107 
2108 	case NVME_CTRLR_STATE_IDENTIFY_ACTIVE_NS:
2109 		rc = nvme_ctrlr_identify_active_ns(ctrlr);
2110 		if (rc < 0) {
2111 			nvme_ctrlr_destruct_namespaces(ctrlr);
2112 		}
2113 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_NS,
2114 				     ctrlr->opts.admin_timeout_ms);
2115 		break;
2116 
2117 	case NVME_CTRLR_STATE_IDENTIFY_NS:
2118 		rc = nvme_ctrlr_identify_namespaces(ctrlr);
2119 		break;
2120 
2121 	case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_NS:
2122 		spdk_nvme_qpair_process_completions(ctrlr->adminq, 0);
2123 		break;
2124 
2125 	case NVME_CTRLR_STATE_IDENTIFY_ID_DESCS:
2126 		rc = nvme_ctrlr_identify_id_desc_namespaces(ctrlr);
2127 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_CONFIGURE_AER,
2128 				     ctrlr->opts.admin_timeout_ms);
2129 		break;
2130 
2131 	case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_ID_DESCS:
2132 		spdk_nvme_qpair_process_completions(ctrlr->adminq, 0);
2133 		break;
2134 
2135 	case NVME_CTRLR_STATE_CONFIGURE_AER:
2136 		rc = nvme_ctrlr_configure_aer(ctrlr);
2137 		break;
2138 
2139 	case NVME_CTRLR_STATE_WAIT_FOR_CONFIGURE_AER:
2140 		spdk_nvme_qpair_process_completions(ctrlr->adminq, 0);
2141 		break;
2142 
2143 	case NVME_CTRLR_STATE_SET_SUPPORTED_LOG_PAGES:
2144 		rc = nvme_ctrlr_set_supported_log_pages(ctrlr);
2145 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_SUPPORTED_FEATURES,
2146 				     ctrlr->opts.admin_timeout_ms);
2147 		break;
2148 
2149 	case NVME_CTRLR_STATE_SET_SUPPORTED_FEATURES:
2150 		nvme_ctrlr_set_supported_features(ctrlr);
2151 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_DB_BUF_CFG,
2152 				     ctrlr->opts.admin_timeout_ms);
2153 		break;
2154 
2155 	case NVME_CTRLR_STATE_SET_DB_BUF_CFG:
2156 		rc = nvme_ctrlr_set_doorbell_buffer_config(ctrlr);
2157 		break;
2158 
2159 	case NVME_CTRLR_STATE_WAIT_FOR_DB_BUF_CFG:
2160 		spdk_nvme_qpair_process_completions(ctrlr->adminq, 0);
2161 		break;
2162 
2163 	case NVME_CTRLR_STATE_SET_KEEP_ALIVE_TIMEOUT:
2164 		rc = nvme_ctrlr_set_keep_alive_timeout(ctrlr);
2165 		break;
2166 
2167 	case NVME_CTRLR_STATE_WAIT_FOR_KEEP_ALIVE_TIMEOUT:
2168 		spdk_nvme_qpair_process_completions(ctrlr->adminq, 0);
2169 		break;
2170 
2171 	case NVME_CTRLR_STATE_SET_HOST_ID:
2172 		rc = nvme_ctrlr_set_host_id(ctrlr);
2173 		break;
2174 
2175 	case NVME_CTRLR_STATE_WAIT_FOR_HOST_ID:
2176 		spdk_nvme_qpair_process_completions(ctrlr->adminq, 0);
2177 		break;
2178 
2179 	case NVME_CTRLR_STATE_READY:
2180 		SPDK_DEBUGLOG(SPDK_LOG_NVME, "Ctrlr already in ready state\n");
2181 		return 0;
2182 
2183 	case NVME_CTRLR_STATE_ERROR:
2184 		SPDK_ERRLOG("Ctrlr %s is in error state\n", ctrlr->trid.traddr);
2185 		return -1;
2186 
2187 	default:
2188 		assert(0);
2189 		nvme_ctrlr_fail(ctrlr, false);
2190 		return -1;
2191 	}
2192 
2193 init_timeout:
2194 	if (ctrlr->state_timeout_tsc != NVME_TIMEOUT_INFINITE &&
2195 	    spdk_get_ticks() > ctrlr->state_timeout_tsc) {
2196 		SPDK_ERRLOG("Initialization timed out in state %d\n", ctrlr->state);
2197 		nvme_ctrlr_fail(ctrlr, false);
2198 		return -1;
2199 	}
2200 
2201 	return rc;
2202 }
2203 
2204 int
2205 nvme_robust_mutex_init_recursive_shared(pthread_mutex_t *mtx)
2206 {
2207 	pthread_mutexattr_t attr;
2208 	int rc = 0;
2209 
2210 	if (pthread_mutexattr_init(&attr)) {
2211 		return -1;
2212 	}
2213 	if (pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE) ||
2214 #ifndef __FreeBSD__
2215 	    pthread_mutexattr_setrobust(&attr, PTHREAD_MUTEX_ROBUST) ||
2216 	    pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED) ||
2217 #endif
2218 	    pthread_mutex_init(mtx, &attr)) {
2219 		rc = -1;
2220 	}
2221 	pthread_mutexattr_destroy(&attr);
2222 	return rc;
2223 }
2224 
2225 int
2226 nvme_ctrlr_construct(struct spdk_nvme_ctrlr *ctrlr)
2227 {
2228 	int rc;
2229 
2230 	if (ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) {
2231 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_INIT_DELAY, NVME_TIMEOUT_INFINITE);
2232 	} else {
2233 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_INIT, NVME_TIMEOUT_INFINITE);
2234 	}
2235 
2236 	ctrlr->flags = 0;
2237 	ctrlr->free_io_qids = NULL;
2238 	ctrlr->is_resetting = false;
2239 	ctrlr->is_failed = false;
2240 	ctrlr->is_shutdown = false;
2241 
2242 	TAILQ_INIT(&ctrlr->active_io_qpairs);
2243 	STAILQ_INIT(&ctrlr->queued_aborts);
2244 	ctrlr->outstanding_aborts = 0;
2245 
2246 	rc = nvme_robust_mutex_init_recursive_shared(&ctrlr->ctrlr_lock);
2247 	if (rc != 0) {
2248 		return rc;
2249 	}
2250 
2251 	TAILQ_INIT(&ctrlr->active_procs);
2252 
2253 	return rc;
2254 }
2255 
2256 /* This function should be called once at ctrlr initialization to set up constant properties. */
2257 void
2258 nvme_ctrlr_init_cap(struct spdk_nvme_ctrlr *ctrlr, const union spdk_nvme_cap_register *cap,
2259 		    const union spdk_nvme_vs_register *vs)
2260 {
2261 	ctrlr->cap = *cap;
2262 	ctrlr->vs = *vs;
2263 
2264 	ctrlr->min_page_size = 1u << (12 + ctrlr->cap.bits.mpsmin);
2265 
2266 	/* For now, always select page_size == min_page_size. */
2267 	ctrlr->page_size = ctrlr->min_page_size;
2268 
2269 	ctrlr->opts.io_queue_size = spdk_max(ctrlr->opts.io_queue_size, SPDK_NVME_IO_QUEUE_MIN_ENTRIES);
2270 	ctrlr->opts.io_queue_size = spdk_min(ctrlr->opts.io_queue_size, MAX_IO_QUEUE_ENTRIES);
2271 	ctrlr->opts.io_queue_size = spdk_min(ctrlr->opts.io_queue_size, ctrlr->cap.bits.mqes + 1u);
2272 
2273 	ctrlr->opts.io_queue_requests = spdk_max(ctrlr->opts.io_queue_requests, ctrlr->opts.io_queue_size);
2274 }
2275 
2276 void
2277 nvme_ctrlr_destruct_finish(struct spdk_nvme_ctrlr *ctrlr)
2278 {
2279 	pthread_mutex_destroy(&ctrlr->ctrlr_lock);
2280 }
2281 
2282 void
2283 nvme_ctrlr_destruct(struct spdk_nvme_ctrlr *ctrlr)
2284 {
2285 	struct spdk_nvme_qpair *qpair, *tmp;
2286 
2287 	SPDK_DEBUGLOG(SPDK_LOG_NVME, "Prepare to destruct SSD: %s\n", ctrlr->trid.traddr);
2288 	TAILQ_FOREACH_SAFE(qpair, &ctrlr->active_io_qpairs, tailq, tmp) {
2289 		spdk_nvme_ctrlr_free_io_qpair(qpair);
2290 	}
2291 
2292 	nvme_ctrlr_free_doorbell_buffer(ctrlr);
2293 
2294 	nvme_ctrlr_shutdown(ctrlr);
2295 
2296 	nvme_ctrlr_destruct_namespaces(ctrlr);
2297 
2298 	spdk_bit_array_free(&ctrlr->free_io_qids);
2299 
2300 	nvme_transport_ctrlr_destruct(ctrlr);
2301 }
2302 
2303 int
2304 nvme_ctrlr_submit_admin_request(struct spdk_nvme_ctrlr *ctrlr,
2305 				struct nvme_request *req)
2306 {
2307 	return nvme_qpair_submit_request(ctrlr->adminq, req);
2308 }
2309 
2310 static void
2311 nvme_keep_alive_completion(void *cb_ctx, const struct spdk_nvme_cpl *cpl)
2312 {
2313 	/* Do nothing */
2314 }
2315 
2316 /*
2317  * Check if we need to send a Keep Alive command.
2318  * Caller must hold ctrlr->ctrlr_lock.
2319  */
2320 static void
2321 nvme_ctrlr_keep_alive(struct spdk_nvme_ctrlr *ctrlr)
2322 {
2323 	uint64_t now;
2324 	struct nvme_request *req;
2325 	struct spdk_nvme_cmd *cmd;
2326 	int rc;
2327 
2328 	now = spdk_get_ticks();
2329 	if (now < ctrlr->next_keep_alive_tick) {
2330 		return;
2331 	}
2332 
2333 	req = nvme_allocate_request_null(ctrlr->adminq, nvme_keep_alive_completion, NULL);
2334 	if (req == NULL) {
2335 		return;
2336 	}
2337 
2338 	cmd = &req->cmd;
2339 	cmd->opc = SPDK_NVME_OPC_KEEP_ALIVE;
2340 
2341 	rc = nvme_ctrlr_submit_admin_request(ctrlr, req);
2342 	if (rc != 0) {
2343 		SPDK_ERRLOG("Submitting Keep Alive failed\n");
2344 	}
2345 
2346 	ctrlr->next_keep_alive_tick = now + ctrlr->keep_alive_interval_ticks;
2347 }
2348 
2349 int32_t
2350 spdk_nvme_ctrlr_process_admin_completions(struct spdk_nvme_ctrlr *ctrlr)
2351 {
2352 	int32_t num_completions;
2353 
2354 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
2355 	if (ctrlr->keep_alive_interval_ticks) {
2356 		nvme_ctrlr_keep_alive(ctrlr);
2357 	}
2358 	num_completions = spdk_nvme_qpair_process_completions(ctrlr->adminq, 0);
2359 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
2360 
2361 	return num_completions;
2362 }
2363 
2364 const struct spdk_nvme_ctrlr_data *
2365 spdk_nvme_ctrlr_get_data(struct spdk_nvme_ctrlr *ctrlr)
2366 {
2367 	return &ctrlr->cdata;
2368 }
2369 
2370 union spdk_nvme_csts_register spdk_nvme_ctrlr_get_regs_csts(struct spdk_nvme_ctrlr *ctrlr)
2371 {
2372 	union spdk_nvme_csts_register csts;
2373 
2374 	if (nvme_ctrlr_get_csts(ctrlr, &csts)) {
2375 		csts.raw = 0xFFFFFFFFu;
2376 	}
2377 	return csts;
2378 }
2379 
2380 union spdk_nvme_cap_register spdk_nvme_ctrlr_get_regs_cap(struct spdk_nvme_ctrlr *ctrlr)
2381 {
2382 	return ctrlr->cap;
2383 }
2384 
2385 union spdk_nvme_vs_register spdk_nvme_ctrlr_get_regs_vs(struct spdk_nvme_ctrlr *ctrlr)
2386 {
2387 	return ctrlr->vs;
2388 }
2389 
2390 union spdk_nvme_cmbsz_register spdk_nvme_ctrlr_get_regs_cmbsz(struct spdk_nvme_ctrlr *ctrlr)
2391 {
2392 	union spdk_nvme_cmbsz_register cmbsz;
2393 
2394 	if (nvme_ctrlr_get_cmbsz(ctrlr, &cmbsz)) {
2395 		cmbsz.raw = 0;
2396 	}
2397 
2398 	return cmbsz;
2399 }
2400 
2401 uint32_t
2402 spdk_nvme_ctrlr_get_num_ns(struct spdk_nvme_ctrlr *ctrlr)
2403 {
2404 	return ctrlr->num_ns;
2405 }
2406 
2407 static int32_t
2408 spdk_nvme_ctrlr_active_ns_idx(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid)
2409 {
2410 	int32_t result = -1;
2411 
2412 	if (ctrlr->active_ns_list == NULL || nsid == 0 || nsid > ctrlr->num_ns) {
2413 		return result;
2414 	}
2415 
2416 	int32_t lower = 0;
2417 	int32_t upper = ctrlr->num_ns - 1;
2418 	int32_t mid;
2419 
2420 	while (lower <= upper) {
2421 		mid = lower + (upper - lower) / 2;
2422 		if (ctrlr->active_ns_list[mid] == nsid) {
2423 			result = mid;
2424 			break;
2425 		} else {
2426 			if (ctrlr->active_ns_list[mid] != 0 && ctrlr->active_ns_list[mid] < nsid) {
2427 				lower = mid + 1;
2428 			} else {
2429 				upper = mid - 1;
2430 			}
2431 
2432 		}
2433 	}
2434 
2435 	return result;
2436 }
2437 
2438 bool
2439 spdk_nvme_ctrlr_is_active_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid)
2440 {
2441 	return spdk_nvme_ctrlr_active_ns_idx(ctrlr, nsid) != -1;
2442 }
2443 
2444 uint32_t
2445 spdk_nvme_ctrlr_get_first_active_ns(struct spdk_nvme_ctrlr *ctrlr)
2446 {
2447 	return ctrlr->active_ns_list ? ctrlr->active_ns_list[0] : 0;
2448 }
2449 
2450 uint32_t
2451 spdk_nvme_ctrlr_get_next_active_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t prev_nsid)
2452 {
2453 	int32_t nsid_idx = spdk_nvme_ctrlr_active_ns_idx(ctrlr, prev_nsid);
2454 	if (ctrlr->active_ns_list && nsid_idx >= 0 && (uint32_t)nsid_idx < ctrlr->num_ns - 1) {
2455 		return ctrlr->active_ns_list[nsid_idx + 1];
2456 	}
2457 	return 0;
2458 }
2459 
2460 struct spdk_nvme_ns *
2461 spdk_nvme_ctrlr_get_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid)
2462 {
2463 	if (nsid < 1 || nsid > ctrlr->num_ns) {
2464 		return NULL;
2465 	}
2466 
2467 	return &ctrlr->ns[nsid - 1];
2468 }
2469 
2470 struct spdk_pci_device *
2471 spdk_nvme_ctrlr_get_pci_device(struct spdk_nvme_ctrlr *ctrlr)
2472 {
2473 	if (ctrlr == NULL) {
2474 		return NULL;
2475 	}
2476 
2477 	if (ctrlr->trid.trtype != SPDK_NVME_TRANSPORT_PCIE) {
2478 		return NULL;
2479 	}
2480 
2481 	return nvme_ctrlr_proc_get_devhandle(ctrlr);
2482 }
2483 
2484 uint32_t
2485 spdk_nvme_ctrlr_get_max_xfer_size(const struct spdk_nvme_ctrlr *ctrlr)
2486 {
2487 	return ctrlr->max_xfer_size;
2488 }
2489 
2490 void
2491 spdk_nvme_ctrlr_register_aer_callback(struct spdk_nvme_ctrlr *ctrlr,
2492 				      spdk_nvme_aer_cb aer_cb_fn,
2493 				      void *aer_cb_arg)
2494 {
2495 	struct spdk_nvme_ctrlr_process *active_proc;
2496 
2497 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
2498 
2499 	active_proc = spdk_nvme_ctrlr_get_current_process(ctrlr);
2500 	if (active_proc) {
2501 		active_proc->aer_cb_fn = aer_cb_fn;
2502 		active_proc->aer_cb_arg = aer_cb_arg;
2503 	}
2504 
2505 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
2506 }
2507 
2508 void
2509 spdk_nvme_ctrlr_register_timeout_callback(struct spdk_nvme_ctrlr *ctrlr,
2510 		uint64_t timeout_us, spdk_nvme_timeout_cb cb_fn, void *cb_arg)
2511 {
2512 	struct spdk_nvme_ctrlr_process	*active_proc;
2513 
2514 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
2515 
2516 	active_proc = spdk_nvme_ctrlr_get_current_process(ctrlr);
2517 	if (active_proc) {
2518 		active_proc->timeout_ticks = timeout_us * spdk_get_ticks_hz() / 1000000ULL;
2519 		active_proc->timeout_cb_fn = cb_fn;
2520 		active_proc->timeout_cb_arg = cb_arg;
2521 	}
2522 
2523 	ctrlr->timeout_enabled = true;
2524 
2525 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
2526 }
2527 
2528 bool
2529 spdk_nvme_ctrlr_is_log_page_supported(struct spdk_nvme_ctrlr *ctrlr, uint8_t log_page)
2530 {
2531 	/* No bounds check necessary, since log_page is uint8_t and log_page_supported has 256 entries */
2532 	SPDK_STATIC_ASSERT(sizeof(ctrlr->log_page_supported) == 256, "log_page_supported size mismatch");
2533 	return ctrlr->log_page_supported[log_page];
2534 }
2535 
2536 bool
2537 spdk_nvme_ctrlr_is_feature_supported(struct spdk_nvme_ctrlr *ctrlr, uint8_t feature_code)
2538 {
2539 	/* No bounds check necessary, since feature_code is uint8_t and feature_supported has 256 entries */
2540 	SPDK_STATIC_ASSERT(sizeof(ctrlr->feature_supported) == 256, "feature_supported size mismatch");
2541 	return ctrlr->feature_supported[feature_code];
2542 }
2543 
2544 int
2545 spdk_nvme_ctrlr_attach_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid,
2546 			  struct spdk_nvme_ctrlr_list *payload)
2547 {
2548 	struct nvme_completion_poll_status	status;
2549 	int					res;
2550 	struct spdk_nvme_ns			*ns;
2551 
2552 	res = nvme_ctrlr_cmd_attach_ns(ctrlr, nsid, payload,
2553 				       nvme_completion_poll_cb, &status);
2554 	if (res) {
2555 		return res;
2556 	}
2557 	if (spdk_nvme_wait_for_completion_robust_lock(ctrlr->adminq, &status, &ctrlr->ctrlr_lock)) {
2558 		SPDK_ERRLOG("spdk_nvme_ctrlr_attach_ns failed!\n");
2559 		return -ENXIO;
2560 	}
2561 
2562 	res = nvme_ctrlr_identify_active_ns(ctrlr);
2563 	if (res) {
2564 		return res;
2565 	}
2566 
2567 	ns = &ctrlr->ns[nsid - 1];
2568 	return nvme_ns_construct(ns, nsid, ctrlr);
2569 }
2570 
2571 int
2572 spdk_nvme_ctrlr_detach_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid,
2573 			  struct spdk_nvme_ctrlr_list *payload)
2574 {
2575 	struct nvme_completion_poll_status	status;
2576 	int					res;
2577 	struct spdk_nvme_ns			*ns;
2578 
2579 	res = nvme_ctrlr_cmd_detach_ns(ctrlr, nsid, payload,
2580 				       nvme_completion_poll_cb, &status);
2581 	if (res) {
2582 		return res;
2583 	}
2584 	if (spdk_nvme_wait_for_completion_robust_lock(ctrlr->adminq, &status, &ctrlr->ctrlr_lock)) {
2585 		SPDK_ERRLOG("spdk_nvme_ctrlr_detach_ns failed!\n");
2586 		return -ENXIO;
2587 	}
2588 
2589 	res = nvme_ctrlr_identify_active_ns(ctrlr);
2590 	if (res) {
2591 		return res;
2592 	}
2593 
2594 	ns = &ctrlr->ns[nsid - 1];
2595 	/* Inactive NS */
2596 	nvme_ns_destruct(ns);
2597 
2598 	return 0;
2599 }
2600 
2601 uint32_t
2602 spdk_nvme_ctrlr_create_ns(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_ns_data *payload)
2603 {
2604 	struct nvme_completion_poll_status	status;
2605 	int					res;
2606 	uint32_t				nsid;
2607 	struct spdk_nvme_ns			*ns;
2608 
2609 	res = nvme_ctrlr_cmd_create_ns(ctrlr, payload, nvme_completion_poll_cb, &status);
2610 	if (res) {
2611 		return 0;
2612 	}
2613 	if (spdk_nvme_wait_for_completion_robust_lock(ctrlr->adminq, &status, &ctrlr->ctrlr_lock)) {
2614 		SPDK_ERRLOG("spdk_nvme_ctrlr_create_ns failed!\n");
2615 		return 0;
2616 	}
2617 
2618 	nsid = status.cpl.cdw0;
2619 	ns = &ctrlr->ns[nsid - 1];
2620 	/* Inactive NS */
2621 	res = nvme_ns_construct(ns, nsid, ctrlr);
2622 	if (res) {
2623 		return 0;
2624 	}
2625 
2626 	/* Return the namespace ID that was created */
2627 	return nsid;
2628 }
2629 
2630 int
2631 spdk_nvme_ctrlr_delete_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid)
2632 {
2633 	struct nvme_completion_poll_status	status;
2634 	int					res;
2635 	struct spdk_nvme_ns			*ns;
2636 
2637 	res = nvme_ctrlr_cmd_delete_ns(ctrlr, nsid, nvme_completion_poll_cb, &status);
2638 	if (res) {
2639 		return res;
2640 	}
2641 	if (spdk_nvme_wait_for_completion_robust_lock(ctrlr->adminq, &status, &ctrlr->ctrlr_lock)) {
2642 		SPDK_ERRLOG("spdk_nvme_ctrlr_delete_ns failed!\n");
2643 		return -ENXIO;
2644 	}
2645 
2646 	res = nvme_ctrlr_identify_active_ns(ctrlr);
2647 	if (res) {
2648 		return res;
2649 	}
2650 
2651 	ns = &ctrlr->ns[nsid - 1];
2652 	nvme_ns_destruct(ns);
2653 
2654 	return 0;
2655 }
2656 
2657 int
2658 spdk_nvme_ctrlr_format(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid,
2659 		       struct spdk_nvme_format *format)
2660 {
2661 	struct nvme_completion_poll_status	status;
2662 	int					res;
2663 
2664 	res = nvme_ctrlr_cmd_format(ctrlr, nsid, format, nvme_completion_poll_cb,
2665 				    &status);
2666 	if (res) {
2667 		return res;
2668 	}
2669 	if (spdk_nvme_wait_for_completion_robust_lock(ctrlr->adminq, &status, &ctrlr->ctrlr_lock)) {
2670 		SPDK_ERRLOG("spdk_nvme_ctrlr_format failed!\n");
2671 		return -ENXIO;
2672 	}
2673 
2674 	return spdk_nvme_ctrlr_reset(ctrlr);
2675 }
2676 
2677 int
2678 spdk_nvme_ctrlr_update_firmware(struct spdk_nvme_ctrlr *ctrlr, void *payload, uint32_t size,
2679 				int slot, enum spdk_nvme_fw_commit_action commit_action, struct spdk_nvme_status *completion_status)
2680 {
2681 	struct spdk_nvme_fw_commit		fw_commit;
2682 	struct nvme_completion_poll_status	status;
2683 	int					res;
2684 	unsigned int				size_remaining;
2685 	unsigned int				offset;
2686 	unsigned int				transfer;
2687 	void					*p;
2688 
2689 	if (!completion_status) {
2690 		return -EINVAL;
2691 	}
2692 	memset(completion_status, 0, sizeof(struct spdk_nvme_status));
2693 	if (size % 4) {
2694 		SPDK_ERRLOG("spdk_nvme_ctrlr_update_firmware invalid size!\n");
2695 		return -1;
2696 	}
2697 
2698 	/* Current support only for SPDK_NVME_FW_COMMIT_REPLACE_IMG
2699 	 * and SPDK_NVME_FW_COMMIT_REPLACE_AND_ENABLE_IMG
2700 	 */
2701 	if ((commit_action != SPDK_NVME_FW_COMMIT_REPLACE_IMG) &&
2702 	    (commit_action != SPDK_NVME_FW_COMMIT_REPLACE_AND_ENABLE_IMG)) {
2703 		SPDK_ERRLOG("spdk_nvme_ctrlr_update_firmware invalid command!\n");
2704 		return -1;
2705 	}
2706 
2707 	/* Firmware download */
2708 	size_remaining = size;
2709 	offset = 0;
2710 	p = payload;
2711 
2712 	while (size_remaining > 0) {
2713 		transfer = spdk_min(size_remaining, ctrlr->min_page_size);
2714 
2715 		res = nvme_ctrlr_cmd_fw_image_download(ctrlr, transfer, offset, p,
2716 						       nvme_completion_poll_cb,
2717 						       &status);
2718 		if (res) {
2719 			return res;
2720 		}
2721 
2722 		if (spdk_nvme_wait_for_completion_robust_lock(ctrlr->adminq, &status, &ctrlr->ctrlr_lock)) {
2723 			SPDK_ERRLOG("spdk_nvme_ctrlr_fw_image_download failed!\n");
2724 			return -ENXIO;
2725 		}
2726 		p += transfer;
2727 		offset += transfer;
2728 		size_remaining -= transfer;
2729 	}
2730 
2731 	/* Firmware commit */
2732 	memset(&fw_commit, 0, sizeof(struct spdk_nvme_fw_commit));
2733 	fw_commit.fs = slot;
2734 	fw_commit.ca = commit_action;
2735 
2736 	res = nvme_ctrlr_cmd_fw_commit(ctrlr, &fw_commit, nvme_completion_poll_cb,
2737 				       &status);
2738 	if (res) {
2739 		return res;
2740 	}
2741 
2742 	res = spdk_nvme_wait_for_completion_robust_lock(ctrlr->adminq, &status, &ctrlr->ctrlr_lock);
2743 
2744 	memcpy(completion_status, &status.cpl.status, sizeof(struct spdk_nvme_status));
2745 
2746 	if (res) {
2747 		if (status.cpl.status.sct != SPDK_NVME_SCT_COMMAND_SPECIFIC ||
2748 		    status.cpl.status.sc != SPDK_NVME_SC_FIRMWARE_REQ_NVM_RESET) {
2749 			if (status.cpl.status.sct == SPDK_NVME_SCT_COMMAND_SPECIFIC  &&
2750 			    status.cpl.status.sc == SPDK_NVME_SC_FIRMWARE_REQ_CONVENTIONAL_RESET) {
2751 				SPDK_NOTICELOG("firmware activation requires conventional reset to be performed. !\n");
2752 			} else {
2753 				SPDK_ERRLOG("nvme_ctrlr_cmd_fw_commit failed!\n");
2754 			}
2755 			return -ENXIO;
2756 		}
2757 	}
2758 
2759 	return spdk_nvme_ctrlr_reset(ctrlr);
2760 }
2761 
2762 void *
2763 spdk_nvme_ctrlr_alloc_cmb_io_buffer(struct spdk_nvme_ctrlr *ctrlr, size_t size)
2764 {
2765 	void *buf;
2766 
2767 	if (size == 0) {
2768 		return NULL;
2769 	}
2770 
2771 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
2772 	buf = nvme_transport_ctrlr_alloc_cmb_io_buffer(ctrlr, size);
2773 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
2774 
2775 	return buf;
2776 }
2777 
2778 void
2779 spdk_nvme_ctrlr_free_cmb_io_buffer(struct spdk_nvme_ctrlr *ctrlr, void *buf, size_t size)
2780 {
2781 	if (buf && size) {
2782 		nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
2783 		nvme_transport_ctrlr_free_cmb_io_buffer(ctrlr, buf, size);
2784 		nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
2785 	}
2786 }
2787 
2788 bool
2789 spdk_nvme_ctrlr_is_discovery(struct spdk_nvme_ctrlr *ctrlr)
2790 {
2791 	assert(ctrlr);
2792 
2793 	return !strncmp(ctrlr->trid.subnqn, SPDK_NVMF_DISCOVERY_NQN,
2794 			strlen(SPDK_NVMF_DISCOVERY_NQN));
2795 }
2796 
2797 int
2798 spdk_nvme_ctrlr_security_receive(struct spdk_nvme_ctrlr *ctrlr, uint8_t secp,
2799 				 uint16_t spsp, uint8_t nssf, void *payload, size_t size)
2800 {
2801 	struct nvme_completion_poll_status	status;
2802 	int					res;
2803 
2804 	res = nvme_ctrlr_cmd_security_receive(ctrlr, secp, spsp, nssf, payload, size,
2805 					      nvme_completion_poll_cb, &status);
2806 	if (res) {
2807 		return res;
2808 	}
2809 	if (spdk_nvme_wait_for_completion_robust_lock(ctrlr->adminq, &status, &ctrlr->ctrlr_lock)) {
2810 		SPDK_ERRLOG("spdk_nvme_ctrlr_security_receive failed!\n");
2811 		return -ENXIO;
2812 	}
2813 
2814 	return 0;
2815 }
2816 
2817 int
2818 spdk_nvme_ctrlr_security_send(struct spdk_nvme_ctrlr *ctrlr, uint8_t secp,
2819 			      uint16_t spsp, uint8_t nssf, void *payload, size_t size)
2820 {
2821 	struct nvme_completion_poll_status	status;
2822 	int					res;
2823 
2824 	res = nvme_ctrlr_cmd_security_send(ctrlr, secp, spsp, nssf, payload, size, nvme_completion_poll_cb,
2825 					   &status);
2826 	if (res) {
2827 		return res;
2828 	}
2829 	if (spdk_nvme_wait_for_completion_robust_lock(ctrlr->adminq, &status, &ctrlr->ctrlr_lock)) {
2830 		SPDK_ERRLOG("spdk_nvme_ctrlr_security_send failed!\n");
2831 		return -ENXIO;
2832 	}
2833 
2834 	return 0;
2835 }
2836 
2837 uint64_t
2838 spdk_nvme_ctrlr_get_flags(struct spdk_nvme_ctrlr *ctrlr)
2839 {
2840 	return ctrlr->flags;
2841 }
2842