xref: /spdk/lib/nvme/nvme_ctrlr.c (revision 2fac05e919e1940137e4502f01beabb81ebbef9c)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright (c) Intel Corporation.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #include "spdk/stdinc.h"
35 
36 #include "nvme_internal.h"
37 
38 #include "spdk/env.h"
39 #include "spdk/string.h"
40 
41 static int nvme_ctrlr_construct_and_submit_aer(struct spdk_nvme_ctrlr *ctrlr,
42 		struct nvme_async_event_request *aer);
43 
44 static int
45 nvme_ctrlr_get_cc(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_cc_register *cc)
46 {
47 	return nvme_transport_ctrlr_get_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, cc.raw),
48 					      &cc->raw);
49 }
50 
51 static int
52 nvme_ctrlr_get_csts(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_csts_register *csts)
53 {
54 	return nvme_transport_ctrlr_get_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, csts.raw),
55 					      &csts->raw);
56 }
57 
58 int
59 nvme_ctrlr_get_cap(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_cap_register *cap)
60 {
61 	return nvme_transport_ctrlr_get_reg_8(ctrlr, offsetof(struct spdk_nvme_registers, cap.raw),
62 					      &cap->raw);
63 }
64 
65 int
66 nvme_ctrlr_get_vs(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_vs_register *vs)
67 {
68 	return nvme_transport_ctrlr_get_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, vs.raw),
69 					      &vs->raw);
70 }
71 
72 static int
73 nvme_ctrlr_set_cc(struct spdk_nvme_ctrlr *ctrlr, const union spdk_nvme_cc_register *cc)
74 {
75 	return nvme_transport_ctrlr_set_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, cc.raw),
76 					      cc->raw);
77 }
78 
79 void
80 spdk_nvme_ctrlr_get_default_ctrlr_opts(struct spdk_nvme_ctrlr_opts *opts, size_t opts_size)
81 {
82 	char host_id_str[SPDK_UUID_STRING_LEN];
83 
84 	assert(opts);
85 
86 	memset(opts, 0, opts_size);
87 
88 #define FIELD_OK(field) \
89 	offsetof(struct spdk_nvme_ctrlr_opts, field) + sizeof(opts->field) <= opts_size
90 
91 	if (FIELD_OK(num_io_queues)) {
92 		opts->num_io_queues = DEFAULT_MAX_IO_QUEUES;
93 	}
94 
95 	if (FIELD_OK(use_cmb_sqs)) {
96 		opts->use_cmb_sqs = true;
97 	}
98 
99 	if (FIELD_OK(arb_mechanism)) {
100 		opts->arb_mechanism = SPDK_NVME_CC_AMS_RR;
101 	}
102 
103 	if (FIELD_OK(keep_alive_timeout_ms)) {
104 		opts->keep_alive_timeout_ms = 10 * 1000;
105 	}
106 
107 	if (FIELD_OK(io_queue_size)) {
108 		opts->io_queue_size = DEFAULT_IO_QUEUE_SIZE;
109 	}
110 
111 	if (FIELD_OK(io_queue_requests)) {
112 		opts->io_queue_requests = DEFAULT_IO_QUEUE_REQUESTS;
113 	}
114 
115 	if (FIELD_OK(host_id)) {
116 		memset(opts->host_id, 0, sizeof(opts->host_id));
117 	}
118 
119 	if (nvme_driver_init() == 0) {
120 		if (FIELD_OK(extended_host_id)) {
121 			memcpy(opts->extended_host_id, &g_spdk_nvme_driver->default_extended_host_id,
122 			       sizeof(opts->extended_host_id));
123 		}
124 
125 		if (FIELD_OK(hostnqn)) {
126 			spdk_uuid_fmt_lower(host_id_str, sizeof(host_id_str),
127 					    &g_spdk_nvme_driver->default_extended_host_id);
128 			snprintf(opts->hostnqn, sizeof(opts->hostnqn), "nqn.2014-08.org.nvmexpress:uuid:%s", host_id_str);
129 		}
130 	}
131 
132 	if (FIELD_OK(src_addr)) {
133 		memset(opts->src_addr, 0, sizeof(opts->src_addr));
134 	}
135 
136 	if (FIELD_OK(src_svcid)) {
137 		memset(opts->src_svcid, 0, sizeof(opts->src_svcid));
138 	}
139 
140 	if (FIELD_OK(command_set)) {
141 		opts->command_set = SPDK_NVME_CC_CSS_NVM;
142 	}
143 #undef FIELD_OK
144 }
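/*
 * The FIELD_OK/opts_size pattern above allows the opts structure to grow
 * without breaking ABI compatibility: only the fields that fit within the
 * caller-provided opts_size are written.  A minimal usage sketch (the
 * caller and its override are hypothetical, not part of this file):
 *
 *	struct spdk_nvme_ctrlr_opts opts;
 *
 *	spdk_nvme_ctrlr_get_default_ctrlr_opts(&opts, sizeof(opts));
 *	opts.num_io_queues = 8;		// override a default before attaching
 */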
145 
146 /**
147  * This function will be called when the process allocates the IO qpair.
148  * Note: the ctrlr_lock must be held when calling this function.
149  */
150 static void
151 nvme_ctrlr_proc_add_io_qpair(struct spdk_nvme_qpair *qpair)
152 {
153 	struct spdk_nvme_ctrlr_process	*active_proc;
154 	struct spdk_nvme_ctrlr		*ctrlr = qpair->ctrlr;
155 
156 	active_proc = spdk_nvme_ctrlr_get_current_process(ctrlr);
157 	if (active_proc) {
158 		TAILQ_INSERT_TAIL(&active_proc->allocated_io_qpairs, qpair, per_process_tailq);
159 		qpair->active_proc = active_proc;
160 	}
161 }
162 
163 /**
164  * This function will be called when the process frees the IO qpair.
165  * Note: the ctrlr_lock must be held when calling this function.
166  */
167 static void
168 nvme_ctrlr_proc_remove_io_qpair(struct spdk_nvme_qpair *qpair)
169 {
170 	struct spdk_nvme_ctrlr_process	*active_proc;
171 	struct spdk_nvme_ctrlr		*ctrlr = qpair->ctrlr;
172 	struct spdk_nvme_qpair          *active_qpair, *tmp_qpair;
173 
174 	active_proc = spdk_nvme_ctrlr_get_current_process(ctrlr);
175 	if (!active_proc) {
176 		return;
177 	}
178 
179 	TAILQ_FOREACH_SAFE(active_qpair, &active_proc->allocated_io_qpairs,
180 			   per_process_tailq, tmp_qpair) {
181 		if (active_qpair == qpair) {
182 			TAILQ_REMOVE(&active_proc->allocated_io_qpairs,
183 				     active_qpair, per_process_tailq);
184 
185 			break;
186 		}
187 	}
188 }
189 
190 void
191 spdk_nvme_ctrlr_get_default_io_qpair_opts(struct spdk_nvme_ctrlr *ctrlr,
192 		struct spdk_nvme_io_qpair_opts *opts,
193 		size_t opts_size)
194 {
195 	assert(ctrlr);
196 
197 	assert(opts);
198 
199 	memset(opts, 0, opts_size);
200 
201 #define FIELD_OK(field) \
202 	offsetof(struct spdk_nvme_io_qpair_opts, field) + sizeof(opts->field) <= opts_size
203 
204 	if (FIELD_OK(qprio)) {
205 		opts->qprio = SPDK_NVME_QPRIO_URGENT;
206 	}
207 
208 	if (FIELD_OK(io_queue_size)) {
209 		opts->io_queue_size = ctrlr->opts.io_queue_size;
210 	}
211 
212 	if (FIELD_OK(io_queue_requests)) {
213 		opts->io_queue_requests = ctrlr->opts.io_queue_requests;
214 	}
215 
216 #undef FIELD_OK
217 }
218 
219 struct spdk_nvme_qpair *
220 spdk_nvme_ctrlr_alloc_io_qpair(struct spdk_nvme_ctrlr *ctrlr,
221 			       const struct spdk_nvme_io_qpair_opts *user_opts,
222 			       size_t opts_size)
223 {
224 	uint32_t				qid;
225 	struct spdk_nvme_qpair			*qpair;
226 	union spdk_nvme_cc_register		cc;
227 	struct spdk_nvme_io_qpair_opts		opts;
228 
229 	if (!ctrlr) {
230 		return NULL;
231 	}
232 
233 	/*
234 	 * Get the default options, then overwrite them with the user-provided options
235 	 * up to opts_size.
236 	 *
237 	 * This allows for extensions of the opts structure without breaking
238 	 * ABI compatibility.
239 	 */
240 	spdk_nvme_ctrlr_get_default_io_qpair_opts(ctrlr, &opts, sizeof(opts));
241 	if (user_opts) {
242 		memcpy(&opts, user_opts, spdk_min(sizeof(opts), opts_size));
243 	}
244 
245 	if (nvme_ctrlr_get_cc(ctrlr, &cc)) {
246 		SPDK_ERRLOG("get_cc failed\n");
247 		return NULL;
248 	}
249 
250 	/* Only the low 2 bits (values 0, 1, 2, 3) of QPRIO are valid. */
251 	if ((opts.qprio & 3) != opts.qprio) {
252 		return NULL;
253 	}
254 
255 	/*
256 	 * Only value SPDK_NVME_QPRIO_URGENT(0) is valid for the
257 	 * default round robin arbitration method.
258 	 */
259 	if ((cc.bits.ams == SPDK_NVME_CC_AMS_RR) && (opts.qprio != SPDK_NVME_QPRIO_URGENT)) {
260 		SPDK_ERRLOG("invalid queue priority for default round robin arbitration method\n");
261 		return NULL;
262 	}
263 
264 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
265 
266 	/*
267 	 * Get the first available I/O queue ID.
268 	 */
269 	qid = spdk_bit_array_find_first_set(ctrlr->free_io_qids, 1);
270 	if (qid > ctrlr->opts.num_io_queues) {
271 		SPDK_ERRLOG("No free I/O queue IDs\n");
272 		nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
273 		return NULL;
274 	}
275 
276 	qpair = nvme_transport_ctrlr_create_io_qpair(ctrlr, qid, &opts);
277 	if (qpair == NULL) {
278 		SPDK_ERRLOG("nvme_transport_ctrlr_create_io_qpair() failed\n");
279 		nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
280 		return NULL;
281 	}
282 	spdk_bit_array_clear(ctrlr->free_io_qids, qid);
283 	TAILQ_INSERT_TAIL(&ctrlr->active_io_qpairs, qpair, tailq);
284 
285 	nvme_ctrlr_proc_add_io_qpair(qpair);
286 
287 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
288 
289 	if (ctrlr->quirks & NVME_QUIRK_DELAY_AFTER_QUEUE_ALLOC) {
290 		spdk_delay_us(100);
291 	}
292 
293 	return qpair;
294 }
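/*
 * A minimal usage sketch for the allocation path above (the caller and its
 * chosen queue size are hypothetical, not part of this file):
 *
 *	struct spdk_nvme_io_qpair_opts opts;
 *	struct spdk_nvme_qpair *qpair;
 *
 *	spdk_nvme_ctrlr_get_default_io_qpair_opts(ctrlr, &opts, sizeof(opts));
 *	opts.io_queue_size = 256;
 *	qpair = spdk_nvme_ctrlr_alloc_io_qpair(ctrlr, &opts, sizeof(opts));
 *	if (qpair == NULL) {
 *		// no free queue ID, invalid qprio, or transport failure
 *	}
 */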
295 
296 int
297 spdk_nvme_ctrlr_free_io_qpair(struct spdk_nvme_qpair *qpair)
298 {
299 	struct spdk_nvme_ctrlr *ctrlr;
300 
301 	if (qpair == NULL) {
302 		return 0;
303 	}
304 
305 	ctrlr = qpair->ctrlr;
306 
307 	if (qpair->in_completion_context) {
308 		/*
309 		 * There are many cases where it is convenient to delete an io qpair in the context
310 		 *  of that qpair's completion routine.  To handle this properly, set a flag here
311 		 *  so that the completion routine will perform an actual delete after the context
312 		 *  unwinds.
313 		 */
314 		qpair->delete_after_completion_context = 1;
315 		return 0;
316 	}
317 
318 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
319 
320 	nvme_ctrlr_proc_remove_io_qpair(qpair);
321 
322 	TAILQ_REMOVE(&ctrlr->active_io_qpairs, qpair, tailq);
323 	spdk_bit_array_set(ctrlr->free_io_qids, qpair->id);
324 
325 	if (nvme_transport_ctrlr_delete_io_qpair(ctrlr, qpair)) {
326 		nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
327 		return -1;
328 	}
329 
330 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
331 	return 0;
332 }
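/*
 * Because of the delete_after_completion_context handling above, it is safe
 * to call spdk_nvme_ctrlr_free_io_qpair() from within one of that qpair's
 * own completion callbacks.  A sketch (hypothetical callback, not part of
 * this file):
 *
 *	static void
 *	io_complete(void *arg, const struct spdk_nvme_cpl *cpl)
 *	{
 *		struct spdk_nvme_qpair *qpair = arg;
 *
 *		// Returns 0 immediately; the actual delete is deferred until
 *		// the completion context unwinds.
 *		spdk_nvme_ctrlr_free_io_qpair(qpair);
 *	}
 */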
333 
334 static void
335 nvme_ctrlr_construct_intel_support_log_page_list(struct spdk_nvme_ctrlr *ctrlr,
336 		struct spdk_nvme_intel_log_page_directory *log_page_directory)
337 {
338 	if (log_page_directory == NULL) {
339 		return;
340 	}
341 
342 	if (ctrlr->cdata.vid != SPDK_PCI_VID_INTEL) {
343 		return;
344 	}
345 
346 	ctrlr->log_page_supported[SPDK_NVME_INTEL_LOG_PAGE_DIRECTORY] = true;
347 
348 	if (log_page_directory->read_latency_log_len ||
349 	    (ctrlr->quirks & NVME_INTEL_QUIRK_READ_LATENCY)) {
350 		ctrlr->log_page_supported[SPDK_NVME_INTEL_LOG_READ_CMD_LATENCY] = true;
351 	}
352 	if (log_page_directory->write_latency_log_len ||
353 	    (ctrlr->quirks & NVME_INTEL_QUIRK_WRITE_LATENCY)) {
354 		ctrlr->log_page_supported[SPDK_NVME_INTEL_LOG_WRITE_CMD_LATENCY] = true;
355 	}
356 	if (log_page_directory->temperature_statistics_log_len) {
357 		ctrlr->log_page_supported[SPDK_NVME_INTEL_LOG_TEMPERATURE] = true;
358 	}
359 	if (log_page_directory->smart_log_len) {
360 		ctrlr->log_page_supported[SPDK_NVME_INTEL_LOG_SMART] = true;
361 	}
362 	if (log_page_directory->marketing_description_log_len) {
363 		ctrlr->log_page_supported[SPDK_NVME_INTEL_MARKETING_DESCRIPTION] = true;
364 	}
365 }
366 
367 static int nvme_ctrlr_set_intel_support_log_pages(struct spdk_nvme_ctrlr *ctrlr)
368 {
369 	uint64_t phys_addr = 0;
370 	struct nvme_completion_poll_status	status;
371 	struct spdk_nvme_intel_log_page_directory *log_page_directory;
372 
373 	log_page_directory = spdk_dma_zmalloc(sizeof(struct spdk_nvme_intel_log_page_directory),
374 					      64, &phys_addr);
375 	if (log_page_directory == NULL) {
376 		SPDK_ERRLOG("could not allocate log_page_directory\n");
377 		return -ENXIO;
378 	}
379 
380 	spdk_nvme_ctrlr_cmd_get_log_page(ctrlr, SPDK_NVME_INTEL_LOG_PAGE_DIRECTORY, SPDK_NVME_GLOBAL_NS_TAG,
381 					 log_page_directory, sizeof(struct spdk_nvme_intel_log_page_directory), 0,
382 					 nvme_completion_poll_cb,
383 					 &status);
384 	if (spdk_nvme_wait_for_completion(ctrlr->adminq, &status)) {
385 		spdk_dma_free(log_page_directory);
386 		SPDK_ERRLOG("nvme_ctrlr_cmd_get_log_page failed!\n");
387 		return -ENXIO;
388 	}
389 
390 	nvme_ctrlr_construct_intel_support_log_page_list(ctrlr, log_page_directory);
391 	spdk_dma_free(log_page_directory);
392 	return 0;
393 }
394 
395 static void
396 nvme_ctrlr_set_supported_log_pages(struct spdk_nvme_ctrlr *ctrlr)
397 {
398 	memset(ctrlr->log_page_supported, 0, sizeof(ctrlr->log_page_supported));
399 	/* Mandatory pages */
400 	ctrlr->log_page_supported[SPDK_NVME_LOG_ERROR] = true;
401 	ctrlr->log_page_supported[SPDK_NVME_LOG_HEALTH_INFORMATION] = true;
402 	ctrlr->log_page_supported[SPDK_NVME_LOG_FIRMWARE_SLOT] = true;
403 	if (ctrlr->cdata.lpa.celp) {
404 		ctrlr->log_page_supported[SPDK_NVME_LOG_COMMAND_EFFECTS_LOG] = true;
405 	}
406 	if (ctrlr->cdata.vid == SPDK_PCI_VID_INTEL) {
407 		nvme_ctrlr_set_intel_support_log_pages(ctrlr);
408 	}
409 }
410 
411 static void
412 nvme_ctrlr_set_intel_supported_features(struct spdk_nvme_ctrlr *ctrlr)
413 {
414 	ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_MAX_LBA] = true;
415 	ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_NATIVE_MAX_LBA] = true;
416 	ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_POWER_GOVERNOR_SETTING] = true;
417 	ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_SMBUS_ADDRESS] = true;
418 	ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_LED_PATTERN] = true;
419 	ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_RESET_TIMED_WORKLOAD_COUNTERS] = true;
420 	ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_LATENCY_TRACKING] = true;
421 }
422 
423 static void
424 nvme_ctrlr_set_supported_features(struct spdk_nvme_ctrlr *ctrlr)
425 {
426 	memset(ctrlr->feature_supported, 0, sizeof(ctrlr->feature_supported));
427 	/* Mandatory features */
428 	ctrlr->feature_supported[SPDK_NVME_FEAT_ARBITRATION] = true;
429 	ctrlr->feature_supported[SPDK_NVME_FEAT_POWER_MANAGEMENT] = true;
430 	ctrlr->feature_supported[SPDK_NVME_FEAT_TEMPERATURE_THRESHOLD] = true;
431 	ctrlr->feature_supported[SPDK_NVME_FEAT_ERROR_RECOVERY] = true;
432 	ctrlr->feature_supported[SPDK_NVME_FEAT_NUMBER_OF_QUEUES] = true;
433 	ctrlr->feature_supported[SPDK_NVME_FEAT_INTERRUPT_COALESCING] = true;
434 	ctrlr->feature_supported[SPDK_NVME_FEAT_INTERRUPT_VECTOR_CONFIGURATION] = true;
435 	ctrlr->feature_supported[SPDK_NVME_FEAT_WRITE_ATOMICITY] = true;
436 	ctrlr->feature_supported[SPDK_NVME_FEAT_ASYNC_EVENT_CONFIGURATION] = true;
437 	/* Optional features */
438 	if (ctrlr->cdata.vwc.present) {
439 		ctrlr->feature_supported[SPDK_NVME_FEAT_VOLATILE_WRITE_CACHE] = true;
440 	}
441 	if (ctrlr->cdata.apsta.supported) {
442 		ctrlr->feature_supported[SPDK_NVME_FEAT_AUTONOMOUS_POWER_STATE_TRANSITION] = true;
443 	}
444 	if (ctrlr->cdata.hmpre) {
445 		ctrlr->feature_supported[SPDK_NVME_FEAT_HOST_MEM_BUFFER] = true;
446 	}
447 	if (ctrlr->cdata.vid == SPDK_PCI_VID_INTEL) {
448 		nvme_ctrlr_set_intel_supported_features(ctrlr);
449 	}
450 }
451 
452 void
453 nvme_ctrlr_fail(struct spdk_nvme_ctrlr *ctrlr, bool hot_remove)
454 {
455 	/*
456 	 * Set the flag here and leave the actual failing of the qpairs to
457 	 * spdk_nvme_qpair_process_completions().
458 	 */
459 	if (hot_remove) {
460 		ctrlr->is_removed = true;
461 	}
462 	ctrlr->is_failed = true;
463 	SPDK_ERRLOG("ctrlr %s in failed state.\n", ctrlr->trid.traddr);
464 }
465 
466 static void
467 nvme_ctrlr_shutdown(struct spdk_nvme_ctrlr *ctrlr)
468 {
469 	union spdk_nvme_cc_register	cc;
470 	union spdk_nvme_csts_register	csts;
471 	uint32_t			ms_waited = 0;
472 	uint32_t			shutdown_timeout_ms;
473 
474 	if (ctrlr->is_removed) {
475 		return;
476 	}
477 
478 	if (nvme_ctrlr_get_cc(ctrlr, &cc)) {
479 		SPDK_ERRLOG("get_cc() failed\n");
480 		return;
481 	}
482 
483 	cc.bits.shn = SPDK_NVME_SHN_NORMAL;
484 
485 	if (nvme_ctrlr_set_cc(ctrlr, &cc)) {
486 		SPDK_ERRLOG("set_cc() failed\n");
487 		return;
488 	}
489 
490 	/*
491 	 * The NVMe specification defines RTD3E to be the time between
492 	 *  setting CC.SHN = 01b (normal shutdown) and the controller setting CSTS.SHST = 10b.
493 	 * If the device doesn't report RTD3 entry latency, or if it
494 	 *  reports RTD3 entry latency less than 10 seconds, pick
495 	 *  10 seconds as a reasonable amount of time to
496 	 *  wait before proceeding.
497 	 */
498 	SPDK_DEBUGLOG(SPDK_LOG_NVME, "RTD3E = %" PRIu32 " us\n", ctrlr->cdata.rtd3e);
499 	shutdown_timeout_ms = (ctrlr->cdata.rtd3e + 999) / 1000;
500 	shutdown_timeout_ms = spdk_max(shutdown_timeout_ms, 10000);
501 	SPDK_DEBUGLOG(SPDK_LOG_NVME, "shutdown timeout = %" PRIu32 " ms\n", shutdown_timeout_ms);
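	/*
	 * Worked example: RTD3E = 2,500,000 us rounds up to 2,500 ms, which
	 * spdk_max() raises to the 10,000 ms floor; RTD3E = 30,000,000 us
	 * yields a 30,000 ms timeout.
	 */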
502 
503 	do {
504 		if (nvme_ctrlr_get_csts(ctrlr, &csts)) {
505 			SPDK_ERRLOG("get_csts() failed\n");
506 			return;
507 		}
508 
509 		if (csts.bits.shst == SPDK_NVME_SHST_COMPLETE) {
510 			SPDK_DEBUGLOG(SPDK_LOG_NVME, "shutdown complete in %u milliseconds\n",
511 				      ms_waited);
512 			return;
513 		}
514 
515 		nvme_delay(1000);
516 		ms_waited++;
517 	} while (ms_waited < shutdown_timeout_ms);
518 
519 	SPDK_ERRLOG("did not shutdown within %u milliseconds\n", shutdown_timeout_ms);
520 }
521 
522 static int
523 nvme_ctrlr_enable(struct spdk_nvme_ctrlr *ctrlr)
524 {
525 	union spdk_nvme_cc_register	cc;
526 	int				rc;
527 
528 	rc = nvme_transport_ctrlr_enable(ctrlr);
529 	if (rc != 0) {
530 		SPDK_ERRLOG("transport ctrlr_enable failed\n");
531 		return rc;
532 	}
533 
534 	if (nvme_ctrlr_get_cc(ctrlr, &cc)) {
535 		SPDK_ERRLOG("get_cc() failed\n");
536 		return -EIO;
537 	}
538 
539 	if (cc.bits.en != 0) {
540 		SPDK_ERRLOG("%s called with CC.EN = 1\n", __func__);
541 		return -EINVAL;
542 	}
543 
544 	cc.bits.en = 1;
545 	cc.bits.css = 0;
546 	cc.bits.shn = 0;
547 	cc.bits.iosqes = 6; /* SQ entry size == 64 == 2^6 */
548 	cc.bits.iocqes = 4; /* CQ entry size == 16 == 2^4 */
549 
550 	/* Page size is 2 ^ (12 + mps). */
551 	cc.bits.mps = spdk_u32log2(ctrlr->page_size) - 12;
552 
553 	if (ctrlr->cap.bits.css == 0) {
554 		SPDK_INFOLOG(SPDK_LOG_NVME,
555 			     "Drive reports no command sets supported. Assuming NVM is supported.\n");
556 		ctrlr->cap.bits.css = SPDK_NVME_CAP_CSS_NVM;
557 	}
558 
559 	if (!(ctrlr->cap.bits.css & (1u << ctrlr->opts.command_set))) {
560 		SPDK_DEBUGLOG(SPDK_LOG_NVME, "Requested I/O command set %u but supported mask is 0x%x\n",
561 			      ctrlr->opts.command_set, ctrlr->cap.bits.css);
562 		return -EINVAL;
563 	}
564 
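	/*
	 * Worked example: CAP.CSS = 0x1 advertises only the NVM command set
	 * (bit 0), so the default command_set of SPDK_NVME_CC_CSS_NVM (0)
	 * passes the mask check above and any other request is rejected.
	 */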
565 	cc.bits.css = ctrlr->opts.command_set;
566 
567 	switch (ctrlr->opts.arb_mechanism) {
568 	case SPDK_NVME_CC_AMS_RR:
569 		break;
570 	case SPDK_NVME_CC_AMS_WRR:
571 		if (SPDK_NVME_CAP_AMS_WRR & ctrlr->cap.bits.ams) {
572 			break;
573 		}
574 		return -EINVAL;
575 	case SPDK_NVME_CC_AMS_VS:
576 		if (SPDK_NVME_CAP_AMS_VS & ctrlr->cap.bits.ams) {
577 			break;
578 		}
579 		return -EINVAL;
580 	default:
581 		return -EINVAL;
582 	}
583 
584 	cc.bits.ams = ctrlr->opts.arb_mechanism;
585 
586 	if (nvme_ctrlr_set_cc(ctrlr, &cc)) {
587 		SPDK_ERRLOG("set_cc() failed\n");
588 		return -EIO;
589 	}
590 
591 	return 0;
592 }
593 
594 #ifdef DEBUG
595 static const char *
596 nvme_ctrlr_state_string(enum nvme_ctrlr_state state)
597 {
598 	switch (state) {
599 	case NVME_CTRLR_STATE_INIT:
600 		return "init";
601 	case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1:
602 		return "disable and wait for CSTS.RDY = 1";
603 	case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0:
604 		return "disable and wait for CSTS.RDY = 0";
605 	case NVME_CTRLR_STATE_ENABLE:
606 		return "enable controller by writing CC.EN = 1";
607 	case NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1:
608 		return "wait for CSTS.RDY = 1";
609 	case NVME_CTRLR_STATE_READY:
610 		return "ready";
611 	}
612 	return "unknown";
613 }
614 #endif /* DEBUG */
615 
616 static void
617 nvme_ctrlr_set_state(struct spdk_nvme_ctrlr *ctrlr, enum nvme_ctrlr_state state,
618 		     uint64_t timeout_in_ms)
619 {
620 	ctrlr->state = state;
621 	if (timeout_in_ms == NVME_TIMEOUT_INFINITE) {
622 		SPDK_DEBUGLOG(SPDK_LOG_NVME, "setting state to %s (no timeout)\n",
623 			      nvme_ctrlr_state_string(ctrlr->state));
624 		ctrlr->state_timeout_tsc = NVME_TIMEOUT_INFINITE;
625 	} else {
626 		SPDK_DEBUGLOG(SPDK_LOG_NVME, "setting state to %s (timeout %" PRIu64 " ms)\n",
627 			      nvme_ctrlr_state_string(ctrlr->state), timeout_in_ms);
628 		ctrlr->state_timeout_tsc = spdk_get_ticks() + (timeout_in_ms * spdk_get_ticks_hz()) / 1000;
629 	}
630 }
631 
632 static void
633 nvme_ctrlr_free_doorbell_buffer(struct spdk_nvme_ctrlr *ctrlr)
634 {
635 	if (ctrlr->shadow_doorbell) {
636 		spdk_dma_free(ctrlr->shadow_doorbell);
637 		ctrlr->shadow_doorbell = NULL;
638 	}
639 
640 	if (ctrlr->eventidx) {
641 		spdk_dma_free(ctrlr->eventidx);
642 		ctrlr->eventidx = NULL;
643 	}
644 }
645 
646 static int
647 nvme_ctrlr_set_doorbell_buffer_config(struct spdk_nvme_ctrlr *ctrlr)
648 {
649 	int rc;
650 	struct nvme_completion_poll_status status;
651 	uint64_t prp1, prp2;
652 
653 	if (ctrlr->trid.trtype != SPDK_NVME_TRANSPORT_PCIE) {
654 		return 0;
655 	}
656 
657 	/* The shadow doorbell and EventIdx buffers are each one page in size. */
658 	ctrlr->shadow_doorbell = spdk_dma_zmalloc(ctrlr->page_size, ctrlr->page_size,
659 				 &prp1);
660 	if (ctrlr->shadow_doorbell == NULL) {
661 		return -1;
662 	}
663 
664 	ctrlr->eventidx = spdk_dma_zmalloc(ctrlr->page_size, ctrlr->page_size, &prp2);
665 	if (ctrlr->eventidx == NULL) {
666 		goto error;
667 	}
668 
669 	rc = nvme_ctrlr_cmd_doorbell_buffer_config(ctrlr, prp1, prp2,
670 			nvme_completion_poll_cb, &status);
671 	if (rc != 0) {
672 		goto error;
673 	}
674 
675 	if (spdk_nvme_wait_for_completion(ctrlr->adminq, &status)) {
676 		goto error;
677 	}
678 
679 	SPDK_INFOLOG(SPDK_LOG_NVME, "NVMe controller: %s doorbell buffer config enabled\n",
680 		     ctrlr->trid.traddr);
681 
682 	return 0;
683 
684 error:
685 	nvme_ctrlr_free_doorbell_buffer(ctrlr);
686 	return -1;
687 }
688 
689 int
690 spdk_nvme_ctrlr_reset(struct spdk_nvme_ctrlr *ctrlr)
691 {
692 	int rc = 0;
693 	struct spdk_nvme_qpair	*qpair;
694 	struct nvme_request	*req, *tmp;
695 
696 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
697 
698 	if (ctrlr->is_resetting || ctrlr->is_failed) {
699 		/*
700 		 * Controller is already resetting or has failed.  Return
701 		 *  immediately since there is no need to kick off another
702 		 *  reset in these cases.
703 		 */
704 		nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
705 		return 0;
706 	}
707 
708 	ctrlr->is_resetting = true;
709 
710 	SPDK_NOTICELOG("resetting controller\n");
711 
712 	/* Free all of the queued abort requests */
713 	STAILQ_FOREACH_SAFE(req, &ctrlr->queued_aborts, stailq, tmp) {
714 		STAILQ_REMOVE_HEAD(&ctrlr->queued_aborts, stailq);
715 		nvme_free_request(req);
716 		ctrlr->outstanding_aborts--;
717 	}
718 
719 	/* Disable all queues before disabling the controller hardware. */
720 	nvme_qpair_disable(ctrlr->adminq);
721 	TAILQ_FOREACH(qpair, &ctrlr->active_io_qpairs, tailq) {
722 		nvme_qpair_disable(qpair);
723 	}
724 
725 	/* Doorbell buffer config is invalid during reset */
726 	nvme_ctrlr_free_doorbell_buffer(ctrlr);
727 
728 	/* Set the state back to INIT to cause a full hardware reset. */
729 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_INIT, NVME_TIMEOUT_INFINITE);
730 
731 	while (ctrlr->state != NVME_CTRLR_STATE_READY) {
732 		if (nvme_ctrlr_process_init(ctrlr) != 0) {
733 			SPDK_ERRLOG("%s: controller reinitialization failed\n", __func__);
734 			nvme_ctrlr_fail(ctrlr, false);
735 			rc = -1;
736 			break;
737 		}
738 	}
739 
740 	if (!ctrlr->is_failed) {
741 		/* Reinitialize qpairs */
742 		TAILQ_FOREACH(qpair, &ctrlr->active_io_qpairs, tailq) {
743 			if (nvme_transport_ctrlr_reinit_io_qpair(ctrlr, qpair) != 0) {
744 				nvme_ctrlr_fail(ctrlr, false);
745 				rc = -1;
746 			}
747 		}
748 	}
749 
750 	ctrlr->is_resetting = false;
751 
752 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
753 
754 	return rc;
755 }
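/*
 * A minimal usage sketch (hypothetical caller, not part of this file): an
 * application typically resets the controller after deciding it is hung,
 * e.g. from its I/O timeout callback:
 *
 *	if (spdk_nvme_ctrlr_reset(ctrlr) != 0) {
 *		// the controller could not be recovered; detach it
 *	}
 */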
756 
757 static int
758 nvme_ctrlr_identify(struct spdk_nvme_ctrlr *ctrlr)
759 {
760 	struct nvme_completion_poll_status	status;
761 	int					rc;
762 
763 	rc = nvme_ctrlr_cmd_identify(ctrlr, SPDK_NVME_IDENTIFY_CTRLR, 0, 0,
764 				     &ctrlr->cdata, sizeof(ctrlr->cdata),
765 				     nvme_completion_poll_cb, &status);
766 	if (rc != 0) {
767 		return rc;
768 	}
769 
770 	if (spdk_nvme_wait_for_completion(ctrlr->adminq, &status)) {
771 		SPDK_ERRLOG("nvme_identify_controller failed!\n");
772 		return -ENXIO;
773 	}
774 
775 	/*
776 	 * Use MDTS to ensure our default max_xfer_size doesn't exceed what the
777 	 *  controller supports.
778 	 */
779 	ctrlr->max_xfer_size = nvme_transport_ctrlr_get_max_xfer_size(ctrlr);
780 	SPDK_DEBUGLOG(SPDK_LOG_NVME, "transport max_xfer_size %u\n", ctrlr->max_xfer_size);
781 	if (ctrlr->cdata.mdts > 0) {
782 		ctrlr->max_xfer_size = spdk_min(ctrlr->max_xfer_size,
783 						ctrlr->min_page_size * (1 << (ctrlr->cdata.mdts)));
784 		SPDK_DEBUGLOG(SPDK_LOG_NVME, "MDTS max_xfer_size %u\n", ctrlr->max_xfer_size);
785 	}
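	/*
	 * Worked example: with min_page_size = 4096 and MDTS = 5, the limit
	 * is 4096 * 2^5 = 131072 bytes (128 KiB).
	 */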
786 
787 	SPDK_DEBUGLOG(SPDK_LOG_NVME, "CNTLID 0x%04" PRIx16 "\n", ctrlr->cdata.cntlid);
788 	if (ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) {
789 		ctrlr->cntlid = ctrlr->cdata.cntlid;
790 	} else {
791 		/*
792 		 * Fabrics controllers should already have CNTLID from the Connect command.
793 		 *
794 		 * If CNTLID from Connect doesn't match CNTLID in the Identify Controller data,
795 		 * trust the one from Connect.
796 		 */
797 		if (ctrlr->cntlid != ctrlr->cdata.cntlid) {
798 			SPDK_DEBUGLOG(SPDK_LOG_NVME,
799 				      "Identify CNTLID 0x%04" PRIx16 " != Connect CNTLID 0x%04" PRIx16 "\n",
800 				      ctrlr->cdata.cntlid, ctrlr->cntlid);
801 		}
802 	}
803 
804 	return 0;
805 }
806 
807 
808 int
809 nvme_ctrlr_identify_active_ns(struct spdk_nvme_ctrlr *ctrlr)
810 {
811 	struct nvme_completion_poll_status	status;
812 	int					rc;
813 	uint32_t				i;
814 	uint32_t				num_pages;
815 	uint32_t				next_nsid = 0;
816 	uint32_t				*new_ns_list = NULL;
817 
818 
819 	/*
820 	 * The allocated size must be a multiple of sizeof(struct spdk_nvme_ns_list)
821 	 */
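	/*
	 * Worked example: each struct spdk_nvme_ns_list holds 1024 four-byte
	 * NSIDs (4096 bytes), so num_ns = 1024 needs one list and
	 * num_ns = 1025 rounds up to two.
	 */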
822 	num_pages = (ctrlr->num_ns * sizeof(new_ns_list[0]) - 1) / sizeof(struct spdk_nvme_ns_list) + 1;
823 	new_ns_list = spdk_dma_zmalloc(num_pages * sizeof(struct spdk_nvme_ns_list), ctrlr->page_size,
824 				       NULL);
825 	if (!new_ns_list) {
826 		SPDK_ERRLOG("Failed to allocate active_ns_list!\n");
827 		return -ENOMEM;
828 	}
829 
830 	if (ctrlr->vs.raw >= SPDK_NVME_VERSION(1, 1, 0) && !(ctrlr->quirks & NVME_QUIRK_IDENTIFY_CNS)) {
831 		/*
832 		 * Iterate through the pages and fetch each chunk of 1024 namespaces until
833 		 * there are no more active namespaces
834 		 */
835 		for (i = 0; i < num_pages; i++) {
836 			rc = nvme_ctrlr_cmd_identify(ctrlr, SPDK_NVME_IDENTIFY_ACTIVE_NS_LIST, 0, next_nsid,
837 						     &new_ns_list[1024 * i], sizeof(struct spdk_nvme_ns_list),
838 						     nvme_completion_poll_cb, &status);
839 			if (rc != 0) {
840 				goto fail;
841 			}
842 			if (spdk_nvme_wait_for_completion(ctrlr->adminq, &status)) {
843 				SPDK_ERRLOG("nvme_ctrlr_cmd_identify_active_ns_list failed!\n");
844 				rc = -ENXIO;
845 				goto fail;
846 			}
847 			next_nsid = new_ns_list[1024 * i + 1023];
848 			if (next_nsid == 0) {
849 				/*
850 				 * No more active namespaces found, no need to fetch additional chunks
851 				 */
852 				break;
853 			}
854 		}
855 
856 	} else {
857 		/*
858 		 * Controller doesn't support active ns list CNS 0x02 so dummy up
859 		 * an active ns list
860 		 */
861 		for (i = 0; i < ctrlr->num_ns; i++) {
862 			new_ns_list[i] = i + 1;
863 		}
864 	}
865 
866 	/*
867 	 * Now that the list is properly set up, we can swap it into the ctrlr and
868 	 * free up the previous one.
869 	 */
870 	spdk_dma_free(ctrlr->active_ns_list);
871 	ctrlr->active_ns_list = new_ns_list;
872 
873 	return 0;
874 fail:
875 	spdk_dma_free(new_ns_list);
876 	return rc;
877 }
878 
879 static int
880 nvme_ctrlr_set_num_qpairs(struct spdk_nvme_ctrlr *ctrlr)
881 {
882 	struct nvme_completion_poll_status	status;
883 	uint32_t cq_allocated, sq_allocated, min_allocated, i;
884 	int rc;
885 
886 	if (ctrlr->opts.num_io_queues > SPDK_NVME_MAX_IO_QUEUES) {
887 		SPDK_NOTICELOG("Limiting requested num_io_queues %u to max %d\n",
888 			       ctrlr->opts.num_io_queues, SPDK_NVME_MAX_IO_QUEUES);
889 		ctrlr->opts.num_io_queues = SPDK_NVME_MAX_IO_QUEUES;
890 	} else if (ctrlr->opts.num_io_queues < 1) {
891 		SPDK_NOTICELOG("Requested num_io_queues 0, increasing to 1\n");
892 		ctrlr->opts.num_io_queues = 1;
893 	}
894 
895 	rc = nvme_ctrlr_cmd_set_num_queues(ctrlr, ctrlr->opts.num_io_queues,
896 					   nvme_completion_poll_cb, &status);
897 	if (rc != 0) {
898 		return rc;
899 	}
900 
901 	if (spdk_nvme_wait_for_completion(ctrlr->adminq, &status)) {
902 		SPDK_ERRLOG("Set Features - Number of Queues failed!\n");
903 	}
904 
905 	/* Obtain the number of queues allocated using Get Features. */
906 	rc = nvme_ctrlr_cmd_get_num_queues(ctrlr, nvme_completion_poll_cb, &status);
907 	if (rc != 0) {
908 		return rc;
909 	}
910 
911 	if (spdk_nvme_wait_for_completion(ctrlr->adminq, &status)) {
912 		SPDK_ERRLOG("Get Features - Number of Queues failed!\n");
913 		ctrlr->opts.num_io_queues = 0;
914 	} else {
915 		/*
916 		 * Data in cdw0 is 0-based.
917 		 * Lower 16-bits indicate number of submission queues allocated.
918 		 * Upper 16-bits indicate number of completion queues allocated.
919 		 */
920 		sq_allocated = (status.cpl.cdw0 & 0xFFFF) + 1;
921 		cq_allocated = (status.cpl.cdw0 >> 16) + 1;
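		/*
		 * Worked example: cdw0 = 0x00030007 decodes to 8 submission
		 * queues (0x0007 + 1) and 4 completion queues (0x0003 + 1),
		 * so min_allocated below is 4.
		 */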
922 
923 		/*
924 		 * For 1:1 queue mapping, set number of allocated queues to be minimum of
925 		 * submission and completion queues.
926 		 */
927 		min_allocated = spdk_min(sq_allocated, cq_allocated);
928 
929 		/* Set number of queues to be minimum of requested and actually allocated. */
930 		ctrlr->opts.num_io_queues = spdk_min(min_allocated, ctrlr->opts.num_io_queues);
931 	}
932 
933 	ctrlr->free_io_qids = spdk_bit_array_create(ctrlr->opts.num_io_queues + 1);
934 	if (ctrlr->free_io_qids == NULL) {
935 		return -ENOMEM;
936 	}
937 
938 	/* Initialize list of free I/O queue IDs. QID 0 is the admin queue. */
939 	spdk_bit_array_clear(ctrlr->free_io_qids, 0);
940 	for (i = 1; i <= ctrlr->opts.num_io_queues; i++) {
941 		spdk_bit_array_set(ctrlr->free_io_qids, i);
942 	}
943 
944 	return 0;
945 }
946 
947 static int
948 nvme_ctrlr_set_keep_alive_timeout(struct spdk_nvme_ctrlr *ctrlr)
949 {
950 	struct nvme_completion_poll_status status;
951 	uint32_t keep_alive_interval_ms;
952 	int rc;
953 
954 	if (ctrlr->opts.keep_alive_timeout_ms == 0) {
955 		return 0;
956 	}
957 
958 	if (ctrlr->cdata.kas == 0) {
959 		SPDK_DEBUGLOG(SPDK_LOG_NVME, "Controller KAS is 0 - not enabling Keep Alive\n");
960 		ctrlr->opts.keep_alive_timeout_ms = 0;
961 		return 0;
962 	}
963 
964 	/* Retrieve actual keep alive timeout, since the controller may have adjusted it. */
965 	rc = spdk_nvme_ctrlr_cmd_get_feature(ctrlr, SPDK_NVME_FEAT_KEEP_ALIVE_TIMER, 0, NULL, 0,
966 					     nvme_completion_poll_cb, &status);
967 	if (rc != 0) {
968 		SPDK_ERRLOG("Keep alive timeout Get Feature failed: %d\n", rc);
969 		ctrlr->opts.keep_alive_timeout_ms = 0;
970 		return rc;
971 	}
972 
973 	if (spdk_nvme_wait_for_completion(ctrlr->adminq, &status)) {
974 		SPDK_ERRLOG("Keep alive timeout Get Feature failed: SC %x SCT %x\n",
975 			    status.cpl.status.sc, status.cpl.status.sct);
976 		ctrlr->opts.keep_alive_timeout_ms = 0;
977 		return -ENXIO;
978 	}
979 
980 	if (ctrlr->opts.keep_alive_timeout_ms != status.cpl.cdw0) {
981 		SPDK_DEBUGLOG(SPDK_LOG_NVME, "Controller adjusted keep alive timeout to %u ms\n",
982 			      status.cpl.cdw0);
983 	}
984 
985 	ctrlr->opts.keep_alive_timeout_ms = status.cpl.cdw0;
986 
987 	keep_alive_interval_ms = ctrlr->opts.keep_alive_timeout_ms / 2;
988 	if (keep_alive_interval_ms == 0) {
989 		keep_alive_interval_ms = 1;
990 	}
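	/*
	 * Worked example: a 10,000 ms timeout yields a 5,000 ms interval;
	 * a 1 ms timeout would halve to 0 and is clamped back up to 1 ms.
	 */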
991 	SPDK_DEBUGLOG(SPDK_LOG_NVME, "Sending keep alive every %u ms\n", keep_alive_interval_ms);
992 
993 	ctrlr->keep_alive_interval_ticks = (keep_alive_interval_ms * spdk_get_ticks_hz()) / UINT64_C(1000);
994 
995 	/* Schedule the first Keep Alive to be sent as soon as possible. */
996 	ctrlr->next_keep_alive_tick = spdk_get_ticks();
997 
998 	return 0;
999 }
1000 
1001 static int
1002 nvme_ctrlr_set_host_id(struct spdk_nvme_ctrlr *ctrlr)
1003 {
1004 	struct nvme_completion_poll_status status;
1005 	uint8_t *host_id;
1006 	uint32_t host_id_size;
1007 	int rc;
1008 
1009 	if (ctrlr->trid.trtype != SPDK_NVME_TRANSPORT_PCIE) {
1010 		/*
1011 		 * NVMe-oF sends the host ID during Connect and doesn't allow
1012 		 * Set Features - Host Identifier after Connect, so we don't need to do anything here.
1013 		 */
1014 		SPDK_DEBUGLOG(SPDK_LOG_NVME, "NVMe-oF transport - not sending Set Features - Host ID\n");
1015 		return 0;
1016 	}
1017 
1018 	if (ctrlr->cdata.ctratt.host_id_exhid_supported) {
1019 		SPDK_DEBUGLOG(SPDK_LOG_NVME, "Using 128-bit extended host identifier\n");
1020 		host_id = ctrlr->opts.extended_host_id;
1021 		host_id_size = sizeof(ctrlr->opts.extended_host_id);
1022 	} else {
1023 		SPDK_DEBUGLOG(SPDK_LOG_NVME, "Using 64-bit host identifier\n");
1024 		host_id = ctrlr->opts.host_id;
1025 		host_id_size = sizeof(ctrlr->opts.host_id);
1026 	}
1027 
1028 	/* If the user specified an all-zeroes host identifier, don't send the command. */
1029 	if (spdk_mem_all_zero(host_id, host_id_size)) {
1030 		SPDK_DEBUGLOG(SPDK_LOG_NVME,
1031 			      "User did not specify host ID - not sending Set Features - Host ID\n");
1032 		return 0;
1033 	}
1034 
1035 	SPDK_TRACEDUMP(SPDK_LOG_NVME, "host_id", host_id, host_id_size);
1036 
1037 	rc = nvme_ctrlr_cmd_set_host_id(ctrlr, host_id, host_id_size, nvme_completion_poll_cb, &status);
1038 	if (rc != 0) {
1039 		SPDK_ERRLOG("Set Features - Host ID failed: %d\n", rc);
1040 		return rc;
1041 	}
1042 
1043 	if (spdk_nvme_wait_for_completion(ctrlr->adminq, &status)) {
1044 		SPDK_WARNLOG("Set Features - Host ID failed: SC 0x%x SCT 0x%x\n",
1045 			     status.cpl.status.sc, status.cpl.status.sct);
1046 		/*
1047 		 * Treat Set Features - Host ID failure as non-fatal, since the Host ID feature
1048 		 * is optional.
1049 		 */
1050 		return 0;
1051 	}
1052 
1053 	SPDK_DEBUGLOG(SPDK_LOG_NVME, "Set Features - Host ID was successful\n");
1054 	return 0;
1055 }
1056 
1057 static void
1058 nvme_ctrlr_destruct_namespaces(struct spdk_nvme_ctrlr *ctrlr)
1059 {
1060 	if (ctrlr->ns) {
1061 		uint32_t i, num_ns = ctrlr->num_ns;
1062 
1063 		for (i = 0; i < num_ns; i++) {
1064 			nvme_ns_destruct(&ctrlr->ns[i]);
1065 		}
1066 
1067 		spdk_dma_free(ctrlr->ns);
1068 		ctrlr->ns = NULL;
1069 		ctrlr->num_ns = 0;
1070 	}
1071 
1072 	if (ctrlr->nsdata) {
1073 		spdk_dma_free(ctrlr->nsdata);
1074 		ctrlr->nsdata = NULL;
1075 	}
1076 
1077 	spdk_dma_free(ctrlr->active_ns_list);
1078 	ctrlr->active_ns_list = NULL;
1079 }
1080 
1081 static int
1082 nvme_ctrlr_update_namespaces(struct spdk_nvme_ctrlr *ctrlr)
1083 {
1084 	uint32_t i, nn = ctrlr->cdata.nn;
1085 	struct spdk_nvme_ns_data *nsdata;
1086 
1087 	if (nvme_ctrlr_identify_active_ns(ctrlr)) {
1088 		return -1;
1089 	}
1090 
1091 	for (i = 0; i < nn; i++) {
1092 		struct spdk_nvme_ns	*ns = &ctrlr->ns[i];
1093 		uint32_t		nsid = i + 1;
1094 		nsdata			= &ctrlr->nsdata[nsid - 1];
1095 
1096 		if ((nsdata->ncap == 0) && spdk_nvme_ctrlr_is_active_ns(ctrlr, nsid)) {
1097 			if (nvme_ns_construct(ns, nsid, ctrlr) != 0) {
1098 				continue;
1099 			}
1100 		}
1101 
1102 		if (nsdata->ncap && !spdk_nvme_ctrlr_is_active_ns(ctrlr, nsid)) {
1103 			nvme_ns_destruct(ns);
1104 		}
1105 	}
1106 
1107 	return 0;
1108 }
1109 
1110 static int
1111 nvme_ctrlr_construct_namespaces(struct spdk_nvme_ctrlr *ctrlr)
1112 {
1113 	uint32_t nn = ctrlr->cdata.nn;
1114 	uint64_t phys_addr = 0;
1115 
1116 	/* ctrlr->num_ns may be 0 (startup) or a different number of namespaces (reset),
1117 	 * so check if we need to reallocate.
1118 	 */
1119 	if (nn != ctrlr->num_ns) {
1120 		nvme_ctrlr_destruct_namespaces(ctrlr);
1121 
1122 		if (nn == 0) {
1123 			SPDK_WARNLOG("controller has 0 namespaces\n");
1124 			return 0;
1125 		}
1126 
1127 		ctrlr->ns = spdk_dma_zmalloc(nn * sizeof(struct spdk_nvme_ns), 64,
1128 					     &phys_addr);
1129 		if (ctrlr->ns == NULL) {
1130 			goto fail;
1131 		}
1132 
1133 		ctrlr->nsdata = spdk_dma_zmalloc(nn * sizeof(struct spdk_nvme_ns_data), 64,
1134 						 &phys_addr);
1135 		if (ctrlr->nsdata == NULL) {
1136 			goto fail;
1137 		}
1138 
1139 		ctrlr->num_ns = nn;
1140 	}
1141 
1142 	if (nvme_ctrlr_update_namespaces(ctrlr)) {
1143 		goto fail;
1144 	}
1145 	return 0;
1146 
1147 fail:
1148 	nvme_ctrlr_destruct_namespaces(ctrlr);
1149 	return -1;
1150 }
1151 
1152 static void
1153 nvme_ctrlr_async_event_cb(void *arg, const struct spdk_nvme_cpl *cpl)
1154 {
1155 	struct nvme_async_event_request	*aer = arg;
1156 	struct spdk_nvme_ctrlr		*ctrlr = aer->ctrlr;
1157 	struct spdk_nvme_ctrlr_process	*active_proc;
1158 	union spdk_nvme_async_event_completion	event;
1159 
1160 	if (cpl->status.sc == SPDK_NVME_SC_ABORTED_SQ_DELETION) {
1161 		/*
1162 		 *  This is simulated when the controller is being shut down, to
1163 		 *  effectively abort outstanding asynchronous event requests
1164 		 *  and make sure all memory is freed.  Do not repost the
1165 		 *  request in this case.
1166 		 */
1167 		return;
1168 	}
1169 
1170 	event.raw = cpl->cdw0;
1171 	if ((event.bits.async_event_type == SPDK_NVME_ASYNC_EVENT_TYPE_NOTICE) &&
1172 	    (event.bits.async_event_info == SPDK_NVME_ASYNC_EVENT_NS_ATTR_CHANGED)) {
1173 		nvme_ctrlr_update_namespaces(ctrlr);
1174 	}
1175 
1176 	active_proc = spdk_nvme_ctrlr_get_current_process(ctrlr);
1177 	if (active_proc && active_proc->aer_cb_fn) {
1178 		active_proc->aer_cb_fn(active_proc->aer_cb_arg, cpl);
1179 	}
1180 
1181 	/*
1182 	 * Repost another asynchronous event request to replace the one
1183 	 *  that just completed.
1184 	 */
1185 	if (nvme_ctrlr_construct_and_submit_aer(ctrlr, aer)) {
1186 		/*
1187 		 * We can't do anything to recover from a failure here,
1188 		 * so just print an error message and leave the AER unsubmitted.
1189 		 */
1190 		SPDK_ERRLOG("resubmitting AER failed!\n");
1191 	}
1192 }
1193 
1194 static int
1195 nvme_ctrlr_construct_and_submit_aer(struct spdk_nvme_ctrlr *ctrlr,
1196 				    struct nvme_async_event_request *aer)
1197 {
1198 	struct nvme_request *req;
1199 
1200 	aer->ctrlr = ctrlr;
1201 	req = nvme_allocate_request_null(ctrlr->adminq, nvme_ctrlr_async_event_cb, aer);
1202 	aer->req = req;
1203 	if (req == NULL) {
1204 		return -1;
1205 	}
1206 
1207 	req->cmd.opc = SPDK_NVME_OPC_ASYNC_EVENT_REQUEST;
1208 	return nvme_ctrlr_submit_admin_request(ctrlr, req);
1209 }
1210 
1211 static int
1212 _nvme_ctrlr_configure_aer(struct spdk_nvme_ctrlr *ctrlr)
1213 {
1214 	union spdk_nvme_feat_async_event_configuration	config;
1215 	struct nvme_completion_poll_status		status;
1216 	int						rc;
1217 
1218 	config.raw = 0;
1219 	config.bits.crit_warn.bits.available_spare = 1;
1220 	config.bits.crit_warn.bits.temperature = 1;
1221 	config.bits.crit_warn.bits.device_reliability = 1;
1222 	config.bits.crit_warn.bits.read_only = 1;
1223 	config.bits.crit_warn.bits.volatile_memory_backup = 1;
1224 
1225 	if (ctrlr->vs.raw >= SPDK_NVME_VERSION(1, 2, 0)) {
1226 		if (ctrlr->cdata.oaes.ns_attribute_notices) {
1227 			config.bits.ns_attr_notice = 1;
1228 		}
1229 		if (ctrlr->cdata.oaes.fw_activation_notices) {
1230 			config.bits.fw_activation_notice = 1;
1231 		}
1232 	}
1233 	if (ctrlr->vs.raw >= SPDK_NVME_VERSION(1, 3, 0) && ctrlr->cdata.lpa.telemetry) {
1234 		config.bits.telemetry_log_notice = 1;
1235 	}
1236 
1237 	rc = nvme_ctrlr_cmd_set_async_event_config(ctrlr, config, nvme_completion_poll_cb, &status);
1238 	if (rc != 0) {
1239 		return rc;
1240 	}
1241 
1242 	if (spdk_nvme_wait_for_completion(ctrlr->adminq, &status)) {
1243 		return -ENXIO;
1244 	}
1245 
1246 	return 0;
1247 }
1248 
1249 static int
1250 nvme_ctrlr_configure_aer(struct spdk_nvme_ctrlr *ctrlr)
1251 {
1252 	struct nvme_async_event_request		*aer;
1253 	uint32_t				i;
1254 	int					rc;
1255 
1256 	rc = _nvme_ctrlr_configure_aer(ctrlr);
1257 	if (rc != 0) {
1258 		SPDK_NOTICELOG("nvme_ctrlr_configure_aer failed!\n");
1259 		return 0;
1260 	}
1261 
1262 	/* aerl is a zero-based value, so we need to add 1 here. */
1263 	ctrlr->num_aers = spdk_min(NVME_MAX_ASYNC_EVENTS, (ctrlr->cdata.aerl + 1));
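	/* Worked example: AERL = 3 permits 3 + 1 = 4 outstanding AERs. */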
1264 
1265 	for (i = 0; i < ctrlr->num_aers; i++) {
1266 		aer = &ctrlr->aer[i];
1267 		if (nvme_ctrlr_construct_and_submit_aer(ctrlr, aer)) {
1268 			SPDK_ERRLOG("nvme_ctrlr_construct_and_submit_aer failed!\n");
1269 			return -1;
1270 		}
1271 	}
1272 
1273 	return 0;
1274 }
1275 
1276 struct spdk_nvme_ctrlr_process *
1277 spdk_nvme_ctrlr_get_process(struct spdk_nvme_ctrlr *ctrlr, pid_t pid)
1278 {
1279 	struct spdk_nvme_ctrlr_process	*active_proc;
1280 
1281 	TAILQ_FOREACH(active_proc, &ctrlr->active_procs, tailq) {
1282 		if (active_proc->pid == pid) {
1283 			return active_proc;
1284 		}
1285 	}
1286 
1287 	return NULL;
1288 }
1289 
1290 struct spdk_nvme_ctrlr_process *
1291 spdk_nvme_ctrlr_get_current_process(struct spdk_nvme_ctrlr *ctrlr)
1292 {
1293 	return spdk_nvme_ctrlr_get_process(ctrlr, getpid());
1294 }
1295 
1296 /**
1297  * This function will be called when a process is using the controller.
1298  *  1. For the primary process, it is called when constructing the controller.
1299  *  2. For a secondary process, it is called when probing the controller.
1300  * Note: this checks whether the process has already been added.
1301  */
1302 int
1303 nvme_ctrlr_add_process(struct spdk_nvme_ctrlr *ctrlr, void *devhandle)
1304 {
1305 	struct spdk_nvme_ctrlr_process	*ctrlr_proc;
1306 	pid_t				pid = getpid();
1307 
1308 	/* Check whether the process is already added or not */
1309 	if (spdk_nvme_ctrlr_get_process(ctrlr, pid)) {
1310 		return 0;
1311 	}
1312 
1313 	/* Initialize the per process properties for this ctrlr */
1314 	ctrlr_proc = spdk_dma_zmalloc(sizeof(struct spdk_nvme_ctrlr_process), 64, NULL);
1315 	if (ctrlr_proc == NULL) {
1316 		SPDK_ERRLOG("failed to allocate memory to track the process props\n");
1317 
1318 		return -1;
1319 	}
1320 
1321 	ctrlr_proc->is_primary = spdk_process_is_primary();
1322 	ctrlr_proc->pid = pid;
1323 	STAILQ_INIT(&ctrlr_proc->active_reqs);
1324 	ctrlr_proc->devhandle = devhandle;
1325 	ctrlr_proc->ref = 0;
1326 	TAILQ_INIT(&ctrlr_proc->allocated_io_qpairs);
1327 
1328 	TAILQ_INSERT_TAIL(&ctrlr->active_procs, ctrlr_proc, tailq);
1329 
1330 	return 0;
1331 }
1332 
1333 /**
1334  * This function will be called when the process detaches the controller.
1335  * Note: the ctrlr_lock must be held when calling this function.
1336  */
1337 static void
1338 nvme_ctrlr_remove_process(struct spdk_nvme_ctrlr *ctrlr,
1339 			  struct spdk_nvme_ctrlr_process *proc)
1340 {
1341 	struct spdk_nvme_qpair	*qpair, *tmp_qpair;
1342 
1343 	assert(STAILQ_EMPTY(&proc->active_reqs));
1344 
1345 	TAILQ_FOREACH_SAFE(qpair, &proc->allocated_io_qpairs, per_process_tailq, tmp_qpair) {
1346 		spdk_nvme_ctrlr_free_io_qpair(qpair);
1347 	}
1348 
1349 	TAILQ_REMOVE(&ctrlr->active_procs, proc, tailq);
1350 
1351 	spdk_dma_free(proc);
1352 }
1353 
1354 /**
1355  * This function will be called when a process has exited unexpectedly
1356  *  in order to free any incomplete nvme requests, allocated IO qpairs,
1357  *  and allocated memory.
1358  * Note: the ctrlr_lock must be held when calling this function.
1359  */
1360 static void
1361 nvme_ctrlr_cleanup_process(struct spdk_nvme_ctrlr_process *proc)
1362 {
1363 	struct nvme_request	*req, *tmp_req;
1364 	struct spdk_nvme_qpair	*qpair, *tmp_qpair;
1365 
1366 	STAILQ_FOREACH_SAFE(req, &proc->active_reqs, stailq, tmp_req) {
1367 		STAILQ_REMOVE(&proc->active_reqs, req, nvme_request, stailq);
1368 
1369 		assert(req->pid == proc->pid);
1370 
1371 		nvme_free_request(req);
1372 	}
1373 
1374 	TAILQ_FOREACH_SAFE(qpair, &proc->allocated_io_qpairs, per_process_tailq, tmp_qpair) {
1375 		TAILQ_REMOVE(&proc->allocated_io_qpairs, qpair, per_process_tailq);
1376 
1377 		/*
1378 		 * The process may have been killed while some qpairs were in their
1379 		 *  completion context.  Clear that flag here to allow these IO
1380 		 *  qpairs to be deleted.
1381 		 */
1382 		qpair->in_completion_context = 0;
1383 
1384 		qpair->no_deletion_notification_needed = 1;
1385 
1386 		spdk_nvme_ctrlr_free_io_qpair(qpair);
1387 	}
1388 
1389 	spdk_dma_free(proc);
1390 }
1391 
1392 /**
1393  * This function will be called when destructing the controller.
1394  *  1. There are no more admin requests on this controller.
1395  *  2. Clean up any leftover resource allocations whose associated process is gone.
1396  */
1397 void
1398 nvme_ctrlr_free_processes(struct spdk_nvme_ctrlr *ctrlr)
1399 {
1400 	struct spdk_nvme_ctrlr_process	*active_proc, *tmp;
1401 
1402 	/* Free each process's properties and make sure there are no pending admin requests */
1403 	TAILQ_FOREACH_SAFE(active_proc, &ctrlr->active_procs, tailq, tmp) {
1404 		TAILQ_REMOVE(&ctrlr->active_procs, active_proc, tailq);
1405 
1406 		assert(STAILQ_EMPTY(&active_proc->active_reqs));
1407 
1408 		spdk_dma_free(active_proc);
1409 	}
1410 }
1411 
1412 /**
1413  * This function will be called when any other process attaches or
1414  *  detaches the controller in order to clean up unexpectedly
1415  *  terminated processes.
1416  * Note: the ctrlr_lock must be held when calling this function.
1417  */
1418 static int
1419 nvme_ctrlr_remove_inactive_proc(struct spdk_nvme_ctrlr *ctrlr)
1420 {
1421 	struct spdk_nvme_ctrlr_process	*active_proc, *tmp;
1422 	int				active_proc_count = 0;
1423 
1424 	TAILQ_FOREACH_SAFE(active_proc, &ctrlr->active_procs, tailq, tmp) {
1425 		if ((kill(active_proc->pid, 0) == -1) && (errno == ESRCH)) {
1426 			SPDK_ERRLOG("process %d terminated unexpectedly\n", active_proc->pid);
1427 
1428 			TAILQ_REMOVE(&ctrlr->active_procs, active_proc, tailq);
1429 
1430 			nvme_ctrlr_cleanup_process(active_proc);
1431 		} else {
1432 			active_proc_count++;
1433 		}
1434 	}
1435 
1436 	return active_proc_count;
1437 }
1438 
1439 void
1440 nvme_ctrlr_proc_get_ref(struct spdk_nvme_ctrlr *ctrlr)
1441 {
1442 	struct spdk_nvme_ctrlr_process	*active_proc;
1443 
1444 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
1445 
1446 	nvme_ctrlr_remove_inactive_proc(ctrlr);
1447 
1448 	active_proc = spdk_nvme_ctrlr_get_current_process(ctrlr);
1449 	if (active_proc) {
1450 		active_proc->ref++;
1451 	}
1452 
1453 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
1454 }
1455 
1456 void
1457 nvme_ctrlr_proc_put_ref(struct spdk_nvme_ctrlr *ctrlr)
1458 {
1459 	struct spdk_nvme_ctrlr_process	*active_proc;
1460 	int				proc_count;
1461 
1462 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
1463 
1464 	proc_count = nvme_ctrlr_remove_inactive_proc(ctrlr);
1465 
1466 	active_proc = spdk_nvme_ctrlr_get_current_process(ctrlr);
1467 	if (active_proc) {
1468 		active_proc->ref--;
1469 		assert(active_proc->ref >= 0);
1470 
1471 		/*
1472 		 * The last active process will be removed at the end of
1473 		 * the destruction of the controller.
1474 		 */
1475 		if (active_proc->ref == 0 && proc_count != 1) {
1476 			nvme_ctrlr_remove_process(ctrlr, active_proc);
1477 		}
1478 	}
1479 
1480 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
1481 }
1482 
1483 int
1484 nvme_ctrlr_get_ref_count(struct spdk_nvme_ctrlr *ctrlr)
1485 {
1486 	struct spdk_nvme_ctrlr_process	*active_proc;
1487 	int				ref = 0;
1488 
1489 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
1490 
1491 	nvme_ctrlr_remove_inactive_proc(ctrlr);
1492 
1493 	TAILQ_FOREACH(active_proc, &ctrlr->active_procs, tailq) {
1494 		ref += active_proc->ref;
1495 	}
1496 
1497 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
1498 
1499 	return ref;
1500 }
1501 
1502 /**
1503  *  Get the PCI device handle which is only visible to its associated process.
1504  */
1505 struct spdk_pci_device *
1506 nvme_ctrlr_proc_get_devhandle(struct spdk_nvme_ctrlr *ctrlr)
1507 {
1508 	struct spdk_nvme_ctrlr_process	*active_proc;
1509 	struct spdk_pci_device		*devhandle = NULL;
1510 
1511 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
1512 
1513 	active_proc = spdk_nvme_ctrlr_get_current_process(ctrlr);
1514 	if (active_proc) {
1515 		devhandle = active_proc->devhandle;
1516 	}
1517 
1518 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
1519 
1520 	return devhandle;
1521 }
1522 
1523 /**
1524  * This function will be called repeatedly during initialization until the controller is ready.
1525  */
1526 int
1527 nvme_ctrlr_process_init(struct spdk_nvme_ctrlr *ctrlr)
1528 {
1529 	union spdk_nvme_cc_register cc;
1530 	union spdk_nvme_csts_register csts;
1531 	uint32_t ready_timeout_in_ms;
1532 	int rc;
1533 
1534 	/*
1535 	 * May need to avoid accessing any register on the target controller
1536 	 * for a while. Return early without touching the FSM.
1537 	 * (sleep_timeout_tsc > 0 is checked explicitly so that unit tests, where the tick count may be 0, do not stall here.)
1538 	 */
1539 	if ((ctrlr->sleep_timeout_tsc > 0) &&
1540 	    (spdk_get_ticks() <= ctrlr->sleep_timeout_tsc)) {
1541 		return 0;
1542 	}
1543 	ctrlr->sleep_timeout_tsc = 0;
1544 
1545 	if (nvme_ctrlr_get_cc(ctrlr, &cc) ||
1546 	    nvme_ctrlr_get_csts(ctrlr, &csts)) {
1547 		if (ctrlr->state_timeout_tsc != NVME_TIMEOUT_INFINITE) {
1548 			/* While a device is resetting, it may be unable to service MMIO reads
1549 			 * temporarily. Allow for this case.
1550 			 */
1551 			SPDK_ERRLOG("Get registers failed while waiting for CSTS.RDY == 0\n");
1552 			goto init_timeout;
1553 		}
1554 		SPDK_ERRLOG("Failed to read CC and CSTS in state %d\n", ctrlr->state);
1555 		nvme_ctrlr_fail(ctrlr, false);
1556 		return -EIO;
1557 	}
1558 
1559 	ready_timeout_in_ms = 500 * ctrlr->cap.bits.to;
1560 
1561 	/*
1562 	 * Check if the current initialization step is done or has timed out.
1563 	 */
1564 	switch (ctrlr->state) {
1565 	case NVME_CTRLR_STATE_INIT:
1566 		/* Begin the hardware initialization by making sure the controller is disabled. */
1567 		if (cc.bits.en) {
1568 			SPDK_DEBUGLOG(SPDK_LOG_NVME, "CC.EN = 1\n");
1569 			/*
1570 			 * Controller is currently enabled. We need to disable it to cause a reset.
1571 			 *
1572 			 * If CC.EN = 1 && CSTS.RDY = 0, the controller is in the process of becoming ready.
1573 			 *  Wait for the ready bit to be 1 before disabling the controller.
1574 			 */
1575 			if (csts.bits.rdy == 0) {
1576 				SPDK_DEBUGLOG(SPDK_LOG_NVME, "CC.EN = 1 && CSTS.RDY = 0 - waiting for reset to complete\n");
1577 				nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1, ready_timeout_in_ms);
1578 				return 0;
1579 			}
1580 
1581 			/* CC.EN = 1 && CSTS.RDY == 1, so we can immediately disable the controller. */
1582 			SPDK_DEBUGLOG(SPDK_LOG_NVME, "Setting CC.EN = 0\n");
1583 			cc.bits.en = 0;
1584 			if (nvme_ctrlr_set_cc(ctrlr, &cc)) {
1585 				SPDK_ERRLOG("set_cc() failed\n");
1586 				nvme_ctrlr_fail(ctrlr, false);
1587 				return -EIO;
1588 			}
1589 			nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0, ready_timeout_in_ms);
1590 
1591 			/*
1592 			 * Wait 2 seconds before accessing PCI registers.
1593 			 * Not using sleep() to avoid blocking other controllers' initialization.
1594 			 */
1595 			if (ctrlr->quirks & NVME_QUIRK_DELAY_BEFORE_CHK_RDY) {
1596 				SPDK_DEBUGLOG(SPDK_LOG_NVME, "Applying quirk: delay 2 seconds before reading registers\n");
1597 				ctrlr->sleep_timeout_tsc = spdk_get_ticks() + 2 * spdk_get_ticks_hz();
1598 			}
1599 			return 0;
1600 		} else {
1601 			if (csts.bits.rdy == 1) {
1602 				SPDK_DEBUGLOG(SPDK_LOG_NVME, "CC.EN = 0 && CSTS.RDY = 1 - waiting for shutdown to complete\n");
1603 			}
1604 
1605 			nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0, ready_timeout_in_ms);
1606 			return 0;
1607 		}
1608 		break;
1609 
1610 	case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1:
1611 		if (csts.bits.rdy == 1) {
1612 			SPDK_DEBUGLOG(SPDK_LOG_NVME, "CC.EN = 1 && CSTS.RDY = 1 - disabling controller\n");
1613 			/* CC.EN = 1 && CSTS.RDY = 1, so we can set CC.EN = 0 now. */
1614 			SPDK_DEBUGLOG(SPDK_LOG_NVME, "Setting CC.EN = 0\n");
1615 			cc.bits.en = 0;
1616 			if (nvme_ctrlr_set_cc(ctrlr, &cc)) {
1617 				SPDK_ERRLOG("set_cc() failed\n");
1618 				nvme_ctrlr_fail(ctrlr, false);
1619 				return -EIO;
1620 			}
1621 			nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0, ready_timeout_in_ms);
1622 			return 0;
1623 		}
1624 		break;
1625 
1626 	case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0:
1627 		if (csts.bits.rdy == 0) {
1628 			SPDK_DEBUGLOG(SPDK_LOG_NVME, "CC.EN = 0 && CSTS.RDY = 0\n");
1629 			nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ENABLE, ready_timeout_in_ms);
1630 			/*
1631 			 * Delay 100us before setting CC.EN = 1.  Some NVMe SSDs miss CC.EN getting
1632 			 *  set to 1 if it is too soon after CSTS.RDY is reported as 0.
1633 			 */
1634 			spdk_delay_us(100);
1635 			return 0;
1636 		}
1637 		break;
1638 
1639 	case NVME_CTRLR_STATE_ENABLE:
1640 		SPDK_DEBUGLOG(SPDK_LOG_NVME, "Setting CC.EN = 1\n");
1641 		rc = nvme_ctrlr_enable(ctrlr);
1642 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1, ready_timeout_in_ms);
1643 		return rc;
1644 
1645 	case NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1:
1646 		if (csts.bits.rdy == 1) {
1647 			SPDK_DEBUGLOG(SPDK_LOG_NVME, "CC.EN = 1 && CSTS.RDY = 1 - controller is ready\n");
1648 			/*
1649 			 * The controller has been enabled.
1650 			 *  Perform the rest of initialization in nvme_ctrlr_start() serially.
1651 			 */
1652 			rc = nvme_ctrlr_start(ctrlr);
1653 			nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_READY, NVME_TIMEOUT_INFINITE);
1654 			return rc;
1655 		}
1656 		break;
1657 
1658 	case NVME_CTRLR_STATE_READY:
1659 		SPDK_DEBUGLOG(SPDK_LOG_NVME, "Ctrlr already in ready state\n");
1660 		return 0;
1661 
1662 	default:
1663 		assert(0);
1664 		nvme_ctrlr_fail(ctrlr, false);
1665 		return -1;
1666 	}
1667 
1668 init_timeout:
1669 	if (ctrlr->state_timeout_tsc != NVME_TIMEOUT_INFINITE &&
1670 	    spdk_get_ticks() > ctrlr->state_timeout_tsc) {
1671 		SPDK_ERRLOG("Initialization timed out in state %d\n", ctrlr->state);
1672 		nvme_ctrlr_fail(ctrlr, false);
1673 		return -1;
1674 	}
1675 
1676 	return 0;
1677 }
1678 
1679 int
1680 nvme_ctrlr_start(struct spdk_nvme_ctrlr *ctrlr)
1681 {
1682 	nvme_transport_qpair_reset(ctrlr->adminq);
1683 
1684 	nvme_qpair_enable(ctrlr->adminq);
1685 
1686 	if (nvme_ctrlr_identify(ctrlr) != 0) {
1687 		return -1;
1688 	}
1689 
1690 	if (nvme_ctrlr_set_num_qpairs(ctrlr) != 0) {
1691 		return -1;
1692 	}
1693 
1694 	if (nvme_ctrlr_construct_namespaces(ctrlr) != 0) {
1695 		return -1;
1696 	}
1697 
1698 	if (nvme_ctrlr_configure_aer(ctrlr) != 0) {
1699 		return -1;
1700 	}
1701 
1702 	nvme_ctrlr_set_supported_log_pages(ctrlr);
1703 	nvme_ctrlr_set_supported_features(ctrlr);
1704 
1705 	if (ctrlr->cdata.sgls.supported) {
1706 		ctrlr->flags |= SPDK_NVME_CTRLR_SGL_SUPPORTED;
1707 		ctrlr->max_sges = nvme_transport_ctrlr_get_max_sges(ctrlr);
1708 	}
1709 
1710 	if (ctrlr->cdata.oacs.doorbell_buffer_config) {
1711 		if (nvme_ctrlr_set_doorbell_buffer_config(ctrlr)) {
1712 			SPDK_WARNLOG("Doorbell buffer config failed\n");
1713 		}
1714 	}
1715 
1716 
1717 	if (nvme_ctrlr_set_keep_alive_timeout(ctrlr) != 0) {
1718 		SPDK_ERRLOG("Setting keep alive timeout failed\n");
1719 		return -1;
1720 	}
1721 
1722 	if (nvme_ctrlr_set_host_id(ctrlr) != 0) {
1723 		return -1;
1724 	}
1725 
1726 	return 0;
1727 }
1728 
1729 int
1730 nvme_robust_mutex_init_recursive_shared(pthread_mutex_t *mtx)
1731 {
1732 	pthread_mutexattr_t attr;
1733 	int rc = 0;
1734 
1735 	if (pthread_mutexattr_init(&attr)) {
1736 		return -1;
1737 	}
1738 	if (pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE) ||
1739 #ifndef __FreeBSD__
1740 	    pthread_mutexattr_setrobust(&attr, PTHREAD_MUTEX_ROBUST) ||
1741 	    pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED) ||
1742 #endif
1743 	    pthread_mutex_init(mtx, &attr)) {
1744 		rc = -1;
1745 	}
1746 	pthread_mutexattr_destroy(&attr);
1747 	return rc;
1748 }
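/*
 * A sketch of how a robust mutex behaves for callers (hypothetical snippet,
 * not part of this file): if the owning process dies while holding the lock,
 * the next locker is told so and can repair the protected state.  (On
 * FreeBSD, per the #ifndef above, the robust/pshared attributes are not set.)
 *
 *	int rc = pthread_mutex_lock(mtx);
 *	if (rc == EOWNERDEAD) {
 *		// Previous owner died holding the lock; mark the mutex
 *		// consistent before continuing.
 *		pthread_mutex_consistent(mtx);
 *	}
 */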
1749 
1750 int
1751 nvme_ctrlr_construct(struct spdk_nvme_ctrlr *ctrlr)
1752 {
1753 	int rc;
1754 
1755 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_INIT, NVME_TIMEOUT_INFINITE);
1756 	ctrlr->flags = 0;
1757 	ctrlr->free_io_qids = NULL;
1758 	ctrlr->is_resetting = false;
1759 	ctrlr->is_failed = false;
1760 
1761 	TAILQ_INIT(&ctrlr->active_io_qpairs);
1762 	STAILQ_INIT(&ctrlr->queued_aborts);
1763 	ctrlr->outstanding_aborts = 0;
1764 
1765 	rc = nvme_robust_mutex_init_recursive_shared(&ctrlr->ctrlr_lock);
1766 	if (rc != 0) {
1767 		return rc;
1768 	}
1769 
1770 	TAILQ_INIT(&ctrlr->active_procs);
1771 
1772 	return rc;
1773 }
1774 
1775 /* This function should be called once at ctrlr initialization to set up constant properties. */
1776 void
1777 nvme_ctrlr_init_cap(struct spdk_nvme_ctrlr *ctrlr, const union spdk_nvme_cap_register *cap,
1778 		    const union spdk_nvme_vs_register *vs)
1779 {
1780 	ctrlr->cap = *cap;
1781 	ctrlr->vs = *vs;
1782 
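	/* CAP.MPSMIN is an exponent: the minimum page size is 2^(12 + MPSMIN) bytes. */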
1783 	ctrlr->min_page_size = 1u << (12 + ctrlr->cap.bits.mpsmin);
1784 
1785 	/* For now, always select page_size == min_page_size. */
1786 	ctrlr->page_size = ctrlr->min_page_size;
1787 
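	/*
	 * Clamp io_queue_size to the range the controller supports.
	 * CAP.MQES is a 0-based value, so the largest valid queue size is mqes + 1.
	 */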
1788 	ctrlr->opts.io_queue_size = spdk_max(ctrlr->opts.io_queue_size, SPDK_NVME_IO_QUEUE_MIN_ENTRIES);
1789 	ctrlr->opts.io_queue_size = spdk_min(ctrlr->opts.io_queue_size, ctrlr->cap.bits.mqes + 1u);
1790 
1791 	ctrlr->opts.io_queue_requests = spdk_max(ctrlr->opts.io_queue_requests, ctrlr->opts.io_queue_size);
1792 }
1793 
1794 void
1795 nvme_ctrlr_destruct_finish(struct spdk_nvme_ctrlr *ctrlr)
1796 {
1797 	pthread_mutex_destroy(&ctrlr->ctrlr_lock);
1798 }
1799 
1800 void
1801 nvme_ctrlr_destruct(struct spdk_nvme_ctrlr *ctrlr)
1802 {
1803 	struct spdk_nvme_qpair *qpair, *tmp;
1804 
	SPDK_DEBUGLOG(SPDK_LOG_NVME, "Preparing to destruct SSD: %s\n", ctrlr->trid.traddr);
1806 	TAILQ_FOREACH_SAFE(qpair, &ctrlr->active_io_qpairs, tailq, tmp) {
1807 		spdk_nvme_ctrlr_free_io_qpair(qpair);
1808 	}
1809 
1810 	nvme_ctrlr_free_doorbell_buffer(ctrlr);
1811 
1812 	nvme_ctrlr_shutdown(ctrlr);
1813 
1814 	nvme_ctrlr_destruct_namespaces(ctrlr);
1815 
1816 	spdk_bit_array_free(&ctrlr->free_io_qids);
1817 
1818 	nvme_transport_ctrlr_destruct(ctrlr);
1819 }
1820 
1821 int
1822 nvme_ctrlr_submit_admin_request(struct spdk_nvme_ctrlr *ctrlr,
1823 				struct nvme_request *req)
1824 {
1825 	return nvme_qpair_submit_request(ctrlr->adminq, req);
1826 }
1827 
1828 static void
1829 nvme_keep_alive_completion(void *cb_ctx, const struct spdk_nvme_cpl *cpl)
1830 {
1831 	/* Do nothing */
1832 }
1833 
1834 /*
1835  * Check if we need to send a Keep Alive command.
1836  * Caller must hold ctrlr->ctrlr_lock.
1837  */
1838 static void
1839 nvme_ctrlr_keep_alive(struct spdk_nvme_ctrlr *ctrlr)
1840 {
1841 	uint64_t now;
1842 	struct nvme_request *req;
1843 	struct spdk_nvme_cmd *cmd;
1844 	int rc;
1845 
1846 	now = spdk_get_ticks();
1847 	if (now < ctrlr->next_keep_alive_tick) {
1848 		return;
1849 	}
1850 
1851 	req = nvme_allocate_request_null(ctrlr->adminq, nvme_keep_alive_completion, NULL);
1852 	if (req == NULL) {
1853 		return;
1854 	}
1855 
1856 	cmd = &req->cmd;
1857 	cmd->opc = SPDK_NVME_OPC_KEEP_ALIVE;
1858 
1859 	rc = nvme_ctrlr_submit_admin_request(ctrlr, req);
1860 	if (rc != 0) {
1861 		SPDK_ERRLOG("Submitting Keep Alive failed\n");
1862 	}
1863 
1864 	ctrlr->next_keep_alive_tick = now + ctrlr->keep_alive_interval_ticks;
1865 }
1866 
1867 int32_t
1868 spdk_nvme_ctrlr_process_admin_completions(struct spdk_nvme_ctrlr *ctrlr)
1869 {
1870 	int32_t num_completions;
1871 
1872 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
1873 	if (ctrlr->keep_alive_interval_ticks) {
1874 		nvme_ctrlr_keep_alive(ctrlr);
1875 	}
1876 	num_completions = spdk_nvme_qpair_process_completions(ctrlr->adminq, 0);
1877 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
1878 
1879 	return num_completions;
1880 }
1881 
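/*
 * Illustrative sketch (hypothetical helper, not used elsewhere in this file):
 * a single thread typically drives the admin queue by polling it from its
 * event loop.  Since this also triggers the keep alive logic above, the poll
 * period should stay well below the negotiated keep alive timeout.
 */
static inline void
example_poll_admin_until_done(struct spdk_nvme_ctrlr *ctrlr, const bool *done)
{
	while (!*done) {
		/* Process any outstanding admin completions; returns the count. */
		spdk_nvme_ctrlr_process_admin_completions(ctrlr);
	}
}
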
1882 const struct spdk_nvme_ctrlr_data *
1883 spdk_nvme_ctrlr_get_data(struct spdk_nvme_ctrlr *ctrlr)
1884 {
1885 	return &ctrlr->cdata;
1886 }
1887 
union spdk_nvme_csts_register
spdk_nvme_ctrlr_get_regs_csts(struct spdk_nvme_ctrlr *ctrlr)
1889 {
1890 	union spdk_nvme_csts_register csts;
1891 
1892 	if (nvme_ctrlr_get_csts(ctrlr, &csts)) {
1893 		csts.raw = 0xFFFFFFFFu;
1894 	}
1895 	return csts;
1896 }
1897 
union spdk_nvme_cap_register
spdk_nvme_ctrlr_get_regs_cap(struct spdk_nvme_ctrlr *ctrlr)
1899 {
1900 	return ctrlr->cap;
1901 }
1902 
union spdk_nvme_vs_register
spdk_nvme_ctrlr_get_regs_vs(struct spdk_nvme_ctrlr *ctrlr)
1904 {
1905 	return ctrlr->vs;
1906 }
1907 
1908 uint32_t
1909 spdk_nvme_ctrlr_get_num_ns(struct spdk_nvme_ctrlr *ctrlr)
1910 {
1911 	return ctrlr->num_ns;
1912 }
1913 
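/*
 * Binary search for nsid in active_ns_list.  The list holds the active
 * namespace IDs in ascending order, padded out to num_ns entries with zeroes,
 * so a zero entry is treated as sorting after any valid namespace ID.
 */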
1914 static int32_t
1915 spdk_nvme_ctrlr_active_ns_idx(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid)
1916 {
1917 	int32_t result = -1;
1918 
1919 	if (ctrlr->active_ns_list == NULL || nsid == 0 || nsid > ctrlr->num_ns) {
1920 		return result;
1921 	}
1922 
1923 	int32_t lower = 0;
1924 	int32_t upper = ctrlr->num_ns - 1;
1925 	int32_t mid;
1926 
1927 	while (lower <= upper) {
1928 		mid = lower + (upper - lower) / 2;
1929 		if (ctrlr->active_ns_list[mid] == nsid) {
1930 			result = mid;
1931 			break;
		} else if (ctrlr->active_ns_list[mid] != 0 && ctrlr->active_ns_list[mid] < nsid) {
			lower = mid + 1;
		} else {
			upper = mid - 1;
		}
1940 	}
1941 
1942 	return result;
1943 }
1944 
1945 bool
1946 spdk_nvme_ctrlr_is_active_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid)
1947 {
1948 	return spdk_nvme_ctrlr_active_ns_idx(ctrlr, nsid) != -1;
1949 }
1950 
1951 uint32_t
1952 spdk_nvme_ctrlr_get_first_active_ns(struct spdk_nvme_ctrlr *ctrlr)
1953 {
1954 	return ctrlr->active_ns_list ? ctrlr->active_ns_list[0] : 0;
1955 }
1956 
1957 uint32_t
1958 spdk_nvme_ctrlr_get_next_active_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t prev_nsid)
1959 {
1960 	int32_t nsid_idx = spdk_nvme_ctrlr_active_ns_idx(ctrlr, prev_nsid);
1961 	if (ctrlr->active_ns_list && nsid_idx >= 0 && (uint32_t)nsid_idx < ctrlr->num_ns - 1) {
1962 		return ctrlr->active_ns_list[nsid_idx + 1];
1963 	}
1964 	return 0;
1965 }
1966 
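/*
 * Illustrative sketch (hypothetical helper, not used elsewhere in this file):
 * the two accessors above are meant to be chained into a loop over every
 * active namespace.
 */
static inline uint32_t
example_count_active_ns(struct spdk_nvme_ctrlr *ctrlr)
{
	uint32_t nsid, count = 0;

	for (nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr);
	     nsid != 0;
	     nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, nsid)) {
		count++;
	}
	return count;
}
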
1967 struct spdk_nvme_ns *
1968 spdk_nvme_ctrlr_get_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid)
1969 {
1970 	if (nsid < 1 || nsid > ctrlr->num_ns) {
1971 		return NULL;
1972 	}
1973 
1974 	return &ctrlr->ns[nsid - 1];
1975 }
1976 
1977 struct spdk_pci_device *
1978 spdk_nvme_ctrlr_get_pci_device(struct spdk_nvme_ctrlr *ctrlr)
1979 {
1980 	if (ctrlr == NULL) {
1981 		return NULL;
1982 	}
1983 
1984 	if (ctrlr->trid.trtype != SPDK_NVME_TRANSPORT_PCIE) {
1985 		return NULL;
1986 	}
1987 
1988 	return nvme_ctrlr_proc_get_devhandle(ctrlr);
1989 }
1990 
1991 uint32_t
1992 spdk_nvme_ctrlr_get_max_xfer_size(const struct spdk_nvme_ctrlr *ctrlr)
1993 {
1994 	return ctrlr->max_xfer_size;
1995 }
1996 
1997 void
1998 spdk_nvme_ctrlr_register_aer_callback(struct spdk_nvme_ctrlr *ctrlr,
1999 				      spdk_nvme_aer_cb aer_cb_fn,
2000 				      void *aer_cb_arg)
2001 {
2002 	struct spdk_nvme_ctrlr_process *active_proc;
2003 
2004 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
2005 
2006 	active_proc = spdk_nvme_ctrlr_get_current_process(ctrlr);
2007 	if (active_proc) {
2008 		active_proc->aer_cb_fn = aer_cb_fn;
2009 		active_proc->aer_cb_arg = aer_cb_arg;
2010 	}
2011 
2012 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
2013 }
2014 
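/*
 * Register a per-process command timeout callback.  nvme_timeout is given in
 * seconds and converted to ticks here.
 */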
2015 void
2016 spdk_nvme_ctrlr_register_timeout_callback(struct spdk_nvme_ctrlr *ctrlr,
2017 		uint32_t nvme_timeout, spdk_nvme_timeout_cb cb_fn, void *cb_arg)
2018 {
2019 	struct spdk_nvme_ctrlr_process	*active_proc;
2020 
2021 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
2022 
2023 	active_proc = spdk_nvme_ctrlr_get_current_process(ctrlr);
2024 	if (active_proc) {
2025 		active_proc->timeout_ticks = nvme_timeout * spdk_get_ticks_hz();
2026 		active_proc->timeout_cb_fn = cb_fn;
2027 		active_proc->timeout_cb_arg = cb_arg;
2028 	}
2029 
2030 	ctrlr->timeout_enabled = true;
2031 
2032 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
2033 }
2034 
2035 bool
2036 spdk_nvme_ctrlr_is_log_page_supported(struct spdk_nvme_ctrlr *ctrlr, uint8_t log_page)
2037 {
2038 	/* No bounds check necessary, since log_page is uint8_t and log_page_supported has 256 entries */
2039 	SPDK_STATIC_ASSERT(sizeof(ctrlr->log_page_supported) == 256, "log_page_supported size mismatch");
2040 	return ctrlr->log_page_supported[log_page];
2041 }
2042 
2043 bool
2044 spdk_nvme_ctrlr_is_feature_supported(struct spdk_nvme_ctrlr *ctrlr, uint8_t feature_code)
2045 {
2046 	/* No bounds check necessary, since feature_code is uint8_t and feature_supported has 256 entries */
2047 	SPDK_STATIC_ASSERT(sizeof(ctrlr->feature_supported) == 256, "feature_supported size mismatch");
2048 	return ctrlr->feature_supported[feature_code];
2049 }
2050 
2051 int
2052 spdk_nvme_ctrlr_attach_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid,
2053 			  struct spdk_nvme_ctrlr_list *payload)
2054 {
2055 	struct nvme_completion_poll_status	status;
2056 	int					res;
2057 	struct spdk_nvme_ns			*ns;
2058 
2059 	res = nvme_ctrlr_cmd_attach_ns(ctrlr, nsid, payload,
2060 				       nvme_completion_poll_cb, &status);
2061 	if (res) {
2062 		return res;
2063 	}
2064 	if (spdk_nvme_wait_for_completion_robust_lock(ctrlr->adminq, &status, &ctrlr->ctrlr_lock)) {
2065 		SPDK_ERRLOG("spdk_nvme_ctrlr_attach_ns failed!\n");
2066 		return -ENXIO;
2067 	}
2068 
2069 	res = nvme_ctrlr_identify_active_ns(ctrlr);
2070 	if (res) {
2071 		return res;
2072 	}
2073 
2074 	ns = &ctrlr->ns[nsid - 1];
2075 	return nvme_ns_construct(ns, nsid, ctrlr);
2076 }
2077 
2078 int
2079 spdk_nvme_ctrlr_detach_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid,
2080 			  struct spdk_nvme_ctrlr_list *payload)
2081 {
2082 	struct nvme_completion_poll_status	status;
2083 	int					res;
2084 	struct spdk_nvme_ns			*ns;
2085 
2086 	res = nvme_ctrlr_cmd_detach_ns(ctrlr, nsid, payload,
2087 				       nvme_completion_poll_cb, &status);
2088 	if (res) {
2089 		return res;
2090 	}
2091 	if (spdk_nvme_wait_for_completion_robust_lock(ctrlr->adminq, &status, &ctrlr->ctrlr_lock)) {
2092 		SPDK_ERRLOG("spdk_nvme_ctrlr_detach_ns failed!\n");
2093 		return -ENXIO;
2094 	}
2095 
2096 	res = nvme_ctrlr_identify_active_ns(ctrlr);
2097 	if (res) {
2098 		return res;
2099 	}
2100 
2101 	ns = &ctrlr->ns[nsid - 1];
2102 	/* Inactive NS */
2103 	nvme_ns_destruct(ns);
2104 
2105 	return 0;
2106 }
2107 
2108 uint32_t
2109 spdk_nvme_ctrlr_create_ns(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_ns_data *payload)
2110 {
2111 	struct nvme_completion_poll_status	status;
2112 	int					res;
2113 	uint32_t				nsid;
2114 	struct spdk_nvme_ns			*ns;
2115 
2116 	res = nvme_ctrlr_cmd_create_ns(ctrlr, payload, nvme_completion_poll_cb, &status);
2117 	if (res) {
2118 		return 0;
2119 	}
2120 	if (spdk_nvme_wait_for_completion_robust_lock(ctrlr->adminq, &status, &ctrlr->ctrlr_lock)) {
2121 		SPDK_ERRLOG("spdk_nvme_ctrlr_create_ns failed!\n");
2122 		return 0;
2123 	}
2124 
2125 	nsid = status.cpl.cdw0;
2126 	ns = &ctrlr->ns[nsid - 1];
2127 	/* Inactive NS */
2128 	res = nvme_ns_construct(ns, nsid, ctrlr);
2129 	if (res) {
2130 		return 0;
2131 	}
2132 
2133 	/* Return the namespace ID that was created */
2134 	return nsid;
2135 }
2136 
2137 int
2138 spdk_nvme_ctrlr_delete_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid)
2139 {
2140 	struct nvme_completion_poll_status	status;
2141 	int					res;
2142 	struct spdk_nvme_ns			*ns;
2143 
2144 	res = nvme_ctrlr_cmd_delete_ns(ctrlr, nsid, nvme_completion_poll_cb, &status);
2145 	if (res) {
2146 		return res;
2147 	}
2148 	if (spdk_nvme_wait_for_completion_robust_lock(ctrlr->adminq, &status, &ctrlr->ctrlr_lock)) {
2149 		SPDK_ERRLOG("spdk_nvme_ctrlr_delete_ns failed!\n");
2150 		return -ENXIO;
2151 	}
2152 
2153 	res = nvme_ctrlr_identify_active_ns(ctrlr);
2154 	if (res) {
2155 		return res;
2156 	}
2157 
2158 	ns = &ctrlr->ns[nsid - 1];
2159 	nvme_ns_destruct(ns);
2160 
2161 	return 0;
2162 }
2163 
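/*
 * Illustrative sketch (hypothetical helper, not used elsewhere in this file):
 * the namespace management calls above are synchronous, so a typical flow
 * creates a namespace and then attaches it to this controller by its ID.
 */
static inline uint32_t
example_create_and_attach_ns(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_ns_data *nsdata)
{
	struct spdk_nvme_ctrlr_list ctrlr_list;
	uint32_t nsid;

	nsid = spdk_nvme_ctrlr_create_ns(ctrlr, nsdata);
	if (nsid == 0) {
		return 0;
	}

	memset(&ctrlr_list, 0, sizeof(ctrlr_list));
	ctrlr_list.ctrlr_count = 1;
	ctrlr_list.ctrlr_list[0] = ctrlr->cdata.cntlid;
	if (spdk_nvme_ctrlr_attach_ns(ctrlr, nsid, &ctrlr_list) != 0) {
		return 0;
	}
	return nsid;
}
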
2164 int
2165 spdk_nvme_ctrlr_format(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid,
2166 		       struct spdk_nvme_format *format)
2167 {
2168 	struct nvme_completion_poll_status	status;
2169 	int					res;
2170 
2171 	res = nvme_ctrlr_cmd_format(ctrlr, nsid, format, nvme_completion_poll_cb,
2172 				    &status);
2173 	if (res) {
2174 		return res;
2175 	}
2176 	if (spdk_nvme_wait_for_completion_robust_lock(ctrlr->adminq, &status, &ctrlr->ctrlr_lock)) {
2177 		SPDK_ERRLOG("spdk_nvme_ctrlr_format failed!\n");
2178 		return -ENXIO;
2179 	}
2180 
2181 	return spdk_nvme_ctrlr_reset(ctrlr);
2182 }
2183 
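/*
 * Download a firmware image in min_page_size chunks, commit it to the given
 * slot, and reset the controller to activate it.  completion_status receives
 * the raw status of the Firmware Commit command so that callers can tell the
 * reset-required outcomes apart from real failures.
 */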
2184 int
2185 spdk_nvme_ctrlr_update_firmware(struct spdk_nvme_ctrlr *ctrlr, void *payload, uint32_t size,
2186 				int slot, enum spdk_nvme_fw_commit_action commit_action, struct spdk_nvme_status *completion_status)
2187 {
2188 	struct spdk_nvme_fw_commit		fw_commit;
2189 	struct nvme_completion_poll_status	status;
2190 	int					res;
2191 	unsigned int				size_remaining;
2192 	unsigned int				offset;
2193 	unsigned int				transfer;
2194 	void					*p;
2195 
2196 	if (!completion_status) {
2197 		return -EINVAL;
2198 	}
2199 	memset(completion_status, 0, sizeof(struct spdk_nvme_status));
2200 	if (size % 4) {
		SPDK_ERRLOG("spdk_nvme_ctrlr_update_firmware invalid size - must be a multiple of 4!\n");
2202 		return -1;
2203 	}
2204 
	/*
	 * Only SPDK_NVME_FW_COMMIT_REPLACE_IMG and
	 * SPDK_NVME_FW_COMMIT_REPLACE_AND_ENABLE_IMG commit actions are currently
	 * supported.
	 */
2208 	if ((commit_action != SPDK_NVME_FW_COMMIT_REPLACE_IMG) &&
2209 	    (commit_action != SPDK_NVME_FW_COMMIT_REPLACE_AND_ENABLE_IMG)) {
		SPDK_ERRLOG("spdk_nvme_ctrlr_update_firmware invalid commit action!\n");
2211 		return -1;
2212 	}
2213 
2214 	/* Firmware download */
2215 	size_remaining = size;
2216 	offset = 0;
2217 	p = payload;
2218 
2219 	while (size_remaining > 0) {
2220 		transfer = spdk_min(size_remaining, ctrlr->min_page_size);
2221 
2222 		res = nvme_ctrlr_cmd_fw_image_download(ctrlr, transfer, offset, p,
2223 						       nvme_completion_poll_cb,
2224 						       &status);
2225 		if (res) {
2226 			return res;
2227 		}
2228 
2229 		if (spdk_nvme_wait_for_completion_robust_lock(ctrlr->adminq, &status, &ctrlr->ctrlr_lock)) {
2230 			SPDK_ERRLOG("spdk_nvme_ctrlr_fw_image_download failed!\n");
2231 			return -ENXIO;
2232 		}
2233 		p += transfer;
2234 		offset += transfer;
2235 		size_remaining -= transfer;
2236 	}
2237 
2238 	/* Firmware commit */
2239 	memset(&fw_commit, 0, sizeof(struct spdk_nvme_fw_commit));
2240 	fw_commit.fs = slot;
2241 	fw_commit.ca = commit_action;
2242 
2243 	res = nvme_ctrlr_cmd_fw_commit(ctrlr, &fw_commit, nvme_completion_poll_cb,
2244 				       &status);
2245 	if (res) {
2246 		return res;
2247 	}
2248 
2249 	res = spdk_nvme_wait_for_completion_robust_lock(ctrlr->adminq, &status, &ctrlr->ctrlr_lock);
2250 
2251 	memcpy(completion_status, &status.cpl.status, sizeof(struct spdk_nvme_status));
2252 
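	/*
	 * A FIRMWARE_REQ_NVM_RESET completion status is not a failure here: it
	 * means the new image is activated by the controller reset issued below.
	 */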
2253 	if (res) {
2254 		if (status.cpl.status.sct != SPDK_NVME_SCT_COMMAND_SPECIFIC ||
2255 		    status.cpl.status.sc != SPDK_NVME_SC_FIRMWARE_REQ_NVM_RESET) {
2256 			if (status.cpl.status.sct == SPDK_NVME_SCT_COMMAND_SPECIFIC  &&
2257 			    status.cpl.status.sc == SPDK_NVME_SC_FIRMWARE_REQ_CONVENTIONAL_RESET) {
				SPDK_NOTICELOG("firmware activation requires a conventional reset to be performed!\n");
2259 			} else {
2260 				SPDK_ERRLOG("nvme_ctrlr_cmd_fw_commit failed!\n");
2261 			}
2262 			return -ENXIO;
2263 		}
2264 	}
2265 
2266 	return spdk_nvme_ctrlr_reset(ctrlr);
2267 }
2268 
2269 void *
2270 spdk_nvme_ctrlr_alloc_cmb_io_buffer(struct spdk_nvme_ctrlr *ctrlr, size_t size)
2271 {
2272 	void *buf;
2273 
2274 	if (size == 0) {
2275 		return NULL;
2276 	}
2277 
2278 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
2279 	buf = nvme_transport_ctrlr_alloc_cmb_io_buffer(ctrlr, size);
2280 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
2281 
2282 	return buf;
2283 }
2284 
2285 void
2286 spdk_nvme_ctrlr_free_cmb_io_buffer(struct spdk_nvme_ctrlr *ctrlr, void *buf, size_t size)
2287 {
2288 	if (buf && size) {
2289 		nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
2290 		nvme_transport_ctrlr_free_cmb_io_buffer(ctrlr, buf, size);
2291 		nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
2292 	}
2293 }
2294
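/*
 * Illustrative sketch (hypothetical helper, not used elsewhere in this file):
 * controller memory buffer allocations are paired, and the free side must be
 * passed the same size that was originally requested.
 */
static inline void
example_cmb_buffer_roundtrip(struct spdk_nvme_ctrlr *ctrlr)
{
	size_t size = 4096;
	void *buf = spdk_nvme_ctrlr_alloc_cmb_io_buffer(ctrlr, size);

	if (buf == NULL) {
		return;	/* No CMB, or not enough room left in it. */
	}
	/* ... use buf as an I/O data buffer ... */
	spdk_nvme_ctrlr_free_cmb_io_buffer(ctrlr, buf, size);
}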