xref: /spdk/lib/nvme/nvme_ctrlr.c (revision da60639f86dd88295eb46c2d76f9c327db92d7b3)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright (c) Intel Corporation. All rights reserved.
5  *   Copyright (c) 2019, 2020 Mellanox Technologies LTD. All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #include "spdk/stdinc.h"
35 
36 #include "nvme_internal.h"
37 #include "nvme_io_msg.h"
38 
39 #include "spdk/env.h"
40 #include "spdk/string.h"
41 
42 struct nvme_active_ns_ctx;
43 
44 static void nvme_ctrlr_destruct_namespaces(struct spdk_nvme_ctrlr *ctrlr);
45 static int nvme_ctrlr_construct_and_submit_aer(struct spdk_nvme_ctrlr *ctrlr,
46 		struct nvme_async_event_request *aer);
47 static void nvme_ctrlr_identify_active_ns_async(struct nvme_active_ns_ctx *ctx);
48 static int nvme_ctrlr_identify_ns_async(struct spdk_nvme_ns *ns);
49 static int nvme_ctrlr_identify_id_desc_async(struct spdk_nvme_ns *ns);
50 
51 static int
52 nvme_ctrlr_get_cc(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_cc_register *cc)
53 {
54 	return nvme_transport_ctrlr_get_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, cc.raw),
55 					      &cc->raw);
56 }
57 
58 static int
59 nvme_ctrlr_get_csts(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_csts_register *csts)
60 {
61 	return nvme_transport_ctrlr_get_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, csts.raw),
62 					      &csts->raw);
63 }
64 
65 int
66 nvme_ctrlr_get_cap(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_cap_register *cap)
67 {
68 	return nvme_transport_ctrlr_get_reg_8(ctrlr, offsetof(struct spdk_nvme_registers, cap.raw),
69 					      &cap->raw);
70 }
71 
72 int
73 nvme_ctrlr_get_vs(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_vs_register *vs)
74 {
75 	return nvme_transport_ctrlr_get_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, vs.raw),
76 					      &vs->raw);
77 }
78 
79 static int
80 nvme_ctrlr_set_cc(struct spdk_nvme_ctrlr *ctrlr, const union spdk_nvme_cc_register *cc)
81 {
82 	return nvme_transport_ctrlr_set_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, cc.raw),
83 					      cc->raw);
84 }
85 
86 int
87 nvme_ctrlr_get_cmbsz(struct spdk_nvme_ctrlr *ctrlr, union spdk_nvme_cmbsz_register *cmbsz)
88 {
89 	return nvme_transport_ctrlr_get_reg_4(ctrlr, offsetof(struct spdk_nvme_registers, cmbsz.raw),
90 					      &cmbsz->raw);
91 }
92 
93 /* When the fields in spdk_nvme_ctrlr_opts are changed and you change this function, please
94  * also update the nvme_ctrl_opts_init function in nvme_ctrlr.c
95  */
96 void
97 spdk_nvme_ctrlr_get_default_ctrlr_opts(struct spdk_nvme_ctrlr_opts *opts, size_t opts_size)
98 {
99 	char host_id_str[SPDK_UUID_STRING_LEN];
100 
101 	assert(opts);
102 
103 	opts->opts_size = opts_size;
104 
105 #define FIELD_OK(field) \
106 	offsetof(struct spdk_nvme_ctrlr_opts, field) + sizeof(opts->field) <= opts_size
107 
108 	if (FIELD_OK(num_io_queues)) {
109 		opts->num_io_queues = DEFAULT_MAX_IO_QUEUES;
110 	}
111 
112 	if (FIELD_OK(use_cmb_sqs)) {
113 		opts->use_cmb_sqs = true;
114 	}
115 
116 	if (FIELD_OK(no_shn_notification)) {
117 		opts->no_shn_notification = false;
118 	}
119 
120 	if (FIELD_OK(arb_mechanism)) {
121 		opts->arb_mechanism = SPDK_NVME_CC_AMS_RR;
122 	}
123 
124 	if (FIELD_OK(arbitration_burst)) {
125 		opts->arbitration_burst = 0;
126 	}
127 
128 	if (FIELD_OK(low_priority_weight)) {
129 		opts->low_priority_weight = 0;
130 	}
131 
132 	if (FIELD_OK(medium_priority_weight)) {
133 		opts->medium_priority_weight = 0;
134 	}
135 
136 	if (FIELD_OK(high_priority_weight)) {
137 		opts->high_priority_weight = 0;
138 	}
139 
140 	if (FIELD_OK(keep_alive_timeout_ms)) {
141 		opts->keep_alive_timeout_ms = MIN_KEEP_ALIVE_TIMEOUT_IN_MS;
142 	}
143 
144 	if (FIELD_OK(transport_retry_count)) {
145 		opts->transport_retry_count = SPDK_NVME_DEFAULT_RETRY_COUNT;
146 	}
147 
148 	if (FIELD_OK(io_queue_size)) {
149 		opts->io_queue_size = DEFAULT_IO_QUEUE_SIZE;
150 	}
151 
152 	if (nvme_driver_init() == 0) {
153 		if (FIELD_OK(hostnqn)) {
154 			spdk_uuid_fmt_lower(host_id_str, sizeof(host_id_str),
155 					    &g_spdk_nvme_driver->default_extended_host_id);
156 			snprintf(opts->hostnqn, sizeof(opts->hostnqn), "2014-08.org.nvmexpress:uuid:%s", host_id_str);
157 		}
158 
159 		if (FIELD_OK(extended_host_id)) {
160 			memcpy(opts->extended_host_id, &g_spdk_nvme_driver->default_extended_host_id,
161 			       sizeof(opts->extended_host_id));
162 		}
163 
164 	}
165 
166 	if (FIELD_OK(io_queue_requests)) {
167 		opts->io_queue_requests = DEFAULT_IO_QUEUE_REQUESTS;
168 	}
169 
170 	if (FIELD_OK(src_addr)) {
171 		memset(opts->src_addr, 0, sizeof(opts->src_addr));
172 	}
173 
174 	if (FIELD_OK(src_svcid)) {
175 		memset(opts->src_svcid, 0, sizeof(opts->src_svcid));
176 	}
177 
178 	if (FIELD_OK(host_id)) {
179 		memset(opts->host_id, 0, sizeof(opts->host_id));
180 	}
181 
182 	if (FIELD_OK(command_set)) {
183 		opts->command_set = SPDK_NVME_CC_CSS_NVM;
184 	}
185 
186 	if (FIELD_OK(admin_timeout_ms)) {
187 		opts->admin_timeout_ms = NVME_MAX_ADMIN_TIMEOUT_IN_SECS * 1000;
188 	}
189 
190 	if (FIELD_OK(header_digest)) {
191 		opts->header_digest = false;
192 	}
193 
194 	if (FIELD_OK(data_digest)) {
195 		opts->data_digest = false;
196 	}
197 
198 	if (FIELD_OK(disable_error_logging)) {
199 		opts->disable_error_logging = false;
200 	}
201 
202 	if (FIELD_OK(transport_ack_timeout)) {
203 		opts->transport_ack_timeout = SPDK_NVME_DEFAULT_TRANSPORT_ACK_TIMEOUT;
204 	}
205 
206 	if (FIELD_OK(admin_queue_size)) {
207 		opts->admin_queue_size = DEFAULT_ADMIN_QUEUE_SIZE;
208 	}
209 #undef FIELD_OK
210 }
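
/*
 * Illustrative usage sketch (editor's example, not part of the driver): the
 * FIELD_OK() pattern above only initializes fields that fit within the
 * caller-provided opts_size, which is what keeps this function ABI-compatible
 * when spdk_nvme_ctrlr_opts grows. A caller built against an older header
 * would, roughly, do:
 *
 *	struct spdk_nvme_ctrlr_opts opts;
 *
 *	spdk_nvme_ctrlr_get_default_ctrlr_opts(&opts, sizeof(opts));
 *	opts.num_io_queues = 4;
 *	// pass &opts and sizeof(opts) along to the probe/connect path
 *
 * Fields added to the structure after that header version are simply left
 * untouched for such a caller.
 */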
211 
212 /**
213  * This function will be called when the process allocates the IO qpair.
214  * Note: the ctrlr_lock must be held when calling this function.
215  */
216 static void
217 nvme_ctrlr_proc_add_io_qpair(struct spdk_nvme_qpair *qpair)
218 {
219 	struct spdk_nvme_ctrlr_process	*active_proc;
220 	struct spdk_nvme_ctrlr		*ctrlr = qpair->ctrlr;
221 
222 	active_proc = nvme_ctrlr_get_current_process(ctrlr);
223 	if (active_proc) {
224 		TAILQ_INSERT_TAIL(&active_proc->allocated_io_qpairs, qpair, per_process_tailq);
225 		qpair->active_proc = active_proc;
226 	}
227 }
228 
229 /**
230  * This function will be called when the process frees the IO qpair.
231  * Note: the ctrlr_lock must be held when calling this function.
232  */
233 static void
234 nvme_ctrlr_proc_remove_io_qpair(struct spdk_nvme_qpair *qpair)
235 {
236 	struct spdk_nvme_ctrlr_process	*active_proc;
237 	struct spdk_nvme_ctrlr		*ctrlr = qpair->ctrlr;
238 	struct spdk_nvme_qpair          *active_qpair, *tmp_qpair;
239 
240 	active_proc = nvme_ctrlr_get_current_process(ctrlr);
241 	if (!active_proc) {
242 		return;
243 	}
244 
245 	TAILQ_FOREACH_SAFE(active_qpair, &active_proc->allocated_io_qpairs,
246 			   per_process_tailq, tmp_qpair) {
247 		if (active_qpair == qpair) {
248 			TAILQ_REMOVE(&active_proc->allocated_io_qpairs,
249 				     active_qpair, per_process_tailq);
250 
251 			break;
252 		}
253 	}
254 }
255 
256 void
257 spdk_nvme_ctrlr_get_default_io_qpair_opts(struct spdk_nvme_ctrlr *ctrlr,
258 		struct spdk_nvme_io_qpair_opts *opts,
259 		size_t opts_size)
260 {
261 	assert(ctrlr);
262 
263 	assert(opts);
264 
265 	memset(opts, 0, opts_size);
266 
267 #define FIELD_OK(field) \
268 	offsetof(struct spdk_nvme_io_qpair_opts, field) + sizeof(opts->field) <= opts_size
269 
270 	if (FIELD_OK(qprio)) {
271 		opts->qprio = SPDK_NVME_QPRIO_URGENT;
272 	}
273 
274 	if (FIELD_OK(io_queue_size)) {
275 		opts->io_queue_size = ctrlr->opts.io_queue_size;
276 	}
277 
278 	if (FIELD_OK(io_queue_requests)) {
279 		opts->io_queue_requests = ctrlr->opts.io_queue_requests;
280 	}
281 
282 	if (FIELD_OK(delay_cmd_submit)) {
283 		opts->delay_cmd_submit = false;
284 	}
285 
286 	if (FIELD_OK(sq.vaddr)) {
287 		opts->sq.vaddr = NULL;
288 	}
289 
290 	if (FIELD_OK(sq.paddr)) {
291 		opts->sq.paddr = 0;
292 	}
293 
294 	if (FIELD_OK(sq.buffer_size)) {
295 		opts->sq.buffer_size = 0;
296 	}
297 
298 	if (FIELD_OK(cq.vaddr)) {
299 		opts->cq.vaddr = NULL;
300 	}
301 
302 	if (FIELD_OK(cq.paddr)) {
303 		opts->cq.paddr = 0;
304 	}
305 
306 	if (FIELD_OK(cq.buffer_size)) {
307 		opts->cq.buffer_size = 0;
308 	}
309 
310 	if (FIELD_OK(create_only)) {
311 		opts->create_only = false;
312 	}
313 
314 #undef FIELD_OK
315 }
316 
317 static struct spdk_nvme_qpair *
318 nvme_ctrlr_create_io_qpair(struct spdk_nvme_ctrlr *ctrlr,
319 			   const struct spdk_nvme_io_qpair_opts *opts)
320 {
321 	uint32_t				qid;
322 	struct spdk_nvme_qpair			*qpair;
323 	union spdk_nvme_cc_register		cc;
324 
325 	if (!ctrlr) {
326 		return NULL;
327 	}
328 
329 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
330 	if (nvme_ctrlr_get_cc(ctrlr, &cc)) {
331 		SPDK_ERRLOG("get_cc failed\n");
332 		nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
333 		return NULL;
334 	}
335 
336 	if (opts->qprio & ~SPDK_NVME_CREATE_IO_SQ_QPRIO_MASK) {
337 		nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
338 		return NULL;
339 	}
340 
341 	/*
342 	 * Only the value SPDK_NVME_QPRIO_URGENT (0) is valid for the
343 	 * default round robin arbitration method.
344 	 */
345 	if ((cc.bits.ams == SPDK_NVME_CC_AMS_RR) && (opts->qprio != SPDK_NVME_QPRIO_URGENT)) {
346 		SPDK_ERRLOG("invalid queue priority for default round robin arbitration method\n");
347 		nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
348 		return NULL;
349 	}
350 
351 	/*
352 	 * Get the first available I/O queue ID.
353 	 */
354 	qid = spdk_bit_array_find_first_set(ctrlr->free_io_qids, 1);
355 	if (qid > ctrlr->opts.num_io_queues) {
356 		SPDK_ERRLOG("No free I/O queue IDs\n");
357 		nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
358 		return NULL;
359 	}
360 
361 	qpair = nvme_transport_ctrlr_create_io_qpair(ctrlr, qid, opts);
362 	if (qpair == NULL) {
363 		SPDK_ERRLOG("nvme_transport_ctrlr_create_io_qpair() failed\n");
364 		nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
365 		return NULL;
366 	}
367 
368 	spdk_bit_array_clear(ctrlr->free_io_qids, qid);
369 	TAILQ_INSERT_TAIL(&ctrlr->active_io_qpairs, qpair, tailq);
370 
371 	nvme_ctrlr_proc_add_io_qpair(qpair);
372 
373 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
374 
375 	return qpair;
376 }
377 
378 int
379 spdk_nvme_ctrlr_connect_io_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair)
380 {
381 	int rc;
382 
383 	if (nvme_qpair_get_state(qpair) != NVME_QPAIR_DISCONNECTED) {
384 		return -EISCONN;
385 	}
386 
387 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
388 	rc = nvme_transport_ctrlr_connect_qpair(ctrlr, qpair);
389 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
390 
391 	if (ctrlr->quirks & NVME_QUIRK_DELAY_AFTER_QUEUE_ALLOC) {
392 		spdk_delay_us(100);
393 	}
394 
395 	return rc;
396 }
397 
398 void
399 spdk_nvme_ctrlr_disconnect_io_qpair(struct spdk_nvme_qpair *qpair)
400 {
401 	struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr;
402 
403 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
404 	nvme_transport_ctrlr_disconnect_qpair(ctrlr, qpair);
405 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
406 }
407 
408 struct spdk_nvme_qpair *
409 spdk_nvme_ctrlr_alloc_io_qpair(struct spdk_nvme_ctrlr *ctrlr,
410 			       const struct spdk_nvme_io_qpair_opts *user_opts,
411 			       size_t opts_size)
412 {
413 
414 	struct spdk_nvme_qpair		*qpair;
415 	struct spdk_nvme_io_qpair_opts	opts;
416 	int				rc;
417 
418 	/*
419 	 * Get the default options, then overwrite them with the user-provided options
420 	 * up to opts_size.
421 	 *
422 	 * This allows for extensions of the opts structure without breaking
423 	 * ABI compatibility.
424 	 */
425 	spdk_nvme_ctrlr_get_default_io_qpair_opts(ctrlr, &opts, sizeof(opts));
426 	if (user_opts) {
427 		memcpy(&opts, user_opts, spdk_min(sizeof(opts), opts_size));
428 
429 		/* If user passes buffers, make sure they're big enough for the requested queue size */
430 		if (opts.sq.vaddr) {
431 			if (opts.sq.buffer_size < (opts.io_queue_size * sizeof(struct spdk_nvme_cmd))) {
432 				SPDK_ERRLOG("sq buffer size %lx is too small for sq size %lx\n",
433 					    opts.sq.buffer_size, (opts.io_queue_size * sizeof(struct spdk_nvme_cmd)));
434 				return NULL;
435 			}
436 		}
437 		if (opts.cq.vaddr) {
438 			if (opts.cq.buffer_size < (opts.io_queue_size * sizeof(struct spdk_nvme_cpl))) {
439 				SPDK_ERRLOG("cq buffer size %lx is too small for cq size %lx\n",
440 					    opts.cq.buffer_size, (opts.io_queue_size * sizeof(struct spdk_nvme_cpl)));
441 				return NULL;
442 			}
443 		}
444 	}
445 
446 	qpair = nvme_ctrlr_create_io_qpair(ctrlr, &opts);
447 
448 	if (qpair == NULL || opts.create_only == true) {
449 		return qpair;
450 	}
451 
452 	rc = spdk_nvme_ctrlr_connect_io_qpair(ctrlr, qpair);
453 	if (rc != 0) {
454 		SPDK_ERRLOG("nvme_transport_ctrlr_connect_io_qpair() failed\n");
455 		nvme_transport_ctrlr_delete_io_qpair(ctrlr, qpair);
456 		return NULL;
457 	}
458 
459 	return qpair;
460 }
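
/*
 * Illustrative usage sketch (editor's example, not part of the driver): a
 * typical caller overrides only the defaults it cares about and lets this
 * function create and connect the qpair in one call.
 *
 *	struct spdk_nvme_io_qpair_opts qp_opts;
 *	struct spdk_nvme_qpair *qp;
 *
 *	spdk_nvme_ctrlr_get_default_io_qpair_opts(ctrlr, &qp_opts, sizeof(qp_opts));
 *	qp_opts.io_queue_size = 256;
 *	qp = spdk_nvme_ctrlr_alloc_io_qpair(ctrlr, &qp_opts, sizeof(qp_opts));
 *
 * With qp_opts.create_only = true, the qpair is returned disconnected and the
 * caller is expected to call spdk_nvme_ctrlr_connect_io_qpair() later.
 */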
461 
462 int
463 spdk_nvme_ctrlr_reconnect_io_qpair(struct spdk_nvme_qpair *qpair)
464 {
465 	struct spdk_nvme_ctrlr *ctrlr;
466 	enum nvme_qpair_state qpair_state;
467 	int rc;
468 
469 	assert(qpair != NULL);
470 	assert(nvme_qpair_is_admin_queue(qpair) == false);
471 	assert(qpair->ctrlr != NULL);
472 
473 	ctrlr = qpair->ctrlr;
474 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
475 	qpair_state = nvme_qpair_get_state(qpair);
476 
477 	if (ctrlr->is_removed) {
478 		rc = -ENODEV;
479 		goto out;
480 	}
481 
482 	if (ctrlr->is_resetting || qpair_state == NVME_QPAIR_DISCONNECTING) {
483 		rc = -EAGAIN;
484 		goto out;
485 	}
486 
487 	if (ctrlr->is_failed || qpair_state == NVME_QPAIR_DESTROYING) {
488 		rc = -ENXIO;
489 		goto out;
490 	}
491 
492 	if (qpair_state != NVME_QPAIR_DISCONNECTED) {
493 		rc = 0;
494 		goto out;
495 	}
496 
497 	rc = nvme_transport_ctrlr_connect_qpair(ctrlr, qpair);
498 	if (rc) {
499 		rc = -EAGAIN;
500 		goto out;
501 	}
502 
503 out:
504 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
505 	return rc;
506 }
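
/*
 * Illustrative sketch (editor's example, not part of the driver): callers
 * typically poll a disconnected qpair back to life, treating -EAGAIN as
 * "try again later":
 *
 *	int rc = spdk_nvme_ctrlr_reconnect_io_qpair(qpair);
 *
 *	if (rc == -EAGAIN) {
 *		// controller still resetting or qpair still disconnecting; retry later
 *	} else if (rc == -ENODEV || rc == -ENXIO) {
 *		// controller removed or failed; give up on this qpair
 *	}
 */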
507 
508 spdk_nvme_qp_failure_reason
509 spdk_nvme_ctrlr_get_admin_qp_failure_reason(struct spdk_nvme_ctrlr *ctrlr)
510 {
511 	return ctrlr->adminq->transport_failure_reason;
512 }
513 
514 /*
515  * This internal function will attempt to take the controller
516  * lock before calling disconnect on a controller qpair.
517  * Functions already holding the controller lock should
518  * call nvme_transport_ctrlr_disconnect_qpair directly.
519  */
520 void
521 nvme_ctrlr_disconnect_qpair(struct spdk_nvme_qpair *qpair)
522 {
523 	struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr;
524 
525 	assert(ctrlr != NULL);
526 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
527 	nvme_transport_ctrlr_disconnect_qpair(ctrlr, qpair);
528 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
529 }
530 
531 int
532 spdk_nvme_ctrlr_free_io_qpair(struct spdk_nvme_qpair *qpair)
533 {
534 	struct spdk_nvme_ctrlr *ctrlr;
535 
536 	if (qpair == NULL) {
537 		return 0;
538 	}
539 
540 	ctrlr = qpair->ctrlr;
541 
542 	if (qpair->in_completion_context) {
543 		/*
544 		 * There are many cases where it is convenient to delete an io qpair in the context
545 		 *  of that qpair's completion routine.  To handle this properly, set a flag here
546 		 *  so that the completion routine will perform an actual delete after the context
547 		 *  unwinds.
548 		 */
549 		qpair->delete_after_completion_context = 1;
550 		return 0;
551 	}
552 
553 	if (qpair->poll_group && qpair->poll_group->in_completion_context) {
554 		/* Same as above, but in a poll group. */
555 		qpair->poll_group->num_qpairs_to_delete++;
556 		qpair->delete_after_completion_context = 1;
557 		return 0;
558 	}
559 
560 	if (qpair->poll_group) {
561 		spdk_nvme_poll_group_remove(qpair->poll_group->group, qpair);
562 	}
563 
564 	/* Do not retry. */
565 	nvme_qpair_set_state(qpair, NVME_QPAIR_DESTROYING);
566 
567 	/* In the multi-process case, a process may call this function on a foreign
568 	 * I/O qpair (i.e. one that this process did not create) when that qpair's process
569 	 * exits unexpectedly.  In that case, we must not try to abort any reqs associated
570 	 * with that qpair, since the callbacks will also be foreign to this process.
571 	 */
572 	if (qpair->active_proc == nvme_ctrlr_get_current_process(ctrlr)) {
573 		nvme_qpair_abort_reqs(qpair, 1);
574 	}
575 
576 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
577 
578 	nvme_ctrlr_proc_remove_io_qpair(qpair);
579 
580 	TAILQ_REMOVE(&ctrlr->active_io_qpairs, qpair, tailq);
581 	spdk_bit_array_set(ctrlr->free_io_qids, qpair->id);
582 
583 	if (nvme_transport_ctrlr_delete_io_qpair(ctrlr, qpair)) {
584 		nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
585 		return -1;
586 	}
587 
588 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
589 	return 0;
590 }
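
/*
 * Illustrative sketch (editor's example, not part of the driver): because of
 * the delete_after_completion_context handling above, it is safe to free a
 * qpair from inside one of its own completion callbacks:
 *
 *	static void
 *	my_io_done(void *arg, const struct spdk_nvme_cpl *cpl)
 *	{
 *		struct spdk_nvme_qpair *qp = arg;
 *
 *		// deferred: the actual delete happens after the completion
 *		// routine unwinds
 *		spdk_nvme_ctrlr_free_io_qpair(qp);
 *	}
 *
 * (my_io_done is a hypothetical application callback.)
 */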
591 
592 static void
593 nvme_ctrlr_construct_intel_support_log_page_list(struct spdk_nvme_ctrlr *ctrlr,
594 		struct spdk_nvme_intel_log_page_directory *log_page_directory)
595 {
596 	if (log_page_directory == NULL) {
597 		return;
598 	}
599 
600 	if (ctrlr->cdata.vid != SPDK_PCI_VID_INTEL) {
601 		return;
602 	}
603 
604 	ctrlr->log_page_supported[SPDK_NVME_INTEL_LOG_PAGE_DIRECTORY] = true;
605 
606 	if (log_page_directory->read_latency_log_len ||
607 	    (ctrlr->quirks & NVME_INTEL_QUIRK_READ_LATENCY)) {
608 		ctrlr->log_page_supported[SPDK_NVME_INTEL_LOG_READ_CMD_LATENCY] = true;
609 	}
610 	if (log_page_directory->write_latency_log_len ||
611 	    (ctrlr->quirks & NVME_INTEL_QUIRK_WRITE_LATENCY)) {
612 		ctrlr->log_page_supported[SPDK_NVME_INTEL_LOG_WRITE_CMD_LATENCY] = true;
613 	}
614 	if (log_page_directory->temperature_statistics_log_len) {
615 		ctrlr->log_page_supported[SPDK_NVME_INTEL_LOG_TEMPERATURE] = true;
616 	}
617 	if (log_page_directory->smart_log_len) {
618 		ctrlr->log_page_supported[SPDK_NVME_INTEL_LOG_SMART] = true;
619 	}
620 	if (log_page_directory->marketing_description_log_len) {
621 		ctrlr->log_page_supported[SPDK_NVME_INTEL_MARKETING_DESCRIPTION] = true;
622 	}
623 }
624 
625 static int
nvme_ctrlr_set_intel_support_log_pages(struct spdk_nvme_ctrlr *ctrlr)
626 {
627 	int rc = 0;
628 	struct nvme_completion_poll_status	*status;
629 	struct spdk_nvme_intel_log_page_directory *log_page_directory;
630 
631 	log_page_directory = spdk_zmalloc(sizeof(struct spdk_nvme_intel_log_page_directory),
632 					  64, NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
633 	if (log_page_directory == NULL) {
634 		SPDK_ERRLOG("could not allocate log_page_directory\n");
635 		return -ENXIO;
636 	}
637 
638 	status = calloc(1, sizeof(*status));
639 	if (!status) {
640 		SPDK_ERRLOG("Failed to allocate status tracker\n");
641 		spdk_free(log_page_directory);
642 		return -ENOMEM;
643 	}
644 
645 	rc = spdk_nvme_ctrlr_cmd_get_log_page(ctrlr, SPDK_NVME_INTEL_LOG_PAGE_DIRECTORY,
646 					      SPDK_NVME_GLOBAL_NS_TAG, log_page_directory,
647 					      sizeof(struct spdk_nvme_intel_log_page_directory),
648 					      0, nvme_completion_poll_cb, status);
649 	if (rc != 0) {
650 		spdk_free(log_page_directory);
651 		free(status);
652 		return rc;
653 	}
654 
655 	if (nvme_wait_for_completion_timeout(ctrlr->adminq, status,
656 					     ctrlr->opts.admin_timeout_ms / 1000)) {
657 		spdk_free(log_page_directory);
658 		SPDK_WARNLOG("Intel log pages not supported on Intel drive!\n");
659 		if (!status->timed_out) {
660 			free(status);
661 		}
662 		return 0;
663 	}
664 
665 	nvme_ctrlr_construct_intel_support_log_page_list(ctrlr, log_page_directory);
666 	spdk_free(log_page_directory);
667 	free(status);
668 	return 0;
669 }
670 
671 static int
672 nvme_ctrlr_set_supported_log_pages(struct spdk_nvme_ctrlr *ctrlr)
673 {
674 	int	rc = 0;
675 
676 	memset(ctrlr->log_page_supported, 0, sizeof(ctrlr->log_page_supported));
677 	/* Mandatory pages */
678 	ctrlr->log_page_supported[SPDK_NVME_LOG_ERROR] = true;
679 	ctrlr->log_page_supported[SPDK_NVME_LOG_HEALTH_INFORMATION] = true;
680 	ctrlr->log_page_supported[SPDK_NVME_LOG_FIRMWARE_SLOT] = true;
681 	if (ctrlr->cdata.lpa.celp) {
682 		ctrlr->log_page_supported[SPDK_NVME_LOG_COMMAND_EFFECTS_LOG] = true;
683 	}
684 	if (ctrlr->cdata.vid == SPDK_PCI_VID_INTEL && !(ctrlr->quirks & NVME_INTEL_QUIRK_NO_LOG_PAGES)) {
685 		rc = nvme_ctrlr_set_intel_support_log_pages(ctrlr);
686 	}
687 
688 	return rc;
689 }
690 
691 static void
692 nvme_ctrlr_set_intel_supported_features(struct spdk_nvme_ctrlr *ctrlr)
693 {
694 	ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_MAX_LBA] = true;
695 	ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_NATIVE_MAX_LBA] = true;
696 	ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_POWER_GOVERNOR_SETTING] = true;
697 	ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_SMBUS_ADDRESS] = true;
698 	ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_LED_PATTERN] = true;
699 	ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_RESET_TIMED_WORKLOAD_COUNTERS] = true;
700 	ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_LATENCY_TRACKING] = true;
701 }
702 
703 static void
704 nvme_ctrlr_set_arbitration_feature(struct spdk_nvme_ctrlr *ctrlr)
705 {
706 	uint32_t cdw11;
707 	struct nvme_completion_poll_status *status;
708 
709 	if (ctrlr->opts.arbitration_burst == 0) {
710 		return;
711 	}
712 
713 	if (ctrlr->opts.arbitration_burst > 7) {
714 		SPDK_WARNLOG("Valid arbitration burst values are from 0-7\n");
715 		return;
716 	}
717 
718 	status = calloc(1, sizeof(*status));
719 	if (!status) {
720 		SPDK_ERRLOG("Failed to allocate status tracker\n");
721 		return;
722 	}
723 
724 	cdw11 = ctrlr->opts.arbitration_burst;
725 
726 	if (spdk_nvme_ctrlr_get_flags(ctrlr) & SPDK_NVME_CTRLR_WRR_SUPPORTED) {
727 		cdw11 |= (uint32_t)ctrlr->opts.low_priority_weight << 8;
728 		cdw11 |= (uint32_t)ctrlr->opts.medium_priority_weight << 16;
729 		cdw11 |= (uint32_t)ctrlr->opts.high_priority_weight << 24;
730 	}
731 
732 	if (spdk_nvme_ctrlr_cmd_set_feature(ctrlr, SPDK_NVME_FEAT_ARBITRATION,
733 					    cdw11, 0, NULL, 0,
734 					    nvme_completion_poll_cb, status) < 0) {
735 		SPDK_ERRLOG("Set arbitration feature failed\n");
736 		free(status);
737 		return;
738 	}
739 
740 	if (nvme_wait_for_completion_timeout(ctrlr->adminq, status,
741 					     ctrlr->opts.admin_timeout_ms / 1000)) {
742 		SPDK_ERRLOG("Timed out setting arbitration feature\n");
743 	}
744 
745 	if (!status->timed_out) {
746 		free(status);
747 	}
748 }
749 
750 static void
751 nvme_ctrlr_set_supported_features(struct spdk_nvme_ctrlr *ctrlr)
752 {
753 	memset(ctrlr->feature_supported, 0, sizeof(ctrlr->feature_supported));
754 	/* Mandatory features */
755 	ctrlr->feature_supported[SPDK_NVME_FEAT_ARBITRATION] = true;
756 	ctrlr->feature_supported[SPDK_NVME_FEAT_POWER_MANAGEMENT] = true;
757 	ctrlr->feature_supported[SPDK_NVME_FEAT_TEMPERATURE_THRESHOLD] = true;
758 	ctrlr->feature_supported[SPDK_NVME_FEAT_ERROR_RECOVERY] = true;
759 	ctrlr->feature_supported[SPDK_NVME_FEAT_NUMBER_OF_QUEUES] = true;
760 	ctrlr->feature_supported[SPDK_NVME_FEAT_INTERRUPT_COALESCING] = true;
761 	ctrlr->feature_supported[SPDK_NVME_FEAT_INTERRUPT_VECTOR_CONFIGURATION] = true;
762 	ctrlr->feature_supported[SPDK_NVME_FEAT_WRITE_ATOMICITY] = true;
763 	ctrlr->feature_supported[SPDK_NVME_FEAT_ASYNC_EVENT_CONFIGURATION] = true;
764 	/* Optional features */
765 	if (ctrlr->cdata.vwc.present) {
766 		ctrlr->feature_supported[SPDK_NVME_FEAT_VOLATILE_WRITE_CACHE] = true;
767 	}
768 	if (ctrlr->cdata.apsta.supported) {
769 		ctrlr->feature_supported[SPDK_NVME_FEAT_AUTONOMOUS_POWER_STATE_TRANSITION] = true;
770 	}
771 	if (ctrlr->cdata.hmpre) {
772 		ctrlr->feature_supported[SPDK_NVME_FEAT_HOST_MEM_BUFFER] = true;
773 	}
774 	if (ctrlr->cdata.vid == SPDK_PCI_VID_INTEL) {
775 		nvme_ctrlr_set_intel_supported_features(ctrlr);
776 	}
777 
778 	nvme_ctrlr_set_arbitration_feature(ctrlr);
779 }
780 
781 bool
782 spdk_nvme_ctrlr_is_failed(struct spdk_nvme_ctrlr *ctrlr)
783 {
784 	return ctrlr->is_failed;
785 }
786 
787 void
788 nvme_ctrlr_fail(struct spdk_nvme_ctrlr *ctrlr, bool hot_remove)
789 {
790 	/*
791 	 * Set the flag here and leave the actual failing of the qpairs to
792 	 * spdk_nvme_qpair_process_completions().
793 	 */
794 	if (hot_remove) {
795 		ctrlr->is_removed = true;
796 	}
797 	ctrlr->is_failed = true;
798 	nvme_transport_ctrlr_disconnect_qpair(ctrlr, ctrlr->adminq);
799 	SPDK_ERRLOG("ctrlr %s in failed state.\n", ctrlr->trid.traddr);
800 }
801 
802 /**
803  * This public API function will try to take the controller lock.
804  * Any private functions being called from a thread already holding
805  * the ctrlr lock should call nvme_ctrlr_fail directly.
806  */
807 void
808 spdk_nvme_ctrlr_fail(struct spdk_nvme_ctrlr *ctrlr)
809 {
810 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
811 	nvme_ctrlr_fail(ctrlr, false);
812 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
813 }
814 
815 static void
816 nvme_ctrlr_shutdown(struct spdk_nvme_ctrlr *ctrlr)
817 {
818 	union spdk_nvme_cc_register	cc;
819 	union spdk_nvme_csts_register	csts;
820 	uint32_t			ms_waited = 0;
821 	uint32_t			shutdown_timeout_ms;
822 
823 	if (ctrlr->is_removed) {
824 		return;
825 	}
826 
827 	if (nvme_ctrlr_get_cc(ctrlr, &cc)) {
828 		SPDK_ERRLOG("ctrlr %s get_cc() failed\n", ctrlr->trid.traddr);
829 		return;
830 	}
831 
832 	cc.bits.shn = SPDK_NVME_SHN_NORMAL;
833 
834 	if (nvme_ctrlr_set_cc(ctrlr, &cc)) {
835 		SPDK_ERRLOG("ctrlr %s set_cc() failed\n", ctrlr->trid.traddr);
836 		return;
837 	}
838 
839 	/*
840 	 * The NVMe specification defines RTD3E to be the time from
841 	 *  setting SHN = 1 until the controller sets SHST = 10b.
842 	 * If the device doesn't report RTD3 entry latency, or if it
843 	 *  reports RTD3 entry latency less than 10 seconds, pick
844 	 *  10 seconds as a reasonable amount of time to
845 	 *  wait before proceeding.
846 	 */
847 	SPDK_DEBUGLOG(SPDK_LOG_NVME, "RTD3E = %" PRIu32 " us\n", ctrlr->cdata.rtd3e);
848 	shutdown_timeout_ms = (ctrlr->cdata.rtd3e + 999) / 1000;
849 	shutdown_timeout_ms = spdk_max(shutdown_timeout_ms, 10000);
850 	SPDK_DEBUGLOG(SPDK_LOG_NVME, "shutdown timeout = %" PRIu32 " ms\n", shutdown_timeout_ms);
851 
852 	do {
853 		if (nvme_ctrlr_get_csts(ctrlr, &csts)) {
854 			SPDK_ERRLOG("ctrlr %s get_csts() failed\n", ctrlr->trid.traddr);
855 			return;
856 		}
857 
858 		if (csts.bits.shst == SPDK_NVME_SHST_COMPLETE) {
859 			SPDK_DEBUGLOG(SPDK_LOG_NVME, "ctrlr %s shutdown complete in %u milliseconds\n",
860 				      ctrlr->trid.traddr, ms_waited);
861 			return;
862 		}
863 
864 		nvme_delay(1000);
865 		ms_waited++;
866 	} while (ms_waited < shutdown_timeout_ms);
867 
868 	SPDK_ERRLOG("ctrlr %s did not shutdown within %u milliseconds\n",
869 		    ctrlr->trid.traddr, shutdown_timeout_ms);
870 	if (ctrlr->quirks & NVME_QUIRK_SHST_COMPLETE) {
871 		SPDK_ERRLOG("likely due to shutdown handling in the VMWare emulated NVMe SSD\n");
872 	}
873 }
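
/*
 * Worked example (editor's illustration) of the shutdown timeout math above:
 * a controller reporting RTD3E = 8,000,000 us rounds up to 8,000 ms, which is
 * then raised to the 10,000 ms floor; RTD3E = 30,000,000 us yields a
 * 30,000 ms timeout. The loop above then polls CSTS.SHST once per millisecond
 * up to that limit.
 */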
874 
875 static int
876 nvme_ctrlr_enable(struct spdk_nvme_ctrlr *ctrlr)
877 {
878 	union spdk_nvme_cc_register	cc;
879 	int				rc;
880 
881 	rc = nvme_transport_ctrlr_enable(ctrlr);
882 	if (rc != 0) {
883 		SPDK_ERRLOG("transport ctrlr_enable failed\n");
884 		return rc;
885 	}
886 
887 	if (nvme_ctrlr_get_cc(ctrlr, &cc)) {
888 		SPDK_ERRLOG("get_cc() failed\n");
889 		return -EIO;
890 	}
891 
892 	if (cc.bits.en != 0) {
893 		SPDK_ERRLOG("called with CC.EN = 1\n");
894 		return -EINVAL;
895 	}
896 
897 	cc.bits.en = 1;
898 	cc.bits.css = 0;
899 	cc.bits.shn = 0;
900 	cc.bits.iosqes = 6; /* SQ entry size == 64 == 2^6 */
901 	cc.bits.iocqes = 4; /* CQ entry size == 16 == 2^4 */
902 
903 	/* Page size is 2 ^ (12 + mps). */
904 	cc.bits.mps = spdk_u32log2(ctrlr->page_size) - 12;
905 
906 	if (ctrlr->cap.bits.css == 0) {
907 		SPDK_INFOLOG(SPDK_LOG_NVME,
908 			     "Drive reports no command sets supported. Assuming NVM is supported.\n");
909 		ctrlr->cap.bits.css = SPDK_NVME_CAP_CSS_NVM;
910 	}
911 
912 	if (!(ctrlr->cap.bits.css & (1u << ctrlr->opts.command_set))) {
913 		SPDK_DEBUGLOG(SPDK_LOG_NVME, "Requested I/O command set %u but supported mask is 0x%x\n",
914 			      ctrlr->opts.command_set, ctrlr->cap.bits.css);
915 		SPDK_DEBUGLOG(SPDK_LOG_NVME, "Falling back to NVM. Assuming NVM is supported.\n");
916 		ctrlr->opts.command_set = SPDK_NVME_CC_CSS_NVM;
917 	}
918 
919 	cc.bits.css = ctrlr->opts.command_set;
920 
921 	switch (ctrlr->opts.arb_mechanism) {
922 	case SPDK_NVME_CC_AMS_RR:
923 		break;
924 	case SPDK_NVME_CC_AMS_WRR:
925 		if (SPDK_NVME_CAP_AMS_WRR & ctrlr->cap.bits.ams) {
926 			break;
927 		}
928 		return -EINVAL;
929 	case SPDK_NVME_CC_AMS_VS:
930 		if (SPDK_NVME_CAP_AMS_VS & ctrlr->cap.bits.ams) {
931 			break;
932 		}
933 		return -EINVAL;
934 	default:
935 		return -EINVAL;
936 	}
937 
938 	cc.bits.ams = ctrlr->opts.arb_mechanism;
939 
940 	if (nvme_ctrlr_set_cc(ctrlr, &cc)) {
941 		SPDK_ERRLOG("set_cc() failed\n");
942 		return -EIO;
943 	}
944 
945 	return 0;
946 }
947 
948 static int
949 nvme_ctrlr_disable(struct spdk_nvme_ctrlr *ctrlr)
950 {
951 	union spdk_nvme_cc_register	cc;
952 
953 	if (nvme_ctrlr_get_cc(ctrlr, &cc)) {
954 		SPDK_ERRLOG("get_cc() failed\n");
955 		return -EIO;
956 	}
957 
958 	if (cc.bits.en == 0) {
959 		return 0;
960 	}
961 
962 	cc.bits.en = 0;
963 
964 	if (nvme_ctrlr_set_cc(ctrlr, &cc)) {
965 		SPDK_ERRLOG("set_cc() failed\n");
966 		return -EIO;
967 	}
968 
969 	return 0;
970 }
971 
972 #ifdef DEBUG
973 static const char *
974 nvme_ctrlr_state_string(enum nvme_ctrlr_state state)
975 {
976 	switch (state) {
977 	case NVME_CTRLR_STATE_INIT_DELAY:
978 		return "delay init";
979 	case NVME_CTRLR_STATE_INIT:
980 		return "init";
981 	case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1:
982 		return "disable and wait for CSTS.RDY = 1";
983 	case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0:
984 		return "disable and wait for CSTS.RDY = 0";
985 	case NVME_CTRLR_STATE_ENABLE:
986 		return "enable controller by writing CC.EN = 1";
987 	case NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1:
988 		return "wait for CSTS.RDY = 1";
989 	case NVME_CTRLR_STATE_RESET_ADMIN_QUEUE:
990 		return "reset admin queue";
991 	case NVME_CTRLR_STATE_IDENTIFY:
992 		return "identify controller";
993 	case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY:
994 		return "wait for identify controller";
995 	case NVME_CTRLR_STATE_SET_NUM_QUEUES:
996 		return "set number of queues";
997 	case NVME_CTRLR_STATE_WAIT_FOR_SET_NUM_QUEUES:
998 		return "wait for set number of queues";
999 	case NVME_CTRLR_STATE_CONSTRUCT_NS:
1000 		return "construct namespaces";
1001 	case NVME_CTRLR_STATE_IDENTIFY_ACTIVE_NS:
1002 		return "identify active ns";
1003 	case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_ACTIVE_NS:
1004 		return "wait for identify active ns";
1005 	case NVME_CTRLR_STATE_IDENTIFY_NS:
1006 		return "identify ns";
1007 	case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_NS:
1008 		return "wait for identify ns";
1009 	case NVME_CTRLR_STATE_IDENTIFY_ID_DESCS:
1010 		return "identify namespace id descriptors";
1011 	case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_ID_DESCS:
1012 		return "wait for identify namespace id descriptors";
1013 	case NVME_CTRLR_STATE_CONFIGURE_AER:
1014 		return "configure AER";
1015 	case NVME_CTRLR_STATE_WAIT_FOR_CONFIGURE_AER:
1016 		return "wait for configure aer";
1017 	case NVME_CTRLR_STATE_SET_SUPPORTED_LOG_PAGES:
1018 		return "set supported log pages";
1019 	case NVME_CTRLR_STATE_SET_SUPPORTED_FEATURES:
1020 		return "set supported features";
1021 	case NVME_CTRLR_STATE_SET_DB_BUF_CFG:
1022 		return "set doorbell buffer config";
1023 	case NVME_CTRLR_STATE_WAIT_FOR_DB_BUF_CFG:
1024 		return "wait for doorbell buffer config";
1025 	case NVME_CTRLR_STATE_SET_KEEP_ALIVE_TIMEOUT:
1026 		return "set keep alive timeout";
1027 	case NVME_CTRLR_STATE_WAIT_FOR_KEEP_ALIVE_TIMEOUT:
1028 		return "wait for set keep alive timeout";
1029 	case NVME_CTRLR_STATE_SET_HOST_ID:
1030 		return "set host ID";
1031 	case NVME_CTRLR_STATE_WAIT_FOR_HOST_ID:
1032 		return "wait for set host ID";
1033 	case NVME_CTRLR_STATE_READY:
1034 		return "ready";
1035 	case NVME_CTRLR_STATE_ERROR:
1036 		return "error";
1037 	}
1038 	return "unknown";
1039 }
1040 #endif /* DEBUG */
1041 
1042 static void
1043 nvme_ctrlr_set_state(struct spdk_nvme_ctrlr *ctrlr, enum nvme_ctrlr_state state,
1044 		     uint64_t timeout_in_ms)
1045 {
1046 	uint64_t ticks_per_ms, timeout_in_ticks, now_ticks;
1047 
1048 	ctrlr->state = state;
1049 	if (timeout_in_ms == NVME_TIMEOUT_INFINITE) {
1050 		goto inf;
1051 	}
1052 
1053 	ticks_per_ms = spdk_get_ticks_hz() / 1000;
1054 	if (timeout_in_ms > UINT64_MAX / ticks_per_ms) {
1055 		SPDK_ERRLOG("Specified timeout would cause integer overflow. Defaulting to no timeout.\n");
1056 		goto inf;
1057 	}
1058 
1059 	now_ticks = spdk_get_ticks();
1060 	timeout_in_ticks = timeout_in_ms * ticks_per_ms;
1061 	if (timeout_in_ticks > UINT64_MAX - now_ticks) {
1062 		SPDK_ERRLOG("Specified timeout would cause integer overflow. Defaulting to no timeout.\n");
1063 		goto inf;
1064 	}
1065 
1066 	ctrlr->state_timeout_tsc = timeout_in_ticks + now_ticks;
1067 	SPDK_DEBUGLOG(SPDK_LOG_NVME, "setting state to %s (timeout %" PRIu64 " ms)\n",
1068 		      nvme_ctrlr_state_string(ctrlr->state), timeout_in_ms);
1069 	return;
1070 inf:
1071 	SPDK_DEBUGLOG(SPDK_LOG_NVME, "setting state to %s (no timeout)\n",
1072 		      nvme_ctrlr_state_string(ctrlr->state));
1073 	ctrlr->state_timeout_tsc = NVME_TIMEOUT_INFINITE;
1074 }
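
/*
 * Worked example (editor's illustration) of the timeout conversion above:
 * with spdk_get_ticks_hz() = 2,000,000,000 (a 2 GHz timer), ticks_per_ms is
 * 2,000,000, so a 5,000 ms admin timeout becomes 10,000,000,000 ticks added
 * to the current tick count. The two overflow checks guard the multiplication
 * and the addition respectively; either overflowing falls back to no timeout.
 */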
1075 
1076 static void
1077 nvme_ctrlr_free_doorbell_buffer(struct spdk_nvme_ctrlr *ctrlr)
1078 {
1079 	if (ctrlr->shadow_doorbell) {
1080 		spdk_free(ctrlr->shadow_doorbell);
1081 		ctrlr->shadow_doorbell = NULL;
1082 	}
1083 
1084 	if (ctrlr->eventidx) {
1085 		spdk_free(ctrlr->eventidx);
1086 		ctrlr->eventidx = NULL;
1087 	}
1088 }
1089 
1090 static void
1091 nvme_ctrlr_set_doorbell_buffer_config_done(void *arg, const struct spdk_nvme_cpl *cpl)
1092 {
1093 	struct spdk_nvme_ctrlr *ctrlr = (struct spdk_nvme_ctrlr *)arg;
1094 
1095 	if (spdk_nvme_cpl_is_error(cpl)) {
1096 		SPDK_WARNLOG("Doorbell buffer config failed\n");
1097 	} else {
1098 		SPDK_INFOLOG(SPDK_LOG_NVME, "NVMe controller: %s doorbell buffer config enabled\n",
1099 			     ctrlr->trid.traddr);
1100 	}
1101 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_KEEP_ALIVE_TIMEOUT,
1102 			     ctrlr->opts.admin_timeout_ms);
1103 }
1104 
1105 static int
1106 nvme_ctrlr_set_doorbell_buffer_config(struct spdk_nvme_ctrlr *ctrlr)
1107 {
1108 	int rc = 0;
1109 	uint64_t prp1, prp2, len;
1110 
1111 	if (!ctrlr->cdata.oacs.doorbell_buffer_config) {
1112 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_KEEP_ALIVE_TIMEOUT,
1113 				     ctrlr->opts.admin_timeout_ms);
1114 		return 0;
1115 	}
1116 
1117 	if (ctrlr->trid.trtype != SPDK_NVME_TRANSPORT_PCIE) {
1118 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_KEEP_ALIVE_TIMEOUT,
1119 				     ctrlr->opts.admin_timeout_ms);
1120 		return 0;
1121 	}
1122 
1123 	/* Only one page is needed for each doorbell buffer. */
1124 	ctrlr->shadow_doorbell = spdk_zmalloc(ctrlr->page_size, ctrlr->page_size,
1125 					      NULL, SPDK_ENV_LCORE_ID_ANY,
1126 					      SPDK_MALLOC_DMA | SPDK_MALLOC_SHARE);
1127 	if (ctrlr->shadow_doorbell == NULL) {
1128 		rc = -ENOMEM;
1129 		goto error;
1130 	}
1131 
1132 	len = ctrlr->page_size;
1133 	prp1 = spdk_vtophys(ctrlr->shadow_doorbell, &len);
1134 	if (prp1 == SPDK_VTOPHYS_ERROR || len != ctrlr->page_size) {
1135 		rc = -EFAULT;
1136 		goto error;
1137 	}
1138 
1139 	ctrlr->eventidx = spdk_zmalloc(ctrlr->page_size, ctrlr->page_size,
1140 				       NULL, SPDK_ENV_LCORE_ID_ANY,
1141 				       SPDK_MALLOC_DMA | SPDK_MALLOC_SHARE);
1142 	if (ctrlr->eventidx == NULL) {
1143 		rc = -ENOMEM;
1144 		goto error;
1145 	}
1146 
1147 	len = ctrlr->page_size;
1148 	prp2 = spdk_vtophys(ctrlr->eventidx, &len);
1149 	if (prp2 == SPDK_VTOPHYS_ERROR || len != ctrlr->page_size) {
1150 		rc = -EFAULT;
1151 		goto error;
1152 	}
1153 
1154 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_DB_BUF_CFG,
1155 			     ctrlr->opts.admin_timeout_ms);
1156 
1157 	rc = nvme_ctrlr_cmd_doorbell_buffer_config(ctrlr, prp1, prp2,
1158 			nvme_ctrlr_set_doorbell_buffer_config_done, ctrlr);
1159 	if (rc != 0) {
1160 		goto error;
1161 	}
1162 
1163 	return 0;
1164 
1165 error:
1166 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
1167 	nvme_ctrlr_free_doorbell_buffer(ctrlr);
1168 	return rc;
1169 }
1170 
1171 static void
1172 nvme_ctrlr_abort_queued_aborts(struct spdk_nvme_ctrlr *ctrlr)
1173 {
1174 	struct nvme_request	*req, *tmp;
1175 	struct spdk_nvme_cpl	cpl = {};
1176 
1177 	cpl.status.sc = SPDK_NVME_SC_ABORTED_SQ_DELETION;
1178 	cpl.status.sct = SPDK_NVME_SCT_GENERIC;
1179 
1180 	STAILQ_FOREACH_SAFE(req, &ctrlr->queued_aborts, stailq, tmp) {
1181 		STAILQ_REMOVE_HEAD(&ctrlr->queued_aborts, stailq);
1182 
1183 		nvme_complete_request(req->cb_fn, req->cb_arg, req->qpair, req, &cpl);
1184 		nvme_free_request(req);
1185 	}
1186 }
1187 
1188 int
1189 spdk_nvme_ctrlr_reset(struct spdk_nvme_ctrlr *ctrlr)
1190 {
1191 	int rc = 0;
1192 	struct spdk_nvme_qpair	*qpair;
1193 
1194 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
1195 
1196 	if (ctrlr->is_resetting || ctrlr->is_removed) {
1197 		/*
1198 		 * Controller is already resetting or has been removed. Return
1199 		 *  immediately since there is no need to kick off another
1200 		 *  reset in these cases.
1201 		 */
1202 		nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
1203 		return ctrlr->is_resetting ? 0 : -ENXIO;
1204 	}
1205 
1206 	ctrlr->is_resetting = true;
1207 	ctrlr->is_failed = false;
1208 
1209 	SPDK_NOTICELOG("resetting controller\n");
1210 
1211 	/* Abort all of the queued abort requests */
1212 	nvme_ctrlr_abort_queued_aborts(ctrlr);
1213 
1214 	nvme_transport_admin_qpair_abort_aers(ctrlr->adminq);
1215 
1216 	/* Disable all queues before disabling the controller hardware. */
1217 	TAILQ_FOREACH(qpair, &ctrlr->active_io_qpairs, tailq) {
1218 		qpair->transport_failure_reason = SPDK_NVME_QPAIR_FAILURE_LOCAL;
1219 	}
1220 
1221 	ctrlr->adminq->transport_failure_reason = SPDK_NVME_QPAIR_FAILURE_LOCAL;
1222 	nvme_transport_ctrlr_disconnect_qpair(ctrlr, ctrlr->adminq);
1223 	if (nvme_transport_ctrlr_connect_qpair(ctrlr, ctrlr->adminq) != 0) {
1224 		SPDK_ERRLOG("Controller reinitialization failed.\n");
1225 		rc = -1;
1226 		goto out;
1227 	}
1228 
1229 	/* Doorbell buffer config is invalid during reset */
1230 	nvme_ctrlr_free_doorbell_buffer(ctrlr);
1231 
1232 	/* Set the state back to INIT to cause a full hardware reset. */
1233 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_INIT, NVME_TIMEOUT_INFINITE);
1234 
1235 	nvme_qpair_set_state(ctrlr->adminq, NVME_QPAIR_ENABLED);
1236 	while (ctrlr->state != NVME_CTRLR_STATE_READY) {
1237 		if (nvme_ctrlr_process_init(ctrlr) != 0) {
1238 			SPDK_ERRLOG("controller reinitialization failed\n");
1239 			rc = -1;
1240 			break;
1241 		}
1242 	}
1243 
1244 	/*
1245 	 * For PCIe controllers, the memory locations of the transport qpair
1246 	 * don't change when the controller is reset. They simply need to be
1247 	 * re-enabled with admin commands to the controller. For fabric
1248 	 * controllers we need to disconnect and reconnect the qpair on its
1249 	 * own thread outside of the context of the reset.
1250 	 */
1251 	if (rc == 0 && ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) {
1252 		/* Reinitialize qpairs */
1253 		TAILQ_FOREACH(qpair, &ctrlr->active_io_qpairs, tailq) {
1254 			if (nvme_transport_ctrlr_connect_qpair(ctrlr, qpair) != 0) {
1255 				qpair->transport_failure_reason = SPDK_NVME_QPAIR_FAILURE_LOCAL;
1256 				rc = -1;
1257 				continue;
1258 			}
1259 		}
1260 	}
1261 
1262 out:
1263 	if (rc) {
1264 		nvme_ctrlr_fail(ctrlr, false);
1265 	}
1266 	ctrlr->is_resetting = false;
1267 
1268 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
1269 
1270 	if (!ctrlr->cdata.oaes.ns_attribute_notices) {
1271 		/*
1272 		 * If the controller doesn't support ns_attribute_notices and the
1273 		 * namespace attributes changed (e.g. the number of namespaces), we
1274 		 * need to update the system as part of handling the device reset.
1275 		 */
1276 		nvme_io_msg_ctrlr_update(ctrlr);
1277 	}
1278 
1279 	return rc;
1280 }
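
/*
 * Illustrative sketch (editor's example, not part of the driver): an
 * application that detects a failed controller typically drives recovery with
 * this function and then reconnects its own qpairs (on fabrics transports the
 * I/O qpairs are not reconnected here, as noted above):
 *
 *	if (spdk_nvme_ctrlr_is_failed(ctrlr)) {
 *		if (spdk_nvme_ctrlr_reset(ctrlr) == 0) {
 *			// non-PCIe: reconnect I/O qpairs from their own threads,
 *			// e.g. via spdk_nvme_ctrlr_reconnect_io_qpair()
 *		}
 *	}
 */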
1281 
1282 int
1283 spdk_nvme_ctrlr_set_trid(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_transport_id *trid)
1284 {
1285 	int rc = 0;
1286 
1287 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
1288 
1289 	if (ctrlr->is_failed == false) {
1290 		rc = -EPERM;
1291 		goto out;
1292 	}
1293 
1294 	if (trid->trtype != ctrlr->trid.trtype) {
1295 		rc = -EINVAL;
1296 		goto out;
1297 	}
1298 
1299 	if (strncmp(trid->subnqn, ctrlr->trid.subnqn, SPDK_NVMF_NQN_MAX_LEN)) {
1300 		rc = -EINVAL;
1301 		goto out;
1302 	}
1303 
1304 	ctrlr->trid = *trid;
1305 
1306 out:
1307 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
1308 	return rc;
1309 }
1310 
1311 static void
1312 nvme_ctrlr_identify_done(void *arg, const struct spdk_nvme_cpl *cpl)
1313 {
1314 	struct spdk_nvme_ctrlr *ctrlr = (struct spdk_nvme_ctrlr *)arg;
1315 
1316 	if (spdk_nvme_cpl_is_error(cpl)) {
1317 		SPDK_ERRLOG("nvme_identify_controller failed!\n");
1318 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
1319 		return;
1320 	}
1321 
1322 	/*
1323 	 * Use MDTS to ensure our default max_xfer_size doesn't exceed what the
1324 	 *  controller supports.
1325 	 */
1326 	ctrlr->max_xfer_size = nvme_transport_ctrlr_get_max_xfer_size(ctrlr);
1327 	SPDK_DEBUGLOG(SPDK_LOG_NVME, "transport max_xfer_size %u\n", ctrlr->max_xfer_size);
1328 	if (ctrlr->cdata.mdts > 0) {
1329 		ctrlr->max_xfer_size = spdk_min(ctrlr->max_xfer_size,
1330 						ctrlr->min_page_size * (1 << (ctrlr->cdata.mdts)));
1331 		SPDK_DEBUGLOG(SPDK_LOG_NVME, "MDTS max_xfer_size %u\n", ctrlr->max_xfer_size);
1332 	}
1333 
1334 	SPDK_DEBUGLOG(SPDK_LOG_NVME, "CNTLID 0x%04" PRIx16 "\n", ctrlr->cdata.cntlid);
1335 	if (ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) {
1336 		ctrlr->cntlid = ctrlr->cdata.cntlid;
1337 	} else {
1338 		/*
1339 		 * Fabrics controllers should already have CNTLID from the Connect command.
1340 		 *
1341 		 * If CNTLID from Connect doesn't match CNTLID in the Identify Controller data,
1342 		 * trust the one from Connect.
1343 		 */
1344 		if (ctrlr->cntlid != ctrlr->cdata.cntlid) {
1345 			SPDK_DEBUGLOG(SPDK_LOG_NVME,
1346 				      "Identify CNTLID 0x%04" PRIx16 " != Connect CNTLID 0x%04" PRIx16 "\n",
1347 				      ctrlr->cdata.cntlid, ctrlr->cntlid);
1348 		}
1349 	}
1350 
1351 	if (ctrlr->cdata.sgls.supported) {
1352 		assert(ctrlr->cdata.sgls.supported != 0x3);
1353 		ctrlr->flags |= SPDK_NVME_CTRLR_SGL_SUPPORTED;
1354 		if (ctrlr->cdata.sgls.supported == 0x2) {
1355 			ctrlr->flags |= SPDK_NVME_CTRLR_SGL_REQUIRES_DWORD_ALIGNMENT;
1356 		}
1357 		/*
1358 		 * Use MSDBD to ensure our max_sges doesn't exceed what the
1359 		 *  controller supports.
1360 		 */
1361 		ctrlr->max_sges = nvme_transport_ctrlr_get_max_sges(ctrlr);
1362 		if (ctrlr->cdata.nvmf_specific.msdbd != 0) {
1363 			ctrlr->max_sges = spdk_min(ctrlr->cdata.nvmf_specific.msdbd, ctrlr->max_sges);
1364 		} else {
1365 			/* A value 0 indicates no limit. */
1366 		}
1367 		SPDK_DEBUGLOG(SPDK_LOG_NVME, "transport max_sges %u\n", ctrlr->max_sges);
1368 	}
1369 
1370 	if (ctrlr->cdata.oacs.security && !(ctrlr->quirks & NVME_QUIRK_OACS_SECURITY)) {
1371 		ctrlr->flags |= SPDK_NVME_CTRLR_SECURITY_SEND_RECV_SUPPORTED;
1372 	}
1373 
1374 	SPDK_DEBUGLOG(SPDK_LOG_NVME, "fuses compare and write: %d\n", ctrlr->cdata.fuses.compare_and_write);
1375 	if (ctrlr->cdata.fuses.compare_and_write) {
1376 		ctrlr->flags |= SPDK_NVME_CTRLR_COMPARE_AND_WRITE_SUPPORTED;
1377 	}
1378 
1379 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_NUM_QUEUES,
1380 			     ctrlr->opts.admin_timeout_ms);
1381 }
1382 
1383 static int
1384 nvme_ctrlr_identify(struct spdk_nvme_ctrlr *ctrlr)
1385 {
1386 	int	rc;
1387 
1388 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY,
1389 			     ctrlr->opts.admin_timeout_ms);
1390 
1391 	rc = nvme_ctrlr_cmd_identify(ctrlr, SPDK_NVME_IDENTIFY_CTRLR, 0, 0,
1392 				     &ctrlr->cdata, sizeof(ctrlr->cdata),
1393 				     nvme_ctrlr_identify_done, ctrlr);
1394 	if (rc != 0) {
1395 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
1396 		return rc;
1397 	}
1398 
1399 	return 0;
1400 }
1401 
1402 enum nvme_active_ns_state {
1403 	NVME_ACTIVE_NS_STATE_IDLE,
1404 	NVME_ACTIVE_NS_STATE_PROCESSING,
1405 	NVME_ACTIVE_NS_STATE_DONE,
1406 	NVME_ACTIVE_NS_STATE_ERROR
1407 };
1408 
1409 typedef void (*nvme_active_ns_ctx_deleter)(struct nvme_active_ns_ctx *);
1410 
1411 struct nvme_active_ns_ctx {
1412 	struct spdk_nvme_ctrlr *ctrlr;
1413 	uint32_t page;
1414 	uint32_t num_pages;
1415 	uint32_t next_nsid;
1416 	uint32_t *new_ns_list;
1417 	nvme_active_ns_ctx_deleter deleter;
1418 
1419 	enum nvme_active_ns_state state;
1420 };
1421 
1422 static struct nvme_active_ns_ctx *
1423 nvme_active_ns_ctx_create(struct spdk_nvme_ctrlr *ctrlr, nvme_active_ns_ctx_deleter deleter)
1424 {
1425 	struct nvme_active_ns_ctx *ctx;
1426 	uint32_t num_pages = 0;
1427 	uint32_t *new_ns_list = NULL;
1428 
1429 	ctx = calloc(1, sizeof(*ctx));
1430 	if (!ctx) {
1431 		SPDK_ERRLOG("Failed to allocate nvme_active_ns_ctx!\n");
1432 		return NULL;
1433 	}
1434 
1435 	if (ctrlr->num_ns) {
1436 		/* The allocated size must be a multiple of sizeof(struct spdk_nvme_ns_list) */
1437 		num_pages = (ctrlr->num_ns * sizeof(new_ns_list[0]) - 1) / sizeof(struct spdk_nvme_ns_list) + 1;
1438 		new_ns_list = spdk_zmalloc(num_pages * sizeof(struct spdk_nvme_ns_list), ctrlr->page_size,
1439 					   NULL, SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA | SPDK_MALLOC_SHARE);
1440 		if (!new_ns_list) {
1441 			SPDK_ERRLOG("Failed to allocate active_ns_list!\n");
1442 			free(ctx);
1443 			return NULL;
1444 		}
1445 	}
1446 
1447 	ctx->num_pages = num_pages;
1448 	ctx->new_ns_list = new_ns_list;
1449 	ctx->ctrlr = ctrlr;
1450 	ctx->deleter = deleter;
1451 
1452 	return ctx;
1453 }
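
/*
 * Worked example (editor's illustration) of the sizing above: each
 * struct spdk_nvme_ns_list holds 1024 32-bit NSIDs (4096 bytes), so a
 * controller with num_ns = 1024 needs one list page and one with
 * num_ns = 1025 needs two. The buffer is always allocated in whole
 * spdk_nvme_ns_list units so each Identify Active Namespace List command can
 * write a full 4096-byte page at &new_ns_list[1024 * page].
 */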
1454 
1455 static void
1456 nvme_active_ns_ctx_destroy(struct nvme_active_ns_ctx *ctx)
1457 {
1458 	spdk_free(ctx->new_ns_list);
1459 	free(ctx);
1460 }
1461 
1462 static void
1463 nvme_ctrlr_identify_active_ns_swap(struct spdk_nvme_ctrlr *ctrlr, uint32_t **new_ns_list)
1464 {
1465 	spdk_free(ctrlr->active_ns_list);
1466 	ctrlr->active_ns_list = *new_ns_list;
1467 	*new_ns_list = NULL;
1468 }
1469 
1470 static void
1471 nvme_ctrlr_identify_active_ns_async_done(void *arg, const struct spdk_nvme_cpl *cpl)
1472 {
1473 	struct nvme_active_ns_ctx *ctx = arg;
1474 
1475 	if (spdk_nvme_cpl_is_error(cpl)) {
1476 		ctx->state = NVME_ACTIVE_NS_STATE_ERROR;
1477 		goto out;
1478 	}
1479 
1480 	ctx->next_nsid = ctx->new_ns_list[1024 * ctx->page + 1023];
1481 	if (ctx->next_nsid == 0 || ++ctx->page == ctx->num_pages) {
1482 		ctx->state = NVME_ACTIVE_NS_STATE_DONE;
1483 		goto out;
1484 	}
1485 
1486 	nvme_ctrlr_identify_active_ns_async(ctx);
1487 	return;
1488 
1489 out:
1490 	if (ctx->deleter) {
1491 		ctx->deleter(ctx);
1492 	}
1493 }
1494 
1495 static void
1496 nvme_ctrlr_identify_active_ns_async(struct nvme_active_ns_ctx *ctx)
1497 {
1498 	struct spdk_nvme_ctrlr *ctrlr = ctx->ctrlr;
1499 	uint32_t i;
1500 	int rc;
1501 
1502 	if (ctrlr->num_ns == 0) {
1503 		ctx->state = NVME_ACTIVE_NS_STATE_DONE;
1504 		goto out;
1505 	}
1506 
1507 	/*
1508 	 * If the controller doesn't support the active ns list (CNS 0x02), dummy up
1509 	 * an active ns list, i.e. report all namespaces as active.
1510 	 */
1511 	if (ctrlr->vs.raw < SPDK_NVME_VERSION(1, 1, 0) || ctrlr->quirks & NVME_QUIRK_IDENTIFY_CNS) {
1512 		for (i = 0; i < ctrlr->num_ns; i++) {
1513 			ctx->new_ns_list[i] = i + 1;
1514 		}
1515 
1516 		ctx->state = NVME_ACTIVE_NS_STATE_DONE;
1517 		goto out;
1518 	}
1519 
1520 	ctx->state = NVME_ACTIVE_NS_STATE_PROCESSING;
1521 	rc = nvme_ctrlr_cmd_identify(ctrlr, SPDK_NVME_IDENTIFY_ACTIVE_NS_LIST, 0, ctx->next_nsid,
1522 				     &ctx->new_ns_list[1024 * ctx->page], sizeof(struct spdk_nvme_ns_list),
1523 				     nvme_ctrlr_identify_active_ns_async_done, ctx);
1524 	if (rc != 0) {
1525 		ctx->state = NVME_ACTIVE_NS_STATE_ERROR;
1526 		goto out;
1527 	}
1528 
1529 	return;
1530 
1531 out:
1532 	if (ctx->deleter) {
1533 		ctx->deleter(ctx);
1534 	}
1535 }
1536 
1537 static void
1538 _nvme_active_ns_ctx_deleter(struct nvme_active_ns_ctx *ctx)
1539 {
1540 	struct spdk_nvme_ctrlr *ctrlr = ctx->ctrlr;
1541 
1542 	if (ctx->state == NVME_ACTIVE_NS_STATE_ERROR) {
1543 		nvme_ctrlr_destruct_namespaces(ctrlr);
1544 		nvme_active_ns_ctx_destroy(ctx);
1545 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
1546 		return;
1547 	}
1548 
1549 	assert(ctx->state == NVME_ACTIVE_NS_STATE_DONE);
1550 	nvme_ctrlr_identify_active_ns_swap(ctrlr, &ctx->new_ns_list);
1551 	nvme_active_ns_ctx_destroy(ctx);
1552 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_NS, ctrlr->opts.admin_timeout_ms);
1553 }
1554 
1555 static void
1556 _nvme_ctrlr_identify_active_ns(struct spdk_nvme_ctrlr *ctrlr)
1557 {
1558 	struct nvme_active_ns_ctx *ctx;
1559 
1560 	ctx = nvme_active_ns_ctx_create(ctrlr, _nvme_active_ns_ctx_deleter);
1561 	if (!ctx) {
1562 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
1563 		return;
1564 	}
1565 
1566 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_ACTIVE_NS,
1567 			     ctrlr->opts.admin_timeout_ms);
1568 	nvme_ctrlr_identify_active_ns_async(ctx);
1569 }
1570 
1571 int
1572 nvme_ctrlr_identify_active_ns(struct spdk_nvme_ctrlr *ctrlr)
1573 {
1574 	struct nvme_active_ns_ctx *ctx;
1575 	int rc;
1576 
1577 	ctx = nvme_active_ns_ctx_create(ctrlr, NULL);
1578 	if (!ctx) {
1579 		return -ENOMEM;
1580 	}
1581 
1582 	nvme_ctrlr_identify_active_ns_async(ctx);
1583 	while (ctx->state == NVME_ACTIVE_NS_STATE_PROCESSING) {
1584 		rc = spdk_nvme_qpair_process_completions(ctrlr->adminq, 0);
1585 		if (rc < 0) {
1586 			ctx->state = NVME_ACTIVE_NS_STATE_ERROR;
1587 			break;
1588 		}
1589 	}
1590 
1591 	if (ctx->state == NVME_ACTIVE_NS_STATE_ERROR) {
1592 		nvme_active_ns_ctx_destroy(ctx);
1593 		return -ENXIO;
1594 	}
1595 
1596 	assert(ctx->state == NVME_ACTIVE_NS_STATE_DONE);
1597 	nvme_ctrlr_identify_active_ns_swap(ctrlr, &ctx->new_ns_list);
1598 	nvme_active_ns_ctx_destroy(ctx);
1599 
1600 	return 0;
1601 }
1602 
1603 static void
1604 nvme_ctrlr_identify_ns_async_done(void *arg, const struct spdk_nvme_cpl *cpl)
1605 {
1606 	struct spdk_nvme_ns *ns = (struct spdk_nvme_ns *)arg;
1607 	struct spdk_nvme_ctrlr *ctrlr = ns->ctrlr;
1608 	uint32_t nsid;
1609 	int rc;
1610 
1611 	if (spdk_nvme_cpl_is_error(cpl)) {
1612 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
1613 		return;
1614 	} else {
1615 		nvme_ns_set_identify_data(ns);
1616 	}
1617 
1618 	/* move on to the next active NS */
1619 	nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, ns->id);
1620 	ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid);
1621 	if (ns == NULL) {
1622 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_ID_DESCS,
1623 				     ctrlr->opts.admin_timeout_ms);
1624 		return;
1625 	}
1626 	ns->ctrlr = ctrlr;
1627 	ns->id = nsid;
1628 
1629 	rc = nvme_ctrlr_identify_ns_async(ns);
1630 	if (rc) {
1631 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
1632 	}
1633 }
1634 
1635 static int
1636 nvme_ctrlr_identify_ns_async(struct spdk_nvme_ns *ns)
1637 {
1638 	struct spdk_nvme_ctrlr *ctrlr = ns->ctrlr;
1639 	struct spdk_nvme_ns_data *nsdata;
1640 
1641 	nsdata = &ctrlr->nsdata[ns->id - 1];
1642 
1643 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_NS,
1644 			     ctrlr->opts.admin_timeout_ms);
1645 	return nvme_ctrlr_cmd_identify(ns->ctrlr, SPDK_NVME_IDENTIFY_NS, 0, ns->id,
1646 				       nsdata, sizeof(*nsdata),
1647 				       nvme_ctrlr_identify_ns_async_done, ns);
1648 }
1649 
1650 static int
1651 nvme_ctrlr_identify_namespaces(struct spdk_nvme_ctrlr *ctrlr)
1652 {
1653 	uint32_t nsid;
1654 	struct spdk_nvme_ns *ns;
1655 	int rc;
1656 
1657 	nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr);
1658 	ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid);
1659 	if (ns == NULL) {
1660 		/* No active NS, move on to the next state */
1661 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_CONFIGURE_AER,
1662 				     ctrlr->opts.admin_timeout_ms);
1663 		return 0;
1664 	}
1665 
1666 	ns->ctrlr = ctrlr;
1667 	ns->id = nsid;
1668 
1669 	rc = nvme_ctrlr_identify_ns_async(ns);
1670 	if (rc) {
1671 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
1672 	}
1673 
1674 	return rc;
1675 }
1676 
1677 static void
1678 nvme_ctrlr_identify_id_desc_async_done(void *arg, const struct spdk_nvme_cpl *cpl)
1679 {
1680 	struct spdk_nvme_ns *ns = (struct spdk_nvme_ns *)arg;
1681 	struct spdk_nvme_ctrlr *ctrlr = ns->ctrlr;
1682 	uint32_t nsid;
1683 	int rc;
1684 
1685 	if (spdk_nvme_cpl_is_error(cpl)) {
1686 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_CONFIGURE_AER,
1687 				     ctrlr->opts.admin_timeout_ms);
1688 		return;
1689 	}
1690 
1691 	/* move on to the next active NS */
1692 	nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, ns->id);
1693 	ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid);
1694 	if (ns == NULL) {
1695 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_CONFIGURE_AER,
1696 				     ctrlr->opts.admin_timeout_ms);
1697 		return;
1698 	}
1699 
1700 	rc = nvme_ctrlr_identify_id_desc_async(ns);
1701 	if (rc) {
1702 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
1703 	}
1704 }
1705 
1706 static int
1707 nvme_ctrlr_identify_id_desc_async(struct spdk_nvme_ns *ns)
1708 {
1709 	struct spdk_nvme_ctrlr *ctrlr = ns->ctrlr;
1710 
1711 	memset(ns->id_desc_list, 0, sizeof(ns->id_desc_list));
1712 
1713 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_ID_DESCS,
1714 			     ctrlr->opts.admin_timeout_ms);
1715 	return nvme_ctrlr_cmd_identify(ns->ctrlr, SPDK_NVME_IDENTIFY_NS_ID_DESCRIPTOR_LIST,
1716 				       0, ns->id, ns->id_desc_list, sizeof(ns->id_desc_list),
1717 				       nvme_ctrlr_identify_id_desc_async_done, ns);
1718 }
1719 
1720 static int
1721 nvme_ctrlr_identify_id_desc_namespaces(struct spdk_nvme_ctrlr *ctrlr)
1722 {
1723 	uint32_t nsid;
1724 	struct spdk_nvme_ns *ns;
1725 	int rc;
1726 
1727 	if (ctrlr->vs.raw < SPDK_NVME_VERSION(1, 3, 0) ||
1728 	    (ctrlr->quirks & NVME_QUIRK_IDENTIFY_CNS)) {
1729 		SPDK_DEBUGLOG(SPDK_LOG_NVME, "Version < 1.3; not attempting to retrieve NS ID Descriptor List\n");
1730 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_CONFIGURE_AER,
1731 				     ctrlr->opts.admin_timeout_ms);
1732 		return 0;
1733 	}
1734 
1735 	nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr);
1736 	ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid);
1737 	if (ns == NULL) {
1738 		/* No active NS, move on to the next state */
1739 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_CONFIGURE_AER,
1740 				     ctrlr->opts.admin_timeout_ms);
1741 		return 0;
1742 	}
1743 
1744 	rc = nvme_ctrlr_identify_id_desc_async(ns);
1745 	if (rc) {
1746 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
1747 	}
1748 
1749 	return rc;
1750 }
1751 
1752 static void
1753 nvme_ctrlr_update_nvmf_ioccsz(struct spdk_nvme_ctrlr *ctrlr)
1754 {
1755 	if (ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_RDMA ||
1756 	    ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_TCP ||
1757 	    ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_FC) {
1758 		if (ctrlr->cdata.nvmf_specific.ioccsz < 4) {
1759 			SPDK_ERRLOG("Incorrect IOCCSZ %u, the minimum value should be 4\n",
1760 				    ctrlr->cdata.nvmf_specific.ioccsz);
1761 			ctrlr->cdata.nvmf_specific.ioccsz = 4;
1762 			assert(0);
1763 		}
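		/* IOCCSZ is reported in units of 16 bytes; convert it to bytes and
		 * subtract the SQE size to get the usable in-capsule data size.
		 */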
1764 		ctrlr->ioccsz_bytes = ctrlr->cdata.nvmf_specific.ioccsz * 16 - sizeof(struct spdk_nvme_cmd);
1765 		ctrlr->icdoff = ctrlr->cdata.nvmf_specific.icdoff;
1766 	}
1767 }
1768 
1769 static void
1770 nvme_ctrlr_set_num_queues_done(void *arg, const struct spdk_nvme_cpl *cpl)
1771 {
1772 	uint32_t cq_allocated, sq_allocated, min_allocated, i;
1773 	struct spdk_nvme_ctrlr *ctrlr = (struct spdk_nvme_ctrlr *)arg;
1774 
1775 	if (spdk_nvme_cpl_is_error(cpl)) {
1776 		SPDK_ERRLOG("Set Features - Number of Queues failed!\n");
1777 		ctrlr->opts.num_io_queues = 0;
1778 	} else {
1779 		/*
1780 		 * Data in cdw0 is 0-based.
1781 		 * Lower 16-bits indicate number of submission queues allocated.
1782 		 * Upper 16-bits indicate number of completion queues allocated.
1783 		 */
1784 		sq_allocated = (cpl->cdw0 & 0xFFFF) + 1;
1785 		cq_allocated = (cpl->cdw0 >> 16) + 1;
1786 
1787 		/*
1788 		 * For 1:1 queue mapping, set number of allocated queues to be minimum of
1789 		 * submission and completion queues.
1790 		 */
1791 		min_allocated = spdk_min(sq_allocated, cq_allocated);
1792 
1793 		/* Set number of queues to be minimum of requested and actually allocated. */
1794 		ctrlr->opts.num_io_queues = spdk_min(min_allocated, ctrlr->opts.num_io_queues);
1795 	}
1796 
1797 	ctrlr->free_io_qids = spdk_bit_array_create(ctrlr->opts.num_io_queues + 1);
1798 	if (ctrlr->free_io_qids == NULL) {
1799 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
1800 		return;
1801 	}
1802 
1803 	/* Initialize list of free I/O queue IDs. QID 0 is the admin queue. */
1804 	spdk_bit_array_clear(ctrlr->free_io_qids, 0);
1805 	for (i = 1; i <= ctrlr->opts.num_io_queues; i++) {
1806 		spdk_bit_array_set(ctrlr->free_io_qids, i);
1807 	}
1808 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_CONSTRUCT_NS,
1809 			     ctrlr->opts.admin_timeout_ms);
1810 }
1811 
1812 static int
1813 nvme_ctrlr_set_num_queues(struct spdk_nvme_ctrlr *ctrlr)
1814 {
1815 	int rc;
1816 
1817 	if (ctrlr->opts.num_io_queues > SPDK_NVME_MAX_IO_QUEUES) {
1818 		SPDK_NOTICELOG("Limiting requested num_io_queues %u to max %d\n",
1819 			       ctrlr->opts.num_io_queues, SPDK_NVME_MAX_IO_QUEUES);
1820 		ctrlr->opts.num_io_queues = SPDK_NVME_MAX_IO_QUEUES;
1821 	} else if (ctrlr->opts.num_io_queues < 1) {
1822 		SPDK_NOTICELOG("Requested num_io_queues 0, increasing to 1\n");
1823 		ctrlr->opts.num_io_queues = 1;
1824 	}
1825 
1826 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_SET_NUM_QUEUES,
1827 			     ctrlr->opts.admin_timeout_ms);
1828 
1829 	rc = nvme_ctrlr_cmd_set_num_queues(ctrlr, ctrlr->opts.num_io_queues,
1830 					   nvme_ctrlr_set_num_queues_done, ctrlr);
1831 	if (rc != 0) {
1832 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
1833 		return rc;
1834 	}
1835 
1836 	return 0;
1837 }
1838 
1839 static void
1840 nvme_ctrlr_set_keep_alive_timeout_done(void *arg, const struct spdk_nvme_cpl *cpl)
1841 {
1842 	uint32_t keep_alive_interval_ms;
1843 	struct spdk_nvme_ctrlr *ctrlr = (struct spdk_nvme_ctrlr *)arg;
1844 
1845 	if (spdk_nvme_cpl_is_error(cpl)) {
1846 		if ((cpl->status.sct == SPDK_NVME_SCT_GENERIC) &&
1847 		    (cpl->status.sc == SPDK_NVME_SC_INVALID_FIELD)) {
1848 			SPDK_DEBUGLOG(SPDK_LOG_NVME, "Keep alive timeout Get Feature is not supported\n");
1849 		} else {
1850 			SPDK_ERRLOG("Keep alive timeout Get Feature failed: SC %x SCT %x\n",
1851 				    cpl->status.sc, cpl->status.sct);
1852 			ctrlr->opts.keep_alive_timeout_ms = 0;
1853 			nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
1854 			return;
1855 		}
1856 	} else {
1857 		if (ctrlr->opts.keep_alive_timeout_ms != cpl->cdw0) {
1858 			SPDK_DEBUGLOG(SPDK_LOG_NVME, "Controller adjusted keep alive timeout to %u ms\n",
1859 				      cpl->cdw0);
1860 		}
1861 
1862 		ctrlr->opts.keep_alive_timeout_ms = cpl->cdw0;
1863 	}
1864 
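	/* Send Keep Alive commands at half the (possibly controller-adjusted)
	 * timeout, with a minimum interval of 1 ms, so the timeout should not
	 * expire under normal operation.
	 */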
1865 	keep_alive_interval_ms = ctrlr->opts.keep_alive_timeout_ms / 2;
1866 	if (keep_alive_interval_ms == 0) {
1867 		keep_alive_interval_ms = 1;
1868 	}
1869 	SPDK_DEBUGLOG(SPDK_LOG_NVME, "Sending keep alive every %u ms\n", keep_alive_interval_ms);
1870 
1871 	ctrlr->keep_alive_interval_ticks = (keep_alive_interval_ms * spdk_get_ticks_hz()) / UINT64_C(1000);
1872 
1873 	/* Schedule the first Keep Alive to be sent as soon as possible. */
1874 	ctrlr->next_keep_alive_tick = spdk_get_ticks();
1875 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_HOST_ID,
1876 			     ctrlr->opts.admin_timeout_ms);
1877 }
1878 
1879 static int
1880 nvme_ctrlr_set_keep_alive_timeout(struct spdk_nvme_ctrlr *ctrlr)
1881 {
1882 	int rc;
1883 
1884 	if (ctrlr->opts.keep_alive_timeout_ms == 0) {
1885 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_HOST_ID,
1886 				     ctrlr->opts.admin_timeout_ms);
1887 		return 0;
1888 	}
1889 
1890 	if (ctrlr->cdata.kas == 0) {
1891 		SPDK_DEBUGLOG(SPDK_LOG_NVME, "Controller KAS is 0 - not enabling Keep Alive\n");
1892 		ctrlr->opts.keep_alive_timeout_ms = 0;
1893 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_HOST_ID,
1894 				     ctrlr->opts.admin_timeout_ms);
1895 		return 0;
1896 	}
1897 
1898 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_KEEP_ALIVE_TIMEOUT,
1899 			     ctrlr->opts.admin_timeout_ms);
1900 
1901 	/* Retrieve actual keep alive timeout, since the controller may have adjusted it. */
1902 	rc = spdk_nvme_ctrlr_cmd_get_feature(ctrlr, SPDK_NVME_FEAT_KEEP_ALIVE_TIMER, 0, NULL, 0,
1903 					     nvme_ctrlr_set_keep_alive_timeout_done, ctrlr);
1904 	if (rc != 0) {
1905 		SPDK_ERRLOG("Keep alive timeout Get Feature failed: %d\n", rc);
1906 		ctrlr->opts.keep_alive_timeout_ms = 0;
1907 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
1908 		return rc;
1909 	}
1910 
1911 	return 0;
1912 }
1913 
1914 static void
1915 nvme_ctrlr_set_host_id_done(void *arg, const struct spdk_nvme_cpl *cpl)
1916 {
1917 	struct spdk_nvme_ctrlr *ctrlr = (struct spdk_nvme_ctrlr *)arg;
1918 
1919 	if (spdk_nvme_cpl_is_error(cpl)) {
1920 		/*
1921 		 * Treat Set Features - Host ID failure as non-fatal, since the Host ID feature
1922 		 * is optional.
1923 		 */
1924 		SPDK_WARNLOG("Set Features - Host ID failed: SC 0x%x SCT 0x%x\n",
1925 			     cpl->status.sc, cpl->status.sct);
1926 	} else {
1927 		SPDK_DEBUGLOG(SPDK_LOG_NVME, "Set Features - Host ID was successful\n");
1928 	}
1929 
1930 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_READY, NVME_TIMEOUT_INFINITE);
1931 }
1932 
1933 static int
1934 nvme_ctrlr_set_host_id(struct spdk_nvme_ctrlr *ctrlr)
1935 {
1936 	uint8_t *host_id;
1937 	uint32_t host_id_size;
1938 	int rc;
1939 
1940 	if (ctrlr->trid.trtype != SPDK_NVME_TRANSPORT_PCIE) {
1941 		/*
1942 		 * NVMe-oF sends the host ID during Connect and doesn't allow
1943 		 * Set Features - Host Identifier after Connect, so we don't need to do anything here.
1944 		 */
1945 		SPDK_DEBUGLOG(SPDK_LOG_NVME, "NVMe-oF transport - not sending Set Features - Host ID\n");
1946 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_READY, NVME_TIMEOUT_INFINITE);
1947 		return 0;
1948 	}
1949 
1950 	if (ctrlr->cdata.ctratt.host_id_exhid_supported) {
1951 		SPDK_DEBUGLOG(SPDK_LOG_NVME, "Using 128-bit extended host identifier\n");
1952 		host_id = ctrlr->opts.extended_host_id;
1953 		host_id_size = sizeof(ctrlr->opts.extended_host_id);
1954 	} else {
1955 		SPDK_DEBUGLOG(SPDK_LOG_NVME, "Using 64-bit host identifier\n");
1956 		host_id = ctrlr->opts.host_id;
1957 		host_id_size = sizeof(ctrlr->opts.host_id);
1958 	}
1959 
1960 	/* If the user specified an all-zeroes host identifier, don't send the command. */
1961 	if (spdk_mem_all_zero(host_id, host_id_size)) {
1962 		SPDK_DEBUGLOG(SPDK_LOG_NVME,
1963 			      "User did not specify host ID - not sending Set Features - Host ID\n");
1964 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_READY, NVME_TIMEOUT_INFINITE);
1965 		return 0;
1966 	}
1967 
1968 	SPDK_LOGDUMP(SPDK_LOG_NVME, "host_id", host_id, host_id_size);
1969 
1970 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_HOST_ID,
1971 			     ctrlr->opts.admin_timeout_ms);
1972 
1973 	rc = nvme_ctrlr_cmd_set_host_id(ctrlr, host_id, host_id_size, nvme_ctrlr_set_host_id_done, ctrlr);
1974 	if (rc != 0) {
1975 		SPDK_ERRLOG("Set Features - Host ID failed: %d\n", rc);
1976 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
1977 		return rc;
1978 	}
1979 
1980 	return 0;
1981 }
1982 
1983 static void
1984 nvme_ctrlr_destruct_namespaces(struct spdk_nvme_ctrlr *ctrlr)
1985 {
1986 	if (ctrlr->ns) {
1987 		uint32_t i, num_ns = ctrlr->num_ns;
1988 
1989 		for (i = 0; i < num_ns; i++) {
1990 			nvme_ns_destruct(&ctrlr->ns[i]);
1991 		}
1992 
1993 		spdk_free(ctrlr->ns);
1994 		ctrlr->ns = NULL;
1995 		ctrlr->num_ns = 0;
1996 	}
1997 
1998 	if (ctrlr->nsdata) {
1999 		spdk_free(ctrlr->nsdata);
2000 		ctrlr->nsdata = NULL;
2001 	}
2002 
2003 	spdk_free(ctrlr->active_ns_list);
2004 	ctrlr->active_ns_list = NULL;
2005 }
2006 
2007 static void
2008 nvme_ctrlr_update_namespaces(struct spdk_nvme_ctrlr *ctrlr)
2009 {
2010 	uint32_t i, nn = ctrlr->cdata.nn;
2011 	struct spdk_nvme_ns_data *nsdata;
2012 	bool ns_is_active;
2013 
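	/*
	 * Reconcile each namespace with the latest active namespace list:
	 * a previously constructed namespace (nonzero cached NCAP) is updated if
	 * still active or destructed if not, while an active nsid with no cached
	 * data gets newly constructed.
	 */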
2014 	for (i = 0; i < nn; i++) {
2015 		struct spdk_nvme_ns	*ns = &ctrlr->ns[i];
2016 		uint32_t		nsid = i + 1;
2017 
2018 		nsdata = &ctrlr->nsdata[nsid - 1];
2019 		ns_is_active = spdk_nvme_ctrlr_is_active_ns(ctrlr, nsid);
2020 
2021 		if (nsdata->ncap && ns_is_active) {
2022 			if (nvme_ns_update(ns) != 0) {
2023 				SPDK_ERRLOG("Failed to update active NS %u\n", nsid);
2024 				continue;
2025 			}
2026 		}
2027 
2028 		if ((nsdata->ncap == 0) && ns_is_active) {
2029 			if (nvme_ns_construct(ns, nsid, ctrlr) != 0) {
2030 				continue;
2031 			}
2032 		}
2033 
2034 		if (nsdata->ncap && !ns_is_active) {
2035 			nvme_ns_destruct(ns);
2036 		}
2037 	}
2038 }
2039 
2040 static int
2041 nvme_ctrlr_construct_namespaces(struct spdk_nvme_ctrlr *ctrlr)
2042 {
2043 	int rc = 0;
2044 	uint32_t nn = ctrlr->cdata.nn;
2045 
2046 	/* ctrlr->num_ns may be 0 (startup) or a different number of namespaces (reset),
2047 	 * so check if we need to reallocate.
2048 	 */
2049 	if (nn != ctrlr->num_ns) {
2050 		nvme_ctrlr_destruct_namespaces(ctrlr);
2051 
2052 		if (nn == 0) {
2053 			SPDK_WARNLOG("controller has 0 namespaces\n");
2054 			return 0;
2055 		}
2056 
2057 		ctrlr->ns = spdk_zmalloc(nn * sizeof(struct spdk_nvme_ns), 64, NULL,
2058 					 SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_SHARE);
2059 		if (ctrlr->ns == NULL) {
2060 			rc = -ENOMEM;
2061 			goto fail;
2062 		}
2063 
2064 		ctrlr->nsdata = spdk_zmalloc(nn * sizeof(struct spdk_nvme_ns_data), 64,
2065 					     NULL, SPDK_ENV_SOCKET_ID_ANY,
2066 					     SPDK_MALLOC_SHARE | SPDK_MALLOC_DMA);
2067 		if (ctrlr->nsdata == NULL) {
2068 			rc = -ENOMEM;
2069 			goto fail;
2070 		}
2071 
2072 		ctrlr->num_ns = nn;
2073 	}
2074 
2075 	return 0;
2076 
2077 fail:
2078 	nvme_ctrlr_destruct_namespaces(ctrlr);
2079 	return rc;
2080 }
2081 
2082 static void
2083 nvme_ctrlr_async_event_cb(void *arg, const struct spdk_nvme_cpl *cpl)
2084 {
2085 	struct nvme_async_event_request	*aer = arg;
2086 	struct spdk_nvme_ctrlr		*ctrlr = aer->ctrlr;
2087 	struct spdk_nvme_ctrlr_process	*active_proc;
2088 	union spdk_nvme_async_event_completion	event;
2089 	int					rc;
2090 
2091 	if (cpl->status.sct == SPDK_NVME_SCT_GENERIC &&
2092 	    cpl->status.sc == SPDK_NVME_SC_ABORTED_SQ_DELETION) {
2093 		/*
2094 		 *  This is simulated when the controller is being shut down, to
2095 		 *  effectively abort outstanding asynchronous event requests
2096 		 *  and make sure all memory is freed.  Do not repost the
2097 		 *  request in this case.
2098 		 */
2099 		return;
2100 	}
2101 
2102 	if (cpl->status.sct == SPDK_NVME_SCT_COMMAND_SPECIFIC &&
2103 	    cpl->status.sc == SPDK_NVME_SC_ASYNC_EVENT_REQUEST_LIMIT_EXCEEDED) {
2104 		/*
2105 		 *  SPDK will only send as many AERs as the device says it supports,
2106 		 *  so this status code indicates an out-of-spec device.  Do not repost
2107 		 *  the request in this case.
2108 		 */
2109 		SPDK_ERRLOG("Controller appears out-of-spec for asynchronous event request "
2110 			    "handling. Do not repost this AER.\n");
2111 		return;
2112 	}
2113 
2114 	event.raw = cpl->cdw0;
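	/* On a Namespace Attribute Changed notice, refresh the active namespace
	 * list and namespace objects before handing the event to the application.
	 */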
2115 	if ((event.bits.async_event_type == SPDK_NVME_ASYNC_EVENT_TYPE_NOTICE) &&
2116 	    (event.bits.async_event_info == SPDK_NVME_ASYNC_EVENT_NS_ATTR_CHANGED)) {
2117 		rc = nvme_ctrlr_identify_active_ns(ctrlr);
2118 		if (rc) {
2119 			return;
2120 		}
2121 		nvme_ctrlr_update_namespaces(ctrlr);
2122 		nvme_io_msg_ctrlr_update(ctrlr);
2123 	}
2124 
2125 	active_proc = nvme_ctrlr_get_current_process(ctrlr);
2126 	if (active_proc && active_proc->aer_cb_fn) {
2127 		active_proc->aer_cb_fn(active_proc->aer_cb_arg, cpl);
2128 	}
2129 
2130 	/* If the ctrlr has been removed or is being destructed, do not send the AER again. */
2131 	if (ctrlr->is_removed || ctrlr->is_destructed) {
2132 		return;
2133 	}
2134 
2135 	/*
2136 	 * Repost another asynchronous event request to replace the one
2137 	 *  that just completed.
2138 	 */
2139 	if (nvme_ctrlr_construct_and_submit_aer(ctrlr, aer)) {
2140 		/*
2141 		 * We can't do anything to recover from a failure here,
2142 		 * so just log an error and leave the AER unsubmitted.
2143 		 */
2144 		SPDK_ERRLOG("resubmitting AER failed!\n");
2145 	}
2146 }
2147 
2148 static int
2149 nvme_ctrlr_construct_and_submit_aer(struct spdk_nvme_ctrlr *ctrlr,
2150 				    struct nvme_async_event_request *aer)
2151 {
2152 	struct nvme_request *req;
2153 
2154 	aer->ctrlr = ctrlr;
2155 	req = nvme_allocate_request_null(ctrlr->adminq, nvme_ctrlr_async_event_cb, aer);
2156 	aer->req = req;
2157 	if (req == NULL) {
2158 		return -1;
2159 	}
2160 
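	/* AERs carry no payload; the request completes only when an event occurs
	 * or the request is aborted.
	 */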
2161 	req->cmd.opc = SPDK_NVME_OPC_ASYNC_EVENT_REQUEST;
2162 	return nvme_ctrlr_submit_admin_request(ctrlr, req);
2163 }
2164 
2165 static void
2166 nvme_ctrlr_configure_aer_done(void *arg, const struct spdk_nvme_cpl *cpl)
2167 {
2168 	struct nvme_async_event_request		*aer;
2169 	int					rc;
2170 	uint32_t				i;
2171 	struct spdk_nvme_ctrlr *ctrlr =	(struct spdk_nvme_ctrlr *)arg;
2172 
2173 	if (spdk_nvme_cpl_is_error(cpl)) {
2174 		SPDK_NOTICELOG("nvme_ctrlr_configure_aer failed!\n");
2175 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_SUPPORTED_LOG_PAGES,
2176 				     ctrlr->opts.admin_timeout_ms);
2177 		return;
2178 	}
2179 
2180 	/* aerl is a zero-based value, so we need to add 1 here. */
2181 	ctrlr->num_aers = spdk_min(NVME_MAX_ASYNC_EVENTS, (ctrlr->cdata.aerl + 1));
2182 
2183 	for (i = 0; i < ctrlr->num_aers; i++) {
2184 		aer = &ctrlr->aer[i];
2185 		rc = nvme_ctrlr_construct_and_submit_aer(ctrlr, aer);
2186 		if (rc) {
2187 			SPDK_ERRLOG("nvme_ctrlr_construct_and_submit_aer failed!\n");
2188 			nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
2189 			return;
2190 		}
2191 	}
2192 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_SUPPORTED_LOG_PAGES,
2193 			     ctrlr->opts.admin_timeout_ms);
2194 }
2195 
2196 static int
2197 nvme_ctrlr_configure_aer(struct spdk_nvme_ctrlr *ctrlr)
2198 {
2199 	union spdk_nvme_feat_async_event_configuration	config;
2200 	int						rc;
2201 
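	/* Enable the critical warning (SMART) asynchronous events. */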
2202 	config.raw = 0;
2203 	config.bits.crit_warn.bits.available_spare = 1;
2204 	config.bits.crit_warn.bits.temperature = 1;
2205 	config.bits.crit_warn.bits.device_reliability = 1;
2206 	config.bits.crit_warn.bits.read_only = 1;
2207 	config.bits.crit_warn.bits.volatile_memory_backup = 1;
2208 
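	/* Optional notice events are enabled only if the controller is new enough
	 * to define them and reports support for them in Identify Controller.
	 */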
2209 	if (ctrlr->vs.raw >= SPDK_NVME_VERSION(1, 2, 0)) {
2210 		if (ctrlr->cdata.oaes.ns_attribute_notices) {
2211 			config.bits.ns_attr_notice = 1;
2212 		}
2213 		if (ctrlr->cdata.oaes.fw_activation_notices) {
2214 			config.bits.fw_activation_notice = 1;
2215 		}
2216 	}
2217 	if (ctrlr->vs.raw >= SPDK_NVME_VERSION(1, 3, 0) && ctrlr->cdata.lpa.telemetry) {
2218 		config.bits.telemetry_log_notice = 1;
2219 	}
2220 
2221 	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_WAIT_FOR_CONFIGURE_AER,
2222 			     ctrlr->opts.admin_timeout_ms);
2223 
2224 	rc = nvme_ctrlr_cmd_set_async_event_config(ctrlr, config,
2225 			nvme_ctrlr_configure_aer_done,
2226 			ctrlr);
2227 	if (rc != 0) {
2228 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ERROR, NVME_TIMEOUT_INFINITE);
2229 		return rc;
2230 	}
2231 
2232 	return 0;
2233 }
2234 
2235 struct spdk_nvme_ctrlr_process *
2236 nvme_ctrlr_get_process(struct spdk_nvme_ctrlr *ctrlr, pid_t pid)
2237 {
2238 	struct spdk_nvme_ctrlr_process	*active_proc;
2239 
2240 	TAILQ_FOREACH(active_proc, &ctrlr->active_procs, tailq) {
2241 		if (active_proc->pid == pid) {
2242 			return active_proc;
2243 		}
2244 	}
2245 
2246 	return NULL;
2247 }
2248 
2249 struct spdk_nvme_ctrlr_process *
2250 nvme_ctrlr_get_current_process(struct spdk_nvme_ctrlr *ctrlr)
2251 {
2252 	return nvme_ctrlr_get_process(ctrlr, getpid());
2253 }
2254 
2255 /**
2256  * This function will be called when a process is using the controller.
2257  *  1. For the primary process, it is called when constructing the controller.
2258  *  2. For a secondary process, it is called when probing the controller.
2259  * Note: it checks whether the process has already been added before adding it.
2260  */
2261 int
2262 nvme_ctrlr_add_process(struct spdk_nvme_ctrlr *ctrlr, void *devhandle)
2263 {
2264 	struct spdk_nvme_ctrlr_process	*ctrlr_proc;
2265 	pid_t				pid = getpid();
2266 
2267 	/* Check whether the process is already added or not */
2268 	if (nvme_ctrlr_get_process(ctrlr, pid)) {
2269 		return 0;
2270 	}
2271 
2272 	/* Initialize the per process properties for this ctrlr */
2273 	ctrlr_proc = spdk_zmalloc(sizeof(struct spdk_nvme_ctrlr_process),
2274 				  64, NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_SHARE);
2275 	if (ctrlr_proc == NULL) {
2276 		SPDK_ERRLOG("failed to allocate memory to track the process props\n");
2277 
2278 		return -1;
2279 	}
2280 
2281 	ctrlr_proc->is_primary = spdk_process_is_primary();
2282 	ctrlr_proc->pid = pid;
2283 	STAILQ_INIT(&ctrlr_proc->active_reqs);
2284 	ctrlr_proc->devhandle = devhandle;
2285 	ctrlr_proc->ref = 0;
2286 	TAILQ_INIT(&ctrlr_proc->allocated_io_qpairs);
2287 
2288 	TAILQ_INSERT_TAIL(&ctrlr->active_procs, ctrlr_proc, tailq);
2289 
2290 	return 0;
2291 }
2292 
2293 /**
2294  * This function will be called when the process detaches the controller.
2295  * Note: the ctrlr_lock must be held when calling this function.
2296  */
2297 static void
2298 nvme_ctrlr_remove_process(struct spdk_nvme_ctrlr *ctrlr,
2299 			  struct spdk_nvme_ctrlr_process *proc)
2300 {
2301 	struct spdk_nvme_qpair	*qpair, *tmp_qpair;
2302 
2303 	assert(STAILQ_EMPTY(&proc->active_reqs));
2304 
2305 	TAILQ_FOREACH_SAFE(qpair, &proc->allocated_io_qpairs, per_process_tailq, tmp_qpair) {
2306 		spdk_nvme_ctrlr_free_io_qpair(qpair);
2307 	}
2308 
2309 	TAILQ_REMOVE(&ctrlr->active_procs, proc, tailq);
2310 
2311 	if (ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) {
2312 		spdk_pci_device_detach(proc->devhandle);
2313 	}
2314 
2315 	spdk_free(proc);
2316 }
2317 
2318 /**
2319  * This function will be called when a process exits unexpectedly
2320  *  in order to free any incomplete nvme requests, allocated IO qpairs
2321  *  and allocated memory.
2322  * Note: the ctrlr_lock must be held when calling this function.
2323  */
2324 static void
2325 nvme_ctrlr_cleanup_process(struct spdk_nvme_ctrlr_process *proc)
2326 {
2327 	struct nvme_request	*req, *tmp_req;
2328 	struct spdk_nvme_qpair	*qpair, *tmp_qpair;
2329 
2330 	STAILQ_FOREACH_SAFE(req, &proc->active_reqs, stailq, tmp_req) {
2331 		STAILQ_REMOVE(&proc->active_reqs, req, nvme_request, stailq);
2332 
2333 		assert(req->pid == proc->pid);
2334 
2335 		nvme_free_request(req);
2336 	}
2337 
2338 	TAILQ_FOREACH_SAFE(qpair, &proc->allocated_io_qpairs, per_process_tailq, tmp_qpair) {
2339 		TAILQ_REMOVE(&proc->allocated_io_qpairs, qpair, per_process_tailq);
2340 
2341 		/*
2342 		 * The process may have been killed while some qpairs were in their
2343 		 *  completion context.  Clear that flag here to allow these IO
2344 		 *  qpairs to be deleted.
2345 		 */
2346 		qpair->in_completion_context = 0;
2347 
2348 		qpair->no_deletion_notification_needed = 1;
2349 
2350 		spdk_nvme_ctrlr_free_io_qpair(qpair);
2351 	}
2352 
2353 	spdk_free(proc);
2354 }
2355 
2356 /**
2357  * This function will be called when destructing the controller.
2358  *  1. There are no more admin requests on this controller.
2359  *  2. Clean up any leftover resource allocations whose associated process is gone.
2360  */
2361 void
2362 nvme_ctrlr_free_processes(struct spdk_nvme_ctrlr *ctrlr)
2363 {
2364 	struct spdk_nvme_ctrlr_process	*active_proc, *tmp;
2365 
2366 	/* Free all the processes' properties and make sure there are no pending admin I/Os */
2367 	TAILQ_FOREACH_SAFE(active_proc, &ctrlr->active_procs, tailq, tmp) {
2368 		TAILQ_REMOVE(&ctrlr->active_procs, active_proc, tailq);
2369 
2370 		assert(STAILQ_EMPTY(&active_proc->active_reqs));
2371 
2372 		spdk_free(active_proc);
2373 	}
2374 }
2375 
2376 /**
2377  * This function will be called when any other process attaches or
2378  *  detaches the controller in order to clean up any unexpectedly
2379  *  terminated processes.
2380  * Note: the ctrlr_lock must be held when calling this function.
2381  */
2382 static int
2383 nvme_ctrlr_remove_inactive_proc(struct spdk_nvme_ctrlr *ctrlr)
2384 {
2385 	struct spdk_nvme_ctrlr_process	*active_proc, *tmp;
2386 	int				active_proc_count = 0;
2387 
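	/* kill() with signal 0 delivers no signal; it only checks whether the
	 * process still exists.  ESRCH means the process has terminated.
	 */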
2388 	TAILQ_FOREACH_SAFE(active_proc, &ctrlr->active_procs, tailq, tmp) {
2389 		if ((kill(active_proc->pid, 0) == -1) && (errno == ESRCH)) {
2390 			SPDK_ERRLOG("process %d terminated unexpectedly\n", active_proc->pid);
2391 
2392 			TAILQ_REMOVE(&ctrlr->active_procs, active_proc, tailq);
2393 
2394 			nvme_ctrlr_cleanup_process(active_proc);
2395 		} else {
2396 			active_proc_count++;
2397 		}
2398 	}
2399 
2400 	return active_proc_count;
2401 }
2402 
2403 void
2404 nvme_ctrlr_proc_get_ref(struct spdk_nvme_ctrlr *ctrlr)
2405 {
2406 	struct spdk_nvme_ctrlr_process	*active_proc;
2407 
2408 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
2409 
2410 	nvme_ctrlr_remove_inactive_proc(ctrlr);
2411 
2412 	active_proc = nvme_ctrlr_get_current_process(ctrlr);
2413 	if (active_proc) {
2414 		active_proc->ref++;
2415 	}
2416 
2417 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
2418 }
2419 
2420 void
2421 nvme_ctrlr_proc_put_ref(struct spdk_nvme_ctrlr *ctrlr)
2422 {
2423 	struct spdk_nvme_ctrlr_process	*active_proc;
2424 	int				proc_count;
2425 
2426 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
2427 
2428 	proc_count = nvme_ctrlr_remove_inactive_proc(ctrlr);
2429 
2430 	active_proc = nvme_ctrlr_get_current_process(ctrlr);
2431 	if (active_proc) {
2432 		active_proc->ref--;
2433 		assert(active_proc->ref >= 0);
2434 
2435 		/*
2436 		 * The last active process will be removed at the end of
2437 		 * the destruction of the controller.
2438 		 */
2439 		if (active_proc->ref == 0 && proc_count != 1) {
2440 			nvme_ctrlr_remove_process(ctrlr, active_proc);
2441 		}
2442 	}
2443 
2444 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
2445 }
2446 
2447 int
2448 nvme_ctrlr_get_ref_count(struct spdk_nvme_ctrlr *ctrlr)
2449 {
2450 	struct spdk_nvme_ctrlr_process	*active_proc;
2451 	int				ref = 0;
2452 
2453 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
2454 
2455 	nvme_ctrlr_remove_inactive_proc(ctrlr);
2456 
2457 	TAILQ_FOREACH(active_proc, &ctrlr->active_procs, tailq) {
2458 		ref += active_proc->ref;
2459 	}
2460 
2461 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
2462 
2463 	return ref;
2464 }
2465 
2466 /**
2467  *  Get the PCI device handle which is only visible to its associated process.
2468  */
2469 struct spdk_pci_device *
2470 nvme_ctrlr_proc_get_devhandle(struct spdk_nvme_ctrlr *ctrlr)
2471 {
2472 	struct spdk_nvme_ctrlr_process	*active_proc;
2473 	struct spdk_pci_device		*devhandle = NULL;
2474 
2475 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
2476 
2477 	active_proc = nvme_ctrlr_get_current_process(ctrlr);
2478 	if (active_proc) {
2479 		devhandle = active_proc->devhandle;
2480 	}
2481 
2482 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
2483 
2484 	return devhandle;
2485 }
2486 
2487 /**
2488  * This function will be called repeatedly during initialization until the controller is ready.
2489  */
2490 int
2491 nvme_ctrlr_process_init(struct spdk_nvme_ctrlr *ctrlr)
2492 {
2493 	union spdk_nvme_cc_register cc;
2494 	union spdk_nvme_csts_register csts;
2495 	uint32_t ready_timeout_in_ms;
2496 	int rc = 0;
2497 
2498 	/*
2499 	 * May need to avoid accessing any register on the target controller
2500 	 * for a while. Return early without touching the FSM.
2501 	 * Check sleep_timeout_tsc > 0 for unit test.
2502 	 */
2503 	if ((ctrlr->sleep_timeout_tsc > 0) &&
2504 	    (spdk_get_ticks() <= ctrlr->sleep_timeout_tsc)) {
2505 		return 0;
2506 	}
2507 	ctrlr->sleep_timeout_tsc = 0;
2508 
2509 	if (nvme_ctrlr_get_cc(ctrlr, &cc) ||
2510 	    nvme_ctrlr_get_csts(ctrlr, &csts)) {
2511 		if (ctrlr->state_timeout_tsc != NVME_TIMEOUT_INFINITE) {
2512 			/* While a device is resetting, it may be unable to service MMIO reads
2513 			 * temporarily. Allow for this case.
2514 			 */
2515 			SPDK_ERRLOG("Get registers failed while waiting for CSTS.RDY == 0\n");
2516 			goto init_timeout;
2517 		}
2518 		SPDK_ERRLOG("Failed to read CC and CSTS in state %d\n", ctrlr->state);
2519 		return -EIO;
2520 	}
2521 
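	/* CAP.TO is reported in units of 500 milliseconds. */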
2522 	ready_timeout_in_ms = 500 * ctrlr->cap.bits.to;
2523 
2524 	/*
2525 	 * Check if the current initialization step is done or has timed out.
2526 	 */
2527 	switch (ctrlr->state) {
2528 	case NVME_CTRLR_STATE_INIT_DELAY:
2529 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_INIT, ready_timeout_in_ms);
2530 		if (ctrlr->quirks & NVME_QUIRK_DELAY_BEFORE_INIT) {
2531 			/*
2532 			 * Controller may need some delay before it's enabled.
2533 			 *
2534 			 * This is a workaround for an issue where the PCIe-attached NVMe controller
2535 			 * is not ready after VFIO reset. We delay the initialization rather than the
2536 			 * enabling itself, because this is required only for the very first enabling
2537 			 * - directly after a VFIO reset.
2538 			 */
2539 			SPDK_DEBUGLOG(SPDK_LOG_NVME, "Adding 2 second delay before initializing the controller\n");
2540 			ctrlr->sleep_timeout_tsc = spdk_get_ticks() + (2000 * spdk_get_ticks_hz() / 1000);
2541 		}
2542 		break;
2543 
2544 	case NVME_CTRLR_STATE_INIT:
2545 		/* Begin the hardware initialization by making sure the controller is disabled. */
2546 		if (cc.bits.en) {
2547 			SPDK_DEBUGLOG(SPDK_LOG_NVME, "CC.EN = 1\n");
2548 			/*
2549 			 * Controller is currently enabled. We need to disable it to cause a reset.
2550 			 *
2551 			 * If CC.EN = 1 && CSTS.RDY = 0, the controller is in the process of becoming ready.
2552 			 *  Wait for the ready bit to be 1 before disabling the controller.
2553 			 */
2554 			if (csts.bits.rdy == 0) {
2555 				SPDK_DEBUGLOG(SPDK_LOG_NVME, "CC.EN = 1 && CSTS.RDY = 0 - waiting for reset to complete\n");
2556 				nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1, ready_timeout_in_ms);
2557 				return 0;
2558 			}
2559 
2560 			/* CC.EN = 1 && CSTS.RDY == 1, so we can immediately disable the controller. */
2561 			SPDK_DEBUGLOG(SPDK_LOG_NVME, "Setting CC.EN = 0\n");
2562 			cc.bits.en = 0;
2563 			if (nvme_ctrlr_set_cc(ctrlr, &cc)) {
2564 				SPDK_ERRLOG("set_cc() failed\n");
2565 				return -EIO;
2566 			}
2567 			nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0, ready_timeout_in_ms);
2568 
2569 			/*
2570 			 * Wait 2.5 seconds before accessing PCI registers.
2571 			 * Not using sleep() to avoid blocking other controllers' initialization.
2572 			 */
2573 			if (ctrlr->quirks & NVME_QUIRK_DELAY_BEFORE_CHK_RDY) {
2574 				SPDK_DEBUGLOG(SPDK_LOG_NVME, "Applying quirk: delay 2.5 seconds before reading registers\n");
2575 				ctrlr->sleep_timeout_tsc = spdk_get_ticks() + (2500 * spdk_get_ticks_hz() / 1000);
2576 			}
2577 			return 0;
2578 		} else {
2579 			if (csts.bits.rdy == 1) {
2580 				SPDK_DEBUGLOG(SPDK_LOG_NVME, "CC.EN = 0 && CSTS.RDY = 1 - waiting for shutdown to complete\n");
2581 			}
2582 
2583 			nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0, ready_timeout_in_ms);
2584 			return 0;
2585 		}
2586 		break;
2587 
2588 	case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1:
2589 		if (csts.bits.rdy == 1) {
2590 			SPDK_DEBUGLOG(SPDK_LOG_NVME, "CC.EN = 1 && CSTS.RDY = 1 - disabling controller\n");
2591 			/* CC.EN = 1 && CSTS.RDY = 1, so we can set CC.EN = 0 now. */
2592 			SPDK_DEBUGLOG(SPDK_LOG_NVME, "Setting CC.EN = 0\n");
2593 			cc.bits.en = 0;
2594 			if (nvme_ctrlr_set_cc(ctrlr, &cc)) {
2595 				SPDK_ERRLOG("set_cc() failed\n");
2596 				return -EIO;
2597 			}
2598 			nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0, ready_timeout_in_ms);
2599 			return 0;
2600 		}
2601 		break;
2602 
2603 	case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0:
2604 		if (csts.bits.rdy == 0) {
2605 			SPDK_DEBUGLOG(SPDK_LOG_NVME, "CC.EN = 0 && CSTS.RDY = 0\n");
2606 			nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ENABLE, ready_timeout_in_ms);
2607 			/*
2608 			 * Delay 100us before setting CC.EN = 1.  Some NVMe SSDs miss CC.EN getting
2609 			 *  set to 1 if it is too soon after CSTS.RDY is reported as 0.
2610 			 */
2611 			spdk_delay_us(100);
2612 			return 0;
2613 		}
2614 		break;
2615 
2616 	case NVME_CTRLR_STATE_ENABLE:
2617 		SPDK_DEBUGLOG(SPDK_LOG_NVME, "Setting CC.EN = 1\n");
2618 		rc = nvme_ctrlr_enable(ctrlr);
2619 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1, ready_timeout_in_ms);
2620 		return rc;
2621 
2622 	case NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1:
2623 		if (csts.bits.rdy == 1) {
2624 			SPDK_DEBUGLOG(SPDK_LOG_NVME, "CC.EN = 1 && CSTS.RDY = 1 - controller is ready\n");
2625 			/*
2626 			 * The controller has been enabled.
2627 			 *  Perform the rest of initialization serially.
2628 			 */
2629 			nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_RESET_ADMIN_QUEUE,
2630 					     ctrlr->opts.admin_timeout_ms);
2631 			return 0;
2632 		}
2633 		break;
2634 
2635 	case NVME_CTRLR_STATE_RESET_ADMIN_QUEUE:
2636 		nvme_transport_qpair_reset(ctrlr->adminq);
2637 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY,
2638 				     ctrlr->opts.admin_timeout_ms);
2639 		break;
2640 
2641 	case NVME_CTRLR_STATE_IDENTIFY:
2642 		rc = nvme_ctrlr_identify(ctrlr);
2643 		break;
2644 
2645 	case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY:
2646 		spdk_nvme_qpair_process_completions(ctrlr->adminq, 0);
2647 		break;
2648 
2649 	case NVME_CTRLR_STATE_SET_NUM_QUEUES:
2650 		nvme_ctrlr_update_nvmf_ioccsz(ctrlr);
2651 		rc = nvme_ctrlr_set_num_queues(ctrlr);
2652 		break;
2653 
2654 	case NVME_CTRLR_STATE_WAIT_FOR_SET_NUM_QUEUES:
2655 		spdk_nvme_qpair_process_completions(ctrlr->adminq, 0);
2656 		break;
2657 
2658 	case NVME_CTRLR_STATE_CONSTRUCT_NS:
2659 		rc = nvme_ctrlr_construct_namespaces(ctrlr);
2660 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_IDENTIFY_ACTIVE_NS,
2661 				     ctrlr->opts.admin_timeout_ms);
2662 		break;
2663 
2664 	case NVME_CTRLR_STATE_IDENTIFY_ACTIVE_NS:
2665 		_nvme_ctrlr_identify_active_ns(ctrlr);
2666 		break;
2667 
2668 	case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_ACTIVE_NS:
2669 		spdk_nvme_qpair_process_completions(ctrlr->adminq, 0);
2670 		break;
2671 
2672 	case NVME_CTRLR_STATE_IDENTIFY_NS:
2673 		rc = nvme_ctrlr_identify_namespaces(ctrlr);
2674 		break;
2675 
2676 	case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_NS:
2677 		spdk_nvme_qpair_process_completions(ctrlr->adminq, 0);
2678 		break;
2679 
2680 	case NVME_CTRLR_STATE_IDENTIFY_ID_DESCS:
2681 		rc = nvme_ctrlr_identify_id_desc_namespaces(ctrlr);
2682 		break;
2683 
2684 	case NVME_CTRLR_STATE_WAIT_FOR_IDENTIFY_ID_DESCS:
2685 		spdk_nvme_qpair_process_completions(ctrlr->adminq, 0);
2686 		break;
2687 
2688 	case NVME_CTRLR_STATE_CONFIGURE_AER:
2689 		rc = nvme_ctrlr_configure_aer(ctrlr);
2690 		break;
2691 
2692 	case NVME_CTRLR_STATE_WAIT_FOR_CONFIGURE_AER:
2693 		spdk_nvme_qpair_process_completions(ctrlr->adminq, 0);
2694 		break;
2695 
2696 	case NVME_CTRLR_STATE_SET_SUPPORTED_LOG_PAGES:
2697 		rc = nvme_ctrlr_set_supported_log_pages(ctrlr);
2698 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_SUPPORTED_FEATURES,
2699 				     ctrlr->opts.admin_timeout_ms);
2700 		break;
2701 
2702 	case NVME_CTRLR_STATE_SET_SUPPORTED_FEATURES:
2703 		nvme_ctrlr_set_supported_features(ctrlr);
2704 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_SET_DB_BUF_CFG,
2705 				     ctrlr->opts.admin_timeout_ms);
2706 		break;
2707 
2708 	case NVME_CTRLR_STATE_SET_DB_BUF_CFG:
2709 		rc = nvme_ctrlr_set_doorbell_buffer_config(ctrlr);
2710 		break;
2711 
2712 	case NVME_CTRLR_STATE_WAIT_FOR_DB_BUF_CFG:
2713 		spdk_nvme_qpair_process_completions(ctrlr->adminq, 0);
2714 		break;
2715 
2716 	case NVME_CTRLR_STATE_SET_KEEP_ALIVE_TIMEOUT:
2717 		rc = nvme_ctrlr_set_keep_alive_timeout(ctrlr);
2718 		break;
2719 
2720 	case NVME_CTRLR_STATE_WAIT_FOR_KEEP_ALIVE_TIMEOUT:
2721 		spdk_nvme_qpair_process_completions(ctrlr->adminq, 0);
2722 		break;
2723 
2724 	case NVME_CTRLR_STATE_SET_HOST_ID:
2725 		rc = nvme_ctrlr_set_host_id(ctrlr);
2726 		break;
2727 
2728 	case NVME_CTRLR_STATE_WAIT_FOR_HOST_ID:
2729 		spdk_nvme_qpair_process_completions(ctrlr->adminq, 0);
2730 		break;
2731 
2732 	case NVME_CTRLR_STATE_READY:
2733 		SPDK_DEBUGLOG(SPDK_LOG_NVME, "Ctrlr already in ready state\n");
2734 		return 0;
2735 
2736 	case NVME_CTRLR_STATE_ERROR:
2737 		SPDK_ERRLOG("Ctrlr %s is in error state\n", ctrlr->trid.traddr);
2738 		return -1;
2739 
2740 	default:
2741 		assert(0);
2742 		return -1;
2743 	}
2744 
2745 init_timeout:
2746 	if (ctrlr->state_timeout_tsc != NVME_TIMEOUT_INFINITE &&
2747 	    spdk_get_ticks() > ctrlr->state_timeout_tsc) {
2748 		SPDK_ERRLOG("Initialization timed out in state %d\n", ctrlr->state);
2749 		return -1;
2750 	}
2751 
2752 	return rc;
2753 }
2754 
2755 int
2756 nvme_robust_mutex_init_recursive_shared(pthread_mutex_t *mtx)
2757 {
2758 	pthread_mutexattr_t attr;
2759 	int rc = 0;
2760 
2761 	if (pthread_mutexattr_init(&attr)) {
2762 		return -1;
2763 	}
2764 	if (pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE) ||
2765 #ifndef __FreeBSD__
2766 	    pthread_mutexattr_setrobust(&attr, PTHREAD_MUTEX_ROBUST) ||
2767 	    pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED) ||
2768 #endif
2769 	    pthread_mutex_init(mtx, &attr)) {
2770 		rc = -1;
2771 	}
2772 	pthread_mutexattr_destroy(&attr);
2773 	return rc;
2774 }
2775 
2776 int
2777 nvme_ctrlr_construct(struct spdk_nvme_ctrlr *ctrlr)
2778 {
2779 	int rc;
2780 
2781 	if (ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) {
2782 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_INIT_DELAY, NVME_TIMEOUT_INFINITE);
2783 	} else {
2784 		nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_INIT, NVME_TIMEOUT_INFINITE);
2785 	}
2786 
2787 	if (ctrlr->opts.admin_queue_size > SPDK_NVME_ADMIN_QUEUE_MAX_ENTRIES) {
2788 		SPDK_ERRLOG("admin_queue_size %u exceeds max defined by NVMe spec, using max value\n",
2789 			    ctrlr->opts.admin_queue_size);
2790 		ctrlr->opts.admin_queue_size = SPDK_NVME_ADMIN_QUEUE_MAX_ENTRIES;
2791 	}
2792 
2793 	if (ctrlr->opts.admin_queue_size < SPDK_NVME_ADMIN_QUEUE_MIN_ENTRIES) {
2794 		SPDK_ERRLOG("admin_queue_size %u is less than minimum defined by NVMe spec, using min value\n",
2795 			    ctrlr->opts.admin_queue_size);
2796 		ctrlr->opts.admin_queue_size = SPDK_NVME_ADMIN_QUEUE_MIN_ENTRIES;
2797 	}
2798 
2799 	ctrlr->flags = 0;
2800 	ctrlr->free_io_qids = NULL;
2801 	ctrlr->is_resetting = false;
2802 	ctrlr->is_failed = false;
2803 	ctrlr->is_destructed = false;
2804 
2805 	TAILQ_INIT(&ctrlr->active_io_qpairs);
2806 	STAILQ_INIT(&ctrlr->queued_aborts);
2807 	ctrlr->outstanding_aborts = 0;
2808 
2809 	rc = nvme_robust_mutex_init_recursive_shared(&ctrlr->ctrlr_lock);
2810 	if (rc != 0) {
2811 		return rc;
2812 	}
2813 
2814 	TAILQ_INIT(&ctrlr->active_procs);
2815 
2816 	return rc;
2817 }
2818 
2819 /* This function should be called once at ctrlr initialization to set up constant properties. */
2820 void
2821 nvme_ctrlr_init_cap(struct spdk_nvme_ctrlr *ctrlr, const union spdk_nvme_cap_register *cap,
2822 		    const union spdk_nvme_vs_register *vs)
2823 {
2824 	ctrlr->cap = *cap;
2825 	ctrlr->vs = *vs;
2826 
2827 	if (ctrlr->cap.bits.ams & SPDK_NVME_CAP_AMS_WRR) {
2828 		ctrlr->flags |= SPDK_NVME_CTRLR_WRR_SUPPORTED;
2829 	}
2830 
2831 	ctrlr->min_page_size = 1u << (12 + ctrlr->cap.bits.mpsmin);
2832 
2833 	/* For now, always select page_size == min_page_size. */
2834 	ctrlr->page_size = ctrlr->min_page_size;
2835 
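	/* Clamp io_queue_size to the supported range; CAP.MQES is 0-based,
	 * hence the +1 when comparing against the controller's maximum.
	 */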
2836 	ctrlr->opts.io_queue_size = spdk_max(ctrlr->opts.io_queue_size, SPDK_NVME_IO_QUEUE_MIN_ENTRIES);
2837 	ctrlr->opts.io_queue_size = spdk_min(ctrlr->opts.io_queue_size, MAX_IO_QUEUE_ENTRIES);
2838 	ctrlr->opts.io_queue_size = spdk_min(ctrlr->opts.io_queue_size, ctrlr->cap.bits.mqes + 1u);
2839 
2840 	ctrlr->opts.io_queue_requests = spdk_max(ctrlr->opts.io_queue_requests, ctrlr->opts.io_queue_size);
2841 }
2842 
2843 void
2844 nvme_ctrlr_destruct_finish(struct spdk_nvme_ctrlr *ctrlr)
2845 {
2846 	pthread_mutex_destroy(&ctrlr->ctrlr_lock);
2847 }
2848 
2849 void
2850 nvme_ctrlr_destruct(struct spdk_nvme_ctrlr *ctrlr)
2851 {
2852 	struct spdk_nvme_qpair *qpair, *tmp;
2853 
2854 	SPDK_DEBUGLOG(SPDK_LOG_NVME, "Prepare to destruct SSD: %s\n", ctrlr->trid.traddr);
2855 
2856 	ctrlr->is_destructed = true;
2857 
2858 	spdk_nvme_qpair_process_completions(ctrlr->adminq, 0);
2859 
2860 	nvme_ctrlr_abort_queued_aborts(ctrlr);
2861 	nvme_transport_admin_qpair_abort_aers(ctrlr->adminq);
2862 
2863 	TAILQ_FOREACH_SAFE(qpair, &ctrlr->active_io_qpairs, tailq, tmp) {
2864 		spdk_nvme_ctrlr_free_io_qpair(qpair);
2865 	}
2866 
2867 	nvme_ctrlr_free_doorbell_buffer(ctrlr);
2868 
2869 	if (ctrlr->opts.no_shn_notification) {
2870 		SPDK_INFOLOG(SPDK_LOG_NVME, "Disable SSD: %s without shutdown notification\n",
2871 			     ctrlr->trid.traddr);
2872 		nvme_ctrlr_disable(ctrlr);
2873 	} else {
2874 		nvme_ctrlr_shutdown(ctrlr);
2875 	}
2876 
2877 	nvme_ctrlr_destruct_namespaces(ctrlr);
2878 
2879 	spdk_bit_array_free(&ctrlr->free_io_qids);
2880 
2881 	nvme_transport_ctrlr_destruct(ctrlr);
2882 }
2883 
2884 int
2885 nvme_ctrlr_submit_admin_request(struct spdk_nvme_ctrlr *ctrlr,
2886 				struct nvme_request *req)
2887 {
2888 	return nvme_qpair_submit_request(ctrlr->adminq, req);
2889 }
2890 
2891 static void
2892 nvme_keep_alive_completion(void *cb_ctx, const struct spdk_nvme_cpl *cpl)
2893 {
2894 	/* Do nothing */
2895 }
2896 
2897 /*
2898  * Check if we need to send a Keep Alive command.
2899  * Caller must hold ctrlr->ctrlr_lock.
2900  */
2901 static void
2902 nvme_ctrlr_keep_alive(struct spdk_nvme_ctrlr *ctrlr)
2903 {
2904 	uint64_t now;
2905 	struct nvme_request *req;
2906 	struct spdk_nvme_cmd *cmd;
2907 	int rc;
2908 
2909 	now = spdk_get_ticks();
2910 	if (now < ctrlr->next_keep_alive_tick) {
2911 		return;
2912 	}
2913 
2914 	req = nvme_allocate_request_null(ctrlr->adminq, nvme_keep_alive_completion, NULL);
2915 	if (req == NULL) {
2916 		return;
2917 	}
2918 
2919 	cmd = &req->cmd;
2920 	cmd->opc = SPDK_NVME_OPC_KEEP_ALIVE;
2921 
2922 	rc = nvme_ctrlr_submit_admin_request(ctrlr, req);
2923 	if (rc != 0) {
2924 		SPDK_ERRLOG("Submitting Keep Alive failed\n");
2925 	}
2926 
2927 	ctrlr->next_keep_alive_tick = now + ctrlr->keep_alive_interval_ticks;
2928 }
2929 
2930 int32_t
2931 spdk_nvme_ctrlr_process_admin_completions(struct spdk_nvme_ctrlr *ctrlr)
2932 {
2933 	int32_t num_completions;
2934 	int32_t rc;
2935 
2936 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
2937 
2938 	if (ctrlr->keep_alive_interval_ticks) {
2939 		nvme_ctrlr_keep_alive(ctrlr);
2940 	}
2941 
2942 	rc = nvme_io_msg_process(ctrlr);
2943 	if (rc < 0) {
2944 		nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
2945 		return rc;
2946 	}
2947 	num_completions = rc;
2948 
2949 	rc = spdk_nvme_qpair_process_completions(ctrlr->adminq, 0);
2950 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
2951 
2952 	if (rc < 0) {
2953 		num_completions = rc;
2954 	} else {
2955 		num_completions += rc;
2956 	}
2957 
2958 	return num_completions;
2959 }
2960 
2961 const struct spdk_nvme_ctrlr_data *
2962 spdk_nvme_ctrlr_get_data(struct spdk_nvme_ctrlr *ctrlr)
2963 {
2964 	return &ctrlr->cdata;
2965 }
2966 
2967 union spdk_nvme_csts_register spdk_nvme_ctrlr_get_regs_csts(struct spdk_nvme_ctrlr *ctrlr)
2968 {
2969 	union spdk_nvme_csts_register csts;
2970 
2971 	if (nvme_ctrlr_get_csts(ctrlr, &csts)) {
2972 		csts.raw = 0xFFFFFFFFu;
2973 	}
2974 	return csts;
2975 }
2976 
2977 union spdk_nvme_cap_register spdk_nvme_ctrlr_get_regs_cap(struct spdk_nvme_ctrlr *ctrlr)
2978 {
2979 	return ctrlr->cap;
2980 }
2981 
2982 union spdk_nvme_vs_register spdk_nvme_ctrlr_get_regs_vs(struct spdk_nvme_ctrlr *ctrlr)
2983 {
2984 	return ctrlr->vs;
2985 }
2986 
2987 union spdk_nvme_cmbsz_register spdk_nvme_ctrlr_get_regs_cmbsz(struct spdk_nvme_ctrlr *ctrlr)
2988 {
2989 	union spdk_nvme_cmbsz_register cmbsz;
2990 
2991 	if (nvme_ctrlr_get_cmbsz(ctrlr, &cmbsz)) {
2992 		cmbsz.raw = 0;
2993 	}
2994 
2995 	return cmbsz;
2996 }
2997 
2998 uint32_t
2999 spdk_nvme_ctrlr_get_num_ns(struct spdk_nvme_ctrlr *ctrlr)
3000 {
3001 	return ctrlr->num_ns;
3002 }
3003 
3004 static int32_t
3005 nvme_ctrlr_active_ns_idx(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid)
3006 {
3007 	int32_t result = -1;
3008 
3009 	if (ctrlr->active_ns_list == NULL || nsid == 0 || nsid > ctrlr->num_ns) {
3010 		return result;
3011 	}
3012 
3013 	int32_t lower = 0;
3014 	int32_t upper = ctrlr->num_ns - 1;
3015 	int32_t mid;
3016 
3017 	while (lower <= upper) {
3018 		mid = lower + (upper - lower) / 2;
3019 		if (ctrlr->active_ns_list[mid] == nsid) {
3020 			result = mid;
3021 			break;
3022 		} else {
3023 			if (ctrlr->active_ns_list[mid] != 0 && ctrlr->active_ns_list[mid] < nsid) {
3024 				lower = mid + 1;
3025 			} else {
3026 				upper = mid - 1;
3027 			}
3028 
3029 		}
3030 	}
3031 
3032 	return result;
3033 }
3034 
3035 bool
3036 spdk_nvme_ctrlr_is_active_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid)
3037 {
3038 	return nvme_ctrlr_active_ns_idx(ctrlr, nsid) != -1;
3039 }
3040 
3041 uint32_t
3042 spdk_nvme_ctrlr_get_first_active_ns(struct spdk_nvme_ctrlr *ctrlr)
3043 {
3044 	return ctrlr->active_ns_list ? ctrlr->active_ns_list[0] : 0;
3045 }
3046 
3047 uint32_t
3048 spdk_nvme_ctrlr_get_next_active_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t prev_nsid)
3049 {
3050 	int32_t nsid_idx = nvme_ctrlr_active_ns_idx(ctrlr, prev_nsid);
3051 	if (ctrlr->active_ns_list && nsid_idx >= 0 && (uint32_t)nsid_idx < ctrlr->num_ns - 1) {
3052 		return ctrlr->active_ns_list[nsid_idx + 1];
3053 	}
3054 	return 0;
3055 }
3056 
3057 struct spdk_nvme_ns *
3058 spdk_nvme_ctrlr_get_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid)
3059 {
3060 	if (nsid < 1 || nsid > ctrlr->num_ns) {
3061 		return NULL;
3062 	}
3063 
3064 	return &ctrlr->ns[nsid - 1];
3065 }
3066 
3067 struct spdk_pci_device *
3068 spdk_nvme_ctrlr_get_pci_device(struct spdk_nvme_ctrlr *ctrlr)
3069 {
3070 	if (ctrlr == NULL) {
3071 		return NULL;
3072 	}
3073 
3074 	if (ctrlr->trid.trtype != SPDK_NVME_TRANSPORT_PCIE) {
3075 		return NULL;
3076 	}
3077 
3078 	return nvme_ctrlr_proc_get_devhandle(ctrlr);
3079 }
3080 
3081 uint32_t
3082 spdk_nvme_ctrlr_get_max_xfer_size(const struct spdk_nvme_ctrlr *ctrlr)
3083 {
3084 	return ctrlr->max_xfer_size;
3085 }
3086 
3087 void
3088 spdk_nvme_ctrlr_register_aer_callback(struct spdk_nvme_ctrlr *ctrlr,
3089 				      spdk_nvme_aer_cb aer_cb_fn,
3090 				      void *aer_cb_arg)
3091 {
3092 	struct spdk_nvme_ctrlr_process *active_proc;
3093 
3094 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
3095 
3096 	active_proc = nvme_ctrlr_get_current_process(ctrlr);
3097 	if (active_proc) {
3098 		active_proc->aer_cb_fn = aer_cb_fn;
3099 		active_proc->aer_cb_arg = aer_cb_arg;
3100 	}
3101 
3102 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
3103 }
3104 
3105 void
3106 spdk_nvme_ctrlr_register_timeout_callback(struct spdk_nvme_ctrlr *ctrlr,
3107 		uint64_t timeout_us, spdk_nvme_timeout_cb cb_fn, void *cb_arg)
3108 {
3109 	struct spdk_nvme_ctrlr_process	*active_proc;
3110 
3111 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
3112 
3113 	active_proc = nvme_ctrlr_get_current_process(ctrlr);
3114 	if (active_proc) {
3115 		active_proc->timeout_ticks = timeout_us * spdk_get_ticks_hz() / 1000000ULL;
3116 		active_proc->timeout_cb_fn = cb_fn;
3117 		active_proc->timeout_cb_arg = cb_arg;
3118 	}
3119 
3120 	ctrlr->timeout_enabled = true;
3121 
3122 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
3123 }
3124 
3125 bool
3126 spdk_nvme_ctrlr_is_log_page_supported(struct spdk_nvme_ctrlr *ctrlr, uint8_t log_page)
3127 {
3128 	/* No bounds check necessary, since log_page is uint8_t and log_page_supported has 256 entries */
3129 	SPDK_STATIC_ASSERT(sizeof(ctrlr->log_page_supported) == 256, "log_page_supported size mismatch");
3130 	return ctrlr->log_page_supported[log_page];
3131 }
3132 
3133 bool
3134 spdk_nvme_ctrlr_is_feature_supported(struct spdk_nvme_ctrlr *ctrlr, uint8_t feature_code)
3135 {
3136 	/* No bounds check necessary, since feature_code is uint8_t and feature_supported has 256 entries */
3137 	SPDK_STATIC_ASSERT(sizeof(ctrlr->feature_supported) == 256, "feature_supported size mismatch");
3138 	return ctrlr->feature_supported[feature_code];
3139 }
3140 
3141 int
3142 spdk_nvme_ctrlr_attach_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid,
3143 			  struct spdk_nvme_ctrlr_list *payload)
3144 {
3145 	struct nvme_completion_poll_status	*status;
3146 	int					res;
3147 	struct spdk_nvme_ns			*ns;
3148 
3149 	status = calloc(1, sizeof(*status));
3150 	if (!status) {
3151 		SPDK_ERRLOG("Failed to allocate status tracker\n");
3152 		return -ENOMEM;
3153 	}
3154 
3155 	res = nvme_ctrlr_cmd_attach_ns(ctrlr, nsid, payload,
3156 				       nvme_completion_poll_cb, status);
3157 	if (res) {
3158 		free(status);
3159 		return res;
3160 	}
3161 	if (nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock)) {
3162 		SPDK_ERRLOG("spdk_nvme_ctrlr_attach_ns failed!\n");
3163 		if (!status->timed_out) {
3164 			free(status);
3165 		}
3166 		return -ENXIO;
3167 	}
3168 	free(status);
3169 
3170 	res = nvme_ctrlr_identify_active_ns(ctrlr);
3171 	if (res) {
3172 		return res;
3173 	}
3174 
3175 	ns = &ctrlr->ns[nsid - 1];
3176 	return nvme_ns_construct(ns, nsid, ctrlr);
3177 }
3178 
3179 int
3180 spdk_nvme_ctrlr_detach_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid,
3181 			  struct spdk_nvme_ctrlr_list *payload)
3182 {
3183 	struct nvme_completion_poll_status	*status;
3184 	int					res;
3185 	struct spdk_nvme_ns			*ns;
3186 
3187 	status = calloc(1, sizeof(*status));
3188 	if (!status) {
3189 		SPDK_ERRLOG("Failed to allocate status tracker\n");
3190 		return -ENOMEM;
3191 	}
3192 
3193 	res = nvme_ctrlr_cmd_detach_ns(ctrlr, nsid, payload,
3194 				       nvme_completion_poll_cb, status);
3195 	if (res) {
3196 		free(status);
3197 		return res;
3198 	}
3199 	if (nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock)) {
3200 		SPDK_ERRLOG("spdk_nvme_ctrlr_detach_ns failed!\n");
3201 		if (!status->timed_out) {
3202 			free(status);
3203 		}
3204 		return -ENXIO;
3205 	}
3206 	free(status);
3207 
3208 	res = nvme_ctrlr_identify_active_ns(ctrlr);
3209 	if (res) {
3210 		return res;
3211 	}
3212 
3213 	ns = &ctrlr->ns[nsid - 1];
3214 	/* Inactive NS */
3215 	nvme_ns_destruct(ns);
3216 
3217 	return 0;
3218 }
3219 
3220 uint32_t
3221 spdk_nvme_ctrlr_create_ns(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_ns_data *payload)
3222 {
3223 	struct nvme_completion_poll_status	*status;
3224 	int					res;
3225 	uint32_t				nsid;
3226 	struct spdk_nvme_ns			*ns;
3227 
3228 	status = calloc(1, sizeof(*status));
3229 	if (!status) {
3230 		SPDK_ERRLOG("Failed to allocate status tracker\n");
3231 		return 0;
3232 	}
3233 
3234 	res = nvme_ctrlr_cmd_create_ns(ctrlr, payload, nvme_completion_poll_cb, status);
3235 	if (res) {
3236 		free(status);
3237 		return 0;
3238 	}
3239 	if (nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock)) {
3240 		SPDK_ERRLOG("spdk_nvme_ctrlr_create_ns failed!\n");
3241 		if (!status->timed_out) {
3242 			free(status);
3243 		}
3244 		return 0;
3245 	}
3246 
3247 	nsid = status->cpl.cdw0;
3248 	ns = &ctrlr->ns[nsid - 1];
3249 	free(status);
3250 	/* Inactive NS */
3251 	res = nvme_ns_construct(ns, nsid, ctrlr);
3252 	if (res) {
3253 		return 0;
3254 	}
3255 
3256 	/* Return the namespace ID that was created */
3257 	return nsid;
3258 }
3259 
3260 int
3261 spdk_nvme_ctrlr_delete_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid)
3262 {
3263 	struct nvme_completion_poll_status	*status;
3264 	int					res;
3265 	struct spdk_nvme_ns			*ns;
3266 
3267 	status = calloc(1, sizeof(*status));
3268 	if (!status) {
3269 		SPDK_ERRLOG("Failed to allocate status tracker\n");
3270 		return -ENOMEM;
3271 	}
3272 
3273 	res = nvme_ctrlr_cmd_delete_ns(ctrlr, nsid, nvme_completion_poll_cb, status);
3274 	if (res) {
3275 		free(status);
3276 		return res;
3277 	}
3278 	if (nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock)) {
3279 		SPDK_ERRLOG("spdk_nvme_ctrlr_delete_ns failed!\n");
3280 		if (!status->timed_out) {
3281 			free(status);
3282 		}
3283 		return -ENXIO;
3284 	}
3285 	free(status);
3286 
3287 	res = nvme_ctrlr_identify_active_ns(ctrlr);
3288 	if (res) {
3289 		return res;
3290 	}
3291 
3292 	ns = &ctrlr->ns[nsid - 1];
3293 	nvme_ns_destruct(ns);
3294 
3295 	return 0;
3296 }
3297 
3298 int
3299 spdk_nvme_ctrlr_format(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid,
3300 		       struct spdk_nvme_format *format)
3301 {
3302 	struct nvme_completion_poll_status	*status;
3303 	int					res;
3304 
3305 	status = calloc(1, sizeof(*status));
3306 	if (!status) {
3307 		SPDK_ERRLOG("Failed to allocate status tracker\n");
3308 		return -ENOMEM;
3309 	}
3310 
3311 	res = nvme_ctrlr_cmd_format(ctrlr, nsid, format, nvme_completion_poll_cb,
3312 				    status);
3313 	if (res) {
3314 		free(status);
3315 		return res;
3316 	}
3317 	if (nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock)) {
3318 		SPDK_ERRLOG("spdk_nvme_ctrlr_format failed!\n");
3319 		if (!status->timed_out) {
3320 			free(status);
3321 		}
3322 		return -ENXIO;
3323 	}
3324 	free(status);
3325 
3326 	return spdk_nvme_ctrlr_reset(ctrlr);
3327 }
3328 
3329 int
3330 spdk_nvme_ctrlr_update_firmware(struct spdk_nvme_ctrlr *ctrlr, void *payload, uint32_t size,
3331 				int slot, enum spdk_nvme_fw_commit_action commit_action, struct spdk_nvme_status *completion_status)
3332 {
3333 	struct spdk_nvme_fw_commit		fw_commit;
3334 	struct nvme_completion_poll_status	*status;
3335 	int					res;
3336 	unsigned int				size_remaining;
3337 	unsigned int				offset;
3338 	unsigned int				transfer;
3339 	void					*p;
3340 
3341 	if (!completion_status) {
3342 		return -EINVAL;
3343 	}
3344 	memset(completion_status, 0, sizeof(struct spdk_nvme_status));
3345 	if (size % 4) {
3346 		SPDK_ERRLOG("spdk_nvme_ctrlr_update_firmware invalid size!\n");
3347 		return -1;
3348 	}
3349 
3350 	/* Currently only SPDK_NVME_FW_COMMIT_REPLACE_IMG and
3351 	 * SPDK_NVME_FW_COMMIT_REPLACE_AND_ENABLE_IMG are supported.
3352 	 */
3353 	if ((commit_action != SPDK_NVME_FW_COMMIT_REPLACE_IMG) &&
3354 	    (commit_action != SPDK_NVME_FW_COMMIT_REPLACE_AND_ENABLE_IMG)) {
3355 		SPDK_ERRLOG("spdk_nvme_ctrlr_update_firmware invalid command!\n");
3356 		return -1;
3357 	}
3358 
3359 	status = calloc(1, sizeof(*status));
3360 	if (!status) {
3361 		SPDK_ERRLOG("Failed to allocate status tracker\n");
3362 		return -ENOMEM;
3363 	}
3364 
3365 	/* Firmware download */
3366 	size_remaining = size;
3367 	offset = 0;
3368 	p = payload;
3369 
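	/* Download the image in chunks of at most one minimum controller page,
	 * waiting for each chunk to complete before sending the next.
	 */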
3370 	while (size_remaining > 0) {
3371 		transfer = spdk_min(size_remaining, ctrlr->min_page_size);
3372 
3373 		memset(status, 0, sizeof(*status));
3374 		res = nvme_ctrlr_cmd_fw_image_download(ctrlr, transfer, offset, p,
3375 						       nvme_completion_poll_cb,
3376 						       status);
3377 		if (res) {
3378 			free(status);
3379 			return res;
3380 		}
3381 
3382 		if (nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock)) {
3383 			SPDK_ERRLOG("spdk_nvme_ctrlr_fw_image_download failed!\n");
3384 			if (!status->timed_out) {
3385 				free(status);
3386 			}
3387 			return -ENXIO;
3388 		}
3389 		p += transfer;
3390 		offset += transfer;
3391 		size_remaining -= transfer;
3392 	}
3393 
3394 	/* Firmware commit */
3395 	memset(&fw_commit, 0, sizeof(struct spdk_nvme_fw_commit));
3396 	fw_commit.fs = slot;
3397 	fw_commit.ca = commit_action;
3398 
3399 	memset(status, 0, sizeof(*status));
3400 	res = nvme_ctrlr_cmd_fw_commit(ctrlr, &fw_commit, nvme_completion_poll_cb,
3401 				       status);
3402 	if (res) {
3403 		free(status);
3404 		return res;
3405 	}
3406 
3407 	res = nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock);
3408 
3409 	memcpy(completion_status, &status->cpl.status, sizeof(struct spdk_nvme_status));
3410 
3411 	if (!status->timed_out) {
3412 		free(status);
3413 	}
3414 
3415 	if (res) {
3416 		if (completion_status->sct != SPDK_NVME_SCT_COMMAND_SPECIFIC ||
3417 		    completion_status->sc != SPDK_NVME_SC_FIRMWARE_REQ_NVM_RESET) {
3418 			if (completion_status->sct == SPDK_NVME_SCT_COMMAND_SPECIFIC  &&
3419 			    completion_status->sc == SPDK_NVME_SC_FIRMWARE_REQ_CONVENTIONAL_RESET) {
3420 				SPDK_NOTICELOG("firmware activation requires a conventional reset to be performed!\n");
3421 			} else {
3422 				SPDK_ERRLOG("nvme_ctrlr_cmd_fw_commit failed!\n");
3423 			}
3424 			return -ENXIO;
3425 		}
3426 	}
3427 
3428 	return spdk_nvme_ctrlr_reset(ctrlr);
3429 }
3430 
3431 int
3432 spdk_nvme_ctrlr_reserve_cmb(struct spdk_nvme_ctrlr *ctrlr)
3433 {
3434 	int rc, size;
3435 	union spdk_nvme_cmbsz_register cmbsz;
3436 
3437 	cmbsz = spdk_nvme_ctrlr_get_regs_cmbsz(ctrlr);
3438 
3439 	if (cmbsz.bits.rds == 0 || cmbsz.bits.wds == 0) {
3440 		return -ENOTSUP;
3441 	}
3442 
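	/* CMBSZ.SZ is expressed in the granularity selected by CMBSZ.SZU
	 * (4 KiB, shifted left by 4 bits per SZU step).
	 */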
3443 	size = cmbsz.bits.sz * (0x1000 << (cmbsz.bits.szu * 4));
3444 
3445 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
3446 	rc = nvme_transport_ctrlr_reserve_cmb(ctrlr);
3447 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
3448 
3449 	if (rc < 0) {
3450 		return rc;
3451 	}
3452 
3453 	return size;
3454 }
3455 
3456 void *
3457 spdk_nvme_ctrlr_map_cmb(struct spdk_nvme_ctrlr *ctrlr, size_t *size)
3458 {
3459 	void *buf;
3460 
3461 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
3462 	buf = nvme_transport_ctrlr_map_cmb(ctrlr, size);
3463 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
3464 
3465 	return buf;
3466 }
3467 
3468 void
3469 spdk_nvme_ctrlr_unmap_cmb(struct spdk_nvme_ctrlr *ctrlr)
3470 {
3471 	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
3472 	nvme_transport_ctrlr_unmap_cmb(ctrlr);
3473 	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
3474 }
3475 
3476 bool
3477 spdk_nvme_ctrlr_is_discovery(struct spdk_nvme_ctrlr *ctrlr)
3478 {
3479 	assert(ctrlr);
3480 
3481 	return !strncmp(ctrlr->trid.subnqn, SPDK_NVMF_DISCOVERY_NQN,
3482 			strlen(SPDK_NVMF_DISCOVERY_NQN));
3483 }
3484 
3485 int
3486 spdk_nvme_ctrlr_security_receive(struct spdk_nvme_ctrlr *ctrlr, uint8_t secp,
3487 				 uint16_t spsp, uint8_t nssf, void *payload, size_t size)
3488 {
3489 	struct nvme_completion_poll_status	*status;
3490 	int					res;
3491 
3492 	status = calloc(1, sizeof(*status));
3493 	if (!status) {
3494 		SPDK_ERRLOG("Failed to allocate status tracker\n");
3495 		return -ENOMEM;
3496 	}
3497 
3498 	res = spdk_nvme_ctrlr_cmd_security_receive(ctrlr, secp, spsp, nssf, payload, size,
3499 			nvme_completion_poll_cb, status);
3500 	if (res) {
3501 		free(status);
3502 		return res;
3503 	}
3504 	if (nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock)) {
3505 		SPDK_ERRLOG("spdk_nvme_ctrlr_cmd_security_receive failed!\n");
3506 		if (!status->timed_out) {
3507 			free(status);
3508 		}
3509 		return -ENXIO;
3510 	}
3511 	free(status);
3512 
3513 	return 0;
3514 }
3515 
3516 int
3517 spdk_nvme_ctrlr_security_send(struct spdk_nvme_ctrlr *ctrlr, uint8_t secp,
3518 			      uint16_t spsp, uint8_t nssf, void *payload, size_t size)
3519 {
3520 	struct nvme_completion_poll_status	*status;
3521 	int					res;
3522 
3523 	status = calloc(1, sizeof(*status));
3524 	if (!status) {
3525 		SPDK_ERRLOG("Failed to allocate status tracker\n");
3526 		return -ENOMEM;
3527 	}
3528 
3529 	res = spdk_nvme_ctrlr_cmd_security_send(ctrlr, secp, spsp, nssf, payload, size,
3530 						nvme_completion_poll_cb,
3531 						status);
3532 	if (res) {
3533 		free(status);
3534 		return res;
3535 	}
3536 	if (nvme_wait_for_completion_robust_lock(ctrlr->adminq, status, &ctrlr->ctrlr_lock)) {
3537 		SPDK_ERRLOG("spdk_nvme_ctrlr_cmd_security_send failed!\n");
3538 		if (!status->timed_out) {
3539 			free(status);
3540 		}
3541 		return -ENXIO;
3542 	}
3543 
3544 	free(status);
3545 
3546 	return 0;
3547 }
3548 
3549 uint64_t
3550 spdk_nvme_ctrlr_get_flags(struct spdk_nvme_ctrlr *ctrlr)
3551 {
3552 	return ctrlr->flags;
3553 }
3554 
3555 const struct spdk_nvme_transport_id *
3556 spdk_nvme_ctrlr_get_transport_id(struct spdk_nvme_ctrlr *ctrlr)
3557 {
3558 	return &ctrlr->trid;
3559 }
3560 
3561 /* FIXME need to specify max number of iovs */
3562 int
3563 spdk_nvme_map_prps(void *prv, struct spdk_nvme_cmd *cmd, struct iovec *iovs,
3564 		   uint32_t len, size_t mps,
3565 		   void *(*gpa_to_vva)(void *prv, uint64_t addr, uint64_t len))
3566 {
3567 	uint64_t prp1, prp2;
3568 	void *vva;
3569 	uint32_t i;
3570 	uint32_t residue_len, nents;
3571 	uint64_t *prp_list;
3572 	int iovcnt;
3573 
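	/*
	 * Translate the command's PRPs into iovecs: PRP1 covers the first,
	 * possibly unaligned, chunk; PRP2 is either a second data pointer (when
	 * at most one more page is needed) or the address of a PRP list.
	 */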
3574 	prp1 = cmd->dptr.prp.prp1;
3575 	prp2 = cmd->dptr.prp.prp2;
3576 
3577 	/* PRP1 may start at an unaligned page address */
3578 	residue_len = mps - (prp1 % mps);
3579 	residue_len = spdk_min(len, residue_len);
3580 
3581 	vva = gpa_to_vva(prv, prp1, residue_len);
3582 	if (spdk_unlikely(vva == NULL)) {
3583 		SPDK_ERRLOG("GPA to VVA failed\n");
3584 		return -1;
3585 	}
3586 	iovs[0].iov_base = vva;
3587 	iovs[0].iov_len = residue_len;
3588 	len -= residue_len;
3589 
3590 	if (len) {
3591 		if (spdk_unlikely(prp2 == 0)) {
3592 			SPDK_ERRLOG("no PRP2, %u bytes remaining\n", len);
3593 			return -1;
3594 		}
3595 
3596 		if (len <= mps) {
3597 			/* 2 PRP used */
3598 			iovcnt = 2;
3599 			vva = gpa_to_vva(prv, prp2, len);
3600 			if (spdk_unlikely(vva == NULL)) {
3601 				SPDK_ERRLOG("no VVA for %#lx, len=%#x\n",
3602 					    prp2, len);
3603 				return -1;
3604 			}
3605 			iovs[1].iov_base = vva;
3606 			iovs[1].iov_len = len;
3607 		} else {
3608 			/* PRP list used */
3609 			nents = (len + mps - 1) / mps;
3610 			vva = gpa_to_vva(prv, prp2, nents * sizeof(*prp_list));
3611 			if (spdk_unlikely(vva == NULL)) {
3612 				SPDK_ERRLOG("no VVA for %#lx, nents=%#x\n",
3613 					    prp2, nents);
3614 				return -1;
3615 			}
3616 			prp_list = vva;
3617 			i = 0;
3618 			while (len != 0) {
3619 				residue_len = spdk_min(len, mps);
3620 				vva = gpa_to_vva(prv, prp_list[i], residue_len);
3621 				if (spdk_unlikely(vva == NULL)) {
3622 					SPDK_ERRLOG("no VVA for %#lx, residue_len=%#x\n",
3623 						    prp_list[i], residue_len);
3624 					return -1;
3625 				}
3626 				iovs[i + 1].iov_base = vva;
3627 				iovs[i + 1].iov_len = residue_len;
3628 				len -= residue_len;
3629 				i++;
3630 			}
3631 			iovcnt = i + 1;
3632 		}
3633 	} else {
3634 		/* 1 PRP used */
3635 		iovcnt = 1;
3636 	}
3637 
3638 	return iovcnt;
3639 }
3640