/*-
 *   BSD LICENSE
 *
 *   Copyright (c) Intel Corporation.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "nvme_internal.h"
#include "spdk/pci.h"

static int nvme_ctrlr_construct_and_submit_aer(struct spdk_nvme_ctrlr *ctrlr,
		struct nvme_async_event_request *aer);

void
spdk_nvme_ctrlr_opts_set_defaults(struct spdk_nvme_ctrlr_opts *opts)
{
	opts->num_io_queues = DEFAULT_MAX_IO_QUEUES;
	opts->use_cmb_sqs = false;
	opts->arb_mechanism = SPDK_NVME_CC_AMS_RR;
}

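/*
 * Create the submission and completion queues for an I/O qpair on the
 *  controller.  The completion queue must be created first, since the
 *  Create I/O Submission Queue command refers to the CQ it will post
 *  completions to.  Both admin commands are polled to completion on the
 *  admin queue before returning.
 */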
static int
spdk_nvme_ctrlr_create_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair)
{
	struct nvme_completion_poll_status	status;
	int rc;

	status.done = false;
	rc = nvme_ctrlr_cmd_create_io_cq(ctrlr, qpair, nvme_completion_poll_cb, &status);
	if (rc != 0) {
		return rc;
	}

	while (status.done == false) {
		spdk_nvme_qpair_process_completions(&ctrlr->adminq, 0);
	}
	if (spdk_nvme_cpl_is_error(&status.cpl)) {
		nvme_printf(ctrlr, "nvme_create_io_cq failed!\n");
		return -1;
	}

	status.done = false;
	rc = nvme_ctrlr_cmd_create_io_sq(qpair->ctrlr, qpair, nvme_completion_poll_cb, &status);
	if (rc != 0) {
		return rc;
	}

	while (status.done == false) {
		spdk_nvme_qpair_process_completions(&ctrlr->adminq, 0);
	}
	if (spdk_nvme_cpl_is_error(&status.cpl)) {
		nvme_printf(ctrlr, "nvme_create_io_sq failed!\n");
		/* Attempt to delete the completion queue */
		status.done = false;
		rc = nvme_ctrlr_cmd_delete_io_cq(qpair->ctrlr, qpair, nvme_completion_poll_cb, &status);
		if (rc != 0) {
			return -1;
		}
		while (status.done == false) {
			spdk_nvme_qpair_process_completions(&ctrlr->adminq, 0);
		}
		return -1;
	}

	nvme_qpair_reset(qpair);

	return 0;
}

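/*
 * Usage sketch (illustrative, not part of this file): allocate an I/O
 *  qpair with the default round robin arbitration method and release it
 *  when done.  Assumes `ctrlr` came from the normal probe/attach path.
 *
 *	struct spdk_nvme_qpair *qpair;
 *
 *	qpair = spdk_nvme_ctrlr_alloc_io_qpair(ctrlr, SPDK_NVME_QPRIO_URGENT);
 *	if (qpair == NULL) {
 *		// no free qpair structure, or queue creation failed
 *	}
 *	// ... submit I/O on qpair ...
 *	spdk_nvme_ctrlr_free_io_qpair(qpair);
 */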
struct spdk_nvme_qpair *
spdk_nvme_ctrlr_alloc_io_qpair(struct spdk_nvme_ctrlr *ctrlr,
			       enum spdk_nvme_qprio qprio)
{
	struct spdk_nvme_qpair			*qpair;
	union spdk_nvme_cc_register		cc;

	cc.raw = nvme_mmio_read_4(ctrlr, cc.raw);

	/* Only the low 2 bits (values 0, 1, 2, 3) of QPRIO are valid. */
	if ((qprio & 3) != qprio) {
		return NULL;
	}

	/*
	 * Only the value SPDK_NVME_QPRIO_URGENT (0) is valid when the
	 *  default round robin arbitration method is in use.
	 */
	if ((cc.bits.ams == SPDK_NVME_CC_AMS_RR) && (qprio != SPDK_NVME_QPRIO_URGENT)) {
		nvme_printf(ctrlr,
			    "invalid queue priority for default round robin arbitration method\n");
		return NULL;
	}

	nvme_mutex_lock(&ctrlr->ctrlr_lock);

	/*
	 * Get the first available qpair structure.
	 */
	qpair = TAILQ_FIRST(&ctrlr->free_io_qpairs);
	if (qpair == NULL) {
		/* No free queue IDs */
		nvme_mutex_unlock(&ctrlr->ctrlr_lock);
		return NULL;
	}

	/*
	 * At this point, qpair contains a preallocated submission and completion queue and a
	 *  unique queue ID, but it is not yet created on the controller.
	 *
	 * Fill out the submission queue priority and send out the Create I/O Queue commands.
	 */
	qpair->qprio = qprio;
	if (spdk_nvme_ctrlr_create_qpair(ctrlr, qpair) != 0) {
		/*
		 * spdk_nvme_ctrlr_create_qpair() failed, so the qpair structure is still unused.
		 * Exit here so we don't insert it into the active_io_qpairs list.
		 */
		nvme_mutex_unlock(&ctrlr->ctrlr_lock);
		return NULL;
	}
	TAILQ_REMOVE(&ctrlr->free_io_qpairs, qpair, tailq);
	TAILQ_INSERT_TAIL(&ctrlr->active_io_qpairs, qpair, tailq);

	nvme_mutex_unlock(&ctrlr->ctrlr_lock);

	return qpair;
}

int
spdk_nvme_ctrlr_free_io_qpair(struct spdk_nvme_qpair *qpair)
{
	struct spdk_nvme_ctrlr *ctrlr;
	struct nvme_completion_poll_status status;
	int rc;

	if (qpair == NULL) {
		return 0;
	}

	ctrlr = qpair->ctrlr;

	nvme_mutex_lock(&ctrlr->ctrlr_lock);

	/* Delete the I/O submission queue and then the completion queue */

	status.done = false;
	rc = nvme_ctrlr_cmd_delete_io_sq(ctrlr, qpair, nvme_completion_poll_cb, &status);
	if (rc != 0) {
		nvme_mutex_unlock(&ctrlr->ctrlr_lock);
		return rc;
	}
	while (status.done == false) {
		spdk_nvme_qpair_process_completions(&ctrlr->adminq, 0);
	}
	if (spdk_nvme_cpl_is_error(&status.cpl)) {
		nvme_mutex_unlock(&ctrlr->ctrlr_lock);
		return -1;
	}

	status.done = false;
	rc = nvme_ctrlr_cmd_delete_io_cq(ctrlr, qpair, nvme_completion_poll_cb, &status);
	if (rc != 0) {
		nvme_mutex_unlock(&ctrlr->ctrlr_lock);
		return rc;
	}
	while (status.done == false) {
		spdk_nvme_qpair_process_completions(&ctrlr->adminq, 0);
	}
	if (spdk_nvme_cpl_is_error(&status.cpl)) {
		nvme_mutex_unlock(&ctrlr->ctrlr_lock);
		return -1;
	}

	TAILQ_REMOVE(&ctrlr->active_io_qpairs, qpair, tailq);
	TAILQ_INSERT_HEAD(&ctrlr->free_io_qpairs, qpair, tailq);

	nvme_mutex_unlock(&ctrlr->ctrlr_lock);
	return 0;
}

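/*
 * Mark which Intel vendor-specific log pages this controller supports,
 *  based on the log page directory reported by the device plus any
 *  per-device quirks for the latency tracking pages.
 */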
static void
nvme_ctrlr_construct_intel_support_log_page_list(struct spdk_nvme_ctrlr *ctrlr,
		struct spdk_nvme_intel_log_page_directory *log_page_directory)
{
	struct spdk_pci_device *dev;
	struct pci_id pci_id;

	if (ctrlr->cdata.vid != SPDK_PCI_VID_INTEL || log_page_directory == NULL) {
		return;
	}

	dev = ctrlr->devhandle;
	pci_id.vendor_id = spdk_pci_device_get_vendor_id(dev);
	pci_id.dev_id = spdk_pci_device_get_device_id(dev);
	pci_id.sub_vendor_id = spdk_pci_device_get_subvendor_id(dev);
	pci_id.sub_dev_id = spdk_pci_device_get_subdevice_id(dev);

	ctrlr->log_page_supported[SPDK_NVME_INTEL_LOG_PAGE_DIRECTORY] = true;

	if (log_page_directory->read_latency_log_len ||
	    nvme_intel_has_quirk(&pci_id, NVME_INTEL_QUIRK_READ_LATENCY)) {
		ctrlr->log_page_supported[SPDK_NVME_INTEL_LOG_READ_CMD_LATENCY] = true;
	}
	if (log_page_directory->write_latency_log_len ||
	    nvme_intel_has_quirk(&pci_id, NVME_INTEL_QUIRK_WRITE_LATENCY)) {
		ctrlr->log_page_supported[SPDK_NVME_INTEL_LOG_WRITE_CMD_LATENCY] = true;
	}
	if (log_page_directory->temperature_statistics_log_len) {
		ctrlr->log_page_supported[SPDK_NVME_INTEL_LOG_TEMPERATURE] = true;
	}
	if (log_page_directory->smart_log_len) {
		ctrlr->log_page_supported[SPDK_NVME_INTEL_LOG_SMART] = true;
	}
	if (log_page_directory->marketing_description_log_len) {
		ctrlr->log_page_supported[SPDK_NVME_INTEL_MARKETING_DESCRIPTION] = true;
	}
}

static int
nvme_ctrlr_set_intel_support_log_pages(struct spdk_nvme_ctrlr *ctrlr)
{
	uint64_t phys_addr = 0;
	struct nvme_completion_poll_status	status;
	struct spdk_nvme_intel_log_page_directory *log_page_directory;

	log_page_directory = nvme_malloc("nvme_log_page_directory",
					 sizeof(struct spdk_nvme_intel_log_page_directory),
					 64, &phys_addr);
	if (log_page_directory == NULL) {
		nvme_printf(NULL, "could not allocate log_page_directory\n");
		return -ENXIO;
	}

	status.done = false;
	spdk_nvme_ctrlr_cmd_get_log_page(ctrlr, SPDK_NVME_INTEL_LOG_PAGE_DIRECTORY, SPDK_NVME_GLOBAL_NS_TAG,
					 log_page_directory, sizeof(struct spdk_nvme_intel_log_page_directory),
					 nvme_completion_poll_cb,
					 &status);
	while (status.done == false) {
		spdk_nvme_qpair_process_completions(&ctrlr->adminq, 0);
	}
	if (spdk_nvme_cpl_is_error(&status.cpl)) {
		nvme_free(log_page_directory);
		nvme_printf(ctrlr, "nvme_ctrlr_cmd_get_log_page failed!\n");
		return -ENXIO;
	}

	nvme_ctrlr_construct_intel_support_log_page_list(ctrlr, log_page_directory);
	nvme_free(log_page_directory);
	return 0;
}

static void
nvme_ctrlr_set_supported_log_pages(struct spdk_nvme_ctrlr *ctrlr)
{
	memset(ctrlr->log_page_supported, 0, sizeof(ctrlr->log_page_supported));
	/* Mandatory pages */
	ctrlr->log_page_supported[SPDK_NVME_LOG_ERROR] = true;
	ctrlr->log_page_supported[SPDK_NVME_LOG_HEALTH_INFORMATION] = true;
	ctrlr->log_page_supported[SPDK_NVME_LOG_FIRMWARE_SLOT] = true;
	if (ctrlr->cdata.lpa.celp) {
		ctrlr->log_page_supported[SPDK_NVME_LOG_COMMAND_EFFECTS_LOG] = true;
	}
	if (ctrlr->cdata.vid == SPDK_PCI_VID_INTEL) {
		nvme_ctrlr_set_intel_support_log_pages(ctrlr);
	}
}

static void
nvme_ctrlr_set_intel_supported_features(struct spdk_nvme_ctrlr *ctrlr)
{
	ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_MAX_LBA] = true;
	ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_NATIVE_MAX_LBA] = true;
	ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_POWER_GOVERNOR_SETTING] = true;
	ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_SMBUS_ADDRESS] = true;
	ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_LED_PATTERN] = true;
	ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_RESET_TIMED_WORKLOAD_COUNTERS] = true;
	ctrlr->feature_supported[SPDK_NVME_INTEL_FEAT_LATENCY_TRACKING] = true;
}

static void
nvme_ctrlr_set_supported_features(struct spdk_nvme_ctrlr *ctrlr)
{
	memset(ctrlr->feature_supported, 0, sizeof(ctrlr->feature_supported));
	/* Mandatory features */
	ctrlr->feature_supported[SPDK_NVME_FEAT_ARBITRATION] = true;
	ctrlr->feature_supported[SPDK_NVME_FEAT_POWER_MANAGEMENT] = true;
	ctrlr->feature_supported[SPDK_NVME_FEAT_TEMPERATURE_THRESHOLD] = true;
	ctrlr->feature_supported[SPDK_NVME_FEAT_ERROR_RECOVERY] = true;
	ctrlr->feature_supported[SPDK_NVME_FEAT_NUMBER_OF_QUEUES] = true;
	ctrlr->feature_supported[SPDK_NVME_FEAT_INTERRUPT_COALESCING] = true;
	ctrlr->feature_supported[SPDK_NVME_FEAT_INTERRUPT_VECTOR_CONFIGURATION] = true;
	ctrlr->feature_supported[SPDK_NVME_FEAT_WRITE_ATOMICITY] = true;
	ctrlr->feature_supported[SPDK_NVME_FEAT_ASYNC_EVENT_CONFIGURATION] = true;
	/* Optional features */
	if (ctrlr->cdata.vwc.present) {
		ctrlr->feature_supported[SPDK_NVME_FEAT_VOLATILE_WRITE_CACHE] = true;
	}
	if (ctrlr->cdata.apsta.supported) {
		ctrlr->feature_supported[SPDK_NVME_FEAT_AUTONOMOUS_POWER_STATE_TRANSITION] = true;
	}
	if (ctrlr->cdata.hmpre) {
		ctrlr->feature_supported[SPDK_NVME_FEAT_HOST_MEM_BUFFER] = true;
	}
	if (ctrlr->cdata.vid == SPDK_PCI_VID_INTEL) {
		nvme_ctrlr_set_intel_supported_features(ctrlr);
	}
}

static int
nvme_ctrlr_construct_admin_qpair(struct spdk_nvme_ctrlr *ctrlr)
{
	return nvme_qpair_construct(&ctrlr->adminq,
				    0, /* qpair ID */
				    NVME_ADMIN_ENTRIES,
				    NVME_ADMIN_TRACKERS,
				    ctrlr);
}

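/*
 * The number of entries per I/O queue is capped by CAP.MQES, which is
 *  zero-based.  For example, a controller reporting MQES = 255 allows up
 *  to 256 entries per queue, so NVME_IO_ENTRIES is clamped to that value
 *  below.
 */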
static int
nvme_ctrlr_construct_io_qpairs(struct spdk_nvme_ctrlr *ctrlr)
{
	struct spdk_nvme_qpair		*qpair;
	union spdk_nvme_cap_register	cap;
	uint32_t			i, num_entries, num_trackers;
	int				rc;

	if (ctrlr->ioq != NULL) {
		/*
		 * io_qpairs were already constructed, so just return.
		 *  This typically happens when the controller is
		 *  initialized a second (or subsequent) time after a
		 *  controller reset.
		 */
		return 0;
	}

	/*
	 * The NVMe spec sets a hard limit of 64K max entries, but
	 *  devices may specify a smaller limit, so we need to check
	 *  the MQES field in the capabilities register.
	 */
	cap.raw = nvme_mmio_read_8(ctrlr, cap.raw);
	num_entries = nvme_min(NVME_IO_ENTRIES, cap.bits.mqes + 1);

	/*
	 * No need to have more trackers than entries in the submission queue.
	 *  Note also that for a queue size of N, we can only have (N-1)
	 *  commands outstanding, hence the "-1" here.
	 */
	num_trackers = nvme_min(NVME_IO_TRACKERS, (num_entries - 1));

	ctrlr->max_xfer_size = NVME_MAX_XFER_SIZE;

	ctrlr->ioq = calloc(ctrlr->opts.num_io_queues, sizeof(struct spdk_nvme_qpair));

	if (ctrlr->ioq == NULL) {
		return -1;
	}

	for (i = 0; i < ctrlr->opts.num_io_queues; i++) {
		qpair = &ctrlr->ioq[i];

		/*
		 * Admin queue has ID=0. IO queues start at ID=1 -
		 *  hence the 'i+1' here.
		 */
		rc = nvme_qpair_construct(qpair,
					  i + 1, /* qpair ID */
					  num_entries,
					  num_trackers,
					  ctrlr);
		if (rc) {
			return -1;
		}

		TAILQ_INSERT_TAIL(&ctrlr->free_io_qpairs, qpair, tailq);
	}

	return 0;
}

static void
nvme_ctrlr_fail(struct spdk_nvme_ctrlr *ctrlr)
{
	uint32_t i;

	ctrlr->is_failed = true;
	nvme_qpair_fail(&ctrlr->adminq);
	if (ctrlr->ioq) {
		for (i = 0; i < ctrlr->opts.num_io_queues; i++) {
			nvme_qpair_fail(&ctrlr->ioq[i]);
		}
	}
}

static void
nvme_ctrlr_shutdown(struct spdk_nvme_ctrlr *ctrlr)
{
	union spdk_nvme_cc_register	cc;
	union spdk_nvme_csts_register	csts;
	int				ms_waited = 0;

	cc.raw = nvme_mmio_read_4(ctrlr, cc.raw);
	cc.bits.shn = SPDK_NVME_SHN_NORMAL;
	nvme_mmio_write_4(ctrlr, cc.raw, cc.raw);

	csts.raw = nvme_mmio_read_4(ctrlr, csts.raw);
	/*
	 * The NVMe spec does not define a timeout period
	 *  for shutdown notification, so we just pick
	 *  5 seconds as a reasonable amount of time to
	 *  wait before proceeding.
	 */
	while (csts.bits.shst != SPDK_NVME_SHST_COMPLETE) {
		nvme_delay(1000);
		csts.raw = nvme_mmio_read_4(ctrlr, csts.raw);
		if (ms_waited++ >= 5000) {
			break;
		}
	}
	if (csts.bits.shst != SPDK_NVME_SHST_COMPLETE) {
		nvme_printf(ctrlr, "did not shutdown within 5 seconds\n");
	}
}

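/*
 * Program the admin queue registers and enable the controller.  ASQ/ACQ
 *  point at the admin submission/completion queues, AQA holds their
 *  (zero-based) sizes, and CC selects the entry sizes, memory page size,
 *  and arbitration mechanism before CC.EN is set to 1.  The requested
 *  arbitration method is validated against CAP.AMS; weighted round robin
 *  and vendor specific arbitration are only used if the controller
 *  advertises them.
 */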
static int
nvme_ctrlr_enable(struct spdk_nvme_ctrlr *ctrlr)
{
	union spdk_nvme_cc_register	cc;
	union spdk_nvme_aqa_register	aqa;
	union spdk_nvme_cap_register	cap;

	cc.raw = nvme_mmio_read_4(ctrlr, cc.raw);

	if (cc.bits.en != 0) {
		nvme_printf(ctrlr, "%s called with CC.EN = 1\n", __func__);
		return -EINVAL;
	}

	nvme_mmio_write_8(ctrlr, asq, ctrlr->adminq.cmd_bus_addr);
	nvme_mmio_write_8(ctrlr, acq, ctrlr->adminq.cpl_bus_addr);

	aqa.raw = 0;
	/* acqs and asqs are 0-based. */
	aqa.bits.acqs = ctrlr->adminq.num_entries - 1;
	aqa.bits.asqs = ctrlr->adminq.num_entries - 1;
	nvme_mmio_write_4(ctrlr, aqa.raw, aqa.raw);

	cc.bits.en = 1;
	cc.bits.css = 0;
	cc.bits.shn = 0;
	cc.bits.iosqes = 6; /* SQ entry size == 64 == 2^6 */
	cc.bits.iocqes = 4; /* CQ entry size == 16 == 2^4 */

	/* Page size is 2 ^ (12 + mps). */
	cc.bits.mps = nvme_u32log2(PAGE_SIZE) - 12;

	cap.raw = nvme_mmio_read_8(ctrlr, cap.raw);

	switch (ctrlr->opts.arb_mechanism) {
	case SPDK_NVME_CC_AMS_RR:
		break;
	case SPDK_NVME_CC_AMS_WRR:
		if (SPDK_NVME_CAP_AMS_WRR & cap.bits.ams) {
			break;
		}
		return -EINVAL;
	case SPDK_NVME_CC_AMS_VS:
		if (SPDK_NVME_CAP_AMS_VS & cap.bits.ams) {
			break;
		}
		return -EINVAL;
	default:
		return -EINVAL;
	}

	cc.bits.ams = ctrlr->opts.arb_mechanism;

	nvme_mmio_write_4(ctrlr, cc.raw, cc.raw);

	return 0;
}

static void
nvme_ctrlr_set_state(struct spdk_nvme_ctrlr *ctrlr, enum nvme_ctrlr_state state,
		     uint64_t timeout_in_ms)
{
	ctrlr->state = state;
	if (timeout_in_ms == NVME_TIMEOUT_INFINITE) {
		ctrlr->state_timeout_tsc = NVME_TIMEOUT_INFINITE;
	} else {
		ctrlr->state_timeout_tsc = nvme_get_tsc() + (timeout_in_ms * nvme_get_tsc_hz()) / 1000;
	}
}

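/*
 * Reset works by disabling all queues, forcing the initialization state
 *  machine back to NVME_CTRLR_STATE_INIT, and stepping
 *  nvme_ctrlr_process_init() until the controller reports ready again.
 *  Any qpairs on the active list are then re-created on the controller.
 */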
int
spdk_nvme_ctrlr_reset(struct spdk_nvme_ctrlr *ctrlr)
{
	int rc = 0;
	uint32_t i;
	struct spdk_nvme_qpair *qpair;

	nvme_mutex_lock(&ctrlr->ctrlr_lock);

	if (ctrlr->is_resetting || ctrlr->is_failed) {
		/*
		 * Controller is already resetting or has failed.  Return
		 *  immediately since there is no need to kick off another
		 *  reset in these cases.
		 */
		nvme_mutex_unlock(&ctrlr->ctrlr_lock);
		return 0;
	}

	ctrlr->is_resetting = true;

	nvme_printf(ctrlr, "resetting controller\n");

	/* Disable all queues before disabling the controller hardware. */
	nvme_qpair_disable(&ctrlr->adminq);
	for (i = 0; i < ctrlr->opts.num_io_queues; i++) {
		nvme_qpair_disable(&ctrlr->ioq[i]);
	}

	/* Set the state back to INIT to cause a full hardware reset. */
	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_INIT, NVME_TIMEOUT_INFINITE);

	while (ctrlr->state != NVME_CTRLR_STATE_READY) {
		if (nvme_ctrlr_process_init(ctrlr) != 0) {
			nvme_printf(ctrlr, "%s: controller reinitialization failed\n", __func__);
			nvme_ctrlr_fail(ctrlr);
			rc = -1;
			break;
		}
	}

	if (!ctrlr->is_failed) {
		/* Reinitialize qpairs */
		TAILQ_FOREACH(qpair, &ctrlr->active_io_qpairs, tailq) {
			if (spdk_nvme_ctrlr_create_qpair(ctrlr, qpair) != 0) {
				nvme_ctrlr_fail(ctrlr);
				rc = -1;
			}
		}
	}

	ctrlr->is_resetting = false;

	nvme_mutex_unlock(&ctrlr->ctrlr_lock);

	return rc;
}

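/*
 * Identify the controller and capture the controller data structure.
 *  MDTS is a power-of-two multiple of the minimum page size; for example,
 *  MDTS = 5 with a 4 KiB minimum page size caps transfers at
 *  4 KiB * 2^5 = 128 KiB.
 */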
static int
nvme_ctrlr_identify(struct spdk_nvme_ctrlr *ctrlr)
{
	struct nvme_completion_poll_status	status;
	int					rc;

	status.done = false;
	rc = nvme_ctrlr_cmd_identify_controller(ctrlr, &ctrlr->cdata,
						nvme_completion_poll_cb, &status);
	if (rc != 0) {
		return rc;
	}

	while (status.done == false) {
		spdk_nvme_qpair_process_completions(&ctrlr->adminq, 0);
	}
	if (spdk_nvme_cpl_is_error(&status.cpl)) {
		nvme_printf(ctrlr, "nvme_identify_controller failed!\n");
		return -ENXIO;
	}

	/*
	 * Use MDTS to ensure our default max_xfer_size doesn't exceed what the
	 *  controller supports.
	 */
	if (ctrlr->cdata.mdts > 0) {
		ctrlr->max_xfer_size = nvme_min(ctrlr->max_xfer_size,
						ctrlr->min_page_size * (1 << (ctrlr->cdata.mdts)));
	}

	return 0;
}

static int
nvme_ctrlr_set_num_qpairs(struct spdk_nvme_ctrlr *ctrlr)
{
	struct nvme_completion_poll_status	status;
	int					cq_allocated, sq_allocated;
	int					rc;

	status.done = false;

	if (ctrlr->opts.num_io_queues > SPDK_NVME_MAX_IO_QUEUES) {
		nvme_printf(ctrlr, "Limiting requested num_io_queues %u to max %d\n",
			    ctrlr->opts.num_io_queues, SPDK_NVME_MAX_IO_QUEUES);
		ctrlr->opts.num_io_queues = SPDK_NVME_MAX_IO_QUEUES;
	} else if (ctrlr->opts.num_io_queues < 1) {
		nvme_printf(ctrlr, "Requested num_io_queues 0, increasing to 1\n");
		ctrlr->opts.num_io_queues = 1;
	}

	rc = nvme_ctrlr_cmd_set_num_queues(ctrlr, ctrlr->opts.num_io_queues,
					   nvme_completion_poll_cb, &status);
	if (rc != 0) {
		return rc;
	}

	while (status.done == false) {
		spdk_nvme_qpair_process_completions(&ctrlr->adminq, 0);
	}
	if (spdk_nvme_cpl_is_error(&status.cpl)) {
		nvme_printf(ctrlr, "nvme_set_num_queues failed!\n");
		return -ENXIO;
	}

	/*
	 * Data in cdw0 is 0-based.
	 * Lower 16-bits indicate number of submission queues allocated.
	 * Upper 16-bits indicate number of completion queues allocated.
	 */
	sq_allocated = (status.cpl.cdw0 & 0xFFFF) + 1;
	cq_allocated = (status.cpl.cdw0 >> 16) + 1;

	ctrlr->opts.num_io_queues = nvme_min(sq_allocated, cq_allocated);

	return 0;
}

static void
nvme_ctrlr_destruct_namespaces(struct spdk_nvme_ctrlr *ctrlr)
{
	if (ctrlr->ns) {
		uint32_t i, num_ns = ctrlr->num_ns;

		for (i = 0; i < num_ns; i++) {
			nvme_ns_destruct(&ctrlr->ns[i]);
		}

		free(ctrlr->ns);
		ctrlr->ns = NULL;
		ctrlr->num_ns = 0;
	}

	if (ctrlr->nsdata) {
		nvme_free(ctrlr->nsdata);
		ctrlr->nsdata = NULL;
	}
}

static int
nvme_ctrlr_construct_namespaces(struct spdk_nvme_ctrlr *ctrlr)
{
	uint32_t i, nn = ctrlr->cdata.nn;
	uint64_t phys_addr = 0;

	if (nn == 0) {
		nvme_printf(ctrlr, "controller has 0 namespaces\n");
		return -1;
	}

	/* ctrlr->num_ns may be 0 (startup) or a different number of namespaces (reset),
	 * so check if we need to reallocate.
	 */
	if (nn != ctrlr->num_ns) {
		nvme_ctrlr_destruct_namespaces(ctrlr);

		ctrlr->ns = calloc(nn, sizeof(struct spdk_nvme_ns));
		if (ctrlr->ns == NULL) {
			goto fail;
		}

		ctrlr->nsdata = nvme_malloc("nvme_namespaces",
					    nn * sizeof(struct spdk_nvme_ns_data), 64,
					    &phys_addr);
		if (ctrlr->nsdata == NULL) {
			goto fail;
		}

		ctrlr->num_ns = nn;
	}

	for (i = 0; i < nn; i++) {
		struct spdk_nvme_ns	*ns = &ctrlr->ns[i];
		uint32_t		nsid = i + 1;

		if (nvme_ns_construct(ns, nsid, ctrlr) != 0) {
			goto fail;
		}
	}

	return 0;

fail:
	nvme_ctrlr_destruct_namespaces(ctrlr);
	return -1;
}

static void
nvme_ctrlr_async_event_cb(void *arg, const struct spdk_nvme_cpl *cpl)
{
	struct nvme_async_event_request	*aer = arg;
	struct spdk_nvme_ctrlr		*ctrlr = aer->ctrlr;

	if (cpl->status.sc == SPDK_NVME_SC_ABORTED_SQ_DELETION) {
		/*
		 *  This is simulated when the controller is being shut down, to
		 *  effectively abort outstanding asynchronous event requests
		 *  and make sure all memory is freed.  Do not repost the
		 *  request in this case.
		 */
		return;
	}

	if (ctrlr->aer_cb_fn != NULL) {
		ctrlr->aer_cb_fn(ctrlr->aer_cb_arg, cpl);
	}

	/*
	 * Repost another asynchronous event request to replace the one
	 *  that just completed.
	 */
	if (nvme_ctrlr_construct_and_submit_aer(ctrlr, aer)) {
		/*
		 * We can't do anything to recover from a failure here,
		 * so just print a warning message and leave the AER unsubmitted.
		 */
		nvme_printf(ctrlr, "resubmitting AER failed!\n");
	}
}

static int
nvme_ctrlr_construct_and_submit_aer(struct spdk_nvme_ctrlr *ctrlr,
				    struct nvme_async_event_request *aer)
{
	struct nvme_request *req;

	aer->ctrlr = ctrlr;
	req = nvme_allocate_request_null(nvme_ctrlr_async_event_cb, aer);
	if (req == NULL) {
		return -1;
	}
	aer->req = req;

	req->cmd.opc = SPDK_NVME_OPC_ASYNC_EVENT_REQUEST;
	return nvme_ctrlr_submit_admin_request(ctrlr, req);
}

static int
nvme_ctrlr_configure_aer(struct spdk_nvme_ctrlr *ctrlr)
{
	union spdk_nvme_critical_warning_state	state;
	struct nvme_async_event_request		*aer;
	uint32_t				i;
	struct nvme_completion_poll_status	status;
	int					rc;

	status.done = false;

	state.raw = 0xFF;
	state.bits.reserved = 0;
	rc = nvme_ctrlr_cmd_set_async_event_config(ctrlr, state, nvme_completion_poll_cb, &status);
	if (rc != 0) {
		return rc;
	}

	while (status.done == false) {
		spdk_nvme_qpair_process_completions(&ctrlr->adminq, 0);
	}
	if (spdk_nvme_cpl_is_error(&status.cpl)) {
		nvme_printf(ctrlr, "nvme_ctrlr_cmd_set_async_event_config failed!\n");
		return -ENXIO;
	}

	/* aerl is a zero-based value, so we need to add 1 here. */
	ctrlr->num_aers = nvme_min(NVME_MAX_ASYNC_EVENTS, (ctrlr->cdata.aerl + 1));

	for (i = 0; i < ctrlr->num_aers; i++) {
		aer = &ctrlr->aer[i];
		if (nvme_ctrlr_construct_and_submit_aer(ctrlr, aer)) {
			nvme_printf(ctrlr, "nvme_ctrlr_construct_and_submit_aer failed!\n");
			return -1;
		}
	}

	return 0;
}

/**
 * This function will be called repeatedly during initialization until the controller is ready.
 */
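/*
 * The state machine below keys off the (CC.EN, CSTS.RDY) pair:
 *
 *   CC.EN = 1, CSTS.RDY = 0: wait for RDY to become 1, then clear EN
 *   CC.EN = 1, CSTS.RDY = 1: clear EN, then wait for RDY to become 0
 *   CC.EN = 0, CSTS.RDY = 1: wait for RDY to become 0
 *   CC.EN = 0, CSTS.RDY = 0: enable the controller and wait for RDY = 1
 *
 * Each wait state times out after CAP.TO (reported in 500 ms units).
 */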
int
nvme_ctrlr_process_init(struct spdk_nvme_ctrlr *ctrlr)
{
	union spdk_nvme_cc_register cc;
	union spdk_nvme_csts_register csts;
	union spdk_nvme_cap_register cap;
	uint32_t ready_timeout_in_ms;
	int rc;

	cc.raw = nvme_mmio_read_4(ctrlr, cc.raw);
	csts.raw = nvme_mmio_read_4(ctrlr, csts.raw);
	cap.raw = nvme_mmio_read_8(ctrlr, cap.raw);

	ready_timeout_in_ms = 500 * cap.bits.to;

	/*
	 * Check if the current initialization step is done or has timed out.
	 */
	switch (ctrlr->state) {
	case NVME_CTRLR_STATE_INIT:
		/* Begin the hardware initialization by making sure the controller is disabled. */
		if (cc.bits.en) {
			/*
			 * Controller is currently enabled. We need to disable it to cause a reset.
			 *
			 * If CC.EN = 1 && CSTS.RDY = 0, the controller is in the process of becoming ready.
			 *  Wait for the ready bit to be 1 before disabling the controller.
			 */
			if (csts.bits.rdy == 0) {
				nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1, ready_timeout_in_ms);
				return 0;
			}

			/* CC.EN = 1 && CSTS.RDY == 1, so we can immediately disable the controller. */
			cc.bits.en = 0;
			nvme_mmio_write_4(ctrlr, cc.raw, cc.raw);
			nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0, ready_timeout_in_ms);
			return 0;
		} else {
			if (csts.bits.rdy == 1) {
				/*
				 * Controller is in the process of shutting down.
				 * We need to wait for RDY to become 0.
				 */
				nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0, ready_timeout_in_ms);
				return 0;
			}

			/*
			 * Controller is currently disabled. We can jump straight to enabling it.
			 */
			rc = nvme_ctrlr_enable(ctrlr);
			nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1, ready_timeout_in_ms);
			return rc;
		}
		break;

	case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1:
		if (csts.bits.rdy == 1) {
			/* CC.EN = 1 && CSTS.RDY = 1, so we can set CC.EN = 0 now. */
			cc.bits.en = 0;
			nvme_mmio_write_4(ctrlr, cc.raw, cc.raw);
			nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0, ready_timeout_in_ms);
			return 0;
		}
		break;

	case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0:
		if (csts.bits.rdy == 0) {
			/* CC.EN = 0 && CSTS.RDY = 0, so we can enable the controller now. */
			rc = nvme_ctrlr_enable(ctrlr);
			nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1, ready_timeout_in_ms);
			return rc;
		}
		break;

	case NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1:
		if (csts.bits.rdy == 1) {
			/*
			 * The controller has been enabled.
			 *  Perform the rest of initialization in nvme_ctrlr_start() serially.
			 */
			rc = nvme_ctrlr_start(ctrlr);
			nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_READY, NVME_TIMEOUT_INFINITE);
			return rc;
		}
		break;

	default:
		nvme_assert(0, ("unhandled ctrlr state %d\n", ctrlr->state));
		nvme_ctrlr_fail(ctrlr);
		return -1;
	}

	if (ctrlr->state_timeout_tsc != NVME_TIMEOUT_INFINITE &&
	    nvme_get_tsc() > ctrlr->state_timeout_tsc) {
		nvme_printf(ctrlr, "Initialization timed out in state %d\n", ctrlr->state);
		nvme_ctrlr_fail(ctrlr);
		return -1;
	}

	return 0;
}

int
nvme_ctrlr_start(struct spdk_nvme_ctrlr *ctrlr)
{
	nvme_qpair_reset(&ctrlr->adminq);

	nvme_qpair_enable(&ctrlr->adminq);

	if (nvme_ctrlr_identify(ctrlr) != 0) {
		return -1;
	}

	if (nvme_ctrlr_set_num_qpairs(ctrlr) != 0) {
		return -1;
	}

	if (nvme_ctrlr_construct_io_qpairs(ctrlr)) {
		return -1;
	}

	if (nvme_ctrlr_construct_namespaces(ctrlr) != 0) {
		return -1;
	}

	if (nvme_ctrlr_configure_aer(ctrlr) != 0) {
		return -1;
	}

	nvme_ctrlr_set_supported_log_pages(ctrlr);
	nvme_ctrlr_set_supported_features(ctrlr);

	if (ctrlr->cdata.sgls.supported) {
		ctrlr->flags |= SPDK_NVME_CTRLR_SGL_SUPPORTED;
	}

	return 0;
}

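/*
 * Map the optional controller memory buffer (CMB).  CMBSZ.SZ is reported
 *  in units of 2^(12 + 4 * SZU) bytes; for example, SZU = 2 (1 MiB units)
 *  with SZ = 16 describes a 16 MiB buffer.  CMBLOC gives the BAR (BIR)
 *  and the offset of the buffer within that BAR, in the same units.
 */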
static void
nvme_ctrlr_map_cmb(struct spdk_nvme_ctrlr *ctrlr)
{
	int rc;
	void *addr;
	uint32_t bir;
	union spdk_nvme_cmbsz_register cmbsz;
	union spdk_nvme_cmbloc_register cmbloc;
	uint64_t size, unit_size, offset, bar_size, bar_phys_addr;

	cmbsz.raw = nvme_mmio_read_4(ctrlr, cmbsz.raw);
	cmbloc.raw = nvme_mmio_read_4(ctrlr, cmbloc.raw);
	if (!cmbsz.bits.sz) {
		goto exit;
	}

	bir = cmbloc.bits.bir;
	/* BIR values 0, 2, 3, 4, 5 are valid; 1 is reserved. */
	if (bir > 5 || bir == 1) {
		goto exit;
	}

	/* unit size for 4KB/64KB/1MB/16MB/256MB/4GB/64GB */
	unit_size = (uint64_t)1 << (12 + 4 * cmbsz.bits.szu);
	/* controller memory buffer size in bytes */
	size = unit_size * cmbsz.bits.sz;
	/* controller memory buffer offset from BAR in bytes */
	offset = unit_size * cmbloc.bits.ofst;

	nvme_pcicfg_get_bar_addr_len(ctrlr->devhandle, bir, &bar_phys_addr, &bar_size);

	if (offset > bar_size) {
		goto exit;
	}

	if (size > bar_size - offset) {
		goto exit;
	}

	rc = nvme_pcicfg_map_bar_write_combine(ctrlr->devhandle, bir, &addr);
	if ((rc != 0) || addr == NULL) {
		goto exit;
	}

	ctrlr->cmb_bar_virt_addr = addr;
	ctrlr->cmb_bar_phys_addr = bar_phys_addr;
	ctrlr->cmb_size = size;
	ctrlr->cmb_current_offset = offset;

	if (!cmbsz.bits.sqs) {
		ctrlr->opts.use_cmb_sqs = false;
	}

	return;
exit:
	ctrlr->cmb_bar_virt_addr = NULL;
	ctrlr->opts.use_cmb_sqs = false;
	return;
}

static int
nvme_ctrlr_unmap_cmb(struct spdk_nvme_ctrlr *ctrlr)
{
	int rc = 0;
	union spdk_nvme_cmbloc_register cmbloc;
	void *addr = ctrlr->cmb_bar_virt_addr;

	if (addr) {
		cmbloc.raw = nvme_mmio_read_4(ctrlr, cmbloc.raw);
		rc = nvme_pcicfg_unmap_bar(ctrlr->devhandle, cmbloc.bits.bir, addr);
	}
	return rc;
}

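/*
 * Carve an allocation of `length` bytes out of the CMB.  The current
 *  offset is rounded up with (offset + (aligned - 1)) & ~(aligned - 1),
 *  which assumes `aligned` is a power of two; e.g. an offset of 100 with
 *  64-byte alignment rounds up to 128.
 */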
int
nvme_ctrlr_alloc_cmb(struct spdk_nvme_ctrlr *ctrlr, uint64_t length, uint64_t aligned,
		     uint64_t *offset)
{
	uint64_t round_offset;

	round_offset = ctrlr->cmb_current_offset;
	round_offset = (round_offset + (aligned - 1)) & ~(aligned - 1);

	if (round_offset + length > ctrlr->cmb_size) {
		return -1;
	}

	*offset = round_offset;
	ctrlr->cmb_current_offset = round_offset + length;

	return 0;
}

static int
nvme_ctrlr_allocate_bars(struct spdk_nvme_ctrlr *ctrlr)
{
	int rc;
	void *addr;

	rc = nvme_pcicfg_map_bar(ctrlr->devhandle, 0, 0 /* writable */, &addr);
	ctrlr->regs = (volatile struct spdk_nvme_registers *)addr;
	if ((ctrlr->regs == NULL) || (rc != 0)) {
		nvme_printf(ctrlr, "pci_device_map_range failed with error code %d\n", rc);
		return -1;
	}

	nvme_ctrlr_map_cmb(ctrlr);

	return 0;
}

static int
nvme_ctrlr_free_bars(struct spdk_nvme_ctrlr *ctrlr)
{
	int rc = 0;
	void *addr = (void *)ctrlr->regs;

	rc = nvme_ctrlr_unmap_cmb(ctrlr);
	if (rc != 0) {
		nvme_printf(ctrlr, "nvme_ctrlr_unmap_cmb failed with error code %d\n", rc);
		return -1;
	}

	if (addr) {
		rc = nvme_pcicfg_unmap_bar(ctrlr->devhandle, 0, addr);
	}
	return rc;
}

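/*
 * First-stage construction: map BAR 0 (the register space), enable PCI
 *  bus mastering, and derive the doorbell stride and minimum page size
 *  from CAP.  CAP.DSTRD = 0 means doorbells are packed every 4 bytes
 *  (doorbell_stride_u32 = 1); each increment of DSTRD doubles the spacing.
 */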
int
nvme_ctrlr_construct(struct spdk_nvme_ctrlr *ctrlr, void *devhandle)
{
	union spdk_nvme_cap_register	cap;
	uint32_t			cmd_reg;
	int				status;
	int				rc;

	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_INIT, NVME_TIMEOUT_INFINITE);
	ctrlr->devhandle = devhandle;
	ctrlr->flags = 0;

	status = nvme_ctrlr_allocate_bars(ctrlr);
	if (status != 0) {
		return status;
	}

	/* Enable PCI busmaster. */
	nvme_pcicfg_read32(devhandle, &cmd_reg, 4);
	cmd_reg |= 0x4;
	nvme_pcicfg_write32(devhandle, cmd_reg, 4);

	cap.raw = nvme_mmio_read_8(ctrlr, cap.raw);

	/* Doorbell stride is 2 ^ (dstrd + 2) bytes,
	 * but we want it in uint32_t units, so drop the "+ 2" */
	ctrlr->doorbell_stride_u32 = 1 << cap.bits.dstrd;

	ctrlr->min_page_size = 1 << (12 + cap.bits.mpsmin);

	rc = nvme_ctrlr_construct_admin_qpair(ctrlr);
	if (rc) {
		return rc;
	}

	ctrlr->is_resetting = false;
	ctrlr->is_failed = false;

	TAILQ_INIT(&ctrlr->free_io_qpairs);
	TAILQ_INIT(&ctrlr->active_io_qpairs);

	nvme_mutex_init_recursive(&ctrlr->ctrlr_lock);

	return 0;
}

void
nvme_ctrlr_destruct(struct spdk_nvme_ctrlr *ctrlr)
{
	uint32_t	i;

	while (!TAILQ_EMPTY(&ctrlr->active_io_qpairs)) {
		struct spdk_nvme_qpair *qpair = TAILQ_FIRST(&ctrlr->active_io_qpairs);

		spdk_nvme_ctrlr_free_io_qpair(qpair);
	}

	nvme_ctrlr_shutdown(ctrlr);

	nvme_ctrlr_destruct_namespaces(ctrlr);
	if (ctrlr->ioq) {
		for (i = 0; i < ctrlr->opts.num_io_queues; i++) {
			nvme_qpair_destroy(&ctrlr->ioq[i]);
		}
	}

	free(ctrlr->ioq);

	nvme_qpair_destroy(&ctrlr->adminq);

	nvme_ctrlr_free_bars(ctrlr);
	nvme_mutex_destroy(&ctrlr->ctrlr_lock);
}

int
nvme_ctrlr_submit_admin_request(struct spdk_nvme_ctrlr *ctrlr,
				struct nvme_request *req)
{
	return nvme_qpair_submit_request(&ctrlr->adminq, req);
}

int32_t
spdk_nvme_ctrlr_process_admin_completions(struct spdk_nvme_ctrlr *ctrlr)
{
	int32_t num_completions;

	nvme_mutex_lock(&ctrlr->ctrlr_lock);
	num_completions = spdk_nvme_qpair_process_completions(&ctrlr->adminq, 0);
	nvme_mutex_unlock(&ctrlr->ctrlr_lock);

	return num_completions;
}

const struct spdk_nvme_ctrlr_data *
spdk_nvme_ctrlr_get_data(struct spdk_nvme_ctrlr *ctrlr)
{
	return &ctrlr->cdata;
}

union spdk_nvme_cap_register
spdk_nvme_ctrlr_get_regs_cap(struct spdk_nvme_ctrlr *ctrlr)
{
	union spdk_nvme_cap_register cap;

	cap.raw = nvme_mmio_read_8(ctrlr, cap.raw);
	return cap;
}

union spdk_nvme_vs_register
spdk_nvme_ctrlr_get_regs_vs(struct spdk_nvme_ctrlr *ctrlr)
{
	union spdk_nvme_vs_register vs;

	vs.raw = nvme_mmio_read_4(ctrlr, vs.raw);
	return vs;
}

uint32_t
spdk_nvme_ctrlr_get_num_ns(struct spdk_nvme_ctrlr *ctrlr)
{
	return ctrlr->num_ns;
}

struct spdk_nvme_ns *
spdk_nvme_ctrlr_get_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t ns_id)
{
	if (ns_id < 1 || ns_id > ctrlr->num_ns) {
		return NULL;
	}

	return &ctrlr->ns[ns_id - 1];
}

void
spdk_nvme_ctrlr_register_aer_callback(struct spdk_nvme_ctrlr *ctrlr,
				      spdk_nvme_aer_cb aer_cb_fn,
				      void *aer_cb_arg)
{
	ctrlr->aer_cb_fn = aer_cb_fn;
	ctrlr->aer_cb_arg = aer_cb_arg;
}

bool
spdk_nvme_ctrlr_is_log_page_supported(struct spdk_nvme_ctrlr *ctrlr, uint8_t log_page)
{
	/* No bounds check necessary, since log_page is uint8_t and log_page_supported has 256 entries */
	SPDK_STATIC_ASSERT(sizeof(ctrlr->log_page_supported) == 256, "log_page_supported size mismatch");
	return ctrlr->log_page_supported[log_page];
}

bool
spdk_nvme_ctrlr_is_feature_supported(struct spdk_nvme_ctrlr *ctrlr, uint8_t feature_code)
{
	/* No bounds check necessary, since feature_code is uint8_t and feature_supported has 256 entries */
	SPDK_STATIC_ASSERT(sizeof(ctrlr->feature_supported) == 256, "feature_supported size mismatch");
	return ctrlr->feature_supported[feature_code];
}

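/*
 * The namespace management, format, and firmware update helpers below all
 *  follow the same pattern: issue the admin command, poll the admin queue
 *  until it completes, and then reset the controller so that the updated
 *  namespace or firmware state is picked up during reinitialization.
 */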
int
spdk_nvme_ctrlr_attach_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid,
			  struct spdk_nvme_ctrlr_list *payload)
{
	struct nvme_completion_poll_status	status;
	int					res;

	status.done = false;
	res = nvme_ctrlr_cmd_attach_ns(ctrlr, nsid, payload,
				       nvme_completion_poll_cb, &status);
	if (res) {
		return res;
	}
	while (status.done == false) {
		nvme_mutex_lock(&ctrlr->ctrlr_lock);
		spdk_nvme_qpair_process_completions(&ctrlr->adminq, 0);
		nvme_mutex_unlock(&ctrlr->ctrlr_lock);
	}
	if (spdk_nvme_cpl_is_error(&status.cpl)) {
		nvme_printf(ctrlr, "spdk_nvme_ctrlr_attach_ns failed!\n");
		return -ENXIO;
	}

	return spdk_nvme_ctrlr_reset(ctrlr);
}

int
spdk_nvme_ctrlr_detach_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid,
			  struct spdk_nvme_ctrlr_list *payload)
{
	struct nvme_completion_poll_status	status;
	int					res;

	status.done = false;
	res = nvme_ctrlr_cmd_detach_ns(ctrlr, nsid, payload,
				       nvme_completion_poll_cb, &status);
	if (res) {
		return res;
	}
	while (status.done == false) {
		nvme_mutex_lock(&ctrlr->ctrlr_lock);
		spdk_nvme_qpair_process_completions(&ctrlr->adminq, 0);
		nvme_mutex_unlock(&ctrlr->ctrlr_lock);
	}
	if (spdk_nvme_cpl_is_error(&status.cpl)) {
		nvme_printf(ctrlr, "spdk_nvme_ctrlr_detach_ns failed!\n");
		return -ENXIO;
	}

	return spdk_nvme_ctrlr_reset(ctrlr);
}

uint32_t
spdk_nvme_ctrlr_create_ns(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_ns_data *payload)
{
	struct nvme_completion_poll_status	status;
	int					res;

	status.done = false;
	res = nvme_ctrlr_cmd_create_ns(ctrlr, payload, nvme_completion_poll_cb, &status);
	if (res) {
		return 0;
	}
	while (status.done == false) {
		nvme_mutex_lock(&ctrlr->ctrlr_lock);
		spdk_nvme_qpair_process_completions(&ctrlr->adminq, 0);
		nvme_mutex_unlock(&ctrlr->ctrlr_lock);
	}
	if (spdk_nvme_cpl_is_error(&status.cpl)) {
		nvme_printf(ctrlr, "spdk_nvme_ctrlr_create_ns failed!\n");
		return 0;
	}

	res = spdk_nvme_ctrlr_reset(ctrlr);
	if (res) {
		return 0;
	}

	/* Return the namespace ID that was created */
	return status.cpl.cdw0;
}

int
spdk_nvme_ctrlr_delete_ns(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid)
{
	struct nvme_completion_poll_status	status;
	int					res;

	status.done = false;
	res = nvme_ctrlr_cmd_delete_ns(ctrlr, nsid, nvme_completion_poll_cb, &status);
	if (res) {
		return res;
	}
	while (status.done == false) {
		nvme_mutex_lock(&ctrlr->ctrlr_lock);
		spdk_nvme_qpair_process_completions(&ctrlr->adminq, 0);
		nvme_mutex_unlock(&ctrlr->ctrlr_lock);
	}
	if (spdk_nvme_cpl_is_error(&status.cpl)) {
		nvme_printf(ctrlr, "spdk_nvme_ctrlr_delete_ns failed!\n");
		return -ENXIO;
	}

	return spdk_nvme_ctrlr_reset(ctrlr);
}

int
spdk_nvme_ctrlr_format(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid,
		       struct spdk_nvme_format *format)
{
	struct nvme_completion_poll_status	status;
	int					res;

	status.done = false;
	res = nvme_ctrlr_cmd_format(ctrlr, nsid, format, nvme_completion_poll_cb,
				    &status);
	if (res) {
		return res;
	}
	while (status.done == false) {
		nvme_mutex_lock(&ctrlr->ctrlr_lock);
		spdk_nvme_qpair_process_completions(&ctrlr->adminq, 0);
		nvme_mutex_unlock(&ctrlr->ctrlr_lock);
	}
	if (spdk_nvme_cpl_is_error(&status.cpl)) {
		nvme_printf(ctrlr, "spdk_nvme_ctrlr_format failed!\n");
		return -ENXIO;
	}

	return spdk_nvme_ctrlr_reset(ctrlr);
}

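/*
 * Download and activate a firmware image.  The image must be a multiple
 *  of 4 bytes and is transferred in min_page_size chunks via Firmware
 *  Image Download; e.g. a 128 KiB image with a 4 KiB minimum page size
 *  takes 32 download commands.  The image is then committed to `slot`
 *  with the "replace image" commit action and the controller is reset.
 */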
int
spdk_nvme_ctrlr_update_firmware(struct spdk_nvme_ctrlr *ctrlr, void *payload, uint32_t size,
				int slot)
{
	struct spdk_nvme_fw_commit		fw_commit;
	struct nvme_completion_poll_status	status;
	int					res;
	unsigned int				size_remaining;
	unsigned int				offset;
	unsigned int				transfer;
	uint8_t					*p; /* uint8_t * so the pointer arithmetic below is standard C */

	if (size % 4) {
		nvme_printf(ctrlr, "spdk_nvme_ctrlr_update_firmware invalid size!\n");
		return -1;
	}

	/* Firmware download */
	size_remaining = size;
	offset = 0;
	p = payload;

	while (size_remaining > 0) {
		transfer = nvme_min(size_remaining, ctrlr->min_page_size);
		status.done = false;

		res = nvme_ctrlr_cmd_fw_image_download(ctrlr, transfer, offset, p,
						       nvme_completion_poll_cb,
						       &status);
		if (res) {
			return res;
		}

		while (status.done == false) {
			nvme_mutex_lock(&ctrlr->ctrlr_lock);
			spdk_nvme_qpair_process_completions(&ctrlr->adminq, 0);
			nvme_mutex_unlock(&ctrlr->ctrlr_lock);
		}
		if (spdk_nvme_cpl_is_error(&status.cpl)) {
			nvme_printf(ctrlr, "spdk_nvme_ctrlr_fw_image_download failed!\n");
			return -ENXIO;
		}
		p += transfer;
		offset += transfer;
		size_remaining -= transfer;
	}

	/* Firmware commit */
	memset(&fw_commit, 0, sizeof(struct spdk_nvme_fw_commit));
	fw_commit.fs = slot;
	fw_commit.ca = SPDK_NVME_FW_COMMIT_REPLACE_IMG;

	status.done = false;

	res = nvme_ctrlr_cmd_fw_commit(ctrlr, &fw_commit, nvme_completion_poll_cb,
				       &status);
	if (res) {
		return res;
	}

	while (status.done == false) {
		nvme_mutex_lock(&ctrlr->ctrlr_lock);
		spdk_nvme_qpair_process_completions(&ctrlr->adminq, 0);
		nvme_mutex_unlock(&ctrlr->ctrlr_lock);
	}
	if (spdk_nvme_cpl_is_error(&status.cpl)) {
		nvme_printf(ctrlr, "nvme_ctrlr_cmd_fw_commit failed!\n");
		return -ENXIO;
	}

	return spdk_nvme_ctrlr_reset(ctrlr);
}
1432