xref: /spdk/lib/nvme/nvme_ctrlr.c (revision 1010fb3af11b62e6a6826ea1bef40ab49cc503a4)
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
5  *   All rights reserved.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of Intel Corporation nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #include "nvme_internal.h"
35 
36 /**
37  * \file
38  *
39  */
40 
41 static void nvme_ctrlr_construct_and_submit_aer(struct nvme_controller *ctrlr,
42 		struct nvme_async_event_request *aer);
43 
44 static int
45 nvme_ctrlr_construct_admin_qpair(struct nvme_controller *ctrlr)
46 {
47 	struct nvme_qpair	*qpair;
48 	int rc;
49 
50 	qpair = &ctrlr->adminq;
51 
52 	/*
53 	 * The admin queue's max xfer size is treated differently than the
54 	 *  max I/O xfer size.  16KB is sufficient here - maybe even less?
55 	 */
56 	rc = nvme_qpair_construct(qpair,
57 				  0, /* qpair ID */
58 				  NVME_ADMIN_ENTRIES,
59 				  NVME_ADMIN_TRACKERS,
60 				  ctrlr);
61 	return rc;
62 }
63 
64 static int
65 nvme_ctrlr_construct_io_qpairs(struct nvme_controller *ctrlr)
66 {
67 	struct nvme_qpair		*qpair;
68 	union nvme_cap_lo_register	cap_lo;
69 	int				i, num_entries, num_trackers, rc;
70 
71 	rc = 0;
72 	if (ctrlr->ioq != NULL) {
73 		/*
74 		 * io_qpairs were already constructed, so just return.
75 		 *  This typically happens when the controller is
76 		 *  initialized a second (or subsequent) time after a
77 		 *  controller reset.
78 		 */
79 		return 0;
80 	}
81 
82 	/*
83 	 * NVMe spec sets a hard limit of 64K max entries, but
84 	 *  devices may specify a smaller limit, so we need to check
85 	 *  the MQES field in the capabilities register.
86 	 */
87 	cap_lo.raw = nvme_mmio_read_4(ctrlr, cap_lo.raw);
88 	num_entries = nvme_min(NVME_IO_ENTRIES, cap_lo.bits.mqes + 1);
89 
90 	/*
91 	 * No need to have more trackers than entries in the submit queue.
92 	 *  Note also that for a queue size of N, we can only have (N-1)
93 	 *  commands outstanding, hence the "-1" here.
94 	 */
95 	num_trackers = nvme_min(NVME_IO_TRACKERS, (num_entries - 1));
96 
97 	ctrlr->max_xfer_size = NVME_MAX_XFER_SIZE;
98 
99 	ctrlr->ioq = calloc(ctrlr->num_io_queues, sizeof(struct nvme_qpair));
100 
101 	if (ctrlr->ioq == NULL)
102 		return -1;
103 
104 	for (i = 0; i < ctrlr->num_io_queues; i++) {
105 		qpair = &ctrlr->ioq[i];
106 
107 		/*
108 		 * Admin queue has ID=0. IO queues start at ID=1 -
109 		 *  hence the 'i+1' here.
110 		 *
111 		 * For I/O queues, use the controller-wide max_xfer_size
112 		 *  calculated in nvme_attach().
113 		 */
114 		rc = nvme_qpair_construct(qpair,
115 					  i + 1, /* qpair ID */
116 					  num_entries,
117 					  num_trackers,
118 					  ctrlr);
119 		if (rc)
120 			return -1;
121 	}
122 
123 	return 0;
124 }
125 
126 static void
127 nvme_ctrlr_fail(struct nvme_controller *ctrlr)
128 {
129 	int i;
130 
131 	ctrlr->is_failed = true;
132 	nvme_qpair_fail(&ctrlr->adminq);
133 	for (i = 0; i < ctrlr->num_io_queues; i++) {
134 		nvme_qpair_fail(&ctrlr->ioq[i]);
135 	}
136 }
137 
138 static int
139 _nvme_ctrlr_wait_for_ready(struct nvme_controller *ctrlr, int desired_ready_value)
140 {
141 	int ms_waited, ready_timeout_in_ms;
142 	union nvme_csts_register csts;
143 	union nvme_cap_lo_register cap_lo;
144 
145 	/* Get ready timeout value from controller, in units of 500ms. */
146 	cap_lo.raw = nvme_mmio_read_4(ctrlr, cap_lo.raw);
147 	ready_timeout_in_ms = cap_lo.bits.to * 500;
148 
149 	csts.raw = nvme_mmio_read_4(ctrlr, csts);
150 
151 	ms_waited = 0;
152 
153 	while (csts.bits.rdy != desired_ready_value) {
154 		nvme_delay(1000);
155 		if (ms_waited++ > ready_timeout_in_ms) {
156 			nvme_printf(ctrlr, "controller ready did not become %d "
157 				    "within %d ms\n", desired_ready_value, ready_timeout_in_ms);
158 			return ENXIO;
159 		}
160 		csts.raw = nvme_mmio_read_4(ctrlr, csts);
161 	}
162 
163 	return 0;
164 }
165 
166 static int
167 nvme_ctrlr_wait_for_ready(struct nvme_controller *ctrlr)
168 {
169 	union nvme_cc_register cc;
170 
171 	cc.raw = nvme_mmio_read_4(ctrlr, cc.raw);
172 
173 	if (!cc.bits.en) {
174 		nvme_printf(ctrlr, "%s called with cc.en = 0\n", __func__);
175 		return ENXIO;
176 	}
177 
178 	return _nvme_ctrlr_wait_for_ready(ctrlr, 1);
179 }
180 
181 static void
182 nvme_ctrlr_disable(struct nvme_controller *ctrlr)
183 {
184 	union nvme_cc_register cc;
185 	union nvme_csts_register csts;
186 
187 	cc.raw = nvme_mmio_read_4(ctrlr, cc.raw);
188 	csts.raw = nvme_mmio_read_4(ctrlr, csts);
189 
190 	if (cc.bits.en == 1 && csts.bits.rdy == 0) {
191 		_nvme_ctrlr_wait_for_ready(ctrlr, 1);
192 	}
193 
194 	cc.bits.en = 0;
195 	nvme_mmio_write_4(ctrlr, cc.raw, cc.raw);
196 	nvme_delay(5000);
197 
198 	_nvme_ctrlr_wait_for_ready(ctrlr, 0);
199 }
200 
201 static void
202 nvme_ctrlr_shutdown(struct nvme_controller *ctrlr)
203 {
204 	union nvme_cc_register		cc;
205 	union nvme_csts_register	csts;
206 	int				ms_waited = 0;
207 
208 	cc.raw = nvme_mmio_read_4(ctrlr, cc.raw);
209 	cc.bits.shn = NVME_SHN_NORMAL;
210 	nvme_mmio_write_4(ctrlr, cc.raw, cc.raw);
211 
212 	csts.raw = nvme_mmio_read_4(ctrlr, csts);
213 	/*
214 	 * The NVMe spec does not define a timeout period
215 	 *  for shutdown notification, so we just pick
216 	 *  5 seconds as a reasonable amount of time to
217 	 *  wait before proceeding.
218 	 */
219 	while (csts.bits.shst != NVME_SHST_COMPLETE) {
220 		nvme_delay(1000);
221 		csts.raw = nvme_mmio_read_4(ctrlr, csts);
222 		if (ms_waited++ >= 5000)
223 			break;
224 	}
225 	if (csts.bits.shst != NVME_SHST_COMPLETE)
226 		nvme_printf(ctrlr, "did not shutdown within 5 seconds\n");
227 }
228 
229 static int
230 nvme_ctrlr_enable(struct nvme_controller *ctrlr)
231 {
232 	union nvme_cc_register		cc;
233 	union nvme_csts_register	csts;
234 	union nvme_aqa_register		aqa;
235 
236 	cc.raw = nvme_mmio_read_4(ctrlr, cc.raw);
237 	csts.raw = nvme_mmio_read_4(ctrlr, csts);
238 
239 	if (cc.bits.en == 1) {
240 		if (csts.bits.rdy == 1) {
241 			return 0;
242 		} else {
243 			return nvme_ctrlr_wait_for_ready(ctrlr);
244 		}
245 	}
246 
247 	nvme_mmio_write_8(ctrlr, asq, ctrlr->adminq.cmd_bus_addr);
248 	nvme_delay(5000);
249 	nvme_mmio_write_8(ctrlr, acq, ctrlr->adminq.cpl_bus_addr);
250 	nvme_delay(5000);
251 
252 	aqa.raw = 0;
253 	/* acqs and asqs are 0-based. */
254 	aqa.bits.acqs = ctrlr->adminq.num_entries - 1;
255 	aqa.bits.asqs = ctrlr->adminq.num_entries - 1;
256 	nvme_mmio_write_4(ctrlr, aqa.raw, aqa.raw);
257 	nvme_delay(5000);
258 
259 	cc.bits.en = 1;
260 	cc.bits.css = 0;
261 	cc.bits.ams = 0;
262 	cc.bits.shn = 0;
263 	cc.bits.iosqes = 6; /* SQ entry size == 64 == 2^6 */
264 	cc.bits.iocqes = 4; /* CQ entry size == 16 == 2^4 */
265 
266 	/* Page size is 2 ^ (12 + mps). */
267 	cc.bits.mps = nvme_u32log2(PAGE_SIZE) - 12;
268 
269 	nvme_mmio_write_4(ctrlr, cc.raw, cc.raw);
270 	nvme_delay(5000);
271 
272 	return nvme_ctrlr_wait_for_ready(ctrlr);
273 }
274 
275 int
276 nvme_ctrlr_hw_reset(struct nvme_controller *ctrlr)
277 {
278 	int i, rc;
279 	union nvme_cc_register cc;
280 
281 	cc.raw = nvme_mmio_read_4(ctrlr, cc.raw);
282 	if (cc.bits.en) {
283 		nvme_qpair_disable(&ctrlr->adminq);
284 		for (i = 0; i < ctrlr->num_io_queues; i++) {
285 			nvme_qpair_disable(&ctrlr->ioq[i]);
286 		}
287 
288 		nvme_delay(100 * 1000);
289 	} else {
290 		/*
291 		 * Ensure we do a transition from cc.en==1 to cc.en==0.
292 		 *  If we started disabled (cc.en==0), then we have to enable
293 		 *  first to get a reset.
294 		 */
295 		nvme_ctrlr_enable(ctrlr);
296 	}
297 
298 	nvme_ctrlr_disable(ctrlr);
299 	rc = nvme_ctrlr_enable(ctrlr);
300 
301 	nvme_delay(100 * 1000);
302 
303 	return rc;
304 }
305 
306 int
307 nvme_ctrlr_reset(struct nvme_controller *ctrlr)
308 {
309 	int rc;
310 
311 	nvme_mutex_lock(&ctrlr->ctrlr_lock);
312 
313 	if (ctrlr->is_resetting || ctrlr->is_failed) {
314 		/*
315 		 * Controller is already resetting or has failed.  Return
316 		 *  immediately since there is no need to kick off another
317 		 *  reset in these cases.
318 		 */
319 		nvme_mutex_unlock(&ctrlr->ctrlr_lock);
320 		return 0;
321 	}
322 
323 	ctrlr->is_resetting = 1;
324 
325 	nvme_printf(ctrlr, "resetting controller\n");
326 	/* nvme_ctrlr_start() issues a reset as its first step */
327 	rc = nvme_ctrlr_start(ctrlr);
328 	if (rc) {
329 		nvme_ctrlr_fail(ctrlr);
330 	}
331 
332 	ctrlr->is_resetting = 0;
333 
334 	nvme_mutex_unlock(&ctrlr->ctrlr_lock);
335 
336 	return rc;
337 }
338 
339 static int
340 nvme_ctrlr_identify(struct nvme_controller *ctrlr)
341 {
342 	struct nvme_completion_poll_status	status;
343 
344 	status.done = false;
345 	nvme_ctrlr_cmd_identify_controller(ctrlr, &ctrlr->cdata,
346 					   nvme_completion_poll_cb, &status);
347 	while (status.done == false) {
348 		nvme_qpair_process_completions(&ctrlr->adminq);
349 	}
350 	if (nvme_completion_is_error(&status.cpl)) {
351 		nvme_printf(ctrlr, "nvme_identify_controller failed!\n");
352 		return ENXIO;
353 	}
354 
355 	/*
356 	 * Use MDTS to ensure our default max_xfer_size doesn't exceed what the
357 	 *  controller supports.
358 	 */
359 	if (ctrlr->cdata.mdts > 0) {
360 		ctrlr->max_xfer_size = nvme_min(ctrlr->max_xfer_size,
361 						ctrlr->min_page_size * (1 << (ctrlr->cdata.mdts)));
362 	}
363 
364 	return 0;
365 }
366 
367 static int
368 nvme_ctrlr_set_num_qpairs(struct nvme_controller *ctrlr)
369 {
370 	struct nvme_driver			*driver = &g_nvme_driver;
371 	struct nvme_completion_poll_status	status;
372 	int					cq_allocated, sq_allocated;
373 	uint32_t				max_io_queues;
374 
375 	status.done = false;
376 
377 	nvme_mutex_lock(&driver->lock);
378 	max_io_queues = driver->max_io_queues;
379 	nvme_mutex_unlock(&driver->lock);
380 
381 	nvme_ctrlr_cmd_set_num_queues(ctrlr, max_io_queues,
382 				      nvme_completion_poll_cb, &status);
383 	while (status.done == false) {
384 		nvme_qpair_process_completions(&ctrlr->adminq);
385 	}
386 	if (nvme_completion_is_error(&status.cpl)) {
387 		nvme_printf(ctrlr, "nvme_set_num_queues failed!\n");
388 		return ENXIO;
389 	}
390 
391 	/*
392 	 * Data in cdw0 is 0-based.
393 	 * Lower 16-bits indicate number of submission queues allocated.
394 	 * Upper 16-bits indicate number of completion queues allocated.
395 	 */
396 	sq_allocated = (status.cpl.cdw0 & 0xFFFF) + 1;
397 	cq_allocated = (status.cpl.cdw0 >> 16) + 1;
398 
399 	ctrlr->num_io_queues = nvme_min(sq_allocated, cq_allocated);
400 
401 	nvme_mutex_lock(&driver->lock);
402 	driver->max_io_queues = nvme_min(driver->max_io_queues, ctrlr->num_io_queues);
403 	nvme_mutex_unlock(&driver->lock);
404 
405 	return 0;
406 }
407 
408 static int
409 nvme_ctrlr_create_qpairs(struct nvme_controller *ctrlr)
410 {
411 	struct nvme_completion_poll_status	status;
412 	struct nvme_qpair			*qpair;
413 	int					i;
414 
415 	if (nvme_ctrlr_construct_io_qpairs(ctrlr)) {
416 		nvme_printf(ctrlr, "nvme_ctrlr_construct_io_qpairs failed!\n");
417 		return ENXIO;
418 	}
419 
420 	for (i = 0; i < ctrlr->num_io_queues; i++) {
421 		qpair = &ctrlr->ioq[i];
422 
423 		status.done = false;
424 		nvme_ctrlr_cmd_create_io_cq(ctrlr, qpair,
425 					    nvme_completion_poll_cb, &status);
426 		while (status.done == false) {
427 			nvme_qpair_process_completions(&ctrlr->adminq);
428 		}
429 		if (nvme_completion_is_error(&status.cpl)) {
430 			nvme_printf(ctrlr, "nvme_create_io_cq failed!\n");
431 			return ENXIO;
432 		}
433 
434 		status.done = false;
435 		nvme_ctrlr_cmd_create_io_sq(qpair->ctrlr, qpair,
436 					    nvme_completion_poll_cb, &status);
437 		while (status.done == false) {
438 			nvme_qpair_process_completions(&ctrlr->adminq);
439 		}
440 		if (nvme_completion_is_error(&status.cpl)) {
441 			nvme_printf(ctrlr, "nvme_create_io_sq failed!\n");
442 			return ENXIO;
443 		}
444 
445 		nvme_qpair_reset(qpair);
446 	}
447 
448 	return 0;
449 }
450 
451 static void
452 nvme_ctrlr_destruct_namespaces(struct nvme_controller *ctrlr)
453 {
454 	if (ctrlr->ns) {
455 		uint32_t i, num_ns = ctrlr->num_ns;
456 
457 		for (i = 0; i < num_ns; i++) {
458 			nvme_ns_destruct(&ctrlr->ns[i]);
459 		}
460 
461 		free(ctrlr->ns);
462 		ctrlr->ns = NULL;
463 		ctrlr->num_ns = 0;
464 	}
465 
466 	if (ctrlr->nsdata) {
467 		nvme_free(ctrlr->nsdata);
468 		ctrlr->nsdata = NULL;
469 	}
470 }
471 
472 static int
473 nvme_ctrlr_construct_namespaces(struct nvme_controller *ctrlr)
474 {
475 	uint32_t i, nn = ctrlr->cdata.nn;
476 	uint64_t phys_addr = 0;
477 
478 	if (nn == 0) {
479 		nvme_printf(ctrlr, "controller has 0 namespaces\n");
480 		return -1;
481 	}
482 
483 	/* ctrlr->num_ns may be 0 (startup) or a different number of namespaces (reset),
484 	 * so check if we need to reallocate.
485 	 */
486 	if (nn != ctrlr->num_ns) {
487 		nvme_ctrlr_destruct_namespaces(ctrlr);
488 
489 		ctrlr->ns = calloc(nn, sizeof(struct nvme_namespace));
490 		if (ctrlr->ns == NULL) {
491 			goto fail;
492 		}
493 
494 		ctrlr->nsdata = nvme_malloc("nvme_namespaces",
495 					    nn * sizeof(struct nvme_namespace_data), 64,
496 					    &phys_addr);
497 		if (ctrlr->nsdata == NULL) {
498 			goto fail;
499 		}
500 
501 		ctrlr->num_ns = nn;
502 	}
503 
504 	for (i = 0; i < nn; i++) {
505 		struct nvme_namespace	*ns = &ctrlr->ns[i];
506 		uint32_t 		nsid = i + 1;
507 
508 		if (nvme_ns_construct(ns, nsid, ctrlr) != 0) {
509 			goto fail;
510 		}
511 	}
512 
513 	return 0;
514 
515 fail:
516 	nvme_ctrlr_destruct_namespaces(ctrlr);
517 	return -1;
518 }
519 
520 static void
521 nvme_ctrlr_async_event_cb(void *arg, const struct nvme_completion *cpl)
522 {
523 	struct nvme_async_event_request	*aer = arg;
524 	struct nvme_controller		*ctrlr = aer->ctrlr;
525 
526 	if (cpl->status.sc == NVME_SC_ABORTED_SQ_DELETION) {
527 		/*
528 		 *  This is simulated when controller is being shut down, to
529 		 *  effectively abort outstanding asynchronous event requests
530 		 *  and make sure all memory is freed.  Do not repost the
531 		 *  request in this case.
532 		 */
533 		return;
534 	}
535 
536 	if (ctrlr->aer_cb_fn != NULL) {
537 		ctrlr->aer_cb_fn(ctrlr->aer_cb_arg, cpl);
538 	}
539 
540 	/*
541 	 * Repost another asynchronous event request to replace the one
542 	 *  that just completed.
543 	 */
544 	nvme_ctrlr_construct_and_submit_aer(aer->ctrlr, aer);
545 }
546 
547 static void
548 nvme_ctrlr_construct_and_submit_aer(struct nvme_controller *ctrlr,
549 				    struct nvme_async_event_request *aer)
550 {
551 	struct nvme_request *req;
552 
553 	aer->ctrlr = ctrlr;
554 	req = nvme_allocate_request(NULL, 0, nvme_ctrlr_async_event_cb, aer);
555 	aer->req = req;
556 
557 	/*
558 	 * Disable timeout here, since asynchronous event requests should by
559 	 *  nature never be timed out.
560 	 */
561 	req->timeout = false;
562 	req->cmd.opc = NVME_OPC_ASYNC_EVENT_REQUEST;
563 	nvme_ctrlr_submit_admin_request(ctrlr, req);
564 }
565 
566 static int
567 nvme_ctrlr_configure_aer(struct nvme_controller *ctrlr)
568 {
569 	union nvme_critical_warning_state	state;
570 	struct nvme_async_event_request		*aer;
571 	uint32_t				i;
572 	struct nvme_completion_poll_status	status;
573 
574 	status.done = false;
575 
576 	state.raw = 0xFF;
577 	state.bits.reserved = 0;
578 	nvme_ctrlr_cmd_set_async_event_config(ctrlr, state, nvme_completion_poll_cb, &status);
579 
580 	while (status.done == false) {
581 		nvme_qpair_process_completions(&ctrlr->adminq);
582 	}
583 	if (nvme_completion_is_error(&status.cpl)) {
584 		nvme_printf(ctrlr, "nvme_ctrlr_cmd_set_async_event_config failed!\n");
585 		return ENXIO;
586 	}
587 
588 	/* aerl is a zero-based value, so we need to add 1 here. */
589 	ctrlr->num_aers = nvme_min(NVME_MAX_ASYNC_EVENTS, (ctrlr->cdata.aerl + 1));
590 
591 	for (i = 0; i < ctrlr->num_aers; i++) {
592 		aer = &ctrlr->aer[i];
593 		nvme_ctrlr_construct_and_submit_aer(ctrlr, aer);
594 	}
595 
596 	return 0;
597 }
598 
599 int
600 nvme_ctrlr_start(struct nvme_controller *ctrlr)
601 {
602 	if (nvme_ctrlr_hw_reset(ctrlr) != 0) {
603 		return -1;
604 	}
605 
606 	nvme_qpair_reset(&ctrlr->adminq);
607 
608 	nvme_qpair_enable(&ctrlr->adminq);
609 
610 	if (nvme_ctrlr_identify(ctrlr) != 0) {
611 		return -1;
612 	}
613 
614 	if (nvme_ctrlr_set_num_qpairs(ctrlr) != 0) {
615 		return -1;
616 	}
617 
618 	if (nvme_ctrlr_create_qpairs(ctrlr) != 0) {
619 		return -1;
620 	}
621 
622 	if (nvme_ctrlr_construct_namespaces(ctrlr) != 0) {
623 		return -1;
624 	}
625 
626 	if (nvme_ctrlr_configure_aer(ctrlr) != 0) {
627 		return -1;
628 	}
629 
630 	return 0;
631 }
632 
633 static int
634 nvme_ctrlr_allocate_bars(struct nvme_controller *ctrlr)
635 {
636 	int rc;
637 	void *addr;
638 
639 	rc = nvme_pcicfg_map_bar(ctrlr->devhandle, 0, 0 /* writable */, &addr);
640 	ctrlr->regs = (volatile struct nvme_registers *)addr;
641 	if ((ctrlr->regs == NULL) || (rc != 0)) {
642 		printf("pci_device_map_range failed with error code %d\n", rc);
643 		return -1;
644 	}
645 
646 	return 0;
647 }
648 
649 static int
650 nvme_ctrlr_free_bars(struct nvme_controller *ctrlr)
651 {
652 	int rc = 0;
653 	void *addr = (void *)ctrlr->regs;
654 
655 	if (addr) {
656 		rc = nvme_pcicfg_unmap_bar(ctrlr->devhandle, 0, addr);
657 	}
658 	return rc;
659 }
660 
661 int
662 nvme_ctrlr_construct(struct nvme_controller *ctrlr, void *devhandle)
663 {
664 	union nvme_cap_hi_register	cap_hi;
665 	uint32_t			cmd_reg;
666 	int				status;
667 	int				rc;
668 
669 	ctrlr->devhandle = devhandle;
670 
671 	status = nvme_ctrlr_allocate_bars(ctrlr);
672 	if (status != 0) {
673 		return status;
674 	}
675 
676 	/* Enable PCI busmaster. */
677 	nvme_pcicfg_read32(devhandle, &cmd_reg, 4);
678 	cmd_reg |= 0x4;
679 	nvme_pcicfg_write32(devhandle, cmd_reg, 4);
680 
681 	cap_hi.raw = nvme_mmio_read_4(ctrlr, cap_hi.raw);
682 
683 	/* Doorbell stride is 2 ^ (dstrd + 2),
684 	 * but we want multiples of 4, so drop the + 2 */
685 	ctrlr->doorbell_stride_u32 = 1 << cap_hi.bits.dstrd;
686 
687 	ctrlr->min_page_size = 1 << (12 + cap_hi.bits.mpsmin);
688 
689 	rc = nvme_ctrlr_construct_admin_qpair(ctrlr);
690 	if (rc)
691 		return rc;
692 
693 	ctrlr->is_resetting = 0;
694 	ctrlr->is_failed = false;
695 
696 	nvme_mutex_init_recursive(&ctrlr->ctrlr_lock);
697 
698 	return 0;
699 }
700 
701 void
702 nvme_ctrlr_destruct(struct nvme_controller *ctrlr)
703 {
704 	int				i;
705 
706 	nvme_ctrlr_disable(ctrlr);
707 	nvme_ctrlr_shutdown(ctrlr);
708 
709 	nvme_ctrlr_destruct_namespaces(ctrlr);
710 
711 	for (i = 0; i < ctrlr->num_io_queues; i++) {
712 		nvme_qpair_destroy(&ctrlr->ioq[i]);
713 	}
714 
715 	free(ctrlr->ioq);
716 
717 	nvme_qpair_destroy(&ctrlr->adminq);
718 
719 	nvme_ctrlr_free_bars(ctrlr);
720 	nvme_mutex_destroy(&ctrlr->ctrlr_lock);
721 }
722 
723 void
724 nvme_ctrlr_submit_admin_request(struct nvme_controller *ctrlr,
725 				struct nvme_request *req)
726 {
727 	nvme_qpair_submit_request(&ctrlr->adminq, req);
728 }
729 
730 void
731 nvme_ctrlr_submit_io_request(struct nvme_controller *ctrlr,
732 			     struct nvme_request *req)
733 {
734 	struct nvme_qpair       *qpair;
735 
736 	nvme_assert(nvme_thread_ioq_index >= 0, ("no ioq_index assigned for thread\n"));
737 	qpair = &ctrlr->ioq[nvme_thread_ioq_index];
738 
739 	nvme_qpair_submit_request(qpair, req);
740 }
741 
742 void
743 nvme_ctrlr_process_io_completions(struct nvme_controller *ctrlr)
744 {
745 	nvme_assert(nvme_thread_ioq_index >= 0, ("no ioq_index assigned for thread\n"));
746 	nvme_qpair_process_completions(&ctrlr->ioq[nvme_thread_ioq_index]);
747 }
748 
749 void
750 nvme_ctrlr_process_admin_completions(struct nvme_controller *ctrlr)
751 {
752 	nvme_mutex_lock(&ctrlr->ctrlr_lock);
753 	nvme_qpair_process_completions(&ctrlr->adminq);
754 	nvme_mutex_unlock(&ctrlr->ctrlr_lock);
755 }
756 
757 const struct nvme_controller_data *
758 nvme_ctrlr_get_data(struct nvme_controller *ctrlr)
759 {
760 
761 	return &ctrlr->cdata;
762 }
763 
764 uint32_t
765 nvme_ctrlr_get_num_ns(struct nvme_controller *ctrlr)
766 {
767 	return ctrlr->num_ns;
768 }
769 
770 struct nvme_namespace *
771 nvme_ctrlr_get_ns(struct nvme_controller *ctrlr, uint32_t ns_id)
772 {
773 	if (ns_id < 1 || ns_id > ctrlr->num_ns) {
774 		return NULL;
775 	}
776 
777 	return &ctrlr->ns[ns_id - 1];
778 }
779 
780 void
781 nvme_ctrlr_register_aer_callback(struct nvme_controller *ctrlr,
782 				 nvme_aer_cb_fn_t aer_cb_fn,
783 				 void *aer_cb_arg)
784 {
785 	ctrlr->aer_cb_fn = aer_cb_fn;
786 	ctrlr->aer_cb_arg = aer_cb_arg;
787 }
788