xref: /dflybsd-src/sys/dev/disk/nvme/nvme_attach.c (revision 84ad1523f937564945ea31e53b3b52654e3cc6d0)
197a077a0SMatthew Dillon /*
297a077a0SMatthew Dillon  * Copyright (c) 2016 The DragonFly Project.  All rights reserved.
397a077a0SMatthew Dillon  *
497a077a0SMatthew Dillon  * This code is derived from software contributed to The DragonFly Project
597a077a0SMatthew Dillon  * by Matthew Dillon <dillon@backplane.com>
697a077a0SMatthew Dillon  *
797a077a0SMatthew Dillon  * Redistribution and use in source and binary forms, with or without
897a077a0SMatthew Dillon  * modification, are permitted provided that the following conditions
997a077a0SMatthew Dillon  * are met:
1097a077a0SMatthew Dillon  *
1197a077a0SMatthew Dillon  * 1. Redistributions of source code must retain the above copyright
1297a077a0SMatthew Dillon  *    notice, this list of conditions and the following disclaimer.
1397a077a0SMatthew Dillon  * 2. Redistributions in binary form must reproduce the above copyright
1497a077a0SMatthew Dillon  *    notice, this list of conditions and the following disclaimer in
1597a077a0SMatthew Dillon  *    the documentation and/or other materials provided with the
1697a077a0SMatthew Dillon  *    distribution.
1797a077a0SMatthew Dillon  * 3. Neither the name of The DragonFly Project nor the names of its
1897a077a0SMatthew Dillon  *    contributors may be used to endorse or promote products derived
1997a077a0SMatthew Dillon  *    from this software without specific, prior written permission.
2097a077a0SMatthew Dillon  *
2197a077a0SMatthew Dillon  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
2297a077a0SMatthew Dillon  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
2397a077a0SMatthew Dillon  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
2497a077a0SMatthew Dillon  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
2597a077a0SMatthew Dillon  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
2697a077a0SMatthew Dillon  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
2797a077a0SMatthew Dillon  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
2897a077a0SMatthew Dillon  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
2997a077a0SMatthew Dillon  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
3097a077a0SMatthew Dillon  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
3197a077a0SMatthew Dillon  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
3297a077a0SMatthew Dillon  * SUCH DAMAGE.
3397a077a0SMatthew Dillon  */
3497a077a0SMatthew Dillon 
3597a077a0SMatthew Dillon #include "nvme.h"
3697a077a0SMatthew Dillon 
3797a077a0SMatthew Dillon static int	nvme_pci_attach(device_t);
3897a077a0SMatthew Dillon static int	nvme_pci_detach(device_t);
3997a077a0SMatthew Dillon 
4097a077a0SMatthew Dillon static const nvme_device_t nvme_devices[] = {
4197a077a0SMatthew Dillon 	/* Vendor-specific table goes here (see ahci for example) */
4297a077a0SMatthew Dillon 	{ 0, 0, nvme_pci_attach, nvme_pci_detach, "NVME-PCIe" }
4397a077a0SMatthew Dillon };
4497a077a0SMatthew Dillon 
4518d2384bSMatthew Dillon static int	nvme_msix_enable = 1;
4618d2384bSMatthew Dillon TUNABLE_INT("hw.nvme.msix.enable", &nvme_msix_enable);
477e782064SMatthew Dillon static int	nvme_msi_enable = 0;
4897a077a0SMatthew Dillon TUNABLE_INT("hw.nvme.msi.enable", &nvme_msi_enable);
4997a077a0SMatthew Dillon 
5011759406SMatthew Dillon TAILQ_HEAD(, nvme_softc) nvme_sc_list = TAILQ_HEAD_INITIALIZER(nvme_sc_list);
5111759406SMatthew Dillon struct lock nvme_master_lock = LOCK_INITIALIZER("nvmstr", 0, 0);
5211759406SMatthew Dillon 
5318d2384bSMatthew Dillon static int last_global_cpu;
5418d2384bSMatthew Dillon 
5597a077a0SMatthew Dillon /*
5697a077a0SMatthew Dillon  * Match during probe and attach.  The device does not yet have a softc.
5797a077a0SMatthew Dillon  */
5897a077a0SMatthew Dillon const nvme_device_t *
5997a077a0SMatthew Dillon nvme_lookup_device(device_t dev)
6097a077a0SMatthew Dillon {
6197a077a0SMatthew Dillon 	const nvme_device_t *ad;
6297a077a0SMatthew Dillon 	uint16_t vendor = pci_get_vendor(dev);
6397a077a0SMatthew Dillon 	uint16_t product = pci_get_device(dev);
6497a077a0SMatthew Dillon 	uint8_t class = pci_get_class(dev);
6597a077a0SMatthew Dillon 	uint8_t subclass = pci_get_subclass(dev);
6697a077a0SMatthew Dillon 	uint8_t progif = pci_read_config(dev, PCIR_PROGIF, 1);
6797a077a0SMatthew Dillon 	int is_nvme;
6897a077a0SMatthew Dillon 
6997a077a0SMatthew Dillon 	/*
7097a077a0SMatthew Dillon 	 * Generally speaking if the pci device does not identify as
7197a077a0SMatthew Dillon 	 * AHCI we skip it.
7297a077a0SMatthew Dillon 	 */
7397a077a0SMatthew Dillon 	if (class == PCIC_STORAGE && subclass == PCIS_STORAGE_NVM &&
7497a077a0SMatthew Dillon 	    progif == PCIP_STORAGE_NVM_ENTERPRISE_NVMHCI_1_0) {
7597a077a0SMatthew Dillon 		is_nvme = 1;
7697a077a0SMatthew Dillon 	} else {
7797a077a0SMatthew Dillon 		is_nvme = 0;
7897a077a0SMatthew Dillon 	}
7997a077a0SMatthew Dillon 
8097a077a0SMatthew Dillon 	for (ad = &nvme_devices[0]; ad->vendor; ++ad) {
8197a077a0SMatthew Dillon 		if (ad->vendor == vendor && ad->product == product)
8297a077a0SMatthew Dillon 			return (ad);
8397a077a0SMatthew Dillon 	}
8497a077a0SMatthew Dillon 
8597a077a0SMatthew Dillon 	/*
8697a077a0SMatthew Dillon 	 * Last ad is the default match if the PCI device matches SATA.
8797a077a0SMatthew Dillon 	 */
8897a077a0SMatthew Dillon 	if (is_nvme == 0)
8997a077a0SMatthew Dillon 		ad = NULL;
9097a077a0SMatthew Dillon 	return (ad);
9197a077a0SMatthew Dillon }
9297a077a0SMatthew Dillon 
/*
 * Attach functions.  They all eventually fall through to nvme_pci_attach().
 *
 * Bring-up sequence: map BAR0 registers, map BAR4 (optional MSI-X table),
 * allocate interrupts (MSI-X preferred, falling back to MSI/legacy),
 * disable/reset the controller, read capabilities, create DMA tags,
 * set up the admin queue pair (qid 0), enable the controller, and start
 * the admin thread.  Any failure unwinds via nvme_pci_detach() and
 * returns ENXIO.
 */
static int
nvme_pci_attach(device_t dev)
{
	nvme_softc_t *sc = device_get_softc(dev);
	uint32_t reg;
	int error;
	int msi_enable;
	int msix_enable;

	/*
	 * Re-enable PCI INTx if the BIOS left it disabled (0x0400 is the
	 * interrupt-disable bit in the PCI command register).
	 */
	if (pci_read_config(dev, PCIR_COMMAND, 2) & 0x0400) {
		device_printf(dev, "BIOS disabled PCI interrupt, "
				   "re-enabling\n");
		pci_write_config(dev, PCIR_COMMAND,
			pci_read_config(dev, PCIR_COMMAND, 2) & ~0x0400, 2);
	}

	sc->dev = dev;

	/*
	 * Map the register window
	 */
	sc->rid_regs = PCIR_BAR(0);
	sc->regs = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
					  &sc->rid_regs, RF_ACTIVE);
	if (sc->regs == NULL) {
		device_printf(dev, "unable to map registers\n");
		nvme_pci_detach(dev);
		return (ENXIO);
	}
	sc->iot = rman_get_bustag(sc->regs);
	sc->ioh = rman_get_bushandle(sc->regs);

	/*
	 * NVMe allows the MSI-X table to be mapped to BAR 4/5.
	 * Always try to map BAR4, but it's ok if it fails.  Must
	 * be done prior to allocating our interrupts.
	 */
	sc->rid_bar4 = PCIR_BAR(4);
	sc->bar4 = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
					  &sc->rid_bar4, RF_ACTIVE);

	/*
	 * Map the interrupt or initial interrupt which will be used for
	 * the admin queue.  NVME chipsets can potentially support a huge
	 * number of MSIX vectors but we really only need enough for
	 * available cpus, plus 1.
	 */
	msi_enable = device_getenv_int(dev, "msi.enable", nvme_msi_enable);
	msix_enable = device_getenv_int(dev, "msix.enable", nvme_msix_enable);

	error = 0;
	if (msix_enable) {
		int i;
		int cpu;

		sc->nirqs = pci_msix_count(dev);
		sc->irq_type = PCI_INTR_TYPE_MSIX;
		if (sc->nirqs > ncpus + 1)		/* max we need */
			sc->nirqs = ncpus + 1;

		error = pci_setup_msix(dev);
		cpu = (last_global_cpu + 0) % ncpus;	/* GCC warn */
		/*
		 * Allocate one vector per irq, rotating the target cpu
		 * starting from last_global_cpu so multiple controllers
		 * spread their vectors across cpus.
		 */
		for (i = 0; error == 0 && i < sc->nirqs; ++i) {
			cpu = (last_global_cpu + i) % ncpus;
			error = pci_alloc_msix_vector(dev, i,
						      &sc->rid_irq[i], cpu);
			if (error)
				break;
			sc->irq[i] = bus_alloc_resource_any(dev, SYS_RES_IRQ,
							    &sc->rid_irq[i],
							    RF_ACTIVE);
			/*
			 * We want this to overwrite queue 0's cpu vector
			 * when the cpus rotate through later on.
			 */
			if (sc->cputovect[cpu] == 0)
				sc->cputovect[cpu] = i;
		}

		/*
		 * If we did not iterate enough cpus (that is, there weren't
		 * enough irqs for all available cpus) we still need to
		 * finish our sc->cputovect[] mapping.  Vector 0 is reserved
		 * for the admin queue, hence the skip when i wraps to 0.
		 */
		while (error == 0) {
			cpu = (cpu + 1) % ncpus;
			i = (i + 1) % sc->nirqs;
			if (i == 0)
				i = 1;
			if (sc->cputovect[cpu] != 0)
				break;
			sc->cputovect[cpu] = i;
		}

		/*
		 * On failure release whatever vectors were allocated and
		 * fall through to the single-interrupt path below.
		 */
		if (error) {
			while (--i >= 0) {
				bus_release_resource(dev, SYS_RES_IRQ,
						     sc->rid_irq[i],
						     sc->irq[i]);
				pci_release_msix_vector(dev, sc->rid_irq[i]);
				sc->irq[i] = NULL;
			}
			/* leave error intact to fall through to normal */
		} else {
			last_global_cpu = (last_global_cpu + sc->nirqs) % ncpus;
			pci_enable_msix(dev);
		}
	}
	/*
	 * Fallback: single MSI or legacy interrupt.
	 */
	if (error) {
		uint32_t irq_flags;

		error = 0;
		sc->nirqs = 1;
		sc->irq_type = pci_alloc_1intr(dev, msi_enable,
					       &sc->rid_irq[0], &irq_flags);
		sc->irq[0] = bus_alloc_resource_any(dev, SYS_RES_IRQ,
						 &sc->rid_irq[0], irq_flags);
	}
	if (sc->irq[0] == NULL) {
		device_printf(dev, "unable to map interrupt\n");
		nvme_pci_detach(dev);
		return (ENXIO);
	} else {
		const char *type;
		switch(sc->irq_type) {
		case PCI_INTR_TYPE_MSI:
			type = "MSI";
			break;
		case PCI_INTR_TYPE_MSIX:
			type = "MSIX";
			break;
		default:
			type = "normal-int";
			break;
		}
		device_printf(dev, "mapped %d %s IRQs\n", sc->nirqs, type);
	}

	/*
	 * Make sure the chip is disabled, which will reset all controller
	 * registers except for the admin queue registers.  Device should
	 * already be disabled so this is usually instantaneous.  Use a
	 * fixed 5-second timeout in case it is not.  I'd like my other
	 * reads to occur after the device has been disabled.
	 */
	sc->entimo = hz * 5;
	error = nvme_enable(sc, 0);
	if (error) {
		nvme_pci_detach(dev);
		return (ENXIO);
	}

	/*
	 * Get capabilities and version and report
	 */
	sc->vers = nvme_read(sc, NVME_REG_VERS);
	sc->cap = nvme_read8(sc, NVME_REG_CAP);
	sc->maxqe = NVME_CAP_MQES_GET(sc->cap);
	sc->dstrd4 = NVME_CAP_DSTRD_GET(sc->cap);

	device_printf(dev, "NVME Version %u.%u maxqe=%u caps=%016jx\n",
		      NVME_VERS_MAJOR_GET(sc->vers),
		      NVME_VERS_MINOR_GET(sc->vers),
		      sc->maxqe, sc->cap);

	/*
	 * Enable timeout, 500ms increments.  Convert to ticks.
	 */
	sc->entimo = NVME_CAP_TIMEOUT_GET(sc->cap) * hz / 2; /* in ticks */
	++sc->entimo;		/* fudge */

	/*
	 * Validate maxqe.  To cap the amount of memory we reserve for
	 * PRPs we limit maxqe to 256.  Also make sure it is a power of
	 * two (rounding down).
	 */
	if (sc->maxqe < 2) {
		device_printf(dev,
			      "Attach failed, max queue entries (%d) "
			      "below minimum (2)\n", sc->maxqe);
		nvme_pci_detach(dev);
		return (ENXIO);
	}
	if (sc->maxqe > 256)
		sc->maxqe = 256;
	/* round maxqe down to the nearest power of two */
	for (reg = 2; reg <= sc->maxqe; reg <<= 1)
		;
	sc->maxqe = reg >> 1;

	/*
	 * DMA tags
	 *
	 * PRP	- Worst case PRPs needed per queue is MAXPHYS / PAGE_SIZE
	 *	  (typically 64), multiplied by maxqe (typ 256).  Roughly
	 *	  ~128KB per queue.  Align for cache performance.  We actually
	 *	  need one more PRP per queue entry worst-case to handle
	 *	  buffer overlap, but we have an extra one in the command
	 *	  structure so we don't have to calculate that out.
	 *
	 *	  Remember that we intend to allocate potentially many queues,
	 *	  so we don't want to bloat this too much.  A queue depth of
	 *	  256 is plenty.
	 *
	 * CMD - Storage for the submit queue.  maxqe * 64	(~16KB)
	 *
	 * RES - Storage for the completion queue.  maxqe * 16	(~4KB)
	 *
	 * ADM - Storage for admin command DMA data.  Maximum admin command
	 *	 DMA data is 4KB so reserve maxqe * 4KB (~1MB).  There is only
	 *	 one admin queue.
	 *
	 * NOTE: There are no boundary requirements for NVMe, but I specify a
	 *	 4MB boundary anyway because this reduces mass-bit flipping
	 *	 of address bits inside the controller when incrementing
	 *	 DMA addresses.  Why not?  Can't hurt.
	 */
	sc->prp_bytes = sizeof(uint64_t) * (MAXPHYS / PAGE_SIZE) * sc->maxqe;
	sc->cmd_bytes = sizeof(nvme_subq_item_t) * sc->maxqe;
	sc->res_bytes = sizeof(nvme_comq_item_t) * sc->maxqe;
	sc->adm_bytes = NVME_MAX_ADMIN_BUFFER * sc->maxqe;

	error = 0;

	error += bus_dma_tag_create(
			NULL,				/* parent tag */
			PAGE_SIZE,			/* alignment */
			4 * 1024 * 1024,		/* boundary */
			BUS_SPACE_MAXADDR,		/* loaddr? */
			BUS_SPACE_MAXADDR,		/* hiaddr */
			NULL,				/* filter */
			NULL,				/* filterarg */
			sc->prp_bytes,			/* [max]size */
			1,				/* maxsegs */
			sc->prp_bytes,			/* maxsegsz */
			0,				/* flags */
			&sc->prps_tag);			/* return tag */

	error += bus_dma_tag_create(
			NULL,				/* parent tag */
			PAGE_SIZE,			/* alignment */
			4 * 1024 * 1024,		/* boundary */
			BUS_SPACE_MAXADDR,		/* loaddr? */
			BUS_SPACE_MAXADDR,		/* hiaddr */
			NULL,				/* filter */
			NULL,				/* filterarg */
			sc->cmd_bytes,			/* [max]size */
			1,				/* maxsegs */
			sc->cmd_bytes,			/* maxsegsz */
			0,				/* flags */
			&sc->sque_tag);			/* return tag */

	error += bus_dma_tag_create(
			NULL,				/* parent tag */
			PAGE_SIZE,			/* alignment */
			4 * 1024 * 1024,		/* boundary */
			BUS_SPACE_MAXADDR,		/* loaddr? */
			BUS_SPACE_MAXADDR,		/* hiaddr */
			NULL,				/* filter */
			NULL,				/* filterarg */
			sc->res_bytes,			/* [max]size */
			1,				/* maxsegs */
			sc->res_bytes,			/* maxsegsz */
			0,				/* flags */
			&sc->cque_tag);			/* return tag */

	error += bus_dma_tag_create(
			NULL,				/* parent tag */
			PAGE_SIZE,			/* alignment */
			4 * 1024 * 1024,		/* boundary */
			BUS_SPACE_MAXADDR,		/* loaddr? */
			BUS_SPACE_MAXADDR,		/* hiaddr */
			NULL,				/* filter */
			NULL,				/* filterarg */
			sc->adm_bytes,			/* [max]size */
			1,				/* maxsegs */
			sc->adm_bytes,			/* maxsegsz */
			0,				/* flags */
			&sc->adm_tag);			/* return tag */

	if (error) {
		device_printf(dev, "unable to create dma tags\n");
		nvme_pci_detach(dev);
		return (ENXIO);
	}

	/*
	 * Setup the admin queues (qid 0).
	 */
	error = nvme_alloc_subqueue(sc, 0);
	if (error) {
		device_printf(dev, "unable to allocate admin subqueue\n");
		nvme_pci_detach(dev);
		return (ENXIO);
	}
	error = nvme_alloc_comqueue(sc, 0);
	if (error) {
		device_printf(dev, "unable to allocate admin comqueue\n");
		nvme_pci_detach(dev);
		return (ENXIO);
	}

	/*
	 * Initialize the admin queue registers (queue sizes and the
	 * physical addresses of the admin submission/completion queues).
	 */
	reg = NVME_ATTR_COM_SET(sc->maxqe) | NVME_ATTR_SUB_SET(sc->maxqe);
	nvme_write(sc, NVME_REG_ADM_ATTR, reg);
	nvme_write8(sc, NVME_REG_ADM_SUBADR, (uint64_t)sc->subqueues[0].psubq);
	nvme_write8(sc, NVME_REG_ADM_COMADR, (uint64_t)sc->comqueues[0].pcomq);

	/*
	 * Other configuration registers
	 */
	reg = NVME_CONFIG_IOSUB_ES_SET(6) |		/* 64 byte sub entry */
	      NVME_CONFIG_IOCOM_ES_SET(4) |		/* 16 byte com entry */
	      NVME_CONFIG_MEMPG_SET(PAGE_SHIFT) |	/* 4K pages */
	      NVME_CONFIG_CSS_NVM;			/* NVME command set */
	nvme_write(sc, NVME_REG_CONFIG, reg);

	reg = nvme_read(sc, NVME_REG_MEMSIZE);

	/*
	 * Enable the chip for operation
	 */
	error = nvme_enable(sc, 1);
	if (error) {
		nvme_enable(sc, 0);
		nvme_pci_detach(dev);
		return (ENXIO);
	}

	/*
	 * Start the admin thread.  This will also setup the admin queue
	 * interrupt.
	 */
	error = nvme_start_admin_thread(sc);
	if (error) {
		nvme_pci_detach(dev);
		return (ENXIO);
	}
	/*
	 * Success: register the controller on the global list.
	 */
	lockmgr(&nvme_master_lock, LK_EXCLUSIVE);
	sc->flags |= NVME_SC_ATTACHED;
	TAILQ_INSERT_TAIL(&nvme_sc_list, sc, entry);
	lockmgr(&nvme_master_lock, LK_RELEASE);

	return(0);
}
44297a077a0SMatthew Dillon 
44397a077a0SMatthew Dillon /*
44497a077a0SMatthew Dillon  * Device unload / detachment
44597a077a0SMatthew Dillon  */
44697a077a0SMatthew Dillon static int
44797a077a0SMatthew Dillon nvme_pci_detach(device_t dev)
44897a077a0SMatthew Dillon {
44997a077a0SMatthew Dillon 	nvme_softc_t *sc = device_get_softc(dev);
45018d2384bSMatthew Dillon 	int i;
45197a077a0SMatthew Dillon 
45297a077a0SMatthew Dillon 	/*
45397a077a0SMatthew Dillon 	 * Stop the admin thread
45497a077a0SMatthew Dillon 	 */
45597a077a0SMatthew Dillon 	nvme_stop_admin_thread(sc);
45697a077a0SMatthew Dillon 
45797a077a0SMatthew Dillon 	/*
45811759406SMatthew Dillon 	 * Issue a normal shutdown and wait for completion
45911759406SMatthew Dillon 	 */
46011759406SMatthew Dillon 	nvme_issue_shutdown(sc);
46111759406SMatthew Dillon 
46211759406SMatthew Dillon 	/*
46397a077a0SMatthew Dillon 	 * Disable the chip
46497a077a0SMatthew Dillon 	 */
46597a077a0SMatthew Dillon 	nvme_enable(sc, 0);
46697a077a0SMatthew Dillon 
46797a077a0SMatthew Dillon 	/*
46897a077a0SMatthew Dillon 	 * Free admin memory
46997a077a0SMatthew Dillon 	 */
47097a077a0SMatthew Dillon 	nvme_free_subqueue(sc, 0);
47197a077a0SMatthew Dillon 	nvme_free_comqueue(sc, 0);
47297a077a0SMatthew Dillon 
47397a077a0SMatthew Dillon 	/*
47497a077a0SMatthew Dillon 	 * Release related resources.
47597a077a0SMatthew Dillon 	 */
47618d2384bSMatthew Dillon 	for (i = 0; i < sc->nirqs; ++i) {
47718d2384bSMatthew Dillon 		if (sc->irq[i]) {
47818d2384bSMatthew Dillon 			bus_release_resource(dev, SYS_RES_IRQ,
47918d2384bSMatthew Dillon 					     sc->rid_irq[i], sc->irq[i]);
48018d2384bSMatthew Dillon 			sc->irq[i] = NULL;
48118d2384bSMatthew Dillon 			if (sc->irq_type == PCI_INTR_TYPE_MSIX)
48218d2384bSMatthew Dillon 				pci_release_msix_vector(dev, sc->rid_irq[i]);
48397a077a0SMatthew Dillon 		}
48418d2384bSMatthew Dillon 	}
48518d2384bSMatthew Dillon 	switch(sc->irq_type) {
48618d2384bSMatthew Dillon 	case PCI_INTR_TYPE_MSI:
48797a077a0SMatthew Dillon 		pci_release_msi(dev);
48818d2384bSMatthew Dillon 		break;
48918d2384bSMatthew Dillon 	case PCI_INTR_TYPE_MSIX:
49018d2384bSMatthew Dillon 		pci_teardown_msix(dev);
49118d2384bSMatthew Dillon 		break;
49218d2384bSMatthew Dillon 	default:
49318d2384bSMatthew Dillon 		break;
49418d2384bSMatthew Dillon 	}
49597a077a0SMatthew Dillon 
49697a077a0SMatthew Dillon 	/*
49797a077a0SMatthew Dillon 	 * Release remaining chipset resources
49897a077a0SMatthew Dillon 	 */
49997a077a0SMatthew Dillon 	if (sc->regs) {
50097a077a0SMatthew Dillon 		bus_release_resource(dev, SYS_RES_MEMORY,
50197a077a0SMatthew Dillon 				     sc->rid_regs, sc->regs);
50297a077a0SMatthew Dillon 		sc->regs = NULL;
50397a077a0SMatthew Dillon 	}
5047e782064SMatthew Dillon 	if (sc->bar4) {
5057e782064SMatthew Dillon 		bus_release_resource(dev, SYS_RES_MEMORY,
5067e782064SMatthew Dillon 				     sc->rid_bar4, sc->regs);
5077e782064SMatthew Dillon 		sc->bar4 = NULL;
5087e782064SMatthew Dillon 	}
5097e782064SMatthew Dillon 
51097a077a0SMatthew Dillon 	/*
51197a077a0SMatthew Dillon 	 * Cleanup the DMA tags
51297a077a0SMatthew Dillon 	 */
51397a077a0SMatthew Dillon 	if (sc->prps_tag) {
51497a077a0SMatthew Dillon 		bus_dma_tag_destroy(sc->prps_tag);
51597a077a0SMatthew Dillon 		sc->prps_tag = NULL;
51697a077a0SMatthew Dillon 	}
51797a077a0SMatthew Dillon 	if (sc->sque_tag) {
51897a077a0SMatthew Dillon 		bus_dma_tag_destroy(sc->sque_tag);
51997a077a0SMatthew Dillon 		sc->sque_tag = NULL;
52097a077a0SMatthew Dillon 	}
52197a077a0SMatthew Dillon 	if (sc->cque_tag) {
52297a077a0SMatthew Dillon 		bus_dma_tag_destroy(sc->cque_tag);
52397a077a0SMatthew Dillon 		sc->cque_tag = NULL;
52497a077a0SMatthew Dillon 	}
52597a077a0SMatthew Dillon 	if (sc->adm_tag) {
52697a077a0SMatthew Dillon 		bus_dma_tag_destroy(sc->adm_tag);
52797a077a0SMatthew Dillon 		sc->adm_tag = NULL;
52897a077a0SMatthew Dillon 	}
52997a077a0SMatthew Dillon 
53011759406SMatthew Dillon 	if (sc->flags & NVME_SC_ATTACHED) {
53111759406SMatthew Dillon 		lockmgr(&nvme_master_lock, LK_EXCLUSIVE);
53211759406SMatthew Dillon 		sc->flags &= ~NVME_SC_ATTACHED;
53311759406SMatthew Dillon 		TAILQ_REMOVE(&nvme_sc_list, sc, entry);
53411759406SMatthew Dillon 		lockmgr(&nvme_master_lock, LK_RELEASE);
53511759406SMatthew Dillon 	}
53611759406SMatthew Dillon 
53797a077a0SMatthew Dillon 	return (0);
53897a077a0SMatthew Dillon }
539