/*
 * Copyright (c) 2016 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "nvme.h"

static int	nvme_pci_attach(device_t);
static int	nvme_pci_detach(device_t);

static const nvme_device_t nvme_devices[] = {
	/* Vendor-specific table goes here (see ahci for an example) */
	{ 0, 0, nvme_pci_attach, nvme_pci_detach, "NVME-PCIe" }
};

static int nvme_msix_enable = 1;
TUNABLE_INT("hw.nvme.msix.enable", &nvme_msix_enable);
static int nvme_msi_enable = 0;
TUNABLE_INT("hw.nvme.msi.enable", &nvme_msi_enable);

TAILQ_HEAD(, nvme_softc) nvme_sc_list = TAILQ_HEAD_INITIALIZER(nvme_sc_list);
struct lock nvme_master_lock = LOCK_INITIALIZER("nvmstr", 0, 0);

static int last_global_cpu;

/*
 * Match during probe and attach.  The device does not yet have a softc.
 */
const nvme_device_t *
nvme_lookup_device(device_t dev)
{
	const nvme_device_t *ad;
	uint16_t vendor = pci_get_vendor(dev);
	uint16_t product = pci_get_device(dev);
	uint8_t class = pci_get_class(dev);
	uint8_t subclass = pci_get_subclass(dev);
	uint8_t progif = pci_read_config(dev, PCIR_PROGIF, 1);
	int is_nvme;

	/*
	 * Generally speaking, if the PCI device does not identify as
	 * NVMe we skip it.
	 */
	if (class == PCIC_STORAGE && subclass == PCIS_STORAGE_NVM &&
	    progif == PCIP_STORAGE_NVM_ENTERPRISE_NVMHCI_1_0) {
		is_nvme = 1;
	} else {
		is_nvme = 0;
	}

	for (ad = &nvme_devices[0]; ad->vendor; ++ad) {
		if (ad->vendor == vendor && ad->product == product)
			return (ad);
	}

	/*
	 * The last ad is the default match, used only if the PCI device
	 * identifies as NVMe via its class code.
	 */
	if (is_nvme == 0)
		ad = NULL;
	return (ad);
}
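
/*
 * Illustrative only: a vendor-specific quirk entry added to nvme_devices[]
 * would be matched by nvme_lookup_device() before the all-zeroes catch-all,
 * e.g. (hypothetical vendor/device IDs, not part of the original table):
 *
 *	{ 0x8086, 0x0953, nvme_pci_attach, nvme_pci_detach, "Intel-DC-P3700" },
 *
 * The terminating { 0, 0, ... } entry both ends the scan loop and serves
 * as the generic match for any device carrying the NVMe class code.
 */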

/*
 * Attach functions.  They all eventually fall through to nvme_pci_attach().
 */
static int
nvme_pci_attach(device_t dev)
{
	nvme_softc_t *sc = device_get_softc(dev);
	uint32_t reg;
	int error;
	int msi_enable;
	int msix_enable;

	/*
	 * 0x0400 is the PCI command register's INTx Disable bit.
	 */
	if (pci_read_config(dev, PCIR_COMMAND, 2) & 0x0400) {
		device_printf(dev, "BIOS disabled PCI interrupt, "
				   "re-enabling\n");
		pci_write_config(dev, PCIR_COMMAND,
			pci_read_config(dev, PCIR_COMMAND, 2) & ~0x0400, 2);
	}

	sc->dev = dev;

	/*
	 * Map the register window
	 */
	sc->rid_regs = PCIR_BAR(0);
	sc->regs = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
					  &sc->rid_regs, RF_ACTIVE);
	if (sc->regs == NULL) {
		device_printf(dev, "unable to map registers\n");
		nvme_pci_detach(dev);
		return (ENXIO);
	}
	sc->iot = rman_get_bustag(sc->regs);
	sc->ioh = rman_get_bushandle(sc->regs);

	/*
	 * NVMe allows the MSI-X table to be mapped to BAR 4/5.
	 * Always try to map BAR4, but it's ok if it fails.  Must
	 * be done prior to allocating our interrupts.
	 */
	sc->rid_bar4 = PCIR_BAR(4);
	sc->bar4 = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
					  &sc->rid_bar4, RF_ACTIVE);

	/*
	 * Map the interrupt or initial interrupt which will be used for
	 * the admin queue.  NVMe chipsets can potentially support a huge
	 * number of MSI-X vectors but we really only need enough for the
	 * available cpus, plus 1 for the admin queue.
	 */
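	/*
	 * The device_getenv_int() calls below let the global tunables be
	 * overridden per device; for example (editorial, assuming unit 0),
	 * setting hw.nvme0.msix.enable=0 in loader.conf should disable
	 * MSI-X for that controller only.
	 */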
	msi_enable = device_getenv_int(dev, "msi.enable", nvme_msi_enable);
	msix_enable = device_getenv_int(dev, "msix.enable", nvme_msix_enable);

	error = 0;
	if (msix_enable) {
		int i;
		int cpu;

		sc->nirqs = pci_msix_count(dev);
		sc->irq_type = PCI_INTR_TYPE_MSIX;
		if (sc->nirqs > ncpus + 1)		/* max we need */
			sc->nirqs = ncpus + 1;

		error = pci_setup_msix(dev);
		cpu = (last_global_cpu + 0) % ncpus;	/* GCC warn */
		for (i = 0; error == 0 && i < sc->nirqs; ++i) {
			cpu = (last_global_cpu + i) % ncpus;
			error = pci_alloc_msix_vector(dev, i,
						      &sc->rid_irq[i], cpu);
			if (error)
				break;
			sc->irq[i] = bus_alloc_resource_any(dev, SYS_RES_IRQ,
							    &sc->rid_irq[i],
							    RF_ACTIVE);
			/*
			 * We want this to overwrite queue 0's cpu vector
			 * when the cpus rotate through later on.
			 */
			if (sc->cputovect[cpu] == 0)
				sc->cputovect[cpu] = i;
		}

		/*
		 * If we did not iterate over enough cpus (that is, there
		 * weren't enough irqs for all available cpus) we still
		 * need to finish out the sc->cputovect[] mapping.
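		 *
		 * Illustrative walk-through (editorial, assuming
		 * last_global_cpu == 0): with ncpus = 8 and nirqs = 4 the
		 * loop above maps cpu0-3 to vectors 0-3; the loop below
		 * then maps cpu4->1, cpu5->2, cpu6->3, cpu7->1, and
		 * finally remaps cpu0->2, leaving vector 0 dedicated to
		 * the admin queue.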
		 */
		while (error == 0) {
			cpu = (cpu + 1) % ncpus;
			i = (i + 1) % sc->nirqs;
			if (i == 0)
				i = 1;	/* skip vector 0 (admin queue) */
			if (sc->cputovect[cpu] != 0)
				break;
			sc->cputovect[cpu] = i;
		}

		if (error) {
			while (--i >= 0) {
				bus_release_resource(dev, SYS_RES_IRQ,
						     sc->rid_irq[i],
						     sc->irq[i]);
				pci_release_msix_vector(dev, sc->rid_irq[i]);
				sc->irq[i] = NULL;
			}
			/* leave error intact to fall through to normal */
		} else {
			last_global_cpu = (last_global_cpu + sc->nirqs) % ncpus;
			pci_enable_msix(dev);
		}
	}

	/*
	 * If MSI-X could not be used (or was disabled via the tunable,
	 * in which case no irq has been allocated yet), fall back to a
	 * single normal or MSI interrupt.
	 */
	if (error || sc->irq[0] == NULL) {
		uint32_t irq_flags;

		error = 0;
		sc->nirqs = 1;
		sc->irq_type = pci_alloc_1intr(dev, msi_enable,
					       &sc->rid_irq[0], &irq_flags);
		sc->irq[0] = bus_alloc_resource_any(dev, SYS_RES_IRQ,
						    &sc->rid_irq[0], irq_flags);
	}
	if (sc->irq[0] == NULL) {
		device_printf(dev, "unable to map interrupt\n");
		nvme_pci_detach(dev);
		return (ENXIO);
	} else {
		const char *type;

		switch(sc->irq_type) {
		case PCI_INTR_TYPE_MSI:
			type = "MSI";
			break;
		case PCI_INTR_TYPE_MSIX:
			type = "MSIX";
			break;
		default:
			type = "normal-int";
			break;
		}
		device_printf(dev, "mapped %d %s IRQs\n", sc->nirqs, type);
	}
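
	/*
	 * Note (editorial): in the single-interrupt fallback case
	 * sc->cputovect[] is left zero-filled, so every cpu maps to
	 * vector 0 and the one interrupt services both the admin queue
	 * and all I/O queues.
	 */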

	/*
	 * Make sure the chip is disabled, which will reset all controller
	 * registers except for the admin queue registers.  The device
	 * should already be disabled so this is usually instantaneous.
	 * Use a fixed 5-second timeout in case it is not.  We want the
	 * register reads below to occur after the device has been
	 * disabled.
	 */
	sc->entimo = hz * 5;
	error = nvme_enable(sc, 0);
	if (error) {
		nvme_pci_detach(dev);
		return (ENXIO);
	}

	/*
	 * Get capabilities and version, and report them.
	 */
	sc->vers = nvme_read(sc, NVME_REG_VERS);
	sc->cap = nvme_read8(sc, NVME_REG_CAP);
	sc->maxqe = NVME_CAP_MQES_GET(sc->cap);
	sc->dstrd4 = NVME_CAP_DSTRD_GET(sc->cap);

	device_printf(dev, "NVME Version %u.%u maxqe=%u caps=%016jx\n",
		      NVME_VERS_MAJOR_GET(sc->vers),
		      NVME_VERS_MINOR_GET(sc->vers),
		      sc->maxqe, sc->cap);

	/*
	 * Enable timeout, 500ms increments.  Convert to ticks.
	 */
	sc->entimo = NVME_CAP_TIMEOUT_GET(sc->cap) * hz / 2;	/* in ticks */
	++sc->entimo;		/* fudge */

	/*
	 * Validate maxqe.  To cap the amount of memory we reserve for
	 * PRPs we limit maxqe to 256.  Also make sure it is a power of
	 * two.
	 */
	if (sc->maxqe < 2) {
		device_printf(dev,
			      "Attach failed, max queue entries (%d) "
			      "below minimum (2)\n", sc->maxqe);
		nvme_pci_detach(dev);
		return (ENXIO);
	}
	if (sc->maxqe > 256)
		sc->maxqe = 256;
	for (reg = 2; reg <= sc->maxqe; reg <<= 1)
		;
	sc->maxqe = reg >> 1;	/* round down to a power of 2 */

	/*
	 * DMA tags
	 *
	 * PRP - Worst case PRPs needed per queue is MAXPHYS / PAGE_SIZE
	 *	 (typically 64), multiplied by maxqe (typ 256).  Roughly
	 *	 ~128KB per queue.  Align for cache performance.  We actually
	 *	 need one more PRP per queue entry worst-case to handle
	 *	 buffer overlap, but we have an extra one in the command
	 *	 structure so we don't have to calculate that out.
	 *
	 *	 Remember that we intend to allocate potentially many queues,
	 *	 so we don't want to bloat this too much.  A queue depth of
	 *	 256 is plenty.
	 *
	 * CMD - Storage for the submit queue.  maxqe * 64	(~16KB)
	 *
	 * RES - Storage for the completion queue.  maxqe * 16	(~4KB)
	 *
	 * ADM - Storage for admin command DMA data.  Maximum admin command
	 *	 DMA data is 4KB so reserve maxqe * 4KB	(~1MB).
	 *	 There is only one admin queue.
	 *
	 * NOTE: There are no boundary requirements for NVMe, but I specify a
	 *	 4MB boundary anyway because this reduces mass-bit flipping
	 *	 of address bits inside the controller when incrementing
	 *	 DMA addresses.  Why not?  Can't hurt.
	 */
	sc->prp_bytes = sizeof(uint64_t) * (MAXPHYS / PAGE_SIZE) * sc->maxqe;
	sc->cmd_bytes = sizeof(nvme_subq_item_t) * sc->maxqe;
	sc->res_bytes = sizeof(nvme_comq_item_t) * sc->maxqe;
	sc->adm_bytes = NVME_MAX_ADMIN_BUFFER * sc->maxqe;

	error = 0;

	error += bus_dma_tag_create(
			NULL,			/* parent tag */
			PAGE_SIZE,		/* alignment */
			4 * 1024 * 1024,	/* boundary */
			BUS_SPACE_MAXADDR,	/* lowaddr */
			BUS_SPACE_MAXADDR,	/* hiaddr */
			NULL,			/* filter */
			NULL,			/* filterarg */
			sc->prp_bytes,		/* [max]size */
			1,			/* maxsegs */
			sc->prp_bytes,		/* maxsegsz */
			0,			/* flags */
			&sc->prps_tag);		/* return tag */

	error += bus_dma_tag_create(
			NULL,			/* parent tag */
			PAGE_SIZE,		/* alignment */
			4 * 1024 * 1024,	/* boundary */
			BUS_SPACE_MAXADDR,	/* lowaddr */
			BUS_SPACE_MAXADDR,	/* hiaddr */
			NULL,			/* filter */
			NULL,			/* filterarg */
			sc->cmd_bytes,		/* [max]size */
			1,			/* maxsegs */
			sc->cmd_bytes,		/* maxsegsz */
			0,			/* flags */
			&sc->sque_tag);		/* return tag */

	error += bus_dma_tag_create(
			NULL,			/* parent tag */
			PAGE_SIZE,		/* alignment */
			4 * 1024 * 1024,	/* boundary */
			BUS_SPACE_MAXADDR,	/* lowaddr */
			BUS_SPACE_MAXADDR,	/* hiaddr */
			NULL,			/* filter */
			NULL,			/* filterarg */
			sc->res_bytes,		/* [max]size */
			1,			/* maxsegs */
			sc->res_bytes,		/* maxsegsz */
			0,			/* flags */
			&sc->cque_tag);		/* return tag */

	error += bus_dma_tag_create(
			NULL,			/* parent tag */
			PAGE_SIZE,		/* alignment */
			4 * 1024 * 1024,	/* boundary */
			BUS_SPACE_MAXADDR,	/* lowaddr */
			BUS_SPACE_MAXADDR,	/* hiaddr */
			NULL,			/* filter */
			NULL,			/* filterarg */
			sc->adm_bytes,		/* [max]size */
			1,			/* maxsegs */
			sc->adm_bytes,		/* maxsegsz */
			0,			/* flags */
			&sc->adm_tag);		/* return tag */

	if (error) {
		device_printf(dev, "unable to create dma tags\n");
		nvme_pci_detach(dev);
		return (ENXIO);
	}

	/*
	 * Setup the admin queues (qid 0).
	 */
	error = nvme_alloc_subqueue(sc, 0);
	if (error) {
		device_printf(dev, "unable to allocate admin subqueue\n");
		nvme_pci_detach(dev);
		return (ENXIO);
	}
	error = nvme_alloc_comqueue(sc, 0);
	if (error) {
		device_printf(dev, "unable to allocate admin comqueue\n");
		nvme_pci_detach(dev);
		return (ENXIO);
	}

	/*
	 * Initialize the admin queue registers
	 */
	reg = NVME_ATTR_COM_SET(sc->maxqe) | NVME_ATTR_SUB_SET(sc->maxqe);
	nvme_write(sc, NVME_REG_ADM_ATTR, reg);
	nvme_write8(sc, NVME_REG_ADM_SUBADR, (uint64_t)sc->subqueues[0].psubq);
	nvme_write8(sc, NVME_REG_ADM_COMADR, (uint64_t)sc->comqueues[0].pcomq);

	/*
	 * Other configuration registers.  The entry-size fields are log2
	 * encoded (2^6 = 64, 2^4 = 16).
	 */
	reg = NVME_CONFIG_IOSUB_ES_SET(6) |		/* 64 byte sub entry */
	      NVME_CONFIG_IOCOM_ES_SET(4) |		/* 16 byte com entry */
	      NVME_CONFIG_MEMPG_SET(PAGE_SHIFT) |	/* 4K pages */
	      NVME_CONFIG_CSS_NVM;			/* NVME command set */
	nvme_write(sc, NVME_REG_CONFIG, reg);

	reg = nvme_read(sc, NVME_REG_MEMSIZE);

	/*
	 * Enable the chip for operation
	 */
	error = nvme_enable(sc, 1);
	if (error) {
		nvme_enable(sc, 0);
		nvme_pci_detach(dev);
		return (ENXIO);
	}

	/*
	 * Start the admin thread.  This will also setup the admin queue
	 * interrupt.
	 */
	error = nvme_start_admin_thread(sc);
	if (error) {
		nvme_pci_detach(dev);
		return (ENXIO);
	}
	lockmgr(&nvme_master_lock, LK_EXCLUSIVE);
	sc->flags |= NVME_SC_ATTACHED;
	TAILQ_INSERT_TAIL(&nvme_sc_list, sc, entry);
	lockmgr(&nvme_master_lock, LK_RELEASE);

	return (0);
}

/*
 * Device unload / detachment.  This is also the error-unwind path for
 * nvme_pci_attach(), so every teardown step must tolerate resources
 * that were never allocated.
 */
static int
nvme_pci_detach(device_t dev)
{
	nvme_softc_t *sc = device_get_softc(dev);
	int i;

	/*
	 * Stop the admin thread
	 */
	nvme_stop_admin_thread(sc);

	/*
	 * Issue a normal shutdown and wait for completion
	 */
	nvme_issue_shutdown(sc);

	/*
	 * Disable the chip
	 */
	nvme_enable(sc, 0);

	/*
	 * Free admin memory
	 */
	nvme_free_subqueue(sc, 0);
	nvme_free_comqueue(sc, 0);

	/*
	 * Release related resources.
	 */
	for (i = 0; i < sc->nirqs; ++i) {
		if (sc->irq[i]) {
			bus_release_resource(dev, SYS_RES_IRQ,
					     sc->rid_irq[i], sc->irq[i]);
			sc->irq[i] = NULL;
			if (sc->irq_type == PCI_INTR_TYPE_MSIX)
				pci_release_msix_vector(dev, sc->rid_irq[i]);
		}
	}
	switch(sc->irq_type) {
	case PCI_INTR_TYPE_MSI:
		pci_release_msi(dev);
		break;
	case PCI_INTR_TYPE_MSIX:
		pci_teardown_msix(dev);
		break;
	default:
		break;
	}

	/*
	 * Release remaining chipset resources
	 */
	if (sc->regs) {
		bus_release_resource(dev, SYS_RES_MEMORY,
				     sc->rid_regs, sc->regs);
		sc->regs = NULL;
	}
	if (sc->bar4) {
		bus_release_resource(dev, SYS_RES_MEMORY,
				     sc->rid_bar4, sc->bar4);
		sc->bar4 = NULL;
	}

	/*
	 * Cleanup the DMA tags
	 */
	if (sc->prps_tag) {
		bus_dma_tag_destroy(sc->prps_tag);
		sc->prps_tag = NULL;
	}
	if (sc->sque_tag) {
		bus_dma_tag_destroy(sc->sque_tag);
		sc->sque_tag = NULL;
	}
	if (sc->cque_tag) {
		bus_dma_tag_destroy(sc->cque_tag);
		sc->cque_tag = NULL;
	}
	if (sc->adm_tag) {
		bus_dma_tag_destroy(sc->adm_tag);
		sc->adm_tag = NULL;
	}

	if (sc->flags & NVME_SC_ATTACHED) {
		lockmgr(&nvme_master_lock, LK_EXCLUSIVE);
		sc->flags &= ~NVME_SC_ATTACHED;
		TAILQ_REMOVE(&nvme_sc_list, sc, entry);
		lockmgr(&nvme_master_lock, LK_RELEASE);
	}

	return (0);
}
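
#if 0
/*
 * Editorial sketch, not compiled: the four bus_dma_tag_create() calls in
 * nvme_pci_attach() differ only in the buffer size and the returned tag,
 * so they could be collapsed into a helper along these lines.
 * nvme_dma_tag_create() is a hypothetical name, not part of this driver.
 * Usage would be, e.g.:
 *
 *	error += nvme_dma_tag_create(sc->prp_bytes, &sc->prps_tag);
 */
static int
nvme_dma_tag_create(bus_size_t bytes, bus_dma_tag_t *tagp)
{
	return (bus_dma_tag_create(
			NULL,			/* parent tag */
			PAGE_SIZE,		/* alignment */
			4 * 1024 * 1024,	/* boundary */
			BUS_SPACE_MAXADDR,	/* lowaddr */
			BUS_SPACE_MAXADDR,	/* hiaddr */
			NULL,			/* filter */
			NULL,			/* filterarg */
			bytes,			/* [max]size */
			1,			/* maxsegs */
			bytes,			/* maxsegsz */
			0,			/* flags */
			tagp));			/* return tag */
}
#endif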