/*
 * Copyright (c) 2016 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "nvme.h"

static int	nvme_pci_attach(device_t);
static int	nvme_pci_detach(device_t);

static const nvme_device_t nvme_devices[] = {
	/* Vendor-specific table goes here (see ahci for example) */
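	/*
	 * A vendor-specific entry would look like the following
	 * (hypothetical IDs, shown for illustration only):
	 *
	 *	{ 0x8086, 0x0953, nvme_pci_attach, nvme_pci_detach,
	 *	  "Intel-DC-P3x00" },
	 */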
	{ 0, 0, nvme_pci_attach, nvme_pci_detach, "NVME-PCIe" }
};

static int nvme_msix_enable = 1;
TUNABLE_INT("hw.nvme.msix.enable", &nvme_msix_enable);
static int nvme_msi_enable = 0;
TUNABLE_INT("hw.nvme.msi.enable", &nvme_msi_enable);

TAILQ_HEAD(, nvme_softc) nvme_sc_list = TAILQ_HEAD_INITIALIZER(nvme_sc_list);
struct lock nvme_master_lock = LOCK_INITIALIZER("nvmstr", 0, 0);

static int last_global_cpu;

/*
 * Match during probe and attach.  The device does not yet have a softc.
 */
const nvme_device_t *
nvme_lookup_device(device_t dev)
{
	const nvme_device_t *ad;
	uint16_t vendor = pci_get_vendor(dev);
	uint16_t product = pci_get_device(dev);
	uint8_t class = pci_get_class(dev);
	uint8_t subclass = pci_get_subclass(dev);
	uint8_t progif = pci_read_config(dev, PCIR_PROGIF, 1);
	int is_nvme;

	/*
	 * Generally speaking, if the PCI device does not identify as an
	 * NVMe controller we skip it.
	 */
	if (class == PCIC_STORAGE && subclass == PCIS_STORAGE_NVM &&
	    progif == PCIP_STORAGE_NVM_ENTERPRISE_NVMHCI_1_0) {
		is_nvme = 1;
	} else {
		is_nvme = 0;
	}

	for (ad = &nvme_devices[0]; ad->vendor; ++ad) {
		if (ad->vendor == vendor && ad->product == product)
			return (ad);
	}

	/*
	 * The last ad is the default match if the PCI device identifies
	 * as NVMe.
	 */
	if (is_nvme == 0)
		ad = NULL;
	return (ad);
}

/*
 * Attach functions.  They all eventually fall through to nvme_pci_attach().
 */
static int
nvme_pci_attach(device_t dev)
{
	nvme_softc_t *sc = device_get_softc(dev);
	uint32_t reg;
	int error;
	int msi_enable;
	int msix_enable;

#if 0
	if (pci_read_config(dev, PCIR_COMMAND, 2) & 0x0400) {
		device_printf(dev, "BIOS disabled PCI interrupt, "
				   "re-enabling\n");
		pci_write_config(dev, PCIR_COMMAND,
			pci_read_config(dev, PCIR_COMMAND, 2) & ~0x0400, 2);
	}
#endif

	sc->dev = dev;

	/*
	 * Map the register window
	 */
	sc->rid_regs = PCIR_BAR(0);
	sc->regs = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
					  &sc->rid_regs, RF_ACTIVE);
	if (sc->regs == NULL) {
		device_printf(dev, "unable to map registers\n");
		nvme_pci_detach(dev);
		return (ENXIO);
	}
	sc->iot = rman_get_bustag(sc->regs);
	sc->ioh = rman_get_bushandle(sc->regs);

	/*
	 * NVMe allows the MSI-X table to be mapped to BAR 4/5.
	 * Always try to map BAR4, but it's ok if it fails.  Must
	 * be done prior to allocating our interrupts.
	 */
	sc->rid_bar4 = PCIR_BAR(4);
	sc->bar4 = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
					  &sc->rid_bar4, RF_ACTIVE);

	/*
	 * Map the interrupt or initial interrupt which will be used for
	 * the admin queue.  NVMe chipsets can potentially support a huge
	 * number of MSI-X vectors but we really only need enough for the
	 * available cpus, plus 1.
	 */
	msi_enable = device_getenv_int(dev, "msi.enable", nvme_msi_enable);
	msix_enable = device_getenv_int(dev, "msix.enable", nvme_msix_enable);

	error = 0;
	if (msix_enable) {
		int i;
		int cpu;

		sc->nirqs = pci_msix_count(dev);
		sc->irq_type = PCI_INTR_TYPE_MSIX;
		if (sc->nirqs > ncpus + 1)		/* max we need */
			sc->nirqs = ncpus + 1;

		error = pci_setup_msix(dev);
		cpu = (last_global_cpu + 0) % ncpus;	/* GCC warn */
		for (i = 0; error == 0 && i < sc->nirqs; ++i) {
			cpu = (last_global_cpu + i) % ncpus;
			error = pci_alloc_msix_vector(dev, i,
						      &sc->rid_irq[i], cpu);
			if (error)
				break;
			sc->irq[i] = bus_alloc_resource_any(dev, SYS_RES_IRQ,
							    &sc->rid_irq[i],
							    RF_ACTIVE);
			/*
			 * We want this to overwrite queue 0's cpu vector
			 * when the cpus rotate through later on.
			 */
			if (sc->cputovect[cpu] == 0)
				sc->cputovect[cpu] = i;
		}

		/*
		 * If we did not iterate enough cpus (that is, there weren't
		 * enough irqs for all available cpus) we still need to
		 * finish our sc->cputovect[] mapping.
		 */
		while (error == 0) {
			cpu = (cpu + 1) % ncpus;
			i = (i + 1) % sc->nirqs;
			if (i == 0)
				i = 1;
			if (sc->cputovect[cpu] != 0)
				break;
			sc->cputovect[cpu] = i;
		}
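
		/*
		 * Illustrative example of the resulting mapping: with
		 * ncpus == 4, nirqs == 3, and last_global_cpu == 0, the
		 * two loops above leave cputovect[] = { 2, 1, 2, 1 }.
		 * No cpu maps to vector 0, which services the admin
		 * queue, and the I/O vectors are shared round-robin
		 * among the cpus.
		 */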

		if (error) {
			while (--i >= 0) {
				bus_release_resource(dev, SYS_RES_IRQ,
						     sc->rid_irq[i],
						     sc->irq[i]);
				pci_release_msix_vector(dev, sc->rid_irq[i]);
				sc->irq[i] = NULL;
			}
			/* leave error intact to fall through to normal */
		} else {
			last_global_cpu = (last_global_cpu + sc->nirqs) % ncpus;
			pci_enable_msix(dev);
		}
	}

	/*
	 * If we have to use a normal interrupt we fake the cputovect[] in
	 * order to try to map at least (ncpus) submission queues.  The admin
	 * code will limit the number of completion queues to something
	 * reasonable when nirqs is 1 since the single interrupt polls all
	 * completion queues.
	 *
	 * NOTE: We do NOT want to map a single completion queue (#0), because
	 *	 then an I/O submission and/or completion queue will overlap
	 *	 the admin submission or completion queue, and that can cause
	 *	 havoc when admin commands are submitted that don't return
	 *	 for long periods of time.
	 *
	 * NOTE: Chipsets supporting MSI-X *MIGHT* *NOT* properly support
	 *	 a normal pin-based level interrupt.  For example, the BPX
	 *	 NVMe SSD just leaves the level interrupt stuck on.  Do not
	 *	 disable MSI-X unless you have no choice.
	 */
	if (msix_enable == 0 || error) {
		uint32_t irq_flags;
		int i;

		error = 0;
		sc->nirqs = 1;
		sc->irq_type = pci_alloc_1intr(dev, msi_enable,
					       &sc->rid_irq[0], &irq_flags);
		sc->irq[0] = bus_alloc_resource_any(dev, SYS_RES_IRQ,
						    &sc->rid_irq[0], irq_flags);

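		/*
		 * Map cpu N to I/O queue vector N + 1 so that queue 0
		 * (the admin queue) is never reused for I/O.
		 */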
		for (i = 0; i < ncpus; ++i)
			sc->cputovect[i] = i + 1;
	}
	if (sc->irq[0] == NULL) {
		device_printf(dev, "unable to map interrupt\n");
		nvme_pci_detach(dev);
		return (ENXIO);
	} else {
		const char *type;

		switch(sc->irq_type) {
		case PCI_INTR_TYPE_MSI:
			type = "MSI";
			break;
		case PCI_INTR_TYPE_MSIX:
			type = "MSIX";
			break;
		default:
			type = "normal-int";
			break;
		}
		device_printf(dev, "mapped %d %s IRQs\n", sc->nirqs, type);
	}

	/*
	 * Make sure the chip is disabled, which will reset all controller
	 * registers except for the admin queue registers.  The device should
	 * already be disabled so this is usually instantaneous.  Use a
	 * fixed 5-second timeout in case it is not.  I'd like my other
	 * reads to occur after the device has been disabled.
	 */
	sc->entimo = hz * 5;
	error = nvme_enable(sc, 0);
	if (error) {
		nvme_pci_detach(dev);
		return (ENXIO);
	}

	/*
	 * Get capabilities and version, and report them.
	 */
	sc->vers = nvme_read(sc, NVME_REG_VERS);
	sc->cap = nvme_read8(sc, NVME_REG_CAP);
	sc->maxqe = NVME_CAP_MQES_GET(sc->cap);
	sc->dstrd4 = NVME_CAP_DSTRD_GET(sc->cap);

	device_printf(dev, "NVME Version %u.%u maxqe=%u caps=%016jx\n",
		      NVME_VERS_MAJOR_GET(sc->vers),
		      NVME_VERS_MINOR_GET(sc->vers),
		      sc->maxqe, sc->cap);

	/*
	 * Enable timeout, in 500ms increments.  Convert to ticks.
	 */
	sc->entimo = NVME_CAP_TIMEOUT_GET(sc->cap) * hz / 2; /* in ticks */
	++sc->entimo;		/* fudge */
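	/*
	 * (For example, a CAP timeout field of 10 means 10 x 500ms, i.e.
	 *  5 seconds, which works out to 10 * hz / 2 = 5 * hz ticks here.)
	 */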

	/*
	 * Validate maxqe.  To cap the amount of memory we reserve for
	 * PRPs we limit maxqe to 256.  Also make sure it is a power of
	 * two.
	 */
	if (sc->maxqe < 2) {
		device_printf(dev,
			      "Attach failed, max queue entries (%d) "
			      "below minimum (2)\n", sc->maxqe);
		nvme_pci_detach(dev);
		return (ENXIO);
	}
	if (sc->maxqe > 256)
		sc->maxqe = 256;
	for (reg = 2; reg <= sc->maxqe; reg <<= 1)
		;
	sc->maxqe = reg >> 1;
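	/* (e.g. a reported maxqe of 200 is rounded down to 128 here) */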

	/*
	 * DMA tags
	 *
	 * PRP - Worst case PRPs needed per queue is MAXPHYS / PAGE_SIZE
	 *	 (typically 64), multiplied by maxqe (typ 256).  Roughly
	 *	 ~128KB per queue.  Align for cache performance.  We actually
	 *	 need one more PRP per queue entry worst-case to handle
	 *	 buffer overlap, but we have an extra one in the command
	 *	 structure so we don't have to calculate that out.
	 *
	 *	 Remember that we intend to allocate potentially many queues,
	 *	 so we don't want to bloat this too much.  A queue depth of
	 *	 256 is plenty.
	 *
	 * CMD - Storage for the submit queue.  maxqe * 64	(~16KB)
	 *
	 * RES - Storage for the completion queue.  maxqe * 16	(~4KB)
	 *
	 * ADM - Storage for admin command DMA data.  Maximum admin command
	 *	 DMA data is 4KB so reserve maxqe * 4KB (~1MB).  There is only
	 *	 one admin queue.
	 *
	 * NOTE: There are no boundary requirements for NVMe, but I specify a
	 *	 4MB boundary anyway because this reduces mass-bit flipping
	 *	 of address bits inside the controller when incrementing
	 *	 DMA addresses.  Why not?  Can't hurt.
	 */
	sc->prp_bytes = sizeof(uint64_t) * (MAXPHYS / PAGE_SIZE) * sc->maxqe;
	sc->cmd_bytes = sizeof(nvme_subq_item_t) * sc->maxqe;
	sc->res_bytes = sizeof(nvme_comq_item_t) * sc->maxqe;
	sc->adm_bytes = NVME_MAX_ADMIN_BUFFER * sc->maxqe;
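
	/*
	 * Assuming MAXPHYS is 256KB (64 pages) and maxqe is 256, the
	 * above works out to prp_bytes = 8 * 64 * 256 = 128KB,
	 * cmd_bytes = 64 * 256 = 16KB, res_bytes = 16 * 256 = 4KB, and
	 * adm_bytes = 4KB * 256 = 1MB, matching the estimates in the
	 * comment above.
	 */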

	error = 0;

	error += bus_dma_tag_create(
			NULL,			/* parent tag */
			PAGE_SIZE,		/* alignment */
			4 * 1024 * 1024,	/* boundary */
			BUS_SPACE_MAXADDR,	/* loaddr? */
			BUS_SPACE_MAXADDR,	/* hiaddr */
			sc->prp_bytes,		/* [max]size */
			1,			/* maxsegs */
			sc->prp_bytes,		/* maxsegsz */
			0,			/* flags */
			&sc->prps_tag);		/* return tag */

	error += bus_dma_tag_create(
			NULL,			/* parent tag */
			PAGE_SIZE,		/* alignment */
			4 * 1024 * 1024,	/* boundary */
			BUS_SPACE_MAXADDR,	/* loaddr? */
			BUS_SPACE_MAXADDR,	/* hiaddr */
			sc->cmd_bytes,		/* [max]size */
			1,			/* maxsegs */
			sc->cmd_bytes,		/* maxsegsz */
			0,			/* flags */
			&sc->sque_tag);		/* return tag */

	error += bus_dma_tag_create(
			NULL,			/* parent tag */
			PAGE_SIZE,		/* alignment */
			4 * 1024 * 1024,	/* boundary */
			BUS_SPACE_MAXADDR,	/* loaddr? */
			BUS_SPACE_MAXADDR,	/* hiaddr */
			sc->res_bytes,		/* [max]size */
			1,			/* maxsegs */
			sc->res_bytes,		/* maxsegsz */
			0,			/* flags */
			&sc->cque_tag);		/* return tag */

	error += bus_dma_tag_create(
			NULL,			/* parent tag */
			PAGE_SIZE,		/* alignment */
			4 * 1024 * 1024,	/* boundary */
			BUS_SPACE_MAXADDR,	/* loaddr? */
			BUS_SPACE_MAXADDR,	/* hiaddr */
			sc->adm_bytes,		/* [max]size */
			1,			/* maxsegs */
			sc->adm_bytes,		/* maxsegsz */
			0,			/* flags */
			&sc->adm_tag);		/* return tag */

	if (error) {
		device_printf(dev, "unable to create dma tags\n");
		nvme_pci_detach(dev);
		return (ENXIO);
	}

	/*
	 * Setup the admin queues (qid 0).
	 */
	error = nvme_alloc_subqueue(sc, 0);
	if (error) {
		device_printf(dev, "unable to allocate admin subqueue\n");
		nvme_pci_detach(dev);
		return (ENXIO);
	}
	error = nvme_alloc_comqueue(sc, 0);
	if (error) {
		device_printf(dev, "unable to allocate admin comqueue\n");
		nvme_pci_detach(dev);
		return (ENXIO);
	}

	/*
	 * Initialize the admin queue registers
	 */
	reg = NVME_ATTR_COM_SET(sc->maxqe) | NVME_ATTR_SUB_SET(sc->maxqe);
	nvme_write(sc, NVME_REG_ADM_ATTR, reg);
	nvme_write8(sc, NVME_REG_ADM_SUBADR, (uint64_t)sc->subqueues[0].psubq);
	nvme_write8(sc, NVME_REG_ADM_COMADR, (uint64_t)sc->comqueues[0].pcomq);

	/*
	 * qemu appears to require this; real hardware does not appear
	 * to.
	 */
	pci_enable_busmaster(dev);

	/*
	 * Other configuration registers
	 */
	reg = NVME_CONFIG_IOSUB_ES_SET(6) |	/* 64 byte sub entry */
	      NVME_CONFIG_IOCOM_ES_SET(4) |	/* 16 byte com entry */
	      NVME_CONFIG_MEMPG_SET(PAGE_SHIFT) | /* 4K pages */
	      NVME_CONFIG_CSS_NVM;		/* NVME command set */
	nvme_write(sc, NVME_REG_CONFIG, reg);

	reg = nvme_read(sc, NVME_REG_MEMSIZE);

	/*
	 * Enable the chip for operation
	 */
	error = nvme_enable(sc, 1);
	if (error) {
		nvme_enable(sc, 0);
		nvme_pci_detach(dev);
		return (ENXIO);
	}

	/*
	 * Start the admin thread.  This will also setup the admin queue
	 * interrupt.
	 */
	error = nvme_start_admin_thread(sc);
	if (error) {
		nvme_pci_detach(dev);
		return (ENXIO);
	}
	lockmgr(&nvme_master_lock, LK_EXCLUSIVE);
	sc->flags |= NVME_SC_ATTACHED;
	TAILQ_INSERT_TAIL(&nvme_sc_list, sc, entry);
	lockmgr(&nvme_master_lock, LK_RELEASE);

	return(0);
}

/*
 * Device unload / detachment
 */
static int
nvme_pci_detach(device_t dev)
{
	nvme_softc_t *sc = device_get_softc(dev);
	int i;

	/*
	 * Stop the admin thread
	 */
	nvme_stop_admin_thread(sc);

	/*
	 * Issue a normal shutdown and wait for completion
	 */
	nvme_issue_shutdown(sc, 0);

	/*
	 * Disable the chip
	 */
	nvme_enable(sc, 0);

	/*
	 * Free admin memory
	 */
	nvme_free_subqueue(sc, 0);
	nvme_free_comqueue(sc, 0);

	/*
	 * Release related resources.
	 */
	for (i = 0; i < sc->nirqs; ++i) {
		if (sc->irq[i]) {
			bus_release_resource(dev, SYS_RES_IRQ,
					     sc->rid_irq[i], sc->irq[i]);
			sc->irq[i] = NULL;
			if (sc->irq_type == PCI_INTR_TYPE_MSIX)
				pci_release_msix_vector(dev, sc->rid_irq[i]);
		}
	}
	switch(sc->irq_type) {
	case PCI_INTR_TYPE_MSI:
		pci_release_msi(dev);
		break;
	case PCI_INTR_TYPE_MSIX:
		pci_teardown_msix(dev);
		break;
	default:
		break;
	}

	/*
	 * Release remaining chipset resources
	 */
	if (sc->regs) {
		bus_release_resource(dev, SYS_RES_MEMORY,
				     sc->rid_regs, sc->regs);
		sc->regs = NULL;
	}
	if (sc->bar4) {
		bus_release_resource(dev, SYS_RES_MEMORY,
				     sc->rid_bar4, sc->bar4);
		sc->bar4 = NULL;
	}

	/*
	 * Cleanup the DMA tags
	 */
	if (sc->prps_tag) {
		bus_dma_tag_destroy(sc->prps_tag);
		sc->prps_tag = NULL;
	}
	if (sc->sque_tag) {
		bus_dma_tag_destroy(sc->sque_tag);
		sc->sque_tag = NULL;
	}
	if (sc->cque_tag) {
		bus_dma_tag_destroy(sc->cque_tag);
		sc->cque_tag = NULL;
	}
	if (sc->adm_tag) {
		bus_dma_tag_destroy(sc->adm_tag);
		sc->adm_tag = NULL;
	}

	if (sc->flags & NVME_SC_ATTACHED) {
		lockmgr(&nvme_master_lock, LK_EXCLUSIVE);
		sc->flags &= ~NVME_SC_ATTACHED;
		TAILQ_REMOVE(&nvme_sc_list, sc, entry);
		lockmgr(&nvme_master_lock, LK_RELEASE);
	}

	return (0);
}