/*
 * Copyright (c) 2016-2018 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * Most low-level chip related functions (other than attachment) reside in
 * this module.  Most functions assume that the caller is already holding
 * appropriate locks to prevent SMP collisions.
 */

#include "nvme.h"

MALLOC_DEFINE(M_NVME, "NVMe Driver", "NVME");

/*
 * DMA mapping callbacks.
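 *
 * nvme_dmamem_saveseg() is the bus_dmamap_load() callback.  Queue memory
 * is allocated as a single physically contiguous chunk, so exactly one
 * segment is expected and its bus address is saved for the caller.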
 */
static
void
nvme_dmamem_saveseg(void *info, bus_dma_segment_t *segs, int nsegs, int error)
{
        KKASSERT(error == 0);
        KKASSERT(nsegs == 1);
        *(bus_addr_t *)info = segs->ds_addr;
}

/*
 * Low-level chip enable/disable.
 */
int
nvme_enable(nvme_softc_t *sc, int enable)
{
        uint32_t reg;
        int error = 0;
        int base_ticks;

        reg = nvme_read(sc, NVME_REG_CONFIG);
        if (enable == 0 && (reg & NVME_CONFIG_EN)) {
                /*
                 * Disable the chip so we can program it.
                 */
                reg &= ~NVME_CONFIG_EN;
                nvme_write(sc, NVME_REG_CONFIG, reg);
        } else if (enable && (reg & NVME_CONFIG_EN) == 0) {
                /*
                 * Enable the chip once programmed.
                 */
                reg |= NVME_CONFIG_EN;
                nvme_write(sc, NVME_REG_CONFIG, reg);
        }
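
        /*
         * Wait for the controller's RDY status to match the requested
         * EN state, polling every 50ms for up to sc->entimo ticks.
         */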
        error = ENXIO;
        base_ticks = ticks;
        while ((int)(ticks - base_ticks) < sc->entimo) {
                reg = nvme_read(sc, NVME_REG_STATUS);
                if (enable == 0 && (reg & NVME_STATUS_RDY) == 0) {
                        error = 0;
                        break;
                }
                if (enable && (reg & NVME_STATUS_RDY)) {
                        error = 0;
                        break;
                }
                nvme_os_sleep(50);      /* 50ms poll */
        }

        /*
         * Interrupt masking (only applicable when MSI-X is not used;
         * sections 3.1.3 and 3.1.4 state that these registers should not
         * be accessed when MSI-X is in use).
         */
        if (error == 0 && sc->nirqs == 1) {
                if (enable) {
                        nvme_write(sc, NVME_REG_INTSET, ~1);
                        nvme_write(sc, NVME_REG_INTCLR, 1);
                } else {
                        nvme_write(sc, NVME_REG_INTSET, ~1);
                }
        }

        if (error) {
                device_printf(sc->dev, "Cannot %s device\n",
                              (enable ? "enable" : "disable"));
        } else {
#if 0
                kprintf("gratuitous 15 second sleep\n");
                nvme_os_sleep(15000);
                kprintf("gratuitous 15 second sleep done\n");
#endif
        }
        return error;
}

/*
 * Allocate submission and completion queues.  If qid is 0 we are allocating
 * the ADMIN queues, otherwise we are allocating I/O queues.
 */
int
nvme_alloc_subqueue(nvme_softc_t *sc, uint16_t qid)
{
        nvme_subqueue_t *queue = &sc->subqueues[qid];
        int error = 0;

        /*
         * For now implement the maximum queue size negotiated in the
         * attach.
         */
        lockinit(&queue->lk, "nvqlk", 0, 0);
        queue->sc = sc;
        queue->nqe = sc->maxqe;
        queue->qid = qid;
        queue->subq_doorbell_reg = NVME_REG_SUBQ_BELL(qid, sc->dstrd4);

        /*
         * dma memory for the submission queue
         */
        if (error == 0) {
                error = bus_dmamem_alloc(sc->sque_tag, (void **)&queue->ksubq,
                                         BUS_DMA_ZERO, &queue->sque_map);
        }
        if (error == 0) {
                error = bus_dmamap_load(sc->sque_tag, queue->sque_map,
                                        queue->ksubq,
                                        bus_dma_tag_getmaxsize(sc->sque_tag),
                                        nvme_dmamem_saveseg, &queue->psubq,
                                        0);
        }

        /*
         * dma memory for enough PRPs to map MAXPHYS bytes of memory per
         * request.  A MAXPHYS buffer which begins partially straddling
         * a page boundary can still be accommodated because we have an
         * additional PRP entry in cmd.head.
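         *
         * That is, a maximally misaligned MAXPHYS transfer uses
         * cmd.head.prp1 for the partial head page plus up to
         * (MAXPHYS / PAGE_SIZE) list entries, which is why
         * _nvme_fill_request() reserves a PRP table of
         * (MAXPHYS / PAGE_SIZE) entries per command id in this allocation.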
         */
        if (error == 0) {
                error = bus_dmamem_alloc(sc->prps_tag, (void **)&queue->kprps,
                                         BUS_DMA_ZERO, &queue->prps_map);
        }
        if (error == 0) {
                error = bus_dmamap_load(sc->prps_tag, queue->prps_map,
                                        queue->kprps,
                                        bus_dma_tag_getmaxsize(sc->prps_tag),
                                        nvme_dmamem_saveseg, &queue->pprps,
                                        0);
        }

        /*
         * dma memory for admin data
         */
        if (qid == 0 && error == 0) {
                error = bus_dmamem_alloc(sc->adm_tag,
                                         (void **)&queue->kdatapgs,
                                         BUS_DMA_ZERO, &queue->adm_map);
        }
        if (qid == 0 && error == 0) {
                error = bus_dmamap_load(sc->adm_tag, queue->adm_map,
                                        queue->kdatapgs,
                                        bus_dma_tag_getmaxsize(sc->adm_tag),
                                        nvme_dmamem_saveseg, &queue->pdatapgs,
                                        0);
        }

        /*
         * Driver request structures
         */
        if (error == 0) {
                nvme_request_t *req;
                uint32_t i;

                queue->reqary = kmalloc(sizeof(nvme_request_t) * queue->nqe,
                                        M_NVME, M_WAITOK | M_ZERO);
                for (i = 0; i < queue->nqe; ++i) {
                        req = &queue->reqary[i];
                        if (i == 0) {
                                /*
                                 * Set aside one request for dump operation
                                 */
                                queue->dump_req = req;
                        } else {
                                /*
                                 * The rest go through the normal list
                                 */
                                req->next_avail = queue->first_avail;
                                queue->first_avail = req;
                        }
                        req->subq = queue;
                        req->comq = &sc->comqueues[queue->comqid];
                        req->cmd_id = i;
                        if (qid == 0) {
                                req->info = &queue->kdatapgs[i];
                                req->pinfo = queue->pdatapgs +
                                             i * sizeof(nvme_admin_data_t);
                        }
                }
        }

        /*
         * Error handling
         */
        if (error)
                nvme_free_subqueue(sc, qid);
        return error;
}

int
nvme_alloc_comqueue(nvme_softc_t *sc, uint16_t qid)
{
        nvme_comqueue_t *queue = &sc->comqueues[qid];
        int error = 0;

        /*
         * For now implement the maximum queue size negotiated in the
         * attach.
         */
        lockinit(&queue->lk, "nvqlk", 0, 0);
        queue->sc = sc;
        queue->qid = qid;
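        /*
         * Start with the phase bit set.  nvme_poll_completions() consumes
         * a completion entry only when the entry's phase bit matches
         * queue->phase, and flips queue->phase each time the ring wraps.
         */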
        queue->phase = NVME_COMQ_STATUS_PHASE;
        queue->comq_doorbell_reg = NVME_REG_COMQ_BELL(qid, sc->dstrd4);

        if (error == 0) {
                error = bus_dmamem_alloc(sc->cque_tag, (void **)&queue->kcomq,
                                         BUS_DMA_ZERO, &queue->cque_map);
        }
        if (error == 0) {
                error = bus_dmamap_load(sc->cque_tag, queue->cque_map,
                                        queue->kcomq,
                                        bus_dma_tag_getmaxsize(sc->cque_tag),
                                        nvme_dmamem_saveseg, &queue->pcomq,
                                        0);
        }

        /*
         * Set nqe last.  The comq polling loop tests this field and we
         * do not want it to spuriously assume that the comq is initialized
         * until it actually is.
         */
        if (error == 0)
                queue->nqe = sc->maxqe;

        if (error)
                nvme_free_comqueue(sc, qid);
        return error;
}

void
nvme_free_subqueue(nvme_softc_t *sc, uint16_t qid)
{
        nvme_subqueue_t *queue = &sc->subqueues[qid];

        queue->first_avail = NULL;
        if (queue->reqary) {
                kfree(queue->reqary, M_NVME);
                queue->reqary = NULL;
        }
        if (queue->ksubq) {
                bus_dmamem_free(sc->sque_tag, queue->ksubq, queue->sque_map);
                bus_dmamap_unload(sc->sque_tag, queue->sque_map);
                bus_dmamap_destroy(sc->sque_tag, queue->sque_map);
        }
        if (queue->kprps) {
                bus_dmamem_free(sc->prps_tag, queue->kprps, queue->prps_map);
                bus_dmamap_unload(sc->prps_tag, queue->prps_map);
                bus_dmamap_destroy(sc->prps_tag, queue->prps_map);
        }
        if (queue->kdatapgs) {
                bus_dmamem_free(sc->adm_tag, queue->kdatapgs, queue->adm_map);
                bus_dmamap_unload(sc->adm_tag, queue->adm_map);
                bus_dmamap_destroy(sc->adm_tag, queue->adm_map);
        }
        bzero(queue, sizeof(*queue));
}

void
nvme_free_comqueue(nvme_softc_t *sc, uint16_t qid)
{
        nvme_comqueue_t *queue = &sc->comqueues[qid];

        /*
         * Clear this field first so poll loops ignore the comq.
         */
        queue->nqe = 0;

        if (queue->kcomq) {
                bus_dmamem_free(sc->cque_tag, queue->kcomq, queue->cque_map);
                bus_dmamap_unload(sc->cque_tag, queue->cque_map);
                bus_dmamap_destroy(sc->cque_tag, queue->cque_map);
        }
        bzero(queue, sizeof(*queue));
}

/*
 * ADMIN AND I/O REQUEST HANDLING
 */
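/*
 * The normal synchronous request life cycle, as used by the admin helper
 * functions below (e.g. nvme_create_subqueue()), is:
 *
 *      req = nvme_get_admin_request(sc, opcode);   (or nvme_get_request())
 *      ... fill in remaining req->cmd fields ...
 *      nvme_submit_request(req);
 *      status = nvme_wait_request(req);
 *      nvme_put_request(req);
 *
 * The dump path uses the nvme_get_dump_request() / nvme_poll_request() /
 * nvme_put_dump_request() variants, which never block.
 */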

/*
 * Obtain a request and handle DMA mapping the supplied kernel buffer.
 * Fields in cmd.head will be initialized and remaining fields will be zero'd.
 * Caller is responsible for filling in remaining fields as appropriate.
 *
 * Caller should NOT hold the queue lock (nvme_get_request() acquires it
 * internally).
 */
nvme_request_t *
nvme_get_admin_request(nvme_softc_t *sc, uint8_t opcode)
{
        nvme_request_t *req;

        req = nvme_get_request(&sc->subqueues[0], opcode, NULL, 0);
        req->cmd.head.prp1 = req->pinfo;
        req->callback = NULL;

        return req;
}

/*
 * ADMIN AND I/O REQUEST HANDLING
 */

static __inline
void
_nvme_fill_request(nvme_subqueue_t *queue, uint8_t opcode,
                   char *kva, size_t bytes,
                   nvme_request_t *req)
{
        /*
         * Fill-in basic fields and do the DMA mapping.
         */
        req->next_avail = NULL;
        KKASSERT(req->state == NVME_REQ_AVAIL);
        req->state = NVME_REQ_ALLOCATED;
        req->callback = NULL;
        req->waiting = 0;

        req->cmd.head.opcode = opcode;
        req->cmd.head.flags = NVME_SUBQFLG_PRP | NVME_SUBQFLG_NORM;
        req->cmd.head.cid = req->cmd_id;
        req->cmd.head.nsid = 0;
        req->cmd.head.mptr = 0;
        req->cmd.head.prp1 = 0;
        req->cmd.head.prp2 = 0;
        req->cmd.dw10 = 0;
        req->cmd.dw11 = 0;
        req->cmd.dw12 = 0;
        req->cmd.dw13 = 0;
        req->cmd.dw14 = 0;
        req->cmd.dw15 = 0;

        if (kva) {
                size_t count = 0;
                size_t idx = 0;
                vm_paddr_t paddr;
                vm_paddr_t pprptab;
                uint64_t *kprptab;
                KKASSERT(bytes >= 0 && bytes <= MAXPHYS);

                kprptab = queue->kprps +
                          (MAXPHYS / PAGE_SIZE) * req->cmd_id;
                pprptab = queue->pprps +
                          (MAXPHYS / PAGE_SIZE) * req->cmd_id *
                          sizeof(uint64_t);

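                /*
                 * Walk the buffer page by page.  The first (possibly
                 * partial) page always goes into prp1.  A transfer which
                 * ends within the second page can use prp2 as a direct
                 * pointer; anything larger switches prp2 to point at the
                 * per-request PRP list assembled in kprptab.
                 */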
                while (count < bytes) {
                        paddr = vtophys(kva + count);
                        if (idx == 0) {
                                KKASSERT((paddr & 3) == 0);
                                req->cmd.head.prp1 = paddr;
                                count += (((intptr_t)kva + PAGE_SIZE) &
                                          ~(intptr_t)PAGE_MASK) -
                                         (intptr_t)kva;
                        } else if (idx == 1 && count + PAGE_SIZE >= bytes) {
                                KKASSERT((paddr & PAGE_MASK) == 0);
                                req->cmd.head.prp2 = paddr;
                                count += PAGE_SIZE;
                        } else {
                                KKASSERT((paddr & PAGE_MASK) == 0);
                                /* if (idx == 1) -- not needed, just repeat */
                                req->cmd.head.prp2 = pprptab; /* repeat */
                                kprptab[idx - 1] = paddr;
                                count += PAGE_SIZE;
                        }
                        ++idx;
                }
        }
}


/*
 * Obtain a request and handle DMA mapping the supplied kernel buffer.
 * Fields in cmd.head will be initialized and remaining fields will be zero'd.
 * Caller is responsible for filling in remaining fields as appropriate.
 *
 * May return NULL if no requests are available or if there is no room in
 * the submission queue to handle it (should only be possible on an I/O queue,
 * admin queue operations are managed).
 *
 * Caller should NOT hold the queue lock.
 */
nvme_request_t *
nvme_get_request(nvme_subqueue_t *queue, uint8_t opcode,
                 char *kva, size_t bytes)
{
        nvme_request_t *req;
        nvme_request_t *next;

        /*
         * No easy lockless way to pull a new request off.  We have to check
         * for a number of conditions and there may be multiple threads
         * making this call simultaneously, which complicates matters even
         * more.
         */
        lockmgr(&queue->lk, LK_EXCLUSIVE);

        /*
         * Make sure the submission queue has room to accommodate the
         * request.  Requests can be completed out of order so the
         * submission ring could still be full even though we have
         * requests available.
         */
        if ((queue->subq_tail + queue->unsubmitted + 1) % queue->nqe ==
            queue->subq_head) {
                lockmgr(&queue->lk, LK_RELEASE);
                KKASSERT(queue->qid != 0);
                atomic_swap_int(&queue->signal_requeue, 1);

                return NULL;
        }

        /*
         * Pop the next available request off of the first_avail linked
         * list.  An atomic op must be used here because nvme_put_request()
         * returns requests to the list without holding queue->lk.
         */
        for (;;) {
                req = queue->first_avail;
                cpu_ccfence();
                if (req == NULL) {
                        lockmgr(&queue->lk, LK_RELEASE);
                        KKASSERT(queue->qid != 0);
                        atomic_swap_int(&queue->signal_requeue, 1);

                        return NULL;
                }
                next = req->next_avail;
                if (atomic_cmpset_ptr(&queue->first_avail, req, next))
                        break;
        }

        /*
         * We have to keep track of unsubmitted requests in order to be
         * able to properly check whether the ring is full or not (check
         * is done at the top of this procedure, above).
         */
        ++queue->unsubmitted;
        lockmgr(&queue->lk, LK_RELEASE);

        _nvme_fill_request(queue, opcode, kva, bytes, req);

        return req;
}

/*
 * dump path only, cannot block.  Allow the lock to fail and bump
 * queue->unsubmitted anyway.
 */
nvme_request_t *
nvme_get_dump_request(nvme_subqueue_t *queue, uint8_t opcode,
                      char *kva, size_t bytes)
{
        nvme_request_t *req;
        int error;

        error = lockmgr(&queue->lk, LK_EXCLUSIVE | LK_NOWAIT);
        req = queue->dump_req;
        ++queue->unsubmitted;
        if (error == 0)
                lockmgr(&queue->lk, LK_RELEASE);
        _nvme_fill_request(queue, opcode, kva, bytes, req);

        return req;
}

/*
 * Submit request for execution.  This will doorbell the subq.
 *
 * Caller must hold the queue lock.
 */
void
nvme_submit_request(nvme_request_t *req)
{
        nvme_subqueue_t *queue = req->subq;
        nvme_allcmd_t *cmd;

        cmd = &queue->ksubq[queue->subq_tail];
        --queue->unsubmitted;
        if (++queue->subq_tail == queue->nqe)
                queue->subq_tail = 0;
        KKASSERT(queue->subq_tail != queue->subq_head);
        *cmd = req->cmd;
        cpu_sfence();   /* needed? */
        req->state = NVME_REQ_SUBMITTED;
        nvme_write(queue->sc, queue->subq_doorbell_reg, queue->subq_tail);
}

/*
 * Wait for a request to complete.
 *
 * Caller does not need to hold the queue lock.  The completion queue lock
 * is acquired internally while polling for the completion.
 */
int
nvme_wait_request(nvme_request_t *req)
{
        struct lock *lk;
        int code;

        req->waiting = 1;
        if (req->state != NVME_REQ_COMPLETED) {
                lk = &req->comq->lk;
                cpu_lfence();
                lockmgr(lk, LK_EXCLUSIVE);
                while (req->state == NVME_REQ_SUBMITTED) {
                        nvme_poll_completions(req->comq, lk);
                        if (req->state != NVME_REQ_SUBMITTED)
                                break;
                        lksleep(req, lk, 0, "nvwait", hz);
                }
                lockmgr(lk, LK_RELEASE);
                KKASSERT(req->state == NVME_REQ_COMPLETED);
        }
        cpu_lfence();
        code = NVME_COMQ_STATUS_CODE_GET(req->res.tail.status);

        return code;
}

/*
 * dump path only, we cannot block, and the lock is allowed
 * to fail.  But still try to play nice with interrupt threads.
 */
int
nvme_poll_request(nvme_request_t *req)
{
        struct lock *lk;
        int code;
        int didlock = 500;      /* 500uS max */

        req->waiting = 1;
        if (req->state != NVME_REQ_COMPLETED) {
                lk = &req->comq->lk;
                cpu_lfence();
                while (lockmgr(lk, LK_EXCLUSIVE | LK_NOWAIT) != 0) {
                        if (--didlock == 0)
                                break;
                        tsc_delay(1000);        /* 1uS */
                }
                while (req->state == NVME_REQ_SUBMITTED) {
                        nvme_poll_completions(req->comq, lk);
                        if (req->state != NVME_REQ_SUBMITTED)
                                break;
                        lwkt_switch();
                }
                if (didlock)
                        lockmgr(lk, LK_RELEASE);
                KKASSERT(req->state == NVME_REQ_COMPLETED);
        }
        cpu_lfence();
        code = NVME_COMQ_STATUS_CODE_GET(req->res.tail.status);

        return code;
}

/*
 * Put request away, making it available for reuse.  If this is an admin
 * request its auxiliary data page is also being released for reuse.
 *
 * Caller does NOT have to hold the queue lock.
 */
void
nvme_put_request(nvme_request_t *req)
{
        nvme_subqueue_t *queue = req->subq;
        nvme_request_t *next;

        /*
         * Insert on head for best cache reuse.
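         * This is a lock-free LIFO push which pairs with the cmpset pop
         * in nvme_get_request().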
         */
        KKASSERT(req->state == NVME_REQ_COMPLETED);
        req->state = NVME_REQ_AVAIL;
        for (;;) {
                next = queue->first_avail;
                cpu_ccfence();
                req->next_avail = next;
                if (atomic_cmpset_ptr(&queue->first_avail, next, req))
                        break;
        }

        /*
         * If BIOs were deferred due to lack of request space signal the
         * admin thread to requeue them.  This is a bit messy and normally
         * should not happen due to the large number of queue entries nvme
         * usually has.  Let it race for now (admin has a 1hz tick).
         */
        if (atomic_swap_int(&queue->signal_requeue, 0)) {
                atomic_set_int(&queue->sc->admin_signal, ADMIN_SIG_REQUEUE);
                wakeup(&queue->sc->admin_signal);
        }
}

/*
 * dump path only.
 */
void
nvme_put_dump_request(nvme_request_t *req)
{
        KKASSERT(req->state == NVME_REQ_COMPLETED);
        req->state = NVME_REQ_AVAIL;
}

/*
 * Poll for completions on queue, copy the 16-byte hw result entry
 * into the request and poke the doorbell to update the controller's
 * understanding of comq_head.
 *
 * If lk is non-NULL it will be passed to the callback which typically
 * releases it temporarily when calling biodone() or doing other complex
 * work on the result.
 *
 * Caller must usually hold comq->lk.
 */
void
nvme_poll_completions(nvme_comqueue_t *comq, struct lock *lk)
{
        nvme_softc_t *sc = comq->sc;
        nvme_request_t *req;
        nvme_subqueue_t *subq;
        nvme_allres_t *res;
#if 0
        int didwork = 0;
#endif

        KKASSERT(comq->comq_tail < comq->nqe);
        cpu_lfence();           /* needed prior to first phase test */
        for (;;) {
                /*
                 * WARNING! LOCK MAY HAVE BEEN TEMPORARILY LOST DURING LOOP.
                 */
                res = &comq->kcomq[comq->comq_tail];
                if ((res->tail.status ^ comq->phase) & NVME_COMQ_STATUS_PHASE)
                        break;

                /*
                 * Process result on completion queue.
                 *
                 * Bump comq_tail, flip the phase detect when we roll-over.
                 * doorbell every 1/4 queue and at the end of the loop.
                 */
                if (++comq->comq_tail == comq->nqe) {
                        comq->comq_tail = 0;
                        comq->phase ^= NVME_COMQ_STATUS_PHASE;
                }

                /*
                 * WARNING! I imploded the chip by reusing a command id
                 *          before it was discarded in the completion queue
                 *          via the doorbell, so for now we always write
                 *          the doorbell before marking the request as
                 *          COMPLETED (it can be reused instantly upon
                 *          being marked).
                 */
#if 0
                if (++didwork == (comq->nqe >> 2)) {
                        didwork = 0;
                        nvme_write(comq->sc, comq->comq_doorbell_reg,
                                   comq->comq_tail);
                }
#endif
                cpu_lfence();   /* needed prior to content check */

                /*
                 * Locate the request and related submission queue.  The
                 * request could be on a different queue.  A submission
                 * queue can have only one completion queue, so we can
                 * update subq_head without locking the submission queue.
                 */
                subq = &sc->subqueues[res->tail.subq_id];
                subq->subq_head = res->tail.subq_head_ptr;
                req = &subq->reqary[res->tail.cmd_id];

                /*
                 * Copy the fields and wakeup anyone waiting on req.
                 * The response field in the completion queue can be reused
                 * once we doorbell which is why we make a copy.
                 */
                KKASSERT(req->state == NVME_REQ_SUBMITTED &&
                         req->comq == comq);
                req->res = *res;
                nvme_write(comq->sc, comq->comq_doorbell_reg, comq->comq_tail);
                cpu_sfence();
                req->state = NVME_REQ_COMPLETED;
                if (req->callback) {
                        req->callback(req, lk);
                } else if (req->waiting) {
                        wakeup(req);
                }
        }
#if 0
        if (didwork)
                nvme_write(comq->sc, comq->comq_doorbell_reg, comq->comq_tail);
#endif
}

/*
 * Core interrupt handler (called from dedicated interrupt thread, possibly
 * preempts other threads).
 *
 * NOTE: For pin-based level interrupts, the chipset interrupt is cleared
 *       automatically once all the head doorbells are updated.  However,
 *       most chipsets assume MSI-X will be used and MAY NOT IMPLEMENT
 *       pin-based interrupts properly.  I found the BPX card, for example,
 *       is unable to clear a pin-based interrupt.
 */
void
nvme_intr(void *arg)
{
        nvme_comqueue_t *comq = arg;
        nvme_softc_t *sc;
        int i;
        int skip;

        /*
         * Process all completion queues associated with this vector.  The
         * interrupt is masked in the APIC.  Do NOT mess with the NVMe
         * masking registers because (1) We don't need to and it wastes time,
         * and (2) We aren't supposed to touch them if using MSI-X anyway.
         */
        sc = comq->sc;
        if (sc->nirqs == 1)
                skip = 1;
        else
                skip = sc->nirqs - 1;

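        /*
         * With a single vector every completion queue is serviced here.
         * With multiple vectors each one services completion queues
         * qid, qid + skip, qid + 2 * skip, ..., matching the round-robin
         * ivect assignment made in nvme_create_comqueue().
         */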
        for (i = comq->qid; i <= sc->niocomqs; i += skip) {
                if (comq->nqe) {
                        lockmgr(&comq->lk, LK_EXCLUSIVE);
                        nvme_poll_completions(comq, &comq->lk);
                        lockmgr(&comq->lk, LK_RELEASE);
                }
                comq += skip;
        }
}

/*
 * ADMIN HELPER COMMAND ROLLUP FUNCTIONS
 */
/*
 * Issue command to create a submission queue.
 */
int
nvme_create_subqueue(nvme_softc_t *sc, uint16_t qid)
{
        nvme_request_t *req;
        nvme_subqueue_t *subq = &sc->subqueues[qid];
        int status;

        req = nvme_get_admin_request(sc, NVME_OP_CREATE_SUBQ);
        req->cmd.head.prp1 = subq->psubq;
        req->cmd.crsub.subq_id = qid;
        req->cmd.crsub.subq_size = subq->nqe - 1;       /* 0's based value */
        req->cmd.crsub.flags = NVME_CREATESUB_PC | NVME_CREATESUB_PRI_URG;
        req->cmd.crsub.comq_id = subq->comqid;

        nvme_submit_request(req);
        status = nvme_wait_request(req);
        nvme_put_request(req);

        return status;
}

/*
 * Issue command to create a completion queue.
 */
int
nvme_create_comqueue(nvme_softc_t *sc, uint16_t qid)
{
        nvme_request_t *req;
        nvme_comqueue_t *comq = &sc->comqueues[qid];
        int status;
        int error;
        uint16_t ivect;

        error = 0;
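        /*
         * I/O completion queues are distributed round-robin across the
         * available MSI-X vectors.  The interrupt handler for a vector is
         * only installed the first time a queue lands on it, i.e. when
         * the queue id equals the vector index.
         */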
        if (sc->nirqs > 1) {
                ivect = 1 + (qid - 1) % (sc->nirqs - 1);
                if (qid && ivect == qid) {
                        error = bus_setup_intr(sc->dev, sc->irq[ivect],
                                               INTR_MPSAFE | INTR_HIFREQ,
                                               nvme_intr,
                                               &sc->comqueues[ivect],
                                               &sc->irq_handle[ivect],
                                               NULL);
                }
        } else {
                ivect = 0;
        }
        if (error)
                return error;

        req = nvme_get_admin_request(sc, NVME_OP_CREATE_COMQ);
        req->cmd.head.prp1 = comq->pcomq;
        req->cmd.crcom.comq_id = qid;
        req->cmd.crcom.comq_size = comq->nqe - 1;       /* 0's based value */
        req->cmd.crcom.ivect = ivect;
        req->cmd.crcom.flags = NVME_CREATECOM_PC | NVME_CREATECOM_IEN;

        nvme_submit_request(req);
        status = nvme_wait_request(req);
        nvme_put_request(req);

        /*
         * Oops, create failed, undo the irq setup
         */
        if (sc->nirqs > 1 && status) {
                ivect = 1 + (qid - 1) % (sc->nirqs - 1);
                if (qid && ivect == qid) {
                        bus_teardown_intr(sc->dev,
                                          sc->irq[ivect],
                                          sc->irq_handle[ivect]);
                        sc->irq_handle[ivect] = NULL;
                }
        }

        return status;
}

/*
 * Issue command to delete a submission queue.
 */
int
nvme_delete_subqueue(nvme_softc_t *sc, uint16_t qid)
{
        nvme_request_t *req;
        /*nvme_subqueue_t *subq = &sc->subqueues[qid];*/
        int status;

        req = nvme_get_admin_request(sc, NVME_OP_DELETE_SUBQ);
        req->cmd.head.prp1 = 0;
        req->cmd.delete.qid = qid;

        nvme_submit_request(req);
        status = nvme_wait_request(req);
        nvme_put_request(req);

        return status;
}

/*
 * Issue command to delete a completion queue.
 */
int
nvme_delete_comqueue(nvme_softc_t *sc, uint16_t qid)
{
        nvme_request_t *req;
        nvme_comqueue_t *comq = &sc->comqueues[qid];
        int status;
        uint16_t ivect;

        if (comq->sc == NULL)
                return 0;

        req = nvme_get_admin_request(sc, NVME_OP_DELETE_COMQ);
        req->cmd.head.prp1 = 0;
        req->cmd.delete.qid = qid;

        nvme_submit_request(req);
        status = nvme_wait_request(req);
        nvme_put_request(req);

        if (qid && sc->nirqs > 1) {
                ivect = 1 + (qid - 1) % (sc->nirqs - 1);
                if (ivect == qid && sc->irq_handle[ivect]) {
                        bus_teardown_intr(sc->dev,
                                          sc->irq[ivect],
                                          sc->irq_handle[ivect]);
                        sc->irq_handle[ivect] = NULL;
                }
        }

        return status;
}

/*
 * Issue friendly shutdown to controller.
 */
int
nvme_issue_shutdown(nvme_softc_t *sc, int dopoll)
{
        uint32_t reg;
        int base_ticks;
        int error;

        /*
         * Put us in shutdown
         */
        reg = nvme_read(sc, NVME_REG_CONFIG);
        reg &= ~NVME_CONFIG_SHUT_MASK;
        reg |= NVME_CONFIG_SHUT_NORM;
        nvme_write(sc, NVME_REG_CONFIG, reg);

        /*
         * Wait up to 10 seconds for acknowledgement
         */
        error = ENXIO;
        base_ticks = ticks;
        while ((int)(ticks - base_ticks) < 10 * 20) {
                reg = nvme_read(sc, NVME_REG_STATUS);
                if ((reg & NVME_STATUS_SHUT_MASK) & NVME_STATUS_SHUT_DONE) {
                        error = 0;
                        break;
                }
                if (dopoll == 0)
                        nvme_os_sleep(50);      /* 50ms poll */
        }
        if (error)
                device_printf(sc->dev, "Unable to shutdown chip nicely\n");
        else
                device_printf(sc->dev, "Normal chip shutdown succeeded\n");

        return error;
}

/*
 * Make space-padded string serial and model numbers more readable.
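 *
 * Leading (and, if domiddle is non-zero, interior) spaces and CRs are
 * stripped, trailing spaces and CRs are trimmed, and when domiddle is
 * zero any remaining interior spaces are converted to underscores.
 * Returns the resulting string length.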
 */
size_t
string_cleanup(char *str, int domiddle)
{
        size_t i;
        size_t j;
        int atbeg = 1;

        for (i = j = 0; str[i]; ++i) {
                if ((str[i] == ' ' || str[i] == '\r') &&
                    (atbeg || domiddle)) {
                        continue;
                } else {
                        atbeg = 0;
                }
                str[j] = str[i];
                ++j;
        }
        while (domiddle == 0 && j > 0 && (str[j-1] == ' ' || str[j-1] == '\r'))
                --j;
        str[j] = 0;
        if (domiddle == 0) {
                for (j = 0; str[j]; ++j) {
                        if (str[j] == ' ')
                                str[j] = '_';
                }
        }

        return j;
}