1 /*	$OpenBSD: nvme.c,v 1.91 2021/02/25 07:30:36 jan Exp $ */
2 
3 /*
4  * Copyright (c) 2014 David Gwynne <dlg@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 #include <sys/param.h>
20 #include <sys/systm.h>
21 #include <sys/buf.h>
22 #include <sys/kernel.h>
23 #include <sys/malloc.h>
24 #include <sys/device.h>
25 #include <sys/queue.h>
26 #include <sys/mutex.h>
27 #include <sys/pool.h>
28 
29 #include <sys/atomic.h>
30 
31 #include <machine/bus.h>
32 
33 #include <scsi/scsi_all.h>
34 #include <scsi/scsi_disk.h>
35 #include <scsi/scsiconf.h>
36 
37 #include <dev/ic/nvmereg.h>
38 #include <dev/ic/nvmevar.h>
39 
40 struct cfdriver nvme_cd = {
41 	NULL,
42 	"nvme",
43 	DV_DULL
44 };
45 
46 int	nvme_ready(struct nvme_softc *, u_int32_t);
47 int	nvme_enable(struct nvme_softc *);
48 int	nvme_disable(struct nvme_softc *);
49 int	nvme_shutdown(struct nvme_softc *);
50 int	nvme_resume(struct nvme_softc *);
51 
52 void	nvme_dumpregs(struct nvme_softc *);
53 int	nvme_identify(struct nvme_softc *, u_int);
54 void	nvme_fill_identify(struct nvme_softc *, struct nvme_ccb *, void *);
55 
56 int	nvme_ccbs_alloc(struct nvme_softc *, u_int);
57 void	nvme_ccbs_free(struct nvme_softc *, u_int);
58 
59 void *	nvme_ccb_get(void *);
60 void	nvme_ccb_put(void *, void *);
61 
62 int	nvme_poll(struct nvme_softc *, struct nvme_queue *, struct nvme_ccb *,
63 	    void (*)(struct nvme_softc *, struct nvme_ccb *, void *));
64 void	nvme_poll_fill(struct nvme_softc *, struct nvme_ccb *, void *);
65 void	nvme_poll_done(struct nvme_softc *, struct nvme_ccb *,
66 	    struct nvme_cqe *);
67 void	nvme_sqe_fill(struct nvme_softc *, struct nvme_ccb *, void *);
68 void	nvme_empty_done(struct nvme_softc *, struct nvme_ccb *,
69 	    struct nvme_cqe *);
70 
71 struct nvme_queue *
72 	nvme_q_alloc(struct nvme_softc *, u_int16_t, u_int, u_int);
73 int	nvme_q_create(struct nvme_softc *, struct nvme_queue *);
74 int	nvme_q_reset(struct nvme_softc *, struct nvme_queue *);
75 int	nvme_q_delete(struct nvme_softc *, struct nvme_queue *);
76 void	nvme_q_submit(struct nvme_softc *,
77 	    struct nvme_queue *, struct nvme_ccb *,
78 	    void (*)(struct nvme_softc *, struct nvme_ccb *, void *));
79 int	nvme_q_complete(struct nvme_softc *, struct nvme_queue *);
80 void	nvme_q_free(struct nvme_softc *, struct nvme_queue *);
81 
82 struct nvme_dmamem *
83 	nvme_dmamem_alloc(struct nvme_softc *, size_t);
84 void	nvme_dmamem_free(struct nvme_softc *, struct nvme_dmamem *);
85 void	nvme_dmamem_sync(struct nvme_softc *, struct nvme_dmamem *, int);
86 
87 void	nvme_scsi_cmd(struct scsi_xfer *);
88 void	nvme_minphys(struct buf *, struct scsi_link *);
89 int	nvme_scsi_probe(struct scsi_link *);
90 void	nvme_scsi_free(struct scsi_link *);
91 
92 #ifdef HIBERNATE
93 #include <uvm/uvm_extern.h>
94 #include <sys/hibernate.h>
95 #include <sys/disk.h>
96 #include <sys/disklabel.h>
97 
98 int	nvme_hibernate_io(dev_t, daddr_t, vaddr_t, size_t, int, void *);
99 #endif
100 
101 struct scsi_adapter nvme_switch = {
102 	nvme_scsi_cmd, nvme_minphys, nvme_scsi_probe, nvme_scsi_free, NULL
103 };
104 
105 void	nvme_scsi_io(struct scsi_xfer *, int);
106 void	nvme_scsi_io_fill(struct nvme_softc *, struct nvme_ccb *, void *);
107 void	nvme_scsi_io_done(struct nvme_softc *, struct nvme_ccb *,
108 	    struct nvme_cqe *);
109 
110 void	nvme_scsi_sync(struct scsi_xfer *);
111 void	nvme_scsi_sync_fill(struct nvme_softc *, struct nvme_ccb *, void *);
112 void	nvme_scsi_sync_done(struct nvme_softc *, struct nvme_ccb *,
113 	    struct nvme_cqe *);
114 
115 void	nvme_scsi_inq(struct scsi_xfer *);
116 void	nvme_scsi_inquiry(struct scsi_xfer *);
117 void	nvme_scsi_capacity16(struct scsi_xfer *);
118 void	nvme_scsi_capacity(struct scsi_xfer *);
119 
120 #define nvme_read4(_s, _r) \
121 	bus_space_read_4((_s)->sc_iot, (_s)->sc_ioh, (_r))
122 #define nvme_write4(_s, _r, _v) \
123 	bus_space_write_4((_s)->sc_iot, (_s)->sc_ioh, (_r), (_v))
124 /*
125  * Some controllers, at least Apple NVMe, always require split
126  * transfers, so don't use bus_space_{read,write}_8() on LP64.
127  */
128 static inline u_int64_t
129 nvme_read8(struct nvme_softc *sc, bus_size_t r)
130 {
131 	u_int64_t v;
132 	u_int32_t *a = (u_int32_t *)&v;
133 
134 #if _BYTE_ORDER == _LITTLE_ENDIAN
135 	a[0] = nvme_read4(sc, r);
136 	a[1] = nvme_read4(sc, r + 4);
137 #else /* _BYTE_ORDER == _LITTLE_ENDIAN */
138 	a[1] = nvme_read4(sc, r);
139 	a[0] = nvme_read4(sc, r + 4);
140 #endif
141 
142 	return (v);
143 }
144 
145 static inline void
146 nvme_write8(struct nvme_softc *sc, bus_size_t r, u_int64_t v)
147 {
148 	u_int32_t *a = (u_int32_t *)&v;
149 
150 #if _BYTE_ORDER == _LITTLE_ENDIAN
151 	nvme_write4(sc, r, a[0]);
152 	nvme_write4(sc, r + 4, a[1]);
153 #else /* _BYTE_ORDER == _LITTLE_ENDIAN */
154 	nvme_write4(sc, r, a[1]);
155 	nvme_write4(sc, r + 4, a[0]);
156 #endif
157 }
158 #define nvme_barrier(_s, _r, _l, _f) \
159 	bus_space_barrier((_s)->sc_iot, (_s)->sc_ioh, (_r), (_l), (_f))
160 
161 void
162 nvme_dumpregs(struct nvme_softc *sc)
163 {
164 	u_int64_t r8;
165 	u_int32_t r4;
166 
167 	r8 = nvme_read8(sc, NVME_CAP);
168 	printf("%s: cap  0x%016llx\n", DEVNAME(sc), r8);
169 	printf("%s:  mpsmax %u (%u)\n", DEVNAME(sc),
170 	    (u_int)NVME_CAP_MPSMAX(r8), (1 << NVME_CAP_MPSMAX(r8)));
171 	printf("%s:  mpsmin %u (%u)\n", DEVNAME(sc),
172 	    (u_int)NVME_CAP_MPSMIN(r8), (1 << NVME_CAP_MPSMIN(r8)));
173 	printf("%s:  css %llu\n", DEVNAME(sc), NVME_CAP_CSS(r8));
174 	printf("%s:  nssrs %llu\n", DEVNAME(sc), NVME_CAP_NSSRS(r8));
175 	printf("%s:  dstrd %u\n", DEVNAME(sc), NVME_CAP_DSTRD(r8));
176 	printf("%s:  to %llu msec\n", DEVNAME(sc), NVME_CAP_TO(r8));
177 	printf("%s:  ams %llu\n", DEVNAME(sc), NVME_CAP_AMS(r8));
178 	printf("%s:  cqr %llu\n", DEVNAME(sc), NVME_CAP_CQR(r8));
179 	printf("%s:  mqes %llu\n", DEVNAME(sc), NVME_CAP_MQES(r8));
180 
181 	printf("%s: vs   0x%04x\n", DEVNAME(sc), nvme_read4(sc, NVME_VS));
182 
183 	r4 = nvme_read4(sc, NVME_CC);
184 	printf("%s: cc   0x%04x\n", DEVNAME(sc), r4);
185 	printf("%s:  iocqes %u\n", DEVNAME(sc), NVME_CC_IOCQES_R(r4));
186 	printf("%s:  iosqes %u\n", DEVNAME(sc), NVME_CC_IOSQES_R(r4));
187 	printf("%s:  shn %u\n", DEVNAME(sc), NVME_CC_SHN_R(r4));
188 	printf("%s:  ams %u\n", DEVNAME(sc), NVME_CC_AMS_R(r4));
189 	printf("%s:  mps %u\n", DEVNAME(sc), NVME_CC_MPS_R(r4));
190 	printf("%s:  css %u\n", DEVNAME(sc), NVME_CC_CSS_R(r4));
191 	printf("%s:  en %u\n", DEVNAME(sc), ISSET(r4, NVME_CC_EN));
192 
193 	printf("%s: csts 0x%08x\n", DEVNAME(sc), nvme_read4(sc, NVME_CSTS));
194 	printf("%s: aqa  0x%08x\n", DEVNAME(sc), nvme_read4(sc, NVME_AQA));
195 	printf("%s: asq  0x%016llx\n", DEVNAME(sc), nvme_read8(sc, NVME_ASQ));
196 	printf("%s: acq  0x%016llx\n", DEVNAME(sc), nvme_read8(sc, NVME_ACQ));
197 }
198 
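/*
 * Poll CSTS.RDY until it matches the requested state, checking once per
 * millisecond for up to sc_rdy_to iterations.  Returns 0 once the
 * controller reaches that state, non-zero on timeout.
 */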
199 int
200 nvme_ready(struct nvme_softc *sc, u_int32_t rdy)
201 {
202 	u_int i = 0;
203 
204 	while ((nvme_read4(sc, NVME_CSTS) & NVME_CSTS_RDY) != rdy) {
205 		if (i++ > sc->sc_rdy_to)
206 			return (1);
207 
208 		delay(1000);
209 		nvme_barrier(sc, NVME_CSTS, 4, BUS_SPACE_BARRIER_READ);
210 	}
211 
212 	return (0);
213 }
214 
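/*
 * Bring the controller up: if it is already enabled, just wait for it to
 * become ready.  Otherwise program the admin queue registers (AQA/ASQ/ACQ),
 * set the queue entry sizes, page size, arbitration and command set in CC,
 * then set CC.EN and wait for CSTS.RDY.
 */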
215 int
216 nvme_enable(struct nvme_softc *sc)
217 {
218 	u_int32_t cc;
219 
220 	cc = nvme_read4(sc, NVME_CC);
221 	if (ISSET(cc, NVME_CC_EN))
222 		return (nvme_ready(sc, NVME_CSTS_RDY));
223 
224 	nvme_write4(sc, NVME_AQA, NVME_AQA_ACQS(sc->sc_admin_q->q_entries) |
225 	    NVME_AQA_ASQS(sc->sc_admin_q->q_entries));
226 	nvme_barrier(sc, 0, sc->sc_ios, BUS_SPACE_BARRIER_WRITE);
227 
228 	nvme_write8(sc, NVME_ASQ, NVME_DMA_DVA(sc->sc_admin_q->q_sq_dmamem));
229 	nvme_barrier(sc, 0, sc->sc_ios, BUS_SPACE_BARRIER_WRITE);
230 	nvme_write8(sc, NVME_ACQ, NVME_DMA_DVA(sc->sc_admin_q->q_cq_dmamem));
231 	nvme_barrier(sc, 0, sc->sc_ios, BUS_SPACE_BARRIER_WRITE);
232 
233 	CLR(cc, NVME_CC_IOCQES_MASK | NVME_CC_IOSQES_MASK | NVME_CC_SHN_MASK |
234 	    NVME_CC_AMS_MASK | NVME_CC_MPS_MASK | NVME_CC_CSS_MASK);
235 	SET(cc, NVME_CC_IOSQES(6));	/* Submission queue entry size == 2**6 (64 bytes) */
236 	SET(cc, NVME_CC_IOCQES(4));	/* Completion queue entry size == 2**4 (16 bytes) */
237 	SET(cc, NVME_CC_SHN(NVME_CC_SHN_NONE));
238 	SET(cc, NVME_CC_CSS(NVME_CC_CSS_NVM));
239 	SET(cc, NVME_CC_AMS(NVME_CC_AMS_RR));
240 	SET(cc, NVME_CC_MPS(ffs(sc->sc_mps) - 1));
241 	SET(cc, NVME_CC_EN);
242 
243 	nvme_write4(sc, NVME_CC, cc);
244 	nvme_barrier(sc, 0, sc->sc_ios,
245 	    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
246 
247 	return (nvme_ready(sc, NVME_CSTS_RDY));
248 }
249 
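/*
 * Put the controller into a known-disabled state: if it is enabled and not
 * reporting a fatal status, wait for it to become ready first, then clear
 * CC.EN and wait for CSTS.RDY to drop.
 */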
250 int
251 nvme_disable(struct nvme_softc *sc)
252 {
253 	u_int32_t cc, csts;
254 
255 	cc = nvme_read4(sc, NVME_CC);
256 	if (ISSET(cc, NVME_CC_EN)) {
257 		csts = nvme_read4(sc, NVME_CSTS);
258 		if (!ISSET(csts, NVME_CSTS_CFS) &&
259 		    nvme_ready(sc, NVME_CSTS_RDY) != 0)
260 			return (1);
261 	}
262 
263 	CLR(cc, NVME_CC_EN);
264 
265 	nvme_write4(sc, NVME_CC, cc);
266 	nvme_barrier(sc, 0, sc->sc_ios,
267 	    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
268 
269 	return (nvme_ready(sc, 0));
270 }
271 
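/*
 * Controller attachment: read CAP/VS, size the page and transfer limits,
 * set up the admin queue and an initial set of ccbs, identify the
 * controller, create the I/O and hibernate queues, and attach scsibus
 * on top with one target per namespace.
 */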
272 int
273 nvme_attach(struct nvme_softc *sc)
274 {
275 	struct scsibus_attach_args saa;
276 	u_int64_t cap;
277 	u_int32_t reg;
278 	u_int nccbs = 0;
279 
280 	mtx_init(&sc->sc_ccb_mtx, IPL_BIO);
281 	SIMPLEQ_INIT(&sc->sc_ccb_list);
282 	scsi_iopool_init(&sc->sc_iopool, sc, nvme_ccb_get, nvme_ccb_put);
283 
284 	reg = nvme_read4(sc, NVME_VS);
285 	if (reg == 0xffffffff) {
286 		printf(", invalid mapping\n");
287 		return (1);
288 	}
289 
290 	printf(", NVMe %d.%d\n", NVME_VS_MJR(reg), NVME_VS_MNR(reg));
291 
292 	cap = nvme_read8(sc, NVME_CAP);
293 	sc->sc_dstrd = NVME_CAP_DSTRD(cap);
294 	if (NVME_CAP_MPSMIN(cap) > PAGE_SHIFT) {
295 		printf("%s: NVMe minimum page size %u "
296 		    "is greater than CPU page size %u\n", DEVNAME(sc),
297 		    1 << NVME_CAP_MPSMIN(cap), 1 << PAGE_SHIFT);
298 		return (1);
299 	}
300 	if (NVME_CAP_MPSMAX(cap) < PAGE_SHIFT)
301 		sc->sc_mps = 1 << NVME_CAP_MPSMAX(cap);
302 	else
303 		sc->sc_mps = 1 << PAGE_SHIFT;
304 
305 	sc->sc_rdy_to = NVME_CAP_TO(cap);
306 	sc->sc_mdts = MAXPHYS;
307 	sc->sc_max_prpl = sc->sc_mdts / sc->sc_mps;
308 
309 	if (nvme_disable(sc) != 0) {
310 		printf("%s: unable to disable controller\n", DEVNAME(sc));
311 		return (1);
312 	}
313 
314 	sc->sc_admin_q = nvme_q_alloc(sc, NVME_ADMIN_Q, 128, sc->sc_dstrd);
315 	if (sc->sc_admin_q == NULL) {
316 		printf("%s: unable to allocate admin queue\n", DEVNAME(sc));
317 		return (1);
318 	}
319 
320 	if (nvme_ccbs_alloc(sc, 16) != 0) {
321 		printf("%s: unable to allocate initial ccbs\n", DEVNAME(sc));
322 		goto free_admin_q;
323 	}
324 	nccbs = 16;
325 
326 	if (nvme_enable(sc) != 0) {
327 		printf("%s: unable to enable controller\n", DEVNAME(sc));
328 		goto free_ccbs;
329 	}
330 
331 	if (nvme_identify(sc, NVME_CAP_MPSMIN(cap)) != 0) {
332 		printf("%s: unable to identify controller\n", DEVNAME(sc));
333 		goto disable;
334 	}
335 
336 	/* We now know the real values of sc_mdts and sc_max_prpl. */
337 	nvme_ccbs_free(sc, nccbs);
338 	if (nvme_ccbs_alloc(sc, 64) != 0) {
339 		printf("%s: unable to allocate ccbs\n", DEVNAME(sc));
340 		goto free_admin_q;
341 	}
342 	nccbs = 64;
343 
344 	sc->sc_q = nvme_q_alloc(sc, NVME_IO_Q, 128, sc->sc_dstrd);
345 	if (sc->sc_q == NULL) {
346 		printf("%s: unable to allocate io q\n", DEVNAME(sc));
347 		goto disable;
348 	}
349 
350 	if (nvme_q_create(sc, sc->sc_q) != 0) {
351 		printf("%s: unable to create io q\n", DEVNAME(sc));
352 		goto free_q;
353 	}
354 
355 	sc->sc_hib_q = nvme_q_alloc(sc, NVME_HIB_Q, 4, sc->sc_dstrd);
356 	if (sc->sc_hib_q == NULL) {
357 		printf("%s: unable to allocate hibernate io queue\n", DEVNAME(sc));
358 		goto free_q;
359 	}
360 
361 	nvme_write4(sc, NVME_INTMC, 1);
362 
363 	sc->sc_namespaces = mallocarray(sc->sc_nn + 1,
364 	    sizeof(*sc->sc_namespaces), M_DEVBUF, M_WAITOK|M_ZERO);
365 
366 	saa.saa_adapter = &nvme_switch;
367 	saa.saa_adapter_softc = sc;
368 	saa.saa_adapter_buswidth = sc->sc_nn + 1;
369 	saa.saa_luns = 1;
370 	saa.saa_adapter_target = 0;
371 	saa.saa_openings = 64;
372 	saa.saa_pool = &sc->sc_iopool;
373 	saa.saa_quirks = saa.saa_flags = 0;
374 	saa.saa_wwpn = saa.saa_wwnn = 0;
375 
376 	config_found(&sc->sc_dev, &saa, scsiprint);
377 
378 	return (0);
379 
380 free_q:
381 	nvme_q_free(sc, sc->sc_q);
382 disable:
383 	nvme_disable(sc);
384 free_ccbs:
385 	nvme_ccbs_free(sc, nccbs);
386 free_admin_q:
387 	nvme_q_free(sc, sc->sc_admin_q);
388 
389 	return (1);
390 }
391 
392 int
393 nvme_resume(struct nvme_softc *sc)
394 {
395 	if (nvme_disable(sc) != 0) {
396 		printf("%s: unable to disable controller\n", DEVNAME(sc));
397 		return (1);
398 	}
399 
400 	if (nvme_q_reset(sc, sc->sc_admin_q) != 0) {
401 		printf("%s: unable to reset admin queue\n", DEVNAME(sc));
402 		return (1);
403 	}
404 
405 	if (nvme_enable(sc) != 0) {
406 		printf("%s: unable to enable controller\n", DEVNAME(sc));
407 		return (1);
408 	}
409 
410 	sc->sc_q = nvme_q_alloc(sc, NVME_IO_Q, 128, sc->sc_dstrd);
411 	if (sc->sc_q == NULL) {
412 		printf("%s: unable to allocate io q\n", DEVNAME(sc));
413 		goto disable;
414 	}
415 
416 	if (nvme_q_create(sc, sc->sc_q) != 0) {
417 		printf("%s: unable to create io q\n", DEVNAME(sc));
418 		goto free_q;
419 	}
420 
421 	nvme_write4(sc, NVME_INTMC, 1);
422 
423 	return (0);
424 
425 free_q:
426 	nvme_q_free(sc, sc->sc_q);
427 disable:
428 	nvme_disable(sc);
429 
430 	return (1);
431 }
432 
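/*
 * Probe a target (NVMe namespace): issue IDENTIFY NAMESPACE for the target
 * id and remember the result, but only attach namespaces whose reported
 * size (nsze) is non-zero.
 */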
433 int
434 nvme_scsi_probe(struct scsi_link *link)
435 {
436 	struct nvme_softc *sc = link->bus->sb_adapter_softc;
437 	struct nvme_sqe sqe;
438 	struct nvm_identify_namespace *identify;
439 	struct nvme_dmamem *mem;
440 	struct nvme_ccb *ccb;
441 	int rv;
442 
443 	mem = nvme_dmamem_alloc(sc, sizeof(*identify));
444 	if (mem == NULL)
445 		return (ENOMEM);
446 
447 	ccb = scsi_io_get(&sc->sc_iopool, 0);
448 	KASSERT(ccb != NULL);
449 
450 	memset(&sqe, 0, sizeof(sqe));
451 	sqe.opcode = NVM_ADMIN_IDENTIFY;
452 	htolem32(&sqe.nsid, link->target);
453 	htolem64(&sqe.entry.prp[0], NVME_DMA_DVA(mem));
454 	htolem32(&sqe.cdw10, 0);
455 
456 	ccb->ccb_done = nvme_empty_done;
457 	ccb->ccb_cookie = &sqe;
458 
459 	nvme_dmamem_sync(sc, mem, BUS_DMASYNC_PREREAD);
460 	rv = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_sqe_fill);
461 	nvme_dmamem_sync(sc, mem, BUS_DMASYNC_POSTREAD);
462 
463 	scsi_io_put(&sc->sc_iopool, ccb);
464 
465 	identify = NVME_DMA_KVA(mem);
466 	if (rv == 0) {
467 		if (lemtoh64(&identify->nsze) > 0) {
468 			/* Commit namespace if it has a size greater than zero. */
469 			identify = malloc(sizeof(*identify), M_DEVBUF, M_WAITOK);
470 			memcpy(identify, NVME_DMA_KVA(mem), sizeof(*identify));
471 			sc->sc_namespaces[link->target].ident = identify;
472 		} else {
473 			/* Don't attach a namespace if its size is zero. */
474 			rv = ENXIO;
475 		}
476 	}
477 
478 	nvme_dmamem_free(sc, mem);
479 
480 	return (rv);
481 }
482 
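/*
 * Orderly shutdown: delete the I/O queue, request a normal shutdown via
 * CC.SHN and wait up to four seconds for CSTS.SHST to report completion,
 * falling back to simply disabling the controller.
 */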
483 int
484 nvme_shutdown(struct nvme_softc *sc)
485 {
486 	u_int32_t cc, csts;
487 	int i;
488 
489 	nvme_write4(sc, NVME_INTMC, 0);
490 
491 	if (nvme_q_delete(sc, sc->sc_q) != 0) {
492 		printf("%s: unable to delete q, disabling\n", DEVNAME(sc));
493 		goto disable;
494 	}
495 
496 	cc = nvme_read4(sc, NVME_CC);
497 	CLR(cc, NVME_CC_SHN_MASK);
498 	SET(cc, NVME_CC_SHN(NVME_CC_SHN_NORMAL));
499 	nvme_write4(sc, NVME_CC, cc);
500 
501 	for (i = 0; i < 4000; i++) {
502 		nvme_barrier(sc, 0, sc->sc_ios,
503 		    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
504 		csts = nvme_read4(sc, NVME_CSTS);
505 		if ((csts & NVME_CSTS_SHST_MASK) == NVME_CSTS_SHST_DONE)
506 			return (0);
507 
508 		delay(1000);
509 	}
510 
511 	printf("%s: unable to shutdown, disabling\n", DEVNAME(sc));
512 
513 disable:
514 	nvme_disable(sc);
515 	return (0);
516 }
517 
518 int
519 nvme_activate(struct nvme_softc *sc, int act)
520 {
521 	int rv;
522 
523 	switch (act) {
524 	case DVACT_POWERDOWN:
525 		rv = config_activate_children(&sc->sc_dev, act);
526 		nvme_shutdown(sc);
527 		break;
528 	case DVACT_RESUME:
529 		rv = nvme_resume(sc);
530 		if (rv == 0)
531 			rv = config_activate_children(&sc->sc_dev, act);
532 		break;
533 	default:
534 		rv = config_activate_children(&sc->sc_dev, act);
535 		break;
536 	}
537 
538 	return (rv);
539 }
540 
541 void
542 nvme_scsi_cmd(struct scsi_xfer *xs)
543 {
544 	switch (xs->cmd.opcode) {
545 	case READ_COMMAND:
546 	case READ_10:
547 	case READ_12:
548 	case READ_16:
549 		nvme_scsi_io(xs, SCSI_DATA_IN);
550 		return;
551 	case WRITE_COMMAND:
552 	case WRITE_10:
553 	case WRITE_12:
554 	case WRITE_16:
555 		nvme_scsi_io(xs, SCSI_DATA_OUT);
556 		return;
557 
558 	case SYNCHRONIZE_CACHE:
559 		nvme_scsi_sync(xs);
560 		return;
561 
562 	case INQUIRY:
563 		nvme_scsi_inq(xs);
564 		return;
565 	case READ_CAPACITY_16:
566 		nvme_scsi_capacity16(xs);
567 		return;
568 	case READ_CAPACITY:
569 		nvme_scsi_capacity(xs);
570 		return;
571 
572 	case TEST_UNIT_READY:
573 	case PREVENT_ALLOW:
574 	case START_STOP:
575 		xs->error = XS_NOERROR;
576 		scsi_done(xs);
577 		return;
578 
579 	default:
580 		break;
581 	}
582 
583 	xs->error = XS_DRIVER_STUFFUP;
584 	scsi_done(xs);
585 }
586 
587 void
588 nvme_minphys(struct buf *bp, struct scsi_link *link)
589 {
590 	struct nvme_softc *sc = link->bus->sb_adapter_softc;
591 
592 	if (bp->b_bcount > sc->sc_mdts)
593 		bp->b_bcount = sc->sc_mdts;
594 }
595 
596 void
597 nvme_scsi_io(struct scsi_xfer *xs, int dir)
598 {
599 	struct scsi_link *link = xs->sc_link;
600 	struct nvme_softc *sc = link->bus->sb_adapter_softc;
601 	struct nvme_ccb *ccb = xs->io;
602 	bus_dmamap_t dmap = ccb->ccb_dmamap;
603 	int i;
604 
605 	if ((xs->flags & (SCSI_DATA_IN|SCSI_DATA_OUT)) != dir)
606 		goto stuffup;
607 
608 	ccb->ccb_done = nvme_scsi_io_done;
609 	ccb->ccb_cookie = xs;
610 
611 	if (bus_dmamap_load(sc->sc_dmat, dmap,
612 	    xs->data, xs->datalen, NULL, ISSET(xs->flags, SCSI_NOSLEEP) ?
613 	    BUS_DMA_NOWAIT : BUS_DMA_WAITOK) != 0)
614 		goto stuffup;
615 
616 	bus_dmamap_sync(sc->sc_dmat, dmap, 0, dmap->dm_mapsize,
617 	    ISSET(xs->flags, SCSI_DATA_IN) ?
618 	    BUS_DMASYNC_PREREAD : BUS_DMASYNC_PREWRITE);
619 
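	/*
	 * The first data segment goes into prp[0] of the SQE itself (see
	 * nvme_scsi_io_fill).  With more than two segments, the remaining
	 * ones are written into this ccb's slice of the shared PRP list,
	 * whose bus address ends up in prp[1].
	 */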
620 	if (dmap->dm_nsegs > 2) {
621 		for (i = 1; i < dmap->dm_nsegs; i++) {
622 			htolem64(&ccb->ccb_prpl[i - 1],
623 			    dmap->dm_segs[i].ds_addr);
624 		}
625 		bus_dmamap_sync(sc->sc_dmat,
626 		    NVME_DMA_MAP(sc->sc_ccb_prpls),
627 		    ccb->ccb_prpl_off,
628 		    sizeof(*ccb->ccb_prpl) * (dmap->dm_nsegs - 1),
629 		    BUS_DMASYNC_PREWRITE);
630 	}
631 
632 	if (ISSET(xs->flags, SCSI_POLL)) {
633 		nvme_poll(sc, sc->sc_q, ccb, nvme_scsi_io_fill);
634 		return;
635 	}
636 
637 	nvme_q_submit(sc, sc->sc_q, ccb, nvme_scsi_io_fill);
638 	return;
639 
640 stuffup:
641 	xs->error = XS_DRIVER_STUFFUP;
642 	scsi_done(xs);
643 }
644 
645 void
646 nvme_scsi_io_fill(struct nvme_softc *sc, struct nvme_ccb *ccb, void *slot)
647 {
648 	struct nvme_sqe_io *sqe = slot;
649 	struct scsi_xfer *xs = ccb->ccb_cookie;
650 	struct scsi_link *link = xs->sc_link;
651 	bus_dmamap_t dmap = ccb->ccb_dmamap;
652 	u_int64_t lba;
653 	u_int32_t blocks;
654 
655 	scsi_cmd_rw_decode(&xs->cmd, &lba, &blocks);
656 
657 	sqe->opcode = ISSET(xs->flags, SCSI_DATA_IN) ?
658 	    NVM_CMD_READ : NVM_CMD_WRITE;
659 	htolem32(&sqe->nsid, link->target);
660 
661 	htolem64(&sqe->entry.prp[0], dmap->dm_segs[0].ds_addr);
662 	switch (dmap->dm_nsegs) {
663 	case 1:
664 		break;
665 	case 2:
666 		htolem64(&sqe->entry.prp[1], dmap->dm_segs[1].ds_addr);
667 		break;
668 	default:
669 		/* the prp list is already set up and synced */
670 		htolem64(&sqe->entry.prp[1], ccb->ccb_prpl_dva);
671 		break;
672 	}
673 
674 	htolem64(&sqe->slba, lba);
675 	htolem16(&sqe->nlb, blocks - 1);
676 }
677 
678 void
679 nvme_scsi_io_done(struct nvme_softc *sc, struct nvme_ccb *ccb,
680     struct nvme_cqe *cqe)
681 {
682 	struct scsi_xfer *xs = ccb->ccb_cookie;
683 	bus_dmamap_t dmap = ccb->ccb_dmamap;
684 	u_int16_t flags;
685 
686 	if (dmap->dm_nsegs > 2) {
687 		bus_dmamap_sync(sc->sc_dmat,
688 		    NVME_DMA_MAP(sc->sc_ccb_prpls),
689 		    ccb->ccb_prpl_off,
690 		    sizeof(*ccb->ccb_prpl) * (dmap->dm_nsegs - 1),
691 		    BUS_DMASYNC_POSTWRITE);
692 	}
693 
694 	bus_dmamap_sync(sc->sc_dmat, dmap, 0, dmap->dm_mapsize,
695 	    ISSET(xs->flags, SCSI_DATA_IN) ?
696 	    BUS_DMASYNC_POSTREAD : BUS_DMASYNC_POSTWRITE);
697 
698 	bus_dmamap_unload(sc->sc_dmat, dmap);
699 
700 	flags = lemtoh16(&cqe->flags);
701 
702 	xs->error = (NVME_CQE_SC(flags) == NVME_CQE_SC_SUCCESS) ?
703 	    XS_NOERROR : XS_DRIVER_STUFFUP;
704 	xs->status = SCSI_OK;
705 	xs->resid = 0;
706 	scsi_done(xs);
707 }
708 
709 void
710 nvme_scsi_sync(struct scsi_xfer *xs)
711 {
712 	struct scsi_link *link = xs->sc_link;
713 	struct nvme_softc *sc = link->bus->sb_adapter_softc;
714 	struct nvme_ccb *ccb = xs->io;
715 
716 	ccb->ccb_done = nvme_scsi_sync_done;
717 	ccb->ccb_cookie = xs;
718 
719 	if (ISSET(xs->flags, SCSI_POLL)) {
720 		nvme_poll(sc, sc->sc_q, ccb, nvme_scsi_sync_fill);
721 		return;
722 	}
723 
724 	nvme_q_submit(sc, sc->sc_q, ccb, nvme_scsi_sync_fill);
725 }
726 
727 void
728 nvme_scsi_sync_fill(struct nvme_softc *sc, struct nvme_ccb *ccb, void *slot)
729 {
730 	struct nvme_sqe *sqe = slot;
731 	struct scsi_xfer *xs = ccb->ccb_cookie;
732 	struct scsi_link *link = xs->sc_link;
733 
734 	sqe->opcode = NVM_CMD_FLUSH;
735 	htolem32(&sqe->nsid, link->target);
736 }
737 
738 void
739 nvme_scsi_sync_done(struct nvme_softc *sc, struct nvme_ccb *ccb,
740     struct nvme_cqe *cqe)
741 {
742 	struct scsi_xfer *xs = ccb->ccb_cookie;
743 	u_int16_t flags;
744 
745 	flags = lemtoh16(&cqe->flags);
746 
747 	xs->error = (NVME_CQE_SC(flags) == NVME_CQE_SC_SUCCESS) ?
748 	    XS_NOERROR : XS_DRIVER_STUFFUP;
749 	xs->status = SCSI_OK;
750 	xs->resid = 0;
751 	scsi_done(xs);
752 }
753 
754 void
755 nvme_scsi_inq(struct scsi_xfer *xs)
756 {
757 	struct scsi_inquiry *inq = (struct scsi_inquiry *)&xs->cmd;
758 
759 	if (!ISSET(inq->flags, SI_EVPD)) {
760 		nvme_scsi_inquiry(xs);
761 		return;
762 	}
763 
764 	switch (inq->pagecode) {
765 	default:
766 		/* printf("%s: %d\n", __func__, inq->pagecode); */
767 		break;
768 	}
769 
770 	xs->error = XS_DRIVER_STUFFUP;
771 	scsi_done(xs);
772 }
773 
774 void
775 nvme_scsi_inquiry(struct scsi_xfer *xs)
776 {
777 	struct scsi_inquiry_data inq;
778 	struct scsi_link *link = xs->sc_link;
779 	struct nvme_softc *sc = link->bus->sb_adapter_softc;
780 	struct nvm_identify_namespace *ns;
781 
782 	ns = sc->sc_namespaces[link->target].ident;
783 
784 	memset(&inq, 0, sizeof(inq));
785 
786 	inq.device = T_DIRECT;
787 	inq.version = SCSI_REV_SPC4;
788 	inq.response_format = SID_SCSI2_RESPONSE;
789 	inq.additional_length = SID_SCSI2_ALEN;
790 	inq.flags |= SID_CmdQue;
791 	memcpy(inq.vendor, "NVMe    ", sizeof(inq.vendor));
792 	memcpy(inq.product, sc->sc_identify.mn, sizeof(inq.product));
793 	memcpy(inq.revision, sc->sc_identify.fr, sizeof(inq.revision));
794 
795 	scsi_copy_internal_data(xs, &inq, sizeof(inq));
796 
797 	xs->error = XS_NOERROR;
798 	scsi_done(xs);
799 }
800 
801 void
802 nvme_scsi_capacity16(struct scsi_xfer *xs)
803 {
804 	struct scsi_read_cap_data_16 rcd;
805 	struct scsi_link *link = xs->sc_link;
806 	struct nvme_softc *sc = link->bus->sb_adapter_softc;
807 	struct nvm_identify_namespace *ns;
808 	struct nvm_namespace_format *f;
809 	u_int64_t nsze;
810 	u_int16_t tpe = READ_CAP_16_TPE;
811 
812 	ns = sc->sc_namespaces[link->target].ident;
813 
814 	if (xs->cmdlen != sizeof(struct scsi_read_capacity_16)) {
815 		xs->error = XS_DRIVER_STUFFUP;
816 		scsi_done(xs);
817 		return;
818 	}
819 
820 	/* sd_read_cap_16() will add one */
821 	nsze = lemtoh64(&ns->nsze) - 1;
822 	f = &ns->lbaf[NVME_ID_NS_FLBAS(ns->flbas)];
823 
824 	memset(&rcd, 0, sizeof(rcd));
825 	_lto8b(nsze, rcd.addr);
826 	_lto4b(1 << f->lbads, rcd.length);
827 	_lto2b(tpe, rcd.lowest_aligned);
828 
829 	memcpy(xs->data, &rcd, MIN(sizeof(rcd), xs->datalen));
830 
831 	xs->error = XS_NOERROR;
832 	scsi_done(xs);
833 }
834 
835 void
836 nvme_scsi_capacity(struct scsi_xfer *xs)
837 {
838 	struct scsi_read_cap_data rcd;
839 	struct scsi_link *link = xs->sc_link;
840 	struct nvme_softc *sc = link->bus->sb_adapter_softc;
841 	struct nvm_identify_namespace *ns;
842 	struct nvm_namespace_format *f;
843 	u_int64_t nsze;
844 
845 	ns = sc->sc_namespaces[link->target].ident;
846 
847 	if (xs->cmdlen != sizeof(struct scsi_read_capacity)) {
848 		xs->error = XS_DRIVER_STUFFUP;
849 		scsi_done(xs);
850 		return;
851 	}
852 
853 	/* sd_read_cap_10() will add one */
854 	nsze = lemtoh64(&ns->nsze) - 1;
855 	if (nsze > 0xffffffff)
856 		nsze = 0xffffffff;
857 
858 	f = &ns->lbaf[NVME_ID_NS_FLBAS(ns->flbas)];
859 
860 	memset(&rcd, 0, sizeof(rcd));
861 	_lto4b(nsze, rcd.addr);
862 	_lto4b(1 << f->lbads, rcd.length);
863 
864 	memcpy(xs->data, &rcd, MIN(sizeof(rcd), xs->datalen));
865 
866 	xs->error = XS_NOERROR;
867 	scsi_done(xs);
868 }
869 
870 void
871 nvme_scsi_free(struct scsi_link *link)
872 {
873 	struct nvme_softc *sc = link->bus->sb_adapter_softc;
874 	struct nvm_identify_namespace *identify;
875 
876 	identify = sc->sc_namespaces[link->target].ident;
877 	sc->sc_namespaces[link->target].ident = NULL;
878 
879 	free(identify, M_DEVBUF, sizeof(*identify));
880 }
881 
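/*
 * Claim the next submission queue slot under q_sq_mtx, let the fill
 * callback construct the SQE in place, tag it with the ccb id, and ring
 * the submission queue tail doorbell.
 */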
882 void
883 nvme_q_submit(struct nvme_softc *sc, struct nvme_queue *q, struct nvme_ccb *ccb,
884     void (*fill)(struct nvme_softc *, struct nvme_ccb *, void *))
885 {
886 	struct nvme_sqe *sqe = NVME_DMA_KVA(q->q_sq_dmamem);
887 	u_int32_t tail;
888 
889 	mtx_enter(&q->q_sq_mtx);
890 	tail = q->q_sq_tail;
891 	if (++q->q_sq_tail >= q->q_entries)
892 		q->q_sq_tail = 0;
893 
894 	sqe += tail;
895 
896 	bus_dmamap_sync(sc->sc_dmat, NVME_DMA_MAP(q->q_sq_dmamem),
897 	    sizeof(*sqe) * tail, sizeof(*sqe), BUS_DMASYNC_POSTWRITE);
898 	memset(sqe, 0, sizeof(*sqe));
899 	(*fill)(sc, ccb, sqe);
900 	sqe->cid = ccb->ccb_id;
901 	bus_dmamap_sync(sc->sc_dmat, NVME_DMA_MAP(q->q_sq_dmamem),
902 	    sizeof(*sqe) * tail, sizeof(*sqe), BUS_DMASYNC_PREWRITE);
903 
904 	nvme_write4(sc, q->q_sqtdbl, q->q_sq_tail);
905 	mtx_leave(&q->q_sq_mtx);
906 }
907 
908 struct nvme_poll_state {
909 	struct nvme_sqe s;
910 	struct nvme_cqe c;
911 };
912 
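/*
 * Issue a command synchronously: the caller's fill routine builds the SQE
 * into a local copy, which is then submitted, and nvme_q_complete() is
 * spun until the matching completion arrives.  The original cookie is
 * restored and the caller's done routine is invoked with the captured CQE.
 * Returns the CQE status flags with the phase bit masked, so 0 means
 * success.
 */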
913 int
914 nvme_poll(struct nvme_softc *sc, struct nvme_queue *q, struct nvme_ccb *ccb,
915     void (*fill)(struct nvme_softc *, struct nvme_ccb *, void *))
916 {
917 	struct nvme_poll_state state;
918 	void (*done)(struct nvme_softc *, struct nvme_ccb *, struct nvme_cqe *);
919 	void *cookie;
920 	u_int16_t flags;
921 
922 	memset(&state, 0, sizeof(state));
923 	(*fill)(sc, ccb, &state.s);
924 
925 	done = ccb->ccb_done;
926 	cookie = ccb->ccb_cookie;
927 
928 	ccb->ccb_done = nvme_poll_done;
929 	ccb->ccb_cookie = &state;
930 
931 	nvme_q_submit(sc, q, ccb, nvme_poll_fill);
932 	while (!ISSET(state.c.flags, htole16(NVME_CQE_PHASE))) {
933 		if (nvme_q_complete(sc, q) == 0)
934 			delay(10);
935 
936 		/* XXX no timeout? */
937 	}
938 
939 	ccb->ccb_cookie = cookie;
940 	done(sc, ccb, &state.c);
941 
942 	flags = lemtoh16(&state.c.flags);
943 
944 	return (flags & ~NVME_CQE_PHASE);
945 }
946 
947 void
948 nvme_poll_fill(struct nvme_softc *sc, struct nvme_ccb *ccb, void *slot)
949 {
950 	struct nvme_sqe *sqe = slot;
951 	struct nvme_poll_state *state = ccb->ccb_cookie;
952 
953 	*sqe = state->s;
954 }
955 
956 void
957 nvme_poll_done(struct nvme_softc *sc, struct nvme_ccb *ccb,
958     struct nvme_cqe *cqe)
959 {
960 	struct nvme_poll_state *state = ccb->ccb_cookie;
961 
962 	state->c = *cqe;
963 	SET(state->c.flags, htole16(NVME_CQE_PHASE));
964 }
965 
966 void
967 nvme_sqe_fill(struct nvme_softc *sc, struct nvme_ccb *ccb, void *slot)
968 {
969 	struct nvme_sqe *src = ccb->ccb_cookie;
970 	struct nvme_sqe *dst = slot;
971 
972 	*dst = *src;
973 }
974 
975 void
976 nvme_empty_done(struct nvme_softc *sc, struct nvme_ccb *ccb,
977     struct nvme_cqe *cqe)
978 {
979 }
980 
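/*
 * Reap completion queue entries whose phase bit matches the current phase,
 * calling each ccb's done routine, then advance the head (flipping the
 * phase on wrap) and ring the completion head doorbell.  Returns 1 if
 * anything was processed, 0 if not, and -1 if the queue is already being
 * serviced (the mutex could not be taken).
 */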
981 int
982 nvme_q_complete(struct nvme_softc *sc, struct nvme_queue *q)
983 {
984 	struct nvme_ccb *ccb;
985 	struct nvme_cqe *ring = NVME_DMA_KVA(q->q_cq_dmamem), *cqe;
986 	u_int32_t head;
987 	u_int16_t flags;
988 	int rv = 0;
989 
990 	if (!mtx_enter_try(&q->q_cq_mtx))
991 		return (-1);
992 
993 	head = q->q_cq_head;
994 
995 	nvme_dmamem_sync(sc, q->q_cq_dmamem, BUS_DMASYNC_POSTREAD);
996 	for (;;) {
997 		cqe = &ring[head];
998 		flags = lemtoh16(&cqe->flags);
999 		if ((flags & NVME_CQE_PHASE) != q->q_cq_phase)
1000 			break;
1001 
1002 		membar_consumer();
1003 
1004 		ccb = &sc->sc_ccbs[cqe->cid];
1005 		ccb->ccb_done(sc, ccb, cqe);
1006 
1007 		if (++head >= q->q_entries) {
1008 			head = 0;
1009 			q->q_cq_phase ^= NVME_CQE_PHASE;
1010 		}
1011 
1012 		rv = 1;
1013 	}
1014 	nvme_dmamem_sync(sc, q->q_cq_dmamem, BUS_DMASYNC_PREREAD);
1015 
1016 	if (rv)
1017 		nvme_write4(sc, q->q_cqhdbl, q->q_cq_head = head);
1018 	mtx_leave(&q->q_cq_mtx);
1019 
1020 	return (rv);
1021 }
1022 
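/*
 * Issue IDENTIFY CONTROLLER on the admin queue, print the model, firmware
 * and serial strings, and derive the maximum transfer size (MDTS) and
 * namespace count from the result.
 */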
1023 int
1024 nvme_identify(struct nvme_softc *sc, u_int mpsmin)
1025 {
1026 	char sn[41], mn[81], fr[17];
1027 	struct nvm_identify_controller *identify;
1028 	struct nvme_dmamem *mem;
1029 	struct nvme_ccb *ccb;
1030 	int rv = 1;
1031 
1032 	mem = nvme_dmamem_alloc(sc, sizeof(*identify));
1033 	if (mem == NULL)
1034 		return (1);
1035 
1036 	ccb = nvme_ccb_get(sc);
1037 	if (ccb == NULL)
1038 		panic("nvme_identify: nvme_ccb_get returned NULL");
1039 
1040 	ccb->ccb_done = nvme_empty_done;
1041 	ccb->ccb_cookie = mem;
1042 
1043 	nvme_dmamem_sync(sc, mem, BUS_DMASYNC_PREREAD);
1044 	rv = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_fill_identify);
1045 	nvme_dmamem_sync(sc, mem, BUS_DMASYNC_POSTREAD);
1046 
1047 	nvme_ccb_put(sc, ccb);
1048 
1049 	if (rv != 0)
1050 		goto done;
1051 
1052 	identify = NVME_DMA_KVA(mem);
1053 
1054 	scsi_strvis(sn, identify->sn, sizeof(identify->sn));
1055 	scsi_strvis(mn, identify->mn, sizeof(identify->mn));
1056 	scsi_strvis(fr, identify->fr, sizeof(identify->fr));
1057 
1058 	printf("%s: %s, firmware %s, serial %s\n", DEVNAME(sc), mn, fr, sn);
1059 
1060 	if (identify->mdts > 0) {
1061 		sc->sc_mdts = (1 << identify->mdts) * (1 << mpsmin);
1062 		if (sc->sc_mdts > NVME_MAXPHYS)
1063 			sc->sc_mdts = NVME_MAXPHYS;
1064 		sc->sc_max_prpl = sc->sc_mdts / sc->sc_mps;
1065 	}
1066 
1067 	sc->sc_nn = lemtoh32(&identify->nn);
1068 
1069 	/*
1070 	 * At least one Apple NVMe device presents a second, bogus disk that is
1071 	 * inaccessible, so cap targets at 1.
1072 	 *
1073 	 * sd1 at scsibus1 targ 2 lun 0: <NVMe, APPLE SSD AP0512, 16.1> [..]
1074 	 * sd1: 0MB, 4096 bytes/sector, 2 sectors
1075 	 */
1076 	if (sc->sc_nn > 1 &&
1077 	    mn[0] == 'A' && mn[1] == 'P' && mn[2] == 'P' && mn[3] == 'L' &&
1078 	    mn[4] == 'E')
1079 		sc->sc_nn = 1;
1080 
1081 	memcpy(&sc->sc_identify, identify, sizeof(sc->sc_identify));
1082 
1083 done:
1084 	nvme_dmamem_free(sc, mem);
1085 
1086 	return (rv);
1087 }
1088 
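/*
 * Create an I/O queue pair on the controller: the completion queue is
 * created first (interrupts enabled, physically contiguous), then the
 * submission queue bound to it via its cqid.
 */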
1089 int
1090 nvme_q_create(struct nvme_softc *sc, struct nvme_queue *q)
1091 {
1092 	struct nvme_sqe_q sqe;
1093 	struct nvme_ccb *ccb;
1094 	int rv;
1095 
1096 	ccb = scsi_io_get(&sc->sc_iopool, 0);
1097 	KASSERT(ccb != NULL);
1098 
1099 	ccb->ccb_done = nvme_empty_done;
1100 	ccb->ccb_cookie = &sqe;
1101 
1102 	memset(&sqe, 0, sizeof(sqe));
1103 	sqe.opcode = NVM_ADMIN_ADD_IOCQ;
1104 	htolem64(&sqe.prp1, NVME_DMA_DVA(q->q_cq_dmamem));
1105 	htolem16(&sqe.qsize, q->q_entries - 1);
1106 	htolem16(&sqe.qid, q->q_id);
1107 	sqe.qflags = NVM_SQE_CQ_IEN | NVM_SQE_Q_PC;
1108 
1109 	rv = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_sqe_fill);
1110 	if (rv != 0)
1111 		goto fail;
1112 
1113 	ccb->ccb_done = nvme_empty_done;
1114 	ccb->ccb_cookie = &sqe;
1115 
1116 	memset(&sqe, 0, sizeof(sqe));
1117 	sqe.opcode = NVM_ADMIN_ADD_IOSQ;
1118 	htolem64(&sqe.prp1, NVME_DMA_DVA(q->q_sq_dmamem));
1119 	htolem16(&sqe.qsize, q->q_entries - 1);
1120 	htolem16(&sqe.qid, q->q_id);
1121 	htolem16(&sqe.cqid, q->q_id);
1122 	sqe.qflags = NVM_SQE_Q_PC;
1123 
1124 	rv = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_sqe_fill);
1125 	if (rv != 0)
1126 		goto fail;
1127 
1128 fail:
1129 	scsi_io_put(&sc->sc_iopool, ccb);
1130 	return (rv);
1131 }
1132 
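/*
 * Tear down an I/O queue pair: delete the submission queue, then its
 * completion queue, and free the host memory on success.
 */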
1133 int
1134 nvme_q_delete(struct nvme_softc *sc, struct nvme_queue *q)
1135 {
1136 	struct nvme_sqe_q sqe;
1137 	struct nvme_ccb *ccb;
1138 	int rv;
1139 
1140 	ccb = scsi_io_get(&sc->sc_iopool, 0);
1141 	KASSERT(ccb != NULL);
1142 
1143 	ccb->ccb_done = nvme_empty_done;
1144 	ccb->ccb_cookie = &sqe;
1145 
1146 	memset(&sqe, 0, sizeof(sqe));
1147 	sqe.opcode = NVM_ADMIN_DEL_IOSQ;
1148 	htolem16(&sqe.qid, q->q_id);
1149 
1150 	rv = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_sqe_fill);
1151 	if (rv != 0)
1152 		goto fail;
1153 
1154 	ccb->ccb_done = nvme_empty_done;
1155 	ccb->ccb_cookie = &sqe;
1156 
1157 	memset(&sqe, 0, sizeof(sqe));
1158 	sqe.opcode = NVM_ADMIN_DEL_IOCQ;
1159 	htolem16(&sqe.qid, q->q_id);
1160 
1161 	rv = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_sqe_fill);
1162 	if (rv != 0)
1163 		goto fail;
1164 
1165 	nvme_q_free(sc, q);
1166 
1167 fail:
1168 	scsi_io_put(&sc->sc_iopool, ccb);
1169 	return (rv);
1171 }
1172 
1173 void
1174 nvme_fill_identify(struct nvme_softc *sc, struct nvme_ccb *ccb, void *slot)
1175 {
1176 	struct nvme_sqe *sqe = slot;
1177 	struct nvme_dmamem *mem = ccb->ccb_cookie;
1178 
1179 	sqe->opcode = NVM_ADMIN_IDENTIFY;
1180 	htolem64(&sqe->entry.prp[0], NVME_DMA_DVA(mem));
1181 	htolem32(&sqe->cdw10, 1);
1182 }
1183 
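/*
 * Allocate nccbs command control blocks along with one shared DMA buffer
 * holding a PRP list slice of sc_max_prpl entries per ccb, and put them
 * all on the free list.
 */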
1184 int
1185 nvme_ccbs_alloc(struct nvme_softc *sc, u_int nccbs)
1186 {
1187 	struct nvme_ccb *ccb;
1188 	bus_addr_t off;
1189 	u_int64_t *prpl;
1190 	u_int i;
1191 
1192 	sc->sc_ccbs = mallocarray(nccbs, sizeof(*ccb), M_DEVBUF,
1193 	    M_WAITOK | M_CANFAIL);
1194 	if (sc->sc_ccbs == NULL)
1195 		return (1);
1196 
1197 	sc->sc_ccb_prpls = nvme_dmamem_alloc(sc,
1198 	    sizeof(*prpl) * sc->sc_max_prpl * nccbs);
1199 
1200 	prpl = NVME_DMA_KVA(sc->sc_ccb_prpls);
1201 	off = 0;
1202 
1203 	for (i = 0; i < nccbs; i++) {
1204 		ccb = &sc->sc_ccbs[i];
1205 
1206 		if (bus_dmamap_create(sc->sc_dmat, sc->sc_mdts,
1207 		    sc->sc_max_prpl + 1, /* we get a free prp in the sqe */
1208 		    sc->sc_mps, sc->sc_mps, BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW,
1209 		    &ccb->ccb_dmamap) != 0)
1210 			goto free_maps;
1211 
1212 		ccb->ccb_id = i;
1213 		ccb->ccb_prpl = prpl;
1214 		ccb->ccb_prpl_off = off;
1215 		ccb->ccb_prpl_dva = NVME_DMA_DVA(sc->sc_ccb_prpls) + off;
1216 
1217 		SIMPLEQ_INSERT_TAIL(&sc->sc_ccb_list, ccb, ccb_entry);
1218 
1219 		prpl += sc->sc_max_prpl;
1220 		off += sizeof(*prpl) * sc->sc_max_prpl;
1221 	}
1222 
1223 	return (0);
1224 
1225 free_maps:
1226 	nvme_ccbs_free(sc, nccbs);
1227 	return (1);
1228 }
1229 
1230 void *
1231 nvme_ccb_get(void *cookie)
1232 {
1233 	struct nvme_softc *sc = cookie;
1234 	struct nvme_ccb *ccb;
1235 
1236 	mtx_enter(&sc->sc_ccb_mtx);
1237 	ccb = SIMPLEQ_FIRST(&sc->sc_ccb_list);
1238 	if (ccb != NULL)
1239 		SIMPLEQ_REMOVE_HEAD(&sc->sc_ccb_list, ccb_entry);
1240 	mtx_leave(&sc->sc_ccb_mtx);
1241 
1242 	return (ccb);
1243 }
1244 
1245 void
1246 nvme_ccb_put(void *cookie, void *io)
1247 {
1248 	struct nvme_softc *sc = cookie;
1249 	struct nvme_ccb *ccb = io;
1250 
1251 	mtx_enter(&sc->sc_ccb_mtx);
1252 	SIMPLEQ_INSERT_HEAD(&sc->sc_ccb_list, ccb, ccb_entry);
1253 	mtx_leave(&sc->sc_ccb_mtx);
1254 }
1255 
1256 void
1257 nvme_ccbs_free(struct nvme_softc *sc, unsigned int nccbs)
1258 {
1259 	struct nvme_ccb *ccb;
1260 
1261 	while ((ccb = SIMPLEQ_FIRST(&sc->sc_ccb_list)) != NULL) {
1262 		SIMPLEQ_REMOVE_HEAD(&sc->sc_ccb_list, ccb_entry);
1263 		bus_dmamap_destroy(sc->sc_dmat, ccb->ccb_dmamap);
1264 	}
1265 
1266 	nvme_dmamem_free(sc, sc->sc_ccb_prpls);
1267 	free(sc->sc_ccbs, M_DEVBUF, nccbs * sizeof(*ccb));
1268 }
1269 
1270 struct nvme_queue *
1271 nvme_q_alloc(struct nvme_softc *sc, u_int16_t id, u_int entries, u_int dstrd)
1272 {
1273 	struct nvme_queue *q;
1274 
1275 	q = malloc(sizeof(*q), M_DEVBUF, M_WAITOK | M_CANFAIL);
1276 	if (q == NULL)
1277 		return (NULL);
1278 
1279 	q->q_sq_dmamem = nvme_dmamem_alloc(sc,
1280 	    sizeof(struct nvme_sqe) * entries);
1281 	if (q->q_sq_dmamem == NULL)
1282 		goto free;
1283 
1284 	q->q_cq_dmamem = nvme_dmamem_alloc(sc,
1285 	    sizeof(struct nvme_cqe) * entries);
1286 	if (q->q_cq_dmamem == NULL)
1287 		goto free_sq;
1288 
1289 	memset(NVME_DMA_KVA(q->q_sq_dmamem), 0, NVME_DMA_LEN(q->q_sq_dmamem));
1290 	memset(NVME_DMA_KVA(q->q_cq_dmamem), 0, NVME_DMA_LEN(q->q_cq_dmamem));
1291 
1292 	mtx_init(&q->q_sq_mtx, IPL_BIO);
1293 	mtx_init(&q->q_cq_mtx, IPL_BIO);
1294 	q->q_sqtdbl = NVME_SQTDBL(id, dstrd);
1295 	q->q_cqhdbl = NVME_CQHDBL(id, dstrd);
1296 
1297 	q->q_id = id;
1298 	q->q_entries = entries;
1299 	q->q_sq_tail = 0;
1300 	q->q_cq_head = 0;
1301 	q->q_cq_phase = NVME_CQE_PHASE;
1302 
1303 	nvme_dmamem_sync(sc, q->q_sq_dmamem, BUS_DMASYNC_PREWRITE);
1304 	nvme_dmamem_sync(sc, q->q_cq_dmamem, BUS_DMASYNC_PREREAD);
1305 
1306 	return (q);
1307 
1308 free_sq:
1309 	nvme_dmamem_free(sc, q->q_sq_dmamem);
1310 free:
1311 	free(q, M_DEVBUF, sizeof *q);
1312 
1313 	return (NULL);
1314 }
1315 
1316 int
1317 nvme_q_reset(struct nvme_softc *sc, struct nvme_queue *q)
1318 {
1319 	memset(NVME_DMA_KVA(q->q_sq_dmamem), 0, NVME_DMA_LEN(q->q_sq_dmamem));
1320 	memset(NVME_DMA_KVA(q->q_cq_dmamem), 0, NVME_DMA_LEN(q->q_cq_dmamem));
1321 
1322 	q->q_sqtdbl = NVME_SQTDBL(q->q_id, sc->sc_dstrd);
1323 	q->q_cqhdbl = NVME_CQHDBL(q->q_id, sc->sc_dstrd);
1324 
1325 	q->q_sq_tail = 0;
1326 	q->q_cq_head = 0;
1327 	q->q_cq_phase = NVME_CQE_PHASE;
1328 
1329 	nvme_dmamem_sync(sc, q->q_sq_dmamem, BUS_DMASYNC_PREWRITE);
1330 	nvme_dmamem_sync(sc, q->q_cq_dmamem, BUS_DMASYNC_PREREAD);
1331 
1332 	return (0);
1333 }
1334 
1335 void
1336 nvme_q_free(struct nvme_softc *sc, struct nvme_queue *q)
1337 {
1338 	nvme_dmamem_sync(sc, q->q_cq_dmamem, BUS_DMASYNC_POSTREAD);
1339 	nvme_dmamem_sync(sc, q->q_sq_dmamem, BUS_DMASYNC_POSTWRITE);
1340 	nvme_dmamem_free(sc, q->q_cq_dmamem);
1341 	nvme_dmamem_free(sc, q->q_sq_dmamem);
1342 	free(q, M_DEVBUF, sizeof *q);
1343 }
1344 
1345 int
1346 nvme_intr(void *xsc)
1347 {
1348 	struct nvme_softc *sc = xsc;
1349 	int rv = 0;
1350 
1351 	if (nvme_q_complete(sc, sc->sc_q))
1352 		rv = 1;
1353 	if (nvme_q_complete(sc, sc->sc_admin_q))
1354 		rv = 1;
1355 
1356 	return (rv);
1357 }
1358 
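/*
 * INTx interrupt handler: mask the controller interrupt (INTMS) while the
 * queues are processed, then unmask it again (INTMC).
 */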
1359 int
1360 nvme_intr_intx(void *xsc)
1361 {
1362 	struct nvme_softc *sc = xsc;
1363 	int rv;
1364 
1365 	nvme_write4(sc, NVME_INTMS, 1);
1366 	rv = nvme_intr(sc);
1367 	nvme_write4(sc, NVME_INTMC, 1);
1368 
1369 	return (rv);
1370 }
1371 
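/*
 * Allocate a single-segment DMA buffer and return it wrapped in a struct
 * nvme_dmamem carrying its DMA map, backing segment and kernel mapping.
 */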
1372 struct nvme_dmamem *
1373 nvme_dmamem_alloc(struct nvme_softc *sc, size_t size)
1374 {
1375 	struct nvme_dmamem *ndm;
1376 	int nsegs;
1377 
1378 	ndm = malloc(sizeof(*ndm), M_DEVBUF, M_WAITOK | M_ZERO);
1379 	if (ndm == NULL)
1380 		return (NULL);
1381 
1382 	ndm->ndm_size = size;
1383 
1384 	if (bus_dmamap_create(sc->sc_dmat, size, 1, size, 0,
1385 	    BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW, &ndm->ndm_map) != 0)
1386 		goto ndmfree;
1387 
1388 	if (bus_dmamem_alloc(sc->sc_dmat, size, sc->sc_mps, 0, &ndm->ndm_seg,
1389 	    1, &nsegs, BUS_DMA_WAITOK | BUS_DMA_ZERO) != 0)
1390 		goto destroy;
1391 
1392 	if (bus_dmamem_map(sc->sc_dmat, &ndm->ndm_seg, nsegs, size,
1393 	    &ndm->ndm_kva, BUS_DMA_WAITOK) != 0)
1394 		goto free;
1395 
1396 	if (bus_dmamap_load(sc->sc_dmat, ndm->ndm_map, ndm->ndm_kva, size,
1397 	    NULL, BUS_DMA_WAITOK) != 0)
1398 		goto unmap;
1399 
1400 	return (ndm);
1401 
1402 unmap:
1403 	bus_dmamem_unmap(sc->sc_dmat, ndm->ndm_kva, size);
1404 free:
1405 	bus_dmamem_free(sc->sc_dmat, &ndm->ndm_seg, 1);
1406 destroy:
1407 	bus_dmamap_destroy(sc->sc_dmat, ndm->ndm_map);
1408 ndmfree:
1409 	free(ndm, M_DEVBUF, sizeof *ndm);
1410 
1411 	return (NULL);
1412 }
1413 
1414 void
1415 nvme_dmamem_sync(struct nvme_softc *sc, struct nvme_dmamem *mem, int ops)
1416 {
1417 	bus_dmamap_sync(sc->sc_dmat, NVME_DMA_MAP(mem),
1418 	    0, NVME_DMA_LEN(mem), ops);
1419 }
1420 
1421 void
1422 nvme_dmamem_free(struct nvme_softc *sc, struct nvme_dmamem *ndm)
1423 {
1424 	bus_dmamap_unload(sc->sc_dmat, ndm->ndm_map);
1425 	bus_dmamem_unmap(sc->sc_dmat, ndm->ndm_kva, ndm->ndm_size);
1426 	bus_dmamem_free(sc->sc_dmat, &ndm->ndm_seg, 1);
1427 	bus_dmamap_destroy(sc->sc_dmat, ndm->ndm_map);
1428 	free(ndm, M_DEVBUF, sizeof *ndm);
1429 }
1430 
1431 #ifdef HIBERNATE
1432 
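/*
 * Minimal polled admin command submission for use during hibernate, when
 * no interrupts or ccbs are available.  The caller supplies the command id
 * used to match the completion.
 */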
1433 int
1434 nvme_hibernate_admin_cmd(struct nvme_softc *sc, struct nvme_sqe *sqe,
1435     struct nvme_cqe *cqe, int cid)
1436 {
1437 	struct nvme_sqe *asqe = NVME_DMA_KVA(sc->sc_admin_q->q_sq_dmamem);
1438 	struct nvme_cqe *acqe = NVME_DMA_KVA(sc->sc_admin_q->q_cq_dmamem);
1439 	struct nvme_queue *q = sc->sc_admin_q;
1440 	int tail;
1441 	u_int16_t flags;
1442 
1443 	/* submit command */
1444 	tail = q->q_sq_tail;
1445 	if (++q->q_sq_tail >= q->q_entries)
1446 		q->q_sq_tail = 0;
1447 
1448 	asqe += tail;
1449 	bus_dmamap_sync(sc->sc_dmat, NVME_DMA_MAP(q->q_sq_dmamem),
1450 	    sizeof(*sqe) * tail, sizeof(*sqe), BUS_DMASYNC_POSTWRITE);
1451 	*asqe = *sqe;
1452 	asqe->cid = cid;
1453 	bus_dmamap_sync(sc->sc_dmat, NVME_DMA_MAP(q->q_sq_dmamem),
1454 	    sizeof(*sqe) * tail, sizeof(*sqe), BUS_DMASYNC_PREWRITE);
1455 
1456 	nvme_write4(sc, q->q_sqtdbl, q->q_sq_tail);
1457 
1458 	/* wait for completion */
1459 	acqe += q->q_cq_head;
1460 	for (;;) {
1461 		nvme_dmamem_sync(sc, q->q_cq_dmamem, BUS_DMASYNC_POSTREAD);
1462 		flags = lemtoh16(&acqe->flags);
1463 		if ((flags & NVME_CQE_PHASE) == q->q_cq_phase)
1464 			break;
1465 
1466 		delay(10);
1467 	}
1468 
1469 	if (++q->q_cq_head >= q->q_entries) {
1470 		q->q_cq_head = 0;
1471 		q->q_cq_phase ^= NVME_CQE_PHASE;
1472 	}
1473 	nvme_write4(sc, q->q_cqhdbl, q->q_cq_head);
1474 	if ((NVME_CQE_SC(flags) != NVME_CQE_SC_SUCCESS) || (acqe->cid != cid))
1475 		return (EIO);
1476 
1477 	return (0);
1478 }
1479 
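/*
 * Hibernate I/O hook.  HIB_INIT locates the nvme controller and namespace
 * behind the suspend device and sets up the dedicated hibernate queue pair
 * via admin commands.  HIB_W builds an NVM WRITE command by hand, using
 * the caller-provided scratch page for the PRP list when the transfer
 * spans more than two controller pages, and polls for its completion.
 */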
1480 int
1481 nvme_hibernate_io(dev_t dev, daddr_t blkno, vaddr_t addr, size_t size,
1482     int op, void *page)
1483 {
1484 	struct nvme_hibernate_page {
1485 		u_int64_t		prpl[MAXPHYS / PAGE_SIZE];
1486 
1487 		struct nvme_softc	*sc;
1488 		int			nsid;
1489 		int			sq_tail;
1490 		int			cq_head;
1491 		int			cqe_phase;
1492 
1493 		daddr_t			poffset;
1494 		size_t			psize;
1495 	} *my = page;
1496 	struct nvme_sqe_io *isqe;
1497 	struct nvme_cqe *icqe;
1498 	paddr_t data_phys, page_phys;
1499 	u_int64_t data_bus_phys, page_bus_phys;
1500 	u_int16_t flags;
1501 	int i;
1502 
1503 	if (op == HIB_INIT) {
1504 		struct device *disk;
1505 		struct device *scsibus;
1506 		extern struct cfdriver sd_cd;
1507 		struct scsi_link *link;
1508 		struct scsibus_softc *bus_sc;
1509 		struct nvme_sqe_q qsqe;
1510 		struct nvme_cqe qcqe;
1511 
1512 		/* find nvme softc */
1513 		disk = disk_lookup(&sd_cd, DISKUNIT(dev));
1514 		scsibus = disk->dv_parent;
1515 		my->sc = (struct nvme_softc *)disk->dv_parent->dv_parent;
1516 
1517 		/* find scsi_link, which tells us the target */
1518 		my->nsid = 0;
1519 		bus_sc = (struct scsibus_softc *)scsibus;
1520 		SLIST_FOREACH(link, &bus_sc->sc_link_list, bus_list) {
1521 			if (link->device_softc == disk) {
1522 				my->nsid = link->target;
1523 				break;
1524 			}
1525 		}
1526 		if (my->nsid == 0)
1527 			return (EIO);
1528 
1529 		my->poffset = blkno;
1530 		my->psize = size;
1531 
1532 		memset(NVME_DMA_KVA(my->sc->sc_hib_q->q_cq_dmamem), 0,
1533 		    my->sc->sc_hib_q->q_entries * sizeof(struct nvme_cqe));
1534 		memset(NVME_DMA_KVA(my->sc->sc_hib_q->q_sq_dmamem), 0,
1535 		    my->sc->sc_hib_q->q_entries * sizeof(struct nvme_sqe));
1536 
1537 		my->sq_tail = 0;
1538 		my->cq_head = 0;
1539 		my->cqe_phase = NVME_CQE_PHASE;
1540 
1541 		pmap_extract(pmap_kernel(), (vaddr_t)page, &page_phys);
1542 
1543 		memset(&qsqe, 0, sizeof(qsqe));
1544 		qsqe.opcode = NVM_ADMIN_ADD_IOCQ;
1545 		htolem64(&qsqe.prp1,
1546 		    NVME_DMA_DVA(my->sc->sc_hib_q->q_cq_dmamem));
1547 		htolem16(&qsqe.qsize, my->sc->sc_hib_q->q_entries - 1);
1548 		htolem16(&qsqe.qid, my->sc->sc_hib_q->q_id);
1549 		qsqe.qflags = NVM_SQE_CQ_IEN | NVM_SQE_Q_PC;
1550 		if (nvme_hibernate_admin_cmd(my->sc, (struct nvme_sqe *)&qsqe,
1551 		    &qcqe, 1) != 0)
1552 			return (EIO);
1553 
1554 		memset(&qsqe, 0, sizeof(qsqe));
1555 		qsqe.opcode = NVM_ADMIN_ADD_IOSQ;
1556 		htolem64(&qsqe.prp1,
1557 		    NVME_DMA_DVA(my->sc->sc_hib_q->q_sq_dmamem));
1558 		htolem16(&qsqe.qsize, my->sc->sc_hib_q->q_entries - 1);
1559 		htolem16(&qsqe.qid, my->sc->sc_hib_q->q_id);
1560 		htolem16(&qsqe.cqid, my->sc->sc_hib_q->q_id);
1561 		qsqe.qflags = NVM_SQE_Q_PC;
1562 		if (nvme_hibernate_admin_cmd(my->sc, (struct nvme_sqe *)&qsqe,
1563 		    &qcqe, 2) != 0)
1564 			return (EIO);
1565 
1566 		return (0);
1567 	}
1568 
1569 	if (op != HIB_W)
1570 		return (0);
1571 
1572 	isqe = NVME_DMA_KVA(my->sc->sc_hib_q->q_sq_dmamem);
1573 	isqe += my->sq_tail;
1574 	if (++my->sq_tail == my->sc->sc_hib_q->q_entries)
1575 		my->sq_tail = 0;
1576 
1577 	memset(isqe, 0, sizeof(*isqe));
1578 	isqe->opcode = NVM_CMD_WRITE;
1579 	htolem32(&isqe->nsid, my->nsid);
1580 
1581 	pmap_extract(pmap_kernel(), addr, &data_phys);
1582 	data_bus_phys = data_phys;
1583 	htolem64(&isqe->entry.prp[0], data_bus_phys);
1584 	if ((size > my->sc->sc_mps) && (size <= my->sc->sc_mps * 2)) {
1585 		htolem64(&isqe->entry.prp[1], data_bus_phys + my->sc->sc_mps);
1586 	} else if (size > my->sc->sc_mps * 2) {
1587 		pmap_extract(pmap_kernel(), (vaddr_t)page, &page_phys);
1588 		page_bus_phys = page_phys;
1589 		htolem64(&isqe->entry.prp[1], page_bus_phys +
1590 		    offsetof(struct nvme_hibernate_page, prpl));
1591 		for (i = 1; i < (size / my->sc->sc_mps); i++) {
1592 			htolem64(&my->prpl[i - 1], data_bus_phys +
1593 			    (i * my->sc->sc_mps));
1594 		}
1595 	}
1596 
1597 	isqe->slba = blkno + my->poffset;
1598 	isqe->nlb = (size / DEV_BSIZE) - 1;
1599 	isqe->cid = blkno % 0xffff;
1600 
1601 	nvme_write4(my->sc, NVME_SQTDBL(NVME_HIB_Q, my->sc->sc_dstrd),
1602 	    my->sq_tail);
1603 
1604 	icqe = NVME_DMA_KVA(my->sc->sc_hib_q->q_cq_dmamem);
1605 	icqe += my->cq_head;
1606 	for (;;) {
1607 		flags = lemtoh16(&icqe->flags);
1608 		if ((flags & NVME_CQE_PHASE) == my->cqe_phase)
1609 			break;
1610 
1611 		delay(10);
1612 	}
1613 
1614 	if (++my->cq_head == my->sc->sc_hib_q->q_entries) {
1615 		my->cq_head = 0;
1616 		my->cqe_phase ^= NVME_CQE_PHASE;
1617 	}
1618 	nvme_write4(my->sc, NVME_CQHDBL(NVME_HIB_Q, my->sc->sc_dstrd),
1619 	    my->cq_head);
1620 	if ((NVME_CQE_SC(flags) != NVME_CQE_SC_SUCCESS) ||
1621 	    (icqe->cid != blkno % 0xffff))
1622 		return (EIO);
1623 
1624 	return (0);
1625 }
1626 
1627 #endif
1628