/*	$NetBSD: nvme.c,v 1.39 2018/04/18 10:11:45 nonaka Exp $	*/
/*	$OpenBSD: nvme.c,v 1.49 2016/04/18 05:59:50 dlg Exp $ */

/*
 * Copyright (c) 2014 David Gwynne <dlg@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: nvme.c,v 1.39 2018/04/18 10:11:45 nonaka Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/atomic.h>
#include <sys/bus.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/device.h>
#include <sys/kmem.h>
#include <sys/once.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/mutex.h>

#include <uvm/uvm_extern.h>

#include <dev/ic/nvmereg.h>
#include <dev/ic/nvmevar.h>
#include <dev/ic/nvmeio.h>

#include "ioconf.h"

#define	B4_CHK_RDY_DELAY_MS	2300	/* workaround controller bug */

int nvme_adminq_size = 32;
int nvme_ioq_size = 1024;

static int	nvme_print(void *, const char *);

static int	nvme_ready(struct nvme_softc *, uint32_t);
static int	nvme_enable(struct nvme_softc *, u_int);
static int	nvme_disable(struct nvme_softc *);
static int	nvme_shutdown(struct nvme_softc *);

#ifdef NVME_DEBUG
static void	nvme_dumpregs(struct nvme_softc *);
#endif
static int	nvme_identify(struct nvme_softc *, u_int);
static void	nvme_fill_identify(struct nvme_queue *, struct nvme_ccb *,
		    void *);

static int	nvme_ccbs_alloc(struct nvme_queue *, uint16_t);
static void	nvme_ccbs_free(struct nvme_queue *);

static struct nvme_ccb *
		nvme_ccb_get(struct nvme_queue *, bool);
static void	nvme_ccb_put(struct nvme_queue *, struct nvme_ccb *);

static int	nvme_poll(struct nvme_softc *, struct nvme_queue *,
		    struct nvme_ccb *, void (*)(struct nvme_queue *,
		    struct nvme_ccb *, void *), int);
static void	nvme_poll_fill(struct nvme_queue *, struct nvme_ccb *, void *);
static void	nvme_poll_done(struct nvme_queue *, struct nvme_ccb *,
		    struct nvme_cqe *);
static void	nvme_sqe_fill(struct nvme_queue *, struct nvme_ccb *, void *);
static void	nvme_empty_done(struct nvme_queue *, struct nvme_ccb *,
		    struct nvme_cqe *);

static struct nvme_queue *
		nvme_q_alloc(struct nvme_softc *, uint16_t, u_int, u_int);
static int	nvme_q_create(struct nvme_softc *, struct nvme_queue *);
static int	nvme_q_delete(struct nvme_softc *, struct nvme_queue *);
static void	nvme_q_submit(struct nvme_softc *, struct nvme_queue *,
		    struct nvme_ccb *, void (*)(struct nvme_queue *,
		    struct nvme_ccb *, void *));
static int	nvme_q_complete(struct nvme_softc *, struct nvme_queue *q);
static void	nvme_q_free(struct nvme_softc *, struct nvme_queue *);
static void	nvme_q_wait_complete(struct nvme_softc *, struct nvme_queue *,
		    bool (*)(void *), void *);

static struct nvme_dmamem *
		nvme_dmamem_alloc(struct nvme_softc *, size_t);
static void	nvme_dmamem_free(struct nvme_softc *, struct nvme_dmamem *);
static void	nvme_dmamem_sync(struct nvme_softc *, struct nvme_dmamem *,
		    int);

static void	nvme_ns_io_fill(struct nvme_queue *, struct nvme_ccb *,
		    void *);
static void	nvme_ns_io_done(struct nvme_queue *, struct nvme_ccb *,
		    struct nvme_cqe *);
static void	nvme_ns_sync_fill(struct nvme_queue *, struct nvme_ccb *,
		    void *);
static void	nvme_ns_sync_done(struct nvme_queue *, struct nvme_ccb *,
		    struct nvme_cqe *);
static void	nvme_getcache_fill(struct nvme_queue *, struct nvme_ccb *,
		    void *);
static void	nvme_getcache_done(struct nvme_queue *, struct nvme_ccb *,
		    struct nvme_cqe *);

static void	nvme_pt_fill(struct nvme_queue *, struct nvme_ccb *,
		    void *);
static void	nvme_pt_done(struct nvme_queue *, struct nvme_ccb *,
		    struct nvme_cqe *);
static int	nvme_command_passthrough(struct nvme_softc *,
		    struct nvme_pt_command *, uint16_t, struct lwp *, bool);

static int	nvme_get_number_of_queues(struct nvme_softc *, u_int *);

#define NVME_TIMO_QOP		5	/* queue create and delete timeout */
#define NVME_TIMO_IDENT		10	/* probe identify timeout */
#define NVME_TIMO_PT		-1	/* passthrough cmd timeout */
#define NVME_TIMO_SY		60	/* sync cache timeout */

#define	nvme_read4(_s, _r) \
	bus_space_read_4((_s)->sc_iot, (_s)->sc_ioh, (_r))
#define	nvme_write4(_s, _r, _v) \
	bus_space_write_4((_s)->sc_iot, (_s)->sc_ioh, (_r), (_v))
/*
 * Some controllers, at least Apple NVMe, always require split
 * transfers, so don't use bus_space_{read,write}_8() on LP64.
 */
static inline uint64_t
nvme_read8(struct nvme_softc *sc, bus_size_t r)
{
	uint64_t v;
	uint32_t *a = (uint32_t *)&v;

#if _BYTE_ORDER == _LITTLE_ENDIAN
	a[0] = nvme_read4(sc, r);
	a[1] = nvme_read4(sc, r + 4);
#else	/* _BYTE_ORDER == _LITTLE_ENDIAN */
	a[1] = nvme_read4(sc, r);
	a[0] = nvme_read4(sc, r + 4);
#endif

	return v;
}

static inline void
nvme_write8(struct nvme_softc *sc, bus_size_t r, uint64_t v)
{
	uint32_t *a = (uint32_t *)&v;

#if _BYTE_ORDER == _LITTLE_ENDIAN
	nvme_write4(sc, r, a[0]);
	nvme_write4(sc, r + 4, a[1]);
#else	/* _BYTE_ORDER == _LITTLE_ENDIAN */
	nvme_write4(sc, r, a[1]);
	nvme_write4(sc, r + 4, a[0]);
#endif
}
#define	nvme_barrier(_s, _r, _l, _f) \
	bus_space_barrier((_s)->sc_iot, (_s)->sc_ioh, (_r), (_l), (_f))

#ifdef NVME_DEBUG
static __used void
nvme_dumpregs(struct nvme_softc *sc)
{
	uint64_t r8;
	uint32_t r4;

#define	DEVNAME(_sc) device_xname((_sc)->sc_dev)
	r8 = nvme_read8(sc, NVME_CAP);
	printf("%s: cap 0x%016"PRIx64"\n", DEVNAME(sc), nvme_read8(sc, NVME_CAP));
	printf("%s: mpsmax %u (%u)\n", DEVNAME(sc),
	    (u_int)NVME_CAP_MPSMAX(r8), (1 << NVME_CAP_MPSMAX(r8)));
	printf("%s: mpsmin %u (%u)\n", DEVNAME(sc),
	    (u_int)NVME_CAP_MPSMIN(r8), (1 << NVME_CAP_MPSMIN(r8)));
	printf("%s: css %"PRIu64"\n", DEVNAME(sc), NVME_CAP_CSS(r8));
	printf("%s: nssrs %"PRIu64"\n", DEVNAME(sc), NVME_CAP_NSSRS(r8));
	printf("%s: dstrd %"PRIu64"\n", DEVNAME(sc), NVME_CAP_DSTRD(r8));
	printf("%s: to %"PRIu64" msec\n", DEVNAME(sc), NVME_CAP_TO(r8));
	printf("%s: ams %"PRIu64"\n", DEVNAME(sc), NVME_CAP_AMS(r8));
	printf("%s: cqr %"PRIu64"\n", DEVNAME(sc), NVME_CAP_CQR(r8));
	printf("%s: mqes %"PRIu64"\n", DEVNAME(sc), NVME_CAP_MQES(r8));

	printf("%s: vs 0x%04x\n", DEVNAME(sc), nvme_read4(sc, NVME_VS));

	r4 = nvme_read4(sc, NVME_CC);
	printf("%s: cc 0x%04x\n", DEVNAME(sc), r4);
	printf("%s: iocqes %u (%u)\n", DEVNAME(sc), NVME_CC_IOCQES_R(r4),
	    (1 << NVME_CC_IOCQES_R(r4)));
	printf("%s: iosqes %u (%u)\n", DEVNAME(sc), NVME_CC_IOSQES_R(r4),
	    (1 << NVME_CC_IOSQES_R(r4)));
	printf("%s: shn %u\n", DEVNAME(sc), NVME_CC_SHN_R(r4));
	printf("%s: ams %u\n", DEVNAME(sc), NVME_CC_AMS_R(r4));
	printf("%s: mps %u (%u)\n", DEVNAME(sc), NVME_CC_MPS_R(r4),
	    (1 << NVME_CC_MPS_R(r4)));
	printf("%s: css %u\n", DEVNAME(sc), NVME_CC_CSS_R(r4));
	printf("%s: en %u\n", DEVNAME(sc), ISSET(r4, NVME_CC_EN) ? 1 : 0);

	r4 = nvme_read4(sc, NVME_CSTS);
	printf("%s: csts 0x%08x\n", DEVNAME(sc), r4);
	printf("%s: rdy %u\n", DEVNAME(sc), r4 & NVME_CSTS_RDY);
	printf("%s: cfs %u\n", DEVNAME(sc), r4 & NVME_CSTS_CFS);
	printf("%s: shst %x\n", DEVNAME(sc), r4 & NVME_CSTS_SHST_MASK);

	r4 = nvme_read4(sc, NVME_AQA);
	printf("%s: aqa 0x%08x\n", DEVNAME(sc), r4);
	printf("%s: acqs %u\n", DEVNAME(sc), NVME_AQA_ACQS_R(r4));
	printf("%s: asqs %u\n", DEVNAME(sc), NVME_AQA_ASQS_R(r4));

	printf("%s: asq 0x%016"PRIx64"\n", DEVNAME(sc), nvme_read8(sc, NVME_ASQ));
	printf("%s: acq 0x%016"PRIx64"\n", DEVNAME(sc), nvme_read8(sc, NVME_ACQ));
#undef	DEVNAME
}
#endif	/* NVME_DEBUG */

static int
nvme_ready(struct nvme_softc *sc, uint32_t rdy)
{
	u_int i = 0;

	while ((nvme_read4(sc, NVME_CSTS) & NVME_CSTS_RDY) != rdy) {
		if (i++ > sc->sc_rdy_to)
			return ENXIO;

		delay(1000);
		nvme_barrier(sc, NVME_CSTS, 4, BUS_SPACE_BARRIER_READ);
	}

	return 0;
}

static int
nvme_enable(struct nvme_softc *sc, u_int mps)
{
	uint32_t cc, csts;
	int error;

	cc = nvme_read4(sc, NVME_CC);
	csts = nvme_read4(sc, NVME_CSTS);

	/*
	 * See note in nvme_disable. Short circuit if we're already enabled.
	 */
	if (ISSET(cc, NVME_CC_EN)) {
		if (ISSET(csts, NVME_CSTS_RDY))
			return 0;

		goto waitready;
	} else {
		/* EN == 0 already wait for RDY == 0 or fail */
		error = nvme_ready(sc, 0);
		if (error)
			return error;
	}
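
	/*
	 * The admin queue registers (ASQ, ACQ and AQA) are programmed below
	 * while the controller is still disabled, before CC.EN is set.
	 * CC.IOSQES/CC.IOCQES take log2 of the entry sizes, so
	 * ffs(64) - 1 == 6 selects the 64-byte submission queue entry and
	 * ffs(16) - 1 == 4 the 16-byte completion queue entry.
	 */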
	nvme_write8(sc, NVME_ASQ, NVME_DMA_DVA(sc->sc_admin_q->q_sq_dmamem));
	nvme_barrier(sc, 0, sc->sc_ios, BUS_SPACE_BARRIER_WRITE);
	delay(5000);
	nvme_write8(sc, NVME_ACQ, NVME_DMA_DVA(sc->sc_admin_q->q_cq_dmamem));
	nvme_barrier(sc, 0, sc->sc_ios, BUS_SPACE_BARRIER_WRITE);
	delay(5000);

	nvme_write4(sc, NVME_AQA, NVME_AQA_ACQS(sc->sc_admin_q->q_entries) |
	    NVME_AQA_ASQS(sc->sc_admin_q->q_entries));
	nvme_barrier(sc, 0, sc->sc_ios, BUS_SPACE_BARRIER_WRITE);
	delay(5000);

	CLR(cc, NVME_CC_IOCQES_MASK | NVME_CC_IOSQES_MASK | NVME_CC_SHN_MASK |
	    NVME_CC_AMS_MASK | NVME_CC_MPS_MASK | NVME_CC_CSS_MASK);
	SET(cc, NVME_CC_IOSQES(ffs(64) - 1) | NVME_CC_IOCQES(ffs(16) - 1));
	SET(cc, NVME_CC_SHN(NVME_CC_SHN_NONE));
	SET(cc, NVME_CC_CSS(NVME_CC_CSS_NVM));
	SET(cc, NVME_CC_AMS(NVME_CC_AMS_RR));
	SET(cc, NVME_CC_MPS(mps));
	SET(cc, NVME_CC_EN);

	nvme_write4(sc, NVME_CC, cc);
	nvme_barrier(sc, 0, sc->sc_ios,
	    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);

waitready:
	return nvme_ready(sc, NVME_CSTS_RDY);
}

static int
nvme_disable(struct nvme_softc *sc)
{
	uint32_t cc, csts;
	int error;

	cc = nvme_read4(sc, NVME_CC);
	csts = nvme_read4(sc, NVME_CSTS);

	/*
	 * Per 3.1.5 in NVME 1.3 spec, transitioning CC.EN from 0 to 1
	 * when CSTS.RDY is 1 or transitioning CC.EN from 1 to 0 when
	 * CSTS.RDY is 0 "has undefined results". So make sure that CSTS.RDY
	 * isn't the desired value. Short circuit if we're already disabled.
	 */
	if (ISSET(cc, NVME_CC_EN)) {
		if (!ISSET(csts, NVME_CSTS_RDY)) {
			/* EN == 1, wait for RDY == 1 or fail */
			error = nvme_ready(sc, NVME_CSTS_RDY);
			if (error)
				return error;
		}
	} else {
		/* EN == 0 already, wait for RDY == 0 */
		if (!ISSET(csts, NVME_CSTS_RDY))
			return 0;

		goto waitready;
	}

	CLR(cc, NVME_CC_EN);
	nvme_write4(sc, NVME_CC, cc);
	nvme_barrier(sc, 0, sc->sc_ios, BUS_SPACE_BARRIER_READ);

	/*
	 * Some drives have issues with accessing the mmio after we disable,
	 * so delay for a bit after we write the bit to cope with these issues.
	 */
	if (ISSET(sc->sc_quirks, NVME_QUIRK_DELAY_B4_CHK_RDY))
		delay(B4_CHK_RDY_DELAY_MS);

waitready:
	return nvme_ready(sc, 0);
}

int
nvme_attach(struct nvme_softc *sc)
{
	uint64_t cap;
	uint32_t reg;
	u_int dstrd;
	u_int mps = PAGE_SHIFT;
	u_int ioq_allocated;
	uint16_t adminq_entries = nvme_adminq_size;
	uint16_t ioq_entries = nvme_ioq_size;
	int i;

	reg = nvme_read4(sc, NVME_VS);
	if (reg == 0xffffffff) {
		aprint_error_dev(sc->sc_dev, "invalid mapping\n");
		return 1;
	}

	if (NVME_VS_TER(reg) == 0)
		aprint_normal_dev(sc->sc_dev, "NVMe %d.%d\n", NVME_VS_MJR(reg),
		    NVME_VS_MNR(reg));
	else
		aprint_normal_dev(sc->sc_dev, "NVMe %d.%d.%d\n", NVME_VS_MJR(reg),
		    NVME_VS_MNR(reg), NVME_VS_TER(reg));

	cap = nvme_read8(sc, NVME_CAP);
	dstrd = NVME_CAP_DSTRD(cap);
	if (NVME_CAP_MPSMIN(cap) > PAGE_SHIFT) {
		aprint_error_dev(sc->sc_dev, "NVMe minimum page size %u "
		    "is greater than CPU page size %u\n",
		    1 << NVME_CAP_MPSMIN(cap), 1 << PAGE_SHIFT);
		return 1;
	}
	if (NVME_CAP_MPSMAX(cap) < mps)
		mps = NVME_CAP_MPSMAX(cap);
	if (ioq_entries > NVME_CAP_MQES(cap))
		ioq_entries = NVME_CAP_MQES(cap);

	/* set initial values to be used for admin queue during probe */
	sc->sc_rdy_to = NVME_CAP_TO(cap);
	sc->sc_mps = 1 << mps;
	sc->sc_mdts = MAXPHYS;
	sc->sc_max_sgl = 2;

	if (nvme_disable(sc) != 0) {
		aprint_error_dev(sc->sc_dev, "unable to disable controller\n");
		return 1;
	}

	sc->sc_admin_q = nvme_q_alloc(sc, NVME_ADMIN_Q, adminq_entries, dstrd);
	if (sc->sc_admin_q == NULL) {
		aprint_error_dev(sc->sc_dev,
		    "unable to allocate admin queue\n");
		return 1;
	}
	if (sc->sc_intr_establish(sc, NVME_ADMIN_Q, sc->sc_admin_q))
		goto free_admin_q;

	if (nvme_enable(sc, mps) != 0) {
		aprint_error_dev(sc->sc_dev, "unable to enable controller\n");
		goto disestablish_admin_q;
	}

	if (nvme_identify(sc, NVME_CAP_MPSMIN(cap)) != 0) {
		aprint_error_dev(sc->sc_dev, "unable to identify controller\n");
		goto disable;
	}

	/* we know how big things are now */
	sc->sc_max_sgl = sc->sc_mdts / sc->sc_mps;
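
	/*
	 * sc_max_sgl is the number of PRP entries needed to describe a
	 * maximum-sized (sc_mdts bytes) transfer, one entry per sc_mps-sized
	 * memory page; the DMA maps and per-ccb PRP lists built in
	 * nvme_ccbs_alloc() are sized from it.
	 */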

	/* reallocate ccbs of admin queue with new max sgl. */
	nvme_ccbs_free(sc->sc_admin_q);
	nvme_ccbs_alloc(sc->sc_admin_q, sc->sc_admin_q->q_entries);

	if (sc->sc_use_mq) {
		/* Limit the number of queues to the number allocated in HW */
		if (nvme_get_number_of_queues(sc, &ioq_allocated) != 0) {
			aprint_error_dev(sc->sc_dev,
			    "unable to get number of queues\n");
			goto disable;
		}
		if (sc->sc_nq > ioq_allocated)
			sc->sc_nq = ioq_allocated;
	}

	sc->sc_q = kmem_zalloc(sizeof(*sc->sc_q) * sc->sc_nq, KM_SLEEP);
	for (i = 0; i < sc->sc_nq; i++) {
		sc->sc_q[i] = nvme_q_alloc(sc, i + 1, ioq_entries, dstrd);
		if (sc->sc_q[i] == NULL) {
			aprint_error_dev(sc->sc_dev,
			    "unable to allocate io queue\n");
			goto free_q;
		}
		if (nvme_q_create(sc, sc->sc_q[i]) != 0) {
			aprint_error_dev(sc->sc_dev,
			    "unable to create io queue\n");
			nvme_q_free(sc, sc->sc_q[i]);
			goto free_q;
		}
	}

	if (!sc->sc_use_mq)
		nvme_write4(sc, NVME_INTMC, 1);

	/* probe subdevices */
	sc->sc_namespaces = kmem_zalloc(sizeof(*sc->sc_namespaces) * sc->sc_nn,
	    KM_SLEEP);
	nvme_rescan(sc->sc_dev, "nvme", &i);

	return 0;

free_q:
	while (--i >= 0) {
		nvme_q_delete(sc, sc->sc_q[i]);
		nvme_q_free(sc, sc->sc_q[i]);
	}
disable:
	nvme_disable(sc);
disestablish_admin_q:
	sc->sc_intr_disestablish(sc, NVME_ADMIN_Q);
free_admin_q:
	nvme_q_free(sc, sc->sc_admin_q);

	return 1;
}

int
nvme_rescan(device_t self, const char *attr, const int *flags)
{
	struct nvme_softc *sc = device_private(self);
	struct nvme_attach_args naa;
	uint64_t cap;
	int ioq_entries = nvme_ioq_size;
	int i;

	cap = nvme_read8(sc, NVME_CAP);
	if (ioq_entries > NVME_CAP_MQES(cap))
		ioq_entries = NVME_CAP_MQES(cap);

	for (i = 0; i < sc->sc_nn; i++) {
		if (sc->sc_namespaces[i].dev)
			continue;
		memset(&naa, 0, sizeof(naa));
		naa.naa_nsid = i + 1;
		naa.naa_qentries = (ioq_entries - 1) * sc->sc_nq;
		naa.naa_maxphys = sc->sc_mdts;
		sc->sc_namespaces[i].dev = config_found(sc->sc_dev, &naa,
		    nvme_print);
	}
	return 0;
}

static int
nvme_print(void *aux, const char *pnp)
{
	struct nvme_attach_args *naa = aux;

	if (pnp)
		aprint_normal("at %s", pnp);

	if (naa->naa_nsid > 0)
		aprint_normal(" nsid %d", naa->naa_nsid);

	return UNCONF;
}

int
nvme_detach(struct nvme_softc *sc, int flags)
{
	int i, error;

	error = config_detach_children(sc->sc_dev, flags);
	if (error)
		return error;

	error = nvme_shutdown(sc);
	if (error)
		return error;

	/* from now on we are committed to detach, following will never fail */
	for (i = 0; i < sc->sc_nq; i++)
		nvme_q_free(sc, sc->sc_q[i]);
	kmem_free(sc->sc_q, sizeof(*sc->sc_q) * sc->sc_nq);
	nvme_q_free(sc, sc->sc_admin_q);

	return 0;
}

static int
nvme_shutdown(struct nvme_softc *sc)
{
	uint32_t cc, csts;
	bool disabled = false;
	int i;

	if (!sc->sc_use_mq)
		nvme_write4(sc, NVME_INTMS, 1);

	for (i = 0; i < sc->sc_nq; i++) {
		if (nvme_q_delete(sc, sc->sc_q[i]) != 0) {
			aprint_error_dev(sc->sc_dev,
			    "unable to delete io queue %d, disabling\n", i + 1);
			disabled = true;
		}
	}
	sc->sc_intr_disestablish(sc, NVME_ADMIN_Q);
	if (disabled)
		goto disable;

	cc = nvme_read4(sc, NVME_CC);
	CLR(cc, NVME_CC_SHN_MASK);
	SET(cc, NVME_CC_SHN(NVME_CC_SHN_NORMAL));
	nvme_write4(sc, NVME_CC, cc);
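
	/*
	 * The controller reports a normal shutdown as complete by setting
	 * CSTS.SHST; poll for that below in 1ms steps, for up to roughly
	 * four seconds, before giving up and disabling the controller
	 * instead.
	 */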
	for (i = 0; i < 4000; i++) {
		nvme_barrier(sc, 0, sc->sc_ios,
		    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
		csts = nvme_read4(sc, NVME_CSTS);
		if ((csts & NVME_CSTS_SHST_MASK) == NVME_CSTS_SHST_DONE)
			return 0;

		delay(1000);
	}

	aprint_error_dev(sc->sc_dev, "unable to shut down, disabling\n");

disable:
	nvme_disable(sc);
	return 0;
}

void
nvme_childdet(device_t self, device_t child)
{
	struct nvme_softc *sc = device_private(self);
	int i;

	for (i = 0; i < sc->sc_nn; i++) {
		if (sc->sc_namespaces[i].dev == child) {
			/* Already freed ns->ident. */
			sc->sc_namespaces[i].dev = NULL;
			break;
		}
	}
}

int
nvme_ns_identify(struct nvme_softc *sc, uint16_t nsid)
{
	struct nvme_sqe sqe;
	struct nvm_identify_namespace *identify;
	struct nvme_dmamem *mem;
	struct nvme_ccb *ccb;
	struct nvme_namespace *ns;
	int rv;

	KASSERT(nsid > 0);

	ccb = nvme_ccb_get(sc->sc_admin_q, false);
	KASSERT(ccb != NULL); /* it's a bug if we don't have spare ccb here */

	mem = nvme_dmamem_alloc(sc, sizeof(*identify));
	if (mem == NULL) {
		nvme_ccb_put(sc->sc_admin_q, ccb);
		return ENOMEM;
	}

	memset(&sqe, 0, sizeof(sqe));
	sqe.opcode = NVM_ADMIN_IDENTIFY;
	htolem32(&sqe.nsid, nsid);
	htolem64(&sqe.entry.prp[0], NVME_DMA_DVA(mem));
	htolem32(&sqe.cdw10, 0);

	ccb->ccb_done = nvme_empty_done;
	ccb->ccb_cookie = &sqe;

	nvme_dmamem_sync(sc, mem, BUS_DMASYNC_PREREAD);
	rv = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_sqe_fill, NVME_TIMO_IDENT);
	nvme_dmamem_sync(sc, mem, BUS_DMASYNC_POSTREAD);

	nvme_ccb_put(sc->sc_admin_q, ccb);

	if (rv != 0) {
		rv = EIO;
		goto done;
	}

	/* commit */

	identify = kmem_zalloc(sizeof(*identify), KM_SLEEP);
	*identify = *((volatile struct nvm_identify_namespace *)NVME_DMA_KVA(mem));

	/* Convert data to host endian */
	nvme_identify_namespace_swapbytes(identify);

	ns = nvme_ns_get(sc, nsid);
	KASSERT(ns);
	KASSERT(ns->ident == NULL);
	ns->ident = identify;

done:
	nvme_dmamem_free(sc, mem);

	return rv;
}

int
nvme_ns_dobio(struct nvme_softc *sc, uint16_t nsid, void *cookie,
    struct buf *bp, void *data, size_t datasize,
    int secsize, daddr_t blkno, int flags, nvme_nnc_done nnc_done)
{
	struct nvme_queue *q = nvme_get_q(sc);
	struct nvme_ccb *ccb;
	bus_dmamap_t dmap;
	int i, error;

	ccb = nvme_ccb_get(q, false);
	if (ccb == NULL)
		return EAGAIN;

	ccb->ccb_done = nvme_ns_io_done;
	ccb->ccb_cookie = cookie;

	/* namespace context */
	ccb->nnc_nsid = nsid;
	ccb->nnc_flags = flags;
	ccb->nnc_buf = bp;
	ccb->nnc_datasize = datasize;
	ccb->nnc_secsize = secsize;
	ccb->nnc_blkno = blkno;
	ccb->nnc_done = nnc_done;

	dmap = ccb->ccb_dmamap;
	error = bus_dmamap_load(sc->sc_dmat, dmap, data,
	    datasize, NULL,
	    (ISSET(flags, NVME_NS_CTX_F_POLL) ?
	      BUS_DMA_NOWAIT : BUS_DMA_WAITOK) |
	    (ISSET(flags, NVME_NS_CTX_F_READ) ?
	      BUS_DMA_READ : BUS_DMA_WRITE));
	if (error) {
		nvme_ccb_put(q, ccb);
		return error;
	}

	bus_dmamap_sync(sc->sc_dmat, dmap, 0, dmap->dm_mapsize,
	    ISSET(flags, NVME_NS_CTX_F_READ) ?
	    BUS_DMASYNC_PREREAD : BUS_DMASYNC_PREWRITE);
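
	/*
	 * A command carries at most two PRP entries inline: transfers that
	 * fit in one or two memory pages use prp[0] (and prp[1]) directly,
	 * while larger ones point prp[1] at a PRP list holding the remaining
	 * segment addresses.  Build and sync that list here;
	 * nvme_ns_io_fill() only references it.
	 */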
	if (dmap->dm_nsegs > 2) {
		for (i = 1; i < dmap->dm_nsegs; i++) {
			htolem64(&ccb->ccb_prpl[i - 1],
			    dmap->dm_segs[i].ds_addr);
		}
		bus_dmamap_sync(sc->sc_dmat,
		    NVME_DMA_MAP(q->q_ccb_prpls),
		    ccb->ccb_prpl_off,
		    sizeof(*ccb->ccb_prpl) * (dmap->dm_nsegs - 1),
		    BUS_DMASYNC_PREWRITE);
	}

	if (ISSET(flags, NVME_NS_CTX_F_POLL)) {
		if (nvme_poll(sc, q, ccb, nvme_ns_io_fill, NVME_TIMO_PT) != 0)
			return EIO;
		return 0;
	}

	nvme_q_submit(sc, q, ccb, nvme_ns_io_fill);
	return 0;
}

static void
nvme_ns_io_fill(struct nvme_queue *q, struct nvme_ccb *ccb, void *slot)
{
	struct nvme_sqe_io *sqe = slot;
	bus_dmamap_t dmap = ccb->ccb_dmamap;

	sqe->opcode = ISSET(ccb->nnc_flags, NVME_NS_CTX_F_READ) ?
	    NVM_CMD_READ : NVM_CMD_WRITE;
	htolem32(&sqe->nsid, ccb->nnc_nsid);

	htolem64(&sqe->entry.prp[0], dmap->dm_segs[0].ds_addr);
	switch (dmap->dm_nsegs) {
	case 1:
		break;
	case 2:
		htolem64(&sqe->entry.prp[1], dmap->dm_segs[1].ds_addr);
		break;
	default:
		/* the prp list is already set up and synced */
		htolem64(&sqe->entry.prp[1], ccb->ccb_prpl_dva);
		break;
	}

	htolem64(&sqe->slba, ccb->nnc_blkno);

	if (ISSET(ccb->nnc_flags, NVME_NS_CTX_F_FUA))
		htolem16(&sqe->ioflags, NVM_SQE_IO_FUA);

	/* guaranteed by upper layers, but check just in case */
	KASSERT((ccb->nnc_datasize % ccb->nnc_secsize) == 0);
	htolem16(&sqe->nlb, (ccb->nnc_datasize / ccb->nnc_secsize) - 1);
}

static void
nvme_ns_io_done(struct nvme_queue *q, struct nvme_ccb *ccb,
    struct nvme_cqe *cqe)
{
	struct nvme_softc *sc = q->q_sc;
	bus_dmamap_t dmap = ccb->ccb_dmamap;
	void *nnc_cookie = ccb->ccb_cookie;
	nvme_nnc_done nnc_done = ccb->nnc_done;
	struct buf *bp = ccb->nnc_buf;

	if (dmap->dm_nsegs > 2) {
		bus_dmamap_sync(sc->sc_dmat,
		    NVME_DMA_MAP(q->q_ccb_prpls),
		    ccb->ccb_prpl_off,
		    sizeof(*ccb->ccb_prpl) * (dmap->dm_nsegs - 1),
		    BUS_DMASYNC_POSTWRITE);
	}

	bus_dmamap_sync(sc->sc_dmat, dmap, 0, dmap->dm_mapsize,
	    ISSET(ccb->nnc_flags, NVME_NS_CTX_F_READ) ?
	    BUS_DMASYNC_POSTREAD : BUS_DMASYNC_POSTWRITE);

	bus_dmamap_unload(sc->sc_dmat, dmap);
	nvme_ccb_put(q, ccb);

	nnc_done(nnc_cookie, bp, lemtoh16(&cqe->flags), lemtoh32(&cqe->cdw0));
}

/*
 * If there is no volatile write cache, it makes no sense to issue
 * flush commands or query for the status.
 */
static bool
nvme_has_volatile_write_cache(struct nvme_softc *sc)
{
	/* sc_identify is filled during attachment */
	return ((sc->sc_identify.vwc & NVME_ID_CTRLR_VWC_PRESENT) != 0);
}

static bool
nvme_ns_sync_finished(void *cookie)
{
	int *result = cookie;

	return (*result != 0);
}

int
nvme_ns_sync(struct nvme_softc *sc, uint16_t nsid, int flags)
{
	struct nvme_queue *q = nvme_get_q(sc);
	struct nvme_ccb *ccb;
	int result = 0;

	if (!nvme_has_volatile_write_cache(sc)) {
		/* cache not present, no value in trying to flush it */
		return 0;
	}

	ccb = nvme_ccb_get(q, true);
	if (ccb == NULL)
		return EAGAIN;

	ccb->ccb_done = nvme_ns_sync_done;
	ccb->ccb_cookie = &result;

	/* namespace context */
	ccb->nnc_nsid = nsid;
	ccb->nnc_flags = flags;
	ccb->nnc_done = NULL;

	if (ISSET(flags, NVME_NS_CTX_F_POLL)) {
		if (nvme_poll(sc, q, ccb, nvme_ns_sync_fill, NVME_TIMO_SY) != 0)
			return EIO;
		return 0;
	}

	nvme_q_submit(sc, q, ccb, nvme_ns_sync_fill);

	/* wait for completion */
	nvme_q_wait_complete(sc, q, nvme_ns_sync_finished, &result);
	KASSERT(result != 0);

	return (result > 0) ? 0 : EIO;
}

static void
nvme_ns_sync_fill(struct nvme_queue *q, struct nvme_ccb *ccb, void *slot)
{
	struct nvme_sqe *sqe = slot;

	sqe->opcode = NVM_CMD_FLUSH;
	htolem32(&sqe->nsid, ccb->nnc_nsid);
}

static void
nvme_ns_sync_done(struct nvme_queue *q, struct nvme_ccb *ccb,
    struct nvme_cqe *cqe)
{
	int *result = ccb->ccb_cookie;
	uint16_t status = NVME_CQE_SC(lemtoh16(&cqe->flags));

	if (status == NVME_CQE_SC_SUCCESS)
		*result = 1;
	else
		*result = -1;

	nvme_ccb_put(q, ccb);
}

static bool
nvme_getcache_finished(void *xc)
{
	int *addr = xc;

	return (*addr != 0);
}

/*
 * Get status of volatile write cache. Always asynchronous.
 */
int
nvme_admin_getcache(struct nvme_softc *sc, int *addr)
{
	struct nvme_ccb *ccb;
	struct nvme_queue *q = sc->sc_admin_q;
	int result = 0, error;

	if (!nvme_has_volatile_write_cache(sc)) {
		/* cache simply not present */
		*addr = 0;
		return 0;
	}

	ccb = nvme_ccb_get(q, true);
	KASSERT(ccb != NULL);

	ccb->ccb_done = nvme_getcache_done;
	ccb->ccb_cookie = &result;

	/* namespace context */
	ccb->nnc_flags = 0;
	ccb->nnc_done = NULL;

	nvme_q_submit(sc, q, ccb, nvme_getcache_fill);

	/* wait for completion */
	nvme_q_wait_complete(sc, q, nvme_getcache_finished, &result);
	KASSERT(result != 0);

	if (result > 0) {
		*addr = result;
		error = 0;
	} else
		error = EINVAL;

	return error;
}

static void
nvme_getcache_fill(struct nvme_queue *q, struct nvme_ccb *ccb, void *slot)
{
	struct nvme_sqe *sqe = slot;

	sqe->opcode = NVM_ADMIN_GET_FEATURES;
	htolem32(&sqe->cdw10, NVM_FEATURE_VOLATILE_WRITE_CACHE);
}

static void
nvme_getcache_done(struct nvme_queue *q, struct nvme_ccb *ccb,
    struct nvme_cqe *cqe)
{
	int *addr = ccb->ccb_cookie;
	uint16_t status = NVME_CQE_SC(lemtoh16(&cqe->flags));
	uint32_t cdw0 = lemtoh32(&cqe->cdw0);
	int result;

	if (status == NVME_CQE_SC_SUCCESS) {
		result = 0;

		/*
		 * DPO not supported, Dataset Management (DSM) field doesn't
		 * specify the same semantics. FUA is always supported.
		 */
		result = DKCACHE_FUA;

		if (cdw0 & NVME_CQE_CDW0_VWC_WCE)
			result |= DKCACHE_WRITE;

		/*
		 * If volatile write cache is present, the flag shall also be
		 * settable.
		 */
		result |= DKCACHE_WCHANGE;
	} else {
		result = -1;
	}

	*addr = result;

	nvme_ccb_put(q, ccb);
}

void
nvme_ns_free(struct nvme_softc *sc, uint16_t nsid)
{
	struct nvme_namespace *ns;
	struct nvm_identify_namespace *identify;

	ns = nvme_ns_get(sc, nsid);
	KASSERT(ns);

	identify = ns->ident;
	ns->ident = NULL;
	if (identify != NULL)
		kmem_free(identify, sizeof(*identify));
}

struct nvme_pt_state {
	struct nvme_pt_command *pt;
	bool finished;
};

static void
nvme_pt_fill(struct nvme_queue *q, struct nvme_ccb *ccb, void *slot)
{
	struct nvme_softc *sc = q->q_sc;
	struct nvme_sqe *sqe = slot;
	struct nvme_pt_state *state = ccb->ccb_cookie;
	struct nvme_pt_command *pt = state->pt;
	bus_dmamap_t dmap = ccb->ccb_dmamap;
	int i;

	sqe->opcode = pt->cmd.opcode;
	htolem32(&sqe->nsid, pt->cmd.nsid);

	if (pt->buf != NULL && pt->len > 0) {
		htolem64(&sqe->entry.prp[0], dmap->dm_segs[0].ds_addr);
		switch (dmap->dm_nsegs) {
		case 1:
			break;
		case 2:
			htolem64(&sqe->entry.prp[1], dmap->dm_segs[1].ds_addr);
			break;
		default:
			for (i = 1; i < dmap->dm_nsegs; i++) {
				htolem64(&ccb->ccb_prpl[i - 1],
				    dmap->dm_segs[i].ds_addr);
			}
			bus_dmamap_sync(sc->sc_dmat,
			    NVME_DMA_MAP(q->q_ccb_prpls),
			    ccb->ccb_prpl_off,
			    sizeof(*ccb->ccb_prpl) * (dmap->dm_nsegs - 1),
			    BUS_DMASYNC_PREWRITE);
			htolem64(&sqe->entry.prp[1], ccb->ccb_prpl_dva);
			break;
		}
	}

	htolem32(&sqe->cdw10, pt->cmd.cdw10);
	htolem32(&sqe->cdw11, pt->cmd.cdw11);
	htolem32(&sqe->cdw12, pt->cmd.cdw12);
	htolem32(&sqe->cdw13, pt->cmd.cdw13);
	htolem32(&sqe->cdw14, pt->cmd.cdw14);
	htolem32(&sqe->cdw15, pt->cmd.cdw15);
}

static void
nvme_pt_done(struct nvme_queue *q, struct nvme_ccb *ccb, struct nvme_cqe *cqe)
{
	struct nvme_softc *sc = q->q_sc;
	struct nvme_pt_state *state = ccb->ccb_cookie;
	struct nvme_pt_command *pt = state->pt;
	bus_dmamap_t dmap = ccb->ccb_dmamap;

	if (pt->buf != NULL && pt->len > 0) {
		if (dmap->dm_nsegs > 2) {
			bus_dmamap_sync(sc->sc_dmat,
			    NVME_DMA_MAP(q->q_ccb_prpls),
			    ccb->ccb_prpl_off,
			    sizeof(*ccb->ccb_prpl) * (dmap->dm_nsegs - 1),
			    BUS_DMASYNC_POSTWRITE);
		}

		bus_dmamap_sync(sc->sc_dmat, dmap, 0, dmap->dm_mapsize,
		    pt->is_read ? BUS_DMASYNC_POSTREAD : BUS_DMASYNC_POSTWRITE);
		bus_dmamap_unload(sc->sc_dmat, dmap);
	}

	pt->cpl.cdw0 = lemtoh32(&cqe->cdw0);
	pt->cpl.flags = lemtoh16(&cqe->flags) & ~NVME_CQE_PHASE;

	state->finished = true;

	nvme_ccb_put(q, ccb);
}

static bool
nvme_pt_finished(void *cookie)
{
	struct nvme_pt_state *state = cookie;

	return state->finished;
}

static int
nvme_command_passthrough(struct nvme_softc *sc, struct nvme_pt_command *pt,
    uint16_t nsid, struct lwp *l, bool is_adminq)
{
	struct nvme_queue *q;
	struct nvme_ccb *ccb;
	void *buf = NULL;
	struct nvme_pt_state state;
	int error;

	/* limit command size to maximum data transfer size */
	if ((pt->buf == NULL && pt->len > 0) ||
	    (pt->buf != NULL && (pt->len == 0 || pt->len > sc->sc_mdts)))
		return EINVAL;

	q = is_adminq ? sc->sc_admin_q : nvme_get_q(sc);
	ccb = nvme_ccb_get(q, true);
	KASSERT(ccb != NULL);

	if (pt->buf != NULL) {
		KASSERT(pt->len > 0);
		buf = kmem_alloc(pt->len, KM_SLEEP);
		if (!pt->is_read) {
			error = copyin(pt->buf, buf, pt->len);
			if (error)
				goto kmem_free;
		}
		error = bus_dmamap_load(sc->sc_dmat, ccb->ccb_dmamap, buf,
		    pt->len, NULL,
		    BUS_DMA_WAITOK |
		      (pt->is_read ? BUS_DMA_READ : BUS_DMA_WRITE));
		if (error)
			goto kmem_free;
		bus_dmamap_sync(sc->sc_dmat, ccb->ccb_dmamap,
		    0, ccb->ccb_dmamap->dm_mapsize,
		    pt->is_read ? BUS_DMASYNC_PREREAD : BUS_DMASYNC_PREWRITE);
	}

	memset(&state, 0, sizeof(state));
	state.pt = pt;
	state.finished = false;

	ccb->ccb_done = nvme_pt_done;
	ccb->ccb_cookie = &state;

	pt->cmd.nsid = nsid;

	nvme_q_submit(sc, q, ccb, nvme_pt_fill);

	/* wait for completion */
	nvme_q_wait_complete(sc, q, nvme_pt_finished, &state);
	KASSERT(state.finished);

	error = 0;

	if (buf != NULL) {
		if (error == 0 && pt->is_read)
			error = copyout(buf, pt->buf, pt->len);
kmem_free:
		kmem_free(buf, pt->len);
	}

	return error;
}

static void
nvme_q_submit(struct nvme_softc *sc, struct nvme_queue *q, struct nvme_ccb *ccb,
    void (*fill)(struct nvme_queue *, struct nvme_ccb *, void *))
{
	struct nvme_sqe *sqe = NVME_DMA_KVA(q->q_sq_dmamem);
	uint32_t tail;

	mutex_enter(&q->q_sq_mtx);
	tail = q->q_sq_tail;
	if (++q->q_sq_tail >= q->q_entries)
		q->q_sq_tail = 0;

	sqe += tail;

	bus_dmamap_sync(sc->sc_dmat, NVME_DMA_MAP(q->q_sq_dmamem),
	    sizeof(*sqe) * tail, sizeof(*sqe), BUS_DMASYNC_POSTWRITE);
	memset(sqe, 0, sizeof(*sqe));
	(*fill)(q, ccb, sqe);
	htolem16(&sqe->cid, ccb->ccb_id);
	bus_dmamap_sync(sc->sc_dmat, NVME_DMA_MAP(q->q_sq_dmamem),
	    sizeof(*sqe) * tail, sizeof(*sqe), BUS_DMASYNC_PREWRITE);

	nvme_write4(sc, q->q_sqtdbl, q->q_sq_tail);
	mutex_exit(&q->q_sq_mtx);
}

struct nvme_poll_state {
	struct nvme_sqe s;
	struct nvme_cqe c;
	void *cookie;
	void (*done)(struct nvme_queue *, struct nvme_ccb *, struct nvme_cqe *);
};

static int
nvme_poll(struct nvme_softc *sc, struct nvme_queue *q, struct nvme_ccb *ccb,
    void (*fill)(struct nvme_queue *, struct nvme_ccb *, void *), int timo_sec)
{
	struct nvme_poll_state state;
	uint16_t flags;
	int step = 10;
	int maxloop = timo_sec * 1000000 / step;
	int error = 0;
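
	/*
	 * Poll in 10us steps for at most timo_sec seconds.  A negative
	 * timeout (e.g. NVME_TIMO_PT) disables the timeout check below, so
	 * the command is then polled until the controller completes it.
	 */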
	memset(&state, 0, sizeof(state));
	(*fill)(q, ccb, &state.s);

	state.done = ccb->ccb_done;
	state.cookie = ccb->ccb_cookie;

	ccb->ccb_done = nvme_poll_done;
	ccb->ccb_cookie = &state;

	nvme_q_submit(sc, q, ccb, nvme_poll_fill);
	while (!ISSET(state.c.flags, htole16(NVME_CQE_PHASE))) {
		if (nvme_q_complete(sc, q) == 0)
			delay(step);

		if (timo_sec >= 0 && --maxloop <= 0) {
			error = ETIMEDOUT;
			break;
		}
	}

	if (error == 0) {
		flags = lemtoh16(&state.c.flags);
		return flags & ~NVME_CQE_PHASE;
	} else {
		/*
		 * If it succeeds later, it would hit a ccb which will have
		 * been already reused for something else. Not good. Cross
		 * fingers and hope for the best. XXX do controller reset?
		 */
		aprint_error_dev(sc->sc_dev, "polled command timed out\n");

		/* Invoke the callback to clean state anyway */
		struct nvme_cqe cqe;
		memset(&cqe, 0, sizeof(cqe));
		ccb->ccb_done(q, ccb, &cqe);

		return 1;
	}
}

static void
nvme_poll_fill(struct nvme_queue *q, struct nvme_ccb *ccb, void *slot)
{
	struct nvme_sqe *sqe = slot;
	struct nvme_poll_state *state = ccb->ccb_cookie;

	*sqe = state->s;
}

static void
nvme_poll_done(struct nvme_queue *q, struct nvme_ccb *ccb,
    struct nvme_cqe *cqe)
{
	struct nvme_poll_state *state = ccb->ccb_cookie;

	SET(cqe->flags, htole16(NVME_CQE_PHASE));
	state->c = *cqe;

	ccb->ccb_cookie = state->cookie;
	state->done(q, ccb, &state->c);
}

static void
nvme_sqe_fill(struct nvme_queue *q, struct nvme_ccb *ccb, void *slot)
{
	struct nvme_sqe *src = ccb->ccb_cookie;
	struct nvme_sqe *dst = slot;

	*dst = *src;
}

static void
nvme_empty_done(struct nvme_queue *q, struct nvme_ccb *ccb,
    struct nvme_cqe *cqe)
{
}

static int
nvme_q_complete(struct nvme_softc *sc, struct nvme_queue *q)
{
	struct nvme_ccb *ccb;
	struct nvme_cqe *ring = NVME_DMA_KVA(q->q_cq_dmamem), *cqe;
	uint16_t flags;
	int rv = 0;

	mutex_enter(&q->q_cq_mtx);

	nvme_dmamem_sync(sc, q->q_cq_dmamem, BUS_DMASYNC_POSTREAD);
	for (;;) {
		cqe = &ring[q->q_cq_head];
		flags = lemtoh16(&cqe->flags);
		if ((flags & NVME_CQE_PHASE) != q->q_cq_phase)
			break;

		ccb = &q->q_ccbs[cqe->cid];

		if (++q->q_cq_head >= q->q_entries) {
			q->q_cq_head = 0;
			q->q_cq_phase ^= NVME_CQE_PHASE;
		}

#ifdef DEBUG
		/*
		 * If we get spurious completion notification, something
		 * is seriously hosed up. Very likely DMA to some random
		 * memory place happened, so just bail out.
		 */
		if ((intptr_t)ccb->ccb_cookie == NVME_CCB_FREE) {
			panic("%s: invalid ccb detected",
			    device_xname(sc->sc_dev));
			/* NOTREACHED */
		}
#endif

		rv++;

		/*
		 * Unlock the mutex before calling the ccb_done callback
		 * and re-lock afterwards. The callback triggers lddone()
		 * which schedules another i/o, and also calls nvme_ccb_put().
		 * Unlock/relock avoids possibility of deadlock.
		 */
		mutex_exit(&q->q_cq_mtx);
		ccb->ccb_done(q, ccb, cqe);
		mutex_enter(&q->q_cq_mtx);
	}
	nvme_dmamem_sync(sc, q->q_cq_dmamem, BUS_DMASYNC_PREREAD);

	if (rv)
		nvme_write4(sc, q->q_cqhdbl, q->q_cq_head);

	mutex_exit(&q->q_cq_mtx);

	return rv;
}

static void
nvme_q_wait_complete(struct nvme_softc *sc,
    struct nvme_queue *q, bool (*finished)(void *), void *cookie)
{
	mutex_enter(&q->q_ccb_mtx);
	if (finished(cookie))
		goto out;

	for (;;) {
		q->q_ccb_waiting = true;
		cv_wait(&q->q_ccb_wait, &q->q_ccb_mtx);

		if (finished(cookie))
			break;
	}

out:
	mutex_exit(&q->q_ccb_mtx);
}

static int
nvme_identify(struct nvme_softc *sc, u_int mps)
{
	char sn[41], mn[81], fr[17];
	struct nvm_identify_controller *identify;
	struct nvme_dmamem *mem;
	struct nvme_ccb *ccb;
	u_int mdts;
	int rv = 1;

	ccb = nvme_ccb_get(sc->sc_admin_q, false);
	KASSERT(ccb != NULL); /* it's a bug if we don't have spare ccb here */

	mem = nvme_dmamem_alloc(sc, sizeof(*identify));
	if (mem == NULL)
		return 1;

	ccb->ccb_done = nvme_empty_done;
	ccb->ccb_cookie = mem;

	nvme_dmamem_sync(sc, mem, BUS_DMASYNC_PREREAD);
	rv = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_fill_identify,
	    NVME_TIMO_IDENT);
	nvme_dmamem_sync(sc, mem, BUS_DMASYNC_POSTREAD);

	nvme_ccb_put(sc->sc_admin_q, ccb);

	if (rv != 0)
		goto done;

	identify = NVME_DMA_KVA(mem);
	sc->sc_identify = *identify;
	identify = NULL;

	/* Convert data to host endian */
	nvme_identify_controller_swapbytes(&sc->sc_identify);

	strnvisx(sn, sizeof(sn), (const char *)sc->sc_identify.sn,
	    sizeof(sc->sc_identify.sn), VIS_TRIM|VIS_SAFE|VIS_OCTAL);
	strnvisx(mn, sizeof(mn), (const char *)sc->sc_identify.mn,
	    sizeof(sc->sc_identify.mn), VIS_TRIM|VIS_SAFE|VIS_OCTAL);
	strnvisx(fr, sizeof(fr), (const char *)sc->sc_identify.fr,
	    sizeof(sc->sc_identify.fr), VIS_TRIM|VIS_SAFE|VIS_OCTAL);
	aprint_normal_dev(sc->sc_dev, "%s, firmware %s, serial %s\n", mn, fr,
	    sn);

	if (sc->sc_identify.mdts > 0) {
		mdts = (1 << sc->sc_identify.mdts) * (1 << mps);
		if (mdts < sc->sc_mdts)
			sc->sc_mdts = mdts;
	}

	sc->sc_nn = sc->sc_identify.nn;

done:
	nvme_dmamem_free(sc, mem);

	return rv;
}

static int
nvme_q_create(struct nvme_softc *sc, struct nvme_queue *q)
{
	struct nvme_sqe_q sqe;
	struct nvme_ccb *ccb;
	int rv;

	if (sc->sc_use_mq && sc->sc_intr_establish(sc, q->q_id, q) != 0)
		return 1;

	ccb = nvme_ccb_get(sc->sc_admin_q, false);
	KASSERT(ccb != NULL);

	ccb->ccb_done = nvme_empty_done;
	ccb->ccb_cookie = &sqe;

	memset(&sqe, 0, sizeof(sqe));
	sqe.opcode = NVM_ADMIN_ADD_IOCQ;
	htolem64(&sqe.prp1, NVME_DMA_DVA(q->q_cq_dmamem));
	htolem16(&sqe.qsize, q->q_entries - 1);
	htolem16(&sqe.qid, q->q_id);
	sqe.qflags = NVM_SQE_CQ_IEN | NVM_SQE_Q_PC;
	if (sc->sc_use_mq)
		htolem16(&sqe.cqid, q->q_id);	/* qid == vector */

	rv = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_sqe_fill, NVME_TIMO_QOP);
	if (rv != 0)
		goto fail;
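
	/*
	 * The completion queue has to exist before the submission queue
	 * that posts to it: the create-IOSQ command below names this
	 * queue's id as its cqid.
	 */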
	ccb->ccb_done = nvme_empty_done;
	ccb->ccb_cookie = &sqe;

	memset(&sqe, 0, sizeof(sqe));
	sqe.opcode = NVM_ADMIN_ADD_IOSQ;
	htolem64(&sqe.prp1, NVME_DMA_DVA(q->q_sq_dmamem));
	htolem16(&sqe.qsize, q->q_entries - 1);
	htolem16(&sqe.qid, q->q_id);
	htolem16(&sqe.cqid, q->q_id);
	sqe.qflags = NVM_SQE_Q_PC;

	rv = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_sqe_fill, NVME_TIMO_QOP);
	if (rv != 0)
		goto fail;

fail:
	nvme_ccb_put(sc->sc_admin_q, ccb);
	return rv;
}

static int
nvme_q_delete(struct nvme_softc *sc, struct nvme_queue *q)
{
	struct nvme_sqe_q sqe;
	struct nvme_ccb *ccb;
	int rv;

	ccb = nvme_ccb_get(sc->sc_admin_q, false);
	KASSERT(ccb != NULL);

	ccb->ccb_done = nvme_empty_done;
	ccb->ccb_cookie = &sqe;

	memset(&sqe, 0, sizeof(sqe));
	sqe.opcode = NVM_ADMIN_DEL_IOSQ;
	htolem16(&sqe.qid, q->q_id);

	rv = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_sqe_fill, NVME_TIMO_QOP);
	if (rv != 0)
		goto fail;

	ccb->ccb_done = nvme_empty_done;
	ccb->ccb_cookie = &sqe;

	memset(&sqe, 0, sizeof(sqe));
	sqe.opcode = NVM_ADMIN_DEL_IOCQ;
	htolem16(&sqe.qid, q->q_id);

	rv = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_sqe_fill, NVME_TIMO_QOP);
	if (rv != 0)
		goto fail;

fail:
	nvme_ccb_put(sc->sc_admin_q, ccb);

	if (rv == 0 && sc->sc_use_mq) {
		if (sc->sc_intr_disestablish(sc, q->q_id))
			rv = 1;
	}

	return rv;
}

static void
nvme_fill_identify(struct nvme_queue *q, struct nvme_ccb *ccb, void *slot)
{
	struct nvme_sqe *sqe = slot;
	struct nvme_dmamem *mem = ccb->ccb_cookie;

	sqe->opcode = NVM_ADMIN_IDENTIFY;
	htolem64(&sqe->entry.prp[0], NVME_DMA_DVA(mem));
	htolem32(&sqe->cdw10, 1);
}

static int
nvme_get_number_of_queues(struct nvme_softc *sc, u_int *nqap)
{
	struct nvme_pt_state state;
	struct nvme_pt_command pt;
	struct nvme_ccb *ccb;
	uint16_t ncqa, nsqa;
	int rv;

	ccb = nvme_ccb_get(sc->sc_admin_q, false);
	KASSERT(ccb != NULL); /* it's a bug if we don't have spare ccb here */

	memset(&pt, 0, sizeof(pt));
	pt.cmd.opcode = NVM_ADMIN_GET_FEATURES;
	pt.cmd.cdw10 = NVM_FEATURE_NUMBER_OF_QUEUES;

	memset(&state, 0, sizeof(state));
	state.pt = &pt;
	state.finished = false;

	ccb->ccb_done = nvme_pt_done;
	ccb->ccb_cookie = &state;

	rv = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_pt_fill, NVME_TIMO_QOP);

	if (rv != 0) {
		*nqap = 0;
		return EIO;
	}

	ncqa = pt.cpl.cdw0 >> 16;
	nsqa = pt.cpl.cdw0 & 0xffff;
	*nqap = MIN(ncqa, nsqa) + 1;

	return 0;
}

static int
nvme_ccbs_alloc(struct nvme_queue *q, uint16_t nccbs)
{
	struct nvme_softc *sc = q->q_sc;
	struct nvme_ccb *ccb;
	bus_addr_t off;
	uint64_t *prpl;
	u_int i;

	mutex_init(&q->q_ccb_mtx, MUTEX_DEFAULT, IPL_BIO);
	cv_init(&q->q_ccb_wait, "nvmeqw");
	q->q_ccb_waiting = false;
	SIMPLEQ_INIT(&q->q_ccb_list);

	q->q_ccbs = kmem_alloc(sizeof(*ccb) * nccbs, KM_SLEEP);

	q->q_nccbs = nccbs;
	q->q_ccb_prpls = nvme_dmamem_alloc(sc,
	    sizeof(*prpl) * sc->sc_max_sgl * nccbs);

	prpl = NVME_DMA_KVA(q->q_ccb_prpls);
	off = 0;
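
	/*
	 * The PRP lists for all ccbs live in a single DMA-able buffer; each
	 * ccb gets a slice of sc_max_sgl entries and remembers both the
	 * kernel pointer (ccb_prpl) and the device-visible address
	 * (ccb_prpl_dva) that is later stored into prp[1] of a command.
	 */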
	for (i = 0; i < nccbs; i++) {
		ccb = &q->q_ccbs[i];

		if (bus_dmamap_create(sc->sc_dmat, sc->sc_mdts,
		    sc->sc_max_sgl + 1 /* we get a free prp in the sqe */,
		    sc->sc_mps, sc->sc_mps, BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW,
		    &ccb->ccb_dmamap) != 0)
			goto free_maps;

		ccb->ccb_id = i;
		ccb->ccb_prpl = prpl;
		ccb->ccb_prpl_off = off;
		ccb->ccb_prpl_dva = NVME_DMA_DVA(q->q_ccb_prpls) + off;

		SIMPLEQ_INSERT_TAIL(&q->q_ccb_list, ccb, ccb_entry);

		prpl += sc->sc_max_sgl;
		off += sizeof(*prpl) * sc->sc_max_sgl;
	}

	return 0;

free_maps:
	nvme_ccbs_free(q);
	return 1;
}

static struct nvme_ccb *
nvme_ccb_get(struct nvme_queue *q, bool wait)
{
	struct nvme_ccb *ccb = NULL;

	mutex_enter(&q->q_ccb_mtx);
again:
	ccb = SIMPLEQ_FIRST(&q->q_ccb_list);
	if (ccb != NULL) {
		SIMPLEQ_REMOVE_HEAD(&q->q_ccb_list, ccb_entry);
#ifdef DEBUG
		ccb->ccb_cookie = NULL;
#endif
	} else {
		if (__predict_false(wait)) {
			q->q_ccb_waiting = true;
			cv_wait(&q->q_ccb_wait, &q->q_ccb_mtx);
			goto again;
		}
	}
	mutex_exit(&q->q_ccb_mtx);

	return ccb;
}

static void
nvme_ccb_put(struct nvme_queue *q, struct nvme_ccb *ccb)
{

	mutex_enter(&q->q_ccb_mtx);
#ifdef DEBUG
	ccb->ccb_cookie = (void *)NVME_CCB_FREE;
#endif
	SIMPLEQ_INSERT_HEAD(&q->q_ccb_list, ccb, ccb_entry);

	/* It's unlikely there are any waiters, it's not used for regular I/O */
	if (__predict_false(q->q_ccb_waiting)) {
		q->q_ccb_waiting = false;
		cv_broadcast(&q->q_ccb_wait);
	}

	mutex_exit(&q->q_ccb_mtx);
}

static void
nvme_ccbs_free(struct nvme_queue *q)
{
	struct nvme_softc *sc = q->q_sc;
	struct nvme_ccb *ccb;

	mutex_enter(&q->q_ccb_mtx);
	while ((ccb = SIMPLEQ_FIRST(&q->q_ccb_list)) != NULL) {
		SIMPLEQ_REMOVE_HEAD(&q->q_ccb_list, ccb_entry);
		bus_dmamap_destroy(sc->sc_dmat, ccb->ccb_dmamap);
	}
	mutex_exit(&q->q_ccb_mtx);

	nvme_dmamem_free(sc, q->q_ccb_prpls);
	kmem_free(q->q_ccbs, sizeof(*ccb) * q->q_nccbs);
	q->q_ccbs = NULL;
	cv_destroy(&q->q_ccb_wait);
	mutex_destroy(&q->q_ccb_mtx);
}

static struct nvme_queue *
nvme_q_alloc(struct nvme_softc *sc, uint16_t id, u_int entries, u_int dstrd)
{
	struct nvme_queue *q;

	q = kmem_alloc(sizeof(*q), KM_SLEEP);
	q->q_sc = sc;
	q->q_sq_dmamem = nvme_dmamem_alloc(sc,
	    sizeof(struct nvme_sqe) * entries);
	if (q->q_sq_dmamem == NULL)
		goto free;

	q->q_cq_dmamem = nvme_dmamem_alloc(sc,
	    sizeof(struct nvme_cqe) * entries);
	if (q->q_cq_dmamem == NULL)
		goto free_sq;

	memset(NVME_DMA_KVA(q->q_sq_dmamem), 0, NVME_DMA_LEN(q->q_sq_dmamem));
	memset(NVME_DMA_KVA(q->q_cq_dmamem), 0, NVME_DMA_LEN(q->q_cq_dmamem));

	mutex_init(&q->q_sq_mtx, MUTEX_DEFAULT, IPL_BIO);
	mutex_init(&q->q_cq_mtx, MUTEX_DEFAULT, IPL_BIO);
	q->q_sqtdbl = NVME_SQTDBL(id, dstrd);
	q->q_cqhdbl = NVME_CQHDBL(id, dstrd);
	q->q_id = id;
	q->q_entries = entries;
	q->q_sq_tail = 0;
	q->q_cq_head = 0;
	q->q_cq_phase = NVME_CQE_PHASE;

	nvme_dmamem_sync(sc, q->q_sq_dmamem, BUS_DMASYNC_PREWRITE);
	nvme_dmamem_sync(sc, q->q_cq_dmamem, BUS_DMASYNC_PREREAD);

	/*
	 * Due to definition of full and empty queue (queue is empty
	 * when head == tail, full when tail is one less than head),
	 * we can actually only have (entries - 1) in-flight commands.
	 */
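	/* E.g. a 1024-entry queue can have at most 1023 commands in flight. */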
	if (nvme_ccbs_alloc(q, entries - 1) != 0) {
		aprint_error_dev(sc->sc_dev, "unable to allocate ccbs\n");
		goto free_cq;
	}

	return q;

free_cq:
	nvme_dmamem_free(sc, q->q_cq_dmamem);
free_sq:
	nvme_dmamem_free(sc, q->q_sq_dmamem);
free:
	kmem_free(q, sizeof(*q));

	return NULL;
}

static void
nvme_q_free(struct nvme_softc *sc, struct nvme_queue *q)
{
	nvme_ccbs_free(q);
	mutex_destroy(&q->q_sq_mtx);
	mutex_destroy(&q->q_cq_mtx);
	nvme_dmamem_sync(sc, q->q_cq_dmamem, BUS_DMASYNC_POSTREAD);
	nvme_dmamem_sync(sc, q->q_sq_dmamem, BUS_DMASYNC_POSTWRITE);
	nvme_dmamem_free(sc, q->q_cq_dmamem);
	nvme_dmamem_free(sc, q->q_sq_dmamem);
	kmem_free(q, sizeof(*q));
}

int
nvme_intr(void *xsc)
{
	struct nvme_softc *sc = xsc;

	/*
	 * INTx is level triggered, controller deasserts the interrupt only
	 * when we advance command queue head via write to the doorbell.
	 * Tell the controller to block the interrupts while we process
	 * the queue(s).
	 */
	nvme_write4(sc, NVME_INTMS, 1);

	softint_schedule(sc->sc_softih[0]);

	/* don't know, might not have been for us */
	return 1;
}

void
nvme_softintr_intx(void *xq)
{
	struct nvme_queue *q = xq;
	struct nvme_softc *sc = q->q_sc;

	nvme_q_complete(sc, sc->sc_admin_q);
	if (sc->sc_q != NULL)
		nvme_q_complete(sc, sc->sc_q[0]);

	/*
	 * Processing done, tell controller to issue interrupts again. There
	 * is no race, as NVMe spec requires the controller to maintain state,
	 * and assert the interrupt whenever there are unacknowledged
	 * completion queue entries.
	 */
	nvme_write4(sc, NVME_INTMC, 1);
}

int
nvme_intr_msi(void *xq)
{
	struct nvme_queue *q = xq;

	KASSERT(q && q->q_sc && q->q_sc->sc_softih
	    && q->q_sc->sc_softih[q->q_id]);

	/*
	 * MSI/MSI-X are edge triggered, so we can hand over processing to
	 * softint without masking the interrupt.
	 */
	softint_schedule(q->q_sc->sc_softih[q->q_id]);

	return 1;
}

void
nvme_softintr_msi(void *xq)
{
	struct nvme_queue *q = xq;
	struct nvme_softc *sc = q->q_sc;

	nvme_q_complete(sc, q);
}

static struct nvme_dmamem *
nvme_dmamem_alloc(struct nvme_softc *sc, size_t size)
{
	struct nvme_dmamem *ndm;
	int nsegs;

	ndm = kmem_zalloc(sizeof(*ndm), KM_SLEEP);
	if (ndm == NULL)
		return NULL;

	ndm->ndm_size = size;

	if (bus_dmamap_create(sc->sc_dmat, size, 1, size, 0,
	    BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW, &ndm->ndm_map) != 0)
		goto ndmfree;

	if (bus_dmamem_alloc(sc->sc_dmat, size, sc->sc_mps, 0, &ndm->ndm_seg,
	    1, &nsegs, BUS_DMA_WAITOK) != 0)
		goto destroy;

	if (bus_dmamem_map(sc->sc_dmat, &ndm->ndm_seg, nsegs, size,
	    &ndm->ndm_kva, BUS_DMA_WAITOK) != 0)
		goto free;
	memset(ndm->ndm_kva, 0, size);

	if (bus_dmamap_load(sc->sc_dmat, ndm->ndm_map, ndm->ndm_kva, size,
	    NULL, BUS_DMA_WAITOK) != 0)
		goto unmap;

	return ndm;

unmap:
	bus_dmamem_unmap(sc->sc_dmat, ndm->ndm_kva, size);
free:
	bus_dmamem_free(sc->sc_dmat, &ndm->ndm_seg, 1);
destroy:
	bus_dmamap_destroy(sc->sc_dmat, ndm->ndm_map);
ndmfree:
	kmem_free(ndm, sizeof(*ndm));
	return NULL;
}

static void
nvme_dmamem_sync(struct nvme_softc *sc, struct nvme_dmamem *mem, int ops)
{
	bus_dmamap_sync(sc->sc_dmat, NVME_DMA_MAP(mem),
	    0, NVME_DMA_LEN(mem), ops);
}

void
nvme_dmamem_free(struct nvme_softc *sc, struct nvme_dmamem *ndm)
{
	bus_dmamap_unload(sc->sc_dmat, ndm->ndm_map);
	bus_dmamem_unmap(sc->sc_dmat, ndm->ndm_kva, ndm->ndm_size);
	bus_dmamem_free(sc->sc_dmat, &ndm->ndm_seg, 1);
	bus_dmamap_destroy(sc->sc_dmat, ndm->ndm_map);
	kmem_free(ndm, sizeof(*ndm));
}

/*
 * ioctl
 */

dev_type_open(nvmeopen);
dev_type_close(nvmeclose);
dev_type_ioctl(nvmeioctl);

const struct cdevsw nvme_cdevsw = {
	.d_open = nvmeopen,
	.d_close = nvmeclose,
	.d_read = noread,
	.d_write = nowrite,
	.d_ioctl = nvmeioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_kqfilter = nokqfilter,
	.d_discard = nodiscard,
	.d_flag = D_OTHER,
};
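
/*
 * The character device minor number encodes both the controller unit and
 * the namespace: unit is minor / 0x10000 and the namespace id is
 * minor & 0xffff, with nsid 0 addressing the controller itself.
 */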

/*
 * Accept an open operation on the control device.
 */
int
nvmeopen(dev_t dev, int flag, int mode, struct lwp *l)
{
	struct nvme_softc *sc;
	int unit = minor(dev) / 0x10000;
	int nsid = minor(dev) & 0xffff;
	int nsidx;

	if ((sc = device_lookup_private(&nvme_cd, unit)) == NULL)
		return ENXIO;
	if ((sc->sc_flags & NVME_F_ATTACHED) == 0)
		return ENXIO;

	if (nsid == 0) {
		/* controller */
		if (ISSET(sc->sc_flags, NVME_F_OPEN))
			return EBUSY;
		SET(sc->sc_flags, NVME_F_OPEN);
	} else {
		/* namespace */
		nsidx = nsid - 1;
		if (nsidx >= sc->sc_nn || sc->sc_namespaces[nsidx].dev == NULL)
			return ENXIO;
		if (ISSET(sc->sc_namespaces[nsidx].flags, NVME_NS_F_OPEN))
			return EBUSY;
		SET(sc->sc_namespaces[nsidx].flags, NVME_NS_F_OPEN);
	}
	return 0;
}

/*
 * Accept the last close on the control device.
 */
int
nvmeclose(dev_t dev, int flag, int mode, struct lwp *l)
{
	struct nvme_softc *sc;
	int unit = minor(dev) / 0x10000;
	int nsid = minor(dev) & 0xffff;
	int nsidx;

	sc = device_lookup_private(&nvme_cd, unit);
	if (sc == NULL)
		return ENXIO;

	if (nsid == 0) {
		/* controller */
		CLR(sc->sc_flags, NVME_F_OPEN);
	} else {
		/* namespace */
		nsidx = nsid - 1;
		if (nsidx >= sc->sc_nn)
			return ENXIO;
		CLR(sc->sc_namespaces[nsidx].flags, NVME_NS_F_OPEN);
	}

	return 0;
}

/*
 * Handle control operations.
 */
int
nvmeioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
{
	struct nvme_softc *sc;
	int unit = minor(dev) / 0x10000;
	int nsid = minor(dev) & 0xffff;
	struct nvme_pt_command *pt;

	sc = device_lookup_private(&nvme_cd, unit);
	if (sc == NULL)
		return ENXIO;

	switch (cmd) {
	case NVME_PASSTHROUGH_CMD:
		pt = data;
		return nvme_command_passthrough(sc, data,
		    nsid == 0 ? pt->cmd.nsid : nsid, l, nsid == 0);
	}

	return ENOTTY;
}