/* $NetBSD: nvme.c,v 1.22 2016/11/01 14:46:31 jdolecek Exp $ */
/* $OpenBSD: nvme.c,v 1.49 2016/04/18 05:59:50 dlg Exp $ */

/*
 * Copyright (c) 2014 David Gwynne <dlg@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: nvme.c,v 1.22 2016/11/01 14:46:31 jdolecek Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/atomic.h>
#include <sys/bus.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/device.h>
#include <sys/kmem.h>
#include <sys/once.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/mutex.h>

#include <uvm/uvm_extern.h>

#include <dev/ic/nvmereg.h>
#include <dev/ic/nvmevar.h>
#include <dev/ic/nvmeio.h>

int nvme_adminq_size = 32;
int nvme_ioq_size = 1024;

static int	nvme_print(void *, const char *);

static int	nvme_ready(struct nvme_softc *, uint32_t);
static int	nvme_enable(struct nvme_softc *, u_int);
static int	nvme_disable(struct nvme_softc *);
static int	nvme_shutdown(struct nvme_softc *);

static void	nvme_version(struct nvme_softc *, uint32_t);
#ifdef NVME_DEBUG
static void	nvme_dumpregs(struct nvme_softc *);
#endif
static int	nvme_identify(struct nvme_softc *, u_int);
static void	nvme_fill_identify(struct nvme_queue *, struct nvme_ccb *,
		    void *);

static int	nvme_ccbs_alloc(struct nvme_queue *, uint16_t);
static void	nvme_ccbs_free(struct nvme_queue *);

static struct nvme_ccb *
		nvme_ccb_get(struct nvme_queue *);
static void	nvme_ccb_put(struct nvme_queue *, struct nvme_ccb *);

static int	nvme_poll(struct nvme_softc *, struct nvme_queue *,
		    struct nvme_ccb *, void (*)(struct nvme_queue *,
		    struct nvme_ccb *, void *), int);
static void	nvme_poll_fill(struct nvme_queue *, struct nvme_ccb *, void *);
static void	nvme_poll_done(struct nvme_queue *, struct nvme_ccb *,
		    struct nvme_cqe *);
static void	nvme_sqe_fill(struct nvme_queue *, struct nvme_ccb *, void *);
static void	nvme_empty_done(struct nvme_queue *, struct nvme_ccb *,
		    struct nvme_cqe *);

static struct nvme_queue *
		nvme_q_alloc(struct nvme_softc *, uint16_t, u_int, u_int);
static int	nvme_q_create(struct nvme_softc *, struct nvme_queue *);
static int	nvme_q_delete(struct nvme_softc *, struct nvme_queue *);
static void	nvme_q_submit(struct nvme_softc *, struct nvme_queue *,
		    struct nvme_ccb *, void (*)(struct nvme_queue *,
		    struct nvme_ccb *, void *));
static int	nvme_q_complete(struct nvme_softc *, struct nvme_queue *q);
static void	nvme_q_free(struct nvme_softc *, struct nvme_queue *);

static struct nvme_dmamem *
		nvme_dmamem_alloc(struct nvme_softc *, size_t);
static void	nvme_dmamem_free(struct nvme_softc *, struct nvme_dmamem *);
static void	nvme_dmamem_sync(struct nvme_softc *, struct nvme_dmamem *,
		    int);

static void	nvme_ns_io_fill(struct nvme_queue *, struct nvme_ccb *,
		    void *);
static void	nvme_ns_io_done(struct nvme_queue *, struct nvme_ccb *,
		    struct nvme_cqe *);
static void	nvme_ns_sync_fill(struct nvme_queue *, struct nvme_ccb *,
		    void *);
static void	nvme_ns_sync_done(struct nvme_queue *, struct nvme_ccb *,
		    struct nvme_cqe *);

static void	nvme_pt_fill(struct nvme_queue *, struct nvme_ccb *,
		    void *);
static void	nvme_pt_done(struct nvme_queue *, struct nvme_ccb *,
		    struct nvme_cqe *);
static int	nvme_command_passthrough(struct nvme_softc *,
		    struct nvme_pt_command *, uint16_t, struct lwp *, bool);

#define NVME_TIMO_QOP		5	/* queue create and delete timeout */
#define NVME_TIMO_IDENT		10	/* probe identify timeout */
#define NVME_TIMO_PT		-1	/* passthrough cmd timeout */
#define NVME_TIMO_SY		60	/* sync cache timeout */

#define nvme_read4(_s, _r) \
	bus_space_read_4((_s)->sc_iot, (_s)->sc_ioh, (_r))
#define nvme_write4(_s, _r, _v) \
	bus_space_write_4((_s)->sc_iot, (_s)->sc_ioh, (_r), (_v))
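/*
 * On 64-bit platforms the 64-bit controller registers (CAP, ASQ, ACQ)
 * are accessed with a single bus_space operation; on 32-bit platforms
 * they are split into two 4-byte accesses, ordered according to the
 * host byte order.
 */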
#ifdef __LP64__
#define nvme_read8(_s, _r) \
	bus_space_read_8((_s)->sc_iot, (_s)->sc_ioh, (_r))
#define nvme_write8(_s, _r, _v) \
	bus_space_write_8((_s)->sc_iot, (_s)->sc_ioh, (_r), (_v))
#else /* __LP64__ */
static inline uint64_t
nvme_read8(struct nvme_softc *sc, bus_size_t r)
{
	uint64_t v;
	uint32_t *a = (uint32_t *)&v;

#if _BYTE_ORDER == _LITTLE_ENDIAN
	a[0] = nvme_read4(sc, r);
	a[1] = nvme_read4(sc, r + 4);
#else /* _BYTE_ORDER == _LITTLE_ENDIAN */
	a[1] = nvme_read4(sc, r);
	a[0] = nvme_read4(sc, r + 4);
#endif

	return v;
}

static inline void
nvme_write8(struct nvme_softc *sc, bus_size_t r, uint64_t v)
{
	uint32_t *a = (uint32_t *)&v;

#if _BYTE_ORDER == _LITTLE_ENDIAN
	nvme_write4(sc, r, a[0]);
	nvme_write4(sc, r + 4, a[1]);
#else /* _BYTE_ORDER == _LITTLE_ENDIAN */
	nvme_write4(sc, r, a[1]);
	nvme_write4(sc, r + 4, a[0]);
#endif
}
#endif /* __LP64__ */
#define nvme_barrier(_s, _r, _l, _f) \
	bus_space_barrier((_s)->sc_iot, (_s)->sc_ioh, (_r), (_l), (_f))

static void
nvme_version(struct nvme_softc *sc, uint32_t ver)
{
	const char *v = NULL;

	switch (ver) {
	case NVME_VS_1_0:
		v = "1.0";
		break;
	case NVME_VS_1_1:
		v = "1.1";
		break;
	case NVME_VS_1_2:
		v = "1.2";
		break;
	default:
		aprint_error_dev(sc->sc_dev, "unknown version 0x%08x\n", ver);
		return;
	}

	aprint_normal_dev(sc->sc_dev, "NVMe %s\n", v);
}

#ifdef NVME_DEBUG
static __used void
nvme_dumpregs(struct nvme_softc *sc)
{
	uint64_t r8;
	uint32_t r4;

#define	DEVNAME(_sc) device_xname((_sc)->sc_dev)
	r8 = nvme_read8(sc, NVME_CAP);
	printf("%s: cap 0x%016"PRIx64"\n", DEVNAME(sc), nvme_read8(sc, NVME_CAP));
	printf("%s: mpsmax %u (%u)\n", DEVNAME(sc),
	    (u_int)NVME_CAP_MPSMAX(r8), (1 << NVME_CAP_MPSMAX(r8)));
	printf("%s: mpsmin %u (%u)\n", DEVNAME(sc),
	    (u_int)NVME_CAP_MPSMIN(r8), (1 << NVME_CAP_MPSMIN(r8)));
	printf("%s: css %"PRIu64"\n", DEVNAME(sc), NVME_CAP_CSS(r8));
	printf("%s: nssrs %"PRIu64"\n", DEVNAME(sc), NVME_CAP_NSSRS(r8));
	printf("%s: dstrd %"PRIu64"\n", DEVNAME(sc), NVME_CAP_DSTRD(r8));
	printf("%s: to %"PRIu64" msec\n", DEVNAME(sc), NVME_CAP_TO(r8));
	printf("%s: ams %"PRIu64"\n", DEVNAME(sc), NVME_CAP_AMS(r8));
	printf("%s: cqr %"PRIu64"\n", DEVNAME(sc), NVME_CAP_CQR(r8));
	printf("%s: mqes %"PRIu64"\n", DEVNAME(sc), NVME_CAP_MQES(r8));

	printf("%s: vs 0x%04x\n", DEVNAME(sc), nvme_read4(sc, NVME_VS));

	r4 = nvme_read4(sc, NVME_CC);
	printf("%s: cc 0x%04x\n", DEVNAME(sc), r4);
	printf("%s: iocqes %u (%u)\n", DEVNAME(sc), NVME_CC_IOCQES_R(r4),
	    (1 << NVME_CC_IOCQES_R(r4)));
	printf("%s: iosqes %u (%u)\n", DEVNAME(sc), NVME_CC_IOSQES_R(r4),
	    (1 << NVME_CC_IOSQES_R(r4)));
	printf("%s: shn %u\n", DEVNAME(sc), NVME_CC_SHN_R(r4));
	printf("%s: ams %u\n", DEVNAME(sc), NVME_CC_AMS_R(r4));
	printf("%s: mps %u (%u)\n", DEVNAME(sc), NVME_CC_MPS_R(r4),
	    (1 << NVME_CC_MPS_R(r4)));
	printf("%s: css %u\n", DEVNAME(sc), NVME_CC_CSS_R(r4));
	printf("%s: en %u\n", DEVNAME(sc), ISSET(r4, NVME_CC_EN) ? 1 : 0);

	r4 = nvme_read4(sc, NVME_CSTS);
	printf("%s: csts 0x%08x\n", DEVNAME(sc), r4);
	printf("%s: rdy %u\n", DEVNAME(sc), r4 & NVME_CSTS_RDY);
	printf("%s: cfs %u\n", DEVNAME(sc), r4 & NVME_CSTS_CFS);
	printf("%s: shst %x\n", DEVNAME(sc), r4 & NVME_CSTS_SHST_MASK);

	r4 = nvme_read4(sc, NVME_AQA);
	printf("%s: aqa 0x%08x\n", DEVNAME(sc), r4);
	printf("%s: acqs %u\n", DEVNAME(sc), NVME_AQA_ACQS_R(r4));
	printf("%s: asqs %u\n", DEVNAME(sc), NVME_AQA_ASQS_R(r4));

	printf("%s: asq 0x%016"PRIx64"\n", DEVNAME(sc), nvme_read8(sc, NVME_ASQ));
	printf("%s: acq 0x%016"PRIx64"\n", DEVNAME(sc), nvme_read8(sc, NVME_ACQ));
#undef	DEVNAME
}
#endif	/* NVME_DEBUG */
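
/*
 * Wait for the controller ready bit (CSTS.RDY) to match the expected
 * state.  CC.EN is sanity-checked first, then CSTS is polled once per
 * millisecond until it matches or the sc_rdy_to limit is exceeded.
 */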
static int
nvme_ready(struct nvme_softc *sc, uint32_t rdy)
{
	u_int i = 0;
	uint32_t cc;

	cc = nvme_read4(sc, NVME_CC);
	if (((cc & NVME_CC_EN) != 0) != (rdy != 0)) {
		aprint_error_dev(sc->sc_dev,
		    "controller enabled status expected %d, found to be %d\n",
		    (rdy != 0), ((cc & NVME_CC_EN) != 0));
		return ENXIO;
	}

	while ((nvme_read4(sc, NVME_CSTS) & NVME_CSTS_RDY) != rdy) {
		if (i++ > sc->sc_rdy_to)
			return ENXIO;

		delay(1000);
		nvme_barrier(sc, NVME_CSTS, 4, BUS_SPACE_BARRIER_READ);
	}

	return 0;
}

static int
nvme_enable(struct nvme_softc *sc, u_int mps)
{
	uint32_t cc, csts;

	cc = nvme_read4(sc, NVME_CC);
	csts = nvme_read4(sc, NVME_CSTS);

	if (ISSET(cc, NVME_CC_EN)) {
		aprint_error_dev(sc->sc_dev,
		    "controller unexpectedly enabled, failed to stay disabled\n");

		if (ISSET(csts, NVME_CSTS_RDY))
			return 1;

		goto waitready;
	}

	nvme_write8(sc, NVME_ASQ, NVME_DMA_DVA(sc->sc_admin_q->q_sq_dmamem));
	nvme_barrier(sc, 0, sc->sc_ios, BUS_SPACE_BARRIER_WRITE);
	delay(5000);
	nvme_write8(sc, NVME_ACQ, NVME_DMA_DVA(sc->sc_admin_q->q_cq_dmamem));
	nvme_barrier(sc, 0, sc->sc_ios, BUS_SPACE_BARRIER_WRITE);
	delay(5000);

	nvme_write4(sc, NVME_AQA, NVME_AQA_ACQS(sc->sc_admin_q->q_entries) |
	    NVME_AQA_ASQS(sc->sc_admin_q->q_entries));
	nvme_barrier(sc, 0, sc->sc_ios, BUS_SPACE_BARRIER_WRITE);
	delay(5000);

	CLR(cc, NVME_CC_IOCQES_MASK | NVME_CC_IOSQES_MASK | NVME_CC_SHN_MASK |
	    NVME_CC_AMS_MASK | NVME_CC_MPS_MASK | NVME_CC_CSS_MASK);
	SET(cc, NVME_CC_IOSQES(ffs(64) - 1) | NVME_CC_IOCQES(ffs(16) - 1));
	SET(cc, NVME_CC_SHN(NVME_CC_SHN_NONE));
	SET(cc, NVME_CC_CSS(NVME_CC_CSS_NVM));
	SET(cc, NVME_CC_AMS(NVME_CC_AMS_RR));
	SET(cc, NVME_CC_MPS(mps));
	SET(cc, NVME_CC_EN);

	nvme_write4(sc, NVME_CC, cc);
	nvme_barrier(sc, 0, sc->sc_ios,
	    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
	delay(5000);

waitready:
	return nvme_ready(sc, NVME_CSTS_RDY);
}

static int
nvme_disable(struct nvme_softc *sc)
{
	uint32_t cc, csts;

	cc = nvme_read4(sc, NVME_CC);
	csts = nvme_read4(sc, NVME_CSTS);

	if (ISSET(cc, NVME_CC_EN) && !ISSET(csts, NVME_CSTS_RDY))
		nvme_ready(sc, NVME_CSTS_RDY);

	CLR(cc, NVME_CC_EN);

	nvme_write4(sc, NVME_CC, cc);
	nvme_barrier(sc, 0, sc->sc_ios, BUS_SPACE_BARRIER_READ);

	delay(5000);

	return nvme_ready(sc, 0);
}
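
/*
 * Controller attachment: check the register mapping, read VS/CAP,
 * reset (disable) the controller, set up and enable the admin queue,
 * identify the controller, create the sc_nq I/O queues and finally
 * attach a child device for each namespace via nvme_rescan().
 */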
int
nvme_attach(struct nvme_softc *sc)
{
	uint64_t cap;
	uint32_t reg;
	u_int dstrd;
	u_int mps = PAGE_SHIFT;
	uint16_t adminq_entries = nvme_adminq_size;
	uint16_t ioq_entries = nvme_ioq_size;
	int i;

	reg = nvme_read4(sc, NVME_VS);
	if (reg == 0xffffffff) {
		aprint_error_dev(sc->sc_dev, "invalid mapping\n");
		return 1;
	}

	nvme_version(sc, reg);

	cap = nvme_read8(sc, NVME_CAP);
	dstrd = NVME_CAP_DSTRD(cap);
	if (NVME_CAP_MPSMIN(cap) > PAGE_SHIFT) {
		aprint_error_dev(sc->sc_dev, "NVMe minimum page size %u "
		    "is greater than CPU page size %u\n",
		    1 << NVME_CAP_MPSMIN(cap), 1 << PAGE_SHIFT);
		return 1;
	}
	if (NVME_CAP_MPSMAX(cap) < mps)
		mps = NVME_CAP_MPSMAX(cap);
	if (ioq_entries > NVME_CAP_MQES(cap))
		ioq_entries = NVME_CAP_MQES(cap);

	/* set initial values to be used for admin queue during probe */
	sc->sc_rdy_to = NVME_CAP_TO(cap);
	sc->sc_mps = 1 << mps;
	sc->sc_mdts = MAXPHYS;
	sc->sc_max_sgl = 2;

	if (nvme_disable(sc) != 0) {
		aprint_error_dev(sc->sc_dev, "unable to disable controller\n");
		return 1;
	}

	sc->sc_admin_q = nvme_q_alloc(sc, NVME_ADMIN_Q, adminq_entries, dstrd);
	if (sc->sc_admin_q == NULL) {
		aprint_error_dev(sc->sc_dev,
		    "unable to allocate admin queue\n");
		return 1;
	}
	if (sc->sc_intr_establish(sc, NVME_ADMIN_Q, sc->sc_admin_q))
		goto free_admin_q;

	if (nvme_enable(sc, mps) != 0) {
		aprint_error_dev(sc->sc_dev, "unable to enable controller\n");
		goto disestablish_admin_q;
	}

	if (nvme_identify(sc, NVME_CAP_MPSMIN(cap)) != 0) {
		aprint_error_dev(sc->sc_dev, "unable to identify controller\n");
		goto disable;
	}

	/* we know how big things are now */
	sc->sc_max_sgl = sc->sc_mdts / sc->sc_mps;

	/* reallocate ccbs of admin queue with new max sgl. */
	nvme_ccbs_free(sc->sc_admin_q);
	nvme_ccbs_alloc(sc->sc_admin_q, sc->sc_admin_q->q_entries);

	sc->sc_q = kmem_zalloc(sizeof(*sc->sc_q) * sc->sc_nq, KM_SLEEP);
	if (sc->sc_q == NULL) {
		aprint_error_dev(sc->sc_dev, "unable to allocate io queue\n");
		goto disable;
	}
	for (i = 0; i < sc->sc_nq; i++) {
		sc->sc_q[i] = nvme_q_alloc(sc, i + 1, ioq_entries, dstrd);
		if (sc->sc_q[i] == NULL) {
			aprint_error_dev(sc->sc_dev,
			    "unable to allocate io queue\n");
			goto free_q;
		}
		if (nvme_q_create(sc, sc->sc_q[i]) != 0) {
			aprint_error_dev(sc->sc_dev,
			    "unable to create io queue\n");
			nvme_q_free(sc, sc->sc_q[i]);
			goto free_q;
		}
	}

	if (!sc->sc_use_mq)
		nvme_write4(sc, NVME_INTMC, 1);

	/* probe subdevices */
	sc->sc_namespaces = kmem_zalloc(sizeof(*sc->sc_namespaces) * sc->sc_nn,
	    KM_SLEEP);
	if (sc->sc_namespaces == NULL)
		goto free_q;
	nvme_rescan(sc->sc_dev, "nvme", &i);

	return 0;

free_q:
	while (--i >= 0) {
		nvme_q_delete(sc, sc->sc_q[i]);
		nvme_q_free(sc, sc->sc_q[i]);
	}
disable:
	nvme_disable(sc);
disestablish_admin_q:
	sc->sc_intr_disestablish(sc, NVME_ADMIN_Q);
free_admin_q:
	nvme_q_free(sc, sc->sc_admin_q);

	return 1;
}
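
/*
 * Attach a child device for every namespace that does not have one yet.
 * Called from nvme_attach() and by the bus front-end on rescan.
 */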
int
nvme_rescan(device_t self, const char *attr, const int *flags)
{
	struct nvme_softc *sc = device_private(self);
	struct nvme_attach_args naa;
	uint64_t cap;
	int ioq_entries = nvme_ioq_size;
	int i;

	cap = nvme_read8(sc, NVME_CAP);
	if (ioq_entries > NVME_CAP_MQES(cap))
		ioq_entries = NVME_CAP_MQES(cap);

	for (i = 0; i < sc->sc_nn; i++) {
		if (sc->sc_namespaces[i].dev)
			continue;
		memset(&naa, 0, sizeof(naa));
		naa.naa_nsid = i + 1;
		naa.naa_qentries = (ioq_entries - 1) * sc->sc_nq;
		naa.naa_maxphys = sc->sc_mdts;
		sc->sc_namespaces[i].dev = config_found(sc->sc_dev, &naa,
		    nvme_print);
	}
	return 0;
}

static int
nvme_print(void *aux, const char *pnp)
{
	struct nvme_attach_args *naa = aux;

	if (pnp)
		aprint_normal("at %s", pnp);

	if (naa->naa_nsid > 0)
		aprint_normal(" nsid %d", naa->naa_nsid);

	return UNCONF;
}

int
nvme_detach(struct nvme_softc *sc, int flags)
{
	int i, error;

	error = config_detach_children(sc->sc_dev, flags);
	if (error)
		return error;

	error = nvme_shutdown(sc);
	if (error)
		return error;

	/* from now on we are committed to detach, following will never fail */
	for (i = 0; i < sc->sc_nq; i++)
		nvme_q_free(sc, sc->sc_q[i]);
	kmem_free(sc->sc_q, sizeof(*sc->sc_q) * sc->sc_nq);
	nvme_q_free(sc, sc->sc_admin_q);

	return 0;
}
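
/*
 * Shut the controller down: delete the I/O queues, then request a
 * normal shutdown via CC.SHN and wait for CSTS.SHST to report
 * completion.  Falls back to disabling the controller if either
 * step fails.
 */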
static int
nvme_shutdown(struct nvme_softc *sc)
{
	uint32_t cc, csts;
	bool disabled = false;
	int i;

	if (!sc->sc_use_mq)
		nvme_write4(sc, NVME_INTMS, 1);

	for (i = 0; i < sc->sc_nq; i++) {
		if (nvme_q_delete(sc, sc->sc_q[i]) != 0) {
			aprint_error_dev(sc->sc_dev,
			    "unable to delete io queue %d, disabling\n", i + 1);
			disabled = true;
		}
	}
	sc->sc_intr_disestablish(sc, NVME_ADMIN_Q);
	if (disabled)
		goto disable;

	cc = nvme_read4(sc, NVME_CC);
	CLR(cc, NVME_CC_SHN_MASK);
	SET(cc, NVME_CC_SHN(NVME_CC_SHN_NORMAL));
	nvme_write4(sc, NVME_CC, cc);

	for (i = 0; i < 4000; i++) {
		nvme_barrier(sc, 0, sc->sc_ios,
		    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
		csts = nvme_read4(sc, NVME_CSTS);
		if ((csts & NVME_CSTS_SHST_MASK) == NVME_CSTS_SHST_DONE)
			return 0;

		delay(1000);
	}

	aprint_error_dev(sc->sc_dev, "unable to shut down, disabling\n");

disable:
	nvme_disable(sc);
	return 0;
}

void
nvme_childdet(device_t self, device_t child)
{
	struct nvme_softc *sc = device_private(self);
	int i;

	for (i = 0; i < sc->sc_nn; i++) {
		if (sc->sc_namespaces[i].dev == child) {
			/* Already freed ns->ident. */
			sc->sc_namespaces[i].dev = NULL;
			break;
		}
	}
}

int
nvme_ns_identify(struct nvme_softc *sc, uint16_t nsid)
{
	struct nvme_sqe sqe;
	struct nvm_identify_namespace *identify;
	struct nvme_dmamem *mem;
	struct nvme_ccb *ccb;
	struct nvme_namespace *ns;
	int rv;

	KASSERT(nsid > 0);

	ccb = nvme_ccb_get(sc->sc_admin_q);
	KASSERT(ccb != NULL); /* it's a bug if we don't have spare ccb here */

	mem = nvme_dmamem_alloc(sc, sizeof(*identify));
	if (mem == NULL)
		return ENOMEM;

	memset(&sqe, 0, sizeof(sqe));
	sqe.opcode = NVM_ADMIN_IDENTIFY;
	htolem32(&sqe.nsid, nsid);
	htolem64(&sqe.entry.prp[0], NVME_DMA_DVA(mem));
	htolem32(&sqe.cdw10, 0);

	ccb->ccb_done = nvme_empty_done;
	ccb->ccb_cookie = &sqe;

	nvme_dmamem_sync(sc, mem, BUS_DMASYNC_PREREAD);
	rv = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_sqe_fill, NVME_TIMO_IDENT);
	nvme_dmamem_sync(sc, mem, BUS_DMASYNC_POSTREAD);

	nvme_ccb_put(sc->sc_admin_q, ccb);

	if (rv != 0) {
		rv = EIO;
		goto done;
	}

	/* commit */

	identify = kmem_zalloc(sizeof(*identify), KM_SLEEP);
	*identify = *((volatile struct nvm_identify_namespace *)NVME_DMA_KVA(mem));
	//memcpy(identify, NVME_DMA_KVA(mem), sizeof(*identify));

	ns = nvme_ns_get(sc, nsid);
	KASSERT(ns);
	ns->ident = identify;

done:
	nvme_dmamem_free(sc, mem);

	return rv;
}
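
/*
 * Issue a read or write for a namespace.  The data buffer is loaded
 * into the ccb's DMA map; transfers with more than two segments use
 * the per-ccb PRP list.  With NVME_NS_CTX_F_POLL the command is polled
 * to completion, otherwise it is queued and nnc_done() is called from
 * the completion path.
 */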
int
nvme_ns_dobio(struct nvme_softc *sc, uint16_t nsid, void *cookie,
    struct buf *bp, void *data, size_t datasize,
    int secsize, daddr_t blkno, int flags, nvme_nnc_done nnc_done)
{
	struct nvme_queue *q = nvme_get_q(sc);
	struct nvme_ccb *ccb;
	bus_dmamap_t dmap;
	int i, error;

	ccb = nvme_ccb_get(q);
	if (ccb == NULL)
		return EAGAIN;

	ccb->ccb_done = nvme_ns_io_done;
	ccb->ccb_cookie = cookie;

	/* namespace context */
	ccb->nnc_nsid = nsid;
	ccb->nnc_flags = flags;
	ccb->nnc_buf = bp;
	ccb->nnc_datasize = datasize;
	ccb->nnc_secsize = secsize;
	ccb->nnc_blkno = blkno;
	ccb->nnc_done = nnc_done;

	dmap = ccb->ccb_dmamap;
	error = bus_dmamap_load(sc->sc_dmat, dmap, data,
	    datasize, NULL,
	    (ISSET(flags, NVME_NS_CTX_F_POLL) ?
	      BUS_DMA_NOWAIT : BUS_DMA_WAITOK) |
	    (ISSET(flags, NVME_NS_CTX_F_READ) ?
	      BUS_DMA_READ : BUS_DMA_WRITE));
	if (error) {
		nvme_ccb_put(q, ccb);
		return error;
	}

	bus_dmamap_sync(sc->sc_dmat, dmap, 0, dmap->dm_mapsize,
	    ISSET(flags, NVME_NS_CTX_F_READ) ?
	      BUS_DMASYNC_PREREAD : BUS_DMASYNC_PREWRITE);

	if (dmap->dm_nsegs > 2) {
		for (i = 1; i < dmap->dm_nsegs; i++) {
			htolem64(&ccb->ccb_prpl[i - 1],
			    dmap->dm_segs[i].ds_addr);
		}
		bus_dmamap_sync(sc->sc_dmat,
		    NVME_DMA_MAP(q->q_ccb_prpls),
		    ccb->ccb_prpl_off,
		    sizeof(*ccb->ccb_prpl) * (dmap->dm_nsegs - 1),
		    BUS_DMASYNC_PREWRITE);
	}

	if (ISSET(flags, NVME_NS_CTX_F_POLL)) {
		if (nvme_poll(sc, q, ccb, nvme_ns_io_fill, NVME_TIMO_PT) != 0)
			return EIO;
		return 0;
	}

	nvme_q_submit(sc, q, ccb, nvme_ns_io_fill);
	return 0;
}

static void
nvme_ns_io_fill(struct nvme_queue *q, struct nvme_ccb *ccb, void *slot)
{
	struct nvme_sqe_io *sqe = slot;
	bus_dmamap_t dmap = ccb->ccb_dmamap;

	sqe->opcode = ISSET(ccb->nnc_flags, NVME_NS_CTX_F_READ) ?
	    NVM_CMD_READ : NVM_CMD_WRITE;
	htolem32(&sqe->nsid, ccb->nnc_nsid);

	htolem64(&sqe->entry.prp[0], dmap->dm_segs[0].ds_addr);
	switch (dmap->dm_nsegs) {
	case 1:
		break;
	case 2:
		htolem64(&sqe->entry.prp[1], dmap->dm_segs[1].ds_addr);
		break;
	default:
		/* the prp list is already set up and synced */
		htolem64(&sqe->entry.prp[1], ccb->ccb_prpl_dva);
		break;
	}

	htolem64(&sqe->slba, ccb->nnc_blkno);

	/* guaranteed by upper layers, but check just in case */
	KASSERT((ccb->nnc_datasize % ccb->nnc_secsize) == 0);
	htolem16(&sqe->nlb, (ccb->nnc_datasize / ccb->nnc_secsize) - 1);
}

static void
nvme_ns_io_done(struct nvme_queue *q, struct nvme_ccb *ccb,
    struct nvme_cqe *cqe)
{
	struct nvme_softc *sc = q->q_sc;
	bus_dmamap_t dmap = ccb->ccb_dmamap;
	void *nnc_cookie = ccb->ccb_cookie;
	nvme_nnc_done nnc_done = ccb->nnc_done;
	struct buf *bp = ccb->nnc_buf;

	if (dmap->dm_nsegs > 2) {
		bus_dmamap_sync(sc->sc_dmat,
		    NVME_DMA_MAP(q->q_ccb_prpls),
		    ccb->ccb_prpl_off,
		    sizeof(*ccb->ccb_prpl) * (dmap->dm_nsegs - 1),
		    BUS_DMASYNC_POSTWRITE);
	}

	bus_dmamap_sync(sc->sc_dmat, dmap, 0, dmap->dm_mapsize,
	    ISSET(ccb->nnc_flags, NVME_NS_CTX_F_READ) ?
	      BUS_DMASYNC_POSTREAD : BUS_DMASYNC_POSTWRITE);

	bus_dmamap_unload(sc->sc_dmat, dmap);
	nvme_ccb_put(q, ccb);

	nnc_done(nnc_cookie, bp, lemtoh16(&cqe->flags));
}
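
/*
 * Issue a FLUSH (cache sync) command for a namespace, either polled or
 * queued, mirroring nvme_ns_dobio() but without a data transfer.
 */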
int
nvme_ns_sync(struct nvme_softc *sc, uint16_t nsid, void *cookie,
    int flags, nvme_nnc_done nnc_done)
{
	struct nvme_queue *q = nvme_get_q(sc);
	struct nvme_ccb *ccb;

	ccb = nvme_ccb_get(q);
	if (ccb == NULL)
		return EAGAIN;

	ccb->ccb_done = nvme_ns_sync_done;
	ccb->ccb_cookie = cookie;

	/* namespace context */
	ccb->nnc_nsid = nsid;
	ccb->nnc_flags = flags;
	ccb->nnc_done = nnc_done;

	if (ISSET(flags, NVME_NS_CTX_F_POLL)) {
		if (nvme_poll(sc, q, ccb, nvme_ns_sync_fill, NVME_TIMO_SY) != 0)
			return EIO;
		return 0;
	}

	nvme_q_submit(sc, q, ccb, nvme_ns_sync_fill);
	return 0;
}

static void
nvme_ns_sync_fill(struct nvme_queue *q, struct nvme_ccb *ccb, void *slot)
{
	struct nvme_sqe *sqe = slot;

	sqe->opcode = NVM_CMD_FLUSH;
	htolem32(&sqe->nsid, ccb->nnc_nsid);
}

static void
nvme_ns_sync_done(struct nvme_queue *q, struct nvme_ccb *ccb,
    struct nvme_cqe *cqe)
{
	void *cookie = ccb->ccb_cookie;
	nvme_nnc_done nnc_done = ccb->nnc_done;

	nvme_ccb_put(q, ccb);

	nnc_done(cookie, NULL, lemtoh16(&cqe->flags));
}

void
nvme_ns_free(struct nvme_softc *sc, uint16_t nsid)
{
	struct nvme_namespace *ns;
	struct nvm_identify_namespace *identify;

	ns = nvme_ns_get(sc, nsid);
	KASSERT(ns);

	identify = ns->ident;
	ns->ident = NULL;
	if (identify != NULL)
		kmem_free(identify, sizeof(*identify));
}
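
/*
 * NVME_PASSTHROUGH_CMD ioctl support: nvme_pt_fill() copies the
 * user-supplied command words into the submission queue entry and
 * points PRP1/PRP2 at the bounce buffer, nvme_pt_done() syncs and
 * unloads the DMA map and records the completion status, and
 * nvme_command_passthrough() drives the whole exchange.
 */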
static void
nvme_pt_fill(struct nvme_queue *q, struct nvme_ccb *ccb, void *slot)
{
	struct nvme_softc *sc = q->q_sc;
	struct nvme_sqe *sqe = slot;
	struct nvme_pt_command *pt = ccb->ccb_cookie;
	bus_dmamap_t dmap = ccb->ccb_dmamap;
	int i;

	sqe->opcode = pt->cmd.opcode;
	htolem32(&sqe->nsid, pt->cmd.nsid);

	if (pt->buf != NULL && pt->len > 0) {
		htolem64(&sqe->entry.prp[0], dmap->dm_segs[0].ds_addr);
		switch (dmap->dm_nsegs) {
		case 1:
			break;
		case 2:
			htolem64(&sqe->entry.prp[1], dmap->dm_segs[1].ds_addr);
			break;
		default:
			for (i = 1; i < dmap->dm_nsegs; i++) {
				htolem64(&ccb->ccb_prpl[i - 1],
				    dmap->dm_segs[i].ds_addr);
			}
			bus_dmamap_sync(sc->sc_dmat,
			    NVME_DMA_MAP(q->q_ccb_prpls),
			    ccb->ccb_prpl_off,
			    sizeof(*ccb->ccb_prpl) * (dmap->dm_nsegs - 1),
			    BUS_DMASYNC_PREWRITE);
			htolem64(&sqe->entry.prp[1], ccb->ccb_prpl_dva);
			break;
		}
	}

	htolem32(&sqe->cdw10, pt->cmd.cdw10);
	htolem32(&sqe->cdw11, pt->cmd.cdw11);
	htolem32(&sqe->cdw12, pt->cmd.cdw12);
	htolem32(&sqe->cdw13, pt->cmd.cdw13);
	htolem32(&sqe->cdw14, pt->cmd.cdw14);
	htolem32(&sqe->cdw15, pt->cmd.cdw15);
}

static void
nvme_pt_done(struct nvme_queue *q, struct nvme_ccb *ccb, struct nvme_cqe *cqe)
{
	struct nvme_softc *sc = q->q_sc;
	struct nvme_pt_command *pt = ccb->ccb_cookie;
	bus_dmamap_t dmap = ccb->ccb_dmamap;

	if (pt->buf != NULL && pt->len > 0) {
		if (dmap->dm_nsegs > 2) {
			bus_dmamap_sync(sc->sc_dmat,
			    NVME_DMA_MAP(q->q_ccb_prpls),
			    ccb->ccb_prpl_off,
			    sizeof(*ccb->ccb_prpl) * (dmap->dm_nsegs - 1),
			    BUS_DMASYNC_POSTWRITE);
		}

		bus_dmamap_sync(sc->sc_dmat, dmap, 0, dmap->dm_mapsize,
		    pt->is_read ? BUS_DMASYNC_POSTREAD : BUS_DMASYNC_POSTWRITE);
		bus_dmamap_unload(sc->sc_dmat, dmap);
	}

	pt->cpl.cdw0 = cqe->cdw0;
	pt->cpl.flags = cqe->flags & ~NVME_CQE_PHASE;
}

static int
nvme_command_passthrough(struct nvme_softc *sc, struct nvme_pt_command *pt,
    uint16_t nsid, struct lwp *l, bool is_adminq)
{
	struct nvme_queue *q;
	struct nvme_ccb *ccb;
	void *buf = NULL;
	int error;

	/* limit command size to maximum data transfer size */
	if ((pt->buf == NULL && pt->len > 0) ||
	    (pt->buf != NULL && (pt->len == 0 || pt->len > sc->sc_mdts)))
		return EINVAL;

	q = is_adminq ? sc->sc_admin_q : nvme_get_q(sc);
	ccb = nvme_ccb_get(q);
	if (ccb == NULL)
		return EBUSY;

	if (pt->buf != NULL) {
		KASSERT(pt->len > 0);
		buf = kmem_alloc(pt->len, KM_SLEEP);
		if (buf == NULL) {
			error = ENOMEM;
			goto ccb_put;
		}
		if (!pt->is_read) {
			error = copyin(pt->buf, buf, pt->len);
			if (error)
				goto kmem_free;
		}
		error = bus_dmamap_load(sc->sc_dmat, ccb->ccb_dmamap, buf,
		    pt->len, NULL,
		    BUS_DMA_WAITOK |
		      (pt->is_read ? BUS_DMA_READ : BUS_DMA_WRITE));
		if (error)
			goto kmem_free;
		bus_dmamap_sync(sc->sc_dmat, ccb->ccb_dmamap,
		    0, ccb->ccb_dmamap->dm_mapsize,
		    pt->is_read ? BUS_DMASYNC_PREREAD : BUS_DMASYNC_PREWRITE);
	}

	ccb->ccb_done = nvme_pt_done;
	ccb->ccb_cookie = pt;

	pt->cmd.nsid = nsid;
	if (nvme_poll(sc, q, ccb, nvme_pt_fill, NVME_TIMO_PT)) {
		error = EIO;
		goto out;
	}

	error = 0;
out:
	if (buf != NULL) {
		if (error == 0 && pt->is_read)
			error = copyout(buf, pt->buf, pt->len);
kmem_free:
		kmem_free(buf, pt->len);
	}
ccb_put:
	nvme_ccb_put(q, ccb);
	return error;
}

static void
nvme_q_submit(struct nvme_softc *sc, struct nvme_queue *q, struct nvme_ccb *ccb,
    void (*fill)(struct nvme_queue *, struct nvme_ccb *, void *))
{
	struct nvme_sqe *sqe = NVME_DMA_KVA(q->q_sq_dmamem);
	uint32_t tail;

	mutex_enter(&q->q_sq_mtx);
	tail = q->q_sq_tail;
	if (++q->q_sq_tail >= q->q_entries)
		q->q_sq_tail = 0;

	sqe += tail;

	bus_dmamap_sync(sc->sc_dmat, NVME_DMA_MAP(q->q_sq_dmamem),
	    sizeof(*sqe) * tail, sizeof(*sqe), BUS_DMASYNC_POSTWRITE);
	memset(sqe, 0, sizeof(*sqe));
	(*fill)(q, ccb, sqe);
	sqe->cid = ccb->ccb_id;
	bus_dmamap_sync(sc->sc_dmat, NVME_DMA_MAP(q->q_sq_dmamem),
	    sizeof(*sqe) * tail, sizeof(*sqe), BUS_DMASYNC_PREWRITE);

	nvme_write4(sc, q->q_sqtdbl, q->q_sq_tail);
	mutex_exit(&q->q_sq_mtx);
}

struct nvme_poll_state {
	struct nvme_sqe s;
	struct nvme_cqe c;
};
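
/*
 * Submit a command and busy-wait for its completion.  The caller's
 * fill and done callbacks are temporarily replaced so the submission
 * and completion entries can be staged in a local nvme_poll_state;
 * the completion queue is then polled until the phase bit flips or
 * roughly timo_sec seconds elapse (a negative timeout waits forever).
 * Returns 0 on success, the NVMe status field on command error, or 1
 * on timeout.
 */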
static int
nvme_poll(struct nvme_softc *sc, struct nvme_queue *q, struct nvme_ccb *ccb,
    void (*fill)(struct nvme_queue *, struct nvme_ccb *, void *), int timo_sec)
{
	struct nvme_poll_state state;
	void (*done)(struct nvme_queue *, struct nvme_ccb *, struct nvme_cqe *);
	void *cookie;
	uint16_t flags;
	int step = 10;
	int maxloop = timo_sec * 1000000 / step;
	int error = 0;

	memset(&state, 0, sizeof(state));
	(*fill)(q, ccb, &state.s);

	done = ccb->ccb_done;
	cookie = ccb->ccb_cookie;

	ccb->ccb_done = nvme_poll_done;
	ccb->ccb_cookie = &state;

	nvme_q_submit(sc, q, ccb, nvme_poll_fill);
	while (!ISSET(state.c.flags, htole16(NVME_CQE_PHASE))) {
		if (nvme_q_complete(sc, q) == 0)
			delay(step);

		if (timo_sec >= 0 && --maxloop <= 0) {
			error = ETIMEDOUT;
			break;
		}
	}

	ccb->ccb_cookie = cookie;
	done(q, ccb, &state.c);

	if (error == 0) {
		flags = lemtoh16(&state.c.flags);
		return flags & ~NVME_CQE_PHASE;
	} else {
		return 1;
	}
}

static void
nvme_poll_fill(struct nvme_queue *q, struct nvme_ccb *ccb, void *slot)
{
	struct nvme_sqe *sqe = slot;
	struct nvme_poll_state *state = ccb->ccb_cookie;

	*sqe = state->s;
}

static void
nvme_poll_done(struct nvme_queue *q, struct nvme_ccb *ccb,
    struct nvme_cqe *cqe)
{
	struct nvme_poll_state *state = ccb->ccb_cookie;

	SET(cqe->flags, htole16(NVME_CQE_PHASE));
	state->c = *cqe;
}

static void
nvme_sqe_fill(struct nvme_queue *q, struct nvme_ccb *ccb, void *slot)
{
	struct nvme_sqe *src = ccb->ccb_cookie;
	struct nvme_sqe *dst = slot;

	*dst = *src;
}

static void
nvme_empty_done(struct nvme_queue *q, struct nvme_ccb *ccb,
    struct nvme_cqe *cqe)
{
}
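
/*
 * Process completion queue entries: consume entries whose phase bit
 * matches the current queue phase, invoke the per-ccb done callback
 * (with the CQ mutex dropped), then write the new head index to the
 * completion queue doorbell and credit the freed ccbs back to
 * q_nccbs_avail.  Returns the number of entries processed.
 */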
static int
nvme_q_complete(struct nvme_softc *sc, struct nvme_queue *q)
{
	struct nvme_ccb *ccb;
	struct nvme_cqe *ring = NVME_DMA_KVA(q->q_cq_dmamem), *cqe;
	uint16_t flags;
	int rv = 0;

	mutex_enter(&q->q_cq_mtx);

	nvme_dmamem_sync(sc, q->q_cq_dmamem, BUS_DMASYNC_POSTREAD);
	for (;;) {
		cqe = &ring[q->q_cq_head];
		flags = lemtoh16(&cqe->flags);
		if ((flags & NVME_CQE_PHASE) != q->q_cq_phase)
			break;

		ccb = &q->q_ccbs[cqe->cid];

		if (++q->q_cq_head >= q->q_entries) {
			q->q_cq_head = 0;
			q->q_cq_phase ^= NVME_CQE_PHASE;
		}

#ifdef DEBUG
		/*
		 * If we get spurious completion notification, something
		 * is seriously hosed up. Very likely DMA to some random
		 * memory place happened, so just bail out.
		 */
		if ((intptr_t)ccb->ccb_cookie == NVME_CCB_FREE) {
			panic("%s: invalid ccb detected",
			    device_xname(sc->sc_dev));
			/* NOTREACHED */
		}
#endif

		rv++;

		/*
		 * Unlock the mutex before calling the ccb_done callback
		 * and re-lock afterwards. The callback triggers lddone()
		 * which schedules another i/o, and also calls nvme_ccb_put().
		 * Unlock/relock avoids possibility of deadlock.
		 */
		mutex_exit(&q->q_cq_mtx);
		ccb->ccb_done(q, ccb, cqe);
		mutex_enter(&q->q_cq_mtx);
	}
	nvme_dmamem_sync(sc, q->q_cq_dmamem, BUS_DMASYNC_PREREAD);

	if (rv)
		nvme_write4(sc, q->q_cqhdbl, q->q_cq_head);

	mutex_exit(&q->q_cq_mtx);

	if (rv) {
		mutex_enter(&q->q_ccb_mtx);
		q->q_nccbs_avail += rv;
		mutex_exit(&q->q_ccb_mtx);
	}

	return rv;
}

static int
nvme_identify(struct nvme_softc *sc, u_int mps)
{
	char sn[41], mn[81], fr[17];
	struct nvm_identify_controller *identify;
	struct nvme_dmamem *mem;
	struct nvme_ccb *ccb;
	u_int mdts;
	int rv = 1;

	ccb = nvme_ccb_get(sc->sc_admin_q);
	KASSERT(ccb != NULL); /* it's a bug if we don't have spare ccb here */

	mem = nvme_dmamem_alloc(sc, sizeof(*identify));
	if (mem == NULL)
		return 1;

	ccb->ccb_done = nvme_empty_done;
	ccb->ccb_cookie = mem;

	nvme_dmamem_sync(sc, mem, BUS_DMASYNC_PREREAD);
	rv = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_fill_identify,
	    NVME_TIMO_IDENT);
	nvme_dmamem_sync(sc, mem, BUS_DMASYNC_POSTREAD);

	nvme_ccb_put(sc->sc_admin_q, ccb);

	if (rv != 0)
		goto done;

	identify = NVME_DMA_KVA(mem);

	strnvisx(sn, sizeof(sn), (const char *)identify->sn,
	    sizeof(identify->sn), VIS_TRIM|VIS_SAFE|VIS_OCTAL);
	strnvisx(mn, sizeof(mn), (const char *)identify->mn,
	    sizeof(identify->mn), VIS_TRIM|VIS_SAFE|VIS_OCTAL);
	strnvisx(fr, sizeof(fr), (const char *)identify->fr,
	    sizeof(identify->fr), VIS_TRIM|VIS_SAFE|VIS_OCTAL);
	aprint_normal_dev(sc->sc_dev, "%s, firmware %s, serial %s\n", mn, fr,
	    sn);

	if (identify->mdts > 0) {
		mdts = (1 << identify->mdts) * (1 << mps);
		if (mdts < sc->sc_mdts)
			sc->sc_mdts = mdts;
	}

	sc->sc_nn = lemtoh32(&identify->nn);

	memcpy(&sc->sc_identify, identify, sizeof(sc->sc_identify));

done:
	nvme_dmamem_free(sc, mem);

	return rv;
}
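
/*
 * Create an I/O queue pair: establish the per-queue interrupt vector
 * when multiple queues are in use, then issue the admin "create I/O
 * completion queue" and "create I/O submission queue" commands, with
 * the submission queue bound to the completion queue of the same id.
 */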
static int
nvme_q_create(struct nvme_softc *sc, struct nvme_queue *q)
{
	struct nvme_sqe_q sqe;
	struct nvme_ccb *ccb;
	int rv;

	if (sc->sc_use_mq && sc->sc_intr_establish(sc, q->q_id, q) != 0)
		return 1;

	ccb = nvme_ccb_get(sc->sc_admin_q);
	KASSERT(ccb != NULL);

	ccb->ccb_done = nvme_empty_done;
	ccb->ccb_cookie = &sqe;

	memset(&sqe, 0, sizeof(sqe));
	sqe.opcode = NVM_ADMIN_ADD_IOCQ;
	htolem64(&sqe.prp1, NVME_DMA_DVA(q->q_cq_dmamem));
	htolem16(&sqe.qsize, q->q_entries - 1);
	htolem16(&sqe.qid, q->q_id);
	sqe.qflags = NVM_SQE_CQ_IEN | NVM_SQE_Q_PC;
	if (sc->sc_use_mq)
		htolem16(&sqe.cqid, q->q_id);	/* qid == vector */

	rv = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_sqe_fill, NVME_TIMO_QOP);
	if (rv != 0)
		goto fail;

	ccb->ccb_done = nvme_empty_done;
	ccb->ccb_cookie = &sqe;

	memset(&sqe, 0, sizeof(sqe));
	sqe.opcode = NVM_ADMIN_ADD_IOSQ;
	htolem64(&sqe.prp1, NVME_DMA_DVA(q->q_sq_dmamem));
	htolem16(&sqe.qsize, q->q_entries - 1);
	htolem16(&sqe.qid, q->q_id);
	htolem16(&sqe.cqid, q->q_id);
	sqe.qflags = NVM_SQE_Q_PC;

	rv = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_sqe_fill, NVME_TIMO_QOP);
	if (rv != 0)
		goto fail;

fail:
	nvme_ccb_put(sc->sc_admin_q, ccb);
	return rv;
}

static int
nvme_q_delete(struct nvme_softc *sc, struct nvme_queue *q)
{
	struct nvme_sqe_q sqe;
	struct nvme_ccb *ccb;
	int rv;

	ccb = nvme_ccb_get(sc->sc_admin_q);
	KASSERT(ccb != NULL);

	ccb->ccb_done = nvme_empty_done;
	ccb->ccb_cookie = &sqe;

	memset(&sqe, 0, sizeof(sqe));
	sqe.opcode = NVM_ADMIN_DEL_IOSQ;
	htolem16(&sqe.qid, q->q_id);

	rv = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_sqe_fill, NVME_TIMO_QOP);
	if (rv != 0)
		goto fail;

	ccb->ccb_done = nvme_empty_done;
	ccb->ccb_cookie = &sqe;

	memset(&sqe, 0, sizeof(sqe));
	sqe.opcode = NVM_ADMIN_DEL_IOCQ;
	htolem64(&sqe.prp1, NVME_DMA_DVA(q->q_sq_dmamem));
	htolem16(&sqe.qid, q->q_id);

	rv = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_sqe_fill, NVME_TIMO_QOP);
	if (rv != 0)
		goto fail;

fail:
	nvme_ccb_put(sc->sc_admin_q, ccb);

	if (rv == 0 && sc->sc_use_mq) {
		if (sc->sc_intr_disestablish(sc, q->q_id))
			rv = 1;
	}

	return rv;
}

static void
nvme_fill_identify(struct nvme_queue *q, struct nvme_ccb *ccb, void *slot)
{
	struct nvme_sqe *sqe = slot;
	struct nvme_dmamem *mem = ccb->ccb_cookie;

	sqe->opcode = NVM_ADMIN_IDENTIFY;
	htolem64(&sqe->entry.prp[0], NVME_DMA_DVA(mem));
	htolem32(&sqe->cdw10, 1);
}

static int
nvme_ccbs_alloc(struct nvme_queue *q, uint16_t nccbs)
{
	struct nvme_softc *sc = q->q_sc;
	struct nvme_ccb *ccb;
	bus_addr_t off;
	uint64_t *prpl;
	u_int i;

	mutex_init(&q->q_ccb_mtx, MUTEX_DEFAULT, IPL_BIO);
	SIMPLEQ_INIT(&q->q_ccb_list);

	q->q_ccbs = kmem_alloc(sizeof(*ccb) * nccbs, KM_SLEEP);
	if (q->q_ccbs == NULL)
		return 1;

	q->q_nccbs = nccbs;
	q->q_nccbs_avail = nccbs;
	q->q_ccb_prpls = nvme_dmamem_alloc(sc,
	    sizeof(*prpl) * sc->sc_max_sgl * nccbs);

	prpl = NVME_DMA_KVA(q->q_ccb_prpls);
	off = 0;

	for (i = 0; i < nccbs; i++) {
		ccb = &q->q_ccbs[i];

		if (bus_dmamap_create(sc->sc_dmat, sc->sc_mdts,
		    sc->sc_max_sgl + 1 /* we get a free prp in the sqe */,
		    sc->sc_mps, sc->sc_mps, BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW,
		    &ccb->ccb_dmamap) != 0)
			goto free_maps;

		ccb->ccb_id = i;
		ccb->ccb_prpl = prpl;
		ccb->ccb_prpl_off = off;
		ccb->ccb_prpl_dva = NVME_DMA_DVA(q->q_ccb_prpls) + off;

		SIMPLEQ_INSERT_TAIL(&q->q_ccb_list, ccb, ccb_entry);

		prpl += sc->sc_max_sgl;
		off += sizeof(*prpl) * sc->sc_max_sgl;
	}

	return 0;

free_maps:
	nvme_ccbs_free(q);
	return 1;
}

static struct nvme_ccb *
nvme_ccb_get(struct nvme_queue *q)
{
	struct nvme_ccb *ccb = NULL;

	mutex_enter(&q->q_ccb_mtx);
	if (q->q_nccbs_avail > 0) {
		ccb = SIMPLEQ_FIRST(&q->q_ccb_list);
		KASSERT(ccb != NULL);
		q->q_nccbs_avail--;

		SIMPLEQ_REMOVE_HEAD(&q->q_ccb_list, ccb_entry);
#ifdef DEBUG
		ccb->ccb_cookie = NULL;
#endif
	}
	mutex_exit(&q->q_ccb_mtx);

	return ccb;
}

static void
nvme_ccb_put(struct nvme_queue *q, struct nvme_ccb *ccb)
{

	mutex_enter(&q->q_ccb_mtx);
#ifdef DEBUG
	ccb->ccb_cookie = (void *)NVME_CCB_FREE;
#endif
	SIMPLEQ_INSERT_HEAD(&q->q_ccb_list, ccb, ccb_entry);
	mutex_exit(&q->q_ccb_mtx);
}

static void
nvme_ccbs_free(struct nvme_queue *q)
{
	struct nvme_softc *sc = q->q_sc;
	struct nvme_ccb *ccb;

	mutex_enter(&q->q_ccb_mtx);
	while ((ccb = SIMPLEQ_FIRST(&q->q_ccb_list)) != NULL) {
		SIMPLEQ_REMOVE_HEAD(&q->q_ccb_list, ccb_entry);
		bus_dmamap_destroy(sc->sc_dmat, ccb->ccb_dmamap);
	}
	mutex_exit(&q->q_ccb_mtx);

	nvme_dmamem_free(sc, q->q_ccb_prpls);
	kmem_free(q->q_ccbs, sizeof(*ccb) * q->q_nccbs);
	q->q_ccbs = NULL;
	mutex_destroy(&q->q_ccb_mtx);
}
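
/*
 * Allocate a queue pair: DMA memory for the submission and completion
 * rings, the doorbell offsets for the given id and stride, and the
 * ccbs used to track in-flight commands.
 */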
static struct nvme_queue *
nvme_q_alloc(struct nvme_softc *sc, uint16_t id, u_int entries, u_int dstrd)
{
	struct nvme_queue *q;

	q = kmem_alloc(sizeof(*q), KM_SLEEP);
	if (q == NULL)
		return NULL;

	q->q_sc = sc;
	q->q_sq_dmamem = nvme_dmamem_alloc(sc,
	    sizeof(struct nvme_sqe) * entries);
	if (q->q_sq_dmamem == NULL)
		goto free;

	q->q_cq_dmamem = nvme_dmamem_alloc(sc,
	    sizeof(struct nvme_cqe) * entries);
	if (q->q_cq_dmamem == NULL)
		goto free_sq;

	memset(NVME_DMA_KVA(q->q_sq_dmamem), 0, NVME_DMA_LEN(q->q_sq_dmamem));
	memset(NVME_DMA_KVA(q->q_cq_dmamem), 0, NVME_DMA_LEN(q->q_cq_dmamem));

	mutex_init(&q->q_sq_mtx, MUTEX_DEFAULT, IPL_BIO);
	mutex_init(&q->q_cq_mtx, MUTEX_DEFAULT, IPL_BIO);
	q->q_sqtdbl = NVME_SQTDBL(id, dstrd);
	q->q_cqhdbl = NVME_CQHDBL(id, dstrd);
	q->q_id = id;
	q->q_entries = entries;
	q->q_sq_tail = 0;
	q->q_cq_head = 0;
	q->q_cq_phase = NVME_CQE_PHASE;

	nvme_dmamem_sync(sc, q->q_sq_dmamem, BUS_DMASYNC_PREWRITE);
	nvme_dmamem_sync(sc, q->q_cq_dmamem, BUS_DMASYNC_PREREAD);

	/*
	 * Due to definition of full and empty queue (queue is empty
	 * when head == tail, full when tail is one less than head),
	 * we can actually only have (entries - 1) in-flight commands.
	 */
	if (nvme_ccbs_alloc(q, entries - 1) != 0) {
		aprint_error_dev(sc->sc_dev, "unable to allocate ccbs\n");
		goto free_cq;
	}

	return q;

free_cq:
	nvme_dmamem_free(sc, q->q_cq_dmamem);
free_sq:
	nvme_dmamem_free(sc, q->q_sq_dmamem);
free:
	kmem_free(q, sizeof(*q));

	return NULL;
}

static void
nvme_q_free(struct nvme_softc *sc, struct nvme_queue *q)
{
	nvme_ccbs_free(q);
	mutex_destroy(&q->q_sq_mtx);
	mutex_destroy(&q->q_cq_mtx);
	nvme_dmamem_sync(sc, q->q_cq_dmamem, BUS_DMASYNC_POSTREAD);
	nvme_dmamem_sync(sc, q->q_sq_dmamem, BUS_DMASYNC_POSTWRITE);
	nvme_dmamem_free(sc, q->q_cq_dmamem);
	nvme_dmamem_free(sc, q->q_sq_dmamem);
	kmem_free(q, sizeof(*q));
}

int
nvme_intr(void *xsc)
{
	struct nvme_softc *sc = xsc;

	/*
	 * INTx is level triggered, controller deasserts the interrupt only
	 * when we advance command queue head via write to the doorbell.
	 * Tell the controller to block the interrupts while we process
	 * the queue(s).
	 */
	nvme_write4(sc, NVME_INTMS, 1);

	softint_schedule(sc->sc_softih[0]);

	/* don't know, might not have been for us */
	return 1;
}

void
nvme_softintr_intx(void *xq)
{
	struct nvme_queue *q = xq;
	struct nvme_softc *sc = q->q_sc;

	nvme_q_complete(sc, sc->sc_admin_q);
	if (sc->sc_q != NULL)
		nvme_q_complete(sc, sc->sc_q[0]);

	/*
	 * Processing done, tell controller to issue interrupts again. There
	 * is no race, as NVMe spec requires the controller to maintain state,
	 * and assert the interrupt whenever there are unacknowledged
	 * completion queue entries.
	 */
	nvme_write4(sc, NVME_INTMC, 1);
}

int
nvme_intr_msi(void *xq)
{
	struct nvme_queue *q = xq;

	KASSERT(q && q->q_sc && q->q_sc->sc_softih
	    && q->q_sc->sc_softih[q->q_id]);

	/*
	 * MSI/MSI-X are edge triggered, so can handover processing to softint
	 * without masking the interrupt.
	 */
	softint_schedule(q->q_sc->sc_softih[q->q_id]);

	return 1;
}

void
nvme_softintr_msi(void *xq)
{
	struct nvme_queue *q = xq;
	struct nvme_softc *sc = q->q_sc;

	nvme_q_complete(sc, q);
}
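
/*
 * Allocate a single-segment chunk of DMA memory, aligned to the
 * controller page size, and map it into kernel virtual address space.
 * Used for the queue rings, the per-queue PRP lists and the identify
 * buffers.
 */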
There 1478 * is no race, as NVMe spec requires the controller to maintain state, 1479 * and assert the interrupt whenever there are unacknowledged 1480 * completion queue entries. 1481 */ 1482 nvme_write4(sc, NVME_INTMC, 1); 1483 } 1484 1485 int 1486 nvme_intr_msi(void *xq) 1487 { 1488 struct nvme_queue *q = xq; 1489 1490 KASSERT(q && q->q_sc && q->q_sc->sc_softih 1491 && q->q_sc->sc_softih[q->q_id]); 1492 1493 /* 1494 * MSI/MSI-X are edge triggered, so can handover processing to softint 1495 * without masking the interrupt. 1496 */ 1497 softint_schedule(q->q_sc->sc_softih[q->q_id]); 1498 1499 return 1; 1500 } 1501 1502 void 1503 nvme_softintr_msi(void *xq) 1504 { 1505 struct nvme_queue *q = xq; 1506 struct nvme_softc *sc = q->q_sc; 1507 1508 nvme_q_complete(sc, q); 1509 } 1510 1511 static struct nvme_dmamem * 1512 nvme_dmamem_alloc(struct nvme_softc *sc, size_t size) 1513 { 1514 struct nvme_dmamem *ndm; 1515 int nsegs; 1516 1517 ndm = kmem_zalloc(sizeof(*ndm), KM_SLEEP); 1518 if (ndm == NULL) 1519 return NULL; 1520 1521 ndm->ndm_size = size; 1522 1523 if (bus_dmamap_create(sc->sc_dmat, size, 1, size, 0, 1524 BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW, &ndm->ndm_map) != 0) 1525 goto ndmfree; 1526 1527 if (bus_dmamem_alloc(sc->sc_dmat, size, sc->sc_mps, 0, &ndm->ndm_seg, 1528 1, &nsegs, BUS_DMA_WAITOK) != 0) 1529 goto destroy; 1530 1531 if (bus_dmamem_map(sc->sc_dmat, &ndm->ndm_seg, nsegs, size, 1532 &ndm->ndm_kva, BUS_DMA_WAITOK) != 0) 1533 goto free; 1534 memset(ndm->ndm_kva, 0, size); 1535 1536 if (bus_dmamap_load(sc->sc_dmat, ndm->ndm_map, ndm->ndm_kva, size, 1537 NULL, BUS_DMA_WAITOK) != 0) 1538 goto unmap; 1539 1540 return ndm; 1541 1542 unmap: 1543 bus_dmamem_unmap(sc->sc_dmat, ndm->ndm_kva, size); 1544 free: 1545 bus_dmamem_free(sc->sc_dmat, &ndm->ndm_seg, 1); 1546 destroy: 1547 bus_dmamap_destroy(sc->sc_dmat, ndm->ndm_map); 1548 ndmfree: 1549 kmem_free(ndm, sizeof(*ndm)); 1550 return NULL; 1551 } 1552 1553 static void 1554 nvme_dmamem_sync(struct nvme_softc *sc, struct nvme_dmamem *mem, int ops) 1555 { 1556 bus_dmamap_sync(sc->sc_dmat, NVME_DMA_MAP(mem), 1557 0, NVME_DMA_LEN(mem), ops); 1558 } 1559 1560 void 1561 nvme_dmamem_free(struct nvme_softc *sc, struct nvme_dmamem *ndm) 1562 { 1563 bus_dmamap_unload(sc->sc_dmat, ndm->ndm_map); 1564 bus_dmamem_unmap(sc->sc_dmat, ndm->ndm_kva, ndm->ndm_size); 1565 bus_dmamem_free(sc->sc_dmat, &ndm->ndm_seg, 1); 1566 bus_dmamap_destroy(sc->sc_dmat, ndm->ndm_map); 1567 kmem_free(ndm, sizeof(*ndm)); 1568 } 1569 1570 /* 1571 * ioctl 1572 */ 1573 1574 dev_type_open(nvmeopen); 1575 dev_type_close(nvmeclose); 1576 dev_type_ioctl(nvmeioctl); 1577 1578 const struct cdevsw nvme_cdevsw = { 1579 .d_open = nvmeopen, 1580 .d_close = nvmeclose, 1581 .d_read = noread, 1582 .d_write = nowrite, 1583 .d_ioctl = nvmeioctl, 1584 .d_stop = nostop, 1585 .d_tty = notty, 1586 .d_poll = nopoll, 1587 .d_mmap = nommap, 1588 .d_kqfilter = nokqfilter, 1589 .d_discard = nodiscard, 1590 .d_flag = D_OTHER, 1591 }; 1592 1593 extern struct cfdriver nvme_cd; 1594 1595 /* 1596 * Accept an open operation on the control device. 

/*
 * Accept an open operation on the control device.
 */
int
nvmeopen(dev_t dev, int flag, int mode, struct lwp *l)
{
	struct nvme_softc *sc;
	int unit = minor(dev) / 0x10000;
	int nsid = minor(dev) & 0xffff;
	int nsidx;

	if ((sc = device_lookup_private(&nvme_cd, unit)) == NULL)
		return ENXIO;
	if ((sc->sc_flags & NVME_F_ATTACHED) == 0)
		return ENXIO;

	if (nsid == 0) {
		/* controller */
		if (ISSET(sc->sc_flags, NVME_F_OPEN))
			return EBUSY;
		SET(sc->sc_flags, NVME_F_OPEN);
	} else {
		/* namespace */
		nsidx = nsid - 1;
		if (nsidx >= sc->sc_nn || sc->sc_namespaces[nsidx].dev == NULL)
			return ENXIO;
		if (ISSET(sc->sc_namespaces[nsidx].flags, NVME_NS_F_OPEN))
			return EBUSY;
		SET(sc->sc_namespaces[nsidx].flags, NVME_NS_F_OPEN);
	}
	return 0;
}

/*
 * Accept the last close on the control device.
 */
int
nvmeclose(dev_t dev, int flag, int mode, struct lwp *l)
{
	struct nvme_softc *sc;
	int unit = minor(dev) / 0x10000;
	int nsid = minor(dev) & 0xffff;
	int nsidx;

	sc = device_lookup_private(&nvme_cd, unit);
	if (sc == NULL)
		return ENXIO;

	if (nsid == 0) {
		/* controller */
		CLR(sc->sc_flags, NVME_F_OPEN);
	} else {
		/* namespace */
		nsidx = nsid - 1;
		if (nsidx >= sc->sc_nn)
			return ENXIO;
		CLR(sc->sc_namespaces[nsidx].flags, NVME_NS_F_OPEN);
	}

	return 0;
}

/*
 * Handle control operations.
 */
int
nvmeioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
{
	struct nvme_softc *sc;
	int unit = minor(dev) / 0x10000;
	int nsid = minor(dev) & 0xffff;
	struct nvme_pt_command *pt;

	sc = device_lookup_private(&nvme_cd, unit);
	if (sc == NULL)
		return ENXIO;

	switch (cmd) {
	case NVME_PASSTHROUGH_CMD:
		pt = data;
		return nvme_command_passthrough(sc, data,
		    nsid == 0 ? pt->cmd.nsid : nsid, l, nsid == 0);
	}

	return ENOTTY;
}