1 /* $NetBSD: nvme.c,v 1.57 2021/05/29 10:48:23 riastradh Exp $ */ 2 /* $OpenBSD: nvme.c,v 1.49 2016/04/18 05:59:50 dlg Exp $ */ 3 4 /* 5 * Copyright (c) 2014 David Gwynne <dlg@openbsd.org> 6 * 7 * Permission to use, copy, modify, and distribute this software for any 8 * purpose with or without fee is hereby granted, provided that the above 9 * copyright notice and this permission notice appear in all copies. 10 * 11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 18 */ 19 20 #include <sys/cdefs.h> 21 __KERNEL_RCSID(0, "$NetBSD: nvme.c,v 1.57 2021/05/29 10:48:23 riastradh Exp $"); 22 23 #include <sys/param.h> 24 #include <sys/systm.h> 25 #include <sys/kernel.h> 26 #include <sys/atomic.h> 27 #include <sys/bus.h> 28 #include <sys/buf.h> 29 #include <sys/conf.h> 30 #include <sys/device.h> 31 #include <sys/kmem.h> 32 #include <sys/once.h> 33 #include <sys/proc.h> 34 #include <sys/queue.h> 35 #include <sys/mutex.h> 36 37 #include <uvm/uvm_extern.h> 38 39 #include <dev/ic/nvmereg.h> 40 #include <dev/ic/nvmevar.h> 41 #include <dev/ic/nvmeio.h> 42 43 #include "ioconf.h" 44 #include "locators.h" 45 46 #define B4_CHK_RDY_DELAY_MS 2300 /* workaround controller bug */ 47 48 int nvme_adminq_size = 32; 49 int nvme_ioq_size = 1024; 50 51 static int nvme_print(void *, const char *); 52 53 static int nvme_ready(struct nvme_softc *, uint32_t); 54 static int nvme_enable(struct nvme_softc *, u_int); 55 static int nvme_disable(struct nvme_softc *); 56 static int nvme_shutdown(struct nvme_softc *); 57 58 #ifdef NVME_DEBUG 59 static void nvme_dumpregs(struct nvme_softc *); 60 #endif 61 static int nvme_identify(struct nvme_softc *, u_int); 62 static void nvme_fill_identify(struct nvme_queue *, struct nvme_ccb *, 63 void *); 64 65 static int nvme_ccbs_alloc(struct nvme_queue *, uint16_t); 66 static void nvme_ccbs_free(struct nvme_queue *); 67 68 static struct nvme_ccb * 69 nvme_ccb_get(struct nvme_queue *, bool); 70 static void nvme_ccb_put(struct nvme_queue *, struct nvme_ccb *); 71 72 static int nvme_poll(struct nvme_softc *, struct nvme_queue *, 73 struct nvme_ccb *, void (*)(struct nvme_queue *, 74 struct nvme_ccb *, void *), int); 75 static void nvme_poll_fill(struct nvme_queue *, struct nvme_ccb *, void *); 76 static void nvme_poll_done(struct nvme_queue *, struct nvme_ccb *, 77 struct nvme_cqe *); 78 static void nvme_sqe_fill(struct nvme_queue *, struct nvme_ccb *, void *); 79 static void nvme_empty_done(struct nvme_queue *, struct nvme_ccb *, 80 struct nvme_cqe *); 81 82 static struct nvme_queue * 83 nvme_q_alloc(struct nvme_softc *, uint16_t, u_int, u_int); 84 static int nvme_q_create(struct nvme_softc *, struct nvme_queue *); 85 static void nvme_q_reset(struct nvme_softc *, struct nvme_queue *); 86 static int nvme_q_delete(struct nvme_softc *, struct nvme_queue *); 87 static void nvme_q_submit(struct nvme_softc *, struct nvme_queue *, 88 struct nvme_ccb *, void (*)(struct nvme_queue *, 89 struct nvme_ccb *, void *)); 90 static int nvme_q_complete(struct nvme_softc *, struct nvme_queue *q); 91 static void nvme_q_free(struct nvme_softc *, struct 
nvme_queue *); 92 static void nvme_q_wait_complete(struct nvme_softc *, struct nvme_queue *, 93 bool (*)(void *), void *); 94 95 static struct nvme_dmamem * 96 nvme_dmamem_alloc(struct nvme_softc *, size_t); 97 static void nvme_dmamem_free(struct nvme_softc *, struct nvme_dmamem *); 98 static void nvme_dmamem_sync(struct nvme_softc *, struct nvme_dmamem *, 99 int); 100 101 static void nvme_ns_io_fill(struct nvme_queue *, struct nvme_ccb *, 102 void *); 103 static void nvme_ns_io_done(struct nvme_queue *, struct nvme_ccb *, 104 struct nvme_cqe *); 105 static void nvme_ns_sync_fill(struct nvme_queue *, struct nvme_ccb *, 106 void *); 107 static void nvme_ns_sync_done(struct nvme_queue *, struct nvme_ccb *, 108 struct nvme_cqe *); 109 static void nvme_getcache_fill(struct nvme_queue *, struct nvme_ccb *, 110 void *); 111 static void nvme_getcache_done(struct nvme_queue *, struct nvme_ccb *, 112 struct nvme_cqe *); 113 114 static void nvme_pt_fill(struct nvme_queue *, struct nvme_ccb *, 115 void *); 116 static void nvme_pt_done(struct nvme_queue *, struct nvme_ccb *, 117 struct nvme_cqe *); 118 static int nvme_command_passthrough(struct nvme_softc *, 119 struct nvme_pt_command *, uint16_t, struct lwp *, bool); 120 121 static int nvme_set_number_of_queues(struct nvme_softc *, u_int, u_int *, 122 u_int *); 123 124 #define NVME_TIMO_QOP 5 /* queue create and delete timeout */ 125 #define NVME_TIMO_IDENT 10 /* probe identify timeout */ 126 #define NVME_TIMO_PT -1 /* passthrough cmd timeout */ 127 #define NVME_TIMO_SY 60 /* sync cache timeout */ 128 129 #define nvme_read4(_s, _r) \ 130 bus_space_read_4((_s)->sc_iot, (_s)->sc_ioh, (_r)) 131 #define nvme_write4(_s, _r, _v) \ 132 bus_space_write_4((_s)->sc_iot, (_s)->sc_ioh, (_r), (_v)) 133 /* 134 * Some controllers, at least Apple NVMe, always require split 135 * transfers, so don't use bus_space_{read,write}_8() on LP64. 
136 */ 137 static inline uint64_t 138 nvme_read8(struct nvme_softc *sc, bus_size_t r) 139 { 140 uint64_t v; 141 uint32_t *a = (uint32_t *)&v; 142 143 #if _BYTE_ORDER == _LITTLE_ENDIAN 144 a[0] = nvme_read4(sc, r); 145 a[1] = nvme_read4(sc, r + 4); 146 #else /* _BYTE_ORDER == _LITTLE_ENDIAN */ 147 a[1] = nvme_read4(sc, r); 148 a[0] = nvme_read4(sc, r + 4); 149 #endif 150 151 return v; 152 } 153 154 static inline void 155 nvme_write8(struct nvme_softc *sc, bus_size_t r, uint64_t v) 156 { 157 uint32_t *a = (uint32_t *)&v; 158 159 #if _BYTE_ORDER == _LITTLE_ENDIAN 160 nvme_write4(sc, r, a[0]); 161 nvme_write4(sc, r + 4, a[1]); 162 #else /* _BYTE_ORDER == _LITTLE_ENDIAN */ 163 nvme_write4(sc, r, a[1]); 164 nvme_write4(sc, r + 4, a[0]); 165 #endif 166 } 167 #define nvme_barrier(_s, _r, _l, _f) \ 168 bus_space_barrier((_s)->sc_iot, (_s)->sc_ioh, (_r), (_l), (_f)) 169 170 #ifdef NVME_DEBUG 171 static __used void 172 nvme_dumpregs(struct nvme_softc *sc) 173 { 174 uint64_t r8; 175 uint32_t r4; 176 177 #define DEVNAME(_sc) device_xname((_sc)->sc_dev) 178 r8 = nvme_read8(sc, NVME_CAP); 179 printf("%s: cap 0x%016"PRIx64"\n", DEVNAME(sc), nvme_read8(sc, NVME_CAP)); 180 printf("%s: mpsmax %u (%u)\n", DEVNAME(sc), 181 (u_int)NVME_CAP_MPSMAX(r8), (1 << NVME_CAP_MPSMAX(r8))); 182 printf("%s: mpsmin %u (%u)\n", DEVNAME(sc), 183 (u_int)NVME_CAP_MPSMIN(r8), (1 << NVME_CAP_MPSMIN(r8))); 184 printf("%s: css %"PRIu64"\n", DEVNAME(sc), NVME_CAP_CSS(r8)); 185 printf("%s: nssrs %"PRIu64"\n", DEVNAME(sc), NVME_CAP_NSSRS(r8)); 186 printf("%s: dstrd %"PRIu64"\n", DEVNAME(sc), NVME_CAP_DSTRD(r8)); 187 printf("%s: to %"PRIu64" msec\n", DEVNAME(sc), NVME_CAP_TO(r8)); 188 printf("%s: ams %"PRIu64"\n", DEVNAME(sc), NVME_CAP_AMS(r8)); 189 printf("%s: cqr %"PRIu64"\n", DEVNAME(sc), NVME_CAP_CQR(r8)); 190 printf("%s: mqes %"PRIu64"\n", DEVNAME(sc), NVME_CAP_MQES(r8)); 191 192 printf("%s: vs 0x%04x\n", DEVNAME(sc), nvme_read4(sc, NVME_VS)); 193 194 r4 = nvme_read4(sc, NVME_CC); 195 printf("%s: cc 0x%04x\n", DEVNAME(sc), r4); 196 printf("%s: iocqes %u (%u)\n", DEVNAME(sc), NVME_CC_IOCQES_R(r4), 197 (1 << NVME_CC_IOCQES_R(r4))); 198 printf("%s: iosqes %u (%u)\n", DEVNAME(sc), NVME_CC_IOSQES_R(r4), 199 (1 << NVME_CC_IOSQES_R(r4))); 200 printf("%s: shn %u\n", DEVNAME(sc), NVME_CC_SHN_R(r4)); 201 printf("%s: ams %u\n", DEVNAME(sc), NVME_CC_AMS_R(r4)); 202 printf("%s: mps %u (%u)\n", DEVNAME(sc), NVME_CC_MPS_R(r4), 203 (1 << NVME_CC_MPS_R(r4))); 204 printf("%s: css %u\n", DEVNAME(sc), NVME_CC_CSS_R(r4)); 205 printf("%s: en %u\n", DEVNAME(sc), ISSET(r4, NVME_CC_EN) ? 
1 : 0); 206 207 r4 = nvme_read4(sc, NVME_CSTS); 208 printf("%s: csts 0x%08x\n", DEVNAME(sc), r4); 209 printf("%s: rdy %u\n", DEVNAME(sc), r4 & NVME_CSTS_RDY); 210 printf("%s: cfs %u\n", DEVNAME(sc), r4 & NVME_CSTS_CFS); 211 printf("%s: shst %x\n", DEVNAME(sc), r4 & NVME_CSTS_SHST_MASK); 212 213 r4 = nvme_read4(sc, NVME_AQA); 214 printf("%s: aqa 0x%08x\n", DEVNAME(sc), r4); 215 printf("%s: acqs %u\n", DEVNAME(sc), NVME_AQA_ACQS_R(r4)); 216 printf("%s: asqs %u\n", DEVNAME(sc), NVME_AQA_ASQS_R(r4)); 217 218 printf("%s: asq 0x%016"PRIx64"\n", DEVNAME(sc), nvme_read8(sc, NVME_ASQ)); 219 printf("%s: acq 0x%016"PRIx64"\n", DEVNAME(sc), nvme_read8(sc, NVME_ACQ)); 220 #undef DEVNAME 221 } 222 #endif /* NVME_DEBUG */ 223 224 static int 225 nvme_ready(struct nvme_softc *sc, uint32_t rdy) 226 { 227 u_int i = 0; 228 229 while ((nvme_read4(sc, NVME_CSTS) & NVME_CSTS_RDY) != rdy) { 230 if (i++ > sc->sc_rdy_to) 231 return ENXIO; 232 233 delay(1000); 234 nvme_barrier(sc, NVME_CSTS, 4, BUS_SPACE_BARRIER_READ); 235 } 236 237 return 0; 238 } 239 240 static int 241 nvme_enable(struct nvme_softc *sc, u_int mps) 242 { 243 uint32_t cc, csts; 244 int error; 245 246 cc = nvme_read4(sc, NVME_CC); 247 csts = nvme_read4(sc, NVME_CSTS); 248 249 /* 250 * See note in nvme_disable. Short circuit if we're already enabled. 251 */ 252 if (ISSET(cc, NVME_CC_EN)) { 253 if (ISSET(csts, NVME_CSTS_RDY)) 254 return 0; 255 256 goto waitready; 257 } else { 258 /* EN == 0 already wait for RDY == 0 or fail */ 259 error = nvme_ready(sc, 0); 260 if (error) 261 return error; 262 } 263 264 nvme_write8(sc, NVME_ASQ, NVME_DMA_DVA(sc->sc_admin_q->q_sq_dmamem)); 265 nvme_barrier(sc, 0, sc->sc_ios, BUS_SPACE_BARRIER_WRITE); 266 delay(5000); 267 nvme_write8(sc, NVME_ACQ, NVME_DMA_DVA(sc->sc_admin_q->q_cq_dmamem)); 268 nvme_barrier(sc, 0, sc->sc_ios, BUS_SPACE_BARRIER_WRITE); 269 delay(5000); 270 271 nvme_write4(sc, NVME_AQA, NVME_AQA_ACQS(sc->sc_admin_q->q_entries) | 272 NVME_AQA_ASQS(sc->sc_admin_q->q_entries)); 273 nvme_barrier(sc, 0, sc->sc_ios, BUS_SPACE_BARRIER_WRITE); 274 delay(5000); 275 276 CLR(cc, NVME_CC_IOCQES_MASK | NVME_CC_IOSQES_MASK | NVME_CC_SHN_MASK | 277 NVME_CC_AMS_MASK | NVME_CC_MPS_MASK | NVME_CC_CSS_MASK); 278 SET(cc, NVME_CC_IOSQES(ffs(64) - 1) | NVME_CC_IOCQES(ffs(16) - 1)); 279 SET(cc, NVME_CC_SHN(NVME_CC_SHN_NONE)); 280 SET(cc, NVME_CC_CSS(NVME_CC_CSS_NVM)); 281 SET(cc, NVME_CC_AMS(NVME_CC_AMS_RR)); 282 SET(cc, NVME_CC_MPS(mps)); 283 SET(cc, NVME_CC_EN); 284 285 nvme_write4(sc, NVME_CC, cc); 286 nvme_barrier(sc, 0, sc->sc_ios, 287 BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); 288 289 waitready: 290 return nvme_ready(sc, NVME_CSTS_RDY); 291 } 292 293 static int 294 nvme_disable(struct nvme_softc *sc) 295 { 296 uint32_t cc, csts; 297 int error; 298 299 cc = nvme_read4(sc, NVME_CC); 300 csts = nvme_read4(sc, NVME_CSTS); 301 302 /* 303 * Per 3.1.5 in NVME 1.3 spec, transitioning CC.EN from 0 to 1 304 * when CSTS.RDY is 1 or transitioning CC.EN from 1 to 0 when 305 * CSTS.RDY is 0 "has undefined results" So make sure that CSTS.RDY 306 * isn't the desired value. Short circuit if we're already disabled. 
307 */ 308 if (ISSET(cc, NVME_CC_EN)) { 309 if (!ISSET(csts, NVME_CSTS_RDY)) { 310 /* EN == 1, wait for RDY == 1 or fail */ 311 error = nvme_ready(sc, NVME_CSTS_RDY); 312 if (error) 313 return error; 314 } 315 } else { 316 /* EN == 0 already wait for RDY == 0 */ 317 if (!ISSET(csts, NVME_CSTS_RDY)) 318 return 0; 319 320 goto waitready; 321 } 322 323 CLR(cc, NVME_CC_EN); 324 nvme_write4(sc, NVME_CC, cc); 325 nvme_barrier(sc, 0, sc->sc_ios, BUS_SPACE_BARRIER_READ); 326 327 /* 328 * Some drives have issues with accessing the mmio after we disable, 329 * so delay for a bit after we write the bit to cope with these issues. 330 */ 331 if (ISSET(sc->sc_quirks, NVME_QUIRK_DELAY_B4_CHK_RDY)) 332 delay(B4_CHK_RDY_DELAY_MS); 333 334 waitready: 335 return nvme_ready(sc, 0); 336 } 337 338 int 339 nvme_attach(struct nvme_softc *sc) 340 { 341 uint64_t cap; 342 uint32_t reg; 343 u_int mps = PAGE_SHIFT; 344 u_int ncq, nsq; 345 uint16_t adminq_entries = nvme_adminq_size; 346 uint16_t ioq_entries = nvme_ioq_size; 347 int i; 348 349 reg = nvme_read4(sc, NVME_VS); 350 if (reg == 0xffffffff) { 351 aprint_error_dev(sc->sc_dev, "invalid mapping\n"); 352 return 1; 353 } 354 355 if (NVME_VS_TER(reg) == 0) 356 aprint_normal_dev(sc->sc_dev, "NVMe %d.%d\n", NVME_VS_MJR(reg), 357 NVME_VS_MNR(reg)); 358 else 359 aprint_normal_dev(sc->sc_dev, "NVMe %d.%d.%d\n", NVME_VS_MJR(reg), 360 NVME_VS_MNR(reg), NVME_VS_TER(reg)); 361 362 cap = nvme_read8(sc, NVME_CAP); 363 sc->sc_dstrd = NVME_CAP_DSTRD(cap); 364 if (NVME_CAP_MPSMIN(cap) > PAGE_SHIFT) { 365 aprint_error_dev(sc->sc_dev, "NVMe minimum page size %u " 366 "is greater than CPU page size %u\n", 367 1 << NVME_CAP_MPSMIN(cap), 1 << PAGE_SHIFT); 368 return 1; 369 } 370 if (NVME_CAP_MPSMAX(cap) < mps) 371 mps = NVME_CAP_MPSMAX(cap); 372 if (ioq_entries > NVME_CAP_MQES(cap)) 373 ioq_entries = NVME_CAP_MQES(cap); 374 375 /* set initial values to be used for admin queue during probe */ 376 sc->sc_rdy_to = NVME_CAP_TO(cap); 377 sc->sc_mps = 1 << mps; 378 sc->sc_mdts = MAXPHYS; 379 sc->sc_max_sgl = btoc(round_page(sc->sc_mdts)); 380 381 if (nvme_disable(sc) != 0) { 382 aprint_error_dev(sc->sc_dev, "unable to disable controller\n"); 383 return 1; 384 } 385 386 sc->sc_admin_q = nvme_q_alloc(sc, NVME_ADMIN_Q, adminq_entries, 387 sc->sc_dstrd); 388 if (sc->sc_admin_q == NULL) { 389 aprint_error_dev(sc->sc_dev, 390 "unable to allocate admin queue\n"); 391 return 1; 392 } 393 if (sc->sc_intr_establish(sc, NVME_ADMIN_Q, sc->sc_admin_q)) 394 goto free_admin_q; 395 396 if (nvme_enable(sc, mps) != 0) { 397 aprint_error_dev(sc->sc_dev, "unable to enable controller\n"); 398 goto disestablish_admin_q; 399 } 400 401 if (nvme_identify(sc, NVME_CAP_MPSMIN(cap)) != 0) { 402 aprint_error_dev(sc->sc_dev, "unable to identify controller\n"); 403 goto disable; 404 } 405 if (sc->sc_nn == 0) { 406 aprint_error_dev(sc->sc_dev, "namespace not found\n"); 407 goto disable; 408 } 409 410 /* we know how big things are now */ 411 sc->sc_max_sgl = sc->sc_mdts / sc->sc_mps; 412 413 /* reallocate ccbs of admin queue with new max sgl. 
*/ 414 nvme_ccbs_free(sc->sc_admin_q); 415 nvme_ccbs_alloc(sc->sc_admin_q, sc->sc_admin_q->q_entries); 416 417 if (sc->sc_use_mq) { 418 /* Limit the number of queues to the number allocated in HW */ 419 if (nvme_set_number_of_queues(sc, sc->sc_nq, &ncq, &nsq) != 0) { 420 aprint_error_dev(sc->sc_dev, 421 "unable to get number of queues\n"); 422 goto disable; 423 } 424 if (sc->sc_nq > ncq) 425 sc->sc_nq = ncq; 426 if (sc->sc_nq > nsq) 427 sc->sc_nq = nsq; 428 } 429 430 sc->sc_q = kmem_zalloc(sizeof(*sc->sc_q) * sc->sc_nq, KM_SLEEP); 431 for (i = 0; i < sc->sc_nq; i++) { 432 sc->sc_q[i] = nvme_q_alloc(sc, i + 1, ioq_entries, 433 sc->sc_dstrd); 434 if (sc->sc_q[i] == NULL) { 435 aprint_error_dev(sc->sc_dev, 436 "unable to allocate io queue\n"); 437 goto free_q; 438 } 439 if (nvme_q_create(sc, sc->sc_q[i]) != 0) { 440 aprint_error_dev(sc->sc_dev, 441 "unable to create io queue\n"); 442 nvme_q_free(sc, sc->sc_q[i]); 443 goto free_q; 444 } 445 } 446 447 if (!sc->sc_use_mq) 448 nvme_write4(sc, NVME_INTMC, 1); 449 450 /* probe subdevices */ 451 sc->sc_namespaces = kmem_zalloc(sizeof(*sc->sc_namespaces) * sc->sc_nn, 452 KM_SLEEP); 453 nvme_rescan(sc->sc_dev, NULL, NULL); 454 455 return 0; 456 457 free_q: 458 while (--i >= 0) { 459 nvme_q_delete(sc, sc->sc_q[i]); 460 nvme_q_free(sc, sc->sc_q[i]); 461 } 462 disable: 463 nvme_disable(sc); 464 disestablish_admin_q: 465 sc->sc_intr_disestablish(sc, NVME_ADMIN_Q); 466 free_admin_q: 467 nvme_q_free(sc, sc->sc_admin_q); 468 469 return 1; 470 } 471 472 int 473 nvme_rescan(device_t self, const char *ifattr, const int *locs) 474 { 475 struct nvme_softc *sc = device_private(self); 476 struct nvme_attach_args naa; 477 struct nvm_namespace_format *f; 478 struct nvme_namespace *ns; 479 uint64_t cap; 480 int ioq_entries = nvme_ioq_size; 481 int i, mlocs[NVMECF_NLOCS]; 482 int error; 483 484 cap = nvme_read8(sc, NVME_CAP); 485 if (ioq_entries > NVME_CAP_MQES(cap)) 486 ioq_entries = NVME_CAP_MQES(cap); 487 488 for (i = 1; i <= sc->sc_nn; i++) { 489 if (sc->sc_namespaces[i - 1].dev) 490 continue; 491 492 /* identify to check for availability */ 493 error = nvme_ns_identify(sc, i); 494 if (error) { 495 aprint_error_dev(self, "couldn't identify namespace #%d\n", i); 496 continue; 497 } 498 499 ns = nvme_ns_get(sc, i); 500 KASSERT(ns); 501 502 f = &ns->ident->lbaf[NVME_ID_NS_FLBAS(ns->ident->flbas)]; 503 504 /* 505 * NVME1.0e 6.11 Identify command 506 * 507 * LBADS values smaller than 9 are not supported, a value 508 * of zero means that the format is not used. 
509 */ 510 if (f->lbads < 9) { 511 if (f->lbads > 0) 512 aprint_error_dev(self, 513 "unsupported logical data size %u\n", f->lbads); 514 continue; 515 } 516 517 mlocs[NVMECF_NSID] = i; 518 519 memset(&naa, 0, sizeof(naa)); 520 naa.naa_nsid = i; 521 naa.naa_qentries = (ioq_entries - 1) * sc->sc_nq; 522 naa.naa_maxphys = sc->sc_mdts; 523 naa.naa_typename = sc->sc_modelname; 524 sc->sc_namespaces[i - 1].dev = 525 config_found(sc->sc_dev, &naa, nvme_print, 526 CFARG_SUBMATCH, config_stdsubmatch, 527 CFARG_LOCATORS, mlocs, 528 CFARG_EOL); 529 } 530 return 0; 531 } 532 533 static int 534 nvme_print(void *aux, const char *pnp) 535 { 536 struct nvme_attach_args *naa = aux; 537 538 if (pnp) 539 aprint_normal("ld at %s", pnp); 540 541 if (naa->naa_nsid > 0) 542 aprint_normal(" nsid %d", naa->naa_nsid); 543 544 return UNCONF; 545 } 546 547 int 548 nvme_detach(struct nvme_softc *sc, int flags) 549 { 550 int i, error; 551 552 error = config_detach_children(sc->sc_dev, flags); 553 if (error) 554 return error; 555 556 error = nvme_shutdown(sc); 557 if (error) 558 return error; 559 560 /* from now on we are committed to detach, following will never fail */ 561 sc->sc_intr_disestablish(sc, NVME_ADMIN_Q); 562 for (i = 0; i < sc->sc_nq; i++) 563 nvme_q_free(sc, sc->sc_q[i]); 564 kmem_free(sc->sc_q, sizeof(*sc->sc_q) * sc->sc_nq); 565 nvme_q_free(sc, sc->sc_admin_q); 566 567 return 0; 568 } 569 570 int 571 nvme_suspend(struct nvme_softc *sc) 572 { 573 574 return nvme_shutdown(sc); 575 } 576 577 int 578 nvme_resume(struct nvme_softc *sc) 579 { 580 int ioq_entries = nvme_ioq_size; 581 uint64_t cap; 582 int i, error; 583 584 error = nvme_disable(sc); 585 if (error) { 586 device_printf(sc->sc_dev, "unable to disable controller\n"); 587 return error; 588 } 589 590 nvme_q_reset(sc, sc->sc_admin_q); 591 592 error = nvme_enable(sc, ffs(sc->sc_mps) - 1); 593 if (error) { 594 device_printf(sc->sc_dev, "unable to enable controller\n"); 595 return error; 596 } 597 598 for (i = 0; i < sc->sc_nq; i++) { 599 cap = nvme_read8(sc, NVME_CAP); 600 if (ioq_entries > NVME_CAP_MQES(cap)) 601 ioq_entries = NVME_CAP_MQES(cap); 602 sc->sc_q[i] = nvme_q_alloc(sc, i + 1, ioq_entries, 603 sc->sc_dstrd); 604 if (sc->sc_q[i] == NULL) { 605 error = ENOMEM; 606 device_printf(sc->sc_dev, "unable to allocate io q %d" 607 "\n", i); 608 goto disable; 609 } 610 if (nvme_q_create(sc, sc->sc_q[i]) != 0) { 611 error = EIO; 612 device_printf(sc->sc_dev, "unable to create io q %d" 613 "\n", i); 614 nvme_q_free(sc, sc->sc_q[i]); 615 goto free_q; 616 } 617 } 618 619 nvme_write4(sc, NVME_INTMC, 1); 620 621 return 0; 622 623 free_q: 624 while (i --> 0) 625 nvme_q_free(sc, sc->sc_q[i]); 626 disable: 627 (void)nvme_disable(sc); 628 629 return error; 630 } 631 632 static int 633 nvme_shutdown(struct nvme_softc *sc) 634 { 635 uint32_t cc, csts; 636 bool disabled = false; 637 int i; 638 639 if (!sc->sc_use_mq) 640 nvme_write4(sc, NVME_INTMS, 1); 641 642 for (i = 0; i < sc->sc_nq; i++) { 643 if (nvme_q_delete(sc, sc->sc_q[i]) != 0) { 644 aprint_error_dev(sc->sc_dev, 645 "unable to delete io queue %d, disabling\n", i + 1); 646 disabled = true; 647 } 648 } 649 if (disabled) 650 goto disable; 651 652 cc = nvme_read4(sc, NVME_CC); 653 CLR(cc, NVME_CC_SHN_MASK); 654 SET(cc, NVME_CC_SHN(NVME_CC_SHN_NORMAL)); 655 nvme_write4(sc, NVME_CC, cc); 656 657 for (i = 0; i < 4000; i++) { 658 nvme_barrier(sc, 0, sc->sc_ios, 659 BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); 660 csts = nvme_read4(sc, NVME_CSTS); 661 if ((csts & NVME_CSTS_SHST_MASK) == NVME_CSTS_SHST_DONE) 
662 return 0;
663
664 delay(1000);
665 }
666
667 aprint_error_dev(sc->sc_dev, "unable to shut down, disabling\n");
668
669 disable:
670 nvme_disable(sc);
671 return 0;
672 }
673
674 void
675 nvme_childdet(device_t self, device_t child)
676 {
677 struct nvme_softc *sc = device_private(self);
678 int i;
679
680 for (i = 0; i < sc->sc_nn; i++) {
681 if (sc->sc_namespaces[i].dev == child) {
682 /* Already freed ns->ident. */
683 sc->sc_namespaces[i].dev = NULL;
684 break;
685 }
686 }
687 }
688
689 int
690 nvme_ns_identify(struct nvme_softc *sc, uint16_t nsid)
691 {
692 struct nvme_sqe sqe;
693 struct nvm_identify_namespace *identify;
694 struct nvme_dmamem *mem;
695 struct nvme_ccb *ccb;
696 struct nvme_namespace *ns;
697 int rv;
698
699 KASSERT(nsid > 0);
700
701 ns = nvme_ns_get(sc, nsid);
702 KASSERT(ns);
703
704 if (ns->ident != NULL)
705 return 0;
706
707 ccb = nvme_ccb_get(sc->sc_admin_q, false);
708 KASSERT(ccb != NULL); /* it's a bug if we don't have spare ccb here */
709
710 mem = nvme_dmamem_alloc(sc, sizeof(*identify));
711 if (mem == NULL) {
712 nvme_ccb_put(sc->sc_admin_q, ccb);
713 return ENOMEM;
714 }
715
716 memset(&sqe, 0, sizeof(sqe));
717 sqe.opcode = NVM_ADMIN_IDENTIFY;
718 htolem32(&sqe.nsid, nsid);
719 htolem64(&sqe.entry.prp[0], NVME_DMA_DVA(mem));
720 htolem32(&sqe.cdw10, 0);
721
722 ccb->ccb_done = nvme_empty_done;
723 ccb->ccb_cookie = &sqe;
724
725 nvme_dmamem_sync(sc, mem, BUS_DMASYNC_PREREAD);
726 rv = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_sqe_fill, NVME_TIMO_IDENT);
727 nvme_dmamem_sync(sc, mem, BUS_DMASYNC_POSTREAD);
728
729 nvme_ccb_put(sc->sc_admin_q, ccb);
730
731 if (rv != 0) {
732 rv = EIO;
733 goto done;
734 }
735
736 /* commit */
737
738 identify = kmem_zalloc(sizeof(*identify), KM_SLEEP);
739 *identify = *((volatile struct nvm_identify_namespace *)NVME_DMA_KVA(mem));
740
741 /* Convert data to host endian */
742 nvme_identify_namespace_swapbytes(identify);
743
744 ns->ident = identify;
745
746 done:
747 nvme_dmamem_free(sc, mem);
748
749 return rv;
750 }
751
752 int
753 nvme_ns_dobio(struct nvme_softc *sc, uint16_t nsid, void *cookie,
754 struct buf *bp, void *data, size_t datasize,
755 int secsize, daddr_t blkno, int flags, nvme_nnc_done nnc_done)
756 {
757 struct nvme_queue *q = nvme_get_q(sc, bp, false);
758 struct nvme_ccb *ccb;
759 bus_dmamap_t dmap;
760 int i, error;
761
762 ccb = nvme_ccb_get(q, false);
763 if (ccb == NULL)
764 return EAGAIN;
765
766 ccb->ccb_done = nvme_ns_io_done;
767 ccb->ccb_cookie = cookie;
768
769 /* namespace context */
770 ccb->nnc_nsid = nsid;
771 ccb->nnc_flags = flags;
772 ccb->nnc_buf = bp;
773 ccb->nnc_datasize = datasize;
774 ccb->nnc_secsize = secsize;
775 ccb->nnc_blkno = blkno;
776 ccb->nnc_done = nnc_done;
777
778 dmap = ccb->ccb_dmamap;
779 error = bus_dmamap_load(sc->sc_dmat, dmap, data,
780 datasize, NULL,
781 (ISSET(flags, NVME_NS_CTX_F_POLL) ?
782 BUS_DMA_NOWAIT : BUS_DMA_WAITOK) |
783 (ISSET(flags, NVME_NS_CTX_F_READ) ?
784 BUS_DMA_READ : BUS_DMA_WRITE));
785 if (error) {
786 nvme_ccb_put(q, ccb);
787 return error;
788 }
789
790 bus_dmamap_sync(sc->sc_dmat, dmap, 0, dmap->dm_mapsize,
791 ISSET(flags, NVME_NS_CTX_F_READ) ?
792 BUS_DMASYNC_PREREAD : BUS_DMASYNC_PREWRITE); 793 794 if (dmap->dm_nsegs > 2) { 795 for (i = 1; i < dmap->dm_nsegs; i++) { 796 htolem64(&ccb->ccb_prpl[i - 1], 797 dmap->dm_segs[i].ds_addr); 798 } 799 bus_dmamap_sync(sc->sc_dmat, 800 NVME_DMA_MAP(q->q_ccb_prpls), 801 ccb->ccb_prpl_off, 802 sizeof(*ccb->ccb_prpl) * (dmap->dm_nsegs - 1), 803 BUS_DMASYNC_PREWRITE); 804 } 805 806 if (ISSET(flags, NVME_NS_CTX_F_POLL)) { 807 if (nvme_poll(sc, q, ccb, nvme_ns_io_fill, NVME_TIMO_PT) != 0) 808 return EIO; 809 return 0; 810 } 811 812 nvme_q_submit(sc, q, ccb, nvme_ns_io_fill); 813 return 0; 814 } 815 816 static void 817 nvme_ns_io_fill(struct nvme_queue *q, struct nvme_ccb *ccb, void *slot) 818 { 819 struct nvme_sqe_io *sqe = slot; 820 bus_dmamap_t dmap = ccb->ccb_dmamap; 821 822 sqe->opcode = ISSET(ccb->nnc_flags, NVME_NS_CTX_F_READ) ? 823 NVM_CMD_READ : NVM_CMD_WRITE; 824 htolem32(&sqe->nsid, ccb->nnc_nsid); 825 826 htolem64(&sqe->entry.prp[0], dmap->dm_segs[0].ds_addr); 827 switch (dmap->dm_nsegs) { 828 case 1: 829 break; 830 case 2: 831 htolem64(&sqe->entry.prp[1], dmap->dm_segs[1].ds_addr); 832 break; 833 default: 834 /* the prp list is already set up and synced */ 835 htolem64(&sqe->entry.prp[1], ccb->ccb_prpl_dva); 836 break; 837 } 838 839 htolem64(&sqe->slba, ccb->nnc_blkno); 840 841 if (ISSET(ccb->nnc_flags, NVME_NS_CTX_F_FUA)) 842 htolem16(&sqe->ioflags, NVM_SQE_IO_FUA); 843 844 /* guaranteed by upper layers, but check just in case */ 845 KASSERT((ccb->nnc_datasize % ccb->nnc_secsize) == 0); 846 htolem16(&sqe->nlb, (ccb->nnc_datasize / ccb->nnc_secsize) - 1); 847 } 848 849 static void 850 nvme_ns_io_done(struct nvme_queue *q, struct nvme_ccb *ccb, 851 struct nvme_cqe *cqe) 852 { 853 struct nvme_softc *sc = q->q_sc; 854 bus_dmamap_t dmap = ccb->ccb_dmamap; 855 void *nnc_cookie = ccb->ccb_cookie; 856 nvme_nnc_done nnc_done = ccb->nnc_done; 857 struct buf *bp = ccb->nnc_buf; 858 859 if (dmap->dm_nsegs > 2) { 860 bus_dmamap_sync(sc->sc_dmat, 861 NVME_DMA_MAP(q->q_ccb_prpls), 862 ccb->ccb_prpl_off, 863 sizeof(*ccb->ccb_prpl) * (dmap->dm_nsegs - 1), 864 BUS_DMASYNC_POSTWRITE); 865 } 866 867 bus_dmamap_sync(sc->sc_dmat, dmap, 0, dmap->dm_mapsize, 868 ISSET(ccb->nnc_flags, NVME_NS_CTX_F_READ) ? 869 BUS_DMASYNC_POSTREAD : BUS_DMASYNC_POSTWRITE); 870 871 bus_dmamap_unload(sc->sc_dmat, dmap); 872 nvme_ccb_put(q, ccb); 873 874 nnc_done(nnc_cookie, bp, lemtoh16(&cqe->flags), lemtoh32(&cqe->cdw0)); 875 } 876 877 /* 878 * If there is no volatile write cache, it makes no sense to issue 879 * flush commands or query for the status. 
880 */ 881 static bool 882 nvme_has_volatile_write_cache(struct nvme_softc *sc) 883 { 884 /* sc_identify is filled during attachment */ 885 return ((sc->sc_identify.vwc & NVME_ID_CTRLR_VWC_PRESENT) != 0); 886 } 887 888 static bool 889 nvme_ns_sync_finished(void *cookie) 890 { 891 int *result = cookie; 892 893 return (*result != 0); 894 } 895 896 int 897 nvme_ns_sync(struct nvme_softc *sc, uint16_t nsid, int flags) 898 { 899 struct nvme_queue *q = nvme_get_q(sc, NULL, true); 900 struct nvme_ccb *ccb; 901 int result = 0; 902 903 if (!nvme_has_volatile_write_cache(sc)) { 904 /* cache not present, no value in trying to flush it */ 905 return 0; 906 } 907 908 ccb = nvme_ccb_get(q, true); 909 KASSERT(ccb != NULL); 910 911 ccb->ccb_done = nvme_ns_sync_done; 912 ccb->ccb_cookie = &result; 913 914 /* namespace context */ 915 ccb->nnc_nsid = nsid; 916 ccb->nnc_flags = flags; 917 ccb->nnc_done = NULL; 918 919 if (ISSET(flags, NVME_NS_CTX_F_POLL)) { 920 if (nvme_poll(sc, q, ccb, nvme_ns_sync_fill, NVME_TIMO_SY) != 0) 921 return EIO; 922 return 0; 923 } 924 925 nvme_q_submit(sc, q, ccb, nvme_ns_sync_fill); 926 927 /* wait for completion */ 928 nvme_q_wait_complete(sc, q, nvme_ns_sync_finished, &result); 929 KASSERT(result != 0); 930 931 return (result > 0) ? 0 : EIO; 932 } 933 934 static void 935 nvme_ns_sync_fill(struct nvme_queue *q, struct nvme_ccb *ccb, void *slot) 936 { 937 struct nvme_sqe *sqe = slot; 938 939 sqe->opcode = NVM_CMD_FLUSH; 940 htolem32(&sqe->nsid, ccb->nnc_nsid); 941 } 942 943 static void 944 nvme_ns_sync_done(struct nvme_queue *q, struct nvme_ccb *ccb, 945 struct nvme_cqe *cqe) 946 { 947 int *result = ccb->ccb_cookie; 948 uint16_t status = NVME_CQE_SC(lemtoh16(&cqe->flags)); 949 950 if (status == NVME_CQE_SC_SUCCESS) 951 *result = 1; 952 else 953 *result = -1; 954 955 nvme_ccb_put(q, ccb); 956 } 957 958 static bool 959 nvme_getcache_finished(void *xc) 960 { 961 int *addr = xc; 962 963 return (*addr != 0); 964 } 965 966 /* 967 * Get status of volatile write cache. Always asynchronous. 
968 */ 969 int 970 nvme_admin_getcache(struct nvme_softc *sc, int *addr) 971 { 972 struct nvme_ccb *ccb; 973 struct nvme_queue *q = sc->sc_admin_q; 974 int result = 0, error; 975 976 if (!nvme_has_volatile_write_cache(sc)) { 977 /* cache simply not present */ 978 *addr = 0; 979 return 0; 980 } 981 982 ccb = nvme_ccb_get(q, true); 983 KASSERT(ccb != NULL); 984 985 ccb->ccb_done = nvme_getcache_done; 986 ccb->ccb_cookie = &result; 987 988 /* namespace context */ 989 ccb->nnc_flags = 0; 990 ccb->nnc_done = NULL; 991 992 nvme_q_submit(sc, q, ccb, nvme_getcache_fill); 993 994 /* wait for completion */ 995 nvme_q_wait_complete(sc, q, nvme_getcache_finished, &result); 996 KASSERT(result != 0); 997 998 if (result > 0) { 999 *addr = result; 1000 error = 0; 1001 } else 1002 error = EINVAL; 1003 1004 return error; 1005 } 1006 1007 static void 1008 nvme_getcache_fill(struct nvme_queue *q, struct nvme_ccb *ccb, void *slot) 1009 { 1010 struct nvme_sqe *sqe = slot; 1011 1012 sqe->opcode = NVM_ADMIN_GET_FEATURES; 1013 htolem32(&sqe->cdw10, NVM_FEATURE_VOLATILE_WRITE_CACHE); 1014 htolem32(&sqe->cdw11, NVM_VOLATILE_WRITE_CACHE_WCE); 1015 } 1016 1017 static void 1018 nvme_getcache_done(struct nvme_queue *q, struct nvme_ccb *ccb, 1019 struct nvme_cqe *cqe) 1020 { 1021 int *addr = ccb->ccb_cookie; 1022 uint16_t status = NVME_CQE_SC(lemtoh16(&cqe->flags)); 1023 uint32_t cdw0 = lemtoh32(&cqe->cdw0); 1024 int result; 1025 1026 if (status == NVME_CQE_SC_SUCCESS) { 1027 result = 0; 1028 1029 /* 1030 * DPO not supported, Dataset Management (DSM) field doesn't 1031 * specify the same semantics. FUA is always supported. 1032 */ 1033 result = DKCACHE_FUA; 1034 1035 if (cdw0 & NVM_VOLATILE_WRITE_CACHE_WCE) 1036 result |= DKCACHE_WRITE; 1037 1038 /* 1039 * If volatile write cache is present, the flag shall also be 1040 * settable. 1041 */ 1042 result |= DKCACHE_WCHANGE; 1043 1044 /* 1045 * ONCS field indicates whether the optional SAVE is also 1046 * supported for Set Features. According to spec v1.3, 1047 * Volatile Write Cache however doesn't support persistency 1048 * across power cycle/reset. 1049 */ 1050 1051 } else { 1052 result = -1; 1053 } 1054 1055 *addr = result; 1056 1057 nvme_ccb_put(q, ccb); 1058 } 1059 1060 struct nvme_setcache_state { 1061 int dkcache; 1062 int result; 1063 }; 1064 1065 static bool 1066 nvme_setcache_finished(void *xc) 1067 { 1068 struct nvme_setcache_state *st = xc; 1069 1070 return (st->result != 0); 1071 } 1072 1073 static void 1074 nvme_setcache_fill(struct nvme_queue *q, struct nvme_ccb *ccb, void *slot) 1075 { 1076 struct nvme_sqe *sqe = slot; 1077 struct nvme_setcache_state *st = ccb->ccb_cookie; 1078 1079 sqe->opcode = NVM_ADMIN_SET_FEATURES; 1080 htolem32(&sqe->cdw10, NVM_FEATURE_VOLATILE_WRITE_CACHE); 1081 if (st->dkcache & DKCACHE_WRITE) 1082 htolem32(&sqe->cdw11, NVM_VOLATILE_WRITE_CACHE_WCE); 1083 } 1084 1085 static void 1086 nvme_setcache_done(struct nvme_queue *q, struct nvme_ccb *ccb, 1087 struct nvme_cqe *cqe) 1088 { 1089 struct nvme_setcache_state *st = ccb->ccb_cookie; 1090 uint16_t status = NVME_CQE_SC(lemtoh16(&cqe->flags)); 1091 1092 if (status == NVME_CQE_SC_SUCCESS) { 1093 st->result = 1; 1094 } else { 1095 st->result = -1; 1096 } 1097 1098 nvme_ccb_put(q, ccb); 1099 } 1100 1101 /* 1102 * Set status of volatile write cache. Always asynchronous. 
1103 */ 1104 int 1105 nvme_admin_setcache(struct nvme_softc *sc, int dkcache) 1106 { 1107 struct nvme_ccb *ccb; 1108 struct nvme_queue *q = sc->sc_admin_q; 1109 int error; 1110 struct nvme_setcache_state st; 1111 1112 if (!nvme_has_volatile_write_cache(sc)) { 1113 /* cache simply not present */ 1114 return EOPNOTSUPP; 1115 } 1116 1117 if (dkcache & ~(DKCACHE_WRITE)) { 1118 /* unsupported parameters */ 1119 return EOPNOTSUPP; 1120 } 1121 1122 ccb = nvme_ccb_get(q, true); 1123 KASSERT(ccb != NULL); 1124 1125 memset(&st, 0, sizeof(st)); 1126 st.dkcache = dkcache; 1127 1128 ccb->ccb_done = nvme_setcache_done; 1129 ccb->ccb_cookie = &st; 1130 1131 /* namespace context */ 1132 ccb->nnc_flags = 0; 1133 ccb->nnc_done = NULL; 1134 1135 nvme_q_submit(sc, q, ccb, nvme_setcache_fill); 1136 1137 /* wait for completion */ 1138 nvme_q_wait_complete(sc, q, nvme_setcache_finished, &st); 1139 KASSERT(st.result != 0); 1140 1141 if (st.result > 0) 1142 error = 0; 1143 else 1144 error = EINVAL; 1145 1146 return error; 1147 } 1148 1149 void 1150 nvme_ns_free(struct nvme_softc *sc, uint16_t nsid) 1151 { 1152 struct nvme_namespace *ns; 1153 struct nvm_identify_namespace *identify; 1154 1155 ns = nvme_ns_get(sc, nsid); 1156 KASSERT(ns); 1157 1158 identify = ns->ident; 1159 ns->ident = NULL; 1160 if (identify != NULL) 1161 kmem_free(identify, sizeof(*identify)); 1162 } 1163 1164 struct nvme_pt_state { 1165 struct nvme_pt_command *pt; 1166 bool finished; 1167 }; 1168 1169 static void 1170 nvme_pt_fill(struct nvme_queue *q, struct nvme_ccb *ccb, void *slot) 1171 { 1172 struct nvme_softc *sc = q->q_sc; 1173 struct nvme_sqe *sqe = slot; 1174 struct nvme_pt_state *state = ccb->ccb_cookie; 1175 struct nvme_pt_command *pt = state->pt; 1176 bus_dmamap_t dmap = ccb->ccb_dmamap; 1177 int i; 1178 1179 sqe->opcode = pt->cmd.opcode; 1180 htolem32(&sqe->nsid, pt->cmd.nsid); 1181 1182 if (pt->buf != NULL && pt->len > 0) { 1183 htolem64(&sqe->entry.prp[0], dmap->dm_segs[0].ds_addr); 1184 switch (dmap->dm_nsegs) { 1185 case 1: 1186 break; 1187 case 2: 1188 htolem64(&sqe->entry.prp[1], dmap->dm_segs[1].ds_addr); 1189 break; 1190 default: 1191 for (i = 1; i < dmap->dm_nsegs; i++) { 1192 htolem64(&ccb->ccb_prpl[i - 1], 1193 dmap->dm_segs[i].ds_addr); 1194 } 1195 bus_dmamap_sync(sc->sc_dmat, 1196 NVME_DMA_MAP(q->q_ccb_prpls), 1197 ccb->ccb_prpl_off, 1198 sizeof(*ccb->ccb_prpl) * (dmap->dm_nsegs - 1), 1199 BUS_DMASYNC_PREWRITE); 1200 htolem64(&sqe->entry.prp[1], ccb->ccb_prpl_dva); 1201 break; 1202 } 1203 } 1204 1205 htolem32(&sqe->cdw10, pt->cmd.cdw10); 1206 htolem32(&sqe->cdw11, pt->cmd.cdw11); 1207 htolem32(&sqe->cdw12, pt->cmd.cdw12); 1208 htolem32(&sqe->cdw13, pt->cmd.cdw13); 1209 htolem32(&sqe->cdw14, pt->cmd.cdw14); 1210 htolem32(&sqe->cdw15, pt->cmd.cdw15); 1211 } 1212 1213 static void 1214 nvme_pt_done(struct nvme_queue *q, struct nvme_ccb *ccb, struct nvme_cqe *cqe) 1215 { 1216 struct nvme_softc *sc = q->q_sc; 1217 struct nvme_pt_state *state = ccb->ccb_cookie; 1218 struct nvme_pt_command *pt = state->pt; 1219 bus_dmamap_t dmap = ccb->ccb_dmamap; 1220 1221 if (pt->buf != NULL && pt->len > 0) { 1222 if (dmap->dm_nsegs > 2) { 1223 bus_dmamap_sync(sc->sc_dmat, 1224 NVME_DMA_MAP(q->q_ccb_prpls), 1225 ccb->ccb_prpl_off, 1226 sizeof(*ccb->ccb_prpl) * (dmap->dm_nsegs - 1), 1227 BUS_DMASYNC_POSTWRITE); 1228 } 1229 1230 bus_dmamap_sync(sc->sc_dmat, dmap, 0, dmap->dm_mapsize, 1231 pt->is_read ? 
BUS_DMASYNC_POSTREAD : BUS_DMASYNC_POSTWRITE); 1232 bus_dmamap_unload(sc->sc_dmat, dmap); 1233 } 1234 1235 pt->cpl.cdw0 = lemtoh32(&cqe->cdw0); 1236 pt->cpl.flags = lemtoh16(&cqe->flags) & ~NVME_CQE_PHASE; 1237 1238 state->finished = true; 1239 1240 nvme_ccb_put(q, ccb); 1241 } 1242 1243 static bool 1244 nvme_pt_finished(void *cookie) 1245 { 1246 struct nvme_pt_state *state = cookie; 1247 1248 return state->finished; 1249 } 1250 1251 static int 1252 nvme_command_passthrough(struct nvme_softc *sc, struct nvme_pt_command *pt, 1253 uint16_t nsid, struct lwp *l, bool is_adminq) 1254 { 1255 struct nvme_queue *q; 1256 struct nvme_ccb *ccb; 1257 void *buf = NULL; 1258 struct nvme_pt_state state; 1259 int error; 1260 1261 /* limit command size to maximum data transfer size */ 1262 if ((pt->buf == NULL && pt->len > 0) || 1263 (pt->buf != NULL && (pt->len == 0 || pt->len > sc->sc_mdts))) 1264 return EINVAL; 1265 1266 q = is_adminq ? sc->sc_admin_q : nvme_get_q(sc, NULL, true); 1267 ccb = nvme_ccb_get(q, true); 1268 KASSERT(ccb != NULL); 1269 1270 if (pt->buf != NULL) { 1271 KASSERT(pt->len > 0); 1272 buf = kmem_alloc(pt->len, KM_SLEEP); 1273 if (!pt->is_read) { 1274 error = copyin(pt->buf, buf, pt->len); 1275 if (error) 1276 goto kmem_free; 1277 } 1278 error = bus_dmamap_load(sc->sc_dmat, ccb->ccb_dmamap, buf, 1279 pt->len, NULL, 1280 BUS_DMA_WAITOK | 1281 (pt->is_read ? BUS_DMA_READ : BUS_DMA_WRITE)); 1282 if (error) 1283 goto kmem_free; 1284 bus_dmamap_sync(sc->sc_dmat, ccb->ccb_dmamap, 1285 0, ccb->ccb_dmamap->dm_mapsize, 1286 pt->is_read ? BUS_DMASYNC_PREREAD : BUS_DMASYNC_PREWRITE); 1287 } 1288 1289 memset(&state, 0, sizeof(state)); 1290 state.pt = pt; 1291 state.finished = false; 1292 1293 ccb->ccb_done = nvme_pt_done; 1294 ccb->ccb_cookie = &state; 1295 1296 pt->cmd.nsid = nsid; 1297 1298 nvme_q_submit(sc, q, ccb, nvme_pt_fill); 1299 1300 /* wait for completion */ 1301 nvme_q_wait_complete(sc, q, nvme_pt_finished, &state); 1302 KASSERT(state.finished); 1303 1304 error = 0; 1305 1306 if (buf != NULL) { 1307 if (error == 0 && pt->is_read) 1308 error = copyout(buf, pt->buf, pt->len); 1309 kmem_free: 1310 kmem_free(buf, pt->len); 1311 } 1312 1313 return error; 1314 } 1315 1316 static void 1317 nvme_q_submit(struct nvme_softc *sc, struct nvme_queue *q, struct nvme_ccb *ccb, 1318 void (*fill)(struct nvme_queue *, struct nvme_ccb *, void *)) 1319 { 1320 struct nvme_sqe *sqe = NVME_DMA_KVA(q->q_sq_dmamem); 1321 uint32_t tail; 1322 1323 mutex_enter(&q->q_sq_mtx); 1324 tail = q->q_sq_tail; 1325 if (++q->q_sq_tail >= q->q_entries) 1326 q->q_sq_tail = 0; 1327 1328 sqe += tail; 1329 1330 bus_dmamap_sync(sc->sc_dmat, NVME_DMA_MAP(q->q_sq_dmamem), 1331 sizeof(*sqe) * tail, sizeof(*sqe), BUS_DMASYNC_POSTWRITE); 1332 memset(sqe, 0, sizeof(*sqe)); 1333 (*fill)(q, ccb, sqe); 1334 htolem16(&sqe->cid, ccb->ccb_id); 1335 bus_dmamap_sync(sc->sc_dmat, NVME_DMA_MAP(q->q_sq_dmamem), 1336 sizeof(*sqe) * tail, sizeof(*sqe), BUS_DMASYNC_PREWRITE); 1337 1338 nvme_write4(sc, q->q_sqtdbl, q->q_sq_tail); 1339 mutex_exit(&q->q_sq_mtx); 1340 } 1341 1342 struct nvme_poll_state { 1343 struct nvme_sqe s; 1344 struct nvme_cqe c; 1345 void *cookie; 1346 void (*done)(struct nvme_queue *, struct nvme_ccb *, struct nvme_cqe *); 1347 }; 1348 1349 static int 1350 nvme_poll(struct nvme_softc *sc, struct nvme_queue *q, struct nvme_ccb *ccb, 1351 void (*fill)(struct nvme_queue *, struct nvme_ccb *, void *), int timo_sec) 1352 { 1353 struct nvme_poll_state state; 1354 uint16_t flags; 1355 int step = 10; 1356 int maxloop = timo_sec * 1000000 
/ step;
1357 int error = 0;
1358
1359 memset(&state, 0, sizeof(state));
1360 (*fill)(q, ccb, &state.s);
1361
1362 state.done = ccb->ccb_done;
1363 state.cookie = ccb->ccb_cookie;
1364
1365 ccb->ccb_done = nvme_poll_done;
1366 ccb->ccb_cookie = &state;
1367
1368 nvme_q_submit(sc, q, ccb, nvme_poll_fill);
1369 while (!ISSET(state.c.flags, htole16(NVME_CQE_PHASE))) {
1370 if (nvme_q_complete(sc, q) == 0)
1371 delay(step);
1372
1373 if (timo_sec >= 0 && --maxloop <= 0) {
1374 error = ETIMEDOUT;
1375 break;
1376 }
1377 }
1378
1379 if (error == 0) {
1380 flags = lemtoh16(&state.c.flags);
1381 return flags & ~NVME_CQE_PHASE;
1382 } else {
1383 /*
1384 * If it succeeds later, it would hit a ccb that will have
1385 * already been reused for something else. Not good. Cross
1386 * fingers and hope for the best. XXX do controller reset?
1387 */
1388 aprint_error_dev(sc->sc_dev, "polled command timed out\n");
1389
1390 /* Invoke the callback to clean state anyway */
1391 struct nvme_cqe cqe;
1392 memset(&cqe, 0, sizeof(cqe));
1393 ccb->ccb_done(q, ccb, &cqe);
1394
1395 return 1;
1396 }
1397 }
1398
1399 static void
1400 nvme_poll_fill(struct nvme_queue *q, struct nvme_ccb *ccb, void *slot)
1401 {
1402 struct nvme_sqe *sqe = slot;
1403 struct nvme_poll_state *state = ccb->ccb_cookie;
1404
1405 *sqe = state->s;
1406 }
1407
1408 static void
1409 nvme_poll_done(struct nvme_queue *q, struct nvme_ccb *ccb,
1410 struct nvme_cqe *cqe)
1411 {
1412 struct nvme_poll_state *state = ccb->ccb_cookie;
1413
1414 state->c = *cqe;
1415 SET(state->c.flags, htole16(NVME_CQE_PHASE));
1416
1417 ccb->ccb_cookie = state->cookie;
1418 state->done(q, ccb, &state->c);
1419 }
1420
1421 static void
1422 nvme_sqe_fill(struct nvme_queue *q, struct nvme_ccb *ccb, void *slot)
1423 {
1424 struct nvme_sqe *src = ccb->ccb_cookie;
1425 struct nvme_sqe *dst = slot;
1426
1427 *dst = *src;
1428 }
1429
1430 static void
1431 nvme_empty_done(struct nvme_queue *q, struct nvme_ccb *ccb,
1432 struct nvme_cqe *cqe)
1433 {
1434 }
1435
1436 static int
1437 nvme_q_complete(struct nvme_softc *sc, struct nvme_queue *q)
1438 {
1439 struct nvme_ccb *ccb;
1440 struct nvme_cqe *ring = NVME_DMA_KVA(q->q_cq_dmamem), *cqe;
1441 uint16_t flags;
1442 int rv = 0;
1443
1444 mutex_enter(&q->q_cq_mtx);
1445
1446 nvme_dmamem_sync(sc, q->q_cq_dmamem, BUS_DMASYNC_POSTREAD);
1447 for (;;) {
1448 cqe = &ring[q->q_cq_head];
1449 flags = lemtoh16(&cqe->flags);
1450 if ((flags & NVME_CQE_PHASE) != q->q_cq_phase)
1451 break;
1452
1453 ccb = &q->q_ccbs[lemtoh16(&cqe->cid)];
1454
1455 if (++q->q_cq_head >= q->q_entries) {
1456 q->q_cq_head = 0;
1457 q->q_cq_phase ^= NVME_CQE_PHASE;
1458 }
1459
1460 #ifdef DEBUG
1461 /*
1462 * If we get a spurious completion notification, something
1463 * is seriously hosed up. Very likely DMA to some random
1464 * memory place happened, so just bail out.
1465 */
1466 if ((intptr_t)ccb->ccb_cookie == NVME_CCB_FREE) {
1467 panic("%s: invalid ccb detected",
1468 device_xname(sc->sc_dev));
1469 /* NOTREACHED */
1470 }
1471 #endif
1472
1473 rv++;
1474
1475 /*
1476 * Unlock the mutex before calling the ccb_done callback
1477 * and re-lock afterwards. The callback triggers lddone()
1478 * which schedules another i/o, and also calls nvme_ccb_put().
1479 * Unlock/relock avoids the possibility of deadlock.
1480 */ 1481 mutex_exit(&q->q_cq_mtx); 1482 ccb->ccb_done(q, ccb, cqe); 1483 mutex_enter(&q->q_cq_mtx); 1484 } 1485 nvme_dmamem_sync(sc, q->q_cq_dmamem, BUS_DMASYNC_PREREAD); 1486 1487 if (rv) 1488 nvme_write4(sc, q->q_cqhdbl, q->q_cq_head); 1489 1490 mutex_exit(&q->q_cq_mtx); 1491 1492 return rv; 1493 } 1494 1495 static void 1496 nvme_q_wait_complete(struct nvme_softc *sc, 1497 struct nvme_queue *q, bool (*finished)(void *), void *cookie) 1498 { 1499 mutex_enter(&q->q_ccb_mtx); 1500 if (finished(cookie)) 1501 goto out; 1502 1503 for(;;) { 1504 q->q_ccb_waiting = true; 1505 cv_wait(&q->q_ccb_wait, &q->q_ccb_mtx); 1506 1507 if (finished(cookie)) 1508 break; 1509 } 1510 1511 out: 1512 mutex_exit(&q->q_ccb_mtx); 1513 } 1514 1515 static int 1516 nvme_identify(struct nvme_softc *sc, u_int mps) 1517 { 1518 char sn[41], mn[81], fr[17]; 1519 struct nvm_identify_controller *identify; 1520 struct nvme_dmamem *mem; 1521 struct nvme_ccb *ccb; 1522 u_int mdts; 1523 int rv = 1; 1524 1525 ccb = nvme_ccb_get(sc->sc_admin_q, false); 1526 KASSERT(ccb != NULL); /* it's a bug if we don't have spare ccb here */ 1527 1528 mem = nvme_dmamem_alloc(sc, sizeof(*identify)); 1529 if (mem == NULL) 1530 return 1; 1531 1532 ccb->ccb_done = nvme_empty_done; 1533 ccb->ccb_cookie = mem; 1534 1535 nvme_dmamem_sync(sc, mem, BUS_DMASYNC_PREREAD); 1536 rv = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_fill_identify, 1537 NVME_TIMO_IDENT); 1538 nvme_dmamem_sync(sc, mem, BUS_DMASYNC_POSTREAD); 1539 1540 nvme_ccb_put(sc->sc_admin_q, ccb); 1541 1542 if (rv != 0) 1543 goto done; 1544 1545 identify = NVME_DMA_KVA(mem); 1546 sc->sc_identify = *identify; 1547 identify = NULL; 1548 1549 /* Convert data to host endian */ 1550 nvme_identify_controller_swapbytes(&sc->sc_identify); 1551 1552 strnvisx(sn, sizeof(sn), (const char *)sc->sc_identify.sn, 1553 sizeof(sc->sc_identify.sn), VIS_TRIM|VIS_SAFE|VIS_OCTAL); 1554 strnvisx(mn, sizeof(mn), (const char *)sc->sc_identify.mn, 1555 sizeof(sc->sc_identify.mn), VIS_TRIM|VIS_SAFE|VIS_OCTAL); 1556 strnvisx(fr, sizeof(fr), (const char *)sc->sc_identify.fr, 1557 sizeof(sc->sc_identify.fr), VIS_TRIM|VIS_SAFE|VIS_OCTAL); 1558 aprint_normal_dev(sc->sc_dev, "%s, firmware %s, serial %s\n", mn, fr, 1559 sn); 1560 1561 strlcpy(sc->sc_modelname, mn, sizeof(sc->sc_modelname)); 1562 1563 if (sc->sc_identify.mdts > 0) { 1564 mdts = (1 << sc->sc_identify.mdts) * (1 << mps); 1565 if (mdts < sc->sc_mdts) 1566 sc->sc_mdts = mdts; 1567 } 1568 1569 sc->sc_nn = sc->sc_identify.nn; 1570 1571 done: 1572 nvme_dmamem_free(sc, mem); 1573 1574 return rv; 1575 } 1576 1577 static int 1578 nvme_q_create(struct nvme_softc *sc, struct nvme_queue *q) 1579 { 1580 struct nvme_sqe_q sqe; 1581 struct nvme_ccb *ccb; 1582 int rv; 1583 1584 if (sc->sc_use_mq && sc->sc_intr_establish(sc, q->q_id, q) != 0) 1585 return 1; 1586 1587 ccb = nvme_ccb_get(sc->sc_admin_q, false); 1588 KASSERT(ccb != NULL); 1589 1590 ccb->ccb_done = nvme_empty_done; 1591 ccb->ccb_cookie = &sqe; 1592 1593 memset(&sqe, 0, sizeof(sqe)); 1594 sqe.opcode = NVM_ADMIN_ADD_IOCQ; 1595 htolem64(&sqe.prp1, NVME_DMA_DVA(q->q_cq_dmamem)); 1596 htolem16(&sqe.qsize, q->q_entries - 1); 1597 htolem16(&sqe.qid, q->q_id); 1598 sqe.qflags = NVM_SQE_CQ_IEN | NVM_SQE_Q_PC; 1599 if (sc->sc_use_mq) 1600 htolem16(&sqe.cqid, q->q_id); /* qid == vector */ 1601 1602 rv = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_sqe_fill, NVME_TIMO_QOP); 1603 if (rv != 0) 1604 goto fail; 1605 1606 ccb->ccb_done = nvme_empty_done; 1607 ccb->ccb_cookie = &sqe; 1608 1609 memset(&sqe, 0, sizeof(sqe)); 1610 
sqe.opcode = NVM_ADMIN_ADD_IOSQ; 1611 htolem64(&sqe.prp1, NVME_DMA_DVA(q->q_sq_dmamem)); 1612 htolem16(&sqe.qsize, q->q_entries - 1); 1613 htolem16(&sqe.qid, q->q_id); 1614 htolem16(&sqe.cqid, q->q_id); 1615 sqe.qflags = NVM_SQE_Q_PC; 1616 1617 rv = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_sqe_fill, NVME_TIMO_QOP); 1618 if (rv != 0) 1619 goto fail; 1620 1621 nvme_ccb_put(sc->sc_admin_q, ccb); 1622 return 0; 1623 1624 fail: 1625 if (sc->sc_use_mq) 1626 sc->sc_intr_disestablish(sc, q->q_id); 1627 1628 nvme_ccb_put(sc->sc_admin_q, ccb); 1629 return rv; 1630 } 1631 1632 static int 1633 nvme_q_delete(struct nvme_softc *sc, struct nvme_queue *q) 1634 { 1635 struct nvme_sqe_q sqe; 1636 struct nvme_ccb *ccb; 1637 int rv; 1638 1639 ccb = nvme_ccb_get(sc->sc_admin_q, false); 1640 KASSERT(ccb != NULL); 1641 1642 ccb->ccb_done = nvme_empty_done; 1643 ccb->ccb_cookie = &sqe; 1644 1645 memset(&sqe, 0, sizeof(sqe)); 1646 sqe.opcode = NVM_ADMIN_DEL_IOSQ; 1647 htolem16(&sqe.qid, q->q_id); 1648 1649 rv = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_sqe_fill, NVME_TIMO_QOP); 1650 if (rv != 0) 1651 goto fail; 1652 1653 ccb->ccb_done = nvme_empty_done; 1654 ccb->ccb_cookie = &sqe; 1655 1656 memset(&sqe, 0, sizeof(sqe)); 1657 sqe.opcode = NVM_ADMIN_DEL_IOCQ; 1658 htolem16(&sqe.qid, q->q_id); 1659 1660 rv = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_sqe_fill, NVME_TIMO_QOP); 1661 if (rv != 0) 1662 goto fail; 1663 1664 fail: 1665 nvme_ccb_put(sc->sc_admin_q, ccb); 1666 1667 if (rv == 0 && sc->sc_use_mq) { 1668 if (sc->sc_intr_disestablish(sc, q->q_id)) 1669 rv = 1; 1670 } 1671 1672 return rv; 1673 } 1674 1675 static void 1676 nvme_fill_identify(struct nvme_queue *q, struct nvme_ccb *ccb, void *slot) 1677 { 1678 struct nvme_sqe *sqe = slot; 1679 struct nvme_dmamem *mem = ccb->ccb_cookie; 1680 1681 sqe->opcode = NVM_ADMIN_IDENTIFY; 1682 htolem64(&sqe->entry.prp[0], NVME_DMA_DVA(mem)); 1683 htolem32(&sqe->cdw10, 1); 1684 } 1685 1686 static int 1687 nvme_set_number_of_queues(struct nvme_softc *sc, u_int nq, u_int *ncqa, 1688 u_int *nsqa) 1689 { 1690 struct nvme_pt_state state; 1691 struct nvme_pt_command pt; 1692 struct nvme_ccb *ccb; 1693 int rv; 1694 1695 ccb = nvme_ccb_get(sc->sc_admin_q, false); 1696 KASSERT(ccb != NULL); /* it's a bug if we don't have spare ccb here */ 1697 1698 memset(&pt, 0, sizeof(pt)); 1699 pt.cmd.opcode = NVM_ADMIN_SET_FEATURES; 1700 pt.cmd.cdw10 = NVM_FEATURE_NUMBER_OF_QUEUES; 1701 pt.cmd.cdw11 = ((nq - 1) << 16) | (nq - 1); 1702 1703 memset(&state, 0, sizeof(state)); 1704 state.pt = &pt; 1705 state.finished = false; 1706 1707 ccb->ccb_done = nvme_pt_done; 1708 ccb->ccb_cookie = &state; 1709 1710 rv = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_pt_fill, NVME_TIMO_QOP); 1711 1712 if (rv != 0) { 1713 *ncqa = *nsqa = 0; 1714 return EIO; 1715 } 1716 1717 *ncqa = (pt.cpl.cdw0 >> 16) + 1; 1718 *nsqa = (pt.cpl.cdw0 & 0xffff) + 1; 1719 1720 return 0; 1721 } 1722 1723 static int 1724 nvme_ccbs_alloc(struct nvme_queue *q, uint16_t nccbs) 1725 { 1726 struct nvme_softc *sc = q->q_sc; 1727 struct nvme_ccb *ccb; 1728 bus_addr_t off; 1729 uint64_t *prpl; 1730 u_int i; 1731 1732 mutex_init(&q->q_ccb_mtx, MUTEX_DEFAULT, IPL_BIO); 1733 cv_init(&q->q_ccb_wait, "nvmeqw"); 1734 q->q_ccb_waiting = false; 1735 SIMPLEQ_INIT(&q->q_ccb_list); 1736 1737 q->q_ccbs = kmem_alloc(sizeof(*ccb) * nccbs, KM_SLEEP); 1738 1739 q->q_nccbs = nccbs; 1740 q->q_ccb_prpls = nvme_dmamem_alloc(sc, 1741 sizeof(*prpl) * sc->sc_max_sgl * nccbs); 1742 1743 prpl = NVME_DMA_KVA(q->q_ccb_prpls); 1744 off = 0; 1745 1746 for (i = 0; i < nccbs; i++) { 
1747 ccb = &q->q_ccbs[i]; 1748 1749 if (bus_dmamap_create(sc->sc_dmat, sc->sc_mdts, 1750 sc->sc_max_sgl + 1 /* we get a free prp in the sqe */, 1751 sc->sc_mps, sc->sc_mps, BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW, 1752 &ccb->ccb_dmamap) != 0) 1753 goto free_maps; 1754 1755 ccb->ccb_id = i; 1756 ccb->ccb_prpl = prpl; 1757 ccb->ccb_prpl_off = off; 1758 ccb->ccb_prpl_dva = NVME_DMA_DVA(q->q_ccb_prpls) + off; 1759 1760 SIMPLEQ_INSERT_TAIL(&q->q_ccb_list, ccb, ccb_entry); 1761 1762 prpl += sc->sc_max_sgl; 1763 off += sizeof(*prpl) * sc->sc_max_sgl; 1764 } 1765 1766 return 0; 1767 1768 free_maps: 1769 nvme_ccbs_free(q); 1770 return 1; 1771 } 1772 1773 static struct nvme_ccb * 1774 nvme_ccb_get(struct nvme_queue *q, bool wait) 1775 { 1776 struct nvme_ccb *ccb = NULL; 1777 1778 mutex_enter(&q->q_ccb_mtx); 1779 again: 1780 ccb = SIMPLEQ_FIRST(&q->q_ccb_list); 1781 if (ccb != NULL) { 1782 SIMPLEQ_REMOVE_HEAD(&q->q_ccb_list, ccb_entry); 1783 #ifdef DEBUG 1784 ccb->ccb_cookie = NULL; 1785 #endif 1786 } else { 1787 if (__predict_false(wait)) { 1788 q->q_ccb_waiting = true; 1789 cv_wait(&q->q_ccb_wait, &q->q_ccb_mtx); 1790 goto again; 1791 } 1792 } 1793 mutex_exit(&q->q_ccb_mtx); 1794 1795 return ccb; 1796 } 1797 1798 static void 1799 nvme_ccb_put(struct nvme_queue *q, struct nvme_ccb *ccb) 1800 { 1801 1802 mutex_enter(&q->q_ccb_mtx); 1803 #ifdef DEBUG 1804 ccb->ccb_cookie = (void *)NVME_CCB_FREE; 1805 #endif 1806 SIMPLEQ_INSERT_HEAD(&q->q_ccb_list, ccb, ccb_entry); 1807 1808 /* It's unlikely there are any waiters, it's not used for regular I/O */ 1809 if (__predict_false(q->q_ccb_waiting)) { 1810 q->q_ccb_waiting = false; 1811 cv_broadcast(&q->q_ccb_wait); 1812 } 1813 1814 mutex_exit(&q->q_ccb_mtx); 1815 } 1816 1817 static void 1818 nvme_ccbs_free(struct nvme_queue *q) 1819 { 1820 struct nvme_softc *sc = q->q_sc; 1821 struct nvme_ccb *ccb; 1822 1823 mutex_enter(&q->q_ccb_mtx); 1824 while ((ccb = SIMPLEQ_FIRST(&q->q_ccb_list)) != NULL) { 1825 SIMPLEQ_REMOVE_HEAD(&q->q_ccb_list, ccb_entry); 1826 /* 1827 * bus_dmamap_destroy() may call vm_map_lock() and rw_enter() 1828 * internally. 
Don't hold the spin mutex across the call.
1829 */
1830 mutex_exit(&q->q_ccb_mtx);
1831 bus_dmamap_destroy(sc->sc_dmat, ccb->ccb_dmamap);
1832 mutex_enter(&q->q_ccb_mtx);
1833 }
1834 mutex_exit(&q->q_ccb_mtx);
1835
1836 nvme_dmamem_free(sc, q->q_ccb_prpls);
1837 kmem_free(q->q_ccbs, sizeof(*ccb) * q->q_nccbs);
1838 q->q_ccbs = NULL;
1839 cv_destroy(&q->q_ccb_wait);
1840 mutex_destroy(&q->q_ccb_mtx);
1841 }
1842
1843 static struct nvme_queue *
1844 nvme_q_alloc(struct nvme_softc *sc, uint16_t id, u_int entries, u_int dstrd)
1845 {
1846 struct nvme_queue *q;
1847
1848 q = kmem_alloc(sizeof(*q), KM_SLEEP);
1849 q->q_sc = sc;
1850 q->q_sq_dmamem = nvme_dmamem_alloc(sc,
1851 sizeof(struct nvme_sqe) * entries);
1852 if (q->q_sq_dmamem == NULL)
1853 goto free;
1854
1855 q->q_cq_dmamem = nvme_dmamem_alloc(sc,
1856 sizeof(struct nvme_cqe) * entries);
1857 if (q->q_cq_dmamem == NULL)
1858 goto free_sq;
1859
1860 memset(NVME_DMA_KVA(q->q_sq_dmamem), 0, NVME_DMA_LEN(q->q_sq_dmamem));
1861 memset(NVME_DMA_KVA(q->q_cq_dmamem), 0, NVME_DMA_LEN(q->q_cq_dmamem));
1862
1863 mutex_init(&q->q_sq_mtx, MUTEX_DEFAULT, IPL_BIO);
1864 mutex_init(&q->q_cq_mtx, MUTEX_DEFAULT, IPL_BIO);
1865 q->q_sqtdbl = NVME_SQTDBL(id, dstrd);
1866 q->q_cqhdbl = NVME_CQHDBL(id, dstrd);
1867 q->q_id = id;
1868 q->q_entries = entries;
1869 q->q_sq_tail = 0;
1870 q->q_cq_head = 0;
1871 q->q_cq_phase = NVME_CQE_PHASE;
1872
1873 nvme_dmamem_sync(sc, q->q_sq_dmamem, BUS_DMASYNC_PREWRITE);
1874 nvme_dmamem_sync(sc, q->q_cq_dmamem, BUS_DMASYNC_PREREAD);
1875
1876 /*
1877 * Due to the definition of full and empty queue (the queue is
1878 * empty when head == tail, and full when tail is one less than
1879 * head), we can actually only have (entries - 1) in-flight commands.
1880 */
1881 if (nvme_ccbs_alloc(q, entries - 1) != 0) {
1882 aprint_error_dev(sc->sc_dev, "unable to allocate ccbs\n");
1883 goto free_cq;
1884 }
1885
1886 return q;
1887
1888 free_cq:
1889 nvme_dmamem_free(sc, q->q_cq_dmamem);
1890 free_sq:
1891 nvme_dmamem_free(sc, q->q_sq_dmamem);
1892 free:
1893 kmem_free(q, sizeof(*q));
1894
1895 return NULL;
1896 }
1897
1898 static void
1899 nvme_q_reset(struct nvme_softc *sc, struct nvme_queue *q)
1900 {
1901
1902 memset(NVME_DMA_KVA(q->q_sq_dmamem), 0, NVME_DMA_LEN(q->q_sq_dmamem));
1903 memset(NVME_DMA_KVA(q->q_cq_dmamem), 0, NVME_DMA_LEN(q->q_cq_dmamem));
1904
1905 q->q_sqtdbl = NVME_SQTDBL(q->q_id, sc->sc_dstrd);
1906 q->q_cqhdbl = NVME_CQHDBL(q->q_id, sc->sc_dstrd);
1907
1908 q->q_sq_tail = 0;
1909 q->q_cq_head = 0;
1910 q->q_cq_phase = NVME_CQE_PHASE;
1911
1912 nvme_dmamem_sync(sc, q->q_sq_dmamem, BUS_DMASYNC_PREWRITE);
1913 nvme_dmamem_sync(sc, q->q_cq_dmamem, BUS_DMASYNC_PREREAD);
1914 }
1915
1916 static void
1917 nvme_q_free(struct nvme_softc *sc, struct nvme_queue *q)
1918 {
1919 nvme_ccbs_free(q);
1920 mutex_destroy(&q->q_sq_mtx);
1921 mutex_destroy(&q->q_cq_mtx);
1922 nvme_dmamem_sync(sc, q->q_cq_dmamem, BUS_DMASYNC_POSTREAD);
1923 nvme_dmamem_sync(sc, q->q_sq_dmamem, BUS_DMASYNC_POSTWRITE);
1924 nvme_dmamem_free(sc, q->q_cq_dmamem);
1925 nvme_dmamem_free(sc, q->q_sq_dmamem);
1926 kmem_free(q, sizeof(*q));
1927 }
1928
1929 int
1930 nvme_intr(void *xsc)
1931 {
1932 struct nvme_softc *sc = xsc;
1933
1934 /*
1935 * INTx is level-triggered; the controller deasserts the interrupt
1936 * only when we advance the completion queue head via a write to
1937 * the doorbell. Tell the controller to block the interrupts while
1938 * we process the queue(s).
1939 */ 1940 nvme_write4(sc, NVME_INTMS, 1); 1941 1942 softint_schedule(sc->sc_softih[0]); 1943 1944 /* don't know, might not have been for us */ 1945 return 1; 1946 } 1947 1948 void 1949 nvme_softintr_intx(void *xq) 1950 { 1951 struct nvme_queue *q = xq; 1952 struct nvme_softc *sc = q->q_sc; 1953 1954 nvme_q_complete(sc, sc->sc_admin_q); 1955 if (sc->sc_q != NULL) 1956 nvme_q_complete(sc, sc->sc_q[0]); 1957 1958 /* 1959 * Processing done, tell controller to issue interrupts again. There 1960 * is no race, as NVMe spec requires the controller to maintain state, 1961 * and assert the interrupt whenever there are unacknowledged 1962 * completion queue entries. 1963 */ 1964 nvme_write4(sc, NVME_INTMC, 1); 1965 } 1966 1967 int 1968 nvme_intr_msi(void *xq) 1969 { 1970 struct nvme_queue *q = xq; 1971 1972 KASSERT(q && q->q_sc && q->q_sc->sc_softih 1973 && q->q_sc->sc_softih[q->q_id]); 1974 1975 /* 1976 * MSI/MSI-X are edge triggered, so can handover processing to softint 1977 * without masking the interrupt. 1978 */ 1979 softint_schedule(q->q_sc->sc_softih[q->q_id]); 1980 1981 return 1; 1982 } 1983 1984 void 1985 nvme_softintr_msi(void *xq) 1986 { 1987 struct nvme_queue *q = xq; 1988 struct nvme_softc *sc = q->q_sc; 1989 1990 nvme_q_complete(sc, q); 1991 } 1992 1993 static struct nvme_dmamem * 1994 nvme_dmamem_alloc(struct nvme_softc *sc, size_t size) 1995 { 1996 struct nvme_dmamem *ndm; 1997 int nsegs; 1998 1999 ndm = kmem_zalloc(sizeof(*ndm), KM_SLEEP); 2000 if (ndm == NULL) 2001 return NULL; 2002 2003 ndm->ndm_size = size; 2004 2005 if (bus_dmamap_create(sc->sc_dmat, size, btoc(round_page(size)), size, 0, 2006 BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW, &ndm->ndm_map) != 0) 2007 goto ndmfree; 2008 2009 if (bus_dmamem_alloc(sc->sc_dmat, size, sc->sc_mps, 0, &ndm->ndm_seg, 2010 1, &nsegs, BUS_DMA_WAITOK) != 0) 2011 goto destroy; 2012 2013 if (bus_dmamem_map(sc->sc_dmat, &ndm->ndm_seg, nsegs, size, 2014 &ndm->ndm_kva, BUS_DMA_WAITOK) != 0) 2015 goto free; 2016 2017 if (bus_dmamap_load(sc->sc_dmat, ndm->ndm_map, ndm->ndm_kva, size, 2018 NULL, BUS_DMA_WAITOK) != 0) 2019 goto unmap; 2020 2021 memset(ndm->ndm_kva, 0, size); 2022 bus_dmamap_sync(sc->sc_dmat, ndm->ndm_map, 0, size, BUS_DMASYNC_PREREAD); 2023 2024 return ndm; 2025 2026 unmap: 2027 bus_dmamem_unmap(sc->sc_dmat, ndm->ndm_kva, size); 2028 free: 2029 bus_dmamem_free(sc->sc_dmat, &ndm->ndm_seg, 1); 2030 destroy: 2031 bus_dmamap_destroy(sc->sc_dmat, ndm->ndm_map); 2032 ndmfree: 2033 kmem_free(ndm, sizeof(*ndm)); 2034 return NULL; 2035 } 2036 2037 static void 2038 nvme_dmamem_sync(struct nvme_softc *sc, struct nvme_dmamem *mem, int ops) 2039 { 2040 bus_dmamap_sync(sc->sc_dmat, NVME_DMA_MAP(mem), 2041 0, NVME_DMA_LEN(mem), ops); 2042 } 2043 2044 void 2045 nvme_dmamem_free(struct nvme_softc *sc, struct nvme_dmamem *ndm) 2046 { 2047 bus_dmamap_unload(sc->sc_dmat, ndm->ndm_map); 2048 bus_dmamem_unmap(sc->sc_dmat, ndm->ndm_kva, ndm->ndm_size); 2049 bus_dmamem_free(sc->sc_dmat, &ndm->ndm_seg, 1); 2050 bus_dmamap_destroy(sc->sc_dmat, ndm->ndm_map); 2051 kmem_free(ndm, sizeof(*ndm)); 2052 } 2053 2054 /* 2055 * ioctl 2056 */ 2057 2058 dev_type_open(nvmeopen); 2059 dev_type_close(nvmeclose); 2060 dev_type_ioctl(nvmeioctl); 2061 2062 const struct cdevsw nvme_cdevsw = { 2063 .d_open = nvmeopen, 2064 .d_close = nvmeclose, 2065 .d_read = noread, 2066 .d_write = nowrite, 2067 .d_ioctl = nvmeioctl, 2068 .d_stop = nostop, 2069 .d_tty = notty, 2070 .d_poll = nopoll, 2071 .d_mmap = nommap, 2072 .d_kqfilter = nokqfilter, 2073 .d_discard = nodiscard, 2074 .d_flag = 
D_OTHER, 2075 }; 2076 2077 /* 2078 * Accept an open operation on the control device. 2079 */ 2080 int 2081 nvmeopen(dev_t dev, int flag, int mode, struct lwp *l) 2082 { 2083 struct nvme_softc *sc; 2084 int unit = minor(dev) / 0x10000; 2085 int nsid = minor(dev) & 0xffff; 2086 int nsidx; 2087 2088 if ((sc = device_lookup_private(&nvme_cd, unit)) == NULL) 2089 return ENXIO; 2090 if ((sc->sc_flags & NVME_F_ATTACHED) == 0) 2091 return ENXIO; 2092 2093 if (nsid == 0) { 2094 /* controller */ 2095 if (ISSET(sc->sc_flags, NVME_F_OPEN)) 2096 return EBUSY; 2097 SET(sc->sc_flags, NVME_F_OPEN); 2098 } else { 2099 /* namespace */ 2100 nsidx = nsid - 1; 2101 if (nsidx >= sc->sc_nn || sc->sc_namespaces[nsidx].dev == NULL) 2102 return ENXIO; 2103 if (ISSET(sc->sc_namespaces[nsidx].flags, NVME_NS_F_OPEN)) 2104 return EBUSY; 2105 SET(sc->sc_namespaces[nsidx].flags, NVME_NS_F_OPEN); 2106 } 2107 return 0; 2108 } 2109 2110 /* 2111 * Accept the last close on the control device. 2112 */ 2113 int 2114 nvmeclose(dev_t dev, int flag, int mode, struct lwp *l) 2115 { 2116 struct nvme_softc *sc; 2117 int unit = minor(dev) / 0x10000; 2118 int nsid = minor(dev) & 0xffff; 2119 int nsidx; 2120 2121 sc = device_lookup_private(&nvme_cd, unit); 2122 if (sc == NULL) 2123 return ENXIO; 2124 2125 if (nsid == 0) { 2126 /* controller */ 2127 CLR(sc->sc_flags, NVME_F_OPEN); 2128 } else { 2129 /* namespace */ 2130 nsidx = nsid - 1; 2131 if (nsidx >= sc->sc_nn) 2132 return ENXIO; 2133 CLR(sc->sc_namespaces[nsidx].flags, NVME_NS_F_OPEN); 2134 } 2135 2136 return 0; 2137 } 2138 2139 /* 2140 * Handle control operations. 2141 */ 2142 int 2143 nvmeioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l) 2144 { 2145 struct nvme_softc *sc; 2146 int unit = minor(dev) / 0x10000; 2147 int nsid = minor(dev) & 0xffff; 2148 struct nvme_pt_command *pt; 2149 2150 sc = device_lookup_private(&nvme_cd, unit); 2151 if (sc == NULL) 2152 return ENXIO; 2153 2154 switch (cmd) { 2155 case NVME_PASSTHROUGH_CMD: 2156 pt = data; 2157 return nvme_command_passthrough(sc, data, 2158 nsid == 0 ? pt->cmd.nsid : nsid, l, nsid == 0); 2159 } 2160 2161 return ENOTTY; 2162 } 2163
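/*
 * Illustrative sketch (not part of the driver): how a userland tool
 * might exercise the NVME_PASSTHROUGH_CMD ioctl handled by nvmeioctl()
 * above, issuing an admin Identify Controller command the same way
 * nvme_fill_identify() does (opcode NVM_ADMIN_IDENTIFY, cdw10 = 1).
 * The device node name and the exact userland header paths are
 * assumptions and may differ on a given system.
 *
 *	char buf[4096];
 *	struct nvme_pt_command pt;
 *	int fd = open("/dev/nvme0", O_RDWR);	// assumed control node
 *
 *	memset(&pt, 0, sizeof(pt));
 *	pt.cmd.opcode = NVM_ADMIN_IDENTIFY;	// admin Identify command
 *	pt.cmd.cdw10 = 1;			// CNS 1: identify controller
 *	pt.buf = buf;
 *	pt.len = sizeof(buf);
 *	pt.is_read = 1;				// controller-to-host transfer
 *	if (ioctl(fd, NVME_PASSTHROUGH_CMD, &pt) == -1)
 *		err(1, "NVME_PASSTHROUGH_CMD");
 */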