/* $OpenBSD: nvme.c,v 1.124 2024/10/08 19:41:23 kettenis Exp $ */

/*
 * Copyright (c) 2014 David Gwynne <dlg@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

/*
 * Bus-independent core of the NVM Express (NVMe) driver.  The controller
 * is presented to the rest of the kernel as a SCSI adapter (see
 * nvme_switch below); each namespace id becomes a SCSI target with one
 * LUN.  Bus front ends call nvme_attach()/nvme_activate().
 */

#include "bio.h"

#include <sys/param.h>
#include <sys/ioctl.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/device.h>
#include <sys/queue.h>
#include <sys/mutex.h>
#include <sys/pool.h>
#include <sys/disk.h>

#include <sys/atomic.h>

#include <machine/bus.h>

#include <scsi/scsi_all.h>
#include <scsi/scsi_disk.h>
#include <scsi/scsiconf.h>
#include <scsi/sdvar.h>

#include <dev/biovar.h>
#include <dev/ic/nvmereg.h>
#include <dev/ic/nvmevar.h>
#include <dev/ic/nvmeio.h>

struct cfdriver nvme_cd = {
	NULL,
	"nvme",
	DV_DULL
};

/* Controller bring-up/tear-down helpers. */
int	nvme_ready(struct nvme_softc *, u_int32_t);
int	nvme_enable(struct nvme_softc *);
int	nvme_disable(struct nvme_softc *);
int	nvme_shutdown(struct nvme_softc *);
int	nvme_resume(struct nvme_softc *);

void	nvme_dumpregs(struct nvme_softc *);
int	nvme_identify(struct nvme_softc *, u_int);
void	nvme_fill_identify(struct nvme_softc *, struct nvme_ccb *, void *);

#ifndef SMALL_KERNEL
void	nvme_refresh_sensors(void *);
#endif

/* Command control block (ccb) pool management. */
int	nvme_ccbs_alloc(struct nvme_softc *, u_int);
void	nvme_ccbs_free(struct nvme_softc *, u_int);

void *	nvme_ccb_get(void *);
void	nvme_ccb_put(void *, void *);

/* Synchronous (polled) command submission and its fill/done callbacks. */
int	nvme_poll(struct nvme_softc *, struct nvme_queue *, struct nvme_ccb *,
	    void (*)(struct nvme_softc *, struct nvme_ccb *, void *), u_int32_t);
void	nvme_poll_fill(struct nvme_softc *, struct nvme_ccb *, void *);
void	nvme_poll_done(struct nvme_softc *, struct nvme_ccb *,
	    struct nvme_cqe *);
void	nvme_sqe_fill(struct nvme_softc *, struct nvme_ccb *, void *);
void	nvme_empty_done(struct nvme_softc *, struct nvme_ccb *,
	    struct nvme_cqe *);

/* Submission/completion queue pair management. */
struct nvme_queue *
	nvme_q_alloc(struct nvme_softc *, u_int16_t, u_int, u_int);
int	nvme_q_create(struct nvme_softc *, struct nvme_queue *);
int	nvme_q_reset(struct nvme_softc *, struct nvme_queue *);
int	nvme_q_delete(struct nvme_softc *, struct nvme_queue *);
void	nvme_q_submit(struct nvme_softc *,
	    struct nvme_queue *, struct nvme_ccb *,
	    void (*)(struct nvme_softc *, struct nvme_ccb *, void *));
int	nvme_q_complete(struct nvme_softc *, struct nvme_queue *);
void	nvme_q_free(struct nvme_softc *, struct nvme_queue *);

/* scsi_adapter entry points. */
void	nvme_scsi_cmd(struct scsi_xfer *);
void	nvme_minphys(struct buf *, struct scsi_link *);
int	nvme_scsi_probe(struct scsi_link *);
void	nvme_scsi_free(struct scsi_link *);
uint64_t nvme_scsi_size(const struct nvm_identify_namespace *);
int	nvme_scsi_ioctl(struct scsi_link *, u_long, caddr_t, int);
int	nvme_passthrough_cmd(struct nvme_softc *, struct nvme_pt_cmd *,
	    int, int);

#ifdef HIBERNATE
#include <uvm/uvm_extern.h>
#include <sys/hibernate.h>
#include <sys/disklabel.h>

int	nvme_hibernate_io(dev_t, daddr_t, vaddr_t, size_t, int, void *);
#endif

#if NBIO > 0
void	nvme_bio_status(struct bio_status *, const char *, ...);

const char *nvme_bioctl_sdname(const struct nvme_softc *, int);

int	nvme_bioctl(struct device *, u_long, caddr_t);
int	nvme_bioctl_inq(struct nvme_softc *, struct bioc_inq *);
int	nvme_bioctl_vol(struct nvme_softc *, struct bioc_vol *);
int	nvme_bioctl_disk(struct nvme_softc *, struct bioc_disk *);
#endif /* NBIO > 0 */

const struct scsi_adapter nvme_switch = {
	nvme_scsi_cmd, nvme_minphys, nvme_scsi_probe, nvme_scsi_free,
	nvme_scsi_ioctl
};

void	nvme_scsi_io(struct scsi_xfer *, int);
void	nvme_scsi_io_fill(struct nvme_softc *, struct nvme_ccb *, void *);
void	nvme_scsi_io_done(struct nvme_softc *, struct nvme_ccb *,
	    struct nvme_cqe *);

void	nvme_scsi_sync(struct scsi_xfer *);
void	nvme_scsi_sync_fill(struct nvme_softc *, struct nvme_ccb *, void *);
void	nvme_scsi_sync_done(struct nvme_softc *, struct nvme_ccb *,
	    struct nvme_cqe *);

void	nvme_scsi_inq(struct scsi_xfer *);
void	nvme_scsi_inquiry(struct scsi_xfer *);
void	nvme_scsi_capacity16(struct scsi_xfer *);
void	nvme_scsi_capacity(struct scsi_xfer *);

/*
 * Overridable queue doorbell hooks; bus front ends may supply their own
 * nvme_ops, otherwise the defaults below are installed in nvme_attach().
 */
uint32_t	nvme_op_sq_enter(struct nvme_softc *,
		    struct nvme_queue *, struct nvme_ccb *);
void		nvme_op_sq_leave(struct nvme_softc *,
		    struct nvme_queue *, struct nvme_ccb *);
uint32_t	nvme_op_sq_enter_locked(struct nvme_softc *,
		    struct nvme_queue *, struct nvme_ccb *);
void		nvme_op_sq_leave_locked(struct nvme_softc *,
		    struct nvme_queue *, struct nvme_ccb *);

void		nvme_op_cq_done(struct nvme_softc *,
		    struct nvme_queue *, struct nvme_ccb *);

static const struct nvme_ops nvme_ops = {
	.op_sq_enter		= nvme_op_sq_enter,
	.op_sq_leave		= nvme_op_sq_leave,
	.op_sq_enter_locked	= nvme_op_sq_enter_locked,
	.op_sq_leave_locked	= nvme_op_sq_leave_locked,

	.op_cq_done		= nvme_op_cq_done,
};

#define NVME_TIMO_QOP		5000	/* ms to create/delete queue */
#define NVME_TIMO_PT		5000	/* ms to complete passthrough */
#define NVME_TIMO_IDENT		10000	/* ms to probe/identify */
#define NVME_TIMO_LOG_PAGE	5000	/* ms to read log pages */
#define NVME_TIMO_DELAYNS	10	/* ns to delay() in poll loop */
/*
 * XXX NVME_TIMO_DELAYNS is passed straight to delay(9) in nvme_poll(),
 * and delay(9) takes microseconds, not nanoseconds -- the "ns" above
 * looks wrong; nvme_poll() also decrements its microsecond budget by
 * this value, so in practice it behaves as 10us per iteration.
 */

/*
 * Some controllers, at least Apple NVMe, always require split
 * transfers, so don't use bus_space_{read,write}_8() on LP64.
 */
u_int64_t
nvme_read8(struct nvme_softc *sc, bus_size_t r)
{
	u_int64_t v;

	/* Read low dword then high dword and reassemble. */
	v = (u_int64_t)nvme_read4(sc, r) |
	    (u_int64_t)nvme_read4(sc, r + 4) << 32;

	return (v);
}

/* 64-bit register write as two 32-bit writes, low dword first. */
void
nvme_write8(struct nvme_softc *sc, bus_size_t r, u_int64_t v)
{
	nvme_write4(sc, r, v);
	nvme_write4(sc, r + 4, v >> 32);
}

/* Debug helper: decode and print the main controller registers. */
void
nvme_dumpregs(struct nvme_softc *sc)
{
	u_int64_t r8;
	u_int32_t r4;

	r8 = nvme_read8(sc, NVME_CAP);
	printf("%s: cap  0x%016llx\n", DEVNAME(sc), nvme_read8(sc, NVME_CAP));
	printf("%s:  mpsmax %u (%u)\n", DEVNAME(sc),
	    (u_int)NVME_CAP_MPSMAX(r8), (1 << NVME_CAP_MPSMAX(r8)));
	printf("%s:  mpsmin %u (%u)\n", DEVNAME(sc),
	    (u_int)NVME_CAP_MPSMIN(r8), (1 << NVME_CAP_MPSMIN(r8)));
	printf("%s:  css %llu\n", DEVNAME(sc), NVME_CAP_CSS(r8));
	printf("%s:  nssrs %llu\n", DEVNAME(sc), NVME_CAP_NSSRS(r8));
	printf("%s:  dstrd %u\n", DEVNAME(sc), NVME_CAP_DSTRD(r8));
	printf("%s:  to %llu msec\n", DEVNAME(sc), NVME_CAP_TO(r8));
	printf("%s:  ams %llu\n", DEVNAME(sc), NVME_CAP_AMS(r8));
	printf("%s:  cqr %llu\n", DEVNAME(sc), NVME_CAP_CQR(r8));
	printf("%s:  mqes %llu\n", DEVNAME(sc), NVME_CAP_MQES(r8));

	printf("%s: vs   0x%04x\n", DEVNAME(sc), nvme_read4(sc, NVME_VS));

	r4 = nvme_read4(sc, NVME_CC);
	printf("%s: cc   0x%04x\n", DEVNAME(sc), r4);
	printf("%s:  iocqes %u\n", DEVNAME(sc), NVME_CC_IOCQES_R(r4));
	printf("%s:  iosqes %u\n", DEVNAME(sc), NVME_CC_IOSQES_R(r4));
	printf("%s:  shn %u\n", DEVNAME(sc), NVME_CC_SHN_R(r4));
	printf("%s:  ams %u\n", DEVNAME(sc), NVME_CC_AMS_R(r4));
	printf("%s:  mps %u\n", DEVNAME(sc), NVME_CC_MPS_R(r4));
	printf("%s:  css %u\n", DEVNAME(sc), NVME_CC_CSS_R(r4));
	printf("%s:  en %u\n", DEVNAME(sc), ISSET(r4, NVME_CC_EN));

	printf("%s: csts 0x%08x\n", DEVNAME(sc), nvme_read4(sc, NVME_CSTS));
	printf("%s: aqa  0x%08x\n", DEVNAME(sc), nvme_read4(sc, NVME_AQA));
	printf("%s: asq  0x%016llx\n", DEVNAME(sc), nvme_read8(sc, NVME_ASQ));
	printf("%s: acq  0x%016llx\n", DEVNAME(sc), nvme_read8(sc, NVME_ACQ));
}

/*
 * Poll CSTS.RDY until it matches 'rdy' (NVME_CSTS_RDY or 0), in 1ms
 * steps up to sc_rdy_to iterations.  Returns 0 when the controller
 * reached the requested state, 1 on timeout.
 */
int
nvme_ready(struct nvme_softc *sc, u_int32_t rdy)
{
	u_int i = 0;

	while ((nvme_read4(sc, NVME_CSTS) & NVME_CSTS_RDY) != rdy) {
		if (i++ > sc->sc_rdy_to)
			return (1);

		delay(1000);
		nvme_barrier(sc, NVME_CSTS, 4, BUS_SPACE_BARRIER_READ);
	}

	return (0);
}

/*
 * Program the admin queue registers (AQA/ASQ/ACQ) and set CC.EN, then
 * wait for CSTS.RDY.  If the controller is already enabled, just wait
 * for ready.  Returns nvme_ready()'s result: 0 on success, 1 on timeout.
 */
int
nvme_enable(struct nvme_softc *sc)
{
	u_int32_t cc;

	cc = nvme_read4(sc, NVME_CC);
	if (ISSET(cc, NVME_CC_EN))
		return (nvme_ready(sc, NVME_CSTS_RDY));

	/* Give the bus front end a chance to do its own setup first. */
	if (sc->sc_ops->op_enable != NULL)
		sc->sc_ops->op_enable(sc);

	/* Admin queue sizes, then the queue base addresses. */
	nvme_write4(sc, NVME_AQA, NVME_AQA_ACQS(sc->sc_admin_q->q_entries) |
	    NVME_AQA_ASQS(sc->sc_admin_q->q_entries));
	nvme_barrier(sc, 0, sc->sc_ios, BUS_SPACE_BARRIER_WRITE);

	nvme_write8(sc, NVME_ASQ, NVME_DMA_DVA(sc->sc_admin_q->q_sq_dmamem));
	nvme_barrier(sc, 0, sc->sc_ios, BUS_SPACE_BARRIER_WRITE);
	nvme_write8(sc, NVME_ACQ, NVME_DMA_DVA(sc->sc_admin_q->q_cq_dmamem));
	nvme_barrier(sc, 0, sc->sc_ios, BUS_SPACE_BARRIER_WRITE);

	/* Rebuild CC from scratch: entry sizes, NVM command set, RR, MPS. */
	CLR(cc, NVME_CC_IOCQES_MASK | NVME_CC_IOSQES_MASK | NVME_CC_SHN_MASK |
	    NVME_CC_AMS_MASK | NVME_CC_MPS_MASK | NVME_CC_CSS_MASK);
	SET(cc, NVME_CC_IOSQES(6));	/* Submission queue size == 2**6 (64) */
	SET(cc, NVME_CC_IOCQES(4));	/* Completion queue size == 2**4 (16) */
	SET(cc, NVME_CC_SHN(NVME_CC_SHN_NONE));
	SET(cc, NVME_CC_CSS(NVME_CC_CSS_NVM));
	SET(cc, NVME_CC_AMS(NVME_CC_AMS_RR));
	SET(cc, NVME_CC_MPS(ffs(sc->sc_mps) - 1));
	SET(cc, NVME_CC_EN);

	nvme_write4(sc, NVME_CC, cc);
	nvme_barrier(sc, 0, sc->sc_ios,
	    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);

	return (nvme_ready(sc, NVME_CSTS_RDY));
}

/*
 * Clear CC.EN and wait for CSTS.RDY to drop.  If the controller was
 * enabled and not in a fatal state, first wait for it to become ready
 * so the disable transition is well defined.  Returns 0 on success,
 * 1 on timeout.
 */
int
nvme_disable(struct nvme_softc *sc)
{
	u_int32_t cc, csts;

	cc = nvme_read4(sc, NVME_CC);
	if (ISSET(cc, NVME_CC_EN)) {
		csts = nvme_read4(sc, NVME_CSTS);
		if (!ISSET(csts, NVME_CSTS_CFS) &&
		    nvme_ready(sc, NVME_CSTS_RDY) != 0)
			return (1);
	}

	CLR(cc, NVME_CC_EN);

	nvme_write4(sc, NVME_CC, cc);
	nvme_barrier(sc, 0, sc->sc_ios,
	    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);

	return (nvme_ready(sc, 0));
}

/*
 * Common attach path called by the bus front ends.  Brings the
 * controller from reset to operation: admin queue and initial ccbs,
 * enable, identify (which yields the real sc_mdts/sc_max_prpl), the
 * io queue, sensors, and finally the scsibus attachment.
 * Returns 0 on success, 1 on failure (resources released).
 */
int
nvme_attach(struct nvme_softc *sc)
{
	struct scsibus_attach_args saa;
	u_int64_t cap;
	u_int32_t reg;
	u_int nccbs = 0;

	mtx_init(&sc->sc_ccb_mtx, IPL_BIO);
	rw_init(&sc->sc_lock, "nvme_lock");
	SIMPLEQ_INIT(&sc->sc_ccb_list);
	scsi_iopool_init(&sc->sc_iopool, sc, nvme_ccb_get, nvme_ccb_put);
	/* Front ends may have installed their own ops/openings already. */
	if (sc->sc_ops == NULL)
		sc->sc_ops = &nvme_ops;
	if (sc->sc_openings == 0)
		sc->sc_openings = 64;

	reg = nvme_read4(sc, NVME_VS);
	if (reg == 0xffffffff) {
		/* All-ones reads mean the register mapping is dead. */
		printf("invalid mapping\n");
		return (1);
	}

	printf("NVMe %d.%d\n", NVME_VS_MJR(reg), NVME_VS_MNR(reg));

	cap = nvme_read8(sc, NVME_CAP);
	sc->sc_dstrd = NVME_CAP_DSTRD(cap);
	if (NVME_CAP_MPSMIN(cap) > PAGE_SHIFT) {
		printf("%s: NVMe minimum page size %u "
		    "is greater than CPU page size %u\n", DEVNAME(sc),
		    1 << NVME_CAP_MPSMIN(cap), 1 << PAGE_SHIFT);
		return (1);
	}
	/* Use the CPU page size unless the controller can't go that big. */
	if (NVME_CAP_MPSMAX(cap) < PAGE_SHIFT)
		sc->sc_mps = 1 << NVME_CAP_MPSMAX(cap);
	else
		sc->sc_mps = 1 << PAGE_SHIFT;

	sc->sc_rdy_to = NVME_CAP_TO(cap);
	/* Conservative transfer limits until IDENTIFY reports the real ones. */
	sc->sc_mdts = MAXPHYS;
	sc->sc_max_prpl = sc->sc_mdts / sc->sc_mps;

	if (nvme_disable(sc) != 0) {
		printf("%s: unable to disable controller\n", DEVNAME(sc));
		return (1);
	}

	sc->sc_admin_q
= nvme_q_alloc(sc, NVME_ADMIN_Q, 128, sc->sc_dstrd); 352 if (sc->sc_admin_q == NULL) { 353 printf("%s: unable to allocate admin queue\n", DEVNAME(sc)); 354 return (1); 355 } 356 357 if (nvme_ccbs_alloc(sc, 16) != 0) { 358 printf("%s: unable to allocate initial ccbs\n", DEVNAME(sc)); 359 goto free_admin_q; 360 } 361 nccbs = 16; 362 363 if (nvme_enable(sc) != 0) { 364 printf("%s: unable to enable controller\n", DEVNAME(sc)); 365 goto free_ccbs; 366 } 367 368 if (nvme_identify(sc, NVME_CAP_MPSMIN(cap)) != 0) { 369 printf("%s: unable to identify controller\n", DEVNAME(sc)); 370 goto disable; 371 } 372 373 /* We now know the real values of sc_mdts and sc_max_prpl. */ 374 nvme_ccbs_free(sc, nccbs); 375 if (nvme_ccbs_alloc(sc, 64) != 0) { 376 printf("%s: unable to allocate ccbs\n", DEVNAME(sc)); 377 goto free_admin_q; 378 } 379 nccbs = 64; 380 381 sc->sc_q = nvme_q_alloc(sc, NVME_IO_Q, 128, sc->sc_dstrd); 382 if (sc->sc_q == NULL) { 383 printf("%s: unable to allocate io q\n", DEVNAME(sc)); 384 goto disable; 385 } 386 387 if (nvme_q_create(sc, sc->sc_q) != 0) { 388 printf("%s: unable to create io q\n", DEVNAME(sc)); 389 goto free_q; 390 } 391 392 #ifdef HIBERNATE 393 sc->sc_hib_q = nvme_q_alloc(sc, NVME_HIB_Q, 4, sc->sc_dstrd); 394 if (sc->sc_hib_q == NULL) { 395 printf("%s: unable to allocate hibernate io queue\n", DEVNAME(sc)); 396 goto free_q; 397 } 398 #endif 399 400 nvme_write4(sc, NVME_INTMC, 1); 401 402 sc->sc_namespaces = mallocarray(sc->sc_nn + 1, 403 sizeof(*sc->sc_namespaces), M_DEVBUF, M_WAITOK|M_ZERO); 404 405 saa.saa_adapter = &nvme_switch; 406 saa.saa_adapter_softc = sc; 407 saa.saa_adapter_buswidth = sc->sc_nn + 1; 408 saa.saa_luns = 1; 409 saa.saa_adapter_target = 0; 410 saa.saa_openings = sc->sc_openings; 411 saa.saa_pool = &sc->sc_iopool; 412 saa.saa_quirks = saa.saa_flags = 0; 413 saa.saa_wwpn = saa.saa_wwnn = 0; 414 415 strlcpy(sc->sc_sensordev.xname, DEVNAME(sc), sizeof(sc->sc_sensordev.xname)); 416 417 #ifndef SMALL_KERNEL 418 sc->sc_temp_sensor.type 
= SENSOR_TEMP; 419 sc->sc_temp_sensor.status = SENSOR_S_UNKNOWN; 420 sensor_attach(&sc->sc_sensordev, &sc->sc_temp_sensor); 421 422 sc->sc_usage_sensor.type = SENSOR_PERCENT; 423 sc->sc_usage_sensor.status = SENSOR_S_UNKNOWN; 424 strlcpy(sc->sc_usage_sensor.desc, "endurance used", 425 sizeof(sc->sc_usage_sensor.desc)); 426 sensor_attach(&sc->sc_sensordev, &sc->sc_usage_sensor); 427 428 sc->sc_spare_sensor.type = SENSOR_PERCENT; 429 sc->sc_spare_sensor.status = SENSOR_S_UNKNOWN; 430 strlcpy(sc->sc_spare_sensor.desc, "available spare", 431 sizeof(sc->sc_spare_sensor.desc)); 432 sensor_attach(&sc->sc_sensordev, &sc->sc_spare_sensor); 433 434 if (sensor_task_register(sc, nvme_refresh_sensors, 60) == NULL) 435 goto free_q; 436 437 sensordev_install(&sc->sc_sensordev); 438 #endif 439 440 sc->sc_scsibus = (struct scsibus_softc *)config_found(&sc->sc_dev, 441 &saa, scsiprint); 442 #if NBIO > 0 443 if (bio_register(&sc->sc_dev, nvme_bioctl) != 0) 444 printf("%s: unable to register bioctl\n", DEVNAME(sc)); 445 #endif /* NBIO > 0 */ 446 447 return (0); 448 449 free_q: 450 nvme_q_free(sc, sc->sc_q); 451 disable: 452 nvme_disable(sc); 453 free_ccbs: 454 nvme_ccbs_free(sc, nccbs); 455 free_admin_q: 456 nvme_q_free(sc, sc->sc_admin_q); 457 458 return (1); 459 } 460 461 int 462 nvme_resume(struct nvme_softc *sc) 463 { 464 if (nvme_disable(sc) != 0) { 465 printf("%s: unable to disable controller\n", DEVNAME(sc)); 466 return (1); 467 } 468 469 if (nvme_q_reset(sc, sc->sc_admin_q) != 0) { 470 printf("%s: unable to reset admin queue\n", DEVNAME(sc)); 471 return (1); 472 } 473 474 if (nvme_enable(sc) != 0) { 475 printf("%s: unable to enable controller\n", DEVNAME(sc)); 476 return (1); 477 } 478 479 sc->sc_q = nvme_q_alloc(sc, NVME_IO_Q, 128, sc->sc_dstrd); 480 if (sc->sc_q == NULL) { 481 printf("%s: unable to allocate io q\n", DEVNAME(sc)); 482 goto disable; 483 } 484 485 if (nvme_q_create(sc, sc->sc_q) != 0) { 486 printf("%s: unable to create io q\n", DEVNAME(sc)); 487 goto free_q; 
488 } 489 490 nvme_write4(sc, NVME_INTMC, 1); 491 492 return (0); 493 494 free_q: 495 nvme_q_free(sc, sc->sc_q); 496 disable: 497 nvme_disable(sc); 498 499 return (1); 500 } 501 502 int 503 nvme_scsi_probe(struct scsi_link *link) 504 { 505 struct nvme_softc *sc = link->bus->sb_adapter_softc; 506 struct nvme_sqe sqe; 507 struct nvm_identify_namespace *identify; 508 struct nvme_dmamem *mem; 509 struct nvme_ccb *ccb; 510 int rv; 511 512 ccb = scsi_io_get(&sc->sc_iopool, 0); 513 KASSERT(ccb != NULL); 514 515 mem = nvme_dmamem_alloc(sc, sizeof(*identify)); 516 if (mem == NULL) 517 return (ENOMEM); 518 519 memset(&sqe, 0, sizeof(sqe)); 520 sqe.opcode = NVM_ADMIN_IDENTIFY; 521 htolem32(&sqe.nsid, link->target); 522 htolem64(&sqe.entry.prp[0], NVME_DMA_DVA(mem)); 523 htolem32(&sqe.cdw10, 0); 524 525 ccb->ccb_done = nvme_empty_done; 526 ccb->ccb_cookie = &sqe; 527 528 nvme_dmamem_sync(sc, mem, BUS_DMASYNC_PREREAD); 529 rv = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_sqe_fill, NVME_TIMO_IDENT); 530 nvme_dmamem_sync(sc, mem, BUS_DMASYNC_POSTREAD); 531 532 scsi_io_put(&sc->sc_iopool, ccb); 533 534 identify = NVME_DMA_KVA(mem); 535 if (rv == 0) { 536 if (nvme_scsi_size(identify) > 0) { 537 /* Commit namespace if it has a size greater than zero. */ 538 identify = malloc(sizeof(*identify), M_DEVBUF, M_WAITOK); 539 memcpy(identify, NVME_DMA_KVA(mem), sizeof(*identify)); 540 sc->sc_namespaces[link->target].ident = identify; 541 } else { 542 /* Don't attach a namespace if its size is zero. 
*/
			rv = ENXIO;
		}
	}

	nvme_dmamem_free(sc, mem);

	return (rv);
}

/*
 * DVACT_POWERDOWN path: delete the io queue and ask the controller
 * for a normal shutdown (CC.SHN), polling CSTS.SHST for up to ~4s.
 * Falls back to a plain disable if either step fails or times out.
 * Always returns 0.
 */
int
nvme_shutdown(struct nvme_softc *sc)
{
	u_int32_t cc, csts;
	int i;

	/* Mask the controller interrupt before tearing queues down. */
	nvme_write4(sc, NVME_INTMC, 0);

	if (nvme_q_delete(sc, sc->sc_q) != 0) {
		printf("%s: unable to delete q, disabling\n", DEVNAME(sc));
		goto disable;
	}

	cc = nvme_read4(sc, NVME_CC);
	CLR(cc, NVME_CC_SHN_MASK);
	SET(cc, NVME_CC_SHN(NVME_CC_SHN_NORMAL));
	nvme_write4(sc, NVME_CC, cc);

	/* Wait up to 4000 * 1ms for shutdown processing to complete. */
	for (i = 0; i < 4000; i++) {
		nvme_barrier(sc, 0, sc->sc_ios,
		    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
		csts = nvme_read4(sc, NVME_CSTS);
		if ((csts & NVME_CSTS_SHST_MASK) == NVME_CSTS_SHST_DONE)
			return (0);

		delay(1000);
	}

	printf("%s: unable to shutdown, disabling\n", DEVNAME(sc));

disable:
	nvme_disable(sc);
	return (0);
}

/*
 * autoconf activate hook: children are shut down before the
 * controller on powerdown, and the controller is resumed before the
 * children on resume.
 */
int
nvme_activate(struct nvme_softc *sc, int act)
{
	int rv;

	switch (act) {
	case DVACT_POWERDOWN:
		rv = config_activate_children(&sc->sc_dev, act);
		nvme_shutdown(sc);
		break;
	case DVACT_RESUME:
		rv = nvme_resume(sc);
		if (rv == 0)
			rv = config_activate_children(&sc->sc_dev, act);
		break;
	default:
		rv = config_activate_children(&sc->sc_dev, act);
		break;
	}

	return (rv);
}

/*
 * scsi_adapter command entry point: dispatch the SCSI opcode to the
 * matching NVMe implementation.  Unsupported opcodes fail with
 * XS_DRIVER_STUFFUP.
 */
void
nvme_scsi_cmd(struct scsi_xfer *xs)
{
	switch (xs->cmd.opcode) {
	case READ_COMMAND:
	case READ_10:
	case READ_12:
	case READ_16:
		nvme_scsi_io(xs, SCSI_DATA_IN);
		return;
	case WRITE_COMMAND:
	case WRITE_10:
	case WRITE_12:
	case WRITE_16:
		nvme_scsi_io(xs, SCSI_DATA_OUT);
		return;

	case SYNCHRONIZE_CACHE:
		nvme_scsi_sync(xs);
		return;

	case INQUIRY:
		nvme_scsi_inq(xs);
		return;
	case READ_CAPACITY_16:
		nvme_scsi_capacity16(xs);
		return;
	case READ_CAPACITY:
		nvme_scsi_capacity(xs);
		return;

	case TEST_UNIT_READY:
	case PREVENT_ALLOW:
	case START_STOP:
		/* No-ops for NVMe; succeed immediately. */
		xs->error = XS_NOERROR;
		scsi_done(xs);
		return;

	default:
		break;
	}

	xs->error = XS_DRIVER_STUFFUP;
	scsi_done(xs);
}

/* Clamp transfers to the controller's maximum data transfer size. */
void
nvme_minphys(struct buf *bp, struct scsi_link *link)
{
	struct nvme_softc *sc = link->bus->sb_adapter_softc;

	if (bp->b_bcount > sc->sc_mdts)
		bp->b_bcount = sc->sc_mdts;
}

/*
 * Common read/write path.  Maps the xfer's data for DMA, builds the
 * PRP list when more than two segments are needed, and submits (or
 * polls, for SCSI_POLL) on the io queue.  'dir' is SCSI_DATA_IN or
 * SCSI_DATA_OUT and must match the xfer's direction flags.
 */
void
nvme_scsi_io(struct scsi_xfer *xs, int dir)
{
	struct scsi_link *link = xs->sc_link;
	struct nvme_softc *sc = link->bus->sb_adapter_softc;
	struct nvme_ccb *ccb = xs->io;
	bus_dmamap_t dmap = ccb->ccb_dmamap;
	int i;

	if ((xs->flags & (SCSI_DATA_IN|SCSI_DATA_OUT)) != dir)
		goto stuffup;

	ccb->ccb_done = nvme_scsi_io_done;
	ccb->ccb_cookie = xs;

	if (bus_dmamap_load(sc->sc_dmat, dmap,
	    xs->data, xs->datalen, NULL, ISSET(xs->flags, SCSI_NOSLEEP) ?
	    BUS_DMA_NOWAIT : BUS_DMA_WAITOK) != 0)
		goto stuffup;

	bus_dmamap_sync(sc->sc_dmat, dmap, 0, dmap->dm_mapsize,
	    ISSET(xs->flags, SCSI_DATA_IN) ?
	    BUS_DMASYNC_PREREAD : BUS_DMASYNC_PREWRITE);

	if (dmap->dm_nsegs > 2) {
		/*
		 * Segments beyond the first go into this ccb's slice of
		 * the PRP list; the first segment rides in the SQE itself
		 * (see nvme_scsi_io_fill()).
		 */
		for (i = 1; i < dmap->dm_nsegs; i++) {
			htolem64(&ccb->ccb_prpl[i - 1],
			    dmap->dm_segs[i].ds_addr);
		}
		bus_dmamap_sync(sc->sc_dmat,
		    NVME_DMA_MAP(sc->sc_ccb_prpls),
		    ccb->ccb_prpl_off,
		    sizeof(*ccb->ccb_prpl) * (dmap->dm_nsegs - 1),
		    BUS_DMASYNC_PREWRITE);
	}

	if (ISSET(xs->flags, SCSI_POLL)) {
		nvme_poll(sc, sc->sc_q, ccb, nvme_scsi_io_fill, xs->timeout);
		return;
	}

	nvme_q_submit(sc, sc->sc_q, ccb, nvme_scsi_io_fill);
	return;

stuffup:
	xs->error = XS_DRIVER_STUFFUP;
	scsi_done(xs);
}

/*
 * SQE fill callback for reads/writes: translate the decoded SCSI
 * CDB (lba/blocks) into an NVMe READ/WRITE command with PRP entries.
 */
void
nvme_scsi_io_fill(struct nvme_softc *sc, struct nvme_ccb *ccb, void *slot)
{
	struct nvme_sqe_io *sqe = slot;
	struct scsi_xfer *xs = ccb->ccb_cookie;
	struct scsi_link *link = xs->sc_link;
	bus_dmamap_t dmap = ccb->ccb_dmamap;
	u_int64_t lba;
	u_int32_t blocks;

	scsi_cmd_rw_decode(&xs->cmd, &lba, &blocks);

	sqe->opcode = ISSET(xs->flags, SCSI_DATA_IN) ?
	    NVM_CMD_READ : NVM_CMD_WRITE;
	htolem32(&sqe->nsid, link->target);

	htolem64(&sqe->entry.prp[0], dmap->dm_segs[0].ds_addr);
	switch (dmap->dm_nsegs) {
	case 1:
		break;
	case 2:
		/* Two segments fit directly in the SQE's PRP entries. */
		htolem64(&sqe->entry.prp[1], dmap->dm_segs[1].ds_addr);
		break;
	default:
		/* the prp list is already set up and synced */
		htolem64(&sqe->entry.prp[1], ccb->ccb_prpl_dva);
		break;
	}

	htolem64(&sqe->slba, lba);
	/* NLB is zero-based: 0 means one block. */
	htolem16(&sqe->nlb, blocks - 1);
}

/*
 * Completion callback for reads/writes: undo the DMA syncs/load and
 * complete the scsi_xfer based on the NVMe status code.
 */
void
nvme_scsi_io_done(struct nvme_softc *sc, struct nvme_ccb *ccb,
    struct nvme_cqe *cqe)
{
	struct scsi_xfer *xs = ccb->ccb_cookie;
	bus_dmamap_t dmap = ccb->ccb_dmamap;
	u_int16_t flags;

	if (dmap->dm_nsegs > 2) {
		bus_dmamap_sync(sc->sc_dmat,
		    NVME_DMA_MAP(sc->sc_ccb_prpls),
		    ccb->ccb_prpl_off,
		    sizeof(*ccb->ccb_prpl) * (dmap->dm_nsegs - 1),
		    BUS_DMASYNC_POSTWRITE);
	}

	bus_dmamap_sync(sc->sc_dmat, dmap, 0, dmap->dm_mapsize,
	    ISSET(xs->flags, SCSI_DATA_IN) ?
	    BUS_DMASYNC_POSTREAD : BUS_DMASYNC_POSTWRITE);

	bus_dmamap_unload(sc->sc_dmat, dmap);

	flags = lemtoh16(&cqe->flags);

	xs->error = (NVME_CQE_SC(flags) == NVME_CQE_SC_SUCCESS) ?
XS_NOERROR : XS_DRIVER_STUFFUP;
	xs->status = SCSI_OK;
	xs->resid = 0;
	scsi_done(xs);
}

/*
 * SYNCHRONIZE CACHE: issue an NVMe FLUSH on the io queue, polled when
 * SCSI_POLL is set.
 */
void
nvme_scsi_sync(struct scsi_xfer *xs)
{
	struct scsi_link *link = xs->sc_link;
	struct nvme_softc *sc = link->bus->sb_adapter_softc;
	struct nvme_ccb *ccb = xs->io;

	ccb->ccb_done = nvme_scsi_sync_done;
	ccb->ccb_cookie = xs;

	if (ISSET(xs->flags, SCSI_POLL)) {
		nvme_poll(sc, sc->sc_q, ccb, nvme_scsi_sync_fill, xs->timeout);
		return;
	}

	nvme_q_submit(sc, sc->sc_q, ccb, nvme_scsi_sync_fill);
}

/* SQE fill callback: FLUSH for the xfer's namespace. */
void
nvme_scsi_sync_fill(struct nvme_softc *sc, struct nvme_ccb *ccb, void *slot)
{
	struct nvme_sqe *sqe = slot;
	struct scsi_xfer *xs = ccb->ccb_cookie;
	struct scsi_link *link = xs->sc_link;

	sqe->opcode = NVM_CMD_FLUSH;
	htolem32(&sqe->nsid, link->target);
}

/* Completion callback for FLUSH: map NVMe status to the scsi_xfer. */
void
nvme_scsi_sync_done(struct nvme_softc *sc, struct nvme_ccb *ccb,
    struct nvme_cqe *cqe)
{
	struct scsi_xfer *xs = ccb->ccb_cookie;
	u_int16_t flags;

	flags = lemtoh16(&cqe->flags);

	xs->error = (NVME_CQE_SC(flags) == NVME_CQE_SC_SUCCESS) ?
	    XS_NOERROR : XS_DRIVER_STUFFUP;
	xs->status = SCSI_OK;
	xs->resid = 0;
	scsi_done(xs);
}

/*
 * INQUIRY: only the standard (non-EVPD) inquiry is implemented; all
 * EVPD pages fail with XS_DRIVER_STUFFUP.
 */
void
nvme_scsi_inq(struct scsi_xfer *xs)
{
	struct scsi_inquiry *inq = (struct scsi_inquiry *)&xs->cmd;

	if (!ISSET(inq->flags, SI_EVPD)) {
		nvme_scsi_inquiry(xs);
		return;
	}

	switch (inq->pagecode) {
	default:
		/* printf("%s: %d\n", __func__, inq->pagecode); */
		break;
	}

	xs->error = XS_DRIVER_STUFFUP;
	scsi_done(xs);
}

/*
 * Build the standard inquiry data from the controller's IDENTIFY data
 * (model number and firmware revision).
 */
void
nvme_scsi_inquiry(struct scsi_xfer *xs)
{
	struct scsi_inquiry_data inq;
	struct scsi_link *link = xs->sc_link;
	struct nvme_softc *sc = link->bus->sb_adapter_softc;
	struct nvm_identify_namespace *ns;

	ns = sc->sc_namespaces[link->target].ident;

	memset(&inq, 0, sizeof(inq));

	inq.device = T_DIRECT;
	inq.version = SCSI_REV_SPC4;
	inq.response_format = SID_SCSI2_RESPONSE;
	inq.additional_length = SID_SCSI2_ALEN;
	inq.flags |= SID_CmdQue;
	memcpy(inq.vendor, "NVMe    ", sizeof(inq.vendor));
	memcpy(inq.product, sc->sc_identify.mn, sizeof(inq.product));
	memcpy(inq.revision, sc->sc_identify.fr, sizeof(inq.revision));

	scsi_copy_internal_data(xs, &inq, sizeof(inq));

	xs->error = XS_NOERROR;
	scsi_done(xs);
}

/*
 * READ CAPACITY (16): report last LBA and block size from the
 * namespace's formatted LBA format, with the thin-provisioning bit set.
 */
void
nvme_scsi_capacity16(struct scsi_xfer *xs)
{
	struct scsi_read_cap_data_16 rcd;
	struct scsi_link *link = xs->sc_link;
	struct nvme_softc *sc = link->bus->sb_adapter_softc;
	struct nvm_identify_namespace *ns;
	struct nvm_namespace_format *f;
	u_int64_t addr;
	u_int16_t tpe = READ_CAP_16_TPE;

	ns = sc->sc_namespaces[link->target].ident;

	if (xs->cmdlen != sizeof(struct scsi_read_capacity_16)) {
		xs->error = XS_DRIVER_STUFFUP;
		scsi_done(xs);
		return;
	}

	/* READ CAPACITY reports the address of the last block. */
	addr = nvme_scsi_size(ns) - 1;
	f = &ns->lbaf[NVME_ID_NS_FLBAS(ns->flbas)];

	memset(&rcd, 0, sizeof(rcd));
	_lto8b(addr, rcd.addr);
	/* Block length is 2^lbads bytes. */
	_lto4b(1 << f->lbads, rcd.length);
	_lto2b(tpe, rcd.lowest_aligned);

	memcpy(xs->data, &rcd, MIN(sizeof(rcd), xs->datalen));

	xs->error = XS_NOERROR;
	scsi_done(xs);
}

/*
 * READ CAPACITY (10): as above, but the last LBA saturates at
 * 0xffffffff to tell the initiator to use READ CAPACITY (16).
 */
void
nvme_scsi_capacity(struct scsi_xfer *xs)
{
	struct scsi_read_cap_data rcd;
	struct scsi_link *link = xs->sc_link;
	struct nvme_softc *sc = link->bus->sb_adapter_softc;
	struct nvm_identify_namespace *ns;
	struct nvm_namespace_format *f;
	u_int64_t addr;

	ns = sc->sc_namespaces[link->target].ident;

	if (xs->cmdlen != sizeof(struct scsi_read_capacity)) {
		xs->error = XS_DRIVER_STUFFUP;
		scsi_done(xs);
		return;
	}

	addr = nvme_scsi_size(ns) - 1;
	if (addr > 0xffffffff)
		addr = 0xffffffff;

	f = &ns->lbaf[NVME_ID_NS_FLBAS(ns->flbas)];

	memset(&rcd, 0, sizeof(rcd));
	_lto4b(addr, rcd.addr);
	_lto4b(1 << f->lbads, rcd.length);

	memcpy(xs->data, &rcd, MIN(sizeof(rcd), xs->datalen));

	xs->error = XS_NOERROR;
	scsi_done(xs);
}

/* Release the IDENTIFY NAMESPACE copy kept by nvme_scsi_probe(). */
void
nvme_scsi_free(struct scsi_link *link)
{
	struct nvme_softc *sc = link->bus->sb_adapter_softc;
	struct nvm_identify_namespace *identify;

	identify = sc->sc_namespaces[link->target].ident;
	sc->sc_namespaces[link->target].ident = NULL;

	free(identify, M_DEVBUF, sizeof(*identify));
}

/*
 * Usable size of a namespace in blocks: for thin-provisioned
 * namespaces this is NCAP when smaller than NSZE, otherwise NSZE.
 */
uint64_t
nvme_scsi_size(const struct nvm_identify_namespace *ns)
{
	uint64_t ncap, nsze;

	ncap = lemtoh64(&ns->ncap); /* Max allowed allocation. */
	nsze = lemtoh64(&ns->nsze);

	if ((ns->nsfeat & NVME_ID_NS_NSFEAT_THIN_PROV) && ncap < nsze)
		return ncap;
	else
		return nsze;
}

/*
 * Back end for the NVME_PASSTHROUGH_CMD ioctl: run a whitelisted
 * admin command (IDENTIFY, GET LOG PAGE, SELFTEST) on the admin queue
 * and copy the data and completion status back out to userland.
 * Returns 0 or an errno.
 */
int
nvme_passthrough_cmd(struct nvme_softc *sc, struct nvme_pt_cmd *pt, int dv_unit,
    int nsid)
{
	struct nvme_pt_status pt_status;
	struct nvme_sqe sqe;
	struct nvme_dmamem *mem = NULL;
	struct nvme_ccb *ccb = NULL;
	int flags;
	int rv = 0;

	ccb = nvme_ccb_get(sc);
	if (ccb == NULL)
		panic("nvme_passthrough_cmd: nvme_ccb_get returned NULL");

	memset(&sqe, 0, sizeof(sqe));
	sqe.opcode = pt->pt_opcode;
	htolem32(&sqe.nsid, pt->pt_nsid);
	htolem32(&sqe.cdw10, pt->pt_cdw10);
	htolem32(&sqe.cdw11, pt->pt_cdw11);
	htolem32(&sqe.cdw12, pt->pt_cdw12);
	htolem32(&sqe.cdw13, pt->pt_cdw13);
	htolem32(&sqe.cdw14, pt->pt_cdw14);
	htolem32(&sqe.cdw15, pt->pt_cdw15);

	ccb->ccb_done = nvme_empty_done;
	ccb->ccb_cookie = &sqe;

	/* Only read-only admin opcodes are permitted from userland. */
	switch (pt->pt_opcode) {
	case NVM_ADMIN_IDENTIFY:
	case NVM_ADMIN_GET_LOG_PG:
	case NVM_ADMIN_SELFTEST:
		break;

	default:
		rv = ENOTTY;
		goto done;
	}

	if (pt->pt_databuflen > 0) {
		mem = nvme_dmamem_alloc(sc, pt->pt_databuflen);
		if (mem == NULL) {
			rv = ENOMEM;
			goto done;
		}
		htolem64(&sqe.entry.prp[0], NVME_DMA_DVA(mem));
		nvme_dmamem_sync(sc, mem, BUS_DMASYNC_PREREAD);
	}

	flags = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_sqe_fill, NVME_TIMO_PT);

	if (pt->pt_databuflen > 0) {
		nvme_dmamem_sync(sc, mem, BUS_DMASYNC_POSTREAD);
		if (flags == 0)
			rv = copyout(NVME_DMA_KVA(mem), pt->pt_databuf,
			    pt->pt_databuflen);
	}

	if (rv == 0 && pt->pt_statuslen > 0) {
		pt_status.ps_dv_unit = dv_unit;
		pt_status.ps_nsid = nsid;
		pt_status.ps_flags = flags;
		pt_status.ps_cc = nvme_read4(sc, NVME_CC);
		pt_status.ps_csts = nvme_read4(sc, NVME_CSTS);
		rv = copyout(&pt_status,
pt->pt_status, pt->pt_statuslen);
	}

done:
	if (mem)
		nvme_dmamem_free(sc, mem);
	if (ccb)
		nvme_ccb_put(sc, ccb);

	return rv;
}

/*
 * scsi_link ioctl handler: only NVME_PASSTHROUGH_CMD is supported.
 * If cdw10's low byte is zero the target's namespace id is used.
 */
int
nvme_scsi_ioctl(struct scsi_link *link, u_long cmd, caddr_t addr, int flag)
{
	struct nvme_softc *sc = link->bus->sb_adapter_softc;
	struct nvme_pt_cmd *pt = (struct nvme_pt_cmd *)addr;
	int rv;

	switch (cmd) {
	case NVME_PASSTHROUGH_CMD:
		break;
	default:
		return ENOTTY;
	}

	if ((pt->pt_cdw10 & 0xff) == 0)
		pt->pt_nsid = link->target;

	rv = nvme_passthrough_cmd(sc, pt, sc->sc_dev.dv_unit, link->target);
	if (rv)
		goto done;

done:
	return rv;
}

/*
 * Default submission queue doorbell hooks: take/drop q_sq_mtx around
 * the locked variants below.
 */
uint32_t
nvme_op_sq_enter(struct nvme_softc *sc,
    struct nvme_queue *q, struct nvme_ccb *ccb)
{
	mtx_enter(&q->q_sq_mtx);
	return (nvme_op_sq_enter_locked(sc, q, ccb));
}

/* Return the SQ slot the next command should be written into. */
uint32_t
nvme_op_sq_enter_locked(struct nvme_softc *sc,
    struct nvme_queue *q, struct nvme_ccb *ccb)
{
	return (q->q_sq_tail);
}

/* Advance the SQ tail (with wrap) and ring the submission doorbell. */
void
nvme_op_sq_leave_locked(struct nvme_softc *sc,
    struct nvme_queue *q, struct nvme_ccb *ccb)
{
	uint32_t tail;

	tail = ++q->q_sq_tail;
	if (tail >= q->q_entries)
		tail = 0;
	q->q_sq_tail = tail;
	nvme_write4(sc, q->q_sqtdbl, tail);
}

void
nvme_op_sq_leave(struct nvme_softc *sc,
    struct nvme_queue *q, struct nvme_ccb *ccb)
{
	nvme_op_sq_leave_locked(sc, q, ccb);
	mtx_leave(&q->q_sq_mtx);
}

/*
 * Submit one command on 'q': reserve a SQ slot via the sq_enter hook,
 * let 'fill' build the SQE in place, stamp it with the ccb id so the
 * completion can be matched back, then ring the doorbell via sq_leave.
 */
void
nvme_q_submit(struct nvme_softc *sc, struct nvme_queue *q, struct nvme_ccb *ccb,
    void (*fill)(struct nvme_softc *, struct nvme_ccb *, void *))
{
	struct nvme_sqe *sqe = NVME_DMA_KVA(q->q_sq_dmamem);
	u_int32_t tail;

	tail = sc->sc_ops->op_sq_enter(sc, q, ccb);

	sqe += tail;

	bus_dmamap_sync(sc->sc_dmat, NVME_DMA_MAP(q->q_sq_dmamem),
	    sizeof(*sqe) * tail, sizeof(*sqe), BUS_DMASYNC_POSTWRITE);
	memset(sqe, 0, sizeof(*sqe));
	(*fill)(sc, ccb, sqe);
	sqe->cid = ccb->ccb_id;
	bus_dmamap_sync(sc->sc_dmat, NVME_DMA_MAP(q->q_sq_dmamem),
	    sizeof(*sqe) * tail, sizeof(*sqe), BUS_DMASYNC_PREWRITE);

	sc->sc_ops->op_sq_leave(sc, q, ccb);
}

/* Saved SQE plus the CQE captured by nvme_poll_done(). */
struct nvme_poll_state {
	struct nvme_sqe s;
	struct nvme_cqe c;
};

/*
 * Submit a command and busy-wait for its completion, up to 'ms'
 * milliseconds (0 means wait forever).  The ccb's done/cookie are
 * temporarily hijacked so the completion lands in 'state'; the
 * original done routine is invoked afterwards.  Returns the CQE
 * status flags with the phase bit masked off (0 == success).
 */
int
nvme_poll(struct nvme_softc *sc, struct nvme_queue *q, struct nvme_ccb *ccb,
    void (*fill)(struct nvme_softc *, struct nvme_ccb *, void *), u_int32_t ms)
{
	struct nvme_poll_state state;
	void (*done)(struct nvme_softc *, struct nvme_ccb *, struct nvme_cqe *);
	void *cookie;
	int64_t us;
	u_int16_t flags;

	memset(&state, 0, sizeof(state));
	(*fill)(sc, ccb, &state.s);

	done = ccb->ccb_done;
	cookie = ccb->ccb_cookie;

	ccb->ccb_done = nvme_poll_done;
	ccb->ccb_cookie = &state;

	nvme_q_submit(sc, q, ccb, nvme_poll_fill);
	/*
	 * nvme_poll_done() sets the phase bit in state.c.flags when the
	 * completion has arrived.  NVME_TIMO_DELAYNS is used here as the
	 * per-iteration delay(9) argument (microseconds; see the note at
	 * its definition).
	 */
	for (us = ms * 1000; ms == 0 || us > 0; us -= NVME_TIMO_DELAYNS) {
		if (ISSET(state.c.flags, htole16(NVME_CQE_PHASE)))
			break;
		if (nvme_q_complete(sc, q) == 0)
			delay(NVME_TIMO_DELAYNS);
		nvme_barrier(sc, NVME_CSTS, 4, BUS_SPACE_BARRIER_READ);
	}

	ccb->ccb_cookie = cookie;
	done(sc, ccb, &state.c);

	flags = lemtoh16(&state.c.flags);

	return (flags & ~NVME_CQE_PHASE);
}

/* SQE fill callback: copy the SQE prepared in the poll state. */
void
nvme_poll_fill(struct nvme_softc *sc, struct nvme_ccb *ccb, void *slot)
{
	struct nvme_sqe *sqe = slot;
	struct nvme_poll_state *state = ccb->ccb_cookie;

	*sqe = state->s;
}

/*
 * Completion callback for polled commands: capture the CQE and set
 * the phase bit as a "completion arrived" marker for the poll loop.
 */
void
nvme_poll_done(struct nvme_softc *sc, struct nvme_ccb *ccb,
    struct nvme_cqe *cqe)
{
	struct nvme_poll_state *state = ccb->ccb_cookie;

	state->c = *cqe;
	SET(state->c.flags, htole16(NVME_CQE_PHASE));
}

/* Generic SQE fill callback: the ccb cookie points at a prebuilt SQE. */
void
nvme_sqe_fill(struct nvme_softc *sc, struct nvme_ccb *ccb, void *slot)
{
	struct nvme_sqe *src = ccb->ccb_cookie;
	struct nvme_sqe *dst = slot;

	*dst = *src;
}

/* Completion callback for commands that need no completion handling. */
void
nvme_empty_done(struct nvme_softc *sc, struct nvme_ccb *ccb,
    struct nvme_cqe *cqe)
{
}

void
nvme_op_cq_done(struct nvme_softc *sc,
    struct nvme_queue *q, struct nvme_ccb *ccb)
{
	/* nop */
}

/*
 * Drain the completion queue: consume CQEs whose phase bit matches
 * the queue's current phase, dispatch each to its ccb's done routine,
 * and update the CQ head doorbell once.  Returns 1 if anything was
 * processed, 0 if the queue was empty, -1 if the CQ mutex was busy
 * (another CPU is already completing this queue).
 */
int
nvme_q_complete(struct nvme_softc *sc, struct nvme_queue *q)
{
	struct nvme_ccb *ccb;
	struct nvme_cqe *ring = NVME_DMA_KVA(q->q_cq_dmamem), *cqe;
	u_int32_t head;
	u_int16_t flags;
	int rv = 0;

	if (!mtx_enter_try(&q->q_cq_mtx))
		return (-1);

	head = q->q_cq_head;

	nvme_dmamem_sync(sc, q->q_cq_dmamem, BUS_DMASYNC_POSTREAD);
	for (;;) {
		cqe = &ring[head];
		flags = lemtoh16(&cqe->flags);
		if ((flags & NVME_CQE_PHASE) != q->q_cq_phase)
			break;

		/* Read the CQE body only after the phase bit says it's ours. */
		membar_consumer();

		ccb = &sc->sc_ccbs[cqe->cid];
		sc->sc_ops->op_cq_done(sc, q, ccb);
		ccb->ccb_done(sc, ccb, cqe);

		if (++head >= q->q_entries) {
			head = 0;
			/* Wrapped: the valid phase flips each lap. */
			q->q_cq_phase ^= NVME_CQE_PHASE;
		}

		rv = 1;
	}
	nvme_dmamem_sync(sc, q->q_cq_dmamem, BUS_DMASYNC_PREREAD);

	if (rv)
		nvme_write4(sc, q->q_cqhdbl, q->q_cq_head = head);
	mtx_leave(&q->q_cq_mtx);

	return (rv);
}

int
nvme_identify(struct nvme_softc *sc, u_int mpsmin)
{
	char sn[41], mn[81], fr[17];
	struct nvm_identify_controller *identify;
	struct nvme_dmamem *mem;
	struct nvme_ccb *ccb;
	int rv = 1;

	ccb = nvme_ccb_get(sc);
	if (ccb == NULL)
		panic("nvme_identify: nvme_ccb_get returned NULL");

	mem = nvme_dmamem_alloc(sc, sizeof(*identify));
	if (mem == NULL)
		return (1);

	ccb->ccb_done = nvme_empty_done;
	ccb->ccb_cookie = mem;

	nvme_dmamem_sync(sc,
mem, BUS_DMASYNC_PREREAD); 1268 rv = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_fill_identify, 1269 NVME_TIMO_IDENT); 1270 nvme_dmamem_sync(sc, mem, BUS_DMASYNC_POSTREAD); 1271 1272 nvme_ccb_put(sc, ccb); 1273 1274 if (rv != 0) 1275 goto done; 1276 1277 identify = NVME_DMA_KVA(mem); 1278 1279 scsi_strvis(sn, identify->sn, sizeof(identify->sn)); 1280 scsi_strvis(mn, identify->mn, sizeof(identify->mn)); 1281 scsi_strvis(fr, identify->fr, sizeof(identify->fr)); 1282 1283 printf("%s: %s, firmware %s, serial %s\n", DEVNAME(sc), mn, fr, sn); 1284 1285 if (identify->mdts > 0) { 1286 sc->sc_mdts = (1 << identify->mdts) * (1 << mpsmin); 1287 if (sc->sc_mdts > NVME_MAXPHYS) 1288 sc->sc_mdts = NVME_MAXPHYS; 1289 sc->sc_max_prpl = sc->sc_mdts / sc->sc_mps; 1290 } 1291 1292 sc->sc_nn = lemtoh32(&identify->nn); 1293 1294 /* 1295 * At least one Apple NVMe device presents a second, bogus disk that is 1296 * inaccessible, so cap targets at 1. 1297 * 1298 * sd1 at scsibus1 targ 2 lun 0: <NVMe, APPLE SSD AP0512, 16.1> [..] 
1299 * sd1: 0MB, 4096 bytes/sector, 2 sectors 1300 */ 1301 if (sc->sc_nn > 1 && 1302 mn[0] == 'A' && mn[1] == 'P' && mn[2] == 'P' && mn[3] == 'L' && 1303 mn[4] == 'E') 1304 sc->sc_nn = 1; 1305 1306 memcpy(&sc->sc_identify, identify, sizeof(sc->sc_identify)); 1307 1308 done: 1309 nvme_dmamem_free(sc, mem); 1310 1311 return (rv); 1312 } 1313 1314 int 1315 nvme_q_create(struct nvme_softc *sc, struct nvme_queue *q) 1316 { 1317 struct nvme_sqe_q sqe; 1318 struct nvme_ccb *ccb; 1319 int rv; 1320 1321 ccb = scsi_io_get(&sc->sc_iopool, 0); 1322 KASSERT(ccb != NULL); 1323 1324 ccb->ccb_done = nvme_empty_done; 1325 ccb->ccb_cookie = &sqe; 1326 1327 memset(&sqe, 0, sizeof(sqe)); 1328 sqe.opcode = NVM_ADMIN_ADD_IOCQ; 1329 htolem64(&sqe.prp1, NVME_DMA_DVA(q->q_cq_dmamem)); 1330 htolem16(&sqe.qsize, q->q_entries - 1); 1331 htolem16(&sqe.qid, q->q_id); 1332 sqe.qflags = NVM_SQE_CQ_IEN | NVM_SQE_Q_PC; 1333 1334 rv = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_sqe_fill, NVME_TIMO_QOP); 1335 if (rv != 0) 1336 goto fail; 1337 1338 ccb->ccb_done = nvme_empty_done; 1339 ccb->ccb_cookie = &sqe; 1340 1341 memset(&sqe, 0, sizeof(sqe)); 1342 sqe.opcode = NVM_ADMIN_ADD_IOSQ; 1343 htolem64(&sqe.prp1, NVME_DMA_DVA(q->q_sq_dmamem)); 1344 htolem16(&sqe.qsize, q->q_entries - 1); 1345 htolem16(&sqe.qid, q->q_id); 1346 htolem16(&sqe.cqid, q->q_id); 1347 sqe.qflags = NVM_SQE_Q_PC; 1348 1349 rv = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_sqe_fill, NVME_TIMO_QOP); 1350 if (rv != 0) 1351 goto fail; 1352 1353 fail: 1354 scsi_io_put(&sc->sc_iopool, ccb); 1355 return (rv); 1356 } 1357 1358 int 1359 nvme_q_delete(struct nvme_softc *sc, struct nvme_queue *q) 1360 { 1361 struct nvme_sqe_q sqe; 1362 struct nvme_ccb *ccb; 1363 int rv; 1364 1365 ccb = scsi_io_get(&sc->sc_iopool, 0); 1366 KASSERT(ccb != NULL); 1367 1368 ccb->ccb_done = nvme_empty_done; 1369 ccb->ccb_cookie = &sqe; 1370 1371 memset(&sqe, 0, sizeof(sqe)); 1372 sqe.opcode = NVM_ADMIN_DEL_IOSQ; 1373 htolem16(&sqe.qid, q->q_id); 1374 1375 rv = 
nvme_poll(sc, sc->sc_admin_q, ccb, nvme_sqe_fill, NVME_TIMO_QOP); 1376 if (rv != 0) 1377 goto fail; 1378 1379 ccb->ccb_done = nvme_empty_done; 1380 ccb->ccb_cookie = &sqe; 1381 1382 memset(&sqe, 0, sizeof(sqe)); 1383 sqe.opcode = NVM_ADMIN_DEL_IOCQ; 1384 htolem16(&sqe.qid, q->q_id); 1385 1386 rv = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_sqe_fill, NVME_TIMO_QOP); 1387 if (rv != 0) 1388 goto fail; 1389 1390 nvme_q_free(sc, q); 1391 1392 fail: 1393 scsi_io_put(&sc->sc_iopool, ccb); 1394 return (rv); 1395 1396 } 1397 1398 void 1399 nvme_fill_identify(struct nvme_softc *sc, struct nvme_ccb *ccb, void *slot) 1400 { 1401 struct nvme_sqe *sqe = slot; 1402 struct nvme_dmamem *mem = ccb->ccb_cookie; 1403 1404 sqe->opcode = NVM_ADMIN_IDENTIFY; 1405 htolem64(&sqe->entry.prp[0], NVME_DMA_DVA(mem)); 1406 htolem32(&sqe->cdw10, 1); 1407 } 1408 1409 int 1410 nvme_ccbs_alloc(struct nvme_softc *sc, u_int nccbs) 1411 { 1412 struct nvme_ccb *ccb; 1413 bus_addr_t off; 1414 u_int64_t *prpl; 1415 u_int i; 1416 1417 sc->sc_ccbs = mallocarray(nccbs, sizeof(*ccb), M_DEVBUF, 1418 M_WAITOK | M_CANFAIL); 1419 if (sc->sc_ccbs == NULL) 1420 return (1); 1421 1422 sc->sc_ccb_prpls = nvme_dmamem_alloc(sc, 1423 sizeof(*prpl) * sc->sc_max_prpl * nccbs); 1424 1425 prpl = NVME_DMA_KVA(sc->sc_ccb_prpls); 1426 off = 0; 1427 1428 for (i = 0; i < nccbs; i++) { 1429 ccb = &sc->sc_ccbs[i]; 1430 1431 if (bus_dmamap_create(sc->sc_dmat, sc->sc_mdts, 1432 sc->sc_max_prpl + 1, /* we get a free prp in the sqe */ 1433 sc->sc_mps, sc->sc_mps, 1434 BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW | BUS_DMA_64BIT, 1435 &ccb->ccb_dmamap) != 0) 1436 goto free_maps; 1437 1438 ccb->ccb_id = i; 1439 ccb->ccb_prpl = prpl; 1440 ccb->ccb_prpl_off = off; 1441 ccb->ccb_prpl_dva = NVME_DMA_DVA(sc->sc_ccb_prpls) + off; 1442 1443 SIMPLEQ_INSERT_TAIL(&sc->sc_ccb_list, ccb, ccb_entry); 1444 1445 prpl += sc->sc_max_prpl; 1446 off += sizeof(*prpl) * sc->sc_max_prpl; 1447 } 1448 1449 return (0); 1450 1451 free_maps: 1452 nvme_ccbs_free(sc, nccbs); 
	return (1);
}

/* iopool "get" hook: pop a free ccb, or NULL when the list is empty. */
void *
nvme_ccb_get(void *cookie)
{
	struct nvme_softc *sc = cookie;
	struct nvme_ccb *ccb;

	mtx_enter(&sc->sc_ccb_mtx);
	ccb = SIMPLEQ_FIRST(&sc->sc_ccb_list);
	if (ccb != NULL)
		SIMPLEQ_REMOVE_HEAD(&sc->sc_ccb_list, ccb_entry);
	mtx_leave(&sc->sc_ccb_mtx);

	return (ccb);
}

/* iopool "put" hook: return a ccb to the head of the free list. */
void
nvme_ccb_put(void *cookie, void *io)
{
	struct nvme_softc *sc = cookie;
	struct nvme_ccb *ccb = io;

	mtx_enter(&sc->sc_ccb_mtx);
	SIMPLEQ_INSERT_HEAD(&sc->sc_ccb_list, ccb, ccb_entry);
	mtx_leave(&sc->sc_ccb_mtx);
}

/*
 * Undo nvme_ccbs_alloc(): destroy each ccb's dma map, release the
 * shared PRP list dma memory and free the ccb array itself.
 */
void
nvme_ccbs_free(struct nvme_softc *sc, unsigned int nccbs)
{
	struct nvme_ccb *ccb;

	while ((ccb = SIMPLEQ_FIRST(&sc->sc_ccb_list)) != NULL) {
		SIMPLEQ_REMOVE_HEAD(&sc->sc_ccb_list, ccb_entry);
		bus_dmamap_destroy(sc->sc_dmat, ccb->ccb_dmamap);
	}

	nvme_dmamem_free(sc, sc->sc_ccb_prpls);
	free(sc->sc_ccbs, M_DEVBUF, nccbs * sizeof(*ccb));
}

/*
 * Allocate a submission/completion ring pair and bookkeeping for
 * queue "id".  "dstrd" is the doorbell stride used to locate the two
 * doorbell registers.  Returns NULL on failure.
 */
struct nvme_queue *
nvme_q_alloc(struct nvme_softc *sc, u_int16_t id, u_int entries, u_int dstrd)
{
	struct nvme_queue *q;

	q = malloc(sizeof(*q), M_DEVBUF, M_WAITOK | M_CANFAIL);
	if (q == NULL)
		return (NULL);

	q->q_sq_dmamem = nvme_dmamem_alloc(sc,
	    sizeof(struct nvme_sqe) * entries);
	if (q->q_sq_dmamem == NULL)
		goto free;

	q->q_cq_dmamem = nvme_dmamem_alloc(sc,
	    sizeof(struct nvme_cqe) * entries);
	if (q->q_cq_dmamem == NULL)
		goto free_sq;

	memset(NVME_DMA_KVA(q->q_sq_dmamem), 0, NVME_DMA_LEN(q->q_sq_dmamem));
	memset(NVME_DMA_KVA(q->q_cq_dmamem), 0, NVME_DMA_LEN(q->q_cq_dmamem));

	mtx_init(&q->q_sq_mtx, IPL_BIO);
	mtx_init(&q->q_cq_mtx, IPL_BIO);
	q->q_sqtdbl = NVME_SQTDBL(id, dstrd);
	q->q_cqhdbl = NVME_CQHDBL(id, dstrd);

	q->q_id = id;
	q->q_entries = entries;
	q->q_sq_tail = 0;
	q->q_cq_head = 0;
	/* a freshly zeroed ring completes with phase bit set first */
	q->q_cq_phase = NVME_CQE_PHASE;

	if (sc->sc_ops->op_q_alloc != NULL) {
		if (sc->sc_ops->op_q_alloc(sc, q) != 0)
			goto free_cq;
	}

	nvme_dmamem_sync(sc, q->q_sq_dmamem, BUS_DMASYNC_PREWRITE);
	nvme_dmamem_sync(sc, q->q_cq_dmamem, BUS_DMASYNC_PREREAD);

	return (q);

free_cq:
	nvme_dmamem_free(sc, q->q_cq_dmamem);
free_sq:
	nvme_dmamem_free(sc, q->q_sq_dmamem);
free:
	free(q, M_DEVBUF, sizeof *q);

	return (NULL);
}

/* Re-zero the rings and reset head/tail/phase, e.g. across resume. */
int
nvme_q_reset(struct nvme_softc *sc, struct nvme_queue *q)
{
	memset(NVME_DMA_KVA(q->q_sq_dmamem), 0, NVME_DMA_LEN(q->q_sq_dmamem));
	memset(NVME_DMA_KVA(q->q_cq_dmamem), 0, NVME_DMA_LEN(q->q_cq_dmamem));

	q->q_sq_tail = 0;
	q->q_cq_head = 0;
	q->q_cq_phase = NVME_CQE_PHASE;

	nvme_dmamem_sync(sc, q->q_sq_dmamem, BUS_DMASYNC_PREWRITE);
	nvme_dmamem_sync(sc, q->q_cq_dmamem, BUS_DMASYNC_PREREAD);

	return (0);
}

/* Release a queue pair allocated by nvme_q_alloc(). */
void
nvme_q_free(struct nvme_softc *sc, struct nvme_queue *q)
{
	nvme_dmamem_sync(sc, q->q_cq_dmamem, BUS_DMASYNC_POSTREAD);
	nvme_dmamem_sync(sc, q->q_sq_dmamem, BUS_DMASYNC_POSTWRITE);

	if (sc->sc_ops->op_q_free != NULL)
		sc->sc_ops->op_q_free(sc, q);

	nvme_dmamem_free(sc, q->q_cq_dmamem);
	nvme_dmamem_free(sc, q->q_sq_dmamem);
	free(q, M_DEVBUF, sizeof *q);
}

/* Interrupt handler: drain the I/O queue, then the admin queue. */
int
nvme_intr(void *xsc)
{
	struct nvme_softc *sc = xsc;
	int rv = 0;

	if (nvme_q_complete(sc, sc->sc_q))
		rv = 1;
	if (nvme_q_complete(sc, sc->sc_admin_q))
		rv = 1;

	return (rv);
}

/*
 * INTx interrupt handler: mask the controller interrupt around the
 * actual completion processing, then unmask it again.
 */
int
nvme_intr_intx(void *xsc)
{
	struct nvme_softc *sc = xsc;
	int rv;

	nvme_write4(sc, NVME_INTMS, 1);
	rv = nvme_intr(sc);
	nvme_write4(sc, NVME_INTMC, 1);

	return (rv);
}

/*
 * Allocate "size" bytes of DMA memory in a single segment, map it
 * into kernel virtual address space and load it into a dma map.
 * Returns NULL on failure.
 */
struct nvme_dmamem *
nvme_dmamem_alloc(struct nvme_softc *sc, size_t size)
{
	struct nvme_dmamem *ndm;
	int nsegs;

	ndm = malloc(sizeof(*ndm), M_DEVBUF, M_WAITOK | M_ZERO);
	if (ndm == NULL)
		return (NULL);

	ndm->ndm_size = size;

	if (bus_dmamap_create(sc->sc_dmat, size, 1, size, 0,
	    BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW | BUS_DMA_64BIT,
	    &ndm->ndm_map) != 0)
		goto ndmfree;

	if (bus_dmamem_alloc(sc->sc_dmat, size, sc->sc_mps, 0, &ndm->ndm_seg,
	    1, &nsegs, BUS_DMA_WAITOK | BUS_DMA_ZERO | BUS_DMA_64BIT) != 0)
		goto destroy;

	if (bus_dmamem_map(sc->sc_dmat, &ndm->ndm_seg, nsegs, size,
	    &ndm->ndm_kva, BUS_DMA_WAITOK) != 0)
		goto free;

	if (bus_dmamap_load(sc->sc_dmat, ndm->ndm_map, ndm->ndm_kva, size,
	    NULL, BUS_DMA_WAITOK) != 0)
		goto unmap;

	return (ndm);

unmap:
	bus_dmamem_unmap(sc->sc_dmat, ndm->ndm_kva, size);
free:
	bus_dmamem_free(sc->sc_dmat, &ndm->ndm_seg, 1);
destroy:
	bus_dmamap_destroy(sc->sc_dmat, ndm->ndm_map);
ndmfree:
	free(ndm, M_DEVBUF, sizeof *ndm);

	return (NULL);
}

/* Sync the whole dma area with the given bus_dma sync ops. */
void
nvme_dmamem_sync(struct nvme_softc *sc, struct nvme_dmamem *mem, int ops)
{
	bus_dmamap_sync(sc->sc_dmat, NVME_DMA_MAP(mem),
	    0, NVME_DMA_LEN(mem), ops);
}

/* Tear down everything nvme_dmamem_alloc() set up, in reverse order. */
void
nvme_dmamem_free(struct nvme_softc *sc, struct nvme_dmamem *ndm)
{
	bus_dmamap_unload(sc->sc_dmat, ndm->ndm_map);
	bus_dmamem_unmap(sc->sc_dmat, ndm->ndm_kva, ndm->ndm_size);
	bus_dmamem_free(sc->sc_dmat, &ndm->ndm_seg, 1);
	bus_dmamap_destroy(sc->sc_dmat, ndm->ndm_map);
	free(ndm, M_DEVBUF, sizeof *ndm);
}

#ifdef HIBERNATE

/*
 * Submit one admin command on the already initialized admin queue and
 * spin for its completion.  Runs single threaded during hibernate,
 * hence the *_locked submission hooks with no ccb.
 */
int
nvme_hibernate_admin_cmd(struct nvme_softc *sc, struct nvme_sqe *sqe,
    struct nvme_cqe *cqe, int cid)
{
	struct nvme_sqe *asqe = NVME_DMA_KVA(sc->sc_admin_q->q_sq_dmamem);
	struct nvme_cqe *acqe = NVME_DMA_KVA(sc->sc_admin_q->q_cq_dmamem);
	struct nvme_queue *q = sc->sc_admin_q;
	int tail;
	u_int16_t flags;

	/* submit command */
	tail = sc->sc_ops->op_sq_enter_locked(sc, q, /* XXX ccb */ NULL);

	asqe += tail;
	bus_dmamap_sync(sc->sc_dmat, NVME_DMA_MAP(q->q_sq_dmamem),
	    sizeof(*sqe) * tail, sizeof(*sqe), BUS_DMASYNC_POSTWRITE);
	*asqe = *sqe;
	asqe->cid = cid;
	bus_dmamap_sync(sc->sc_dmat, NVME_DMA_MAP(q->q_sq_dmamem),
	    sizeof(*sqe) * tail, sizeof(*sqe), BUS_DMASYNC_PREWRITE);

	sc->sc_ops->op_sq_leave_locked(sc, q, /* XXX ccb */ NULL);

	/* wait for completion */
	acqe += q->q_cq_head;
	for (;;) {
		nvme_dmamem_sync(sc, q->q_cq_dmamem, BUS_DMASYNC_POSTREAD);
		flags = lemtoh16(&acqe->flags);
		if ((flags & NVME_CQE_PHASE) == q->q_cq_phase)
			break;

		delay(10);
	}

	/* acknowledge the CQE and keep head/phase bookkeeping in sync */
	if (++q->q_cq_head >= q->q_entries) {
		q->q_cq_head = 0;
		q->q_cq_phase ^= NVME_CQE_PHASE;
	}
	nvme_write4(sc, q->q_cqhdbl, q->q_cq_head);
	if ((NVME_CQE_SC(flags) != NVME_CQE_SC_SUCCESS) || (acqe->cid != cid))
		return (EIO);

	return (0);
}

/*
 * Hibernate block I/O hook.  HIB_INIT locates the nvme softc and
 * namespace behind the hibernate device and sets up the dedicated
 * hibernate I/O queue pair; HIB_W submits one write and polls for its
 * completion.  All persistent state lives in the caller-supplied
 * "page".
 */
int
nvme_hibernate_io(dev_t dev, daddr_t blkno, vaddr_t addr, size_t size,
    int op, void *page)
{
	struct nvme_hibernate_page {
		u_int64_t prpl[MAXPHYS / PAGE_SIZE];

		struct nvme_softc *sc;
		int nsid;
		int sq_tail;
		int cq_head;
		int cqe_phase;

		daddr_t poffset;
		size_t psize;
		u_int32_t secsize;
	} *my = page;
	struct nvme_sqe_io *isqe;
	struct nvme_cqe *icqe;
	paddr_t data_phys, page_phys;
	u_int64_t data_bus_phys, page_bus_phys;
	u_int16_t flags;
	int i;
	int error;

	if (op == HIB_INIT) {
		struct device *disk;
		struct device *scsibus;
		struct nvm_identify_namespace *ns;
		struct nvm_namespace_format *f;
		extern struct cfdriver sd_cd;
		struct scsi_link *link;
		struct scsibus_softc *bus_sc;
		struct nvme_sqe_q qsqe;
		struct nvme_cqe qcqe;

		/* find nvme softc */
		disk = disk_lookup(&sd_cd, DISKUNIT(dev));
		scsibus = disk->dv_parent;
		my->sc = (struct nvme_softc *)disk->dv_parent->dv_parent;

		/* find scsi_link, which tells us the target */
		my->nsid = 0;
		bus_sc = (struct scsibus_softc *)scsibus;
		SLIST_FOREACH(link, &bus_sc->sc_link_list, bus_list) {
			if (link->device_softc == disk) {
				my->nsid = link->target;
				break;
			}
		}
		if (my->nsid == 0)
			return (EIO);
		ns = my->sc->sc_namespaces[my->nsid].ident;
		f = &ns->lbaf[NVME_ID_NS_FLBAS(ns->flbas)];

		my->poffset = blkno;
		my->psize = size;
		my->secsize = 1 << f->lbads;

		memset(NVME_DMA_KVA(my->sc->sc_hib_q->q_cq_dmamem), 0,
		    my->sc->sc_hib_q->q_entries * sizeof(struct nvme_cqe));
		memset(NVME_DMA_KVA(my->sc->sc_hib_q->q_sq_dmamem), 0,
		    my->sc->sc_hib_q->q_entries * sizeof(struct nvme_sqe));

		my->sq_tail = 0;
		my->cq_head = 0;
		my->cqe_phase = NVME_CQE_PHASE;

		/* create the hibernate completion queue ... */
		memset(&qsqe, 0, sizeof(qsqe));
		qsqe.opcode = NVM_ADMIN_ADD_IOCQ;
		htolem64(&qsqe.prp1,
		    NVME_DMA_DVA(my->sc->sc_hib_q->q_cq_dmamem));
		htolem16(&qsqe.qsize, my->sc->sc_hib_q->q_entries - 1);
		htolem16(&qsqe.qid, my->sc->sc_hib_q->q_id);
		qsqe.qflags = NVM_SQE_CQ_IEN | NVM_SQE_Q_PC;
		if (nvme_hibernate_admin_cmd(my->sc, (struct nvme_sqe *)&qsqe,
		    &qcqe, 1) != 0)
			return (EIO);

		/* ... then the submission queue bound to it */
		memset(&qsqe, 0, sizeof(qsqe));
		qsqe.opcode = NVM_ADMIN_ADD_IOSQ;
		htolem64(&qsqe.prp1,
		    NVME_DMA_DVA(my->sc->sc_hib_q->q_sq_dmamem));
		htolem16(&qsqe.qsize, my->sc->sc_hib_q->q_entries - 1);
		htolem16(&qsqe.qid, my->sc->sc_hib_q->q_id);
		htolem16(&qsqe.cqid, my->sc->sc_hib_q->q_id);
		qsqe.qflags = NVM_SQE_Q_PC;
		if (nvme_hibernate_admin_cmd(my->sc, (struct nvme_sqe *)&qsqe,
		    &qcqe, 2) != 0)
			return (EIO);

		return (0);
	}

	if (op != HIB_W)
		return (0);

	if (blkno + (size / DEV_BSIZE) > my->psize)
		return E2BIG;

	isqe = NVME_DMA_KVA(my->sc->sc_hib_q->q_sq_dmamem);
	isqe += my->sq_tail;
	if (++my->sq_tail == my->sc->sc_hib_q->q_entries)
		my->sq_tail = 0;

	memset(isqe, 0, sizeof(*isqe));
	isqe->opcode = NVM_CMD_WRITE;
	htolem32(&isqe->nsid, my->nsid);

	/* 1 PRP entry covers one page; 2 cover two; beyond that a PRP list */
	pmap_extract(pmap_kernel(), addr, &data_phys);
	data_bus_phys = data_phys;
	htolem64(&isqe->entry.prp[0], data_bus_phys);
	if ((size > my->sc->sc_mps) && (size <= my->sc->sc_mps * 2)) {
		htolem64(&isqe->entry.prp[1], data_bus_phys + my->sc->sc_mps);
	} else if (size > my->sc->sc_mps * 2) {
		pmap_extract(pmap_kernel(), (vaddr_t)page, &page_phys);
		page_bus_phys = page_phys;
		htolem64(&isqe->entry.prp[1], page_bus_phys +
		    offsetof(struct nvme_hibernate_page, prpl));
		for (i = 1; i < howmany(size, my->sc->sc_mps); i++) {
			htolem64(&my->prpl[i - 1], data_bus_phys +
			    (i * my->sc->sc_mps));
		}
	}

	isqe->slba = (blkno + my->poffset) / (my->secsize / DEV_BSIZE);
	isqe->nlb = (size / my->secsize) - 1;
	isqe->cid = blkno % 0xffff;

	nvme_write4(my->sc, NVME_SQTDBL(NVME_HIB_Q, my->sc->sc_dstrd),
	    my->sq_tail);
	nvme_barrier(my->sc, NVME_SQTDBL(NVME_HIB_Q, my->sc->sc_dstrd), 4,
	    BUS_SPACE_BARRIER_WRITE);

	error = 0;

	icqe = NVME_DMA_KVA(my->sc->sc_hib_q->q_cq_dmamem);
	icqe += my->cq_head;

	nvme_dmamem_sync(my->sc, my->sc->sc_hib_q->q_cq_dmamem,
	    BUS_DMASYNC_POSTREAD);
	for (;;) {
		flags = lemtoh16(&icqe->flags);
		if ((flags & NVME_CQE_PHASE) == my->cqe_phase) {
			if ((NVME_CQE_SC(flags) != NVME_CQE_SC_SUCCESS) ||
			    (icqe->cid != blkno % 0xffff))
				error = EIO;

			break;
		}

		delay(1);
		nvme_dmamem_sync(my->sc, my->sc->sc_hib_q->q_cq_dmamem,
		    BUS_DMASYNC_PREREAD|BUS_DMASYNC_POSTREAD);
	}
	nvme_dmamem_sync(my->sc, my->sc->sc_hib_q->q_cq_dmamem,
	    BUS_DMASYNC_PREREAD);

	if (++my->cq_head == my->sc->sc_hib_q->q_entries) {
		my->cq_head = 0;
		my->cqe_phase ^= NVME_CQE_PHASE;
	}

	nvme_write4(my->sc, NVME_CQHDBL(NVME_HIB_Q, my->sc->sc_dstrd),
	    my->cq_head);
	nvme_barrier(my->sc, NVME_CQHDBL(NVME_HIB_Q, my->sc->sc_dstrd), 4,
	    BUS_SPACE_BARRIER_WRITE);

	return (error);
}

#endif

#if NBIO > 0
/*
 * bioctl(8) entry point.  Dispatches inquiry/volume/disk queries and
 * raw passthrough commands; everything is serialized by sc_lock.
 */
int
nvme_bioctl(struct device *self, u_long cmd, caddr_t data)
{
	struct nvme_softc *sc = (struct nvme_softc *)self;
	struct nvme_pt_cmd *pt;
	int error = 0;

	rw_enter_write(&sc->sc_lock);

	switch (cmd) {
	case BIOCINQ:
		error = nvme_bioctl_inq(sc, (struct bioc_inq *)data);
		break;
	case BIOCVOL:
		error = nvme_bioctl_vol(sc, (struct bioc_vol *)data);
		break;
	case BIOCDISK:
		error = nvme_bioctl_disk(sc, (struct bioc_disk *)data);
		break;
	case NVME_PASSTHROUGH_CMD:
		pt = (struct nvme_pt_cmd *)data;
		error = nvme_passthrough_cmd(sc, pt, sc->sc_dev.dv_unit, -1);
		break;
	default:
		printf("nvme_bioctl() Unknown command (%lu)\n", cmd);
		error = ENOTTY;
	}

	rw_exit_write(&sc->sc_lock);

	return error;
}

/* printf-style helper appending an informational bio_status message. */
void
nvme_bio_status(struct bio_status *bs, const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	bio_status(bs, 0, BIO_MSG_INFO, fmt, &ap);
	va_end(ap);
}

/*
 * Map a target to the name of its attached sd(4) device, or NULL when
 * the device is missing/dying or the controller registers read back
 * as all-ones (dead PCI mapping).
 */
const char *
nvme_bioctl_sdname(const struct nvme_softc *sc, int target)
{
	const struct scsi_link *link;
	const struct sd_softc *sd;

	link = scsi_get_link(sc->sc_scsibus, target, 0);
	if (link == NULL)
		return NULL;
	sd = (struct sd_softc *)(link->device_softc);
	if (ISSET(link->state, SDEV_S_DYING) || sd == NULL ||
	    ISSET(sd->flags, SDF_DYING))
		return NULL;

	if (nvme_read4(sc, NVME_VS) == 0xffffffff)
		return NULL;

	return DEVNAME(sd);
}

/*
 * BIOCINQ handler: report controller identity, capabilities and the
 * current enable/ready/shutdown state as bio_status messages.
 */
int
nvme_bioctl_inq(struct nvme_softc *sc, struct bioc_inq *bi)
{
	char sn[41], mn[81], fr[17];
	struct nvm_identify_controller *idctrl = &sc->sc_identify;
	struct bio_status *bs;
	unsigned int nn;
	uint32_t cc, csts, vs;

	/* Don't tell bioctl about namespaces > last configured namespace. */
	for (nn = sc->sc_nn; nn > 0; nn--) {
		if (sc->sc_namespaces[nn].ident)
			break;
	}
	bi->bi_novol = bi->bi_nodisk = nn;
	strlcpy(bi->bi_dev, DEVNAME(sc), sizeof(bi->bi_dev));

	bs = &bi->bi_bio.bio_status;
	bio_status_init(bs, &sc->sc_dev);
	bs->bs_status = BIO_STATUS_SUCCESS;

	scsi_strvis(sn, idctrl->sn, sizeof(idctrl->sn));
	scsi_strvis(mn, idctrl->mn, sizeof(idctrl->mn));
	scsi_strvis(fr, idctrl->fr, sizeof(idctrl->fr));

	nvme_bio_status(bs, "%s, %s, %s", mn, fr, sn);
	nvme_bio_status(bs, "Max i/o %zu bytes%s%s%s, Sanitize 0x%b",
	    sc->sc_mdts,
	    ISSET(idctrl->lpa, NVM_ID_CTRL_LPA_PE) ?
	    ", Persistent Event Log" : "",
	    ISSET(idctrl->fna, NVM_ID_CTRL_FNA_CRYPTOFORMAT) ?
	    ", CryptoFormat" : "",
	    ISSET(idctrl->vwc, NVM_ID_CTRL_VWC_PRESENT) ?
	    ", Volatile Write Cache" : "",
	    lemtoh32(&idctrl->sanicap), NVM_ID_CTRL_SANICAP_FMT
	);

	if (idctrl->ctratt != 0)
		nvme_bio_status(bs, "Features 0x%b", lemtoh32(&idctrl->ctratt),
		    NVM_ID_CTRL_CTRATT_FMT);

	if (idctrl->oacs || idctrl->oncs) {
		nvme_bio_status(bs, "Admin commands 0x%b, NVM commands 0x%b",
		    lemtoh16(&idctrl->oacs), NVM_ID_CTRL_OACS_FMT,
		    lemtoh16(&idctrl->oncs), NVM_ID_CTRL_ONCS_FMT);
	}

	cc = nvme_read4(sc, NVME_CC);
	csts = nvme_read4(sc, NVME_CSTS);
	vs = nvme_read4(sc, NVME_VS);

	if (vs == 0xffffffff) {
		nvme_bio_status(bs, "Invalid PCIe register mapping");
		return 0;
	}

	nvme_bio_status(bs, "NVMe %u.%u%s%s%sabled, %sReady%s%s%s%s",
	    NVME_VS_MJR(vs), NVME_VS_MNR(vs),
	    (NVME_CC_CSS_R(cc) == NVME_CC_CSS_NVM) ? ", NVM I/O command set" : "",
	    (NVME_CC_CSS_R(cc) == 0x7) ? ", Admin command set only" : "",
	    ISSET(cc, NVME_CC_EN) ? ", En" : "Dis",
	    ISSET(csts, NVME_CSTS_RDY) ? "" : "Not ",
	    ISSET(csts, NVME_CSTS_CFS) ? ", Fatal Error, " : "",
	    (NVME_CC_SHN_R(cc) == NVME_CC_SHN_NORMAL) ? ", Normal shutdown" : "",
	    (NVME_CC_SHN_R(cc) == NVME_CC_SHN_ABRUPT) ? ", Abrupt shutdown" : "",
	    ISSET(csts, NVME_CSTS_SHST_DONE) ?
" complete" : ""); 2016 2017 return 0; 2018 } 2019 2020 int 2021 nvme_bioctl_vol(struct nvme_softc *sc, struct bioc_vol *bv) 2022 { 2023 const struct nvm_identify_namespace *idns; 2024 const char *sd; 2025 int target; 2026 unsigned int lbaf; 2027 2028 target = bv->bv_volid + 1; 2029 if (target > sc->sc_nn) { 2030 bv->bv_status = BIOC_SVINVALID; 2031 return 0; 2032 } 2033 2034 bv->bv_level = 'c'; 2035 bv->bv_nodisk = 1; 2036 2037 idns = sc->sc_namespaces[target].ident; 2038 if (idns == NULL) { 2039 bv->bv_status = BIOC_SVINVALID; 2040 return 0; 2041 } 2042 2043 lbaf = NVME_ID_NS_FLBAS(idns->flbas); 2044 if (idns->nlbaf > 16) 2045 lbaf |= (idns->flbas >> 1) & 0x3f; 2046 bv->bv_size = nvme_scsi_size(idns) << idns->lbaf[lbaf].lbads; 2047 2048 sd = nvme_bioctl_sdname(sc, target); 2049 if (sd) { 2050 strlcpy(bv->bv_dev, sd, sizeof(bv->bv_dev)); 2051 bv->bv_status = BIOC_SVONLINE; 2052 } else 2053 bv->bv_status = BIOC_SVOFFLINE; 2054 2055 return 0; 2056 } 2057 2058 int 2059 nvme_bioctl_disk(struct nvme_softc *sc, struct bioc_disk *bd) 2060 { 2061 const char *rpdesc[4] = { 2062 " (Best)", 2063 " (Better)", 2064 " (Good)", 2065 " (Degraded)" 2066 }; 2067 const char *protection[4] = { 2068 "not enabled", 2069 "Type 1", 2070 "Type 2", 2071 "Type 3", 2072 }; 2073 char buf[32], msg[BIO_MSG_LEN]; 2074 struct nvm_identify_namespace *idns; 2075 struct bio_status *bs; 2076 uint64_t id1, id2; 2077 unsigned int i, lbaf, target; 2078 uint16_t ms; 2079 uint8_t dps; 2080 2081 target = bd->bd_volid + 1; 2082 if (target > sc->sc_nn) 2083 return EINVAL; 2084 bd->bd_channel = sc->sc_scsibus->sc_dev.dv_unit; 2085 bd->bd_target = target; 2086 bd->bd_lun = 0; 2087 snprintf(bd->bd_procdev, sizeof(bd->bd_procdev), "Namespace %u", target); 2088 2089 bs = &bd->bd_bio.bio_status; 2090 bs->bs_status = BIO_STATUS_SUCCESS; 2091 snprintf(bs->bs_controller, sizeof(bs->bs_controller), "%11u", 2092 bd->bd_diskid); 2093 2094 idns = sc->sc_namespaces[target].ident; 2095 if (idns == NULL) { 2096 
bd->bd_status = BIOC_SDUNUSED; 2097 return 0; 2098 } 2099 2100 lbaf = NVME_ID_NS_FLBAS(idns->flbas); 2101 if (idns->nlbaf > nitems(idns->lbaf)) 2102 lbaf |= (idns->flbas >> 1) & 0x3f; 2103 bd->bd_size = lemtoh64(&idns->nsze) << idns->lbaf[lbaf].lbads; 2104 2105 if (memcmp(idns->nguid, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0", 16)) { 2106 memcpy(&id1, idns->nguid, sizeof(uint64_t)); 2107 memcpy(&id2, idns->nguid + sizeof(uint64_t), sizeof(uint64_t)); 2108 snprintf(bd->bd_serial, sizeof(bd->bd_serial), "%08llx%08llx", 2109 id1, id2); 2110 } else if (memcmp(idns->eui64, "\0\0\0\0\0\0\0\0", 8)) { 2111 memcpy(&id1, idns->eui64, sizeof(uint64_t)); 2112 snprintf(bd->bd_serial, sizeof(bd->bd_serial), "%08llx", id1); 2113 } 2114 2115 msg[0] = '\0'; 2116 for (i = 0; i <= idns->nlbaf; i++) { 2117 if (idns->lbaf[i].lbads == 0) 2118 continue; 2119 snprintf(buf, sizeof(buf), "%s%s%u", 2120 strlen(msg) ? ", " : "", (i == lbaf) ? "*" : "", 2121 1 << idns->lbaf[i].lbads); 2122 strlcat(msg, buf, sizeof(msg)); 2123 ms = lemtoh16(&idns->lbaf[i].ms); 2124 if (ms) { 2125 snprintf(buf, sizeof(buf), "+%u", ms); 2126 strlcat(msg, buf, sizeof(msg)); 2127 } 2128 strlcat(msg, rpdesc[idns->lbaf[i].rp], sizeof(msg)); 2129 } 2130 nvme_bio_status(bs, "Formats %s", msg); 2131 2132 if (idns->nsfeat) 2133 nvme_bio_status(bs, "Features 0x%b", idns->nsfeat, 2134 NVME_ID_NS_NSFEAT_FMT); 2135 2136 if (idns->dps) { 2137 dps = idns->dps; 2138 snprintf(msg, sizeof(msg), "Data Protection (0x%02x) " 2139 "Protection Data in ", dps); 2140 if (ISSET(dps, NVME_ID_NS_DPS_PIP)) 2141 strlcat(msg, "first", sizeof(msg)); 2142 else 2143 strlcat(msg, "last", sizeof(msg)); 2144 strlcat(msg, "bytes of metadata, Protection ", sizeof(msg)); 2145 if (NVME_ID_NS_DPS_TYPE(dps) >= nitems(protection)) 2146 strlcat(msg, "Type unknown", sizeof(msg)); 2147 else 2148 strlcat(msg, protection[NVME_ID_NS_DPS_TYPE(dps)], 2149 sizeof(msg)); 2150 nvme_bio_status(bs, "%s", msg); 2151 } 2152 2153 if (nvme_bioctl_sdname(sc, target) == NULL) 2154 
		bd->bd_status = BIOC_SDOFFLINE;
	else
		bd->bd_status = BIOC_SDONLINE;

	return 0;
}
#endif /* NBIO > 0 */

#ifndef SMALL_KERNEL
/*
 * Sensor refresh: fetch the SMART / Health Information log page and
 * update the temperature, available-spare and percent-used sensors.
 * On any failure all three sensors are marked UNKNOWN.
 */
void
nvme_refresh_sensors(void *arg)
{
	struct nvme_softc *sc = arg;
	struct nvme_sqe sqe;
	struct nvme_dmamem *mem = NULL;
	struct nvme_ccb *ccb = NULL;
	struct nvm_smart_health *health;
	uint32_t dwlen;
	uint8_t cw;
	int flags;
	int64_t temp;

	ccb = nvme_ccb_get(sc);
	if (ccb == NULL)
		goto failed;

	mem = nvme_dmamem_alloc(sc, sizeof(*health));
	if (mem == NULL)
		goto failed;
	nvme_dmamem_sync(sc, mem, BUS_DMASYNC_PREREAD);

	/* NUMD (upper half of cdw10) is zero-based and counted in dwords */
	dwlen = (sizeof(*health) >> 2) - 1;
	memset(&sqe, 0, sizeof(sqe));
	sqe.opcode = NVM_ADMIN_GET_LOG_PG;
	htolem32(&sqe.nsid, 0xffffffff);
	htolem32(&sqe.cdw10, (dwlen << 16 | NVM_LOG_PAGE_SMART_HEALTH));
	htolem64(&sqe.entry.prp[0], NVME_DMA_DVA(mem));

	ccb->ccb_done = nvme_empty_done;
	ccb->ccb_cookie = &sqe;
	flags = nvme_poll(sc, sc->sc_admin_q, ccb, nvme_sqe_fill, NVME_TIMO_LOG_PAGE);

	nvme_dmamem_sync(sc, mem, BUS_DMASYNC_POSTREAD);

	if (flags != 0)
		goto failed;

	health = NVME_DMA_KVA(mem);
	cw = health->critical_warning;

	sc->sc_temp_sensor.status = (cw & NVM_HEALTH_CW_TEMP) ?
	    SENSOR_S_CRIT : SENSOR_S_OK;
	/*
	 * NVMe reports Kelvin; the sensor value is in micro-Kelvin.
	 * NOTE(review): the +150000 looks like a rounding fudge — confirm.
	 */
	temp = letoh16(health->temperature);
	sc->sc_temp_sensor.value = (temp * 1000000) + 150000;

	sc->sc_spare_sensor.status = (cw & NVM_HEALTH_CW_SPARE) ?
	    SENSOR_S_CRIT : SENSOR_S_OK;
	sc->sc_spare_sensor.value = health->avail_spare * 1000;

	sc->sc_usage_sensor.status = SENSOR_S_OK;
	sc->sc_usage_sensor.value = health->percent_used * 1000;
	goto done;

failed:
	sc->sc_temp_sensor.status = SENSOR_S_UNKNOWN;
	sc->sc_usage_sensor.status = SENSOR_S_UNKNOWN;
	sc->sc_spare_sensor.status = SENSOR_S_UNKNOWN;
done:
	if (mem != NULL)
		nvme_dmamem_free(sc, mem);
	if (ccb != NULL)
		nvme_ccb_put(sc, ccb);
}
#endif /* SMALL_KERNEL */