/*	$NetBSD: nvme_pci.c,v 1.31 2021/10/10 07:50:53 skrll Exp $	*/
/*	$OpenBSD: nvme_pci.c,v 1.3 2016/04/14 11:18:32 dlg Exp $	*/

/*
 * Copyright (c) 2014 David Gwynne <dlg@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

/*-
 * Copyright (C) 2016 NONAKA Kimihiro <nonaka@netbsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: nvme_pci.c,v 1.31 2021/10/10 07:50:53 skrll Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/device.h>
#include <sys/bitops.h>
#include <sys/bus.h>
#include <sys/cpu.h>
#include <sys/interrupt.h>
#include <sys/kmem.h>
#include <sys/pmf.h>
#include <sys/module.h>

#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcidevs.h>

#include <dev/ic/nvmereg.h>
#include <dev/ic/nvmevar.h>

int nvme_pci_force_intx = 0;
int nvme_pci_mpsafe = 1;
int nvme_pci_mq = 1;		/* INTx: ioq=1, MSI/MSI-X: ioq=ncpu */

#define NVME_PCI_BAR	0x10	/* BAR0: NVMe controller registers */

struct nvme_pci_softc {
	struct nvme_softc	psc_nvme;

	pci_chipset_tag_t	psc_pc;
	pci_intr_handle_t	*psc_intrs;
	int			psc_nintrs;
};

static int	nvme_pci_match(device_t, cfdata_t, void *);
static void	nvme_pci_attach(device_t, device_t, void *);
static int	nvme_pci_detach(device_t, int);
static int	nvme_pci_rescan(device_t, const char *, const int *);
static bool	nvme_pci_suspend(device_t, const pmf_qual_t *);
static bool	nvme_pci_resume(device_t, const pmf_qual_t *);

CFATTACH_DECL3_NEW(nvme_pci, sizeof(struct nvme_pci_softc),
    nvme_pci_match, nvme_pci_attach, nvme_pci_detach, NULL, nvme_pci_rescan,
    nvme_childdet, DVF_DETACH_SHUTDOWN);

static int	nvme_pci_intr_establish(struct nvme_softc *,
		    uint16_t, struct nvme_queue *);
static int	nvme_pci_intr_disestablish(struct nvme_softc *, uint16_t);
static int	nvme_pci_setup_intr(struct pci_attach_args *,
		    struct nvme_pci_softc *);

static const struct nvme_pci_quirk {
	pci_vendor_id_t		vendor;
	pci_product_id_t	product;
	uint32_t		quirks;
} nvme_pci_quirks[] = {
	{ PCI_VENDOR_HGST, PCI_PRODUCT_HGST_SN100,
	    NVME_QUIRK_DELAY_B4_CHK_RDY },
	{ PCI_VENDOR_HGST, PCI_PRODUCT_HGST_SN200,
	    NVME_QUIRK_DELAY_B4_CHK_RDY },
	{ PCI_VENDOR_BEIJING_MEMBLAZE, PCI_PRODUCT_BEIJING_MEMBLAZE_PBLAZE4,
	    NVME_QUIRK_DELAY_B4_CHK_RDY },
	{ PCI_VENDOR_SAMSUNGELEC3, PCI_PRODUCT_SAMSUNGELEC3_172X,
	    NVME_QUIRK_DELAY_B4_CHK_RDY },
	{ PCI_VENDOR_SAMSUNGELEC3, PCI_PRODUCT_SAMSUNGELEC3_172XAB,
	    NVME_QUIRK_DELAY_B4_CHK_RDY },
	{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_DC_P4500_SSD,
	    NVME_QUIRK_NOMSI },
};

static const struct nvme_pci_quirk *
nvme_pci_lookup_quirk(struct pci_attach_args *pa)
{
	const struct nvme_pci_quirk *q;
	int i;

	for (i = 0; i < __arraycount(nvme_pci_quirks); i++) {
		q = &nvme_pci_quirks[i];

		if (PCI_VENDOR(pa->pa_id) == q->vendor &&
		    PCI_PRODUCT(pa->pa_id) == q->product)
			return q;
	}
	return NULL;
}

static int
nvme_pci_match(device_t parent, cfdata_t match, void *aux)
{
	struct pci_attach_args *pa = aux;

	if (PCI_CLASS(pa->pa_class) == PCI_CLASS_MASS_STORAGE &&
	    PCI_SUBCLASS(pa->pa_class) == PCI_SUBCLASS_MASS_STORAGE_NVM &&
	    PCI_INTERFACE(pa->pa_class) == PCI_INTERFACE_NVM_NVME_IO)
		return 1;

	return 0;
}

static void
nvme_pci_attach(device_t parent, device_t self, void *aux)
{
	struct nvme_pci_softc *psc = device_private(self);
	struct nvme_softc *sc = &psc->psc_nvme;
	struct pci_attach_args *pa = aux;
	const struct nvme_pci_quirk *quirk;
	pcireg_t memtype, reg;
	bus_addr_t memaddr;
	int flags, error;
	int msixoff;

	sc->sc_dev = self;
	psc->psc_pc = pa->pa_pc;
	if (pci_dma64_available(pa))
		sc->sc_dmat = pa->pa_dmat64;
	else
		sc->sc_dmat = pa->pa_dmat;

	quirk = nvme_pci_lookup_quirk(pa);
	if (quirk != NULL)
		sc->sc_quirks = quirk->quirks;

	pci_aprint_devinfo(pa, NULL);

	/* Map registers */
	memtype = pci_mapreg_type(pa->pa_pc, pa->pa_tag, NVME_PCI_BAR);
	if (PCI_MAPREG_TYPE(memtype) != PCI_MAPREG_TYPE_MEM) {
		aprint_error_dev(self, "invalid type (type=0x%x)\n", memtype);
		return;
	}
	reg = pci_conf_read(pa->pa_pc, pa->pa_tag, PCI_COMMAND_STATUS_REG);
	if (((reg & PCI_COMMAND_MASTER_ENABLE) == 0) ||
	    ((reg & PCI_COMMAND_MEM_ENABLE) == 0)) {
		/*
		 * Enable address decoding for memory range in case BIOS or
		 * UEFI didn't set it.
		 */
		reg |= PCI_COMMAND_MASTER_ENABLE | PCI_COMMAND_MEM_ENABLE;
		pci_conf_write(pa->pa_pc, pa->pa_tag, PCI_COMMAND_STATUS_REG,
		    reg);
	}

	sc->sc_iot = pa->pa_memt;
	error = pci_mapreg_info(pa->pa_pc, pa->pa_tag, NVME_PCI_BAR,
	    memtype, &memaddr, &sc->sc_ios, &flags);
	if (error) {
		aprint_error_dev(self, "can't get map info\n");
		return;
	}

	/*
	 * If the MSI-X table lives in the same BAR as the controller
	 * registers, map only the register space below the table.
	 */
	if (pci_get_capability(pa->pa_pc, pa->pa_tag, PCI_CAP_MSIX, &msixoff,
	    NULL)) {
		pcireg_t msixtbl;
		uint32_t table_offset;
		int bir;

		msixtbl = pci_conf_read(pa->pa_pc, pa->pa_tag,
		    msixoff + PCI_MSIX_TBLOFFSET);
		table_offset = msixtbl & PCI_MSIX_TBLOFFSET_MASK;
		bir = msixtbl & PCI_MSIX_TBLBIR_MASK;
		if (bir == PCI_MAPREG_NUM(NVME_PCI_BAR)) {
			sc->sc_ios = table_offset;
		}
	}

	error = bus_space_map(sc->sc_iot, memaddr, sc->sc_ios, flags,
	    &sc->sc_ioh);
	if (error != 0) {
		aprint_error_dev(self, "can't map mem space (error=%d)\n",
		    error);
		return;
	}

	/* Establish interrupts */
	if (nvme_pci_setup_intr(pa, psc) != 0) {
		aprint_error_dev(self, "unable to allocate interrupt\n");
		goto unmap;
	}
	sc->sc_intr_establish = nvme_pci_intr_establish;
	sc->sc_intr_disestablish = nvme_pci_intr_disestablish;

	sc->sc_ih = kmem_zalloc(sizeof(*sc->sc_ih) * psc->psc_nintrs, KM_SLEEP);
	sc->sc_softih = kmem_zalloc(
	    sizeof(*sc->sc_softih) * psc->psc_nintrs, KM_SLEEP);

	if (nvme_attach(sc) != 0) {
		/* error printed by nvme_attach() */
		goto softintr_free;
	}

	if (!pmf_device_register(self, nvme_pci_suspend, nvme_pci_resume))
		aprint_error_dev(self, "couldn't establish power handler\n");

	SET(sc->sc_flags, NVME_F_ATTACHED);
	return;

softintr_free:
	kmem_free(sc->sc_softih, sizeof(*sc->sc_softih) * psc->psc_nintrs);
	kmem_free(sc->sc_ih, sizeof(*sc->sc_ih) * psc->psc_nintrs);
	sc->sc_nq = 0;
	pci_intr_release(pa->pa_pc, psc->psc_intrs, psc->psc_nintrs);
	psc->psc_nintrs = 0;
unmap:
	bus_space_unmap(sc->sc_iot, sc->sc_ioh, sc->sc_ios);
	sc->sc_ios = 0;
}

static int
nvme_pci_rescan(device_t self, const char *attr, const int *flags)
{

	return nvme_rescan(self, attr, flags);
}

static bool
nvme_pci_suspend(device_t self, const pmf_qual_t *qual)
{
	struct nvme_pci_softc *psc = device_private(self);
	struct nvme_softc *sc = &psc->psc_nvme;
	int error;

	error = nvme_suspend(sc);
	if (error)
		return false;

	return true;
}

static bool
nvme_pci_resume(device_t self, const pmf_qual_t *qual)
{
	struct nvme_pci_softc *psc = device_private(self);
	struct nvme_softc *sc = &psc->psc_nvme;
	int error;

	error = nvme_resume(sc);
	if (error)
		return false;

	return true;
}

static int
nvme_pci_detach(device_t self, int flags)
{
	struct nvme_pci_softc *psc = device_private(self);
	struct nvme_softc *sc = &psc->psc_nvme;
	int error;

	if (!ISSET(sc->sc_flags, NVME_F_ATTACHED))
		return 0;

	error = nvme_detach(sc, flags);
	if (error)
		return error;

	kmem_free(sc->sc_softih, sizeof(*sc->sc_softih) * psc->psc_nintrs);
	sc->sc_softih = NULL;

	kmem_free(sc->sc_ih, sizeof(*sc->sc_ih) * psc->psc_nintrs);
	pci_intr_release(psc->psc_pc, psc->psc_intrs, psc->psc_nintrs);
	bus_space_unmap(sc->sc_iot, sc->sc_ioh, sc->sc_ios);
	return 0;
}

static int
nvme_pci_intr_establish(struct nvme_softc *sc, uint16_t qid,
    struct nvme_queue *q)
{
	struct nvme_pci_softc *psc = (struct nvme_pci_softc *)sc;
	char intr_xname[INTRDEVNAMEBUF];
	char intrbuf[PCI_INTRSTR_LEN];
	const char *intrstr = NULL;
	int (*ih_func)(void *);
	void (*ih_func_soft)(void *);
	void *ih_arg;
	int error;

	KASSERT(sc->sc_use_mq || qid == NVME_ADMIN_Q);
	KASSERT(sc->sc_ih[qid] == NULL);

	if (nvme_pci_mpsafe) {
		pci_intr_setattr(psc->psc_pc, &psc->psc_intrs[qid],
		    PCI_INTR_MPSAFE, true);
	}

	if (!sc->sc_use_mq) {
		snprintf(intr_xname, sizeof(intr_xname), "%s",
		    device_xname(sc->sc_dev));
		ih_arg = sc;
		ih_func = nvme_intr;
		ih_func_soft = nvme_softintr_intx;
	} else {
		if (qid == NVME_ADMIN_Q) {
			snprintf(intr_xname, sizeof(intr_xname), "%s adminq",
			    device_xname(sc->sc_dev));
		} else {
			snprintf(intr_xname, sizeof(intr_xname), "%s ioq%d",
			    device_xname(sc->sc_dev), qid);
		}
		ih_arg = q;
		ih_func = nvme_intr_msi;
		ih_func_soft = nvme_softintr_msi;
	}

	/* establish hardware interrupt */
	sc->sc_ih[qid] = pci_intr_establish_xname(psc->psc_pc,
	    psc->psc_intrs[qid], IPL_BIO, ih_func, ih_arg, intr_xname);
	if (sc->sc_ih[qid] == NULL) {
		aprint_error_dev(sc->sc_dev,
		    "unable to establish %s interrupt\n", intr_xname);
		return 1;
	}

	/* establish also the software interrupt */
	sc->sc_softih[qid] = softint_establish(
	    SOFTINT_BIO|(nvme_pci_mpsafe ? SOFTINT_MPSAFE : 0),
	    ih_func_soft, q);
	if (sc->sc_softih[qid] == NULL) {
		pci_intr_disestablish(psc->psc_pc, sc->sc_ih[qid]);
		sc->sc_ih[qid] = NULL;

		aprint_error_dev(sc->sc_dev,
		    "unable to establish %s soft interrupt\n",
		    intr_xname);
		return 1;
	}

	intrstr = pci_intr_string(psc->psc_pc, psc->psc_intrs[qid], intrbuf,
	    sizeof(intrbuf));
	if (!sc->sc_use_mq) {
		aprint_normal_dev(sc->sc_dev, "interrupting at %s\n", intrstr);
	} else if (qid == NVME_ADMIN_Q) {
		aprint_normal_dev(sc->sc_dev,
		    "for admin queue interrupting at %s\n", intrstr);
	} else if (!nvme_pci_mpsafe) {
		aprint_normal_dev(sc->sc_dev,
		    "for io queue %d interrupting at %s\n", qid, intrstr);
	} else {
		kcpuset_t *affinity;
		cpuid_t affinity_to;

		kcpuset_create(&affinity, true);
		affinity_to = (qid - 1) % ncpu;
		kcpuset_set(affinity, affinity_to);
		error = interrupt_distribute(sc->sc_ih[qid], affinity, NULL);
		kcpuset_destroy(affinity);
		aprint_normal_dev(sc->sc_dev,
		    "for io queue %d interrupting at %s", qid, intrstr);
		if (error == 0)
			aprint_normal(" affinity to cpu%lu", affinity_to);
		aprint_normal("\n");
	}
	return 0;
}

static int
nvme_pci_intr_disestablish(struct nvme_softc *sc, uint16_t qid)
{
	struct nvme_pci_softc *psc = (struct nvme_pci_softc *)sc;

	KASSERT(sc->sc_use_mq || qid == NVME_ADMIN_Q);
	KASSERT(sc->sc_ih[qid] != NULL);

	if (sc->sc_softih) {
		softint_disestablish(sc->sc_softih[qid]);
		sc->sc_softih[qid] = NULL;
	}

	pci_intr_disestablish(psc->psc_pc, sc->sc_ih[qid]);
	sc->sc_ih[qid] = NULL;

	return 0;
}

static int
nvme_pci_setup_intr(struct pci_attach_args *pa, struct nvme_pci_softc *psc)
{
	struct nvme_softc *sc = &psc->psc_nvme;
	int error;
	int counts[PCI_INTR_TYPE_SIZE];
	pci_intr_handle_t *ihps;
	int intr_type;

	memset(counts, 0, sizeof(counts));

	if (nvme_pci_force_intx)
		goto setup_intx;

	/* MSI-X */
	counts[PCI_INTR_TYPE_MSIX] = uimin(pci_msix_count(pa->pa_pc, pa->pa_tag),
	    ncpu + 1);
	if (counts[PCI_INTR_TYPE_MSIX] < 1) {
		counts[PCI_INTR_TYPE_MSIX] = 0;
	} else if (!nvme_pci_mq || !nvme_pci_mpsafe) {
		if (counts[PCI_INTR_TYPE_MSIX] > 2)
			counts[PCI_INTR_TYPE_MSIX] = 2;	/* adminq + 1 ioq */
	}

	/* MSI */
	if (sc->sc_quirks & NVME_QUIRK_NOMSI)
		goto setup_intx;
	counts[PCI_INTR_TYPE_MSI] = pci_msi_count(pa->pa_pc, pa->pa_tag);
	if (counts[PCI_INTR_TYPE_MSI] > 0) {
		/* MSI vector counts are powers of two; halve toward ncpu + 1. */
		while (counts[PCI_INTR_TYPE_MSI] > ncpu + 1) {
			if (counts[PCI_INTR_TYPE_MSI] / 2 <= ncpu + 1)
				break;
			counts[PCI_INTR_TYPE_MSI] /= 2;
		}
	}
	if (counts[PCI_INTR_TYPE_MSI] < 1) {
		counts[PCI_INTR_TYPE_MSI] = 0;
	} else if (!nvme_pci_mq || !nvme_pci_mpsafe) {
		if (counts[PCI_INTR_TYPE_MSI] > 2)
			counts[PCI_INTR_TYPE_MSI] = 2;	/* adminq + 1 ioq */
	}

setup_intx:
	/* INTx */
	counts[PCI_INTR_TYPE_INTX] = 1;

	error = pci_intr_alloc(pa, &ihps, counts, PCI_INTR_TYPE_MSIX);
	if (error)
		return error;

	intr_type = pci_intr_type(pa->pa_pc, ihps[0]);

	psc->psc_intrs = ihps;
	psc->psc_nintrs = counts[intr_type];
	if (intr_type == PCI_INTR_TYPE_MSI) {
		if (counts[intr_type] > ncpu + 1)
			counts[intr_type] = ncpu + 1;
	}
	sc->sc_use_mq = counts[intr_type] > 1;
	sc->sc_nq = sc->sc_use_mq ? counts[intr_type] - 1 : 1;

	return 0;
}

MODULE(MODULE_CLASS_DRIVER, nvme, "pci,dk_subr");

#ifdef _MODULE
#include "ioconf.c"
#endif

static int
nvme_modcmd(modcmd_t cmd, void *opaque)
{
#ifdef _MODULE
	devmajor_t cmajor, bmajor;
	extern const struct cdevsw nvme_cdevsw;
#endif
	int error = 0;

#ifdef _MODULE
	switch (cmd) {
	case MODULE_CMD_INIT:
		error = config_init_component(cfdriver_ioconf_nvme_pci,
		    cfattach_ioconf_nvme_pci, cfdata_ioconf_nvme_pci);
		if (error)
			break;

		bmajor = cmajor = NODEVMAJOR;
		error = devsw_attach(nvme_cd.cd_name, NULL, &bmajor,
		    &nvme_cdevsw, &cmajor);
		if (error) {
			aprint_error("%s: unable to register devsw\n",
			    nvme_cd.cd_name);
			/* do not abort, just /dev/nvme* will not work */
		}
		break;
	case MODULE_CMD_FINI:
		devsw_detach(NULL, &nvme_cdevsw);

		error = config_fini_component(cfdriver_ioconf_nvme_pci,
		    cfattach_ioconf_nvme_pci, cfdata_ioconf_nvme_pci);
		break;
	default:
		break;
	}
#endif
	return error;
}