1 /* $NetBSD: nvme_pci.c,v 1.16 2016/10/19 19:31:23 jdolecek Exp $ */ 2 /* $OpenBSD: nvme_pci.c,v 1.3 2016/04/14 11:18:32 dlg Exp $ */ 3 4 /* 5 * Copyright (c) 2014 David Gwynne <dlg@openbsd.org> 6 * 7 * Permission to use, copy, modify, and distribute this software for any 8 * purpose with or without fee is hereby granted, provided that the above 9 * copyright notice and this permission notice appear in all copies. 10 * 11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 18 */ 19 20 /*- 21 * Copyright (C) 2016 NONAKA Kimihiro <nonaka@netbsd.org> 22 * All rights reserved. 23 * 24 * Redistribution and use in source and binary forms, with or without 25 * modification, are permitted provided that the following conditions 26 * are met: 27 * 1. Redistributions of source code must retain the above copyright 28 * notice, this list of conditions and the following disclaimer. 29 * 2. Redistributions in binary form must reproduce the above copyright 30 * notice, this list of conditions and the following disclaimer in the 31 * documentation and/or other materials provided with the distribution. 32 * 33 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 34 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 35 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 36 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 37 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 38 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 39 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 40 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 41 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 42 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 43 */ 44 45 #include <sys/cdefs.h> 46 __KERNEL_RCSID(0, "$NetBSD: nvme_pci.c,v 1.16 2016/10/19 19:31:23 jdolecek Exp $"); 47 48 #include <sys/param.h> 49 #include <sys/systm.h> 50 #include <sys/kernel.h> 51 #include <sys/device.h> 52 #include <sys/bitops.h> 53 #include <sys/bus.h> 54 #include <sys/cpu.h> 55 #include <sys/interrupt.h> 56 #include <sys/kmem.h> 57 #include <sys/pmf.h> 58 #include <sys/module.h> 59 60 #include <dev/pci/pcireg.h> 61 #include <dev/pci/pcivar.h> 62 63 #include <dev/ic/nvmereg.h> 64 #include <dev/ic/nvmevar.h> 65 66 int nvme_pci_force_intx = 0; 67 int nvme_pci_mpsafe = 1; 68 int nvme_pci_mq = 1; /* INTx: ioq=1, MSI/MSI-X: ioq=ncpu */ 69 70 #define NVME_PCI_BAR 0x10 71 72 #ifndef __HAVE_PCI_MSI_MSIX 73 #define pci_intr_release(pc, intrs, nintrs) \ 74 kmem_free(intrs, sizeof(*intrs) * nintrs) 75 #define pci_intr_establish_xname(pc, ih, level, intrhand, intrarg, xname) \ 76 pci_intr_establish(pc, ih, level, intrhand, intrarg) 77 #endif 78 79 struct nvme_pci_softc { 80 struct nvme_softc psc_nvme; 81 82 pci_chipset_tag_t psc_pc; 83 pci_intr_handle_t *psc_intrs; 84 int psc_nintrs; 85 }; 86 87 static int nvme_pci_match(device_t, cfdata_t, void *); 88 static void nvme_pci_attach(device_t, device_t, void *); 89 static int nvme_pci_detach(device_t, int); 90 static int nvme_pci_rescan(device_t, const char *, const int *); 91 92 CFATTACH_DECL3_NEW(nvme_pci, sizeof(struct nvme_pci_softc), 93 nvme_pci_match, nvme_pci_attach, nvme_pci_detach, NULL, nvme_pci_rescan, 94 nvme_childdet, DVF_DETACH_SHUTDOWN); 95 96 static int nvme_pci_intr_establish(struct nvme_softc *, 97 uint16_t, struct nvme_queue *); 98 static int nvme_pci_intr_disestablish(struct nvme_softc *, uint16_t); 99 static int nvme_pci_setup_intr(struct pci_attach_args *, 100 struct nvme_pci_softc *); 101 102 static int 103 nvme_pci_match(device_t parent, cfdata_t match, void *aux) 104 { 105 struct pci_attach_args *pa = aux; 106 107 if (PCI_CLASS(pa->pa_class) == PCI_CLASS_MASS_STORAGE && 108 PCI_SUBCLASS(pa->pa_class) == PCI_SUBCLASS_MASS_STORAGE_NVM && 109 PCI_INTERFACE(pa->pa_class) == PCI_INTERFACE_NVM_NVME) 110 return 1; 111 112 return 0; 113 } 114 115 static void 116 nvme_pci_attach(device_t parent, device_t self, void *aux) 117 { 118 struct nvme_pci_softc *psc = device_private(self); 119 struct nvme_softc *sc = &psc->psc_nvme; 120 struct pci_attach_args *pa = aux; 121 pcireg_t memtype, reg; 122 bus_addr_t memaddr; 123 int flags, error; 124 #ifdef __HAVE_PCI_MSI_MSIX 125 int msixoff; 126 #endif 127 128 sc->sc_dev = self; 129 psc->psc_pc = pa->pa_pc; 130 if (pci_dma64_available(pa)) 131 sc->sc_dmat = pa->pa_dmat64; 132 else 133 sc->sc_dmat = pa->pa_dmat; 134 135 pci_aprint_devinfo(pa, NULL); 136 137 reg = pci_conf_read(pa->pa_pc, pa->pa_tag, PCI_COMMAND_STATUS_REG); 138 if ((reg & PCI_COMMAND_MASTER_ENABLE) == 0) { 139 reg |= PCI_COMMAND_MASTER_ENABLE; 140 pci_conf_write(pa->pa_pc, pa->pa_tag, PCI_COMMAND_STATUS_REG, reg); 141 } 142 143 /* Map registers */ 144 memtype = pci_mapreg_type(pa->pa_pc, pa->pa_tag, NVME_PCI_BAR); 145 if (PCI_MAPREG_TYPE(memtype) != PCI_MAPREG_TYPE_MEM) { 146 aprint_error_dev(self, "invalid type (type=0x%x)\n", memtype); 147 return; 148 } 149 sc->sc_iot = pa->pa_memt; 150 error = pci_mapreg_info(pa->pa_pc, pa->pa_tag, PCI_MAPREG_START, 151 memtype, &memaddr, &sc->sc_ios, &flags); 152 if (error) { 153 aprint_error_dev(self, "can't get map info\n"); 154 return; 155 } 156 157 #ifdef __HAVE_PCI_MSI_MSIX 158 if (pci_get_capability(pa->pa_pc, pa->pa_tag, PCI_CAP_MSIX, &msixoff, 159 NULL)) { 160 pcireg_t msixtbl; 161 uint32_t table_offset; 162 int bir; 163 164 msixtbl = pci_conf_read(pa->pa_pc, pa->pa_tag, 165 msixoff + PCI_MSIX_TBLOFFSET); 166 table_offset = msixtbl & PCI_MSIX_TBLOFFSET_MASK; 167 bir = msixtbl & PCI_MSIX_PBABIR_MASK; 168 if (bir == 0) { 169 sc->sc_ios = table_offset; 170 } 171 } 172 #endif /* __HAVE_PCI_MSI_MSIX */ 173 174 error = bus_space_map(sc->sc_iot, memaddr, sc->sc_ios, flags, 175 &sc->sc_ioh); 176 if (error != 0) { 177 aprint_error_dev(self, "can't map mem space (error=%d)\n", 178 error); 179 return; 180 } 181 182 /* Establish interrupts */ 183 if (nvme_pci_setup_intr(pa, psc) != 0) { 184 aprint_error_dev(self, "unable to allocate interrupt\n"); 185 goto unmap; 186 } 187 sc->sc_intr_establish = nvme_pci_intr_establish; 188 sc->sc_intr_disestablish = nvme_pci_intr_disestablish; 189 190 sc->sc_ih = kmem_zalloc(sizeof(*sc->sc_ih) * psc->psc_nintrs, KM_SLEEP); 191 if (sc->sc_ih == NULL) { 192 aprint_error_dev(self, "unable to allocate ih memory\n"); 193 goto intr_release; 194 } 195 196 sc->sc_softih = kmem_zalloc( 197 sizeof(*sc->sc_softih) * psc->psc_nintrs, KM_SLEEP); 198 if (sc->sc_softih == NULL) { 199 aprint_error_dev(self, 200 "unable to allocate softih memory\n"); 201 goto intr_free; 202 } 203 204 if (nvme_attach(sc) != 0) { 205 /* error printed by nvme_attach() */ 206 goto softintr_free; 207 } 208 209 if (!pmf_device_register(self, NULL, NULL)) 210 aprint_error_dev(self, "couldn't establish power handler\n"); 211 212 SET(sc->sc_flags, NVME_F_ATTACHED); 213 return; 214 215 softintr_free: 216 kmem_free(sc->sc_softih, sizeof(*sc->sc_softih) * psc->psc_nintrs); 217 intr_free: 218 kmem_free(sc->sc_ih, sizeof(*sc->sc_ih) * psc->psc_nintrs); 219 sc->sc_nq = 0; 220 intr_release: 221 pci_intr_release(pa->pa_pc, psc->psc_intrs, psc->psc_nintrs); 222 psc->psc_nintrs = 0; 223 unmap: 224 bus_space_unmap(sc->sc_iot, sc->sc_ioh, sc->sc_ios); 225 sc->sc_ios = 0; 226 } 227 228 static int 229 nvme_pci_rescan(device_t self, const char *attr, const int *flags) 230 { 231 232 return nvme_rescan(self, attr, flags); 233 } 234 235 static int 236 nvme_pci_detach(device_t self, int flags) 237 { 238 struct nvme_pci_softc *psc = device_private(self); 239 struct nvme_softc *sc = &psc->psc_nvme; 240 int error; 241 242 if (!ISSET(sc->sc_flags, NVME_F_ATTACHED)) 243 return 0; 244 245 error = nvme_detach(sc, flags); 246 if (error) 247 return error; 248 249 kmem_free(sc->sc_softih, sizeof(*sc->sc_softih) * psc->psc_nintrs); 250 sc->sc_softih = NULL; 251 252 kmem_free(sc->sc_ih, sizeof(*sc->sc_ih) * psc->psc_nintrs); 253 pci_intr_release(psc->psc_pc, psc->psc_intrs, psc->psc_nintrs); 254 bus_space_unmap(sc->sc_iot, sc->sc_ioh, sc->sc_ios); 255 return 0; 256 } 257 258 static int 259 nvme_pci_intr_establish(struct nvme_softc *sc, uint16_t qid, 260 struct nvme_queue *q) 261 { 262 struct nvme_pci_softc *psc = (struct nvme_pci_softc *)sc; 263 char intr_xname[INTRDEVNAMEBUF]; 264 char intrbuf[PCI_INTRSTR_LEN]; 265 const char *intrstr = NULL; 266 int (*ih_func)(void *); 267 void (*ih_func_soft)(void *); 268 void *ih_arg; 269 #ifdef __HAVE_PCI_MSI_MSIX 270 int error; 271 #endif 272 273 KASSERT(sc->sc_use_mq || qid == NVME_ADMIN_Q); 274 KASSERT(sc->sc_ih[qid] == NULL); 275 276 if (nvme_pci_mpsafe) { 277 pci_intr_setattr(psc->psc_pc, &psc->psc_intrs[qid], 278 PCI_INTR_MPSAFE, true); 279 } 280 281 #ifdef __HAVE_PCI_MSI_MSIX 282 if (!sc->sc_use_mq) { 283 #endif 284 snprintf(intr_xname, sizeof(intr_xname), "%s", 285 device_xname(sc->sc_dev)); 286 ih_arg = sc; 287 ih_func = nvme_intr; 288 ih_func_soft = nvme_softintr_intx; 289 #ifdef __HAVE_PCI_MSI_MSIX 290 } 291 else { 292 if (qid == NVME_ADMIN_Q) { 293 snprintf(intr_xname, sizeof(intr_xname), "%s adminq", 294 device_xname(sc->sc_dev)); 295 } else { 296 snprintf(intr_xname, sizeof(intr_xname), "%s ioq%d", 297 device_xname(sc->sc_dev), qid); 298 } 299 ih_arg = q; 300 ih_func = nvme_intr_msi; 301 ih_func_soft = nvme_softintr_msi; 302 } 303 #endif /* __HAVE_PCI_MSI_MSIX */ 304 305 /* establish hardware interrupt */ 306 sc->sc_ih[qid] = pci_intr_establish_xname(psc->psc_pc, 307 psc->psc_intrs[qid], IPL_BIO, ih_func, ih_arg, intr_xname); 308 if (sc->sc_ih[qid] == NULL) { 309 aprint_error_dev(sc->sc_dev, 310 "unable to establish %s interrupt\n", intr_xname); 311 return 1; 312 } 313 314 /* establish also the software interrupt */ 315 sc->sc_softih[qid] = softint_establish( 316 SOFTINT_BIO|(nvme_pci_mpsafe ? SOFTINT_MPSAFE : 0), 317 ih_func_soft, q); 318 if (sc->sc_softih[qid] == NULL) { 319 pci_intr_disestablish(psc->psc_pc, sc->sc_ih[qid]); 320 sc->sc_ih[qid] = NULL; 321 322 aprint_error_dev(sc->sc_dev, 323 "unable to establish %s soft interrupt\n", 324 intr_xname); 325 return 1; 326 } 327 328 intrstr = pci_intr_string(psc->psc_pc, psc->psc_intrs[qid], intrbuf, 329 sizeof(intrbuf)); 330 if (!sc->sc_use_mq) { 331 aprint_normal_dev(sc->sc_dev, "interrupting at %s\n", intrstr); 332 } 333 #ifdef __HAVE_PCI_MSI_MSIX 334 else if (qid == NVME_ADMIN_Q) { 335 aprint_normal_dev(sc->sc_dev, 336 "for admin queue interrupting at %s\n", intrstr); 337 } else if (!nvme_pci_mpsafe) { 338 aprint_normal_dev(sc->sc_dev, 339 "for io queue %d interrupting at %s\n", qid, intrstr); 340 } else { 341 kcpuset_t *affinity; 342 cpuid_t affinity_to; 343 344 kcpuset_create(&affinity, true); 345 affinity_to = (qid - 1) % ncpu; 346 kcpuset_set(affinity, affinity_to); 347 error = interrupt_distribute(sc->sc_ih[qid], affinity, NULL); 348 kcpuset_destroy(affinity); 349 aprint_normal_dev(sc->sc_dev, 350 "for io queue %d interrupting at %s", qid, intrstr); 351 if (error == 0) 352 aprint_normal(" affinity to cpu%lu", affinity_to); 353 aprint_normal("\n"); 354 } 355 #endif 356 return 0; 357 } 358 359 static int 360 nvme_pci_intr_disestablish(struct nvme_softc *sc, uint16_t qid) 361 { 362 struct nvme_pci_softc *psc = (struct nvme_pci_softc *)sc; 363 364 KASSERT(sc->sc_use_mq || qid == NVME_ADMIN_Q); 365 KASSERT(sc->sc_ih[qid] != NULL); 366 367 if (sc->sc_softih) { 368 softint_disestablish(sc->sc_softih[qid]); 369 sc->sc_softih[qid] = NULL; 370 } 371 372 pci_intr_disestablish(psc->psc_pc, sc->sc_ih[qid]); 373 sc->sc_ih[qid] = NULL; 374 375 return 0; 376 } 377 378 static int 379 nvme_pci_setup_intr(struct pci_attach_args *pa, struct nvme_pci_softc *psc) 380 { 381 struct nvme_softc *sc = &psc->psc_nvme; 382 #ifdef __HAVE_PCI_MSI_MSIX 383 int error; 384 int counts[PCI_INTR_TYPE_SIZE], alloced_counts[PCI_INTR_TYPE_SIZE]; 385 pci_intr_handle_t *ihps; 386 int max_type, intr_type; 387 #else 388 pci_intr_handle_t ih; 389 #endif /* __HAVE_PCI_MSI_MSIX */ 390 391 #ifdef __HAVE_PCI_MSI_MSIX 392 if (nvme_pci_force_intx) { 393 max_type = PCI_INTR_TYPE_INTX; 394 goto force_intx; 395 } 396 397 /* MSI-X */ 398 max_type = PCI_INTR_TYPE_MSIX; 399 counts[PCI_INTR_TYPE_MSIX] = min(pci_msix_count(pa->pa_pc, pa->pa_tag), 400 ncpu + 1); 401 if (counts[PCI_INTR_TYPE_MSIX] > 0) { 402 memset(alloced_counts, 0, sizeof(alloced_counts)); 403 alloced_counts[PCI_INTR_TYPE_MSIX] = counts[PCI_INTR_TYPE_MSIX]; 404 if (pci_intr_alloc(pa, &ihps, alloced_counts, 405 PCI_INTR_TYPE_MSIX)) { 406 counts[PCI_INTR_TYPE_MSIX] = 0; 407 } else { 408 counts[PCI_INTR_TYPE_MSIX] = 409 alloced_counts[PCI_INTR_TYPE_MSIX]; 410 pci_intr_release(pa->pa_pc, ihps, 411 alloced_counts[PCI_INTR_TYPE_MSIX]); 412 } 413 } 414 if (counts[PCI_INTR_TYPE_MSIX] < 2) { 415 counts[PCI_INTR_TYPE_MSIX] = 0; 416 max_type = PCI_INTR_TYPE_MSI; 417 } else if (!nvme_pci_mq || !nvme_pci_mpsafe) { 418 counts[PCI_INTR_TYPE_MSIX] = 2; /* adminq + 1 ioq */ 419 } 420 421 retry_msi: 422 /* MSI */ 423 counts[PCI_INTR_TYPE_MSI] = pci_msi_count(pa->pa_pc, pa->pa_tag); 424 if (counts[PCI_INTR_TYPE_MSI] > 0) { 425 while (counts[PCI_INTR_TYPE_MSI] > ncpu + 1) { 426 if (counts[PCI_INTR_TYPE_MSI] / 2 <= ncpu + 1) 427 break; 428 counts[PCI_INTR_TYPE_MSI] /= 2; 429 } 430 memset(alloced_counts, 0, sizeof(alloced_counts)); 431 alloced_counts[PCI_INTR_TYPE_MSI] = counts[PCI_INTR_TYPE_MSI]; 432 if (pci_intr_alloc(pa, &ihps, alloced_counts, 433 PCI_INTR_TYPE_MSI)) { 434 counts[PCI_INTR_TYPE_MSI] = 0; 435 } else { 436 counts[PCI_INTR_TYPE_MSI] = 437 alloced_counts[PCI_INTR_TYPE_MSI]; 438 pci_intr_release(pa->pa_pc, ihps, 439 alloced_counts[PCI_INTR_TYPE_MSI]); 440 } 441 } 442 if (counts[PCI_INTR_TYPE_MSI] < 1) { 443 counts[PCI_INTR_TYPE_MSI] = 0; 444 if (max_type == PCI_INTR_TYPE_MSI) 445 max_type = PCI_INTR_TYPE_INTX; 446 } else if (!nvme_pci_mq || !nvme_pci_mpsafe) { 447 if (counts[PCI_INTR_TYPE_MSI] > 2) 448 counts[PCI_INTR_TYPE_MSI] = 2; /* adminq + 1 ioq */ 449 } 450 451 force_intx: 452 /* INTx */ 453 counts[PCI_INTR_TYPE_INTX] = 1; 454 455 memcpy(alloced_counts, counts, sizeof(counts)); 456 error = pci_intr_alloc(pa, &ihps, alloced_counts, max_type); 457 if (error) { 458 if (max_type != PCI_INTR_TYPE_INTX) { 459 retry: 460 memset(counts, 0, sizeof(counts)); 461 if (max_type == PCI_INTR_TYPE_MSIX) { 462 max_type = PCI_INTR_TYPE_MSI; 463 goto retry_msi; 464 } else { 465 max_type = PCI_INTR_TYPE_INTX; 466 goto force_intx; 467 } 468 } 469 return error; 470 } 471 472 intr_type = pci_intr_type(pa->pa_pc, ihps[0]); 473 if (alloced_counts[intr_type] < counts[intr_type]) { 474 if (intr_type != PCI_INTR_TYPE_INTX) { 475 pci_intr_release(pa->pa_pc, ihps, 476 alloced_counts[intr_type]); 477 max_type = intr_type; 478 goto retry; 479 } 480 return EBUSY; 481 } 482 483 psc->psc_intrs = ihps; 484 psc->psc_nintrs = alloced_counts[intr_type]; 485 if (intr_type == PCI_INTR_TYPE_MSI) { 486 if (alloced_counts[intr_type] > ncpu + 1) 487 alloced_counts[intr_type] = ncpu + 1; 488 } 489 sc->sc_use_mq = alloced_counts[intr_type] > 1; 490 sc->sc_nq = sc->sc_use_mq ? alloced_counts[intr_type] - 1 : 1; 491 492 #else /* !__HAVE_PCI_MSI_MSIX */ 493 if (pci_intr_map(pa, &ih)) { 494 aprint_error_dev(sc->sc_dev, "couldn't map interrupt\n"); 495 return EBUSY; 496 } 497 498 psc->psc_intrs = kmem_zalloc(sizeof(ih), KM_SLEEP); 499 psc->psc_intrs[0] = ih; 500 psc->psc_nintrs = 1; 501 sc->sc_use_mq = 0; 502 sc->sc_nq = 1; 503 #endif /* __HAVE_PCI_MSI_MSIX */ 504 505 return 0; 506 } 507 508 MODULE(MODULE_CLASS_DRIVER, nvme, "pci,dk_subr"); 509 510 #ifdef _MODULE 511 #include "ioconf.c" 512 #endif 513 514 static int 515 nvme_modcmd(modcmd_t cmd, void *opaque) 516 { 517 #ifdef _MODULE 518 devmajor_t cmajor, bmajor; 519 extern const struct cdevsw nvme_cdevsw; 520 #endif 521 int error = 0; 522 523 #ifdef _MODULE 524 switch (cmd) { 525 case MODULE_CMD_INIT: 526 error = config_init_component(cfdriver_ioconf_nvme_pci, 527 cfattach_ioconf_nvme_pci, cfdata_ioconf_nvme_pci); 528 if (error) 529 break; 530 531 bmajor = cmajor = NODEVMAJOR; 532 error = devsw_attach(nvme_cd.cd_name, NULL, &bmajor, 533 &nvme_cdevsw, &cmajor); 534 if (error) { 535 aprint_error("%s: unable to register devsw\n", 536 nvme_cd.cd_name); 537 /* do not abort, just /dev/nvme* will not work */ 538 } 539 break; 540 case MODULE_CMD_FINI: 541 devsw_detach(NULL, &nvme_cdevsw); 542 543 error = config_fini_component(cfdriver_ioconf_nvme_pci, 544 cfattach_ioconf_nvme_pci, cfdata_ioconf_nvme_pci); 545 break; 546 default: 547 break; 548 } 549 #endif 550 return error; 551 } 552