1 /* $NetBSD: nvme_pci.c,v 1.37 2022/08/15 18:06:04 pgoyette Exp $ */
2 /* $OpenBSD: nvme_pci.c,v 1.3 2016/04/14 11:18:32 dlg Exp $ */
3
4 /*
5 * Copyright (c) 2014 David Gwynne <dlg@openbsd.org>
6 *
7 * Permission to use, copy, modify, and distribute this software for any
8 * purpose with or without fee is hereby granted, provided that the above
9 * copyright notice and this permission notice appear in all copies.
10 *
11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 */
19
20 /*-
21 * Copyright (C) 2016 NONAKA Kimihiro <nonaka@netbsd.org>
22 * All rights reserved.
23 *
24 * Redistribution and use in source and binary forms, with or without
25 * modification, are permitted provided that the following conditions
26 * are met:
27 * 1. Redistributions of source code must retain the above copyright
28 * notice, this list of conditions and the following disclaimer.
29 * 2. Redistributions in binary form must reproduce the above copyright
30 * notice, this list of conditions and the following disclaimer in the
31 * documentation and/or other materials provided with the distribution.
32 *
33 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
34 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
35 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
36 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
37 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
38 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
39 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
40 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
41 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
42 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43 */
44
45 #include <sys/cdefs.h>
46 __KERNEL_RCSID(0, "$NetBSD: nvme_pci.c,v 1.37 2022/08/15 18:06:04 pgoyette Exp $");
47
48 #include <sys/param.h>
49 #include <sys/systm.h>
50 #include <sys/kernel.h>
51 #include <sys/device.h>
52 #include <sys/bitops.h>
53 #include <sys/bus.h>
54 #include <sys/cpu.h>
55 #include <sys/interrupt.h>
56 #include <sys/kmem.h>
57 #include <sys/pmf.h>
58 #include <sys/module.h>
59
60 #include <dev/pci/pcireg.h>
61 #include <dev/pci/pcivar.h>
62 #include <dev/pci/pcidevs.h>
63
64 #include <dev/ic/nvmereg.h>
65 #include <dev/ic/nvmevar.h>
66
/*
 * Driver-wide knobs consulted while interrupts are being set up.
 */

/* Non-zero: skip MSI-X and MSI and request a single INTx interrupt. */
int nvme_pci_force_intx = 0;

/* Non-zero: mark the hard and soft interrupt handlers MP-safe. */
int nvme_pci_mpsafe = 1;

/*
 * Non-zero: use multiple I/O queues (INTx: ioq=1, MSI/MSI-X: ioq=ncpu).
 * When this or nvme_pci_mpsafe is zero, MSI and MSI-X are capped at two
 * vectors (adminq + 1 ioq).
 */
int nvme_pci_mq = 1;

/* PCI configuration-space offset of the BAR that holds the registers. */
#define NVME_PCI_BAR		0x10
72
73 struct nvme_pci_softc {
74 struct nvme_softc psc_nvme;
75
76 pci_chipset_tag_t psc_pc;
77 pci_intr_handle_t *psc_intrs;
78 int psc_nintrs;
79 };
80
81 static int nvme_pci_match(device_t, cfdata_t, void *);
82 static void nvme_pci_attach(device_t, device_t, void *);
83 static int nvme_pci_detach(device_t, int);
84 static int nvme_pci_rescan(device_t, const char *, const int *);
85 static bool nvme_pci_suspend(device_t, const pmf_qual_t *);
86 static bool nvme_pci_resume(device_t, const pmf_qual_t *);
87
88 CFATTACH_DECL3_NEW(nvme_pci, sizeof(struct nvme_pci_softc),
89 nvme_pci_match, nvme_pci_attach, nvme_pci_detach, NULL, nvme_pci_rescan,
90 nvme_childdet, DVF_DETACH_SHUTDOWN);
91
92 static int nvme_pci_intr_establish(struct nvme_softc *,
93 uint16_t, struct nvme_queue *);
94 static int nvme_pci_intr_disestablish(struct nvme_softc *, uint16_t);
95 static int nvme_pci_setup_intr(struct pci_attach_args *,
96 struct nvme_pci_softc *);
97
98 static const struct nvme_pci_quirk {
99 pci_vendor_id_t vendor;
100 pci_product_id_t product;
101 uint32_t quirks;
102 } nvme_pci_quirks[] = {
103 { PCI_VENDOR_HGST, PCI_PRODUCT_HGST_SN100,
104 NVME_QUIRK_DELAY_B4_CHK_RDY },
105 { PCI_VENDOR_HGST, PCI_PRODUCT_HGST_SN200,
106 NVME_QUIRK_DELAY_B4_CHK_RDY },
107 { PCI_VENDOR_BEIJING_MEMBLAZE, PCI_PRODUCT_BEIJING_MEMBLAZE_PBLAZE4,
108 NVME_QUIRK_DELAY_B4_CHK_RDY },
109 { PCI_VENDOR_SAMSUNGELEC3, PCI_PRODUCT_SAMSUNGELEC3_172X,
110 NVME_QUIRK_DELAY_B4_CHK_RDY },
111 { PCI_VENDOR_SAMSUNGELEC3, PCI_PRODUCT_SAMSUNGELEC3_172XAB,
112 NVME_QUIRK_DELAY_B4_CHK_RDY },
113 { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_DC_P4500_SSD,
114 NVME_QUIRK_NOMSI },
115 };
116
117 static const struct nvme_pci_quirk *
nvme_pci_lookup_quirk(struct pci_attach_args * pa)118 nvme_pci_lookup_quirk(struct pci_attach_args *pa)
119 {
120 const struct nvme_pci_quirk *q;
121 int i;
122
123 for (i = 0; i < __arraycount(nvme_pci_quirks); i++) {
124 q = &nvme_pci_quirks[i];
125
126 if (PCI_VENDOR(pa->pa_id) == q->vendor &&
127 PCI_PRODUCT(pa->pa_id) == q->product)
128 return q;
129 }
130 return NULL;
131 }
132
133 static int
nvme_pci_match(device_t parent,cfdata_t match,void * aux)134 nvme_pci_match(device_t parent, cfdata_t match, void *aux)
135 {
136 struct pci_attach_args *pa = aux;
137
138 if (PCI_CLASS(pa->pa_class) == PCI_CLASS_MASS_STORAGE &&
139 PCI_SUBCLASS(pa->pa_class) == PCI_SUBCLASS_MASS_STORAGE_NVM &&
140 PCI_INTERFACE(pa->pa_class) == PCI_INTERFACE_NVM_NVME_IO)
141 return 1;
142
143 return 0;
144 }
145
146 static void
nvme_pci_attach(device_t parent,device_t self,void * aux)147 nvme_pci_attach(device_t parent, device_t self, void *aux)
148 {
149 struct nvme_pci_softc *psc = device_private(self);
150 struct nvme_softc *sc = &psc->psc_nvme;
151 struct pci_attach_args *pa = aux;
152 const struct nvme_pci_quirk *quirk;
153 pcireg_t memtype, reg;
154 bus_addr_t memaddr;
155 int flags, error;
156 int msixoff;
157
158 sc->sc_dev = self;
159 psc->psc_pc = pa->pa_pc;
160 if (pci_dma64_available(pa))
161 sc->sc_dmat = pa->pa_dmat64;
162 else
163 sc->sc_dmat = pa->pa_dmat;
164
165 quirk = nvme_pci_lookup_quirk(pa);
166 if (quirk != NULL)
167 sc->sc_quirks = quirk->quirks;
168
169 pci_aprint_devinfo(pa, NULL);
170
171 /* Map registers */
172 memtype = pci_mapreg_type(pa->pa_pc, pa->pa_tag, NVME_PCI_BAR);
173 if (PCI_MAPREG_TYPE(memtype) != PCI_MAPREG_TYPE_MEM) {
174 aprint_error_dev(self, "invalid type (type=0x%x)\n", memtype);
175 return;
176 }
177 reg = pci_conf_read(pa->pa_pc, pa->pa_tag, PCI_COMMAND_STATUS_REG);
178 if (((reg & PCI_COMMAND_MASTER_ENABLE) == 0) ||
179 ((reg & PCI_COMMAND_MEM_ENABLE) == 0)) {
180 /*
181 * Enable address decoding for memory range in case BIOS or
182 * UEFI didn't set it.
183 */
184 reg |= PCI_COMMAND_MASTER_ENABLE | PCI_COMMAND_MEM_ENABLE;
185 pci_conf_write(pa->pa_pc, pa->pa_tag, PCI_COMMAND_STATUS_REG,
186 reg);
187 }
188
189 sc->sc_iot = pa->pa_memt;
190 error = pci_mapreg_info(pa->pa_pc, pa->pa_tag, NVME_PCI_BAR,
191 memtype, &memaddr, &sc->sc_ios, &flags);
192 if (error) {
193 aprint_error_dev(self, "can't get map info\n");
194 return;
195 }
196
197 if (pci_get_capability(pa->pa_pc, pa->pa_tag, PCI_CAP_MSIX, &msixoff,
198 NULL)) {
199 pcireg_t msixtbl;
200 uint32_t table_offset;
201 int bir;
202
203 msixtbl = pci_conf_read(pa->pa_pc, pa->pa_tag,
204 msixoff + PCI_MSIX_TBLOFFSET);
205 table_offset = msixtbl & PCI_MSIX_TBLOFFSET_MASK;
206 bir = msixtbl & PCI_MSIX_TBLBIR_MASK;
207 if (bir == PCI_MAPREG_NUM(NVME_PCI_BAR)) {
208 sc->sc_ios = table_offset;
209 }
210 }
211
212 error = bus_space_map(sc->sc_iot, memaddr, sc->sc_ios, flags,
213 &sc->sc_ioh);
214 if (error != 0) {
215 aprint_error_dev(self, "can't map mem space (error=%d)\n",
216 error);
217 return;
218 }
219
220 /* Establish interrupts */
221 if (nvme_pci_setup_intr(pa, psc) != 0) {
222 aprint_error_dev(self, "unable to allocate interrupt\n");
223 goto unmap;
224 }
225 sc->sc_intr_establish = nvme_pci_intr_establish;
226 sc->sc_intr_disestablish = nvme_pci_intr_disestablish;
227
228 sc->sc_ih = kmem_zalloc(sizeof(*sc->sc_ih) * psc->psc_nintrs, KM_SLEEP);
229 sc->sc_softih = kmem_zalloc(
230 sizeof(*sc->sc_softih) * psc->psc_nintrs, KM_SLEEP);
231
232 if (nvme_attach(sc) != 0) {
233 /* error printed by nvme_attach() */
234 goto softintr_free;
235 }
236
237 if (!pmf_device_register(self, nvme_pci_suspend, nvme_pci_resume))
238 aprint_error_dev(self, "couldn't establish power handler\n");
239
240 SET(sc->sc_flags, NVME_F_ATTACHED);
241 return;
242
243 softintr_free:
244 kmem_free(sc->sc_softih, sizeof(*sc->sc_softih) * psc->psc_nintrs);
245 kmem_free(sc->sc_ih, sizeof(*sc->sc_ih) * psc->psc_nintrs);
246 sc->sc_nq = 0;
247 pci_intr_release(pa->pa_pc, psc->psc_intrs, psc->psc_nintrs);
248 psc->psc_nintrs = 0;
249 unmap:
250 bus_space_unmap(sc->sc_iot, sc->sc_ioh, sc->sc_ios);
251 sc->sc_ios = 0;
252 }
253
254 static int
nvme_pci_rescan(device_t self,const char * attr,const int * flags)255 nvme_pci_rescan(device_t self, const char *attr, const int *flags)
256 {
257
258 return nvme_rescan(self, attr, flags);
259 }
260
261 static bool
nvme_pci_suspend(device_t self,const pmf_qual_t * qual)262 nvme_pci_suspend(device_t self, const pmf_qual_t *qual)
263 {
264 struct nvme_pci_softc *psc = device_private(self);
265 struct nvme_softc *sc = &psc->psc_nvme;
266 int error;
267
268 error = nvme_suspend(sc);
269 if (error)
270 return false;
271
272 return true;
273 }
274
275 static bool
nvme_pci_resume(device_t self,const pmf_qual_t * qual)276 nvme_pci_resume(device_t self, const pmf_qual_t *qual)
277 {
278 struct nvme_pci_softc *psc = device_private(self);
279 struct nvme_softc *sc = &psc->psc_nvme;
280 int error;
281
282 error = nvme_resume(sc);
283 if (error)
284 return false;
285
286 return true;
287 }
288
289 static int
nvme_pci_detach(device_t self,int flags)290 nvme_pci_detach(device_t self, int flags)
291 {
292 struct nvme_pci_softc *psc = device_private(self);
293 struct nvme_softc *sc = &psc->psc_nvme;
294 int error;
295
296 if (!ISSET(sc->sc_flags, NVME_F_ATTACHED))
297 return 0;
298
299 error = nvme_detach(sc, flags);
300 if (error)
301 return error;
302
303 kmem_free(sc->sc_softih, sizeof(*sc->sc_softih) * psc->psc_nintrs);
304 sc->sc_softih = NULL;
305
306 kmem_free(sc->sc_ih, sizeof(*sc->sc_ih) * psc->psc_nintrs);
307 pci_intr_release(psc->psc_pc, psc->psc_intrs, psc->psc_nintrs);
308 bus_space_unmap(sc->sc_iot, sc->sc_ioh, sc->sc_ios);
309 return 0;
310 }
311
312 static int
nvme_pci_intr_establish(struct nvme_softc * sc,uint16_t qid,struct nvme_queue * q)313 nvme_pci_intr_establish(struct nvme_softc *sc, uint16_t qid,
314 struct nvme_queue *q)
315 {
316 struct nvme_pci_softc *psc = (struct nvme_pci_softc *)sc;
317 char intr_xname[INTRDEVNAMEBUF];
318 char intrbuf[PCI_INTRSTR_LEN];
319 const char *intrstr = NULL;
320 int (*ih_func)(void *);
321 void (*ih_func_soft)(void *);
322 void *ih_arg;
323 int error;
324
325 KASSERT(sc->sc_use_mq || qid == NVME_ADMIN_Q);
326 KASSERT(sc->sc_ih[qid] == NULL);
327
328 if (nvme_pci_mpsafe) {
329 pci_intr_setattr(psc->psc_pc, &psc->psc_intrs[qid],
330 PCI_INTR_MPSAFE, true);
331 }
332
333 if (!sc->sc_use_mq) {
334 snprintf(intr_xname, sizeof(intr_xname), "%s",
335 device_xname(sc->sc_dev));
336 ih_arg = sc;
337 ih_func = nvme_intr;
338 ih_func_soft = nvme_softintr_intx;
339 } else {
340 if (qid == NVME_ADMIN_Q) {
341 snprintf(intr_xname, sizeof(intr_xname), "%s adminq",
342 device_xname(sc->sc_dev));
343 } else {
344 snprintf(intr_xname, sizeof(intr_xname), "%s ioq%d",
345 device_xname(sc->sc_dev), qid);
346 }
347 ih_arg = q;
348 ih_func = nvme_intr_msi;
349 ih_func_soft = nvme_softintr_msi;
350 }
351
352 /* establish hardware interrupt */
353 sc->sc_ih[qid] = pci_intr_establish_xname(psc->psc_pc,
354 psc->psc_intrs[qid], IPL_BIO, ih_func, ih_arg, intr_xname);
355 if (sc->sc_ih[qid] == NULL) {
356 aprint_error_dev(sc->sc_dev,
357 "unable to establish %s interrupt\n", intr_xname);
358 return 1;
359 }
360
361 /* establish also the software interrupt */
362 sc->sc_softih[qid] = softint_establish(
363 SOFTINT_BIO|(nvme_pci_mpsafe ? SOFTINT_MPSAFE : 0),
364 ih_func_soft, q);
365 if (sc->sc_softih[qid] == NULL) {
366 pci_intr_disestablish(psc->psc_pc, sc->sc_ih[qid]);
367 sc->sc_ih[qid] = NULL;
368
369 aprint_error_dev(sc->sc_dev,
370 "unable to establish %s soft interrupt\n",
371 intr_xname);
372 return 1;
373 }
374
375 intrstr = pci_intr_string(psc->psc_pc, psc->psc_intrs[qid], intrbuf,
376 sizeof(intrbuf));
377 if (!sc->sc_use_mq) {
378 aprint_normal_dev(sc->sc_dev, "interrupting at %s\n", intrstr);
379 } else if (qid == NVME_ADMIN_Q) {
380 aprint_normal_dev(sc->sc_dev,
381 "for admin queue interrupting at %s\n", intrstr);
382 } else if (!nvme_pci_mpsafe) {
383 aprint_normal_dev(sc->sc_dev,
384 "for io queue %d interrupting at %s\n", qid, intrstr);
385 } else {
386 kcpuset_t *affinity;
387 cpuid_t affinity_to;
388
389 kcpuset_create(&affinity, true);
390 affinity_to = (qid - 1) % ncpu;
391 kcpuset_set(affinity, affinity_to);
392 error = interrupt_distribute(sc->sc_ih[qid], affinity, NULL);
393 kcpuset_destroy(affinity);
394 aprint_normal_dev(sc->sc_dev,
395 "for io queue %d interrupting at %s", qid, intrstr);
396 if (error == 0)
397 aprint_normal(" affinity to cpu%lu", affinity_to);
398 aprint_normal("\n");
399 }
400 return 0;
401 }
402
403 static int
nvme_pci_intr_disestablish(struct nvme_softc * sc,uint16_t qid)404 nvme_pci_intr_disestablish(struct nvme_softc *sc, uint16_t qid)
405 {
406 struct nvme_pci_softc *psc = (struct nvme_pci_softc *)sc;
407
408 KASSERT(sc->sc_use_mq || qid == NVME_ADMIN_Q);
409 KASSERT(sc->sc_ih[qid] != NULL);
410
411 if (sc->sc_softih) {
412 softint_disestablish(sc->sc_softih[qid]);
413 sc->sc_softih[qid] = NULL;
414 }
415
416 pci_intr_disestablish(psc->psc_pc, sc->sc_ih[qid]);
417 sc->sc_ih[qid] = NULL;
418
419 return 0;
420 }
421
422 static int
nvme_pci_setup_intr(struct pci_attach_args * pa,struct nvme_pci_softc * psc)423 nvme_pci_setup_intr(struct pci_attach_args *pa, struct nvme_pci_softc *psc)
424 {
425 struct nvme_softc *sc = &psc->psc_nvme;
426 int error;
427 int counts[PCI_INTR_TYPE_SIZE];
428 pci_intr_handle_t *ihps;
429 int intr_type;
430
431 memset(counts, 0, sizeof(counts));
432
433 if (nvme_pci_force_intx)
434 goto setup_intx;
435
436 /* MSI-X */
437 counts[PCI_INTR_TYPE_MSIX] = uimin(pci_msix_count(pa->pa_pc, pa->pa_tag),
438 ncpu + 1);
439 if (counts[PCI_INTR_TYPE_MSIX] < 1) {
440 counts[PCI_INTR_TYPE_MSIX] = 0;
441 } else if (!nvme_pci_mq || !nvme_pci_mpsafe) {
442 if (counts[PCI_INTR_TYPE_MSIX] > 2)
443 counts[PCI_INTR_TYPE_MSIX] = 2; /* adminq + 1 ioq */
444 }
445
446 /* MSI */
447 if (sc->sc_quirks & NVME_QUIRK_NOMSI)
448 goto setup_intx;
449 counts[PCI_INTR_TYPE_MSI] = pci_msi_count(pa->pa_pc, pa->pa_tag);
450 if (counts[PCI_INTR_TYPE_MSI] > 0) {
451 while (counts[PCI_INTR_TYPE_MSI] > ncpu + 1) {
452 if (counts[PCI_INTR_TYPE_MSI] / 2 <= ncpu + 1)
453 break;
454 counts[PCI_INTR_TYPE_MSI] /= 2;
455 }
456 }
457 if (counts[PCI_INTR_TYPE_MSI] < 1) {
458 counts[PCI_INTR_TYPE_MSI] = 0;
459 } else if (!nvme_pci_mq || !nvme_pci_mpsafe) {
460 if (counts[PCI_INTR_TYPE_MSI] > 2)
461 counts[PCI_INTR_TYPE_MSI] = 2; /* adminq + 1 ioq */
462 }
463
464 setup_intx:
465 /* INTx */
466 counts[PCI_INTR_TYPE_INTX] = 1;
467
468 error = pci_intr_alloc(pa, &ihps, counts, PCI_INTR_TYPE_MSIX);
469 if (error)
470 return error;
471
472 intr_type = pci_intr_type(pa->pa_pc, ihps[0]);
473
474 psc->psc_intrs = ihps;
475 psc->psc_nintrs = counts[intr_type];
476 if (intr_type == PCI_INTR_TYPE_MSI) {
477 if (counts[intr_type] > ncpu + 1)
478 counts[intr_type] = ncpu + 1;
479 }
480 sc->sc_use_mq = counts[intr_type] > 1;
481 sc->sc_nq = sc->sc_use_mq ? counts[intr_type] - 1 : 1;
482
483 return 0;
484 }
485
486 MODULE(MODULE_CLASS_DRIVER, nvme, "pci,dk_subr");
487
488 #ifdef _MODULE
489 #include "ioconf.c"
490 #endif
491
492 static int
nvme_modcmd(modcmd_t cmd,void * opaque)493 nvme_modcmd(modcmd_t cmd, void *opaque)
494 {
495 #ifdef _MODULE
496 devmajor_t cmajor, bmajor;
497 extern const struct cdevsw nvme_cdevsw;
498 static bool devsw_ok;
499 #endif
500 int error = 0;
501
502 #ifdef _MODULE
503 switch (cmd) {
504 case MODULE_CMD_INIT:
505 bmajor = cmajor = NODEVMAJOR;
506 error = devsw_attach(nvme_cd.cd_name, NULL, &bmajor,
507 &nvme_cdevsw, &cmajor);
508 if (error) {
509 aprint_error("%s: unable to register devsw, err %d\n",
510 nvme_cd.cd_name, error);
511 /* do not abort, just /dev/nvme* will not work */
512 }
513 else
514 devsw_ok = true;
515
516 error = config_init_component(cfdriver_ioconf_nvme_pci,
517 cfattach_ioconf_nvme_pci, cfdata_ioconf_nvme_pci);
518 if (error) {
519 if (devsw_ok) {
520 devsw_detach(NULL, &nvme_cdevsw);
521 devsw_ok = false;
522 }
523 break;
524 }
525 break;
526 case MODULE_CMD_FINI:
527 error = config_fini_component(cfdriver_ioconf_nvme_pci,
528 cfattach_ioconf_nvme_pci, cfdata_ioconf_nvme_pci);
529 if (devsw_ok) {
530 devsw_detach(NULL, &nvme_cdevsw);
531 devsw_ok = false;
532 }
533 break;
534 default:
535 break;
536 }
537 #endif
538 return error;
539 }
540