xref: /netbsd-src/sys/dev/pci/nvme_pci.c (revision 7330f729ccf0bd976a06f95fad452fe774fc7fd1)
1 /*	$NetBSD: nvme_pci.c,v 1.26 2019/01/23 06:56:19 msaitoh Exp $	*/
2 /*	$OpenBSD: nvme_pci.c,v 1.3 2016/04/14 11:18:32 dlg Exp $ */
3 
4 /*
5  * Copyright (c) 2014 David Gwynne <dlg@openbsd.org>
6  *
7  * Permission to use, copy, modify, and distribute this software for any
8  * purpose with or without fee is hereby granted, provided that the above
9  * copyright notice and this permission notice appear in all copies.
10  *
11  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18  */
19 
20 /*-
21  * Copyright (C) 2016 NONAKA Kimihiro <nonaka@netbsd.org>
22  * All rights reserved.
23  *
24  * Redistribution and use in source and binary forms, with or without
25  * modification, are permitted provided that the following conditions
26  * are met:
27  * 1. Redistributions of source code must retain the above copyright
28  *    notice, this list of conditions and the following disclaimer.
29  * 2. Redistributions in binary form must reproduce the above copyright
30  *    notice, this list of conditions and the following disclaimer in the
31  *    documentation and/or other materials provided with the distribution.
32  *
33  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
34  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
35  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
36  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
37  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
38  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
39  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
40  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
41  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
42  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43  */
44 
45 #include <sys/cdefs.h>
46 __KERNEL_RCSID(0, "$NetBSD: nvme_pci.c,v 1.26 2019/01/23 06:56:19 msaitoh Exp $");
47 
48 #include <sys/param.h>
49 #include <sys/systm.h>
50 #include <sys/kernel.h>
51 #include <sys/device.h>
52 #include <sys/bitops.h>
53 #include <sys/bus.h>
54 #include <sys/cpu.h>
55 #include <sys/interrupt.h>
56 #include <sys/kmem.h>
57 #include <sys/pmf.h>
58 #include <sys/module.h>
59 
60 #include <dev/pci/pcireg.h>
61 #include <dev/pci/pcivar.h>
62 #include <dev/pci/pcidevs.h>
63 
64 #include <dev/ic/nvmereg.h>
65 #include <dev/ic/nvmevar.h>
66 
/*
 * Driver tunables.
 *
 * nvme_pci_force_intx:
 *	When non-zero, nvme_pci_setup_intr() skips the MSI-X and MSI
 *	vector counting and requests a single INTx interrupt only.
 * nvme_pci_mpsafe:
 *	When non-zero, hardware interrupts are marked PCI_INTR_MPSAFE and
 *	soft interrupts are established with SOFTINT_MPSAFE.  When zero,
 *	MSI/MSI-X allocation is also capped at two vectors.
 * nvme_pci_mq:
 *	When zero, MSI/MSI-X allocation is capped at two vectors
 *	(admin queue + one I/O queue).
 */
int nvme_pci_force_intx = 0;
int nvme_pci_mpsafe = 1;
int nvme_pci_mq = 1;		/* INTx: ioq=1, MSI/MSI-X: ioq=ncpu */

/* Configuration-space offset of the BAR mapped by nvme_pci_attach(). */
#define NVME_PCI_BAR		0x10
72 
/*
 * Per-device state of the PCI attachment.
 *
 * psc_nvme must remain the first member: the interrupt establish and
 * disestablish callbacks in this file cast the struct nvme_softc pointer
 * they receive back to a struct nvme_pci_softc pointer.
 */
struct nvme_pci_softc {
	struct nvme_softc	psc_nvme;	/* bus-independent state */

	pci_chipset_tag_t	psc_pc;		/* copy of pa->pa_pc */
	pci_intr_handle_t	*psc_intrs;	/* from pci_intr_alloc() */
	int			psc_nintrs;	/* number of psc_intrs */
};
80 
/* Autoconfiguration entry points. */
static int	nvme_pci_match(device_t, cfdata_t, void *);
static void	nvme_pci_attach(device_t, device_t, void *);
static int	nvme_pci_detach(device_t, int);
static int	nvme_pci_rescan(device_t, const char *, const int *);

/*
 * Attachment glue: the softc is a struct nvme_pci_softc and the entry
 * points above are registered together with nvme_childdet.
 */
CFATTACH_DECL3_NEW(nvme_pci, sizeof(struct nvme_pci_softc),
    nvme_pci_match, nvme_pci_attach, nvme_pci_detach, NULL, nvme_pci_rescan,
    nvme_childdet, DVF_DETACH_SHUTDOWN);

/*
 * Interrupt helpers.  The establish/disestablish pair is stored in the
 * nvme_softc by nvme_pci_attach(); nvme_pci_setup_intr() allocates the
 * interrupt handles during attach.
 */
static int	nvme_pci_intr_establish(struct nvme_softc *,
		    uint16_t, struct nvme_queue *);
static int	nvme_pci_intr_disestablish(struct nvme_softc *, uint16_t);
static int	nvme_pci_setup_intr(struct pci_attach_args *,
		    struct nvme_pci_softc *);
95 
96 static const struct nvme_pci_quirk {
97 	pci_vendor_id_t		vendor;
98 	pci_product_id_t	product;
99 	uint32_t		quirks;
100 } nvme_pci_quirks[] = {
101 	{ PCI_VENDOR_HGST, PCI_PRODUCT_HGST_SN100,
102 	    NVME_QUIRK_DELAY_B4_CHK_RDY },
103 	{ PCI_VENDOR_HGST, PCI_PRODUCT_HGST_SN200,
104 	    NVME_QUIRK_DELAY_B4_CHK_RDY },
105 	{ PCI_VENDOR_BEIJING_MEMBLAZE, PCI_PRODUCT_BEIJING_MEMBLAZE_PBLAZE4,
106 	    NVME_QUIRK_DELAY_B4_CHK_RDY },
107 	{ PCI_VENDOR_SAMSUNGELEC3, PCI_PRODUCT_SAMSUNGELEC3_172X,
108 	    NVME_QUIRK_DELAY_B4_CHK_RDY },
109 	{ PCI_VENDOR_SAMSUNGELEC3, PCI_PRODUCT_SAMSUNGELEC3_172XAB,
110 	    NVME_QUIRK_DELAY_B4_CHK_RDY },
111 };
112 
113 static const struct nvme_pci_quirk *
114 nvme_pci_lookup_quirk(struct pci_attach_args *pa)
115 {
116 	const struct nvme_pci_quirk *q;
117 	int i;
118 
119 	for (i = 0; i < __arraycount(nvme_pci_quirks); i++) {
120 		q = &nvme_pci_quirks[i];
121 
122 		if (PCI_VENDOR(pa->pa_id) == q->vendor &&
123 		    PCI_PRODUCT(pa->pa_id) == q->product)
124 			return q;
125 	}
126 	return NULL;
127 }
128 
129 static int
130 nvme_pci_match(device_t parent, cfdata_t match, void *aux)
131 {
132 	struct pci_attach_args *pa = aux;
133 
134 	if (PCI_CLASS(pa->pa_class) == PCI_CLASS_MASS_STORAGE &&
135 	    PCI_SUBCLASS(pa->pa_class) == PCI_SUBCLASS_MASS_STORAGE_NVM &&
136 	    PCI_INTERFACE(pa->pa_class) == PCI_INTERFACE_NVM_NVME)
137 		return 1;
138 
139 	return 0;
140 }
141 
142 static void
143 nvme_pci_attach(device_t parent, device_t self, void *aux)
144 {
145 	struct nvme_pci_softc *psc = device_private(self);
146 	struct nvme_softc *sc = &psc->psc_nvme;
147 	struct pci_attach_args *pa = aux;
148 	const struct nvme_pci_quirk *quirk;
149 	pcireg_t memtype, reg;
150 	bus_addr_t memaddr;
151 	int flags, error;
152 	int msixoff;
153 
154 	sc->sc_dev = self;
155 	psc->psc_pc = pa->pa_pc;
156 	if (pci_dma64_available(pa))
157 		sc->sc_dmat = pa->pa_dmat64;
158 	else
159 		sc->sc_dmat = pa->pa_dmat;
160 
161 	pci_aprint_devinfo(pa, NULL);
162 
163 	/* Map registers */
164 	memtype = pci_mapreg_type(pa->pa_pc, pa->pa_tag, NVME_PCI_BAR);
165 	if (PCI_MAPREG_TYPE(memtype) != PCI_MAPREG_TYPE_MEM) {
166 		aprint_error_dev(self, "invalid type (type=0x%x)\n", memtype);
167 		return;
168 	}
169 	reg = pci_conf_read(pa->pa_pc, pa->pa_tag, PCI_COMMAND_STATUS_REG);
170 	if (((reg & PCI_COMMAND_MASTER_ENABLE) == 0) ||
171 	    ((reg & PCI_COMMAND_MEM_ENABLE) == 0)) {
172 		/*
173 		 * Enable address decoding for memory range in case BIOS or
174 		 * UEFI didn't set it.
175 		 */
176 		reg |= PCI_COMMAND_MASTER_ENABLE | PCI_COMMAND_MEM_ENABLE;
177         	pci_conf_write(pa->pa_pc, pa->pa_tag, PCI_COMMAND_STATUS_REG,
178 		    reg);
179 	}
180 
181 	sc->sc_iot = pa->pa_memt;
182 	error = pci_mapreg_info(pa->pa_pc, pa->pa_tag, NVME_PCI_BAR,
183 	    memtype, &memaddr, &sc->sc_ios, &flags);
184 	if (error) {
185 		aprint_error_dev(self, "can't get map info\n");
186 		return;
187 	}
188 
189 	if (pci_get_capability(pa->pa_pc, pa->pa_tag, PCI_CAP_MSIX, &msixoff,
190 	    NULL)) {
191 		pcireg_t msixtbl;
192 		uint32_t table_offset;
193 		int bir;
194 
195 		msixtbl = pci_conf_read(pa->pa_pc, pa->pa_tag,
196 		    msixoff + PCI_MSIX_TBLOFFSET);
197 		table_offset = msixtbl & PCI_MSIX_TBLOFFSET_MASK;
198 		bir = msixtbl & PCI_MSIX_PBABIR_MASK;
199 		if (bir == PCI_MAPREG_NUM(NVME_PCI_BAR)) {
200 			sc->sc_ios = table_offset;
201 		}
202 	}
203 
204 	error = bus_space_map(sc->sc_iot, memaddr, sc->sc_ios, flags,
205 	    &sc->sc_ioh);
206 	if (error != 0) {
207 		aprint_error_dev(self, "can't map mem space (error=%d)\n",
208 		    error);
209 		return;
210 	}
211 
212 	/* Establish interrupts */
213 	if (nvme_pci_setup_intr(pa, psc) != 0) {
214 		aprint_error_dev(self, "unable to allocate interrupt\n");
215 		goto unmap;
216 	}
217 	sc->sc_intr_establish = nvme_pci_intr_establish;
218 	sc->sc_intr_disestablish = nvme_pci_intr_disestablish;
219 
220 	sc->sc_ih = kmem_zalloc(sizeof(*sc->sc_ih) * psc->psc_nintrs, KM_SLEEP);
221 	sc->sc_softih = kmem_zalloc(
222 	    sizeof(*sc->sc_softih) * psc->psc_nintrs, KM_SLEEP);
223 
224 	quirk = nvme_pci_lookup_quirk(pa);
225 	if (quirk != NULL)
226 		sc->sc_quirks = quirk->quirks;
227 
228 	if (nvme_attach(sc) != 0) {
229 		/* error printed by nvme_attach() */
230 		goto softintr_free;
231 	}
232 
233 	if (!pmf_device_register(self, NULL, NULL))
234 		aprint_error_dev(self, "couldn't establish power handler\n");
235 
236 	SET(sc->sc_flags, NVME_F_ATTACHED);
237 	return;
238 
239 softintr_free:
240 	kmem_free(sc->sc_softih, sizeof(*sc->sc_softih) * psc->psc_nintrs);
241 	kmem_free(sc->sc_ih, sizeof(*sc->sc_ih) * psc->psc_nintrs);
242 	sc->sc_nq = 0;
243 	pci_intr_release(pa->pa_pc, psc->psc_intrs, psc->psc_nintrs);
244 	psc->psc_nintrs = 0;
245 unmap:
246 	bus_space_unmap(sc->sc_iot, sc->sc_ioh, sc->sc_ios);
247 	sc->sc_ios = 0;
248 }
249 
250 static int
251 nvme_pci_rescan(device_t self, const char *attr, const int *flags)
252 {
253 
254 	return nvme_rescan(self, attr, flags);
255 }
256 
257 static int
258 nvme_pci_detach(device_t self, int flags)
259 {
260 	struct nvme_pci_softc *psc = device_private(self);
261 	struct nvme_softc *sc = &psc->psc_nvme;
262 	int error;
263 
264 	if (!ISSET(sc->sc_flags, NVME_F_ATTACHED))
265 		return 0;
266 
267 	error = nvme_detach(sc, flags);
268 	if (error)
269 		return error;
270 
271 	kmem_free(sc->sc_softih, sizeof(*sc->sc_softih) * psc->psc_nintrs);
272 	sc->sc_softih = NULL;
273 
274 	kmem_free(sc->sc_ih, sizeof(*sc->sc_ih) * psc->psc_nintrs);
275 	pci_intr_release(psc->psc_pc, psc->psc_intrs, psc->psc_nintrs);
276 	bus_space_unmap(sc->sc_iot, sc->sc_ioh, sc->sc_ios);
277 	return 0;
278 }
279 
280 static int
281 nvme_pci_intr_establish(struct nvme_softc *sc, uint16_t qid,
282     struct nvme_queue *q)
283 {
284 	struct nvme_pci_softc *psc = (struct nvme_pci_softc *)sc;
285 	char intr_xname[INTRDEVNAMEBUF];
286 	char intrbuf[PCI_INTRSTR_LEN];
287 	const char *intrstr = NULL;
288 	int (*ih_func)(void *);
289 	void (*ih_func_soft)(void *);
290 	void *ih_arg;
291 	int error;
292 
293 	KASSERT(sc->sc_use_mq || qid == NVME_ADMIN_Q);
294 	KASSERT(sc->sc_ih[qid] == NULL);
295 
296 	if (nvme_pci_mpsafe) {
297 		pci_intr_setattr(psc->psc_pc, &psc->psc_intrs[qid],
298 		    PCI_INTR_MPSAFE, true);
299 	}
300 
301 	if (!sc->sc_use_mq) {
302 		snprintf(intr_xname, sizeof(intr_xname), "%s",
303 		    device_xname(sc->sc_dev));
304 		ih_arg = sc;
305 		ih_func = nvme_intr;
306 		ih_func_soft = nvme_softintr_intx;
307 	} else {
308 		if (qid == NVME_ADMIN_Q) {
309 			snprintf(intr_xname, sizeof(intr_xname), "%s adminq",
310 			    device_xname(sc->sc_dev));
311 		} else {
312 			snprintf(intr_xname, sizeof(intr_xname), "%s ioq%d",
313 			    device_xname(sc->sc_dev), qid);
314 		}
315 		ih_arg = q;
316 		ih_func = nvme_intr_msi;
317 		ih_func_soft = nvme_softintr_msi;
318 	}
319 
320 	/* establish hardware interrupt */
321 	sc->sc_ih[qid] = pci_intr_establish_xname(psc->psc_pc,
322 	    psc->psc_intrs[qid], IPL_BIO, ih_func, ih_arg, intr_xname);
323 	if (sc->sc_ih[qid] == NULL) {
324 		aprint_error_dev(sc->sc_dev,
325 		    "unable to establish %s interrupt\n", intr_xname);
326 		return 1;
327 	}
328 
329 	/* establish also the software interrupt */
330 	sc->sc_softih[qid] = softint_establish(
331 	    SOFTINT_BIO|(nvme_pci_mpsafe ? SOFTINT_MPSAFE : 0),
332 	    ih_func_soft, q);
333 	if (sc->sc_softih[qid] == NULL) {
334 		pci_intr_disestablish(psc->psc_pc, sc->sc_ih[qid]);
335 		sc->sc_ih[qid] = NULL;
336 
337 		aprint_error_dev(sc->sc_dev,
338 		    "unable to establish %s soft interrupt\n",
339 		    intr_xname);
340 		return 1;
341 	}
342 
343 	intrstr = pci_intr_string(psc->psc_pc, psc->psc_intrs[qid], intrbuf,
344 	    sizeof(intrbuf));
345 	if (!sc->sc_use_mq) {
346 		aprint_normal_dev(sc->sc_dev, "interrupting at %s\n", intrstr);
347 	} else if (qid == NVME_ADMIN_Q) {
348 		aprint_normal_dev(sc->sc_dev,
349 		    "for admin queue interrupting at %s\n", intrstr);
350 	} else if (!nvme_pci_mpsafe) {
351 		aprint_normal_dev(sc->sc_dev,
352 		    "for io queue %d interrupting at %s\n", qid, intrstr);
353 	} else {
354 		kcpuset_t *affinity;
355 		cpuid_t affinity_to;
356 
357 		kcpuset_create(&affinity, true);
358 		affinity_to = (qid - 1) % ncpu;
359 		kcpuset_set(affinity, affinity_to);
360 		error = interrupt_distribute(sc->sc_ih[qid], affinity, NULL);
361 		kcpuset_destroy(affinity);
362 		aprint_normal_dev(sc->sc_dev,
363 		    "for io queue %d interrupting at %s", qid, intrstr);
364 		if (error == 0)
365 			aprint_normal(" affinity to cpu%lu", affinity_to);
366 		aprint_normal("\n");
367 	}
368 	return 0;
369 }
370 
371 static int
372 nvme_pci_intr_disestablish(struct nvme_softc *sc, uint16_t qid)
373 {
374 	struct nvme_pci_softc *psc = (struct nvme_pci_softc *)sc;
375 
376 	KASSERT(sc->sc_use_mq || qid == NVME_ADMIN_Q);
377 	KASSERT(sc->sc_ih[qid] != NULL);
378 
379 	if (sc->sc_softih) {
380 		softint_disestablish(sc->sc_softih[qid]);
381 		sc->sc_softih[qid] = NULL;
382 	}
383 
384 	pci_intr_disestablish(psc->psc_pc, sc->sc_ih[qid]);
385 	sc->sc_ih[qid] = NULL;
386 
387 	return 0;
388 }
389 
390 static int
391 nvme_pci_setup_intr(struct pci_attach_args *pa, struct nvme_pci_softc *psc)
392 {
393 	struct nvme_softc *sc = &psc->psc_nvme;
394 	int error;
395 	int counts[PCI_INTR_TYPE_SIZE];
396 	pci_intr_handle_t *ihps;
397 	int intr_type;
398 
399 	memset(counts, 0, sizeof(counts));
400 
401 	if (nvme_pci_force_intx)
402 		goto force_intx;
403 
404 	/* MSI-X */
405 	counts[PCI_INTR_TYPE_MSIX] = uimin(pci_msix_count(pa->pa_pc, pa->pa_tag),
406 	    ncpu + 1);
407 	if (counts[PCI_INTR_TYPE_MSIX] < 1) {
408 		counts[PCI_INTR_TYPE_MSIX] = 0;
409 	} else if (!nvme_pci_mq || !nvme_pci_mpsafe) {
410 		if (counts[PCI_INTR_TYPE_MSIX] > 2)
411 			counts[PCI_INTR_TYPE_MSIX] = 2;	/* adminq + 1 ioq */
412 	}
413 
414 	/* MSI */
415 	counts[PCI_INTR_TYPE_MSI] = pci_msi_count(pa->pa_pc, pa->pa_tag);
416 	if (counts[PCI_INTR_TYPE_MSI] > 0) {
417 		while (counts[PCI_INTR_TYPE_MSI] > ncpu + 1) {
418 			if (counts[PCI_INTR_TYPE_MSI] / 2 <= ncpu + 1)
419 				break;
420 			counts[PCI_INTR_TYPE_MSI] /= 2;
421 		}
422 	}
423 	if (counts[PCI_INTR_TYPE_MSI] < 1) {
424 		counts[PCI_INTR_TYPE_MSI] = 0;
425 	} else if (!nvme_pci_mq || !nvme_pci_mpsafe) {
426 		if (counts[PCI_INTR_TYPE_MSI] > 2)
427 			counts[PCI_INTR_TYPE_MSI] = 2;	/* adminq + 1 ioq */
428 	}
429 
430 force_intx:
431 	/* INTx */
432 	counts[PCI_INTR_TYPE_INTX] = 1;
433 
434 	error = pci_intr_alloc(pa, &ihps, counts, PCI_INTR_TYPE_MSIX);
435 	if (error)
436 		return error;
437 
438 	intr_type = pci_intr_type(pa->pa_pc, ihps[0]);
439 
440 	psc->psc_intrs = ihps;
441 	psc->psc_nintrs = counts[intr_type];
442 	if (intr_type == PCI_INTR_TYPE_MSI) {
443 		if (counts[intr_type] > ncpu + 1)
444 			counts[intr_type] = ncpu + 1;
445 	}
446 	sc->sc_use_mq = counts[intr_type] > 1;
447 	sc->sc_nq = sc->sc_use_mq ? counts[intr_type] - 1 : 1;
448 
449 	return 0;
450 }
451 
452 MODULE(MODULE_CLASS_DRIVER, nvme, "pci,dk_subr");
453 
454 #ifdef _MODULE
455 #include "ioconf.c"
456 #endif
457 
458 static int
459 nvme_modcmd(modcmd_t cmd, void *opaque)
460 {
461 #ifdef _MODULE
462 	devmajor_t cmajor, bmajor;
463 	extern const struct cdevsw nvme_cdevsw;
464 #endif
465 	int error = 0;
466 
467 #ifdef _MODULE
468 	switch (cmd) {
469 	case MODULE_CMD_INIT:
470 		error = config_init_component(cfdriver_ioconf_nvme_pci,
471 		    cfattach_ioconf_nvme_pci, cfdata_ioconf_nvme_pci);
472 		if (error)
473 			break;
474 
475 		bmajor = cmajor = NODEVMAJOR;
476 		error = devsw_attach(nvme_cd.cd_name, NULL, &bmajor,
477 		    &nvme_cdevsw, &cmajor);
478 		if (error) {
479 			aprint_error("%s: unable to register devsw\n",
480 			    nvme_cd.cd_name);
481 			/* do not abort, just /dev/nvme* will not work */
482 		}
483 		break;
484 	case MODULE_CMD_FINI:
485 		devsw_detach(NULL, &nvme_cdevsw);
486 
487 		error = config_fini_component(cfdriver_ioconf_nvme_pci,
488 		    cfattach_ioconf_nvme_pci, cfdata_ioconf_nvme_pci);
489 		break;
490 	default:
491 		break;
492 	}
493 #endif
494 	return error;
495 }
496