xref: /netbsd-src/sys/dev/pci/nvme_pci.c (revision e89934bbf778a6d6d6894877c4da59d0c7835b0f)
1 /*	$NetBSD: nvme_pci.c,v 1.18 2017/02/13 04:42:15 nonaka Exp $	*/
2 /*	$OpenBSD: nvme_pci.c,v 1.3 2016/04/14 11:18:32 dlg Exp $ */
3 
4 /*
5  * Copyright (c) 2014 David Gwynne <dlg@openbsd.org>
6  *
7  * Permission to use, copy, modify, and distribute this software for any
8  * purpose with or without fee is hereby granted, provided that the above
9  * copyright notice and this permission notice appear in all copies.
10  *
11  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18  */
19 
20 /*-
21  * Copyright (C) 2016 NONAKA Kimihiro <nonaka@netbsd.org>
22  * All rights reserved.
23  *
24  * Redistribution and use in source and binary forms, with or without
25  * modification, are permitted provided that the following conditions
26  * are met:
27  * 1. Redistributions of source code must retain the above copyright
28  *    notice, this list of conditions and the following disclaimer.
29  * 2. Redistributions in binary form must reproduce the above copyright
30  *    notice, this list of conditions and the following disclaimer in the
31  *    documentation and/or other materials provided with the distribution.
32  *
33  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
34  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
35  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
36  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
37  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
38  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
39  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
40  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
41  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
42  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43  */
44 
45 #include <sys/cdefs.h>
46 __KERNEL_RCSID(0, "$NetBSD: nvme_pci.c,v 1.18 2017/02/13 04:42:15 nonaka Exp $");
47 
48 #include <sys/param.h>
49 #include <sys/systm.h>
50 #include <sys/kernel.h>
51 #include <sys/device.h>
52 #include <sys/bitops.h>
53 #include <sys/bus.h>
54 #include <sys/cpu.h>
55 #include <sys/interrupt.h>
56 #include <sys/kmem.h>
57 #include <sys/pmf.h>
58 #include <sys/module.h>
59 
60 #include <dev/pci/pcireg.h>
61 #include <dev/pci/pcivar.h>
62 
63 #include <dev/ic/nvmereg.h>
64 #include <dev/ic/nvmevar.h>
65 
/* Patchable tunables controlling interrupt setup (read in nvme_pci_setup_intr). */
int nvme_pci_force_intx = 0;	/* nonzero: skip MSI/MSI-X, use INTx only */
int nvme_pci_mpsafe = 1;	/* nonzero: MP-safe handlers + CPU affinity */
int nvme_pci_mq = 1;		/* INTx: ioq=1, MSI/MSI-X: ioq=ncpu */

/* BAR0: NVMe controller register BAR (64-bit memory BAR). */
#define NVME_PCI_BAR		0x10

/*
 * PCI bus glue around the MI nvme_softc; psc_nvme must be first so the
 * casts in nvme_pci_intr_{dis,}establish() are valid.
 */
struct nvme_pci_softc {
	struct nvme_softc	psc_nvme;	/* MI state; MUST be first */

	pci_chipset_tag_t	psc_pc;		/* chipset tag for intr ops */
	pci_intr_handle_t	*psc_intrs;	/* handles from pci_intr_alloc */
	int			psc_nintrs;	/* number of handles allocated */
};

static int	nvme_pci_match(device_t, cfdata_t, void *);
static void	nvme_pci_attach(device_t, device_t, void *);
static int	nvme_pci_detach(device_t, int);
static int	nvme_pci_rescan(device_t, const char *, const int *);

CFATTACH_DECL3_NEW(nvme_pci, sizeof(struct nvme_pci_softc),
    nvme_pci_match, nvme_pci_attach, nvme_pci_detach, NULL, nvme_pci_rescan,
    nvme_childdet, DVF_DETACH_SHUTDOWN);

static int	nvme_pci_intr_establish(struct nvme_softc *,
		    uint16_t, struct nvme_queue *);
static int	nvme_pci_intr_disestablish(struct nvme_softc *, uint16_t);
static int	nvme_pci_setup_intr(struct pci_attach_args *,
		    struct nvme_pci_softc *);
94 
95 static int
96 nvme_pci_match(device_t parent, cfdata_t match, void *aux)
97 {
98 	struct pci_attach_args *pa = aux;
99 
100 	if (PCI_CLASS(pa->pa_class) == PCI_CLASS_MASS_STORAGE &&
101 	    PCI_SUBCLASS(pa->pa_class) == PCI_SUBCLASS_MASS_STORAGE_NVM &&
102 	    PCI_INTERFACE(pa->pa_class) == PCI_INTERFACE_NVM_NVME)
103 		return 1;
104 
105 	return 0;
106 }
107 
108 static void
109 nvme_pci_attach(device_t parent, device_t self, void *aux)
110 {
111 	struct nvme_pci_softc *psc = device_private(self);
112 	struct nvme_softc *sc = &psc->psc_nvme;
113 	struct pci_attach_args *pa = aux;
114 	pcireg_t memtype, reg;
115 	bus_addr_t memaddr;
116 	int flags, error;
117 	int msixoff;
118 
119 	sc->sc_dev = self;
120 	psc->psc_pc = pa->pa_pc;
121 	if (pci_dma64_available(pa))
122 		sc->sc_dmat = pa->pa_dmat64;
123 	else
124 		sc->sc_dmat = pa->pa_dmat;
125 
126 	pci_aprint_devinfo(pa, NULL);
127 
128 	reg = pci_conf_read(pa->pa_pc, pa->pa_tag, PCI_COMMAND_STATUS_REG);
129 	if ((reg & PCI_COMMAND_MASTER_ENABLE) == 0) {
130 		reg |= PCI_COMMAND_MASTER_ENABLE;
131         	pci_conf_write(pa->pa_pc, pa->pa_tag, PCI_COMMAND_STATUS_REG, reg);
132 	}
133 
134 	/* Map registers */
135 	memtype = pci_mapreg_type(pa->pa_pc, pa->pa_tag, NVME_PCI_BAR);
136 	if (PCI_MAPREG_TYPE(memtype) != PCI_MAPREG_TYPE_MEM) {
137 		aprint_error_dev(self, "invalid type (type=0x%x)\n", memtype);
138 		return;
139 	}
140 	sc->sc_iot = pa->pa_memt;
141 	error = pci_mapreg_info(pa->pa_pc, pa->pa_tag, PCI_MAPREG_START,
142 	    memtype, &memaddr, &sc->sc_ios, &flags);
143 	if (error) {
144 		aprint_error_dev(self, "can't get map info\n");
145 		return;
146 	}
147 
148 	if (pci_get_capability(pa->pa_pc, pa->pa_tag, PCI_CAP_MSIX, &msixoff,
149 	    NULL)) {
150 		pcireg_t msixtbl;
151 		uint32_t table_offset;
152 		int bir;
153 
154 		msixtbl = pci_conf_read(pa->pa_pc, pa->pa_tag,
155 		    msixoff + PCI_MSIX_TBLOFFSET);
156 		table_offset = msixtbl & PCI_MSIX_TBLOFFSET_MASK;
157 		bir = msixtbl & PCI_MSIX_PBABIR_MASK;
158 		if (bir == 0) {
159 			sc->sc_ios = table_offset;
160 		}
161 	}
162 
163 	error = bus_space_map(sc->sc_iot, memaddr, sc->sc_ios, flags,
164 	    &sc->sc_ioh);
165 	if (error != 0) {
166 		aprint_error_dev(self, "can't map mem space (error=%d)\n",
167 		    error);
168 		return;
169 	}
170 
171 	/* Establish interrupts */
172 	if (nvme_pci_setup_intr(pa, psc) != 0) {
173 		aprint_error_dev(self, "unable to allocate interrupt\n");
174 		goto unmap;
175 	}
176 	sc->sc_intr_establish = nvme_pci_intr_establish;
177 	sc->sc_intr_disestablish = nvme_pci_intr_disestablish;
178 
179 	sc->sc_ih = kmem_zalloc(sizeof(*sc->sc_ih) * psc->psc_nintrs, KM_SLEEP);
180 	if (sc->sc_ih == NULL) {
181 		aprint_error_dev(self, "unable to allocate ih memory\n");
182 		goto intr_release;
183 	}
184 
185 	sc->sc_softih = kmem_zalloc(
186 	    sizeof(*sc->sc_softih) * psc->psc_nintrs, KM_SLEEP);
187 	if (sc->sc_softih == NULL) {
188 		aprint_error_dev(self,
189 		    "unable to allocate softih memory\n");
190 		goto intr_free;
191 	}
192 
193 	if (nvme_attach(sc) != 0) {
194 		/* error printed by nvme_attach() */
195 		goto softintr_free;
196 	}
197 
198 	if (!pmf_device_register(self, NULL, NULL))
199 		aprint_error_dev(self, "couldn't establish power handler\n");
200 
201 	SET(sc->sc_flags, NVME_F_ATTACHED);
202 	return;
203 
204 softintr_free:
205 	kmem_free(sc->sc_softih, sizeof(*sc->sc_softih) * psc->psc_nintrs);
206 intr_free:
207 	kmem_free(sc->sc_ih, sizeof(*sc->sc_ih) * psc->psc_nintrs);
208 	sc->sc_nq = 0;
209 intr_release:
210 	pci_intr_release(pa->pa_pc, psc->psc_intrs, psc->psc_nintrs);
211 	psc->psc_nintrs = 0;
212 unmap:
213 	bus_space_unmap(sc->sc_iot, sc->sc_ioh, sc->sc_ios);
214 	sc->sc_ios = 0;
215 }
216 
217 static int
218 nvme_pci_rescan(device_t self, const char *attr, const int *flags)
219 {
220 
221 	return nvme_rescan(self, attr, flags);
222 }
223 
224 static int
225 nvme_pci_detach(device_t self, int flags)
226 {
227 	struct nvme_pci_softc *psc = device_private(self);
228 	struct nvme_softc *sc = &psc->psc_nvme;
229 	int error;
230 
231 	if (!ISSET(sc->sc_flags, NVME_F_ATTACHED))
232 		return 0;
233 
234 	error = nvme_detach(sc, flags);
235 	if (error)
236 		return error;
237 
238 	kmem_free(sc->sc_softih, sizeof(*sc->sc_softih) * psc->psc_nintrs);
239 	sc->sc_softih = NULL;
240 
241 	kmem_free(sc->sc_ih, sizeof(*sc->sc_ih) * psc->psc_nintrs);
242 	pci_intr_release(psc->psc_pc, psc->psc_intrs, psc->psc_nintrs);
243 	bus_space_unmap(sc->sc_iot, sc->sc_ioh, sc->sc_ios);
244 	return 0;
245 }
246 
/*
 * Establish the hardware and software interrupt for queue qid.
 * In INTx/single-vector mode the hard handler receives the softc and
 * demultiplexes; in MSI/MSI-X mode each queue gets its own vector with
 * the queue as handler argument.  Returns 0 on success, 1 on failure.
 */
static int
nvme_pci_intr_establish(struct nvme_softc *sc, uint16_t qid,
    struct nvme_queue *q)
{
	struct nvme_pci_softc *psc = (struct nvme_pci_softc *)sc;
	char intr_xname[INTRDEVNAMEBUF];
	char intrbuf[PCI_INTRSTR_LEN];
	const char *intrstr = NULL;
	int (*ih_func)(void *);
	void (*ih_func_soft)(void *);
	void *ih_arg;
	int error;

	KASSERT(sc->sc_use_mq || qid == NVME_ADMIN_Q);
	KASSERT(sc->sc_ih[qid] == NULL);

	if (nvme_pci_mpsafe) {
		pci_intr_setattr(psc->psc_pc, &psc->psc_intrs[qid],
		    PCI_INTR_MPSAFE, true);
	}

	/* Pick handler functions/argument and a per-queue interrupt name. */
	if (!sc->sc_use_mq) {
		snprintf(intr_xname, sizeof(intr_xname), "%s",
		    device_xname(sc->sc_dev));
		ih_arg = sc;
		ih_func = nvme_intr;
		ih_func_soft = nvme_softintr_intx;
	} else {
		if (qid == NVME_ADMIN_Q) {
			snprintf(intr_xname, sizeof(intr_xname), "%s adminq",
			    device_xname(sc->sc_dev));
		} else {
			snprintf(intr_xname, sizeof(intr_xname), "%s ioq%d",
			    device_xname(sc->sc_dev), qid);
		}
		ih_arg = q;
		ih_func = nvme_intr_msi;
		ih_func_soft = nvme_softintr_msi;
	}

	/* establish hardware interrupt */
	sc->sc_ih[qid] = pci_intr_establish_xname(psc->psc_pc,
	    psc->psc_intrs[qid], IPL_BIO, ih_func, ih_arg, intr_xname);
	if (sc->sc_ih[qid] == NULL) {
		aprint_error_dev(sc->sc_dev,
		    "unable to establish %s interrupt\n", intr_xname);
		return 1;
	}

	/* establish also the software interrupt */
	sc->sc_softih[qid] = softint_establish(
	    SOFTINT_BIO|(nvme_pci_mpsafe ? SOFTINT_MPSAFE : 0),
	    ih_func_soft, q);
	if (sc->sc_softih[qid] == NULL) {
		/* Undo the hard interrupt so the caller sees a clean failure. */
		pci_intr_disestablish(psc->psc_pc, sc->sc_ih[qid]);
		sc->sc_ih[qid] = NULL;

		aprint_error_dev(sc->sc_dev,
		    "unable to establish %s soft interrupt\n",
		    intr_xname);
		return 1;
	}

	/* Report where we interrupt; for MP-safe MQ also bind I/O queues. */
	intrstr = pci_intr_string(psc->psc_pc, psc->psc_intrs[qid], intrbuf,
	    sizeof(intrbuf));
	if (!sc->sc_use_mq) {
		aprint_normal_dev(sc->sc_dev, "interrupting at %s\n", intrstr);
	} else if (qid == NVME_ADMIN_Q) {
		aprint_normal_dev(sc->sc_dev,
		    "for admin queue interrupting at %s\n", intrstr);
	} else if (!nvme_pci_mpsafe) {
		aprint_normal_dev(sc->sc_dev,
		    "for io queue %d interrupting at %s\n", qid, intrstr);
	} else {
		kcpuset_t *affinity;
		cpuid_t affinity_to;

		/* Spread I/O queue vectors round-robin over the CPUs. */
		kcpuset_create(&affinity, true);
		affinity_to = (qid - 1) % ncpu;
		kcpuset_set(affinity, affinity_to);
		/* Affinity is best-effort; failure only changes the message. */
		error = interrupt_distribute(sc->sc_ih[qid], affinity, NULL);
		kcpuset_destroy(affinity);
		aprint_normal_dev(sc->sc_dev,
		    "for io queue %d interrupting at %s", qid, intrstr);
		if (error == 0)
			aprint_normal(" affinity to cpu%lu", affinity_to);
		aprint_normal("\n");
	}
	return 0;
}
337 
338 static int
339 nvme_pci_intr_disestablish(struct nvme_softc *sc, uint16_t qid)
340 {
341 	struct nvme_pci_softc *psc = (struct nvme_pci_softc *)sc;
342 
343 	KASSERT(sc->sc_use_mq || qid == NVME_ADMIN_Q);
344 	KASSERT(sc->sc_ih[qid] != NULL);
345 
346 	if (sc->sc_softih) {
347 		softint_disestablish(sc->sc_softih[qid]);
348 		sc->sc_softih[qid] = NULL;
349 	}
350 
351 	pci_intr_disestablish(psc->psc_pc, sc->sc_ih[qid]);
352 	sc->sc_ih[qid] = NULL;
353 
354 	return 0;
355 }
356 
/*
 * Negotiate interrupt resources, preferring MSI-X, then MSI, then INTx.
 * For each type we first probe how many vectors are actually available
 * (trial pci_intr_alloc + immediate release), then do the real
 * allocation; if that comes up short we fall back to the next type.
 * On success fills psc_intrs/psc_nintrs and derives sc_use_mq/sc_nq
 * (one vector is reserved for the admin queue in MQ mode).
 */
static int
nvme_pci_setup_intr(struct pci_attach_args *pa, struct nvme_pci_softc *psc)
{
	struct nvme_softc *sc = &psc->psc_nvme;
	int error;
	int counts[PCI_INTR_TYPE_SIZE], alloced_counts[PCI_INTR_TYPE_SIZE];
	pci_intr_handle_t *ihps;
	int max_type, intr_type;

	if (nvme_pci_force_intx) {
		max_type = PCI_INTR_TYPE_INTX;
		goto force_intx;
	}

	/* MSI-X */
	/* Want ncpu I/O queues + 1 admin queue, capped by the device. */
	max_type = PCI_INTR_TYPE_MSIX;
	counts[PCI_INTR_TYPE_MSIX] = min(pci_msix_count(pa->pa_pc, pa->pa_tag),
	    ncpu + 1);
	if (counts[PCI_INTR_TYPE_MSIX] > 0) {
		/* Trial allocation to learn the achievable count. */
		memset(alloced_counts, 0, sizeof(alloced_counts));
		alloced_counts[PCI_INTR_TYPE_MSIX] = counts[PCI_INTR_TYPE_MSIX];
		if (pci_intr_alloc(pa, &ihps, alloced_counts,
		    PCI_INTR_TYPE_MSIX)) {
			counts[PCI_INTR_TYPE_MSIX] = 0;
		} else {
			counts[PCI_INTR_TYPE_MSIX] =
			    alloced_counts[PCI_INTR_TYPE_MSIX];
			pci_intr_release(pa->pa_pc, ihps,
			    alloced_counts[PCI_INTR_TYPE_MSIX]);
		}
	}
	if (counts[PCI_INTR_TYPE_MSIX] < 2) {
		/* Need at least adminq + 1 ioq to be worth MSI-X. */
		counts[PCI_INTR_TYPE_MSIX] = 0;
		max_type = PCI_INTR_TYPE_MSI;
	} else if (!nvme_pci_mq || !nvme_pci_mpsafe) {
		counts[PCI_INTR_TYPE_MSIX] = 2;	/* adminq + 1 ioq */
	}

retry_msi:
	/* MSI */
	counts[PCI_INTR_TYPE_MSI] = pci_msi_count(pa->pa_pc, pa->pa_tag);
	if (counts[PCI_INTR_TYPE_MSI] > 0) {
		/* MSI counts are powers of two; halve toward ncpu + 1. */
		while (counts[PCI_INTR_TYPE_MSI] > ncpu + 1) {
			if (counts[PCI_INTR_TYPE_MSI] / 2 <= ncpu + 1)
				break;
			counts[PCI_INTR_TYPE_MSI] /= 2;
		}
		/* Trial allocation, as for MSI-X above. */
		memset(alloced_counts, 0, sizeof(alloced_counts));
		alloced_counts[PCI_INTR_TYPE_MSI] = counts[PCI_INTR_TYPE_MSI];
		if (pci_intr_alloc(pa, &ihps, alloced_counts,
		    PCI_INTR_TYPE_MSI)) {
			counts[PCI_INTR_TYPE_MSI] = 0;
		} else {
			counts[PCI_INTR_TYPE_MSI] =
			    alloced_counts[PCI_INTR_TYPE_MSI];
			pci_intr_release(pa->pa_pc, ihps,
			    alloced_counts[PCI_INTR_TYPE_MSI]);
		}
	}
	if (counts[PCI_INTR_TYPE_MSI] < 1) {
		counts[PCI_INTR_TYPE_MSI] = 0;
		if (max_type == PCI_INTR_TYPE_MSI)
			max_type = PCI_INTR_TYPE_INTX;
	} else if (!nvme_pci_mq || !nvme_pci_mpsafe) {
		if (counts[PCI_INTR_TYPE_MSI] > 2)
			counts[PCI_INTR_TYPE_MSI] = 2;	/* adminq + 1 ioq */
	}

force_intx:
	/* INTx */
	counts[PCI_INTR_TYPE_INTX] = 1;

	/* Real allocation using the probed per-type counts. */
	memcpy(alloced_counts, counts, sizeof(counts));
	error = pci_intr_alloc(pa, &ihps, alloced_counts, max_type);
	if (error) {
		if (max_type != PCI_INTR_TYPE_INTX) {
			/*
			 * NOTE: "goto retry" below jumps backward into
			 * this block to step down to the next weaker
			 * interrupt type; legal C, but subtle.
			 */
retry:
			memset(counts, 0, sizeof(counts));
			if (max_type == PCI_INTR_TYPE_MSIX) {
				max_type = PCI_INTR_TYPE_MSI;
				goto retry_msi;
			} else {
				max_type = PCI_INTR_TYPE_INTX;
				goto force_intx;
			}
		}
		return error;
	}

	/* Got fewer vectors than requested: retry with a weaker type. */
	intr_type = pci_intr_type(pa->pa_pc, ihps[0]);
	if (alloced_counts[intr_type] < counts[intr_type]) {
		if (intr_type != PCI_INTR_TYPE_INTX) {
			pci_intr_release(pa->pa_pc, ihps,
			    alloced_counts[intr_type]);
			max_type = intr_type;
			goto retry;
		}
		return EBUSY;
	}

	psc->psc_intrs = ihps;
	psc->psc_nintrs = alloced_counts[intr_type];
	if (intr_type == PCI_INTR_TYPE_MSI) {
		/* Cap usable MSI vectors; extras stay allocated but unused. */
		if (alloced_counts[intr_type] > ncpu + 1)
			alloced_counts[intr_type] = ncpu + 1;
	}
	sc->sc_use_mq = alloced_counts[intr_type] > 1;
	sc->sc_nq = sc->sc_use_mq ? alloced_counts[intr_type] - 1 : 1;

	return 0;
}
468 
/* Loadable-module glue; depends on the pci and dk_subr modules. */
MODULE(MODULE_CLASS_DRIVER, nvme, "pci,dk_subr");

#ifdef _MODULE
#include "ioconf.c"
#endif
474 
475 static int
476 nvme_modcmd(modcmd_t cmd, void *opaque)
477 {
478 #ifdef _MODULE
479 	devmajor_t cmajor, bmajor;
480 	extern const struct cdevsw nvme_cdevsw;
481 #endif
482 	int error = 0;
483 
484 #ifdef _MODULE
485 	switch (cmd) {
486 	case MODULE_CMD_INIT:
487 		error = config_init_component(cfdriver_ioconf_nvme_pci,
488 		    cfattach_ioconf_nvme_pci, cfdata_ioconf_nvme_pci);
489 		if (error)
490 			break;
491 
492 		bmajor = cmajor = NODEVMAJOR;
493 		error = devsw_attach(nvme_cd.cd_name, NULL, &bmajor,
494 		    &nvme_cdevsw, &cmajor);
495 		if (error) {
496 			aprint_error("%s: unable to register devsw\n",
497 			    nvme_cd.cd_name);
498 			/* do not abort, just /dev/nvme* will not work */
499 		}
500 		break;
501 	case MODULE_CMD_FINI:
502 		devsw_detach(NULL, &nvme_cdevsw);
503 
504 		error = config_fini_component(cfdriver_ioconf_nvme_pci,
505 		    cfattach_ioconf_nvme_pci, cfdata_ioconf_nvme_pci);
506 		break;
507 	default:
508 		break;
509 	}
510 #endif
511 	return error;
512 }
513