/* xref: /netbsd-src/sys/dev/pci/nvme_pci.c (revision d909946ca08dceb44d7d0f22ec9488679695d976) */
1 /*	$NetBSD: nvme_pci.c,v 1.4 2016/07/11 06:14:51 knakahara Exp $	*/
2 /*	$OpenBSD: nvme_pci.c,v 1.3 2016/04/14 11:18:32 dlg Exp $ */
3 
4 /*
5  * Copyright (c) 2014 David Gwynne <dlg@openbsd.org>
6  *
7  * Permission to use, copy, modify, and distribute this software for any
8  * purpose with or without fee is hereby granted, provided that the above
9  * copyright notice and this permission notice appear in all copies.
10  *
11  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18  */
19 
20 /*-
21  * Copyright (C) 2016 NONAKA Kimihiro <nonaka@netbsd.org>
22  * All rights reserved.
23  *
24  * Redistribution and use in source and binary forms, with or without
25  * modification, are permitted provided that the following conditions
26  * are met:
27  * 1. Redistributions of source code must retain the above copyright
28  *    notice, this list of conditions and the following disclaimer.
29  * 2. Redistributions in binary form must reproduce the above copyright
30  *    notice, this list of conditions and the following disclaimer in the
31  *    documentation and/or other materials provided with the distribution.
32  *
33  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
34  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
35  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
36  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
37  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
38  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
39  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
40  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
41  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
42  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43  */
44 
45 #include <sys/cdefs.h>
46 __KERNEL_RCSID(0, "$NetBSD: nvme_pci.c,v 1.4 2016/07/11 06:14:51 knakahara Exp $");
47 
48 #include <sys/param.h>
49 #include <sys/systm.h>
50 #include <sys/kernel.h>
51 #include <sys/device.h>
52 #include <sys/bitops.h>
53 #include <sys/bus.h>
54 #include <sys/cpu.h>
55 #include <sys/interrupt.h>
56 #include <sys/kmem.h>
57 #include <sys/pmf.h>
58 
59 #include <dev/pci/pcireg.h>
60 #include <dev/pci/pcivar.h>
61 
62 #include <dev/ic/nvmereg.h>
63 #include <dev/ic/nvmevar.h>
64 
/* Tunables (patchable globals; read at attach/interrupt-setup time). */
int nvme_pci_force_intx = 0;	/* non-zero: use INTx even if MSI/MSI-X exists */
int nvme_pci_mpsafe = 0;	/* non-zero: establish MP-safe handlers */
int nvme_pci_mq = 1;		/* INTx: ioq=1, MSI/MSI-X: ioq=ncpu */

/* Config-space offset of the NVMe controller's memory BAR (BAR0). */
#define NVME_PCI_BAR		0x10

/*
 * PCI bus glue: wraps the bus-independent nvme_softc with the
 * PCI-specific interrupt bookkeeping.
 */
struct nvme_pci_softc {
	struct nvme_softc	psc_nvme;	/* common state; must be first */

	pci_chipset_tag_t	psc_pc;		/* chipset tag for intr/config ops */
	pci_intr_handle_t	*psc_intrs;	/* vectors from pci_intr_alloc() */
	int			psc_nintrs;	/* number of entries in psc_intrs */
};

static int	nvme_pci_match(device_t, cfdata_t, void *);
static void	nvme_pci_attach(device_t, device_t, void *);
static int	nvme_pci_detach(device_t, int);

CFATTACH_DECL3_NEW(nvme_pci, sizeof(struct nvme_pci_softc),
    nvme_pci_match, nvme_pci_attach, nvme_pci_detach, NULL, NULL,
    nvme_childdet, DVF_DETACH_SHUTDOWN);

static int	nvme_pci_intr_establish(struct nvme_softc *,
		    uint16_t, struct nvme_queue *);
static int	nvme_pci_intr_disestablish(struct nvme_softc *, uint16_t);
static int	nvme_pci_setup_intr(struct pci_attach_args *,
		    struct nvme_pci_softc *);
92 
93 static int
94 nvme_pci_match(device_t parent, cfdata_t match, void *aux)
95 {
96 	struct pci_attach_args *pa = aux;
97 
98 	if (PCI_CLASS(pa->pa_class) == PCI_CLASS_MASS_STORAGE &&
99 	    PCI_SUBCLASS(pa->pa_class) == PCI_SUBCLASS_MASS_STORAGE_NVM &&
100 	    PCI_INTERFACE(pa->pa_class) == PCI_INTERFACE_NVM_NVME)
101 		return 1;
102 
103 	return 0;
104 }
105 
106 static void
107 nvme_pci_attach(device_t parent, device_t self, void *aux)
108 {
109 	struct nvme_pci_softc *psc = device_private(self);
110 	struct nvme_softc *sc = &psc->psc_nvme;
111 	struct pci_attach_args *pa = aux;
112 	pcireg_t memtype;
113 	bus_addr_t memaddr;
114 	int flags, msixoff;
115 	int nq, error;
116 
117 	sc->sc_dev = self;
118 	psc->psc_pc = pa->pa_pc;
119 	if (pci_dma64_available(pa))
120 		sc->sc_dmat = pa->pa_dmat64;
121 	else
122 		sc->sc_dmat = pa->pa_dmat;
123 
124 	pci_aprint_devinfo(pa, NULL);
125 
126 	/* Map registers */
127 	memtype = pci_mapreg_type(pa->pa_pc, pa->pa_tag, NVME_PCI_BAR);
128 	if (PCI_MAPREG_TYPE(memtype) != PCI_MAPREG_TYPE_MEM) {
129 		aprint_error_dev(self, "invalid type (type=0x%x)\n", memtype);
130 		return;
131 	}
132 	sc->sc_iot = pa->pa_memt;
133 	error = pci_mapreg_info(pa->pa_pc, pa->pa_tag, PCI_MAPREG_START,
134 	    memtype, &memaddr, &sc->sc_ios, &flags);
135 	if (error) {
136 		aprint_error_dev(self, "can't get map info\n");
137 		return;
138 	}
139 	if (pci_get_capability(pa->pa_pc, pa->pa_tag, PCI_CAP_MSIX, &msixoff,
140 	    NULL)) {
141 		pcireg_t msixtbl;
142 		uint32_t table_offset;
143 		int bir;
144 
145 		msixtbl = pci_conf_read(pa->pa_pc, pa->pa_tag,
146 		    msixoff + PCI_MSIX_TBLOFFSET);
147 		table_offset = msixtbl & PCI_MSIX_TBLOFFSET_MASK;
148 		bir = msixtbl & PCI_MSIX_PBABIR_MASK;
149 		if (bir == 0) {
150 			sc->sc_ios = table_offset;
151 		}
152 	}
153 	error = bus_space_map(sc->sc_iot, memaddr, sc->sc_ios, flags,
154 	    &sc->sc_ioh);
155 	if (error != 0) {
156 		aprint_error_dev(self, "can't map mem space (error=%d)\n",
157 		    error);
158 		return;
159 	}
160 
161 	/* Establish interrupts */
162 	if (nvme_pci_setup_intr(pa, psc) != 0) {
163 		aprint_error_dev(self, "unable to allocate interrupt\n");
164 		goto unmap;
165 	}
166 	sc->sc_intr_establish = nvme_pci_intr_establish;
167 	sc->sc_intr_disestablish = nvme_pci_intr_disestablish;
168 
169 	nq = sc->sc_nq + (sc->sc_use_mq ? 1 : 0);
170 	sc->sc_ih = kmem_zalloc(sizeof(*sc->sc_ih) * nq, KM_SLEEP);
171 	if (sc->sc_ih == NULL) {
172 		aprint_error_dev(self, "unable to allocate ih memory\n");
173 		goto intr_release;
174 	}
175 
176 	if (nvme_attach(sc) != 0) {
177 		/* error printed by nvme_attach() */
178 		goto intr_free;
179 	}
180 
181 	if (!pmf_device_register(self, NULL, NULL))
182 		aprint_error_dev(self, "couldn't establish power handler\n");
183 
184 	SET(sc->sc_flags, NVME_F_ATTACHED);
185 	return;
186 
187 intr_free:
188 	kmem_free(sc->sc_ih, sizeof(*sc->sc_ih) * nq);
189 	sc->sc_nq = 0;
190 intr_release:
191 	pci_intr_release(pa->pa_pc, psc->psc_intrs, psc->psc_nintrs);
192 	psc->psc_nintrs = 0;
193 unmap:
194 	bus_space_unmap(sc->sc_iot, sc->sc_ioh, sc->sc_ios);
195 	sc->sc_ios = 0;
196 }
197 
/*
 * Autoconf detach: tear down in reverse order of attach — core driver
 * first, then interrupt handlers, handler array, vectors, and the
 * register mapping.  Returns 0 on success or the error from
 * nvme_detach(), in which case nothing has been torn down.
 */
static int
nvme_pci_detach(device_t self, int flags)
{
	struct nvme_pci_softc *psc = device_private(self);
	struct nvme_softc *sc = &psc->psc_nvme;
	int i, nq, error;

	/* Nothing to undo if attach bailed out before completion. */
	if (!ISSET(sc->sc_flags, NVME_F_ATTACHED))
		return 0;

	error = nvme_detach(sc, flags);
	if (error)
		return error;

	/* Same slot count as computed at attach: ioqs plus adminq on MQ. */
	nq = sc->sc_nq + (sc->sc_use_mq ? 1 : 0);
	if (!sc->sc_use_mq) {
		/*
		 * NOTE(review): single-vector mode — this assumes the
		 * handlers are still established after nvme_detach()
		 * (i.e. the core left them to us); verify against
		 * nvme.c that sc_ih[i] is non-NULL here.
		 */
		for (i = 0; i < nq; i++)
			pci_intr_disestablish(psc->psc_pc, sc->sc_ih[i]);
	}
	kmem_free(sc->sc_ih, sizeof(*sc->sc_ih) * nq);
	pci_intr_release(psc->psc_pc, psc->psc_intrs, psc->psc_nintrs);
	bus_space_unmap(sc->sc_iot, sc->sc_ioh, sc->sc_ios);
	return 0;
}
222 
/*
 * Establish the interrupt handler for queue `qid'.
 *
 * Single-vector mode (!sc_use_mq): only qid 0 gets a handler, which
 * services all queues; calls for qid > 0 succeed as no-ops.
 * Multi-vector mode: one vector per queue — qid 0 is the admin queue,
 * qid N (N > 0) is I/O queue N.  Returns 0 on success, 1 if the
 * handler could not be established.
 */
static int
nvme_pci_intr_establish(struct nvme_softc *sc, uint16_t qid,
    struct nvme_queue *q)
{
	struct nvme_pci_softc *psc = (struct nvme_pci_softc *)sc;
	char intr_xname[INTRDEVNAMEBUF];
	char intrbuf[PCI_INTRSTR_LEN];
	const char *intrstr = NULL;
	int (*ih_func)(void *);
	void *ih_arg;
	kcpuset_t *affinity;
	cpuid_t affinity_to;
	int error;

	/* Single shared vector: only queue 0 owns a handler. */
	if (!sc->sc_use_mq && qid > 0)
		return 0;

	KASSERT(sc->sc_ih[qid] == NULL);

	if (nvme_pci_mpsafe) {
		pci_intr_setattr(psc->psc_pc, &psc->psc_intrs[qid],
		    PCI_INTR_MPSAFE, true);
	}
	if (!sc->sc_use_mq) {
		/* Shared handler: argument is the whole softc. */
		snprintf(intr_xname, sizeof(intr_xname), "%s",
		    device_xname(sc->sc_dev));
		ih_arg = sc;
		ih_func = nvme_intr;
	} else {
		/* Per-queue handler: argument is the queue itself. */
		if (qid == 0) {
			snprintf(intr_xname, sizeof(intr_xname), "%s adminq",
			    device_xname(sc->sc_dev));
		} else {
			snprintf(intr_xname, sizeof(intr_xname), "%s ioq%d",
			    device_xname(sc->sc_dev), qid);
		}
		ih_arg = q;
		/* MSI-X and MSI use different per-queue entry points. */
		if (pci_intr_type(psc->psc_pc, psc->psc_intrs[qid])
		    == PCI_INTR_TYPE_MSIX)
			ih_func = nvme_mq_msix_intr;
		else
			ih_func = nvme_mq_msi_intr;
	}
	sc->sc_ih[qid] = pci_intr_establish_xname(psc->psc_pc,
	    psc->psc_intrs[qid], IPL_BIO, ih_func, ih_arg, intr_xname);
	if (sc->sc_ih[qid] == NULL) {
		aprint_error_dev(sc->sc_dev,
		    "unable to establish %s interrupt\n", intr_xname);
		return 1;
	}
	intrstr = pci_intr_string(psc->psc_pc, psc->psc_intrs[qid], intrbuf,
	    sizeof(intrbuf));
	if (!sc->sc_use_mq) {
		aprint_normal_dev(sc->sc_dev, "interrupting at %s\n", intrstr);
	} else if (qid == 0) {
		aprint_normal_dev(sc->sc_dev,
		    "for admin queue interrupting at %s\n", intrstr);
	} else if (!nvme_pci_mpsafe) {
		aprint_normal_dev(sc->sc_dev,
		    "for io queue %d interrupting at %s\n", qid, intrstr);
	} else {
		/*
		 * MP-safe multiqueue: spread I/O queue vectors round-
		 * robin across CPUs.  A distribution failure is not
		 * fatal — only the affinity report is suppressed.
		 */
		kcpuset_create(&affinity, true);
		affinity_to = (qid - 1) % ncpu;
		kcpuset_set(affinity, affinity_to);
		error = interrupt_distribute(sc->sc_ih[qid], affinity, NULL);
		kcpuset_destroy(affinity);
		aprint_normal_dev(sc->sc_dev,
		    "for io queue %d interrupting at %s", qid, intrstr);
		if (error == 0)
			aprint_normal(" affinity to cpu%lu", affinity_to);
		aprint_normal("\n");
	}
	return 0;
}
297 
298 static int
299 nvme_pci_intr_disestablish(struct nvme_softc *sc, uint16_t qid)
300 {
301 	struct nvme_pci_softc *psc = (struct nvme_pci_softc *)sc;
302 
303 	if (!sc->sc_use_mq && qid > 0)
304 		return 0;
305 
306 	KASSERT(sc->sc_ih[qid] != NULL);
307 
308 	pci_intr_disestablish(psc->psc_pc, sc->sc_ih[qid]);
309 	sc->sc_ih[qid] = NULL;
310 
311 	return 0;
312 }
313 
/*
 * Allocate interrupt vectors, preferring MSI-X, then MSI, then INTx.
 *
 * For MSI-X and MSI the obtainable vector count is first probed with a
 * trial pci_intr_alloc()/pci_intr_release() pair, then the final
 * allocation is made with the probed counts.  If the final allocation
 * comes up short the current type is abandoned and the next weaker
 * type is retried.  On success, fills in psc_intrs/psc_nintrs and sets
 * sc_use_mq/sc_nq from the number of vectors obtained; returns 0.
 * Returns an errno (or EBUSY) on failure.
 */
static int
nvme_pci_setup_intr(struct pci_attach_args *pa, struct nvme_pci_softc *psc)
{
	struct nvme_softc *sc = &psc->psc_nvme;
	pci_intr_handle_t *ihps;
	int counts[PCI_INTR_TYPE_SIZE], alloced_counts[PCI_INTR_TYPE_SIZE];
	int max_type, intr_type;
	int error;

	if (nvme_pci_force_intx) {
		max_type = PCI_INTR_TYPE_INTX;
		goto force_intx;
	}

	/* MSI-X */
	/* Want one vector per CPU for I/O queues, plus one for the adminq. */
	max_type = PCI_INTR_TYPE_MSIX;
	counts[PCI_INTR_TYPE_MSIX] = min(pci_msix_count(pa->pa_pc, pa->pa_tag),
	    ncpu + 1);
	if (counts[PCI_INTR_TYPE_MSIX] > 0) {
		/* Trial allocation to learn what is actually obtainable. */
		memset(alloced_counts, 0, sizeof(alloced_counts));
		alloced_counts[PCI_INTR_TYPE_MSIX] = counts[PCI_INTR_TYPE_MSIX];
		if (pci_intr_alloc(pa, &ihps, alloced_counts,
		    PCI_INTR_TYPE_MSIX)) {
			counts[PCI_INTR_TYPE_MSIX] = 0;
		} else {
			counts[PCI_INTR_TYPE_MSIX] =
			    alloced_counts[PCI_INTR_TYPE_MSIX];
			pci_intr_release(pa->pa_pc, ihps,
			    alloced_counts[PCI_INTR_TYPE_MSIX]);
		}
	}
	if (counts[PCI_INTR_TYPE_MSIX] < 2) {
		/* Not enough for adminq + at least one ioq; fall to MSI. */
		counts[PCI_INTR_TYPE_MSIX] = 0;
		max_type = PCI_INTR_TYPE_MSI;
	} else if (!nvme_pci_mq || !nvme_pci_mpsafe) {
		counts[PCI_INTR_TYPE_MSIX] = 2;	/* adminq + 1 ioq */
	}

retry_msi:
	/* MSI */
	counts[PCI_INTR_TYPE_MSI] = pci_msi_count(pa->pa_pc, pa->pa_tag);
	if (counts[PCI_INTR_TYPE_MSI] > 0) {
		/* MSI counts are powers of two; halve down toward ncpu+1. */
		while (counts[PCI_INTR_TYPE_MSI] > ncpu + 1) {
			if (counts[PCI_INTR_TYPE_MSI] / 2 <= ncpu + 1)
				break;
			counts[PCI_INTR_TYPE_MSI] /= 2;
		}
		/* Trial allocation, as for MSI-X above. */
		memset(alloced_counts, 0, sizeof(alloced_counts));
		alloced_counts[PCI_INTR_TYPE_MSI] = counts[PCI_INTR_TYPE_MSI];
		if (pci_intr_alloc(pa, &ihps, alloced_counts,
		    PCI_INTR_TYPE_MSI)) {
			counts[PCI_INTR_TYPE_MSI] = 0;
		} else {
			counts[PCI_INTR_TYPE_MSI] =
			    alloced_counts[PCI_INTR_TYPE_MSI];
			pci_intr_release(pa->pa_pc, ihps,
			    alloced_counts[PCI_INTR_TYPE_MSI]);
		}
	}
	if (counts[PCI_INTR_TYPE_MSI] < 1) {
		counts[PCI_INTR_TYPE_MSI] = 0;
		/* Only drop to INTx if MSI was already our best hope. */
		if (max_type == PCI_INTR_TYPE_MSI)
			max_type = PCI_INTR_TYPE_INTX;
	} else if (!nvme_pci_mq || !nvme_pci_mpsafe) {
		if (counts[PCI_INTR_TYPE_MSI] > 2)
			counts[PCI_INTR_TYPE_MSI] = 2;	/* adminq + 1 ioq */
	}

force_intx:
	/* INTx */
	counts[PCI_INTR_TYPE_INTX] = 1;

	/* Final allocation with the probed per-type counts. */
	memcpy(alloced_counts, counts, sizeof(counts));
	error = pci_intr_alloc(pa, &ihps, alloced_counts, max_type);
	if (error) {
		if (max_type != PCI_INTR_TYPE_INTX) {
retry:			/* (also reached from the short-allocation check below) */
			memset(counts, 0, sizeof(counts));
			if (max_type == PCI_INTR_TYPE_MSIX) {
				max_type = PCI_INTR_TYPE_MSI;
				goto retry_msi;
			} else {
				max_type = PCI_INTR_TYPE_INTX;
				goto force_intx;
			}
		}
		return error;
	}

	/* Got fewer vectors than probed: release and retry a weaker type. */
	intr_type = pci_intr_type(pa->pa_pc, ihps[0]);
	if (alloced_counts[intr_type] < counts[intr_type]) {
		if (intr_type != PCI_INTR_TYPE_INTX) {
			pci_intr_release(pa->pa_pc, ihps,
			    alloced_counts[intr_type]);
			max_type = intr_type;
			goto retry;
		}
		return EBUSY;
	}

	psc->psc_intrs = ihps;
	psc->psc_nintrs = alloced_counts[intr_type];
	if (intr_type == PCI_INTR_TYPE_MSI) {
		/* Cap the usable count; extra MSI vectors stay unused. */
		if (alloced_counts[intr_type] > ncpu + 1)
			alloced_counts[intr_type] = ncpu + 1;
	}
	/* More than one vector => multiqueue; one I/O queue per extra vector. */
	sc->sc_use_mq = alloced_counts[intr_type] > 1;
	sc->sc_nq = sc->sc_use_mq ? alloced_counts[intr_type] - 1 : 1;
	return 0;
}
424