1 /*	$OpenBSD: virtio_pci.c,v 1.27 2019/05/26 15:22:31 sf Exp $	*/
2 /*	$NetBSD: virtio.c,v 1.3 2011/11/02 23:05:52 njoly Exp $	*/
3 
4 /*
5  * Copyright (c) 2012 Stefan Fritsch.
6  * Copyright (c) 2010 Minoura Makoto.
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  */
29 
30 #include <sys/param.h>
31 #include <sys/systm.h>
32 #include <sys/device.h>
33 #include <sys/mutex.h>
34 
35 #include <dev/pci/pcidevs.h>
36 #include <dev/pci/pcireg.h>
37 #include <dev/pci/pcivar.h>
38 #include <dev/pci/virtio_pcireg.h>
39 
40 #include <dev/pv/virtioreg.h>
41 #include <dev/pv/virtiovar.h>
43 
44 #define DNPRINTF(n,x...)				\
45     do { if (VIRTIO_DEBUG >= n) printf(x); } while(0)
46 
47 
48 /*
49  * XXX: Before being used on big endian arches, the access to config registers
50  * XXX: needs to be reviewed/fixed. The non-device specific registers are
51  * XXX: PCI-endian while the device specific registers are native endian.
52  */
53 
54 #define MAX_MSIX_VECS	8
55 
56 struct virtio_pci_softc;
57 
58 int		virtio_pci_match(struct device *, void *, void *);
59 void		virtio_pci_attach(struct device *, struct device *, void *);
60 int		virtio_pci_attach_09(struct virtio_pci_softc *sc, struct pci_attach_args *pa);
61 int		virtio_pci_attach_10(struct virtio_pci_softc *sc, struct pci_attach_args *pa);
62 int		virtio_pci_detach(struct device *, int);
63 
64 void		virtio_pci_kick(struct virtio_softc *, uint16_t);
65 int		virtio_pci_adjust_config_region(struct virtio_pci_softc *);
66 uint8_t		virtio_pci_read_device_config_1(struct virtio_softc *, int);
67 uint16_t	virtio_pci_read_device_config_2(struct virtio_softc *, int);
68 uint32_t	virtio_pci_read_device_config_4(struct virtio_softc *, int);
69 uint64_t	virtio_pci_read_device_config_8(struct virtio_softc *, int);
70 void		virtio_pci_write_device_config_1(struct virtio_softc *, int, uint8_t);
71 void		virtio_pci_write_device_config_2(struct virtio_softc *, int, uint16_t);
72 void		virtio_pci_write_device_config_4(struct virtio_softc *, int, uint32_t);
73 void		virtio_pci_write_device_config_8(struct virtio_softc *, int, uint64_t);
74 uint16_t	virtio_pci_read_queue_size(struct virtio_softc *, uint16_t);
75 void		virtio_pci_setup_queue(struct virtio_softc *, struct virtqueue *, uint64_t);
76 void		virtio_pci_set_status(struct virtio_softc *, int);
77 int		virtio_pci_negotiate_features(struct virtio_softc *, const struct virtio_feature_name *);
78 int		virtio_pci_negotiate_features_10(struct virtio_softc *, const struct virtio_feature_name *);
79 void		virtio_pci_set_msix_queue_vector(struct virtio_pci_softc *, uint32_t, uint16_t);
80 void		virtio_pci_set_msix_config_vector(struct virtio_pci_softc *, uint16_t);
81 int		virtio_pci_msix_establish(struct virtio_pci_softc *, struct pci_attach_args *, int, int (*)(void *), void *);
82 int		virtio_pci_setup_msix(struct virtio_pci_softc *, struct pci_attach_args *, int);
83 void		virtio_pci_free_irqs(struct virtio_pci_softc *);
84 int		virtio_pci_poll_intr(void *);
85 int		virtio_pci_legacy_intr(void *);
86 int		virtio_pci_legacy_intr_mpsafe(void *);
87 int		virtio_pci_config_intr(void *);
88 int		virtio_pci_queue_intr(void *);
89 int		virtio_pci_shared_queue_intr(void *);
90 int		virtio_pci_find_cap(struct virtio_pci_softc *sc, int cfg_type, void *buf, int buflen);
91 #if VIRTIO_DEBUG
92 void virtio_pci_dump_caps(struct virtio_pci_softc *sc);
93 #endif
94 
95 enum irq_type {
96 	IRQ_NO_MSIX,
97 	IRQ_MSIX_SHARED, /* vec 0: config irq, vec 1 shared by all vqs */
98 	IRQ_MSIX_PER_VQ, /* vec 0: config irq, vec n: irq of vq[n-1] */
99 };
100 
101 struct virtio_pci_softc {
102 	struct virtio_softc	sc_sc;
103 	pci_chipset_tag_t	sc_pc;
104 	pcitag_t		sc_ptag;
105 
106 	bus_space_tag_t		sc_iot;
107 	bus_space_handle_t	sc_ioh;
108 	bus_size_t		sc_iosize;
109 
110 	bus_space_tag_t		sc_bars_iot[4];
111 	bus_space_handle_t	sc_bars_ioh[4];
112 	bus_size_t		sc_bars_iosize[4];
113 
114 	bus_space_tag_t		sc_notify_iot;
115 	bus_space_handle_t	sc_notify_ioh;
116 	bus_size_t		sc_notify_iosize;
117 	unsigned int		sc_notify_off_multiplier;
118 
119 	bus_space_tag_t		sc_devcfg_iot;
120 	bus_space_handle_t	sc_devcfg_ioh;
121 	bus_size_t		sc_devcfg_iosize;
122 	/*
123 	 * With 0.9, the offset of the devcfg region within the I/O BAR
124 	 * depends on whether MSI-X is enabled.
125 	 * With 1.0, this field is only used to remember whether MSI-X is
126 	 * enabled.
127 	 */
128 	unsigned int		sc_devcfg_offset;
129 
130 	bus_space_tag_t		sc_isr_iot;
131 	bus_space_handle_t	sc_isr_ioh;
132 	bus_size_t		sc_isr_iosize;
133 
134 	void			*sc_ih[MAX_MSIX_VECS];
135 
136 	enum irq_type		sc_irq_type;
137 };
138 
139 struct cfattach virtio_pci_ca = {
140 	sizeof(struct virtio_pci_softc),
141 	virtio_pci_match,
142 	virtio_pci_attach,
143 	virtio_pci_detach,
144 	NULL
145 };
146 
147 struct virtio_ops virtio_pci_ops = {
148 	virtio_pci_kick,
149 	virtio_pci_read_device_config_1,
150 	virtio_pci_read_device_config_2,
151 	virtio_pci_read_device_config_4,
152 	virtio_pci_read_device_config_8,
153 	virtio_pci_write_device_config_1,
154 	virtio_pci_write_device_config_2,
155 	virtio_pci_write_device_config_4,
156 	virtio_pci_write_device_config_8,
157 	virtio_pci_read_queue_size,
158 	virtio_pci_setup_queue,
159 	virtio_pci_set_status,
160 	virtio_pci_negotiate_features,
161 	virtio_pci_poll_intr,
162 };
163 
164 static inline
165 uint64_t _cread(struct virtio_pci_softc *sc, unsigned off, unsigned size)
166 {
167 	uint64_t val;
168 	switch (size) {
169 	case 1:
170 		val = bus_space_read_1(sc->sc_iot, sc->sc_ioh, off);
171 		break;
172 	case 2:
173 		val = bus_space_read_2(sc->sc_iot, sc->sc_ioh, off);
174 		break;
175 	case 4:
176 		val = bus_space_read_4(sc->sc_iot, sc->sc_ioh, off);
177 		break;
178 	case 8:
179 		val = bus_space_read_8(sc->sc_iot, sc->sc_ioh, off);
180 		break;
181 	}
182 	return val;
183 }
184 
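/*
 * CREAD()/CWRITE() read and write a member of the virtio 1.0 common
 * config structure (struct virtio_pci_common_cfg) through the mapped
 * common config region, using the member's offset and size.
 */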
185 #define CREAD(sc, memb)  _cread(sc, offsetof(struct virtio_pci_common_cfg, memb), \
186     sizeof(((struct virtio_pci_common_cfg *)0)->memb))
187 
188 #define CWRITE(sc, memb, val)							\
189 	do {									\
190 		struct virtio_pci_common_cfg c;					\
191 		size_t off = offsetof(struct virtio_pci_common_cfg, memb);	\
192 		size_t size = sizeof(c.memb);					\
193 										\
194 		DNPRINTF(2, "%s: %d: off %#zx size %#zx write %#llx\n",		\
195 		    __func__, __LINE__, off, size, (unsigned long long)val);	\
196 		switch (size) {							\
197 		case 1:								\
198 			bus_space_write_1(sc->sc_iot, sc->sc_ioh, off, val);	\
199 			break;							\
200 		case 2:								\
201 			bus_space_write_2(sc->sc_iot, sc->sc_ioh, off, val);	\
202 			break;							\
203 		case 4:								\
204 			bus_space_write_4(sc->sc_iot, sc->sc_ioh, off, val);	\
205 			break;							\
206 		case 8:								\
207 			bus_space_write_8(sc->sc_iot, sc->sc_ioh, off, val);	\
208 			break;							\
209 		}								\
210 	} while (0)
211 
212 uint16_t
213 virtio_pci_read_queue_size(struct virtio_softc *vsc, uint16_t idx)
214 {
215 	struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc;
216 	uint16_t ret;
217 	if (sc->sc_sc.sc_version_1) {
218 		CWRITE(sc, queue_select, idx);
219 		ret = CREAD(sc, queue_size);
220 	} else {
221 		bus_space_write_2(sc->sc_iot, sc->sc_ioh,
222 		    VIRTIO_CONFIG_QUEUE_SELECT, idx);
223 		ret = bus_space_read_2(sc->sc_iot, sc->sc_ioh,
224 		    VIRTIO_CONFIG_QUEUE_SIZE);
225 	}
226 	return ret;
227 }
228 
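/*
 * Program a virtqueue's ring addresses. With 1.0, addr == 0 disables the
 * queue; otherwise the descriptor, avail and used addresses are written
 * and the queue is enabled. With 0.9, only the page frame number of the
 * ring is written. If MSI-X is already set up, the queue's vector is
 * (re)assigned as well.
 */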
229 void
230 virtio_pci_setup_queue(struct virtio_softc *vsc, struct virtqueue *vq,
231     uint64_t addr)
232 {
233 	struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc;
234 	if (sc->sc_sc.sc_version_1) {
235 		CWRITE(sc, queue_select, vq->vq_index);
236 		if (addr == 0) {
237 			CWRITE(sc, queue_enable, 0);
238 			CWRITE(sc, queue_desc, 0);
239 			CWRITE(sc, queue_avail, 0);
240 			CWRITE(sc, queue_used, 0);
241 		} else {
242 			CWRITE(sc, queue_desc, addr);
243 			CWRITE(sc, queue_avail, addr + vq->vq_availoffset);
244 			CWRITE(sc, queue_used, addr + vq->vq_usedoffset);
245 			CWRITE(sc, queue_enable, 1);
246 			vq->vq_notify_off = CREAD(sc, queue_notify_off);
247 		}
248 	} else {
249 		bus_space_write_2(sc->sc_iot, sc->sc_ioh,
250 		    VIRTIO_CONFIG_QUEUE_SELECT, vq->vq_index);
251 		bus_space_write_4(sc->sc_iot, sc->sc_ioh,
252 		    VIRTIO_CONFIG_QUEUE_ADDRESS, addr / VIRTIO_PAGE_SIZE);
253 	}
254 
255 	/*
256 	 * This path is only executed if this function is called after
257 	 * the child's attach function has finished. In other cases,
258 	 * it's done in virtio_pci_setup_msix().
259 	 */
260 	if (sc->sc_irq_type != IRQ_NO_MSIX) {
261 		int vec = 1;
262 		if (sc->sc_irq_type == IRQ_MSIX_PER_VQ)
263 		       vec += vq->vq_index;
264 		if (sc->sc_sc.sc_version_1) {
265 			CWRITE(sc, queue_msix_vector, vec);
266 		} else {
267 			bus_space_write_2(sc->sc_iot, sc->sc_ioh,
268 			    VIRTIO_MSI_QUEUE_VECTOR, vec);
269 		}
270 	}
271 }
272 
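/*
 * Write the device status register. New bits are ORed into the current
 * status, except that writing 0 resets the device.
 */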
273 void
274 virtio_pci_set_status(struct virtio_softc *vsc, int status)
275 {
276 	struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc;
277 	int old = 0;
278 
279 	if (sc->sc_sc.sc_version_1) {
280 		if (status != 0)
281 			old = CREAD(sc, device_status);
282 		CWRITE(sc, device_status, status|old);
283 	} else {
284 		if (status != 0)
285 			old = bus_space_read_1(sc->sc_iot, sc->sc_ioh,
286 			    VIRTIO_CONFIG_DEVICE_STATUS);
287 		bus_space_write_1(sc->sc_iot, sc->sc_ioh,
288 		    VIRTIO_CONFIG_DEVICE_STATUS, status|old);
289 	}
290 }
291 
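/*
 * Match the OpenBSD control device as well as virtio 0.9 devices
 * (product IDs 0x1000-0x103f, revision 0) and virtio 1.0 devices
 * (product IDs 0x1040-0x107f, revision 1).
 */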
292 int
293 virtio_pci_match(struct device *parent, void *match, void *aux)
294 {
295 	struct pci_attach_args *pa;
296 
297 	pa = (struct pci_attach_args *)aux;
298 	if (PCI_VENDOR(pa->pa_id) == PCI_VENDOR_OPENBSD &&
299 	    PCI_PRODUCT(pa->pa_id) == PCI_PRODUCT_OPENBSD_CONTROL)
300 		return 1;
301 	if (PCI_VENDOR(pa->pa_id) != PCI_VENDOR_QUMRANET)
302 		return 0;
303 	/* virtio 0.9 */
304 	if (PCI_PRODUCT(pa->pa_id) >= 0x1000 &&
305 	    PCI_PRODUCT(pa->pa_id) <= 0x103f &&
306 	    PCI_REVISION(pa->pa_class) == 0)
307 		return 1;
308 	/* virtio 1.0 */
309 	if (PCI_PRODUCT(pa->pa_id) >= 0x1040 &&
310 	    PCI_PRODUCT(pa->pa_id) <= 0x107f &&
311 	    PCI_REVISION(pa->pa_class) == 1)
312 		return 1;
313 	return 0;
314 }
315 
316 #if VIRTIO_DEBUG
317 void
318 virtio_pci_dump_caps(struct virtio_pci_softc *sc)
319 {
320 	pci_chipset_tag_t pc = sc->sc_pc;
321 	pcitag_t tag = sc->sc_ptag;
322 	int offset;
323 	union {
324 		pcireg_t reg[4];
325 		struct virtio_pci_cap vcap;
326 	} v;
327 
328 	if (!pci_get_capability(pc, tag, PCI_CAP_VENDSPEC, &offset, &v.reg[0]))
329 		return;
330 
331 	printf("\n");
332 	do {
333 		for (int i = 0; i < 4; i++)
334 			v.reg[i] = pci_conf_read(pc, tag, offset + i * 4);
335 		printf("%s: cfgoff %#x len %#x type %#x bar %#x: off %#x len %#x\n",
336 			__func__, offset, v.vcap.cap_len, v.vcap.cfg_type, v.vcap.bar,
337 			v.vcap.offset, v.vcap.length);
338 		offset = v.vcap.cap_next;
339 	} while (offset != 0);
340 }
341 #endif
342 
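/*
 * Walk the vendor-specific PCI capability list and copy the first
 * capability of the given cfg_type into buf. Returns ENOENT if no such
 * capability exists and ERANGE if buf is too small for it.
 */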
343 int
344 virtio_pci_find_cap(struct virtio_pci_softc *sc, int cfg_type, void *buf, int buflen)
345 {
346 	pci_chipset_tag_t pc = sc->sc_pc;
347 	pcitag_t tag = sc->sc_ptag;
348 	unsigned int offset, i, len;
349 	union {
350 		pcireg_t reg[8];
351 		struct virtio_pci_cap vcap;
352 	} *v = buf;
353 
354 	if (buflen < sizeof(struct virtio_pci_cap))
355 		return ERANGE;
356 
357 	if (!pci_get_capability(pc, tag, PCI_CAP_VENDSPEC, &offset, &v->reg[0]))
358 		return ENOENT;
359 
360 	do {
361 		for (i = 0; i < 4; i++)
362 			v->reg[i] = pci_conf_read(pc, tag, offset + i * 4);
363 		if (v->vcap.cfg_type == cfg_type)
364 			break;
365 		offset = v->vcap.cap_next;
366 	} while (offset != 0);
367 
368 	if (offset == 0)
369 		return ENOENT;
370 
371 	if (v->vcap.cap_len > sizeof(struct virtio_pci_cap)) {
372 		len = roundup(v->vcap.cap_len, sizeof(pcireg_t));
373 		if (len > buflen) {
374 			printf("%s: cap too large\n", __func__);
375 			return ERANGE;
376 		}
377 		for (i = 4; i < len / sizeof(pcireg_t);  i++)
378 			v->reg[i] = pci_conf_read(pc, tag, offset + i * 4);
379 	}
380 
381 	return 0;
382 }
383 
384 
385 #define NMAPREG		((PCI_MAPREG_END - PCI_MAPREG_START) / \
386 				sizeof(pcireg_t))
387 
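/*
 * Attach using the virtio 1.0 transport: locate the common, notify, ISR
 * and (optional) device config capabilities, map the BARs they refer to
 * and create a subregion for each of the four register blocks.
 */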
388 int
389 virtio_pci_attach_10(struct virtio_pci_softc *sc, struct pci_attach_args *pa)
390 {
391 	struct virtio_pci_cap common, isr, device;
392 	struct virtio_pci_notify_cap notify;
393 	int have_device_cfg = 0;
394 	bus_size_t bars[NMAPREG] = { 0 };
395 	int bars_idx[NMAPREG] = { 0 };
396 	struct virtio_pci_cap *caps[] = { &common, &isr, &device, &notify.cap };
397 	int i, j = 0, ret = 0;
398 
399 	if (virtio_pci_find_cap(sc, VIRTIO_PCI_CAP_COMMON_CFG, &common, sizeof(common)) != 0)
400 		return ENODEV;
401 
402 	if (virtio_pci_find_cap(sc, VIRTIO_PCI_CAP_NOTIFY_CFG, &notify, sizeof(notify)) != 0)
403 		return ENODEV;
404 	if (virtio_pci_find_cap(sc, VIRTIO_PCI_CAP_ISR_CFG, &isr, sizeof(isr)) != 0)
405 		return ENODEV;
406 	if (virtio_pci_find_cap(sc, VIRTIO_PCI_CAP_DEVICE_CFG, &device, sizeof(device)) != 0)
407 		memset(&device, 0, sizeof(device));
408 	else
409 		have_device_cfg = 1;
410 
411 	/*
412 	 * XXX Maybe there are devices that offer the pci caps but not the
413 	 * XXX VERSION_1 feature bit? Then we should check the feature bit
414 	 * XXX here and fall back to 0.9 if it is not present.
415 	 */
416 
417 	/* Figure out which bars we need to map */
418 	for (i = 0; i < nitems(caps); i++) {
419 		int bar = caps[i]->bar;
420 		bus_size_t len = caps[i]->offset + caps[i]->length;
421 		if (caps[i]->length == 0)
422 			continue;
423 		if (bars[bar] < len)
424 			bars[bar] = len;
425 	}
426 
427 	for (i = 0; i < nitems(bars); i++) {
428 		int reg;
429 		pcireg_t type;
430 		if (bars[i] == 0)
431 			continue;
432 		reg = PCI_MAPREG_START + i * 4;
433 		type = pci_mapreg_type(sc->sc_pc, sc->sc_ptag, reg);
434 		if (pci_mapreg_map(pa, reg, type, 0, &sc->sc_bars_iot[j],
435 		    &sc->sc_bars_ioh[j], NULL, &sc->sc_bars_iosize[j],
436 		    bars[i])) {
437 			printf("%s: can't map bar %u\n",
438 			    sc->sc_sc.sc_dev.dv_xname, i);
439 			ret = EIO;
440 			goto err;
441 		}
442 		bars_idx[i] = j;
443 		j++;
444 	}
445 
446 	i = bars_idx[notify.cap.bar];
447 	if (bus_space_subregion(sc->sc_bars_iot[i], sc->sc_bars_ioh[i],
448 	    notify.cap.offset, notify.cap.length, &sc->sc_notify_ioh) != 0) {
449 		printf("%s: can't map notify i/o space\n",
450 		    sc->sc_sc.sc_dev.dv_xname);
451 		ret = EIO;
452 		goto err;
453 	}
454 	sc->sc_notify_iosize = notify.cap.length;
455 	sc->sc_notify_iot = sc->sc_bars_iot[i];
456 	sc->sc_notify_off_multiplier = notify.notify_off_multiplier;
457 
458 	if (have_device_cfg) {
459 		i = bars_idx[device.bar];
460 		if (bus_space_subregion(sc->sc_bars_iot[i], sc->sc_bars_ioh[i],
461 		    device.offset, device.length, &sc->sc_devcfg_ioh) != 0) {
462 			printf("%s: can't map devcfg i/o space\n",
463 			    sc->sc_sc.sc_dev.dv_xname);
464 			ret = EIO;
465 			goto err;
466 		}
467 		sc->sc_devcfg_iosize = device.length;
468 		sc->sc_devcfg_iot = sc->sc_bars_iot[i];
469 	}
470 
471 	i = bars_idx[isr.bar];
472 	if (bus_space_subregion(sc->sc_bars_iot[i], sc->sc_bars_ioh[i],
473 	    isr.offset, isr.length, &sc->sc_isr_ioh) != 0) {
474 		printf("%s: can't map isr i/o space\n",
475 		    sc->sc_sc.sc_dev.dv_xname);
476 		ret = EIO;
477 		goto err;
478 	}
479 	sc->sc_isr_iosize = isr.length;
480 	sc->sc_isr_iot = sc->sc_bars_iot[i];
481 
482 	i = bars_idx[common.bar];
483 	if (bus_space_subregion(sc->sc_bars_iot[i], sc->sc_bars_ioh[i],
484 	    common.offset, common.length, &sc->sc_ioh) != 0) {
485 		printf("%s: can't map common i/o space\n",
486 		    sc->sc_sc.sc_dev.dv_xname);
487 		ret = EIO;
488 		goto err;
489 	}
490 	sc->sc_iosize = common.length;
491 	sc->sc_iot = sc->sc_bars_iot[i];
492 
493 	sc->sc_sc.sc_version_1 = 1;
494 	return 0;
495 
496 err:
497 	/* there is no pci_mapreg_unmap() */
498 	return ret;
499 }
500 
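/*
 * Attach using the legacy 0.9 transport: all registers live in the I/O
 * space behind BAR 0.
 */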
501 int
502 virtio_pci_attach_09(struct virtio_pci_softc *sc, struct pci_attach_args *pa)
503 {
504 	struct virtio_softc *vsc = &sc->sc_sc;
505 	if (pci_mapreg_map(pa, PCI_MAPREG_START, PCI_MAPREG_TYPE_IO, 0,
506 	    &sc->sc_iot, &sc->sc_ioh, NULL, &sc->sc_iosize, 0)) {
507 		printf("%s: can't map i/o space\n", vsc->sc_dev.dv_xname);
508 		return EIO;
509 	}
510 
511 	if (bus_space_subregion(sc->sc_iot, sc->sc_ioh,
512 	    VIRTIO_CONFIG_QUEUE_NOTIFY, 2, &sc->sc_notify_ioh) != 0) {
513 		printf("%s: can't map notify i/o space\n",
514 		    vsc->sc_dev.dv_xname);
515 		return EIO;
516 	}
517 	sc->sc_notify_iosize = 2;
518 	sc->sc_notify_iot = sc->sc_iot;
519 
520 	if (bus_space_subregion(sc->sc_iot, sc->sc_ioh,
521 	    VIRTIO_CONFIG_ISR_STATUS, 1, &sc->sc_isr_ioh) != 0) {
522 		printf("%s: can't map isr i/o space\n",
523 		    vsc->sc_dev.dv_xname);
524 		return EIO;
525 	}
526 	sc->sc_isr_iosize = 1;
527 	sc->sc_isr_iot = sc->sc_iot;
528 
529 	return 0;
530 }
531 
532 void
533 virtio_pci_attach(struct device *parent, struct device *self, void *aux)
534 {
535 	struct virtio_pci_softc *sc = (struct virtio_pci_softc *)self;
536 	struct virtio_softc *vsc = &sc->sc_sc;
537 	struct pci_attach_args *pa = (struct pci_attach_args *)aux;
538 	pci_chipset_tag_t pc = pa->pa_pc;
539 	pcitag_t tag = pa->pa_tag;
540 	int revision, ret = ENODEV;
541 	pcireg_t id;
542 	char const *intrstr;
543 	pci_intr_handle_t ih;
544 
545 	revision = PCI_REVISION(pa->pa_class);
546 	switch (revision) {
547 	case 0:
548 		/* subsystem ID shows what I am */
549 		id = PCI_PRODUCT(pci_conf_read(pc, tag, PCI_SUBSYS_ID_REG));
550 		break;
551 	case 1:
552 		id = PCI_PRODUCT(pa->pa_id) - 0x1040;
553 		break;
554 	default:
555 		printf("unknown revision 0x%02x; giving up\n", revision);
556 		return;
557 	}
558 
559 	sc->sc_pc = pc;
560 	sc->sc_ptag = pa->pa_tag;
561 	vsc->sc_dmat = pa->pa_dmat;
562 
563 	/*
564 	 * For virtio, ignore the usual MSI allow/deny lists that depend on
565 	 * the PCI bridge and enable MSI unconditionally.
566 	 */
567 	pa->pa_flags |= PCI_FLAGS_MSI_ENABLED;
568 
569 #if VIRTIO_DEBUG
570 	virtio_pci_dump_caps(sc);
571 #endif
572 
573 	vsc->sc_ops = &virtio_pci_ops;
574 	if ((vsc->sc_dev.dv_cfdata->cf_flags & VIRTIO_CF_NO_VERSION_1) == 0 &&
575 	    (revision == 1 ||
576 	     (vsc->sc_dev.dv_cfdata->cf_flags & VIRTIO_CF_PREFER_VERSION_1))) {
577 		ret = virtio_pci_attach_10(sc, pa);
578 	}
579 	if (ret != 0 && revision == 0) {
580 		/* revision 0 means 0.9 only or both 0.9 and 1.0 */
581 		ret = virtio_pci_attach_09(sc, pa);
582 	}
583 	if (ret != 0) {
584 		printf(": Cannot attach (%d)\n", ret);
585 		return;
586 	}
587 
588 	sc->sc_devcfg_offset = VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI;
589 	sc->sc_irq_type = IRQ_NO_MSIX;
590 	if (virtio_pci_adjust_config_region(sc) != 0)
591 		return;
592 
593 	virtio_device_reset(vsc);
594 	virtio_set_status(vsc, VIRTIO_CONFIG_DEVICE_STATUS_ACK);
595 	virtio_set_status(vsc, VIRTIO_CONFIG_DEVICE_STATUS_DRIVER);
596 
597 	printf("\n");
598 	vsc->sc_childdevid = id;
599 	vsc->sc_child = NULL;
600 	config_found(self, sc, NULL);
601 	if (vsc->sc_child == NULL) {
602 		printf("%s: no matching child driver; not configured\n",
603 		    vsc->sc_dev.dv_xname);
604 		goto fail_1;
605 	}
606 	if (vsc->sc_child == VIRTIO_CHILD_ERROR) {
607 		printf("%s: virtio configuration failed\n",
608 		    vsc->sc_dev.dv_xname);
609 		goto fail_1;
610 	}
611 
612 	if (virtio_pci_setup_msix(sc, pa, 0) == 0) {
613 		sc->sc_irq_type = IRQ_MSIX_PER_VQ;
614 		intrstr = "msix per-VQ";
615 	} else if (virtio_pci_setup_msix(sc, pa, 1) == 0) {
616 		sc->sc_irq_type = IRQ_MSIX_SHARED;
617 		intrstr = "msix shared";
618 	} else {
619 		int (*ih_func)(void *) = virtio_pci_legacy_intr;
620 		if (pci_intr_map_msi(pa, &ih) != 0 && pci_intr_map(pa, &ih) != 0) {
621 			printf("%s: couldn't map interrupt\n", vsc->sc_dev.dv_xname);
622 			goto fail_2;
623 		}
624 		intrstr = pci_intr_string(pc, ih);
625 		/*
626 		 * We always set the IPL_MPSAFE flag in order to do the relatively
627 		 * expensive ISR read without lock, and then grab the kernel lock in
628 		 * the interrupt handler.
629 		 */
630 		if (vsc->sc_ipl & IPL_MPSAFE)
631 			ih_func = virtio_pci_legacy_intr_mpsafe;
632 		sc->sc_ih[0] = pci_intr_establish(pc, ih, vsc->sc_ipl | IPL_MPSAFE,
633 		    ih_func, sc, vsc->sc_dev.dv_xname);
634 		if (sc->sc_ih[0] == NULL) {
635 			printf("%s: couldn't establish interrupt", vsc->sc_dev.dv_xname);
636 			if (intrstr != NULL)
637 				printf(" at %s", intrstr);
638 			printf("\n");
639 			goto fail_2;
640 		}
641 	}
642 	printf("%s: %s\n", vsc->sc_dev.dv_xname, intrstr);
643 
644 	virtio_set_status(vsc, VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK);
645 	return;
646 
647 fail_2:
648 	config_detach(vsc->sc_child, 0);
649 fail_1:
650 	/* no pci_mapreg_unmap() or pci_intr_unmap() */
651 	virtio_set_status(vsc, VIRTIO_CONFIG_DEVICE_STATUS_FAILED);
652 }
653 
654 int
655 virtio_pci_detach(struct device *self, int flags)
656 {
657 	struct virtio_pci_softc *sc = (struct virtio_pci_softc *)self;
658 	struct virtio_softc *vsc = &sc->sc_sc;
659 	int r;
660 
661 	if (vsc->sc_child != 0 && vsc->sc_child != VIRTIO_CHILD_ERROR) {
662 		r = config_detach(vsc->sc_child, flags);
663 		if (r)
664 			return r;
665 	}
666 	KASSERT(vsc->sc_child == 0 || vsc->sc_child == VIRTIO_CHILD_ERROR);
667 	KASSERT(vsc->sc_vqs == 0);
668 	virtio_pci_free_irqs(sc);
669 	if (sc->sc_iosize)
670 		bus_space_unmap(sc->sc_iot, sc->sc_ioh, sc->sc_iosize);
671 	sc->sc_iosize = 0;
672 
673 	return 0;
674 }
675 
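/*
 * For 0.9, (re)map the device config subregion, whose offset within the
 * I/O BAR depends on whether MSI-X is in use. With 1.0 the device config
 * region is fixed, so there is nothing to do.
 */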
676 int
677 virtio_pci_adjust_config_region(struct virtio_pci_softc *sc)
678 {
679 	if (sc->sc_sc.sc_version_1)
680 		return 0;
681 	sc->sc_devcfg_iosize = sc->sc_iosize - sc->sc_devcfg_offset;
682 	sc->sc_devcfg_iot = sc->sc_iot;
683 	if (bus_space_subregion(sc->sc_iot, sc->sc_ioh, sc->sc_devcfg_offset,
684 	    sc->sc_devcfg_iosize, &sc->sc_devcfg_ioh) != 0) {
685 		printf("%s: can't map config i/o space\n",
686 		    sc->sc_sc.sc_dev.dv_xname);
687 		return 1;
688 	}
689 	return 0;
690 }
691 
692 /*
693  * Feature negotiation.
694  * Prints available / negotiated features if guest_feature_names != NULL and
695  * VIRTIO_DEBUG is 1
696  */
697 int
698 virtio_pci_negotiate_features(struct virtio_softc *vsc,
699     const struct virtio_feature_name *guest_feature_names)
700 {
701 	struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc;
702 	uint64_t host, negotiated;
703 
704 	vsc->sc_active_features = 0;
705 
706 	/*
707 	 * We enable indirect descriptors by default. They can be switched
708 	 * off by setting bit 1 in the driver flags, see config(8)
709 	 */
710 	if (!(vsc->sc_dev.dv_cfdata->cf_flags & VIRTIO_CF_NO_INDIRECT) &&
711 	    !(vsc->sc_child->dv_cfdata->cf_flags & VIRTIO_CF_NO_INDIRECT)) {
712 		vsc->sc_driver_features |= VIRTIO_F_RING_INDIRECT_DESC;
713 	} else if (guest_feature_names != NULL) {
714 		printf(" RingIndirectDesc disabled by UKC");
715 	}
716 
717 	/*
718 	 * The driver must add VIRTIO_F_RING_EVENT_IDX if it supports it.
719 	 * If it did, check if it is disabled by bit 2 in the driver flags.
720 	 */
721 	if ((vsc->sc_driver_features & VIRTIO_F_RING_EVENT_IDX) &&
722 	    ((vsc->sc_dev.dv_cfdata->cf_flags & VIRTIO_CF_NO_EVENT_IDX) ||
723 	    (vsc->sc_child->dv_cfdata->cf_flags & VIRTIO_CF_NO_EVENT_IDX))) {
724 		if (guest_feature_names != NULL)
725 			printf(" RingEventIdx disabled by UKC");
726 		vsc->sc_driver_features &= ~VIRTIO_F_RING_EVENT_IDX;
727 	}
728 
729 	if (vsc->sc_version_1) {
730 		return virtio_pci_negotiate_features_10(vsc,
731 		    guest_feature_names);
732 	}
733 
734 	/* virtio 0.9 only */
735 	host = bus_space_read_4(sc->sc_iot, sc->sc_ioh,
736 				VIRTIO_CONFIG_DEVICE_FEATURES);
737 	negotiated = host & vsc->sc_driver_features;
738 #if VIRTIO_DEBUG
739 	if (guest_feature_names)
740 		virtio_log_features(host, negotiated, guest_feature_names);
741 #endif
742 	bus_space_write_4(sc->sc_iot, sc->sc_ioh,
743 			  VIRTIO_CONFIG_GUEST_FEATURES, negotiated);
744 	vsc->sc_active_features = negotiated;
745 	if (negotiated & VIRTIO_F_RING_INDIRECT_DESC)
746 		vsc->sc_indirect = 1;
747 	else
748 		vsc->sc_indirect = 0;
749 	return 0;
750 }
751 
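/*
 * Virtio 1.0 feature negotiation: read both 32-bit device feature words,
 * write back the negotiated set, set FEATURES_OK and verify that the
 * device accepted the features.
 */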
752 int
753 virtio_pci_negotiate_features_10(struct virtio_softc *vsc,
754     const struct virtio_feature_name *guest_feature_names)
755 {
756 	struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc;
757 	uint64_t host, negotiated;
758 
759 	vsc->sc_driver_features |= VIRTIO_F_VERSION_1;
760 	/* notify on empty is 0.9 only */
761 	vsc->sc_driver_features &= ~VIRTIO_F_NOTIFY_ON_EMPTY;
762 	CWRITE(sc, device_feature_select, 0);
763 	host = CREAD(sc, device_feature);
764 	CWRITE(sc, device_feature_select, 1);
765 	host |= (uint64_t)CREAD(sc, device_feature) << 32;
766 
767 	negotiated = host & vsc->sc_driver_features;
768 #if VIRTIO_DEBUG
769 	if (guest_feature_names)
770 		virtio_log_features(host, negotiated, guest_feature_names);
771 #endif
772 	CWRITE(sc, driver_feature_select, 0);
773 	CWRITE(sc, driver_feature, negotiated & 0xffffffff);
774 	CWRITE(sc, driver_feature_select, 1);
775 	CWRITE(sc, driver_feature, negotiated >> 32);
776 	virtio_pci_set_status(vsc, VIRTIO_CONFIG_DEVICE_STATUS_FEATURES_OK);
777 
778 	if ((CREAD(sc, device_status) &
779 	    VIRTIO_CONFIG_DEVICE_STATUS_FEATURES_OK) == 0) {
780 		printf("%s: Feature negotiation failed\n",
781 		    vsc->sc_dev.dv_xname);
782 		CWRITE(sc, device_status, VIRTIO_CONFIG_DEVICE_STATUS_FAILED);
783 		return ENXIO;
784 	}
785 	vsc->sc_active_features = negotiated;
786 
787 	if (negotiated & VIRTIO_F_RING_INDIRECT_DESC)
788 		vsc->sc_indirect = 1;
789 	else
790 		vsc->sc_indirect = 0;
791 
792 	if ((negotiated & VIRTIO_F_VERSION_1) == 0) {
793 #if VIRTIO_DEBUG
794 		printf("%s: Host rejected Version_1\n", __func__);
795 #endif
796 		CWRITE(sc, device_status, VIRTIO_CONFIG_DEVICE_STATUS_FAILED);
797 		return EINVAL;
798 	}
799 	return 0;
800 }
801 
802 /*
803  * Device configuration registers.
804  */
805 uint8_t
806 virtio_pci_read_device_config_1(struct virtio_softc *vsc, int index)
807 {
808 	struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc;
809 	return bus_space_read_1(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, index);
810 }
811 
812 uint16_t
813 virtio_pci_read_device_config_2(struct virtio_softc *vsc, int index)
814 {
815 	struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc;
816 	return bus_space_read_2(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, index);
817 }
818 
819 uint32_t
820 virtio_pci_read_device_config_4(struct virtio_softc *vsc, int index)
821 {
822 	struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc;
823 	return bus_space_read_4(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, index);
824 }
825 
826 uint64_t
827 virtio_pci_read_device_config_8(struct virtio_softc *vsc, int index)
828 {
829 	struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc;
830 	uint64_t r;
831 
832 	r = bus_space_read_4(sc->sc_devcfg_iot, sc->sc_devcfg_ioh,
833 	    index + sizeof(uint32_t));
834 	r <<= 32;
835 	r += bus_space_read_4(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, index);
836 	return r;
837 }
838 
839 void
840 virtio_pci_write_device_config_1(struct virtio_softc *vsc, int index,
841     uint8_t value)
842 {
843 	struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc;
844 	bus_space_write_1(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, index, value);
845 }
846 
847 void
848 virtio_pci_write_device_config_2(struct virtio_softc *vsc, int index,
849     uint16_t value)
850 {
851 	struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc;
852 	bus_space_write_2(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, index, value);
853 }
854 
855 void
856 virtio_pci_write_device_config_4(struct virtio_softc *vsc,
857 			     int index, uint32_t value)
858 {
859 	struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc;
860 	bus_space_write_4(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, index, value);
861 }
862 
863 void
864 virtio_pci_write_device_config_8(struct virtio_softc *vsc,
865 			     int index, uint64_t value)
866 {
867 	struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc;
868 	bus_space_write_4(sc->sc_devcfg_iot, sc->sc_devcfg_ioh,
869 	    index, value & 0xffffffff);
870 	bus_space_write_4(sc->sc_devcfg_iot, sc->sc_devcfg_ioh,
871 	    index + sizeof(uint32_t), value >> 32);
872 }
873 
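/*
 * Map and establish one MSI-X vector with the given handler.
 */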
874 int
875 virtio_pci_msix_establish(struct virtio_pci_softc *sc,
876     struct pci_attach_args *pa, int idx, int (*handler)(void *), void *ih_arg)
877 {
878 	struct virtio_softc *vsc = &sc->sc_sc;
879 	pci_intr_handle_t ih;
880 
881 	if (pci_intr_map_msix(pa, idx, &ih) != 0) {
882 #if VIRTIO_DEBUG
883 		printf("%s[%d]: pci_intr_map_msix failed\n",
884 		    vsc->sc_dev.dv_xname, idx);
885 #endif
886 		return 1;
887 	}
888 	sc->sc_ih[idx] = pci_intr_establish(sc->sc_pc, ih, vsc->sc_ipl,
889 	    handler, ih_arg, vsc->sc_dev.dv_xname);
890 	if (sc->sc_ih[idx] == NULL) {
891 		printf("%s[%d]: couldn't establish msix interrupt\n",
892 		    vsc->sc_dev.dv_xname, idx);
893 		return 1;
894 	}
895 	return 0;
896 }
897 
898 void
899 virtio_pci_set_msix_queue_vector(struct virtio_pci_softc *sc, uint32_t idx, uint16_t vector)
900 {
901 	if (sc->sc_sc.sc_version_1) {
902 		CWRITE(sc, queue_select, idx);
903 		CWRITE(sc, queue_msix_vector, vector);
904 	} else {
905 		bus_space_write_2(sc->sc_iot, sc->sc_ioh,
906 		    VIRTIO_CONFIG_QUEUE_SELECT, idx);
907 		bus_space_write_2(sc->sc_iot, sc->sc_ioh,
908 		    VIRTIO_MSI_QUEUE_VECTOR, vector);
909 	}
910 }
911 
912 void
913 virtio_pci_set_msix_config_vector(struct virtio_pci_softc *sc, uint16_t vector)
914 {
915 	if (sc->sc_sc.sc_version_1) {
916 		CWRITE(sc, config_msix_vector, vector);
917 	} else {
918 		bus_space_write_2(sc->sc_iot, sc->sc_ioh,
919 		    VIRTIO_MSI_CONFIG_VECTOR, vector);
920 	}
921 }
922 
923 
924 void
925 virtio_pci_free_irqs(struct virtio_pci_softc *sc)
926 {
927 	struct virtio_softc *vsc = &sc->sc_sc;
928 	int i;
929 
930 	if (sc->sc_devcfg_offset == VIRTIO_CONFIG_DEVICE_CONFIG_MSI) {
931 		for (i = 0; i < vsc->sc_nvqs; i++) {
932 			virtio_pci_set_msix_queue_vector(sc, i,
933 			    VIRTIO_MSI_NO_VECTOR);
934 		}
935 	}
936 
937 	for (i = 0; i < MAX_MSIX_VECS; i++) {
938 		if (sc->sc_ih[i]) {
939 			pci_intr_disestablish(sc->sc_pc, sc->sc_ih[i]);
940 			sc->sc_ih[i] = NULL;
941 		}
942 	}
943 
944 	sc->sc_devcfg_offset = VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI;
945 	virtio_pci_adjust_config_region(sc);
946 }
947 
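/*
 * Allocate MSI-X vectors. Vector 0 handles config changes; with
 * shared != 0 all virtqueues share vector 1, otherwise virtqueue n gets
 * vector n + 1 (see enum irq_type).
 */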
948 int
949 virtio_pci_setup_msix(struct virtio_pci_softc *sc, struct pci_attach_args *pa,
950     int shared)
951 {
952 	struct virtio_softc *vsc = &sc->sc_sc;
953 	int i;
954 
955 	if (virtio_pci_msix_establish(sc, pa, 0, virtio_pci_config_intr, vsc))
956 		return 1;
957 	sc->sc_devcfg_offset = VIRTIO_CONFIG_DEVICE_CONFIG_MSI;
958 	virtio_pci_adjust_config_region(sc);
959 	virtio_pci_set_msix_config_vector(sc, 0);
960 
961 	if (shared) {
962 		if (virtio_pci_msix_establish(sc, pa, 1,
963 		    virtio_pci_shared_queue_intr, vsc)) {
964 			goto fail;
965 		}
966 
967 		for (i = 0; i < vsc->sc_nvqs; i++)
968 			virtio_pci_set_msix_queue_vector(sc, i, 1);
969 	} else {
970 		for (i = 0; i < vsc->sc_nvqs; i++) {
971 			if (virtio_pci_msix_establish(sc, pa, i + 1,
972 			    virtio_pci_queue_intr, &vsc->sc_vqs[i])) {
973 				goto fail;
974 			}
975 			virtio_pci_set_msix_queue_vector(sc, i, i + 1);
976 		}
977 	}
978 
979 	return 0;
980 fail:
981 	virtio_pci_free_irqs(sc);
982 	return 1;
983 }
984 
985 /*
986  * Interrupt handler.
987  */
988 
989 /*
990  * Only used without MSI-X
991  */
992 int
993 virtio_pci_legacy_intr(void *arg)
994 {
995 	struct virtio_pci_softc *sc = arg;
996 	struct virtio_softc *vsc = &sc->sc_sc;
997 	int isr, r = 0;
998 
999 	/* check and ack the interrupt */
1000 	isr = bus_space_read_1(sc->sc_isr_iot, sc->sc_isr_ioh, 0);
1001 	if (isr == 0)
1002 		return 0;
1003 	KERNEL_LOCK();
1004 	if ((isr & VIRTIO_CONFIG_ISR_CONFIG_CHANGE) &&
1005 	    (vsc->sc_config_change != NULL)) {
1006 		r = (vsc->sc_config_change)(vsc);
1007 	}
1008 	r |= virtio_check_vqs(vsc);
1009 	KERNEL_UNLOCK();
1010 
1011 	return r;
1012 }
1013 
1014 int
1015 virtio_pci_legacy_intr_mpsafe(void *arg)
1016 {
1017 	struct virtio_pci_softc *sc = arg;
1018 	struct virtio_softc *vsc = &sc->sc_sc;
1019 	int isr, r = 0;
1020 
1021 	/* check and ack the interrupt */
1022 	isr = bus_space_read_1(sc->sc_isr_iot, sc->sc_isr_ioh, 0);
1023 	if (isr == 0)
1024 		return 0;
1025 	if ((isr & VIRTIO_CONFIG_ISR_CONFIG_CHANGE) &&
1026 	    (vsc->sc_config_change != NULL)) {
1027 		r = (vsc->sc_config_change)(vsc);
1028 	}
1029 	r |= virtio_check_vqs(vsc);
1030 	return r;
1031 }
1032 
1033 /*
1034  * Only used with MSI-X
1035  */
1036 int
1037 virtio_pci_config_intr(void *arg)
1038 {
1039 	struct virtio_softc *vsc = arg;
1040 
1041 	if (vsc->sc_config_change != NULL)
1042 		return vsc->sc_config_change(vsc);
1043 	return 0;
1044 }
1045 
1046 /*
1047  * Only used with MSI-X
1048  */
1049 int
1050 virtio_pci_queue_intr(void *arg)
1051 {
1052 	struct virtqueue *vq = arg;
1053 
1054 	if (vq->vq_done)
1055 		return (vq->vq_done)(vq);
1056 	return 0;
1057 }
1058 
1059 int
1060 virtio_pci_shared_queue_intr(void *arg)
1061 {
1062 	struct virtio_softc *vsc = arg;
1063 
1064 	return virtio_check_vqs(vsc);
1065 }
1066 
1067 /*
1068  * Interrupt handler to be used when polling.
1069  * We cannot use isr here because it is not defined in MSI-X mode.
1070  */
1071 int
1072 virtio_pci_poll_intr(void *arg)
1073 {
1074 	struct virtio_pci_softc *sc = arg;
1075 	struct virtio_softc *vsc = &sc->sc_sc;
1076 	int r = 0;
1077 
1078 	if (vsc->sc_config_change != NULL)
1079 		r = (vsc->sc_config_change)(vsc);
1080 
1081 	r |= virtio_check_vqs(vsc);
1082 
1083 	return r;
1084 }
1085 
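/*
 * Notify the device that a virtqueue has new buffers. With 1.0, the
 * write goes to the queue's notify offset scaled by the notify offset
 * multiplier; with 0.9, all queues share a single notify register.
 */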
1086 void
1087 virtio_pci_kick(struct virtio_softc *vsc, uint16_t idx)
1088 {
1089 	struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc;
1090 	unsigned offset = 0;
1091 	if (vsc->sc_version_1) {
1092 		offset = vsc->sc_vqs[idx].vq_notify_off *
1093 		    sc->sc_notify_off_multiplier;
1094 	}
1095 	bus_space_write_2(sc->sc_notify_iot, sc->sc_notify_ioh, offset, idx);
1096 }
1097