xref: /openbsd-src/sys/dev/pci/virtio_pci.c (revision 46035553bfdd96e63c94e32da0210227ec2e3cf1)
1 /*	$OpenBSD: virtio_pci.c,v 1.28 2019/05/27 15:55:01 sf Exp $	*/
2 /*	$NetBSD: virtio.c,v 1.3 2011/11/02 23:05:52 njoly Exp $	*/
3 
4 /*
5  * Copyright (c) 2012 Stefan Fritsch.
6  * Copyright (c) 2010 Minoura Makoto.
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  */
29 
30 #include <sys/param.h>
31 #include <sys/systm.h>
32 #include <sys/device.h>
33 #include <sys/mutex.h>
34 
35 #include <dev/pci/pcidevs.h>
36 #include <dev/pci/pcireg.h>
37 #include <dev/pci/pcivar.h>
38 #include <dev/pci/virtio_pcireg.h>
39 
40 #include <dev/pv/virtioreg.h>
41 #include <dev/pv/virtiovar.h>
42 #include <dev/pci/virtio_pcireg.h>
43 
/* Debug printf: the message is printed only when VIRTIO_DEBUG >= n. */
#define DNPRINTF(n,x...)				\
	do {						\
		if (VIRTIO_DEBUG >= n)			\
			printf(x);			\
	} while (0)
46 
47 
48 /*
49  * XXX: Before being used on big endian arches, the access to config registers
50  * XXX: needs to be reviewed/fixed. The non-device specific registers are
51  * XXX: PCI-endian while the device specific registers are native endian.
52  */
53 
54 #define MAX_MSIX_VECS	8
55 
56 struct virtio_pci_softc;
57 
58 int		virtio_pci_match(struct device *, void *, void *);
59 void		virtio_pci_attach(struct device *, struct device *, void *);
60 int		virtio_pci_attach_09(struct virtio_pci_softc *sc, struct pci_attach_args *pa);
61 int		virtio_pci_attach_10(struct virtio_pci_softc *sc, struct pci_attach_args *pa);
62 int		virtio_pci_detach(struct device *, int);
63 
64 void		virtio_pci_kick(struct virtio_softc *, uint16_t);
65 int		virtio_pci_adjust_config_region(struct virtio_pci_softc *);
66 uint8_t		virtio_pci_read_device_config_1(struct virtio_softc *, int);
67 uint16_t	virtio_pci_read_device_config_2(struct virtio_softc *, int);
68 uint32_t	virtio_pci_read_device_config_4(struct virtio_softc *, int);
69 uint64_t	virtio_pci_read_device_config_8(struct virtio_softc *, int);
70 void		virtio_pci_write_device_config_1(struct virtio_softc *, int, uint8_t);
71 void		virtio_pci_write_device_config_2(struct virtio_softc *, int, uint16_t);
72 void		virtio_pci_write_device_config_4(struct virtio_softc *, int, uint32_t);
73 void		virtio_pci_write_device_config_8(struct virtio_softc *, int, uint64_t);
74 uint16_t	virtio_pci_read_queue_size(struct virtio_softc *, uint16_t);
75 void		virtio_pci_setup_queue(struct virtio_softc *, struct virtqueue *, uint64_t);
76 void		virtio_pci_set_status(struct virtio_softc *, int);
77 int		virtio_pci_negotiate_features(struct virtio_softc *, const struct virtio_feature_name *);
78 int		virtio_pci_negotiate_features_10(struct virtio_softc *, const struct virtio_feature_name *);
79 void		virtio_pci_set_msix_queue_vector(struct virtio_pci_softc *, uint32_t, uint16_t);
80 void		virtio_pci_set_msix_config_vector(struct virtio_pci_softc *, uint16_t);
81 int		virtio_pci_msix_establish(struct virtio_pci_softc *, struct pci_attach_args *, int, int (*)(void *), void *);
82 int		virtio_pci_setup_msix(struct virtio_pci_softc *, struct pci_attach_args *, int);
83 void		virtio_pci_free_irqs(struct virtio_pci_softc *);
84 int		virtio_pci_poll_intr(void *);
85 int		virtio_pci_legacy_intr(void *);
86 int		virtio_pci_legacy_intr_mpsafe(void *);
87 int		virtio_pci_config_intr(void *);
88 int		virtio_pci_queue_intr(void *);
89 int		virtio_pci_shared_queue_intr(void *);
90 int		virtio_pci_find_cap(struct virtio_pci_softc *sc, int cfg_type, void *buf, int buflen);
91 #if VIRTIO_DEBUG
92 void virtio_pci_dump_caps(struct virtio_pci_softc *sc);
93 #endif
94 
/* Interrupt delivery scheme selected by virtio_pci_attach(). */
enum irq_type {
	/* no MSI-X vectors in use */
	IRQ_NO_MSIX = 0,
	/* vec 0: config irq, vec 1 shared by all vqs */
	IRQ_MSIX_SHARED = 1,
	/* vec 0: config irq, vec n: irq of vq[n-1] */
	IRQ_MSIX_PER_VQ = 2,
};
100 
/*
 * Per-device state of the virtio PCI transport.
 *
 * sc_sc must remain the first member: the transport callbacks receive a
 * struct virtio_softc * and cast it to struct virtio_pci_softc *.
 */
struct virtio_pci_softc {
	struct virtio_softc	sc_sc;
	/* PCI chipset and device tags saved at attach time */
	pci_chipset_tag_t	sc_pc;
	pcitag_t		sc_ptag;

	/*
	 * 0.9: the mapped I/O BAR holding the whole register block.
	 * 1.0: the common configuration region (accessed via CREAD/CWRITE).
	 */
	bus_space_tag_t		sc_iot;
	bus_space_handle_t	sc_ioh;
	bus_size_t		sc_iosize;

	/* 1.0 only: the BARs mapped by virtio_pci_attach_10(), in mapping order */
	bus_space_tag_t		sc_bars_iot[4];
	bus_space_handle_t	sc_bars_ioh[4];
	bus_size_t		sc_bars_iosize[4];

	/* queue notification region written by virtio_pci_kick() */
	bus_space_tag_t		sc_notify_iot;
	bus_space_handle_t	sc_notify_ioh;
	bus_size_t		sc_notify_iosize;
	/* 1.0 only: scales vq_notify_off into a byte offset in the region */
	unsigned int		sc_notify_off_multiplier;

	/* device-specific configuration region */
	bus_space_tag_t		sc_devcfg_iot;
	bus_space_handle_t	sc_devcfg_ioh;
	bus_size_t		sc_devcfg_iosize;
	/*
	 * With 0.9, the offset of the devcfg region in the io bar changes
	 * depending on MSI-X being enabled or not.
	 * With 1.0, this field is still used to remember if MSI-X is enabled
	 * or not.
	 */
	unsigned int		sc_devcfg_offset;

	/* ISR status region, read by the legacy interrupt handlers */
	bus_space_tag_t		sc_isr_iot;
	bus_space_handle_t	sc_isr_ioh;
	bus_size_t		sc_isr_iosize;

	/*
	 * Established interrupt handles; NULL means unused. Slot 0 holds
	 * either the single non-MSI-X interrupt or the MSI-X config vector.
	 */
	void			*sc_ih[MAX_MSIX_VECS];

	enum irq_type		sc_irq_type;
};
138 
/*
 * Autoconf attachment descriptor for the PCI transport.
 * The initializer is positional; slot meanings below follow the values
 * supplied here (the struct itself is declared elsewhere).
 */
struct cfattach virtio_pci_ca = {
	sizeof(struct virtio_pci_softc),	/* softc size */
	virtio_pci_match,			/* match routine */
	virtio_pci_attach,			/* attach routine */
	virtio_pci_detach,			/* detach routine */
	NULL					/* last slot left unset */
};
146 
/*
 * Transport operations installed into virtio_softc.sc_ops by
 * virtio_pci_attach(). The initializer is positional, so the order of
 * entries must match struct virtio_ops (declared elsewhere).
 */
struct virtio_ops virtio_pci_ops = {
	virtio_pci_kick,
	/* device-specific config space accessors, by access width */
	virtio_pci_read_device_config_1,
	virtio_pci_read_device_config_2,
	virtio_pci_read_device_config_4,
	virtio_pci_read_device_config_8,
	virtio_pci_write_device_config_1,
	virtio_pci_write_device_config_2,
	virtio_pci_write_device_config_4,
	virtio_pci_write_device_config_8,
	/* virtqueue and device status handling */
	virtio_pci_read_queue_size,
	virtio_pci_setup_queue,
	virtio_pci_set_status,
	virtio_pci_negotiate_features,
	virtio_pci_poll_intr,
};
163 
164 static inline
165 uint64_t _cread(struct virtio_pci_softc *sc, unsigned off, unsigned size)
166 {
167 	uint64_t val;
168 	switch (size) {
169 	case 1:
170 		val = bus_space_read_1(sc->sc_iot, sc->sc_ioh, off);
171 		break;
172 	case 2:
173 		val = bus_space_read_2(sc->sc_iot, sc->sc_ioh, off);
174 		break;
175 	case 4:
176 		val = bus_space_read_4(sc->sc_iot, sc->sc_ioh, off);
177 		break;
178 	case 8:
179 		val = bus_space_read_4(sc->sc_iot, sc->sc_ioh,
180 		    off + sizeof(uint32_t));
181 		val <<= 32;
182 		val += bus_space_read_4(sc->sc_iot, sc->sc_ioh, off);
183 		break;
184 	}
185 	return val;
186 }
187 
/*
 * Read member `memb` of struct virtio_pci_common_cfg through _cread();
 * offset and access width are derived from the structure layout.
 */
#define CREAD(sc, memb)							\
	_cread(sc, offsetof(struct virtio_pci_common_cfg, memb),	\
	    sizeof(((struct virtio_pci_common_cfg *)0)->memb))
190 
/*
 * Write `val` to member `memb` of struct virtio_pci_common_cfg in the
 * region described by (sc)->sc_iot/(sc)->sc_ioh.
 *
 * The access width is the size of the member (1, 2, 4 or 8 bytes); an
 * 8-byte member is written as two 4-byte accesses, lower half first.
 * `sc` and `val` are each evaluated exactly once, before the macro's
 * other locals come into scope.
 */
#define CWRITE(sc, memb, val)							\
	do {									\
		struct virtio_pci_softc *cw_sc = (sc);				\
		uint64_t cw_val = (uint64_t)(val);				\
		size_t cw_off = offsetof(struct virtio_pci_common_cfg, memb);	\
		size_t cw_size =						\
		    sizeof(((struct virtio_pci_common_cfg *)0)->memb);		\
										\
		DNPRINTF(2, "%s: %d: off %#zx size %#zx write %#llx\n",		\
		    __func__, __LINE__, cw_off, cw_size,			\
		    (unsigned long long)cw_val);				\
		switch (cw_size) {						\
		case 1:								\
			bus_space_write_1(cw_sc->sc_iot, cw_sc->sc_ioh,		\
			    cw_off, cw_val);					\
			break;							\
		case 2:								\
			bus_space_write_2(cw_sc->sc_iot, cw_sc->sc_ioh,		\
			    cw_off, cw_val);					\
			break;							\
		case 4:								\
			bus_space_write_4(cw_sc->sc_iot, cw_sc->sc_ioh,		\
			    cw_off, cw_val);					\
			break;							\
		case 8:								\
			bus_space_write_4(cw_sc->sc_iot, cw_sc->sc_ioh,		\
			    cw_off, cw_val & 0xffffffff);			\
			bus_space_write_4(cw_sc->sc_iot, cw_sc->sc_ioh,		\
			    cw_off + sizeof(uint32_t), cw_val >> 32);		\
			break;							\
		}								\
	} while (0)
217 
218 uint16_t
219 virtio_pci_read_queue_size(struct virtio_softc *vsc, uint16_t idx)
220 {
221 	struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc;
222 	uint16_t ret;
223 	if (sc->sc_sc.sc_version_1) {
224 		CWRITE(sc, queue_select, idx);
225 		ret = CREAD(sc, queue_size);
226 	} else {
227 		bus_space_write_2(sc->sc_iot, sc->sc_ioh,
228 		    VIRTIO_CONFIG_QUEUE_SELECT, idx);
229 		ret = bus_space_read_2(sc->sc_iot, sc->sc_ioh,
230 		    VIRTIO_CONFIG_QUEUE_SIZE);
231 	}
232 	return ret;
233 }
234 
235 void
236 virtio_pci_setup_queue(struct virtio_softc *vsc, struct virtqueue *vq,
237     uint64_t addr)
238 {
239 	struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc;
240 	if (sc->sc_sc.sc_version_1) {
241 		CWRITE(sc, queue_select, vq->vq_index);
242 		if (addr == 0) {
243 			CWRITE(sc, queue_enable, 0);
244 			CWRITE(sc, queue_desc, 0);
245 			CWRITE(sc, queue_avail, 0);
246 			CWRITE(sc, queue_used, 0);
247 		} else {
248 			CWRITE(sc, queue_desc, addr);
249 			CWRITE(sc, queue_avail, addr + vq->vq_availoffset);
250 			CWRITE(sc, queue_used, addr + vq->vq_usedoffset);
251 			CWRITE(sc, queue_enable, 1);
252 			vq->vq_notify_off = CREAD(sc, queue_notify_off);
253 		}
254 	} else {
255 		bus_space_write_2(sc->sc_iot, sc->sc_ioh,
256 		    VIRTIO_CONFIG_QUEUE_SELECT, vq->vq_index);
257 		bus_space_write_4(sc->sc_iot, sc->sc_ioh,
258 		    VIRTIO_CONFIG_QUEUE_ADDRESS, addr / VIRTIO_PAGE_SIZE);
259 	}
260 
261 	/*
262 	 * This path is only executed if this function is called after
263 	 * the child's attach function has finished. In other cases,
264 	 * it's done in virtio_pci_setup_msix().
265 	 */
266 	if (sc->sc_irq_type != IRQ_NO_MSIX) {
267 		int vec = 1;
268 		if (sc->sc_irq_type == IRQ_MSIX_PER_VQ)
269 		       vec += vq->vq_index;
270 		if (sc->sc_sc.sc_version_1) {
271 			CWRITE(sc, queue_msix_vector, vec);
272 		} else {
273 			bus_space_write_2(sc->sc_iot, sc->sc_ioh,
274 			    VIRTIO_MSI_QUEUE_VECTOR, vec);
275 		}
276 	}
277 }
278 
279 void
280 virtio_pci_set_status(struct virtio_softc *vsc, int status)
281 {
282 	struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc;
283 	int old = 0;
284 
285 	if (sc->sc_sc.sc_version_1) {
286 		if (status != 0)
287 			old = CREAD(sc, device_status);
288 		CWRITE(sc, device_status, status|old);
289 	} else {
290 		if (status != 0)
291 			old = bus_space_read_1(sc->sc_iot, sc->sc_ioh,
292 			    VIRTIO_CONFIG_DEVICE_STATUS);
293 		bus_space_write_1(sc->sc_iot, sc->sc_ioh,
294 		    VIRTIO_CONFIG_DEVICE_STATUS, status|old);
295 	}
296 }
297 
298 int
299 virtio_pci_match(struct device *parent, void *match, void *aux)
300 {
301 	struct pci_attach_args *pa;
302 
303 	pa = (struct pci_attach_args *)aux;
304 	if (PCI_VENDOR(pa->pa_id) == PCI_VENDOR_OPENBSD &&
305 	    PCI_PRODUCT(pa->pa_id) == PCI_PRODUCT_OPENBSD_CONTROL)
306 		return 1;
307 	if (PCI_VENDOR(pa->pa_id) != PCI_VENDOR_QUMRANET)
308 		return 0;
309 	/* virtio 0.9 */
310 	if (PCI_PRODUCT(pa->pa_id) >= 0x1000 &&
311 	    PCI_PRODUCT(pa->pa_id) <= 0x103f &&
312 	    PCI_REVISION(pa->pa_class) == 0)
313 		return 1;
314 	/* virtio 1.0 */
315 	if (PCI_PRODUCT(pa->pa_id) >= 0x1040 &&
316 	    PCI_PRODUCT(pa->pa_id) <= 0x107f &&
317 	    PCI_REVISION(pa->pa_class) == 1)
318 		return 1;
319 	return 0;
320 }
321 
#if VIRTIO_DEBUG
/*
 * Debug aid: starting at the first vendor-specific PCI capability,
 * follow the cap_next chain and print each entry decoded as a
 * struct virtio_pci_cap.
 */
void
virtio_pci_dump_caps(struct virtio_pci_softc *sc)
{
	pci_chipset_tag_t pc = sc->sc_pc;
	pcitag_t tag = sc->sc_ptag;
	int cap_off, w;
	union {
		pcireg_t reg[4];
		struct virtio_pci_cap vcap;
	} cap;

	if (pci_get_capability(pc, tag, PCI_CAP_VENDSPEC, &cap_off,
	    &cap.reg[0]) == 0)
		return;

	printf("\n");
	for (;;) {
		/* fetch the 16 bytes that make up a virtio_pci_cap */
		for (w = 0; w < 4; w++)
			cap.reg[w] = pci_conf_read(pc, tag, cap_off + w * 4);
		printf("%s: cfgoff %#x len %#x type %#x bar %#x: off %#x len %#x\n",
			__func__, cap_off, cap.vcap.cap_len, cap.vcap.cfg_type,
			cap.vcap.bar, cap.vcap.offset, cap.vcap.length);
		cap_off = cap.vcap.cap_next;
		if (cap_off == 0)
			break;
	}
}
#endif
348 
349 int
350 virtio_pci_find_cap(struct virtio_pci_softc *sc, int cfg_type, void *buf, int buflen)
351 {
352 	pci_chipset_tag_t pc = sc->sc_pc;
353 	pcitag_t tag = sc->sc_ptag;
354 	unsigned int offset, i, len;
355 	union {
356 		pcireg_t reg[8];
357 		struct virtio_pci_cap vcap;
358 	} *v = buf;
359 
360 	if (buflen < sizeof(struct virtio_pci_cap))
361 		return ERANGE;
362 
363 	if (!pci_get_capability(pc, tag, PCI_CAP_VENDSPEC, &offset, &v->reg[0]))
364 		return ENOENT;
365 
366 	do {
367 		for (i = 0; i < 4; i++)
368 			v->reg[i] = pci_conf_read(pc, tag, offset + i * 4);
369 		if (v->vcap.cfg_type == cfg_type)
370 			break;
371 		offset = v->vcap.cap_next;
372 	} while (offset != 0);
373 
374 	if (offset == 0)
375 		return ENOENT;
376 
377 	if (v->vcap.cap_len > sizeof(struct virtio_pci_cap)) {
378 		len = roundup(v->vcap.cap_len, sizeof(pcireg_t));
379 		if (len > buflen) {
380 			printf("%s: cap too large\n", __func__);
381 			return ERANGE;
382 		}
383 		for (i = 4; i < len / sizeof(pcireg_t);  i++)
384 			v->reg[i] = pci_conf_read(pc, tag, offset + i * 4);
385 	}
386 
387 	return 0;
388 }
389 
390 
/* Number of pcireg_t-sized slots between PCI_MAPREG_START and PCI_MAPREG_END. */
#define NMAPREG	\
	((PCI_MAPREG_END - PCI_MAPREG_START) / sizeof(pcireg_t))
393 
394 int
395 virtio_pci_attach_10(struct virtio_pci_softc *sc, struct pci_attach_args *pa)
396 {
397 	struct virtio_pci_cap common, isr, device;
398 	struct virtio_pci_notify_cap notify;
399 	int have_device_cfg = 0;
400 	bus_size_t bars[NMAPREG] = { 0 };
401 	int bars_idx[NMAPREG] = { 0 };
402 	struct virtio_pci_cap *caps[] = { &common, &isr, &device, &notify.cap };
403 	int i, j = 0, ret = 0;
404 
405 	if (virtio_pci_find_cap(sc, VIRTIO_PCI_CAP_COMMON_CFG, &common, sizeof(common)) != 0)
406 		return ENODEV;
407 
408 	if (virtio_pci_find_cap(sc, VIRTIO_PCI_CAP_NOTIFY_CFG, &notify, sizeof(notify)) != 0)
409 		return ENODEV;
410 	if (virtio_pci_find_cap(sc, VIRTIO_PCI_CAP_ISR_CFG, &isr, sizeof(isr)) != 0)
411 		return ENODEV;
412 	if (virtio_pci_find_cap(sc, VIRTIO_PCI_CAP_DEVICE_CFG, &device, sizeof(device)) != 0)
413 		memset(&device, 0, sizeof(device));
414 	else
415 		have_device_cfg = 1;
416 
417 	/*
418 	 * XXX Maybe there are devices that offer the pci caps but not the
419 	 * XXX VERSION_1 feature bit? Then we should check the feature bit
420 	 * XXX here and fall back to 0.9 out if not present.
421 	 */
422 
423 	/* Figure out which bars we need to map */
424 	for (i = 0; i < nitems(caps); i++) {
425 		int bar = caps[i]->bar;
426 		bus_size_t len = caps[i]->offset + caps[i]->length;
427 		if (caps[i]->length == 0)
428 			continue;
429 		if (bars[bar] < len)
430 			bars[bar] = len;
431 	}
432 
433 	for (i = 0; i < nitems(bars); i++) {
434 		int reg;
435 		pcireg_t type;
436 		if (bars[i] == 0)
437 			continue;
438 		reg = PCI_MAPREG_START + i * 4;
439 		type = pci_mapreg_type(sc->sc_pc, sc->sc_ptag, reg);
440 		if (pci_mapreg_map(pa, reg, type, 0, &sc->sc_bars_iot[j],
441 		    &sc->sc_bars_ioh[j], NULL, &sc->sc_bars_iosize[j],
442 		    bars[i])) {
443 			printf("%s: can't map bar %u \n",
444 			    sc->sc_sc.sc_dev.dv_xname, i);
445 			ret = EIO;
446 			goto err;
447 		}
448 		bars_idx[i] = j;
449 		j++;
450 	}
451 
452 	i = bars_idx[notify.cap.bar];
453 	if (bus_space_subregion(sc->sc_bars_iot[i], sc->sc_bars_ioh[i],
454 	    notify.cap.offset, notify.cap.length, &sc->sc_notify_ioh) != 0) {
455 		printf("%s: can't map notify i/o space\n",
456 		    sc->sc_sc.sc_dev.dv_xname);
457 		ret = EIO;
458 		goto err;
459 	}
460 	sc->sc_notify_iosize = notify.cap.length;
461 	sc->sc_notify_iot = sc->sc_bars_iot[i];
462 	sc->sc_notify_off_multiplier = notify.notify_off_multiplier;
463 
464 	if (have_device_cfg) {
465 		i = bars_idx[device.bar];
466 		if (bus_space_subregion(sc->sc_bars_iot[i], sc->sc_bars_ioh[i],
467 		    device.offset, device.length, &sc->sc_devcfg_ioh) != 0) {
468 			printf("%s: can't map devcfg i/o space\n",
469 			    sc->sc_sc.sc_dev.dv_xname);
470 			ret = EIO;
471 			goto err;
472 		}
473 		sc->sc_devcfg_iosize = device.length;
474 		sc->sc_devcfg_iot = sc->sc_bars_iot[i];
475 	}
476 
477 	i = bars_idx[isr.bar];
478 	if (bus_space_subregion(sc->sc_bars_iot[i], sc->sc_bars_ioh[i],
479 	    isr.offset, isr.length, &sc->sc_isr_ioh) != 0) {
480 		printf("%s: can't map isr i/o space\n",
481 		    sc->sc_sc.sc_dev.dv_xname);
482 		ret = EIO;
483 		goto err;
484 	}
485 	sc->sc_isr_iosize = isr.length;
486 	sc->sc_isr_iot = sc->sc_bars_iot[i];
487 
488 	i = bars_idx[common.bar];
489 	if (bus_space_subregion(sc->sc_bars_iot[i], sc->sc_bars_ioh[i],
490 	    common.offset, common.length, &sc->sc_ioh) != 0) {
491 		printf("%s: can't map common i/o space\n",
492 		    sc->sc_sc.sc_dev.dv_xname);
493 		ret = EIO;
494 		goto err;
495 	}
496 	sc->sc_iosize = common.length;
497 	sc->sc_iot = sc->sc_bars_iot[i];
498 
499 	sc->sc_sc.sc_version_1 = 1;
500 	return 0;
501 
502 err:
503 	/* there is no pci_mapreg_unmap() */
504 	return ret;
505 }
506 
507 int
508 virtio_pci_attach_09(struct virtio_pci_softc *sc, struct pci_attach_args *pa)
509 {
510 	struct virtio_softc *vsc = &sc->sc_sc;
511 	if (pci_mapreg_map(pa, PCI_MAPREG_START, PCI_MAPREG_TYPE_IO, 0,
512 	    &sc->sc_iot, &sc->sc_ioh, NULL, &sc->sc_iosize, 0)) {
513 		printf("%s: can't map i/o space\n", vsc->sc_dev.dv_xname);
514 		return EIO;
515 	}
516 
517 	if (bus_space_subregion(sc->sc_iot, sc->sc_ioh,
518 	    VIRTIO_CONFIG_QUEUE_NOTIFY, 2, &sc->sc_notify_ioh) != 0) {
519 		printf("%s: can't map notify i/o space\n",
520 		    vsc->sc_dev.dv_xname);
521 		return EIO;
522 	}
523 	sc->sc_notify_iosize = 2;
524 	sc->sc_notify_iot = sc->sc_iot;
525 
526 	if (bus_space_subregion(sc->sc_iot, sc->sc_ioh,
527 	    VIRTIO_CONFIG_ISR_STATUS, 1, &sc->sc_isr_ioh) != 0) {
528 		printf("%s: can't map isr i/o space\n",
529 		    vsc->sc_dev.dv_xname);
530 		return EIO;
531 	}
532 	sc->sc_isr_iosize = 1;
533 	sc->sc_isr_iot = sc->sc_iot;
534 
535 	return 0;
536 }
537 
/*
 * Autoconf attach routine.
 *
 * Determines the virtio device id, maps the register regions (1.0 and/or
 * 0.9 layout), resets the device, attaches the child driver via
 * config_found() and finally sets up interrupts: MSI-X with one vector
 * per virtqueue, then MSI-X with a shared queue vector, then a single
 * MSI or INTx interrupt. On success the device status is set to
 * DRIVER_OK; if anything fails after the reset it is set to FAILED.
 */
void
virtio_pci_attach(struct device *parent, struct device *self, void *aux)
{
	struct virtio_pci_softc *sc = (struct virtio_pci_softc *)self;
	struct virtio_softc *vsc = &sc->sc_sc;
	struct pci_attach_args *pa = (struct pci_attach_args *)aux;
	pci_chipset_tag_t pc = pa->pa_pc;
	pcitag_t tag = pa->pa_tag;
	int revision, ret = ENODEV;
	pcireg_t id;
	char const *intrstr;
	pci_intr_handle_t ih;

	revision = PCI_REVISION(pa->pa_class);
	switch (revision) {
	case 0:
		/* subsystem ID shows what I am */
		id = PCI_PRODUCT(pci_conf_read(pc, tag, PCI_SUBSYS_ID_REG));
		break;
	case 1:
		/* the device id is encoded in the product id, offset by 0x1040 */
		id = PCI_PRODUCT(pa->pa_id) - 0x1040;
		break;
	default:
		printf("unknown revision 0x%02x; giving up\n", revision);
		return;
	}

	sc->sc_pc = pc;
	sc->sc_ptag = pa->pa_tag;
	vsc->sc_dmat = pa->pa_dmat;

	/*
	 * For virtio, ignore normal MSI black/white-listing depending on the
	 * PCI bridge but enable it unconditionally.
	 */
	pa->pa_flags |= PCI_FLAGS_MSI_ENABLED;

#if VIRTIO_DEBUG
	virtio_pci_dump_caps(sc);
#endif

	vsc->sc_ops = &virtio_pci_ops;
	/*
	 * Try the 1.0 layout unless disabled by config flags; for revision 0
	 * devices it is only tried when the flags ask for it.
	 */
	if ((vsc->sc_dev.dv_cfdata->cf_flags & VIRTIO_CF_NO_VERSION_1) == 0 &&
	    (revision == 1 ||
	     (vsc->sc_dev.dv_cfdata->cf_flags & VIRTIO_CF_PREFER_VERSION_1))) {
		ret = virtio_pci_attach_10(sc, pa);
	}
	if (ret != 0 && revision == 0) {
		/* revision 0 means 0.9 only or both 0.9 and 1.0 */
		ret = virtio_pci_attach_09(sc, pa);
	}
	if (ret != 0) {
		printf(": Cannot attach (%d)\n", ret);
		return;
	}

	/* start out without MSI-X; virtio_pci_setup_msix() may change this */
	sc->sc_devcfg_offset = VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI;
	sc->sc_irq_type = IRQ_NO_MSIX;
	if (virtio_pci_adjust_config_region(sc) != 0)
		return;

	virtio_device_reset(vsc);
	virtio_set_status(vsc, VIRTIO_CONFIG_DEVICE_STATUS_ACK);
	virtio_set_status(vsc, VIRTIO_CONFIG_DEVICE_STATUS_DRIVER);

	printf("\n");
	vsc->sc_childdevid = id;
	vsc->sc_child = NULL;
	/* sc_child is checked right after this call to see whether a child attached */
	config_found(self, sc, NULL);
	if (vsc->sc_child == NULL) {
		printf("%s: no matching child driver; not configured\n",
		    vsc->sc_dev.dv_xname);
		goto fail_1;
	}
	if (vsc->sc_child == VIRTIO_CHILD_ERROR) {
		printf("%s: virtio configuration failed\n",
		    vsc->sc_dev.dv_xname);
		goto fail_1;
	}

	/* interrupt setup: per-VQ MSI-X, then shared MSI-X, then MSI/INTx */
	if (virtio_pci_setup_msix(sc, pa, 0) == 0) {
		sc->sc_irq_type = IRQ_MSIX_PER_VQ;
		intrstr = "msix per-VQ";
	} else if (virtio_pci_setup_msix(sc, pa, 1) == 0) {
		sc->sc_irq_type = IRQ_MSIX_SHARED;
		intrstr = "msix shared";
	} else {
		int (*ih_func)(void *) = virtio_pci_legacy_intr;
		if (pci_intr_map_msi(pa, &ih) != 0 && pci_intr_map(pa, &ih) != 0) {
			printf("%s: couldn't map interrupt\n", vsc->sc_dev.dv_xname);
			goto fail_2;
		}
		intrstr = pci_intr_string(pc, ih);
		/*
		 * We always set the IPL_MPSAFE flag in order to do the relatively
		 * expensive ISR read without lock, and then grab the kernel lock in
		 * the interrupt handler.
		 */
		if (vsc->sc_ipl & IPL_MPSAFE)
			ih_func = virtio_pci_legacy_intr_mpsafe;
		sc->sc_ih[0] = pci_intr_establish(pc, ih, vsc->sc_ipl | IPL_MPSAFE,
		    ih_func, sc, vsc->sc_dev.dv_xname);
		if (sc->sc_ih[0] == NULL) {
			printf("%s: couldn't establish interrupt", vsc->sc_dev.dv_xname);
			if (intrstr != NULL)
				printf(" at %s", intrstr);
			printf("\n");
			goto fail_2;
		}
	}
	printf("%s: %s\n", vsc->sc_dev.dv_xname, intrstr);

	virtio_set_status(vsc, VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK);
	return;

fail_2:
	/* a child did attach; detach it again before flagging failure */
	config_detach(vsc->sc_child, 0);
fail_1:
	/* no pci_mapreg_unmap() or pci_intr_unmap() */
	virtio_set_status(vsc, VIRTIO_CONFIG_DEVICE_STATUS_FAILED);
}
659 
660 int
661 virtio_pci_detach(struct device *self, int flags)
662 {
663 	struct virtio_pci_softc *sc = (struct virtio_pci_softc *)self;
664 	struct virtio_softc *vsc = &sc->sc_sc;
665 	int r;
666 
667 	if (vsc->sc_child != 0 && vsc->sc_child != VIRTIO_CHILD_ERROR) {
668 		r = config_detach(vsc->sc_child, flags);
669 		if (r)
670 			return r;
671 	}
672 	KASSERT(vsc->sc_child == 0 || vsc->sc_child == VIRTIO_CHILD_ERROR);
673 	KASSERT(vsc->sc_vqs == 0);
674 	virtio_pci_free_irqs(sc);
675 	if (sc->sc_iosize)
676 		bus_space_unmap(sc->sc_iot, sc->sc_ioh, sc->sc_iosize);
677 	sc->sc_iosize = 0;
678 
679 	return 0;
680 }
681 
682 int
683 virtio_pci_adjust_config_region(struct virtio_pci_softc *sc)
684 {
685 	if (sc->sc_sc.sc_version_1)
686 		return 0;
687 	sc->sc_devcfg_iosize = sc->sc_iosize - sc->sc_devcfg_offset;
688 	sc->sc_devcfg_iot = sc->sc_iot;
689 	if (bus_space_subregion(sc->sc_iot, sc->sc_ioh, sc->sc_devcfg_offset,
690 	    sc->sc_devcfg_iosize, &sc->sc_devcfg_ioh) != 0) {
691 		printf("%s: can't map config i/o space\n",
692 		    sc->sc_sc.sc_dev.dv_xname);
693 		return 1;
694 	}
695 	return 0;
696 }
697 
698 /*
699  * Feature negotiation.
700  * Prints available / negotiated features if guest_feature_names != NULL and
701  * VIRTIO_DEBUG is 1
702  */
703 int
704 virtio_pci_negotiate_features(struct virtio_softc *vsc,
705     const struct virtio_feature_name *guest_feature_names)
706 {
707 	struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc;
708 	uint64_t host, negotiated;
709 
710 	vsc->sc_active_features = 0;
711 
712 	/*
713 	 * We enable indirect descriptors by default. They can be switched
714 	 * off by setting bit 1 in the driver flags, see config(8)
715 	 */
716 	if (!(vsc->sc_dev.dv_cfdata->cf_flags & VIRTIO_CF_NO_INDIRECT) &&
717 	    !(vsc->sc_child->dv_cfdata->cf_flags & VIRTIO_CF_NO_INDIRECT)) {
718 		vsc->sc_driver_features |= VIRTIO_F_RING_INDIRECT_DESC;
719 	} else if (guest_feature_names != NULL) {
720 		printf(" RingIndirectDesc disabled by UKC");
721 	}
722 
723 	/*
724 	 * The driver must add VIRTIO_F_RING_EVENT_IDX if it supports it.
725 	 * If it did, check if it is disabled by bit 2 in the driver flags.
726 	 */
727 	if ((vsc->sc_driver_features & VIRTIO_F_RING_EVENT_IDX) &&
728 	    ((vsc->sc_dev.dv_cfdata->cf_flags & VIRTIO_CF_NO_EVENT_IDX) ||
729 	    (vsc->sc_child->dv_cfdata->cf_flags & VIRTIO_CF_NO_EVENT_IDX))) {
730 		if (guest_feature_names != NULL)
731 			printf(" RingEventIdx disabled by UKC");
732 		vsc->sc_driver_features &= ~VIRTIO_F_RING_EVENT_IDX;
733 	}
734 
735 	if (vsc->sc_version_1) {
736 		return virtio_pci_negotiate_features_10(vsc,
737 		    guest_feature_names);
738 	}
739 
740 	/* virtio 0.9 only */
741 	host = bus_space_read_4(sc->sc_iot, sc->sc_ioh,
742 				VIRTIO_CONFIG_DEVICE_FEATURES);
743 	negotiated = host & vsc->sc_driver_features;
744 #if VIRTIO_DEBUG
745 	if (guest_feature_names)
746 		virtio_log_features(host, negotiated, guest_feature_names);
747 #endif
748 	bus_space_write_4(sc->sc_iot, sc->sc_ioh,
749 			  VIRTIO_CONFIG_GUEST_FEATURES, negotiated);
750 	vsc->sc_active_features = negotiated;
751 	if (negotiated & VIRTIO_F_RING_INDIRECT_DESC)
752 		vsc->sc_indirect = 1;
753 	else
754 		vsc->sc_indirect = 0;
755 	return 0;
756 }
757 
758 int
759 virtio_pci_negotiate_features_10(struct virtio_softc *vsc,
760     const struct virtio_feature_name *guest_feature_names)
761 {
762 	struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc;
763 	uint64_t host, negotiated;
764 
765 	vsc->sc_driver_features |= VIRTIO_F_VERSION_1;
766 	/* notify on empty is 0.9 only */
767 	vsc->sc_driver_features &= ~VIRTIO_F_NOTIFY_ON_EMPTY;
768 	CWRITE(sc, device_feature_select, 0);
769 	host = CREAD(sc, device_feature);
770 	CWRITE(sc, device_feature_select, 1);
771 	host |= (uint64_t)CREAD(sc, device_feature) << 32;
772 
773 	negotiated = host & vsc->sc_driver_features;
774 #if VIRTIO_DEBUG
775 	if (guest_feature_names)
776 		virtio_log_features(host, negotiated, guest_feature_names);
777 #endif
778 	CWRITE(sc, driver_feature_select, 0);
779 	CWRITE(sc, driver_feature, negotiated & 0xffffffff);
780 	CWRITE(sc, driver_feature_select, 1);
781 	CWRITE(sc, driver_feature, negotiated >> 32);
782 	virtio_pci_set_status(vsc, VIRTIO_CONFIG_DEVICE_STATUS_FEATURES_OK);
783 
784 	if ((CREAD(sc, device_status) &
785 	    VIRTIO_CONFIG_DEVICE_STATUS_FEATURES_OK) == 0) {
786 		printf("%s: Feature negotiation failed\n",
787 		    vsc->sc_dev.dv_xname);
788 		CWRITE(sc, device_status, VIRTIO_CONFIG_DEVICE_STATUS_FAILED);
789 		return ENXIO;
790 	}
791 	vsc->sc_active_features = negotiated;
792 
793 	if (negotiated & VIRTIO_F_RING_INDIRECT_DESC)
794 		vsc->sc_indirect = 1;
795 	else
796 		vsc->sc_indirect = 0;
797 
798 	if ((negotiated & VIRTIO_F_VERSION_1) == 0) {
799 #if VIRTIO_DEBUG
800 		printf("%s: Host rejected Version_1\n", __func__);
801 #endif
802 		CWRITE(sc, device_status, VIRTIO_CONFIG_DEVICE_STATUS_FAILED);
803 		return EINVAL;
804 	}
805 	return 0;
806 }
807 
808 /*
809  * Device configuration registers.
810  */
811 uint8_t
812 virtio_pci_read_device_config_1(struct virtio_softc *vsc, int index)
813 {
814 	struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc;
815 	return bus_space_read_1(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, index);
816 }
817 
818 uint16_t
819 virtio_pci_read_device_config_2(struct virtio_softc *vsc, int index)
820 {
821 	struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc;
822 	return bus_space_read_2(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, index);
823 }
824 
825 uint32_t
826 virtio_pci_read_device_config_4(struct virtio_softc *vsc, int index)
827 {
828 	struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc;
829 	return bus_space_read_4(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, index);
830 }
831 
832 uint64_t
833 virtio_pci_read_device_config_8(struct virtio_softc *vsc, int index)
834 {
835 	struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc;
836 	uint64_t r;
837 
838 	r = bus_space_read_4(sc->sc_devcfg_iot, sc->sc_devcfg_ioh,
839 	    index + sizeof(uint32_t));
840 	r <<= 32;
841 	r += bus_space_read_4(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, index);
842 	return r;
843 }
844 
845 void
846 virtio_pci_write_device_config_1(struct virtio_softc *vsc, int index,
847     uint8_t value)
848 {
849 	struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc;
850 	bus_space_write_1(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, index, value);
851 }
852 
853 void
854 virtio_pci_write_device_config_2(struct virtio_softc *vsc, int index,
855     uint16_t value)
856 {
857 	struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc;
858 	bus_space_write_2(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, index, value);
859 }
860 
861 void
862 virtio_pci_write_device_config_4(struct virtio_softc *vsc,
863 			     int index, uint32_t value)
864 {
865 	struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc;
866 	bus_space_write_4(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, index, value);
867 }
868 
869 void
870 virtio_pci_write_device_config_8(struct virtio_softc *vsc,
871 			     int index, uint64_t value)
872 {
873 	struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc;
874 	bus_space_write_4(sc->sc_devcfg_iot, sc->sc_devcfg_ioh,
875 	    index, value & 0xffffffff);
876 	bus_space_write_4(sc->sc_devcfg_iot, sc->sc_devcfg_ioh,
877 	    index + sizeof(uint32_t), value >> 32);
878 }
879 
880 int
881 virtio_pci_msix_establish(struct virtio_pci_softc *sc,
882     struct pci_attach_args *pa, int idx, int (*handler)(void *), void *ih_arg)
883 {
884 	struct virtio_softc *vsc = &sc->sc_sc;
885 	pci_intr_handle_t ih;
886 
887 	if (pci_intr_map_msix(pa, idx, &ih) != 0) {
888 #if VIRTIO_DEBUG
889 		printf("%s[%d]: pci_intr_map_msix failed\n",
890 		    vsc->sc_dev.dv_xname, idx);
891 #endif
892 		return 1;
893 	}
894 	sc->sc_ih[idx] = pci_intr_establish(sc->sc_pc, ih, vsc->sc_ipl,
895 	    handler, ih_arg, vsc->sc_dev.dv_xname);
896 	if (sc->sc_ih[idx] == NULL) {
897 		printf("%s[%d]: couldn't establish msix interrupt\n",
898 		    vsc->sc_dev.dv_xname, idx);
899 		return 1;
900 	}
901 	return 0;
902 }
903 
904 void
905 virtio_pci_set_msix_queue_vector(struct virtio_pci_softc *sc, uint32_t idx, uint16_t vector)
906 {
907 	if (sc->sc_sc.sc_version_1) {
908 		CWRITE(sc, queue_select, idx);
909 		CWRITE(sc, queue_msix_vector, vector);
910 	} else {
911 		bus_space_write_2(sc->sc_iot, sc->sc_ioh,
912 		    VIRTIO_CONFIG_QUEUE_SELECT, idx);
913 		bus_space_write_2(sc->sc_iot, sc->sc_ioh,
914 		    VIRTIO_MSI_QUEUE_VECTOR, vector);
915 	}
916 }
917 
918 void
919 virtio_pci_set_msix_config_vector(struct virtio_pci_softc *sc, uint16_t vector)
920 {
921 	if (sc->sc_sc.sc_version_1) {
922 		CWRITE(sc, config_msix_vector, vector);
923 	} else {
924 		bus_space_write_2(sc->sc_iot, sc->sc_ioh,
925 		    VIRTIO_MSI_CONFIG_VECTOR, vector);
926 	}
927 }
928 
929 
930 void
931 virtio_pci_free_irqs(struct virtio_pci_softc *sc)
932 {
933 	struct virtio_softc *vsc = &sc->sc_sc;
934 	int i;
935 
936 	if (sc->sc_devcfg_offset == VIRTIO_CONFIG_DEVICE_CONFIG_MSI) {
937 		for (i = 0; i < vsc->sc_nvqs; i++) {
938 			virtio_pci_set_msix_queue_vector(sc, i,
939 			    VIRTIO_MSI_NO_VECTOR);
940 		}
941 	}
942 
943 	for (i = 0; i < MAX_MSIX_VECS; i++) {
944 		if (sc->sc_ih[i]) {
945 			pci_intr_disestablish(sc->sc_pc, sc->sc_ih[i]);
946 			sc->sc_ih[i] = NULL;
947 		}
948 	}
949 
950 	sc->sc_devcfg_offset = VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI;
951 	virtio_pci_adjust_config_region(sc);
952 }
953 
954 int
955 virtio_pci_setup_msix(struct virtio_pci_softc *sc, struct pci_attach_args *pa,
956     int shared)
957 {
958 	struct virtio_softc *vsc = &sc->sc_sc;
959 	int i;
960 
961 	if (virtio_pci_msix_establish(sc, pa, 0, virtio_pci_config_intr, vsc))
962 		return 1;
963 	sc->sc_devcfg_offset = VIRTIO_CONFIG_DEVICE_CONFIG_MSI;
964 	virtio_pci_adjust_config_region(sc);
965 	virtio_pci_set_msix_config_vector(sc, 0);
966 
967 	if (shared) {
968 		if (virtio_pci_msix_establish(sc, pa, 1,
969 		    virtio_pci_shared_queue_intr, vsc)) {
970 			goto fail;
971 		}
972 
973 		for (i = 0; i < vsc->sc_nvqs; i++)
974 			virtio_pci_set_msix_queue_vector(sc, i, 1);
975 	} else {
976 		for (i = 0; i <= vsc->sc_nvqs; i++) {
977 			if (virtio_pci_msix_establish(sc, pa, i + 1,
978 			    virtio_pci_queue_intr, &vsc->sc_vqs[i])) {
979 				goto fail;
980 			}
981 			virtio_pci_set_msix_queue_vector(sc, i, i + 1);
982 		}
983 	}
984 
985 	return 0;
986 fail:
987 	virtio_pci_free_irqs(sc);
988 	return 1;
989 }
990 
991 /*
992  * Interrupt handler.
993  */
994 
995 /*
996  * Only used without MSI-X
997  */
998 int
999 virtio_pci_legacy_intr(void *arg)
1000 {
1001 	struct virtio_pci_softc *sc = arg;
1002 	struct virtio_softc *vsc = &sc->sc_sc;
1003 	int isr, r = 0;
1004 
1005 	/* check and ack the interrupt */
1006 	isr = bus_space_read_1(sc->sc_isr_iot, sc->sc_isr_ioh, 0);
1007 	if (isr == 0)
1008 		return 0;
1009 	KERNEL_LOCK();
1010 	if ((isr & VIRTIO_CONFIG_ISR_CONFIG_CHANGE) &&
1011 	    (vsc->sc_config_change != NULL)) {
1012 		r = (vsc->sc_config_change)(vsc);
1013 	}
1014 	r |= virtio_check_vqs(vsc);
1015 	KERNEL_UNLOCK();
1016 
1017 	return r;
1018 }
1019 
1020 int
1021 virtio_pci_legacy_intr_mpsafe(void *arg)
1022 {
1023 	struct virtio_pci_softc *sc = arg;
1024 	struct virtio_softc *vsc = &sc->sc_sc;
1025 	int isr, r = 0;
1026 
1027 	/* check and ack the interrupt */
1028 	isr = bus_space_read_1(sc->sc_isr_iot, sc->sc_isr_ioh, 0);
1029 	if (isr == 0)
1030 		return 0;
1031 	if ((isr & VIRTIO_CONFIG_ISR_CONFIG_CHANGE) &&
1032 	    (vsc->sc_config_change != NULL)) {
1033 		r = (vsc->sc_config_change)(vsc);
1034 	}
1035 	r |= virtio_check_vqs(vsc);
1036 	return r;
1037 }
1038 
1039 /*
1040  * Only used with MSI-X
1041  */
1042 int
1043 virtio_pci_config_intr(void *arg)
1044 {
1045 	struct virtio_softc *vsc = arg;
1046 
1047 	if (vsc->sc_config_change != NULL)
1048 		return vsc->sc_config_change(vsc);
1049 	return 0;
1050 }
1051 
1052 /*
1053  * Only used with MSI-X
1054  */
1055 int
1056 virtio_pci_queue_intr(void *arg)
1057 {
1058 	struct virtqueue *vq = arg;
1059 
1060 	if (vq->vq_done)
1061 		return (vq->vq_done)(vq);
1062 	return 0;
1063 }
1064 
/*
 * Handler for the MSI-X vector shared by all virtqueues: checks every
 * queue via virtio_check_vqs().
 */
int
virtio_pci_shared_queue_intr(void *arg)
{
	struct virtio_softc *vsc = arg;
	int handled;

	handled = virtio_check_vqs(vsc);
	return handled;
}
1072 
1073 /*
1074  * Interrupt handler to be used when polling.
1075  * We cannot use isr here because it is not defined in MSI-X mode.
1076  */
1077 int
1078 virtio_pci_poll_intr(void *arg)
1079 {
1080 	struct virtio_pci_softc *sc = arg;
1081 	struct virtio_softc *vsc = &sc->sc_sc;
1082 	int r = 0;
1083 
1084 	if (vsc->sc_config_change != NULL)
1085 		r = (vsc->sc_config_change)(vsc);
1086 
1087 	r |= virtio_check_vqs(vsc);
1088 
1089 	return r;
1090 }
1091 
1092 void
1093 virtio_pci_kick(struct virtio_softc *vsc, uint16_t idx)
1094 {
1095 	struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc;
1096 	unsigned offset = 0;
1097 	if (vsc->sc_version_1) {
1098 		offset = vsc->sc_vqs[idx].vq_notify_off *
1099 		    sc->sc_notify_off_multiplier;
1100 	}
1101 	bus_space_write_2(sc->sc_notify_iot, sc->sc_notify_ioh, offset, idx);
1102 }
1103