xref: /openbsd-src/sys/dev/pci/virtio_pci.c (revision de3309eee246ab22f714794c6ed0d90cabe5acc6)
1 /*	$OpenBSD: virtio_pci.c,v 1.34 2023/07/05 18:11:08 patrick Exp $	*/
2 /*	$NetBSD: virtio.c,v 1.3 2011/11/02 23:05:52 njoly Exp $	*/
3 
4 /*
5  * Copyright (c) 2012 Stefan Fritsch.
6  * Copyright (c) 2010 Minoura Makoto.
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  */
29 
30 #include <sys/param.h>
31 #include <sys/systm.h>
32 #include <sys/device.h>
33 #include <sys/mutex.h>
34 
35 #include <dev/pci/pcidevs.h>
36 #include <dev/pci/pcireg.h>
37 #include <dev/pci/pcivar.h>
38 #include <dev/pci/virtio_pcireg.h>
39 
40 #include <dev/pv/virtioreg.h>
41 #include <dev/pv/virtiovar.h>
42 
/*
 * Debug printf: the message is only emitted when the VIRTIO_DEBUG level
 * is at least n.
 */
#define DNPRINTF(n, x...)						\
	do {								\
		if (VIRTIO_DEBUG >= n)					\
			printf(x);					\
	} while (0)
45 
46 
47 /*
48  * XXX: Before being used on big endian arches, the access to config registers
49  * XXX: needs to be reviewed/fixed. The non-device specific registers are
50  * XXX: PCI-endian while the device specific registers are native endian.
51  */
52 
/* Number of slots in sc_ih[], the per-device array of interrupt handles. */
#define MAX_MSIX_VECS	8
54 
/* Declared here so the prototypes can refer to it; defined further below. */
struct virtio_pci_softc;

/* Match, attach and detach entry points. */
int		virtio_pci_match(struct device *, void *, void *);
void		virtio_pci_attach(struct device *, struct device *, void *);
int		virtio_pci_attach_09(struct virtio_pci_softc *,
		    struct pci_attach_args *);
int		virtio_pci_attach_10(struct virtio_pci_softc *,
		    struct pci_attach_args *);
int		virtio_pci_detach(struct device *, int);

/* Transport operations and their helpers. */
void		virtio_pci_kick(struct virtio_softc *, uint16_t);
int		virtio_pci_adjust_config_region(struct virtio_pci_softc *);
uint8_t		virtio_pci_read_device_config_1(struct virtio_softc *, int);
uint16_t	virtio_pci_read_device_config_2(struct virtio_softc *, int);
uint32_t	virtio_pci_read_device_config_4(struct virtio_softc *, int);
uint64_t	virtio_pci_read_device_config_8(struct virtio_softc *, int);
void		virtio_pci_write_device_config_1(struct virtio_softc *, int,
		    uint8_t);
void		virtio_pci_write_device_config_2(struct virtio_softc *, int,
		    uint16_t);
void		virtio_pci_write_device_config_4(struct virtio_softc *, int,
		    uint32_t);
void		virtio_pci_write_device_config_8(struct virtio_softc *, int,
		    uint64_t);
uint16_t	virtio_pci_read_queue_size(struct virtio_softc *, uint16_t);
void		virtio_pci_setup_queue(struct virtio_softc *,
		    struct virtqueue *, uint64_t);
void		virtio_pci_set_status(struct virtio_softc *, int);
int		virtio_pci_negotiate_features(struct virtio_softc *,
		    const struct virtio_feature_name *);
int		virtio_pci_negotiate_features_10(struct virtio_softc *,
		    const struct virtio_feature_name *);

/* MSI-X setup and teardown. */
void		virtio_pci_set_msix_queue_vector(struct virtio_pci_softc *,
		    uint32_t, uint16_t);
void		virtio_pci_set_msix_config_vector(struct virtio_pci_softc *,
		    uint16_t);
int		virtio_pci_msix_establish(struct virtio_pci_softc *,
		    struct pci_attach_args *, int, int (*)(void *), void *);
int		virtio_pci_setup_msix(struct virtio_pci_softc *,
		    struct pci_attach_args *, int);
void		virtio_pci_free_irqs(struct virtio_pci_softc *);

/* Interrupt handlers. */
int		virtio_pci_poll_intr(void *);
int		virtio_pci_legacy_intr(void *);
int		virtio_pci_legacy_intr_mpsafe(void *);
int		virtio_pci_config_intr(void *);
int		virtio_pci_queue_intr(void *);
int		virtio_pci_shared_queue_intr(void *);

/* PCI capability list helpers. */
int		virtio_pci_find_cap(struct virtio_pci_softc *, int, void *,
		    int);
#if VIRTIO_DEBUG
void		virtio_pci_dump_caps(struct virtio_pci_softc *);
#endif
93 
/* Interrupt delivery scheme selected for a device. */
enum irq_type {
	/* MSI-X is not in use */
	IRQ_NO_MSIX = 0,
	/* vec 0: config irq, vec 1 shared by all vqs */
	IRQ_MSIX_SHARED = 1,
	/* vec 0: config irq, vec n: irq of vq[n-1] */
	IRQ_MSIX_PER_VQ = 2,
};
99 
/*
 * Per-device state of the PCI transport.
 * The generic virtio softc must stay the first member: the transport ops
 * cast a struct virtio_softc pointer back to this structure.
 */
struct virtio_pci_softc {
	struct virtio_softc	sc_sc;
	pci_chipset_tag_t	sc_pc;
	pcitag_t		sc_ptag;

	/*
	 * Main register window: the whole i/o bar with 0.9, the common
	 * configuration region with 1.0.
	 */
	bus_space_tag_t		sc_iot;
	bus_space_handle_t	sc_ioh;
	bus_size_t		sc_iosize;

	/* BARs mapped by virtio_pci_attach_10(), filled from index 0 up */
	bus_space_tag_t		sc_bars_iot[4];
	bus_space_handle_t	sc_bars_ioh[4];
	bus_size_t		sc_bars_iosize[4];

	/* Queue notification region, written by virtio_pci_kick() */
	bus_space_tag_t		sc_notify_iot;
	bus_space_handle_t	sc_notify_ioh;
	bus_size_t		sc_notify_iosize;
	/* 1.0 only: scales a queue's vq_notify_off into a byte offset */
	unsigned int		sc_notify_off_multiplier;

	/* Device-specific configuration region */
	bus_space_tag_t		sc_devcfg_iot;
	bus_space_handle_t	sc_devcfg_ioh;
	bus_size_t		sc_devcfg_iosize;
	/*
	 * With 0.9, the offset of the devcfg region in the io bar changes
	 * depending on MSI-X being enabled or not.
	 * With 1.0, this field is still used to remember if MSI-X is enabled
	 * or not.
	 */
	unsigned int		sc_devcfg_offset;

	/* ISR status register, read by the legacy interrupt handlers */
	bus_space_tag_t		sc_isr_iot;
	bus_space_handle_t	sc_isr_ioh;
	bus_size_t		sc_isr_iosize;

	/* Established interrupt handles; unused slots are NULL */
	void			*sc_ih[MAX_MSIX_VECS];

	enum irq_type		sc_irq_type;
};
137 
/* Attachment glue: softc size followed by the driver entry points. */
const struct cfattach virtio_pci_ca = {
	sizeof(struct virtio_pci_softc),
	virtio_pci_match,
	virtio_pci_attach,
	virtio_pci_detach,
	NULL	/* last slot is left unset */
};
145 
/*
 * Transport operations installed into sc_ops by virtio_pci_attach().
 * The initializers are positional, so their order must follow the member
 * order of struct virtio_ops.
 */
struct virtio_ops virtio_pci_ops = {
	virtio_pci_kick,
	virtio_pci_read_device_config_1,
	virtio_pci_read_device_config_2,
	virtio_pci_read_device_config_4,
	virtio_pci_read_device_config_8,
	virtio_pci_write_device_config_1,
	virtio_pci_write_device_config_2,
	virtio_pci_write_device_config_4,
	virtio_pci_write_device_config_8,
	virtio_pci_read_queue_size,
	virtio_pci_setup_queue,
	virtio_pci_set_status,
	virtio_pci_negotiate_features,
	virtio_pci_poll_intr,
};
162 
163 static inline
164 uint64_t _cread(struct virtio_pci_softc *sc, unsigned off, unsigned size)
165 {
166 	uint64_t val;
167 	switch (size) {
168 	case 1:
169 		val = bus_space_read_1(sc->sc_iot, sc->sc_ioh, off);
170 		break;
171 	case 2:
172 		val = bus_space_read_2(sc->sc_iot, sc->sc_ioh, off);
173 		break;
174 	case 4:
175 		val = bus_space_read_4(sc->sc_iot, sc->sc_ioh, off);
176 		break;
177 	case 8:
178 		val = bus_space_read_4(sc->sc_iot, sc->sc_ioh,
179 		    off + sizeof(uint32_t));
180 		val <<= 32;
181 		val += bus_space_read_4(sc->sc_iot, sc->sc_ioh, off);
182 		break;
183 	}
184 	return val;
185 }
186 
/*
 * Read member `memb' of struct virtio_pci_common_cfg through _cread();
 * both the offset and the access width are derived from the structure.
 */
#define CREAD(sc, memb)							\
	_cread(sc,							\
	    offsetof(struct virtio_pci_common_cfg, memb),		\
	    sizeof(((struct virtio_pci_common_cfg *)0)->memb))
189 
/*
 * Write `val' to member `memb' of struct virtio_pci_common_cfg in the
 * main register window of `sc'.  The access width follows the member
 * size; 8-byte members are written as two 4-byte accesses, lower half
 * first.
 *
 * `sc' and `val' are each evaluated exactly once, and the internal
 * locals carry a cw_ prefix so they cannot capture identifiers used in
 * the caller's arguments.
 */
#define CWRITE(sc, memb, val)							\
	do {									\
		struct virtio_pci_softc *cw_sc_ = (sc);				\
		const uint64_t cw_val_ = (uint64_t)(val);			\
		const size_t cw_off_ =						\
		    offsetof(struct virtio_pci_common_cfg, memb);		\
		const size_t cw_size_ =						\
		    sizeof(((struct virtio_pci_common_cfg *)0)->memb);		\
										\
		DNPRINTF(2, "%s: %d: off %#zx size %#zx write %#llx\n",		\
		    __func__, __LINE__, cw_off_, cw_size_,			\
		    (unsigned long long)cw_val_);				\
		switch (cw_size_) {						\
		case 1:								\
			bus_space_write_1(cw_sc_->sc_iot, cw_sc_->sc_ioh,	\
			    cw_off_, cw_val_);					\
			break;							\
		case 2:								\
			bus_space_write_2(cw_sc_->sc_iot, cw_sc_->sc_ioh,	\
			    cw_off_, cw_val_);					\
			break;							\
		case 4:								\
			bus_space_write_4(cw_sc_->sc_iot, cw_sc_->sc_ioh,	\
			    cw_off_, cw_val_);					\
			break;							\
		case 8:								\
			bus_space_write_4(cw_sc_->sc_iot, cw_sc_->sc_ioh,	\
			    cw_off_, cw_val_ & 0xffffffff);			\
			bus_space_write_4(cw_sc_->sc_iot, cw_sc_->sc_ioh,	\
			    cw_off_ + sizeof(uint32_t), cw_val_ >> 32);		\
			break;							\
		}								\
	} while (0)
216 
217 uint16_t
218 virtio_pci_read_queue_size(struct virtio_softc *vsc, uint16_t idx)
219 {
220 	struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc;
221 	uint16_t ret;
222 	if (sc->sc_sc.sc_version_1) {
223 		CWRITE(sc, queue_select, idx);
224 		ret = CREAD(sc, queue_size);
225 	} else {
226 		bus_space_write_2(sc->sc_iot, sc->sc_ioh,
227 		    VIRTIO_CONFIG_QUEUE_SELECT, idx);
228 		ret = bus_space_read_2(sc->sc_iot, sc->sc_ioh,
229 		    VIRTIO_CONFIG_QUEUE_SIZE);
230 	}
231 	return ret;
232 }
233 
234 void
235 virtio_pci_setup_queue(struct virtio_softc *vsc, struct virtqueue *vq,
236     uint64_t addr)
237 {
238 	struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc;
239 	if (sc->sc_sc.sc_version_1) {
240 		CWRITE(sc, queue_select, vq->vq_index);
241 		if (addr == 0) {
242 			CWRITE(sc, queue_enable, 0);
243 			CWRITE(sc, queue_desc, 0);
244 			CWRITE(sc, queue_avail, 0);
245 			CWRITE(sc, queue_used, 0);
246 		} else {
247 			CWRITE(sc, queue_desc, addr);
248 			CWRITE(sc, queue_avail, addr + vq->vq_availoffset);
249 			CWRITE(sc, queue_used, addr + vq->vq_usedoffset);
250 			CWRITE(sc, queue_enable, 1);
251 			vq->vq_notify_off = CREAD(sc, queue_notify_off);
252 		}
253 	} else {
254 		bus_space_write_2(sc->sc_iot, sc->sc_ioh,
255 		    VIRTIO_CONFIG_QUEUE_SELECT, vq->vq_index);
256 		bus_space_write_4(sc->sc_iot, sc->sc_ioh,
257 		    VIRTIO_CONFIG_QUEUE_ADDRESS, addr / VIRTIO_PAGE_SIZE);
258 	}
259 
260 	/*
261 	 * This path is only executed if this function is called after
262 	 * the child's attach function has finished. In other cases,
263 	 * it's done in virtio_pci_setup_msix().
264 	 */
265 	if (sc->sc_irq_type != IRQ_NO_MSIX) {
266 		int vec = 1;
267 		if (sc->sc_irq_type == IRQ_MSIX_PER_VQ)
268 		       vec += vq->vq_index;
269 		if (sc->sc_sc.sc_version_1) {
270 			CWRITE(sc, queue_msix_vector, vec);
271 		} else {
272 			bus_space_write_2(sc->sc_iot, sc->sc_ioh,
273 			    VIRTIO_MSI_QUEUE_VECTOR, vec);
274 		}
275 	}
276 }
277 
278 void
279 virtio_pci_set_status(struct virtio_softc *vsc, int status)
280 {
281 	struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc;
282 	int old = 0;
283 
284 	if (sc->sc_sc.sc_version_1) {
285 		if (status != 0)
286 			old = CREAD(sc, device_status);
287 		CWRITE(sc, device_status, status|old);
288 	} else {
289 		if (status != 0)
290 			old = bus_space_read_1(sc->sc_iot, sc->sc_ioh,
291 			    VIRTIO_CONFIG_DEVICE_STATUS);
292 		bus_space_write_1(sc->sc_iot, sc->sc_ioh,
293 		    VIRTIO_CONFIG_DEVICE_STATUS, status|old);
294 	}
295 }
296 
297 int
298 virtio_pci_match(struct device *parent, void *match, void *aux)
299 {
300 	struct pci_attach_args *pa;
301 
302 	pa = (struct pci_attach_args *)aux;
303 	if (PCI_VENDOR(pa->pa_id) == PCI_VENDOR_OPENBSD &&
304 	    PCI_PRODUCT(pa->pa_id) == PCI_PRODUCT_OPENBSD_CONTROL)
305 		return 1;
306 	if (PCI_VENDOR(pa->pa_id) != PCI_VENDOR_QUMRANET)
307 		return 0;
308 	/* virtio 0.9 */
309 	if (PCI_PRODUCT(pa->pa_id) >= 0x1000 &&
310 	    PCI_PRODUCT(pa->pa_id) <= 0x103f &&
311 	    PCI_REVISION(pa->pa_class) == 0)
312 		return 1;
313 	/* virtio 1.0 */
314 	if (PCI_PRODUCT(pa->pa_id) >= 0x1040 &&
315 	    PCI_PRODUCT(pa->pa_id) <= 0x107f &&
316 	    PCI_REVISION(pa->pa_class) == 1)
317 		return 1;
318 	return 0;
319 }
320 
#if VIRTIO_DEBUG
/*
 * Debug aid: starting at the first vendor specific capability, follow
 * the capability list and print each entry decoded as a
 * struct virtio_pci_cap.
 */
void
virtio_pci_dump_caps(struct virtio_pci_softc *sc)
{
	pci_chipset_tag_t pc = sc->sc_pc;
	pcitag_t tag = sc->sc_ptag;
	int capoff, w;
	union {
		pcireg_t reg[4];
		struct virtio_pci_cap vcap;
	} v;

	if (pci_get_capability(pc, tag, PCI_CAP_VENDSPEC, &capoff,
	    &v.reg[0]) == 0)
		return;

	printf("\n");
	for (;;) {
		/* fetch the four config words that make up one entry */
		for (w = 0; w < 4; w++)
			v.reg[w] = pci_conf_read(pc, tag, capoff + w * 4);
		printf("%s: cfgoff %#x len %#x type %#x bar %#x: off %#x len %#x\n",
			__func__, capoff, v.vcap.cap_len, v.vcap.cfg_type,
			v.vcap.bar, v.vcap.offset, v.vcap.length);
		capoff = v.vcap.cap_next;
		if (capoff == 0)
			break;
	}
}
#endif
347 
348 int
349 virtio_pci_find_cap(struct virtio_pci_softc *sc, int cfg_type, void *buf, int buflen)
350 {
351 	pci_chipset_tag_t pc = sc->sc_pc;
352 	pcitag_t tag = sc->sc_ptag;
353 	unsigned int offset, i, len;
354 	union {
355 		pcireg_t reg[8];
356 		struct virtio_pci_cap vcap;
357 	} *v = buf;
358 
359 	if (buflen < sizeof(struct virtio_pci_cap))
360 		return ERANGE;
361 
362 	if (!pci_get_capability(pc, tag, PCI_CAP_VENDSPEC, &offset, &v->reg[0]))
363 		return ENOENT;
364 
365 	do {
366 		for (i = 0; i < 4; i++)
367 			v->reg[i] = pci_conf_read(pc, tag, offset + i * 4);
368 		if (v->vcap.cfg_type == cfg_type)
369 			break;
370 		offset = v->vcap.cap_next;
371 	} while (offset != 0);
372 
373 	if (offset == 0)
374 		return ENOENT;
375 
376 	if (v->vcap.cap_len > sizeof(struct virtio_pci_cap)) {
377 		len = roundup(v->vcap.cap_len, sizeof(pcireg_t));
378 		if (len > buflen) {
379 			printf("%s: cap too large\n", __func__);
380 			return ERANGE;
381 		}
382 		for (i = 4; i < len / sizeof(pcireg_t);  i++)
383 			v->reg[i] = pci_conf_read(pc, tag, offset + i * 4);
384 	}
385 
386 	return 0;
387 }
388 
389 
/*
 * Number of pcireg_t sized slots between PCI_MAPREG_START and
 * PCI_MAPREG_END; used to size the per-BAR arrays in
 * virtio_pci_attach_10().
 */
#define NMAPREG		((PCI_MAPREG_END - PCI_MAPREG_START) / \
				sizeof(pcireg_t))
392 
393 int
394 virtio_pci_attach_10(struct virtio_pci_softc *sc, struct pci_attach_args *pa)
395 {
396 	struct virtio_pci_cap common, isr, device;
397 	struct virtio_pci_notify_cap notify;
398 	int have_device_cfg = 0;
399 	bus_size_t bars[NMAPREG] = { 0 };
400 	int bars_idx[NMAPREG] = { 0 };
401 	struct virtio_pci_cap *caps[] = { &common, &isr, &device, &notify.cap };
402 	int i, j = 0, ret = 0;
403 
404 	if (virtio_pci_find_cap(sc, VIRTIO_PCI_CAP_COMMON_CFG, &common, sizeof(common)) != 0)
405 		return ENODEV;
406 
407 	if (virtio_pci_find_cap(sc, VIRTIO_PCI_CAP_NOTIFY_CFG, &notify, sizeof(notify)) != 0)
408 		return ENODEV;
409 	if (virtio_pci_find_cap(sc, VIRTIO_PCI_CAP_ISR_CFG, &isr, sizeof(isr)) != 0)
410 		return ENODEV;
411 	if (virtio_pci_find_cap(sc, VIRTIO_PCI_CAP_DEVICE_CFG, &device, sizeof(device)) != 0)
412 		memset(&device, 0, sizeof(device));
413 	else
414 		have_device_cfg = 1;
415 
416 	/*
417 	 * XXX Maybe there are devices that offer the pci caps but not the
418 	 * XXX VERSION_1 feature bit? Then we should check the feature bit
419 	 * XXX here and fall back to 0.9 out if not present.
420 	 */
421 
422 	/* Figure out which bars we need to map */
423 	for (i = 0; i < nitems(caps); i++) {
424 		int bar = caps[i]->bar;
425 		bus_size_t len = caps[i]->offset + caps[i]->length;
426 		if (caps[i]->length == 0)
427 			continue;
428 		if (bars[bar] < len)
429 			bars[bar] = len;
430 	}
431 
432 	for (i = 0; i < nitems(bars); i++) {
433 		int reg;
434 		pcireg_t type;
435 		if (bars[i] == 0)
436 			continue;
437 		reg = PCI_MAPREG_START + i * 4;
438 		type = pci_mapreg_type(sc->sc_pc, sc->sc_ptag, reg);
439 		if (pci_mapreg_map(pa, reg, type, 0, &sc->sc_bars_iot[j],
440 		    &sc->sc_bars_ioh[j], NULL, &sc->sc_bars_iosize[j],
441 		    bars[i])) {
442 			printf("%s: can't map bar %u \n",
443 			    sc->sc_sc.sc_dev.dv_xname, i);
444 			ret = EIO;
445 			goto err;
446 		}
447 		bars_idx[i] = j;
448 		j++;
449 	}
450 
451 	i = bars_idx[notify.cap.bar];
452 	if (bus_space_subregion(sc->sc_bars_iot[i], sc->sc_bars_ioh[i],
453 	    notify.cap.offset, notify.cap.length, &sc->sc_notify_ioh) != 0) {
454 		printf("%s: can't map notify i/o space\n",
455 		    sc->sc_sc.sc_dev.dv_xname);
456 		ret = EIO;
457 		goto err;
458 	}
459 	sc->sc_notify_iosize = notify.cap.length;
460 	sc->sc_notify_iot = sc->sc_bars_iot[i];
461 	sc->sc_notify_off_multiplier = notify.notify_off_multiplier;
462 
463 	if (have_device_cfg) {
464 		i = bars_idx[device.bar];
465 		if (bus_space_subregion(sc->sc_bars_iot[i], sc->sc_bars_ioh[i],
466 		    device.offset, device.length, &sc->sc_devcfg_ioh) != 0) {
467 			printf("%s: can't map devcfg i/o space\n",
468 			    sc->sc_sc.sc_dev.dv_xname);
469 			ret = EIO;
470 			goto err;
471 		}
472 		sc->sc_devcfg_iosize = device.length;
473 		sc->sc_devcfg_iot = sc->sc_bars_iot[i];
474 	}
475 
476 	i = bars_idx[isr.bar];
477 	if (bus_space_subregion(sc->sc_bars_iot[i], sc->sc_bars_ioh[i],
478 	    isr.offset, isr.length, &sc->sc_isr_ioh) != 0) {
479 		printf("%s: can't map isr i/o space\n",
480 		    sc->sc_sc.sc_dev.dv_xname);
481 		ret = EIO;
482 		goto err;
483 	}
484 	sc->sc_isr_iosize = isr.length;
485 	sc->sc_isr_iot = sc->sc_bars_iot[i];
486 
487 	i = bars_idx[common.bar];
488 	if (bus_space_subregion(sc->sc_bars_iot[i], sc->sc_bars_ioh[i],
489 	    common.offset, common.length, &sc->sc_ioh) != 0) {
490 		printf("%s: can't map common i/o space\n",
491 		    sc->sc_sc.sc_dev.dv_xname);
492 		ret = EIO;
493 		goto err;
494 	}
495 	sc->sc_iosize = common.length;
496 	sc->sc_iot = sc->sc_bars_iot[i];
497 
498 	sc->sc_sc.sc_version_1 = 1;
499 	return 0;
500 
501 err:
502 	/* there is no pci_mapreg_unmap() */
503 	return ret;
504 }
505 
506 int
507 virtio_pci_attach_09(struct virtio_pci_softc *sc, struct pci_attach_args *pa)
508 {
509 	struct virtio_softc *vsc = &sc->sc_sc;
510 	pcireg_t type;
511 
512 	type = pci_mapreg_type(pa->pa_pc, pa->pa_tag, PCI_MAPREG_START);
513 	if (pci_mapreg_map(pa, PCI_MAPREG_START, type, 0,
514 	    &sc->sc_iot, &sc->sc_ioh, NULL, &sc->sc_iosize, 0)) {
515 		printf("%s: can't map i/o space\n", vsc->sc_dev.dv_xname);
516 		return EIO;
517 	}
518 
519 	if (bus_space_subregion(sc->sc_iot, sc->sc_ioh,
520 	    VIRTIO_CONFIG_QUEUE_NOTIFY, 2, &sc->sc_notify_ioh) != 0) {
521 		printf("%s: can't map notify i/o space\n",
522 		    vsc->sc_dev.dv_xname);
523 		return EIO;
524 	}
525 	sc->sc_notify_iosize = 2;
526 	sc->sc_notify_iot = sc->sc_iot;
527 
528 	if (bus_space_subregion(sc->sc_iot, sc->sc_ioh,
529 	    VIRTIO_CONFIG_ISR_STATUS, 1, &sc->sc_isr_ioh) != 0) {
530 		printf("%s: can't map isr i/o space\n",
531 		    vsc->sc_dev.dv_xname);
532 		return EIO;
533 	}
534 	sc->sc_isr_iosize = 1;
535 	sc->sc_isr_iot = sc->sc_iot;
536 
537 	return 0;
538 }
539 
540 void
541 virtio_pci_attach(struct device *parent, struct device *self, void *aux)
542 {
543 	struct virtio_pci_softc *sc = (struct virtio_pci_softc *)self;
544 	struct virtio_softc *vsc = &sc->sc_sc;
545 	struct pci_attach_args *pa = (struct pci_attach_args *)aux;
546 	pci_chipset_tag_t pc = pa->pa_pc;
547 	pcitag_t tag = pa->pa_tag;
548 	int revision, ret = ENODEV;
549 	pcireg_t id;
550 	char const *intrstr;
551 	pci_intr_handle_t ih;
552 
553 	revision = PCI_REVISION(pa->pa_class);
554 	switch (revision) {
555 	case 0:
556 		/* subsystem ID shows what I am */
557 		id = PCI_PRODUCT(pci_conf_read(pc, tag, PCI_SUBSYS_ID_REG));
558 		break;
559 	case 1:
560 		id = PCI_PRODUCT(pa->pa_id) - 0x1040;
561 		break;
562 	default:
563 		printf("unknown revision 0x%02x; giving up\n", revision);
564 		return;
565 	}
566 
567 	sc->sc_pc = pc;
568 	sc->sc_ptag = pa->pa_tag;
569 	vsc->sc_dmat = pa->pa_dmat;
570 
571 #if defined(__i386__) || defined(__amd64__)
572 	/*
573 	 * For virtio, ignore normal MSI black/white-listing depending on the
574 	 * PCI bridge but enable it unconditionally.
575 	 */
576 	pa->pa_flags |= PCI_FLAGS_MSI_ENABLED;
577 #endif
578 
579 #if VIRTIO_DEBUG
580 	virtio_pci_dump_caps(sc);
581 #endif
582 
583 	vsc->sc_ops = &virtio_pci_ops;
584 	if ((vsc->sc_dev.dv_cfdata->cf_flags & VIRTIO_CF_NO_VERSION_1) == 0 &&
585 	    (revision == 1 ||
586 	     (vsc->sc_dev.dv_cfdata->cf_flags & VIRTIO_CF_PREFER_VERSION_1))) {
587 		ret = virtio_pci_attach_10(sc, pa);
588 	}
589 	if (ret != 0 && revision == 0) {
590 		/* revision 0 means 0.9 only or both 0.9 and 1.0 */
591 		ret = virtio_pci_attach_09(sc, pa);
592 	}
593 	if (ret != 0) {
594 		printf(": Cannot attach (%d)\n", ret);
595 		return;
596 	}
597 
598 	sc->sc_devcfg_offset = VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI;
599 	sc->sc_irq_type = IRQ_NO_MSIX;
600 	if (virtio_pci_adjust_config_region(sc) != 0)
601 		return;
602 
603 	virtio_device_reset(vsc);
604 	virtio_set_status(vsc, VIRTIO_CONFIG_DEVICE_STATUS_ACK);
605 	virtio_set_status(vsc, VIRTIO_CONFIG_DEVICE_STATUS_DRIVER);
606 
607 	printf("\n");
608 	vsc->sc_childdevid = id;
609 	vsc->sc_child = NULL;
610 	config_found(self, sc, NULL);
611 	if (vsc->sc_child == NULL) {
612 		printf("%s: no matching child driver; not configured\n",
613 		    vsc->sc_dev.dv_xname);
614 		goto fail_1;
615 	}
616 	if (vsc->sc_child == VIRTIO_CHILD_ERROR) {
617 		printf("%s: virtio configuration failed\n",
618 		    vsc->sc_dev.dv_xname);
619 		goto fail_1;
620 	}
621 
622 	if (virtio_pci_setup_msix(sc, pa, 0) == 0) {
623 		sc->sc_irq_type = IRQ_MSIX_PER_VQ;
624 		intrstr = "msix per-VQ";
625 	} else if (virtio_pci_setup_msix(sc, pa, 1) == 0) {
626 		sc->sc_irq_type = IRQ_MSIX_SHARED;
627 		intrstr = "msix shared";
628 	} else {
629 		int (*ih_func)(void *) = virtio_pci_legacy_intr;
630 		if (pci_intr_map_msi(pa, &ih) != 0 && pci_intr_map(pa, &ih) != 0) {
631 			printf("%s: couldn't map interrupt\n", vsc->sc_dev.dv_xname);
632 			goto fail_2;
633 		}
634 		intrstr = pci_intr_string(pc, ih);
635 		/*
636 		 * We always set the IPL_MPSAFE flag in order to do the relatively
637 		 * expensive ISR read without lock, and then grab the kernel lock in
638 		 * the interrupt handler.
639 		 */
640 		if (vsc->sc_ipl & IPL_MPSAFE)
641 			ih_func = virtio_pci_legacy_intr_mpsafe;
642 		sc->sc_ih[0] = pci_intr_establish(pc, ih, vsc->sc_ipl | IPL_MPSAFE,
643 		    ih_func, sc, vsc->sc_dev.dv_xname);
644 		if (sc->sc_ih[0] == NULL) {
645 			printf("%s: couldn't establish interrupt", vsc->sc_dev.dv_xname);
646 			if (intrstr != NULL)
647 				printf(" at %s", intrstr);
648 			printf("\n");
649 			goto fail_2;
650 		}
651 	}
652 	printf("%s: %s\n", vsc->sc_dev.dv_xname, intrstr);
653 
654 	return;
655 
656 fail_2:
657 	config_detach(vsc->sc_child, 0);
658 fail_1:
659 	/* no pci_mapreg_unmap() or pci_intr_unmap() */
660 	virtio_set_status(vsc, VIRTIO_CONFIG_DEVICE_STATUS_FAILED);
661 }
662 
663 int
664 virtio_pci_detach(struct device *self, int flags)
665 {
666 	struct virtio_pci_softc *sc = (struct virtio_pci_softc *)self;
667 	struct virtio_softc *vsc = &sc->sc_sc;
668 	int r;
669 
670 	if (vsc->sc_child != 0 && vsc->sc_child != VIRTIO_CHILD_ERROR) {
671 		r = config_detach(vsc->sc_child, flags);
672 		if (r)
673 			return r;
674 	}
675 	KASSERT(vsc->sc_child == 0 || vsc->sc_child == VIRTIO_CHILD_ERROR);
676 	KASSERT(vsc->sc_vqs == 0);
677 	virtio_pci_free_irqs(sc);
678 	if (sc->sc_iosize)
679 		bus_space_unmap(sc->sc_iot, sc->sc_ioh, sc->sc_iosize);
680 	sc->sc_iosize = 0;
681 
682 	return 0;
683 }
684 
685 int
686 virtio_pci_adjust_config_region(struct virtio_pci_softc *sc)
687 {
688 	if (sc->sc_sc.sc_version_1)
689 		return 0;
690 	sc->sc_devcfg_iosize = sc->sc_iosize - sc->sc_devcfg_offset;
691 	sc->sc_devcfg_iot = sc->sc_iot;
692 	if (bus_space_subregion(sc->sc_iot, sc->sc_ioh, sc->sc_devcfg_offset,
693 	    sc->sc_devcfg_iosize, &sc->sc_devcfg_ioh) != 0) {
694 		printf("%s: can't map config i/o space\n",
695 		    sc->sc_sc.sc_dev.dv_xname);
696 		return 1;
697 	}
698 	return 0;
699 }
700 
701 /*
702  * Feature negotiation.
703  * Prints available / negotiated features if guest_feature_names != NULL and
704  * VIRTIO_DEBUG is 1
705  */
706 int
707 virtio_pci_negotiate_features(struct virtio_softc *vsc,
708     const struct virtio_feature_name *guest_feature_names)
709 {
710 	struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc;
711 	uint64_t host, negotiated;
712 
713 	vsc->sc_active_features = 0;
714 
715 	/*
716 	 * We enable indirect descriptors by default. They can be switched
717 	 * off by setting bit 1 in the driver flags, see config(8)
718 	 */
719 	if (!(vsc->sc_dev.dv_cfdata->cf_flags & VIRTIO_CF_NO_INDIRECT) &&
720 	    !(vsc->sc_child->dv_cfdata->cf_flags & VIRTIO_CF_NO_INDIRECT)) {
721 		vsc->sc_driver_features |= VIRTIO_F_RING_INDIRECT_DESC;
722 	} else if (guest_feature_names != NULL) {
723 		printf(" RingIndirectDesc disabled by UKC");
724 	}
725 
726 	/*
727 	 * The driver must add VIRTIO_F_RING_EVENT_IDX if it supports it.
728 	 * If it did, check if it is disabled by bit 2 in the driver flags.
729 	 */
730 	if ((vsc->sc_driver_features & VIRTIO_F_RING_EVENT_IDX) &&
731 	    ((vsc->sc_dev.dv_cfdata->cf_flags & VIRTIO_CF_NO_EVENT_IDX) ||
732 	    (vsc->sc_child->dv_cfdata->cf_flags & VIRTIO_CF_NO_EVENT_IDX))) {
733 		if (guest_feature_names != NULL)
734 			printf(" RingEventIdx disabled by UKC");
735 		vsc->sc_driver_features &= ~VIRTIO_F_RING_EVENT_IDX;
736 	}
737 
738 	if (vsc->sc_version_1) {
739 		return virtio_pci_negotiate_features_10(vsc,
740 		    guest_feature_names);
741 	}
742 
743 	/* virtio 0.9 only */
744 	host = bus_space_read_4(sc->sc_iot, sc->sc_ioh,
745 				VIRTIO_CONFIG_DEVICE_FEATURES);
746 	negotiated = host & vsc->sc_driver_features;
747 #if VIRTIO_DEBUG
748 	if (guest_feature_names)
749 		virtio_log_features(host, negotiated, guest_feature_names);
750 #endif
751 	bus_space_write_4(sc->sc_iot, sc->sc_ioh,
752 			  VIRTIO_CONFIG_GUEST_FEATURES, negotiated);
753 	vsc->sc_active_features = negotiated;
754 	if (negotiated & VIRTIO_F_RING_INDIRECT_DESC)
755 		vsc->sc_indirect = 1;
756 	else
757 		vsc->sc_indirect = 0;
758 	return 0;
759 }
760 
761 int
762 virtio_pci_negotiate_features_10(struct virtio_softc *vsc,
763     const struct virtio_feature_name *guest_feature_names)
764 {
765 	struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc;
766 	uint64_t host, negotiated;
767 
768 	vsc->sc_driver_features |= VIRTIO_F_VERSION_1;
769 	/* notify on empty is 0.9 only */
770 	vsc->sc_driver_features &= ~VIRTIO_F_NOTIFY_ON_EMPTY;
771 	CWRITE(sc, device_feature_select, 0);
772 	host = CREAD(sc, device_feature);
773 	CWRITE(sc, device_feature_select, 1);
774 	host |= (uint64_t)CREAD(sc, device_feature) << 32;
775 
776 	negotiated = host & vsc->sc_driver_features;
777 #if VIRTIO_DEBUG
778 	if (guest_feature_names)
779 		virtio_log_features(host, negotiated, guest_feature_names);
780 #endif
781 	CWRITE(sc, driver_feature_select, 0);
782 	CWRITE(sc, driver_feature, negotiated & 0xffffffff);
783 	CWRITE(sc, driver_feature_select, 1);
784 	CWRITE(sc, driver_feature, negotiated >> 32);
785 	virtio_pci_set_status(vsc, VIRTIO_CONFIG_DEVICE_STATUS_FEATURES_OK);
786 
787 	if ((CREAD(sc, device_status) &
788 	    VIRTIO_CONFIG_DEVICE_STATUS_FEATURES_OK) == 0) {
789 		printf("%s: Feature negotiation failed\n",
790 		    vsc->sc_dev.dv_xname);
791 		CWRITE(sc, device_status, VIRTIO_CONFIG_DEVICE_STATUS_FAILED);
792 		return ENXIO;
793 	}
794 	vsc->sc_active_features = negotiated;
795 
796 	if (negotiated & VIRTIO_F_RING_INDIRECT_DESC)
797 		vsc->sc_indirect = 1;
798 	else
799 		vsc->sc_indirect = 0;
800 
801 	if ((negotiated & VIRTIO_F_VERSION_1) == 0) {
802 #if VIRTIO_DEBUG
803 		printf("%s: Host rejected Version_1\n", __func__);
804 #endif
805 		CWRITE(sc, device_status, VIRTIO_CONFIG_DEVICE_STATUS_FAILED);
806 		return EINVAL;
807 	}
808 	return 0;
809 }
810 
811 /*
812  * Device configuration registers.
813  */
814 uint8_t
815 virtio_pci_read_device_config_1(struct virtio_softc *vsc, int index)
816 {
817 	struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc;
818 	return bus_space_read_1(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, index);
819 }
820 
821 uint16_t
822 virtio_pci_read_device_config_2(struct virtio_softc *vsc, int index)
823 {
824 	struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc;
825 	return bus_space_read_2(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, index);
826 }
827 
828 uint32_t
829 virtio_pci_read_device_config_4(struct virtio_softc *vsc, int index)
830 {
831 	struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc;
832 	return bus_space_read_4(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, index);
833 }
834 
835 uint64_t
836 virtio_pci_read_device_config_8(struct virtio_softc *vsc, int index)
837 {
838 	struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc;
839 	uint64_t r;
840 
841 	r = bus_space_read_4(sc->sc_devcfg_iot, sc->sc_devcfg_ioh,
842 	    index + sizeof(uint32_t));
843 	r <<= 32;
844 	r += bus_space_read_4(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, index);
845 	return r;
846 }
847 
848 void
849 virtio_pci_write_device_config_1(struct virtio_softc *vsc, int index,
850     uint8_t value)
851 {
852 	struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc;
853 	bus_space_write_1(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, index, value);
854 }
855 
856 void
857 virtio_pci_write_device_config_2(struct virtio_softc *vsc, int index,
858     uint16_t value)
859 {
860 	struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc;
861 	bus_space_write_2(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, index, value);
862 }
863 
864 void
865 virtio_pci_write_device_config_4(struct virtio_softc *vsc,
866 			     int index, uint32_t value)
867 {
868 	struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc;
869 	bus_space_write_4(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, index, value);
870 }
871 
872 void
873 virtio_pci_write_device_config_8(struct virtio_softc *vsc,
874 			     int index, uint64_t value)
875 {
876 	struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc;
877 	bus_space_write_4(sc->sc_devcfg_iot, sc->sc_devcfg_ioh,
878 	    index, value & 0xffffffff);
879 	bus_space_write_4(sc->sc_devcfg_iot, sc->sc_devcfg_ioh,
880 	    index + sizeof(uint32_t), value >> 32);
881 }
882 
883 int
884 virtio_pci_msix_establish(struct virtio_pci_softc *sc,
885     struct pci_attach_args *pa, int idx, int (*handler)(void *), void *ih_arg)
886 {
887 	struct virtio_softc *vsc = &sc->sc_sc;
888 	pci_intr_handle_t ih;
889 
890 	if (pci_intr_map_msix(pa, idx, &ih) != 0) {
891 #if VIRTIO_DEBUG
892 		printf("%s[%d]: pci_intr_map_msix failed\n",
893 		    vsc->sc_dev.dv_xname, idx);
894 #endif
895 		return 1;
896 	}
897 	sc->sc_ih[idx] = pci_intr_establish(sc->sc_pc, ih, vsc->sc_ipl,
898 	    handler, ih_arg, vsc->sc_dev.dv_xname);
899 	if (sc->sc_ih[idx] == NULL) {
900 		printf("%s[%d]: couldn't establish msix interrupt\n",
901 		    vsc->sc_dev.dv_xname, idx);
902 		return 1;
903 	}
904 	return 0;
905 }
906 
907 void
908 virtio_pci_set_msix_queue_vector(struct virtio_pci_softc *sc, uint32_t idx, uint16_t vector)
909 {
910 	if (sc->sc_sc.sc_version_1) {
911 		CWRITE(sc, queue_select, idx);
912 		CWRITE(sc, queue_msix_vector, vector);
913 	} else {
914 		bus_space_write_2(sc->sc_iot, sc->sc_ioh,
915 		    VIRTIO_CONFIG_QUEUE_SELECT, idx);
916 		bus_space_write_2(sc->sc_iot, sc->sc_ioh,
917 		    VIRTIO_MSI_QUEUE_VECTOR, vector);
918 	}
919 }
920 
921 void
922 virtio_pci_set_msix_config_vector(struct virtio_pci_softc *sc, uint16_t vector)
923 {
924 	if (sc->sc_sc.sc_version_1) {
925 		CWRITE(sc, config_msix_vector, vector);
926 	} else {
927 		bus_space_write_2(sc->sc_iot, sc->sc_ioh,
928 		    VIRTIO_MSI_CONFIG_VECTOR, vector);
929 	}
930 }
931 
932 
933 void
934 virtio_pci_free_irqs(struct virtio_pci_softc *sc)
935 {
936 	struct virtio_softc *vsc = &sc->sc_sc;
937 	int i;
938 
939 	if (sc->sc_devcfg_offset == VIRTIO_CONFIG_DEVICE_CONFIG_MSI) {
940 		for (i = 0; i < vsc->sc_nvqs; i++) {
941 			virtio_pci_set_msix_queue_vector(sc, i,
942 			    VIRTIO_MSI_NO_VECTOR);
943 		}
944 	}
945 
946 	for (i = 0; i < MAX_MSIX_VECS; i++) {
947 		if (sc->sc_ih[i]) {
948 			pci_intr_disestablish(sc->sc_pc, sc->sc_ih[i]);
949 			sc->sc_ih[i] = NULL;
950 		}
951 	}
952 
953 	sc->sc_devcfg_offset = VIRTIO_CONFIG_DEVICE_CONFIG_NOMSI;
954 	virtio_pci_adjust_config_region(sc);
955 }
956 
957 int
958 virtio_pci_setup_msix(struct virtio_pci_softc *sc, struct pci_attach_args *pa,
959     int shared)
960 {
961 	struct virtio_softc *vsc = &sc->sc_sc;
962 	int i;
963 
964 	if (virtio_pci_msix_establish(sc, pa, 0, virtio_pci_config_intr, vsc))
965 		return 1;
966 	sc->sc_devcfg_offset = VIRTIO_CONFIG_DEVICE_CONFIG_MSI;
967 	virtio_pci_adjust_config_region(sc);
968 	virtio_pci_set_msix_config_vector(sc, 0);
969 
970 	if (shared) {
971 		if (virtio_pci_msix_establish(sc, pa, 1,
972 		    virtio_pci_shared_queue_intr, vsc)) {
973 			goto fail;
974 		}
975 
976 		for (i = 0; i < vsc->sc_nvqs; i++)
977 			virtio_pci_set_msix_queue_vector(sc, i, 1);
978 	} else {
979 		for (i = 0; i < vsc->sc_nvqs; i++) {
980 			if (virtio_pci_msix_establish(sc, pa, i + 1,
981 			    virtio_pci_queue_intr, &vsc->sc_vqs[i])) {
982 				goto fail;
983 			}
984 			virtio_pci_set_msix_queue_vector(sc, i, i + 1);
985 		}
986 	}
987 
988 	return 0;
989 fail:
990 	virtio_pci_free_irqs(sc);
991 	return 1;
992 }
993 
994 /*
995  * Interrupt handler.
996  */
997 
998 /*
999  * Only used without MSI-X
1000  */
1001 int
1002 virtio_pci_legacy_intr(void *arg)
1003 {
1004 	struct virtio_pci_softc *sc = arg;
1005 	struct virtio_softc *vsc = &sc->sc_sc;
1006 	int isr, r = 0;
1007 
1008 	/* check and ack the interrupt */
1009 	isr = bus_space_read_1(sc->sc_isr_iot, sc->sc_isr_ioh, 0);
1010 	if (isr == 0)
1011 		return 0;
1012 	KERNEL_LOCK();
1013 	if ((isr & VIRTIO_CONFIG_ISR_CONFIG_CHANGE) &&
1014 	    (vsc->sc_config_change != NULL)) {
1015 		r = (vsc->sc_config_change)(vsc);
1016 	}
1017 	r |= virtio_check_vqs(vsc);
1018 	KERNEL_UNLOCK();
1019 
1020 	return r;
1021 }
1022 
1023 int
1024 virtio_pci_legacy_intr_mpsafe(void *arg)
1025 {
1026 	struct virtio_pci_softc *sc = arg;
1027 	struct virtio_softc *vsc = &sc->sc_sc;
1028 	int isr, r = 0;
1029 
1030 	/* check and ack the interrupt */
1031 	isr = bus_space_read_1(sc->sc_isr_iot, sc->sc_isr_ioh, 0);
1032 	if (isr == 0)
1033 		return 0;
1034 	if ((isr & VIRTIO_CONFIG_ISR_CONFIG_CHANGE) &&
1035 	    (vsc->sc_config_change != NULL)) {
1036 		r = (vsc->sc_config_change)(vsc);
1037 	}
1038 	r |= virtio_check_vqs(vsc);
1039 	return r;
1040 }
1041 
1042 /*
1043  * Only used with MSI-X
1044  */
1045 int
1046 virtio_pci_config_intr(void *arg)
1047 {
1048 	struct virtio_softc *vsc = arg;
1049 
1050 	if (vsc->sc_config_change != NULL)
1051 		return vsc->sc_config_change(vsc);
1052 	return 0;
1053 }
1054 
1055 /*
1056  * Only used with MSI-X
1057  */
1058 int
1059 virtio_pci_queue_intr(void *arg)
1060 {
1061 	struct virtqueue *vq = arg;
1062 
1063 	if (vq->vq_done)
1064 		return (vq->vq_done)(vq);
1065 	return 0;
1066 }
1067 
/*
 * Handler for the MSI-X vector shared by all virtqueues: lets
 * virtio_check_vqs() look at every queue and returns its result.
 */
int
virtio_pci_shared_queue_intr(void *arg)
{
	struct virtio_softc *vsc = arg;
	int handled;

	handled = virtio_check_vqs(vsc);
	return handled;
}
1075 
1076 /*
1077  * Interrupt handler to be used when polling.
1078  * We cannot use isr here because it is not defined in MSI-X mode.
1079  */
1080 int
1081 virtio_pci_poll_intr(void *arg)
1082 {
1083 	struct virtio_pci_softc *sc = arg;
1084 	struct virtio_softc *vsc = &sc->sc_sc;
1085 	int r = 0;
1086 
1087 	if (vsc->sc_config_change != NULL)
1088 		r = (vsc->sc_config_change)(vsc);
1089 
1090 	r |= virtio_check_vqs(vsc);
1091 
1092 	return r;
1093 }
1094 
1095 void
1096 virtio_pci_kick(struct virtio_softc *vsc, uint16_t idx)
1097 {
1098 	struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc;
1099 	unsigned offset = 0;
1100 	if (vsc->sc_version_1) {
1101 		offset = vsc->sc_vqs[idx].vq_notify_off *
1102 		    sc->sc_notify_off_multiplier;
1103 	}
1104 	bus_space_write_2(sc->sc_notify_iot, sc->sc_notify_ioh, offset, idx);
1105 }
1106