xref: /netbsd-src/sys/dev/pci/virtio.c (revision 8feb0f0b7eaff0608f8350bbfa3098827b4bb91b)
1 /*	$NetBSD: virtio.c,v 1.65 2023/01/03 19:33:31 jakllsch Exp $	*/
2 
3 /*
4  * Copyright (c) 2020 The NetBSD Foundation, Inc.
5  * Copyright (c) 2012 Stefan Fritsch, Alexander Fiveg.
6  * Copyright (c) 2010 Minoura Makoto.
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  */
29 
30 #include <sys/cdefs.h>
31 __KERNEL_RCSID(0, "$NetBSD: virtio.c,v 1.65 2023/01/03 19:33:31 jakllsch Exp $");
32 
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/kernel.h>
36 #include <sys/atomic.h>
37 #include <sys/bus.h>
38 #include <sys/device.h>
39 #include <sys/kmem.h>
40 #include <sys/module.h>
41 
42 #define VIRTIO_PRIVATE
43 
44 #include <dev/pci/virtioreg.h> /* XXX: move to non-pci */
45 #include <dev/pci/virtiovar.h> /* XXX: move to non-pci */
46 
47 #define MINSEG_INDIRECT		2 /* use indirect if nsegs >= this value */
48 
/*
 * Names of virtio device types; the array index is the device type
 * number.  The list is incomplete.
 */
static const char *virtio_device_name[] = {
	[0] = "unknown (0)",
	[1] = "network",
	[2] = "block",
	[3] = "console",
	[4] = "entropy",
	[5] = "memory balloon",
	[6] = "I/O memory",
	[7] = "remote processor messaging",
	[8] = "SCSI",
	[9] = "9P transport",
};
#define NDEVNAMES	__arraycount(virtio_device_name)
63 
64 static void	virtio_init_vq(struct virtio_softc *,
65 		    struct virtqueue *, const bool);
66 
67 void
68 virtio_set_status(struct virtio_softc *sc, int status)
69 {
70 	sc->sc_ops->set_status(sc, status);
71 }
72 
/*
 * Reset the device.
 *
 * To reset the device to a known state, do the following:
 *	virtio_reset(sc);	     // this will stop the device activity
 *	<dequeue finished requests>; // virtio_dequeue() still can be called
 *	<revoke pending requests in the vqs if any>;
 *	virtio_reinit_start(sc);     // dequeue prohibited
 *	virtio_negotiate_features(sc, requestedfeatures);
 *				     // result is in sc->sc_active_features
 *	<some other initialization>;
 *	virtio_reinit_end(sc);	     // device activated; enqueue allowed
 * Once attached, feature negotiation can only be allowed after virtio_reset.
 */
void
virtio_reset(struct virtio_softc *sc)
{
	/* virtio_device_reset() is defined outside this part of the file */
	virtio_device_reset(sc);
}
92 
93 int
94 virtio_reinit_start(struct virtio_softc *sc)
95 {
96 	int i, r;
97 
98 	virtio_set_status(sc, VIRTIO_CONFIG_DEVICE_STATUS_ACK);
99 	virtio_set_status(sc, VIRTIO_CONFIG_DEVICE_STATUS_DRIVER);
100 	for (i = 0; i < sc->sc_nvqs; i++) {
101 		int n;
102 		struct virtqueue *vq = &sc->sc_vqs[i];
103 		n = sc->sc_ops->read_queue_size(sc, vq->vq_index);
104 		if (n == 0)	/* vq disappeared */
105 			continue;
106 		if (n != vq->vq_num) {
107 			panic("%s: virtqueue size changed, vq index %d\n",
108 			    device_xname(sc->sc_dev),
109 			    vq->vq_index);
110 		}
111 		virtio_init_vq(sc, vq, true);
112 		sc->sc_ops->setup_queue(sc, vq->vq_index,
113 		    vq->vq_dmamap->dm_segs[0].ds_addr);
114 	}
115 
116 	r = sc->sc_ops->setup_interrupts(sc, 1);
117 	if (r != 0)
118 		goto fail;
119 
120 	return 0;
121 
122 fail:
123 	virtio_set_status(sc, VIRTIO_CONFIG_DEVICE_STATUS_FAILED);
124 
125 	return 1;
126 }
127 
128 void
129 virtio_reinit_end(struct virtio_softc *sc)
130 {
131 	virtio_set_status(sc, VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK);
132 }
133 
134 /*
135  * Feature negotiation.
136  */
137 void
138 virtio_negotiate_features(struct virtio_softc *sc, uint64_t guest_features)
139 {
140 	if (!(device_cfdata(sc->sc_dev)->cf_flags & 1) &&
141 	    !(device_cfdata(sc->sc_child)->cf_flags & 1)) /* XXX */
142 		guest_features |= VIRTIO_F_RING_INDIRECT_DESC;
143 	sc->sc_ops->neg_features(sc, guest_features);
144 	if (sc->sc_active_features & VIRTIO_F_RING_INDIRECT_DESC)
145 		sc->sc_indirect = true;
146 	else
147 		sc->sc_indirect = false;
148 }
149 
150 
151 /*
152  * Device configuration registers readers/writers
153  */
/*
 * Debug tracing for the config register readers below.  The printing
 * variants are compiled out (#if 0), so both macros expand to nothing.
 *
 * NOTE: when enabled, both macros expand to several statements and refer
 * to a variable named `sc' in the calling scope; DPRINTFR additionally
 * dumps `num' raw bytes starting at offset `index'.
 */
#if 0
#define DPRINTFR(n, fmt, val, index, num) \
	printf("\n%s (", n); \
	for (int i = 0; i < num; i++) \
		printf("%02x ", bus_space_read_1(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, index+i)); \
	printf(") -> "); printf(fmt, val); printf("\n");
#define DPRINTFR2(n, fmt, val_s, val_n) \
	printf("%s ", n); \
	printf("\n        stream "); printf(fmt, val_s); printf(" norm "); printf(fmt, val_n); printf("\n");
#else
/* tracing disabled: arguments are not evaluated */
#define DPRINTFR(n, fmt, val, index, num)
#define DPRINTFR2(n, fmt, val_s, val_n)
#endif
167 
168 
169 uint8_t
170 virtio_read_device_config_1(struct virtio_softc *sc, int index)
171 {
172 	bus_space_tag_t	   iot = sc->sc_devcfg_iot;
173 	bus_space_handle_t ioh = sc->sc_devcfg_ioh;
174 	uint8_t val;
175 
176 	val = bus_space_read_1(iot, ioh, index);
177 
178 	DPRINTFR("read_1", "%02x", val, index, 1);
179 	return val;
180 }
181 
182 uint16_t
183 virtio_read_device_config_2(struct virtio_softc *sc, int index)
184 {
185 	bus_space_tag_t	   iot = sc->sc_devcfg_iot;
186 	bus_space_handle_t ioh = sc->sc_devcfg_ioh;
187 	uint16_t val;
188 
189 	val = bus_space_read_2(iot, ioh, index);
190 	if (BYTE_ORDER != sc->sc_bus_endian)
191 		val = bswap16(val);
192 
193 	DPRINTFR("read_2", "%04x", val, index, 2);
194 	DPRINTFR2("read_2", "%04x",
195 	    bus_space_read_stream_2(sc->sc_devcfg_iot, sc->sc_devcfg_ioh,
196 		index),
197 	    bus_space_read_2(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, index));
198 	return val;
199 }
200 
201 uint32_t
202 virtio_read_device_config_4(struct virtio_softc *sc, int index)
203 {
204 	bus_space_tag_t	   iot = sc->sc_devcfg_iot;
205 	bus_space_handle_t ioh = sc->sc_devcfg_ioh;
206 	uint32_t val;
207 
208 	val = bus_space_read_4(iot, ioh, index);
209 	if (BYTE_ORDER != sc->sc_bus_endian)
210 		val = bswap32(val);
211 
212 	DPRINTFR("read_4", "%08x", val, index, 4);
213 	DPRINTFR2("read_4", "%08x",
214 	    bus_space_read_stream_4(sc->sc_devcfg_iot, sc->sc_devcfg_ioh,
215 		index),
216 	    bus_space_read_4(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, index));
217 	return val;
218 }
219 
220 /*
221  * The Virtio spec explicitly tells that reading and writing 8 bytes are not
222  * considered atomic and no triggers may be connected to reading or writing
223  * it. We access it using two 32 reads. See virtio spec 4.1.3.1.
224  */
225 uint64_t
226 virtio_read_device_config_8(struct virtio_softc *sc, int index)
227 {
228 	bus_space_tag_t	   iot = sc->sc_devcfg_iot;
229 	bus_space_handle_t ioh = sc->sc_devcfg_ioh;
230 	union {
231 		uint64_t u64;
232 		uint32_t l[2];
233 	} v;
234 	uint64_t val;
235 
236 	v.l[0] = bus_space_read_4(iot, ioh, index);
237 	v.l[1] = bus_space_read_4(iot, ioh, index + 4);
238 	if (sc->sc_bus_endian != sc->sc_struct_endian) {
239 		v.l[0] = bswap32(v.l[0]);
240 		v.l[1] = bswap32(v.l[1]);
241 	}
242 	val = v.u64;
243 
244 	if (BYTE_ORDER != sc->sc_struct_endian)
245 		val = bswap64(val);
246 
247 	DPRINTFR("read_8", "%08"PRIx64, val, index, 8);
248 	DPRINTFR2("read_8 low ", "%08x",
249 	    bus_space_read_stream_4(sc->sc_devcfg_iot, sc->sc_devcfg_ioh,
250 		index),
251 	    bus_space_read_4(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, index));
252 	DPRINTFR2("read_8 high ", "%08x",
253 	    bus_space_read_stream_4(sc->sc_devcfg_iot, sc->sc_devcfg_ioh,
254 		index + 4),
255 	    bus_space_read_4(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, index + 4));
256 	return val;
257 }
258 
259 /*
260  * In the older virtio spec, device config registers are host endian. On newer
261  * they are little endian. Some newer devices however explicitly specify their
262  * register to always be little endian. These functions cater for these.
263  */
264 uint16_t
265 virtio_read_device_config_le_2(struct virtio_softc *sc, int index)
266 {
267 	bus_space_tag_t	   iot = sc->sc_devcfg_iot;
268 	bus_space_handle_t ioh = sc->sc_devcfg_ioh;
269 	uint16_t val;
270 
271 	val = bus_space_read_2(iot, ioh, index);
272 	if (sc->sc_bus_endian != LITTLE_ENDIAN)
273 		val = bswap16(val);
274 
275 	DPRINTFR("read_le_2", "%04x", val, index, 2);
276 	DPRINTFR2("read_le_2", "%04x",
277 	    bus_space_read_stream_2(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, 0),
278 	    bus_space_read_2(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, 0));
279 	return val;
280 }
281 
282 uint32_t
283 virtio_read_device_config_le_4(struct virtio_softc *sc, int index)
284 {
285 	bus_space_tag_t	   iot = sc->sc_devcfg_iot;
286 	bus_space_handle_t ioh = sc->sc_devcfg_ioh;
287 	uint32_t val;
288 
289 	val = bus_space_read_4(iot, ioh, index);
290 	if (sc->sc_bus_endian != LITTLE_ENDIAN)
291 		val = bswap32(val);
292 
293 	DPRINTFR("read_le_4", "%08x", val, index, 4);
294 	DPRINTFR2("read_le_4", "%08x",
295 	    bus_space_read_stream_4(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, 0),
296 	    bus_space_read_4(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, 0));
297 	return val;
298 }
299 
300 void
301 virtio_write_device_config_1(struct virtio_softc *sc, int index, uint8_t value)
302 {
303 	bus_space_tag_t	   iot = sc->sc_devcfg_iot;
304 	bus_space_handle_t ioh = sc->sc_devcfg_ioh;
305 
306 	bus_space_write_1(iot, ioh, index, value);
307 }
308 
309 void
310 virtio_write_device_config_2(struct virtio_softc *sc, int index,
311     uint16_t value)
312 {
313 	bus_space_tag_t	   iot = sc->sc_devcfg_iot;
314 	bus_space_handle_t ioh = sc->sc_devcfg_ioh;
315 
316 	if (BYTE_ORDER != sc->sc_bus_endian)
317 		value = bswap16(value);
318 	bus_space_write_2(iot, ioh, index, value);
319 }
320 
321 void
322 virtio_write_device_config_4(struct virtio_softc *sc, int index,
323     uint32_t value)
324 {
325 	bus_space_tag_t	   iot = sc->sc_devcfg_iot;
326 	bus_space_handle_t ioh = sc->sc_devcfg_ioh;
327 
328 	if (BYTE_ORDER != sc->sc_bus_endian)
329 		value = bswap32(value);
330 	bus_space_write_4(iot, ioh, index, value);
331 }
332 
333 /*
334  * The Virtio spec explicitly tells that reading and writing 8 bytes are not
335  * considered atomic and no triggers may be connected to reading or writing
336  * it. We access it using two 32 bit writes. For good measure it is stated to
337  * always write lsb first just in case of a hypervisor bug. See See virtio
338  * spec 4.1.3.1.
339  */
340 void
341 virtio_write_device_config_8(struct virtio_softc *sc, int index,
342     uint64_t value)
343 {
344 	bus_space_tag_t	   iot = sc->sc_devcfg_iot;
345 	bus_space_handle_t ioh = sc->sc_devcfg_ioh;
346 	union {
347 		uint64_t u64;
348 		uint32_t l[2];
349 	} v;
350 
351 	if (BYTE_ORDER != sc->sc_struct_endian)
352 		value = bswap64(value);
353 
354 	v.u64 = value;
355 	if (sc->sc_bus_endian != sc->sc_struct_endian) {
356 		v.l[0] = bswap32(v.l[0]);
357 		v.l[1] = bswap32(v.l[1]);
358 	}
359 
360 	if (sc->sc_struct_endian == LITTLE_ENDIAN) {
361 		bus_space_write_4(iot, ioh, index,     v.l[0]);
362 		bus_space_write_4(iot, ioh, index + 4, v.l[1]);
363 	} else {
364 		bus_space_write_4(iot, ioh, index + 4, v.l[1]);
365 		bus_space_write_4(iot, ioh, index,     v.l[0]);
366 	}
367 }
368 
369 /*
370  * In the older virtio spec, device config registers are host endian. On newer
371  * they are little endian. Some newer devices however explicitly specify their
372  * register to always be little endian. These functions cater for these.
373  */
374 void
375 virtio_write_device_config_le_2(struct virtio_softc *sc, int index,
376     uint16_t value)
377 {
378 	bus_space_tag_t	   iot = sc->sc_devcfg_iot;
379 	bus_space_handle_t ioh = sc->sc_devcfg_ioh;
380 
381 	if (sc->sc_bus_endian != LITTLE_ENDIAN)
382 		value = bswap16(value);
383 	bus_space_write_2(iot, ioh, index, value);
384 }
385 
386 void
387 virtio_write_device_config_le_4(struct virtio_softc *sc, int index,
388     uint32_t value)
389 {
390 	bus_space_tag_t	   iot = sc->sc_devcfg_iot;
391 	bus_space_handle_t ioh = sc->sc_devcfg_ioh;
392 
393 	if (sc->sc_bus_endian != LITTLE_ENDIAN)
394 		value = bswap32(value);
395 	bus_space_write_4(iot, ioh, index, value);
396 }
397 
398 
399 /*
400  * data structures endian helpers
401  */
402 uint16_t
403 virtio_rw16(struct virtio_softc *sc, uint16_t val)
404 {
405 	KASSERT(sc);
406 	return BYTE_ORDER != sc->sc_struct_endian ? bswap16(val) : val;
407 }
408 
409 uint32_t
410 virtio_rw32(struct virtio_softc *sc, uint32_t val)
411 {
412 	KASSERT(sc);
413 	return BYTE_ORDER != sc->sc_struct_endian ? bswap32(val) : val;
414 }
415 
416 uint64_t
417 virtio_rw64(struct virtio_softc *sc, uint64_t val)
418 {
419 	KASSERT(sc);
420 	return BYTE_ORDER != sc->sc_struct_endian ? bswap64(val) : val;
421 }
422 
423 
424 /*
425  * Interrupt handler.
426  */
427 static void
428 virtio_soft_intr(void *arg)
429 {
430 	struct virtio_softc *sc = arg;
431 
432 	KASSERT(sc->sc_intrhand != NULL);
433 
434 	(*sc->sc_intrhand)(sc);
435 }
436 
437 /*
438  * dmamap sync operations for a virtqueue.
439  */
440 static inline void
441 vq_sync_descs(struct virtio_softc *sc, struct virtqueue *vq, int ops)
442 {
443 
444 	/* availoffset == sizeof(vring_desc) * vq_num */
445 	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap, 0, vq->vq_availoffset,
446 	    ops);
447 }
448 
449 static inline void
450 vq_sync_aring_all(struct virtio_softc *sc, struct virtqueue *vq, int ops)
451 {
452 	uint16_t hdrlen = offsetof(struct vring_avail, ring);
453 	size_t payloadlen = vq->vq_num * sizeof(uint16_t);
454 	size_t usedlen = 0;
455 
456 	if (sc->sc_active_features & VIRTIO_F_RING_EVENT_IDX)
457 		usedlen = sizeof(uint16_t);
458 	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
459 	    vq->vq_availoffset, hdrlen + payloadlen + usedlen, ops);
460 }
461 
462 static inline void
463 vq_sync_aring_header(struct virtio_softc *sc, struct virtqueue *vq, int ops)
464 {
465 	uint16_t hdrlen = offsetof(struct vring_avail, ring);
466 
467 	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
468 	    vq->vq_availoffset, hdrlen, ops);
469 }
470 
471 static inline void
472 vq_sync_aring_payload(struct virtio_softc *sc, struct virtqueue *vq, int ops)
473 {
474 	uint16_t hdrlen = offsetof(struct vring_avail, ring);
475 	size_t payloadlen = vq->vq_num * sizeof(uint16_t);
476 
477 	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
478 	    vq->vq_availoffset + hdrlen, payloadlen, ops);
479 }
480 
481 static inline void
482 vq_sync_aring_used(struct virtio_softc *sc, struct virtqueue *vq, int ops)
483 {
484 	uint16_t hdrlen = offsetof(struct vring_avail, ring);
485 	size_t payloadlen = vq->vq_num * sizeof(uint16_t);
486 	size_t usedlen = sizeof(uint16_t);
487 
488 	if ((sc->sc_active_features & VIRTIO_F_RING_EVENT_IDX) == 0)
489 		return;
490 	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
491 	    vq->vq_availoffset + hdrlen + payloadlen, usedlen, ops);
492 }
493 
494 static inline void
495 vq_sync_uring_all(struct virtio_softc *sc, struct virtqueue *vq, int ops)
496 {
497 	uint16_t hdrlen = offsetof(struct vring_used, ring);
498 	size_t payloadlen = vq->vq_num * sizeof(struct vring_used_elem);
499 	size_t availlen = 0;
500 
501 	if (sc->sc_active_features & VIRTIO_F_RING_EVENT_IDX)
502 		availlen = sizeof(uint16_t);
503 	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
504 	    vq->vq_usedoffset, hdrlen + payloadlen + availlen, ops);
505 }
506 
507 static inline void
508 vq_sync_uring_header(struct virtio_softc *sc, struct virtqueue *vq, int ops)
509 {
510 	uint16_t hdrlen = offsetof(struct vring_used, ring);
511 
512 	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
513 	    vq->vq_usedoffset, hdrlen, ops);
514 }
515 
516 static inline void
517 vq_sync_uring_payload(struct virtio_softc *sc, struct virtqueue *vq, int ops)
518 {
519 	uint16_t hdrlen = offsetof(struct vring_used, ring);
520 	size_t payloadlen = vq->vq_num * sizeof(struct vring_used_elem);
521 
522 	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
523 	    vq->vq_usedoffset + hdrlen, payloadlen, ops);
524 }
525 
526 static inline void
527 vq_sync_uring_avail(struct virtio_softc *sc, struct virtqueue *vq, int ops)
528 {
529 	uint16_t hdrlen = offsetof(struct vring_used, ring);
530 	size_t payloadlen = vq->vq_num * sizeof(struct vring_used_elem);
531 	size_t availlen = sizeof(uint16_t);
532 
533 	if ((sc->sc_active_features & VIRTIO_F_RING_EVENT_IDX) == 0)
534 		return;
535 	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
536 	    vq->vq_usedoffset + hdrlen + payloadlen, availlen, ops);
537 }
538 
539 static inline void
540 vq_sync_indirect(struct virtio_softc *sc, struct virtqueue *vq, int slot,
541     int ops)
542 {
543 	int offset = vq->vq_indirectoffset +
544 	    sizeof(struct vring_desc) * vq->vq_maxnsegs * slot;
545 
546 	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
547 	    offset, sizeof(struct vring_desc) * vq->vq_maxnsegs, ops);
548 }
549 
550 bool
551 virtio_vq_is_enqueued(struct virtio_softc *sc, struct virtqueue *vq)
552 {
553 
554 	if (vq->vq_queued) {
555 		vq->vq_queued = 0;
556 		vq_sync_aring_all(sc, vq, BUS_DMASYNC_POSTWRITE);
557 	}
558 
559 	vq_sync_uring_header(sc, vq, BUS_DMASYNC_POSTREAD);
560 	if (vq->vq_used_idx == virtio_rw16(sc, vq->vq_used->idx))
561 		return 0;
562 	vq_sync_uring_payload(sc, vq, BUS_DMASYNC_POSTREAD);
563 	return 1;
564 }
565 
566 /*
567  * Scan vq, bus_dmamap_sync for the vqs (not for the payload),
568  * and calls (*vq_done)() if some entries are consumed.
569  *
570  * Can be used as sc_intrhand.
571  */
572 int
573 virtio_vq_intr(struct virtio_softc *sc)
574 {
575 	struct virtqueue *vq;
576 	int i, r = 0;
577 
578 	for (i = 0; i < sc->sc_nvqs; i++) {
579 		vq = &sc->sc_vqs[i];
580 		if (virtio_vq_is_enqueued(sc, vq) == 1) {
581 			if (vq->vq_done)
582 				r |= (*vq->vq_done)(vq);
583 		}
584 	}
585 
586 	return r;
587 }
588 
589 int
590 virtio_vq_intrhand(struct virtio_softc *sc)
591 {
592 	struct virtqueue *vq;
593 	int i, r = 0;
594 
595 	for (i = 0; i < sc->sc_nvqs; i++) {
596 		vq = &sc->sc_vqs[i];
597 		r |= (*vq->vq_intrhand)(vq->vq_intrhand_arg);
598 	}
599 
600 	return r;
601 }
602 
603 
604 /*
605  * Increase the event index in order to delay interrupts.
606  */
607 int
608 virtio_postpone_intr(struct virtio_softc *sc, struct virtqueue *vq,
609     uint16_t nslots)
610 {
611 	uint16_t	idx, nused;
612 
613 	idx = vq->vq_used_idx + nslots;
614 
615 	/* set the new event index: avail_ring->used_event = idx */
616 	*vq->vq_used_event = virtio_rw16(sc, idx);
617 	vq_sync_aring_used(vq->vq_owner, vq, BUS_DMASYNC_PREWRITE);
618 	vq->vq_queued++;
619 
620 	nused = (uint16_t)
621 	    (virtio_rw16(sc, vq->vq_used->idx) - vq->vq_used_idx);
622 	KASSERT(nused <= vq->vq_num);
623 
624 	return nslots < nused;
625 }
626 
627 /*
628  * Postpone interrupt until 3/4 of the available descriptors have been
629  * consumed.
630  */
631 int
632 virtio_postpone_intr_smart(struct virtio_softc *sc, struct virtqueue *vq)
633 {
634 	uint16_t	nslots;
635 
636 	nslots = (uint16_t)
637 	    (virtio_rw16(sc, vq->vq_avail->idx) - vq->vq_used_idx) * 3 / 4;
638 
639 	return virtio_postpone_intr(sc, vq, nslots);
640 }
641 
642 /*
643  * Postpone interrupt until all of the available descriptors have been
644  * consumed.
645  */
646 int
647 virtio_postpone_intr_far(struct virtio_softc *sc, struct virtqueue *vq)
648 {
649 	uint16_t	nslots;
650 
651 	nslots = (uint16_t)
652 	    (virtio_rw16(sc, vq->vq_avail->idx) - vq->vq_used_idx);
653 
654 	return virtio_postpone_intr(sc, vq, nslots);
655 }
656 
657 /*
658  * Start/stop vq interrupt.  No guarantee.
659  */
660 void
661 virtio_stop_vq_intr(struct virtio_softc *sc, struct virtqueue *vq)
662 {
663 
664 	if (sc->sc_active_features & VIRTIO_F_RING_EVENT_IDX) {
665 		/*
666 		 * No way to disable the interrupt completely with
667 		 * RingEventIdx. Instead advance used_event by half the
668 		 * possible value. This won't happen soon and is far enough in
669 		 * the past to not trigger a spurios interrupt.
670 		 */
671 		*vq->vq_used_event = virtio_rw16(sc, vq->vq_used_idx + 0x8000);
672 		vq_sync_aring_used(sc, vq, BUS_DMASYNC_PREWRITE);
673 	} else {
674 		vq->vq_avail->flags |=
675 		    virtio_rw16(sc, VRING_AVAIL_F_NO_INTERRUPT);
676 		vq_sync_aring_header(sc, vq, BUS_DMASYNC_PREWRITE);
677 	}
678 	vq->vq_queued++;
679 }
680 
681 int
682 virtio_start_vq_intr(struct virtio_softc *sc, struct virtqueue *vq)
683 {
684 
685 	if (sc->sc_active_features & VIRTIO_F_RING_EVENT_IDX) {
686 		/*
687 		 * If event index feature is negotiated, enabling interrupts
688 		 * is done through setting the latest consumed index in the
689 		 * used_event field
690 		 */
691 		*vq->vq_used_event = virtio_rw16(sc, vq->vq_used_idx);
692 		vq_sync_aring_used(sc, vq, BUS_DMASYNC_PREWRITE);
693 	} else {
694 		vq->vq_avail->flags &=
695 		    ~virtio_rw16(sc, VRING_AVAIL_F_NO_INTERRUPT);
696 		vq_sync_aring_header(sc, vq, BUS_DMASYNC_PREWRITE);
697 	}
698 	vq->vq_queued++;
699 
700 	vq_sync_uring_header(sc, vq, BUS_DMASYNC_POSTREAD);
701 	if (vq->vq_used_idx == virtio_rw16(sc, vq->vq_used->idx))
702 		return 0;
703 	vq_sync_uring_payload(sc, vq, BUS_DMASYNC_POSTREAD);
704 	return 1;
705 }
706 
707 /*
708  * Initialize vq structure.
709  */
710 static void
711 virtio_init_vq(struct virtio_softc *sc, struct virtqueue *vq,
712     const bool reinit)
713 {
714 	int i, j;
715 	int vq_size = vq->vq_num;
716 
717 	memset(vq->vq_vaddr, 0, vq->vq_bytesize);
718 
719 	/* build the indirect descriptor chain */
720 	if (vq->vq_indirect != NULL) {
721 		struct vring_desc *vd;
722 
723 		for (i = 0; i < vq_size; i++) {
724 			vd = vq->vq_indirect;
725 			vd += vq->vq_maxnsegs * i;
726 			for (j = 0; j < vq->vq_maxnsegs - 1; j++) {
727 				vd[j].next = virtio_rw16(sc, j + 1);
728 			}
729 		}
730 	}
731 
732 	/* free slot management */
733 	SIMPLEQ_INIT(&vq->vq_freelist);
734 	for (i = 0; i < vq_size; i++) {
735 		SIMPLEQ_INSERT_TAIL(&vq->vq_freelist, &vq->vq_entries[i],
736 		    qe_list);
737 		vq->vq_entries[i].qe_index = i;
738 	}
739 	if (!reinit)
740 		mutex_init(&vq->vq_freelist_lock, MUTEX_SPIN, sc->sc_ipl);
741 
742 	/* enqueue/dequeue status */
743 	vq->vq_avail_idx = 0;
744 	vq->vq_used_idx = 0;
745 	vq->vq_queued = 0;
746 	if (!reinit) {
747 		mutex_init(&vq->vq_aring_lock, MUTEX_SPIN, sc->sc_ipl);
748 		mutex_init(&vq->vq_uring_lock, MUTEX_SPIN, sc->sc_ipl);
749 	}
750 	vq_sync_uring_all(sc, vq, BUS_DMASYNC_PREREAD);
751 	vq->vq_queued++;
752 }
753 
754 /*
755  * Allocate/free a vq.
756  */
757 int
758 virtio_alloc_vq(struct virtio_softc *sc, struct virtqueue *vq, int index,
759     int maxsegsize, int maxnsegs, const char *name)
760 {
761 	int vq_size, allocsize1, allocsize2, allocsize3, allocsize = 0;
762 	int rsegs, r, hdrlen;
763 #define VIRTQUEUE_ALIGN(n)	roundup(n, VIRTIO_PAGE_SIZE)
764 
765 	/* Make sure callers allocate vqs in order */
766 	KASSERT(sc->sc_nvqs == index);
767 
768 	memset(vq, 0, sizeof(*vq));
769 
770 	vq_size = sc->sc_ops->read_queue_size(sc, index);
771 	if (vq_size == 0) {
772 		aprint_error_dev(sc->sc_dev,
773 		    "virtqueue not exist, index %d for %s\n",
774 		    index, name);
775 		goto err;
776 	}
777 
778 	hdrlen = sc->sc_active_features & VIRTIO_F_RING_EVENT_IDX ? 3 : 2;
779 
780 	/* allocsize1: descriptor table + avail ring + pad */
781 	allocsize1 = VIRTQUEUE_ALIGN(sizeof(struct vring_desc) * vq_size
782 	    + sizeof(uint16_t) * (hdrlen + vq_size));
783 	/* allocsize2: used ring + pad */
784 	allocsize2 = VIRTQUEUE_ALIGN(sizeof(uint16_t) * hdrlen
785 	    + sizeof(struct vring_used_elem) * vq_size);
786 	/* allocsize3: indirect table */
787 	if (sc->sc_indirect && maxnsegs >= MINSEG_INDIRECT)
788 		allocsize3 = sizeof(struct vring_desc) * maxnsegs * vq_size;
789 	else
790 		allocsize3 = 0;
791 	allocsize = allocsize1 + allocsize2 + allocsize3;
792 
793 	/* alloc and map the memory */
794 	r = bus_dmamem_alloc(sc->sc_dmat, allocsize, VIRTIO_PAGE_SIZE, 0,
795 	    &vq->vq_segs[0], 1, &rsegs, BUS_DMA_WAITOK);
796 	if (r != 0) {
797 		aprint_error_dev(sc->sc_dev,
798 		    "virtqueue %d for %s allocation failed, "
799 		    "error code %d\n", index, name, r);
800 		goto err;
801 	}
802 	r = bus_dmamem_map(sc->sc_dmat, &vq->vq_segs[0], rsegs, allocsize,
803 	    &vq->vq_vaddr, BUS_DMA_WAITOK);
804 	if (r != 0) {
805 		aprint_error_dev(sc->sc_dev,
806 		    "virtqueue %d for %s map failed, "
807 		    "error code %d\n", index, name, r);
808 		goto err;
809 	}
810 	r = bus_dmamap_create(sc->sc_dmat, allocsize, 1, allocsize, 0,
811 	    BUS_DMA_WAITOK, &vq->vq_dmamap);
812 	if (r != 0) {
813 		aprint_error_dev(sc->sc_dev,
814 		    "virtqueue %d for %s dmamap creation failed, "
815 		    "error code %d\n", index, name, r);
816 		goto err;
817 	}
818 	r = bus_dmamap_load(sc->sc_dmat, vq->vq_dmamap,
819 	    vq->vq_vaddr, allocsize, NULL, BUS_DMA_WAITOK);
820 	if (r != 0) {
821 		aprint_error_dev(sc->sc_dev,
822 		    "virtqueue %d for %s dmamap load failed, "
823 		    "error code %d\n", index, name, r);
824 		goto err;
825 	}
826 
827 	/* remember addresses and offsets for later use */
828 	vq->vq_owner = sc;
829 	vq->vq_num = vq_size;
830 	vq->vq_index = index;
831 	vq->vq_desc = vq->vq_vaddr;
832 	vq->vq_availoffset = sizeof(struct vring_desc) * vq_size;
833 	vq->vq_avail = (void *)(((char *)vq->vq_desc) + vq->vq_availoffset);
834 	vq->vq_used_event = (uint16_t *)((char *)vq->vq_avail +
835 	    offsetof(struct vring_avail, ring[vq->vq_num]));
836 	vq->vq_usedoffset = allocsize1;
837 	vq->vq_used = (void *)(((char *)vq->vq_desc) + vq->vq_usedoffset);
838 	vq->vq_avail_event = (uint16_t *)((char *)vq->vq_used +
839 	    offsetof(struct vring_used, ring[vq->vq_num]));
840 
841 	if (allocsize3 > 0) {
842 		vq->vq_indirectoffset = allocsize1 + allocsize2;
843 		vq->vq_indirect = (void *)(((char *)vq->vq_desc)
844 		    + vq->vq_indirectoffset);
845 	}
846 	vq->vq_bytesize = allocsize;
847 	vq->vq_maxsegsize = maxsegsize;
848 	vq->vq_maxnsegs = maxnsegs;
849 
850 	/* free slot management */
851 	vq->vq_entries = kmem_zalloc(sizeof(struct vq_entry) * vq_size,
852 	    KM_SLEEP);
853 	virtio_init_vq(sc, vq, false);
854 
855 	/* set the vq address */
856 	sc->sc_ops->setup_queue(sc, index,
857 	    vq->vq_dmamap->dm_segs[0].ds_addr);
858 
859 	aprint_verbose_dev(sc->sc_dev,
860 	    "allocated %u byte for virtqueue %d for %s, size %d\n",
861 	    allocsize, index, name, vq_size);
862 	if (allocsize3 > 0)
863 		aprint_verbose_dev(sc->sc_dev,
864 		    "using %d byte (%d entries) indirect descriptors\n",
865 		    allocsize3, maxnsegs * vq_size);
866 
867 	sc->sc_nvqs++;
868 
869 	return 0;
870 
871 err:
872 	sc->sc_ops->setup_queue(sc, index, 0);
873 	if (vq->vq_dmamap)
874 		bus_dmamap_destroy(sc->sc_dmat, vq->vq_dmamap);
875 	if (vq->vq_vaddr)
876 		bus_dmamem_unmap(sc->sc_dmat, vq->vq_vaddr, allocsize);
877 	if (vq->vq_segs[0].ds_addr)
878 		bus_dmamem_free(sc->sc_dmat, &vq->vq_segs[0], 1);
879 	memset(vq, 0, sizeof(*vq));
880 
881 	return -1;
882 }
883 
884 int
885 virtio_free_vq(struct virtio_softc *sc, struct virtqueue *vq)
886 {
887 	struct vq_entry *qe;
888 	int i = 0;
889 
890 	/* device must be already deactivated */
891 	/* confirm the vq is empty */
892 	SIMPLEQ_FOREACH(qe, &vq->vq_freelist, qe_list) {
893 		i++;
894 	}
895 	if (i != vq->vq_num) {
896 		printf("%s: freeing non-empty vq, index %d\n",
897 		    device_xname(sc->sc_dev), vq->vq_index);
898 		return EBUSY;
899 	}
900 
901 	/* tell device that there's no virtqueue any longer */
902 	sc->sc_ops->setup_queue(sc, vq->vq_index, 0);
903 
904 	vq_sync_aring_all(sc, vq, BUS_DMASYNC_POSTWRITE);
905 
906 	kmem_free(vq->vq_entries, sizeof(*vq->vq_entries) * vq->vq_num);
907 	bus_dmamap_unload(sc->sc_dmat, vq->vq_dmamap);
908 	bus_dmamap_destroy(sc->sc_dmat, vq->vq_dmamap);
909 	bus_dmamem_unmap(sc->sc_dmat, vq->vq_vaddr, vq->vq_bytesize);
910 	bus_dmamem_free(sc->sc_dmat, &vq->vq_segs[0], 1);
911 	mutex_destroy(&vq->vq_freelist_lock);
912 	mutex_destroy(&vq->vq_uring_lock);
913 	mutex_destroy(&vq->vq_aring_lock);
914 	memset(vq, 0, sizeof(*vq));
915 
916 	sc->sc_nvqs--;
917 
918 	return 0;
919 }
920 
921 /*
922  * Free descriptor management.
923  */
924 static struct vq_entry *
925 vq_alloc_entry(struct virtqueue *vq)
926 {
927 	struct vq_entry *qe;
928 
929 	mutex_enter(&vq->vq_freelist_lock);
930 	if (SIMPLEQ_EMPTY(&vq->vq_freelist)) {
931 		mutex_exit(&vq->vq_freelist_lock);
932 		return NULL;
933 	}
934 	qe = SIMPLEQ_FIRST(&vq->vq_freelist);
935 	SIMPLEQ_REMOVE_HEAD(&vq->vq_freelist, qe_list);
936 	mutex_exit(&vq->vq_freelist_lock);
937 
938 	return qe;
939 }
940 
941 static void
942 vq_free_entry(struct virtqueue *vq, struct vq_entry *qe)
943 {
944 	mutex_enter(&vq->vq_freelist_lock);
945 	SIMPLEQ_INSERT_TAIL(&vq->vq_freelist, qe, qe_list);
946 	mutex_exit(&vq->vq_freelist_lock);
947 
948 	return;
949 }
950 
951 /*
952  * Enqueue several dmamaps as a single request.
953  */
954 /*
955  * Typical usage:
 *  the following are stored in arrays with <queue size> elements each
957  *  - command blocks (in dmamem) should be pre-allocated and mapped
958  *  - dmamaps for command blocks should be pre-allocated and loaded
959  *  - dmamaps for payload should be pre-allocated
960  *      r = virtio_enqueue_prep(sc, vq, &slot);		// allocate a slot
961  *	if (r)		// currently 0 or EAGAIN
962  *		return r;
963  *	r = bus_dmamap_load(dmat, dmamap_payload[slot], data, count, ..);
964  *	if (r) {
965  *		virtio_enqueue_abort(sc, vq, slot);
966  *		return r;
967  *	}
968  *	r = virtio_enqueue_reserve(sc, vq, slot,
969  *	    dmamap_payload[slot]->dm_nsegs + 1);
970  *							// ^ +1 for command
971  *	if (r) {	// currently 0 or EAGAIN
972  *		bus_dmamap_unload(dmat, dmamap_payload[slot]);
973  *		return r;				// do not call abort()
974  *	}
975  *	<setup and prepare commands>
976  *	bus_dmamap_sync(dmat, dmamap_cmd[slot],... BUS_DMASYNC_PREWRITE);
977  *	bus_dmamap_sync(dmat, dmamap_payload[slot],...);
978  *	virtio_enqueue(sc, vq, slot, dmamap_cmd[slot], false);
979  *	virtio_enqueue(sc, vq, slot, dmamap_payload[slot], iswrite);
980  *	virtio_enqueue_commit(sc, vq, slot, true);
981  */
982 
983 /*
984  * enqueue_prep: allocate a slot number
985  */
986 int
987 virtio_enqueue_prep(struct virtio_softc *sc, struct virtqueue *vq, int *slotp)
988 {
989 	struct vq_entry *qe1;
990 
991 	KASSERT(slotp != NULL);
992 
993 	qe1 = vq_alloc_entry(vq);
994 	if (qe1 == NULL)
995 		return EAGAIN;
996 	/* next slot is not allocated yet */
997 	qe1->qe_next = -1;
998 	*slotp = qe1->qe_index;
999 
1000 	return 0;
1001 }
1002 
1003 /*
1004  * enqueue_reserve: allocate remaining slots and build the descriptor chain.
1005  */
1006 int
1007 virtio_enqueue_reserve(struct virtio_softc *sc, struct virtqueue *vq,
1008     int slot, int nsegs)
1009 {
1010 	int indirect;
1011 	struct vq_entry *qe1 = &vq->vq_entries[slot];
1012 
1013 	KASSERT(qe1->qe_next == -1);
1014 	KASSERT(1 <= nsegs && nsegs <= vq->vq_num);
1015 
1016 	if ((vq->vq_indirect != NULL) &&
1017 	    (nsegs >= MINSEG_INDIRECT) &&
1018 	    (nsegs <= vq->vq_maxnsegs))
1019 		indirect = 1;
1020 	else
1021 		indirect = 0;
1022 	qe1->qe_indirect = indirect;
1023 
1024 	if (indirect) {
1025 		struct vring_desc *vd;
1026 		uint64_t addr;
1027 		int i;
1028 
1029 		vd = &vq->vq_desc[qe1->qe_index];
1030 		addr = vq->vq_dmamap->dm_segs[0].ds_addr
1031 		    + vq->vq_indirectoffset;
1032 		addr += sizeof(struct vring_desc)
1033 		    * vq->vq_maxnsegs * qe1->qe_index;
1034 		vd->addr  = virtio_rw64(sc, addr);
1035 		vd->len   = virtio_rw32(sc, sizeof(struct vring_desc) * nsegs);
1036 		vd->flags = virtio_rw16(sc, VRING_DESC_F_INDIRECT);
1037 
1038 		vd = vq->vq_indirect;
1039 		vd += vq->vq_maxnsegs * qe1->qe_index;
1040 		qe1->qe_desc_base = vd;
1041 
1042 		for (i = 0; i < nsegs - 1; i++) {
1043 			vd[i].flags = virtio_rw16(sc, VRING_DESC_F_NEXT);
1044 		}
1045 		vd[i].flags  = virtio_rw16(sc, 0);
1046 		qe1->qe_next = 0;
1047 
1048 		return 0;
1049 	} else {
1050 		struct vring_desc *vd;
1051 		struct vq_entry *qe;
1052 		int i, s;
1053 
1054 		vd = &vq->vq_desc[0];
1055 		qe1->qe_desc_base = vd;
1056 		qe1->qe_next = qe1->qe_index;
1057 		s = slot;
1058 		for (i = 0; i < nsegs - 1; i++) {
1059 			qe = vq_alloc_entry(vq);
1060 			if (qe == NULL) {
1061 				vd[s].flags = virtio_rw16(sc, 0);
1062 				virtio_enqueue_abort(sc, vq, slot);
1063 				return EAGAIN;
1064 			}
1065 			vd[s].flags = virtio_rw16(sc, VRING_DESC_F_NEXT);
1066 			vd[s].next  = virtio_rw16(sc, qe->qe_index);
1067 			s = qe->qe_index;
1068 		}
1069 		vd[s].flags = virtio_rw16(sc, 0);
1070 
1071 		return 0;
1072 	}
1073 }
1074 
1075 /*
1076  * enqueue: enqueue a single dmamap.
1077  */
1078 int
1079 virtio_enqueue(struct virtio_softc *sc, struct virtqueue *vq, int slot,
1080     bus_dmamap_t dmamap, bool write)
1081 {
1082 	struct vq_entry *qe1 = &vq->vq_entries[slot];
1083 	struct vring_desc *vd = qe1->qe_desc_base;
1084 	int i;
1085 	int s = qe1->qe_next;
1086 
1087 	KASSERT(s >= 0);
1088 	KASSERT(dmamap->dm_nsegs > 0);
1089 
1090 	for (i = 0; i < dmamap->dm_nsegs; i++) {
1091 		vd[s].addr = virtio_rw64(sc, dmamap->dm_segs[i].ds_addr);
1092 		vd[s].len  = virtio_rw32(sc, dmamap->dm_segs[i].ds_len);
1093 		if (!write)
1094 			vd[s].flags |= virtio_rw16(sc, VRING_DESC_F_WRITE);
1095 		s = virtio_rw16(sc, vd[s].next);
1096 	}
1097 	qe1->qe_next = s;
1098 
1099 	return 0;
1100 }
1101 
1102 int
1103 virtio_enqueue_p(struct virtio_softc *sc, struct virtqueue *vq, int slot,
1104     bus_dmamap_t dmamap, bus_addr_t start, bus_size_t len,
1105     bool write)
1106 {
1107 	struct vq_entry *qe1 = &vq->vq_entries[slot];
1108 	struct vring_desc *vd = qe1->qe_desc_base;
1109 	int s = qe1->qe_next;
1110 
1111 	KASSERT(s >= 0);
1112 	KASSERT(dmamap->dm_nsegs == 1); /* XXX */
1113 	KASSERT(dmamap->dm_segs[0].ds_len > start);
1114 	KASSERT(dmamap->dm_segs[0].ds_len >= start + len);
1115 
1116 	vd[s].addr = virtio_rw64(sc, dmamap->dm_segs[0].ds_addr + start);
1117 	vd[s].len  = virtio_rw32(sc, len);
1118 	if (!write)
1119 		vd[s].flags |= virtio_rw16(sc, VRING_DESC_F_WRITE);
1120 	qe1->qe_next = virtio_rw16(sc, vd[s].next);
1121 
1122 	return 0;
1123 }
1124 
1125 /*
1126  * enqueue_commit: add it to the aring.
1127  */
1128 int
1129 virtio_enqueue_commit(struct virtio_softc *sc, struct virtqueue *vq, int slot,
1130     bool notifynow)
1131 {
1132 	struct vq_entry *qe1;
1133 
1134 	if (slot < 0) {
1135 		mutex_enter(&vq->vq_aring_lock);
1136 		goto notify;
1137 	}
1138 	vq_sync_descs(sc, vq, BUS_DMASYNC_PREWRITE);
1139 	qe1 = &vq->vq_entries[slot];
1140 	if (qe1->qe_indirect)
1141 		vq_sync_indirect(sc, vq, slot, BUS_DMASYNC_PREWRITE);
1142 	mutex_enter(&vq->vq_aring_lock);
1143 	vq->vq_avail->ring[(vq->vq_avail_idx++) % vq->vq_num] =
1144 	    virtio_rw16(sc, slot);
1145 
1146 notify:
1147 	if (notifynow) {
1148 		uint16_t o, n, t;
1149 		uint16_t flags;
1150 
1151 		o = virtio_rw16(sc, vq->vq_avail->idx) - 1;
1152 		n = vq->vq_avail_idx;
1153 
1154 		/*
1155 		 * Prepare for `device->CPU' (host->guest) transfer
1156 		 * into the buffer.  This must happen before we commit
1157 		 * the vq->vq_avail->idx update to ensure we're not
1158 		 * still using the buffer in case program-prior loads
1159 		 * or stores in it get delayed past the store to
1160 		 * vq->vq_avail->idx.
1161 		 */
1162 		vq_sync_uring_all(sc, vq, BUS_DMASYNC_PREREAD);
1163 
1164 		/* ensure payload is published, then avail idx */
1165 		vq_sync_aring_payload(sc, vq, BUS_DMASYNC_PREWRITE);
1166 		vq->vq_avail->idx = virtio_rw16(sc, vq->vq_avail_idx);
1167 		vq_sync_aring_header(sc, vq, BUS_DMASYNC_PREWRITE);
1168 		vq->vq_queued++;
1169 
1170 		if (sc->sc_active_features & VIRTIO_F_RING_EVENT_IDX) {
1171 			vq_sync_uring_avail(sc, vq, BUS_DMASYNC_POSTREAD);
1172 			t = virtio_rw16(sc, *vq->vq_avail_event) + 1;
1173 			if ((uint16_t) (n - t) < (uint16_t) (n - o))
1174 				sc->sc_ops->kick(sc, vq->vq_index);
1175 		} else {
1176 			vq_sync_uring_header(sc, vq, BUS_DMASYNC_POSTREAD);
1177 			flags = virtio_rw16(sc, vq->vq_used->flags);
1178 			if (!(flags & VRING_USED_F_NO_NOTIFY))
1179 				sc->sc_ops->kick(sc, vq->vq_index);
1180 		}
1181 	}
1182 	mutex_exit(&vq->vq_aring_lock);
1183 
1184 	return 0;
1185 }
1186 
1187 /*
1188  * enqueue_abort: rollback.
1189  */
1190 int
1191 virtio_enqueue_abort(struct virtio_softc *sc, struct virtqueue *vq, int slot)
1192 {
1193 	struct vq_entry *qe = &vq->vq_entries[slot];
1194 	struct vring_desc *vd;
1195 	int s;
1196 
1197 	if (qe->qe_next < 0) {
1198 		vq_free_entry(vq, qe);
1199 		return 0;
1200 	}
1201 
1202 	s = slot;
1203 	vd = &vq->vq_desc[0];
1204 	while (virtio_rw16(sc, vd[s].flags) & VRING_DESC_F_NEXT) {
1205 		s = virtio_rw16(sc, vd[s].next);
1206 		vq_free_entry(vq, qe);
1207 		qe = &vq->vq_entries[s];
1208 	}
1209 	vq_free_entry(vq, qe);
1210 	return 0;
1211 }
1212 
1213 /*
1214  * Dequeue a request.
1215  */
1216 /*
1217  * dequeue: dequeue a request from uring; dmamap_sync for uring is
1218  *	    already done in the interrupt handler.
1219  */
1220 int
1221 virtio_dequeue(struct virtio_softc *sc, struct virtqueue *vq,
1222     int *slotp, int *lenp)
1223 {
1224 	uint16_t slot, usedidx;
1225 	struct vq_entry *qe;
1226 
1227 	if (vq->vq_used_idx == virtio_rw16(sc, vq->vq_used->idx))
1228 		return ENOENT;
1229 	mutex_enter(&vq->vq_uring_lock);
1230 	usedidx = vq->vq_used_idx++;
1231 	mutex_exit(&vq->vq_uring_lock);
1232 	usedidx %= vq->vq_num;
1233 	slot = virtio_rw32(sc, vq->vq_used->ring[usedidx].id);
1234 	qe = &vq->vq_entries[slot];
1235 
1236 	if (qe->qe_indirect)
1237 		vq_sync_indirect(sc, vq, slot, BUS_DMASYNC_POSTWRITE);
1238 
1239 	if (slotp)
1240 		*slotp = slot;
1241 	if (lenp)
1242 		*lenp = virtio_rw32(sc, vq->vq_used->ring[usedidx].len);
1243 
1244 	return 0;
1245 }
1246 
1247 /*
1248  * dequeue_commit: complete dequeue; the slot is recycled for future use.
1249  *                 if you forget to call this the slot will be leaked.
1250  */
1251 int
1252 virtio_dequeue_commit(struct virtio_softc *sc, struct virtqueue *vq, int slot)
1253 {
1254 	struct vq_entry *qe = &vq->vq_entries[slot];
1255 	struct vring_desc *vd = &vq->vq_desc[0];
1256 	int s = slot;
1257 
1258 	while (virtio_rw16(sc, vd[s].flags) & VRING_DESC_F_NEXT) {
1259 		s = virtio_rw16(sc, vd[s].next);
1260 		vq_free_entry(vq, qe);
1261 		qe = &vq->vq_entries[s];
1262 	}
1263 	vq_free_entry(vq, qe);
1264 
1265 	return 0;
1266 }
1267 
1268 /*
1269  * Attach a child, fill all the members.
1270  */
1271 void
1272 virtio_child_attach_start(struct virtio_softc *sc, device_t child, int ipl,
1273     struct virtqueue *vqs,
1274     virtio_callback config_change,
1275     virtio_callback intr_hand,
1276     int req_flags, int req_features, const char *feat_bits)
1277 {
1278 	char buf[1024];
1279 
1280 	sc->sc_child = child;
1281 	sc->sc_ipl = ipl;
1282 	sc->sc_vqs = vqs;
1283 	sc->sc_config_change = config_change;
1284 	sc->sc_intrhand = intr_hand;
1285 	sc->sc_flags = req_flags;
1286 
1287 	virtio_negotiate_features(sc, req_features);
1288 	snprintb(buf, sizeof(buf), feat_bits, sc->sc_active_features);
1289 	aprint_normal(": features: %s\n", buf);
1290 	aprint_naive("\n");
1291 }
1292 
1293 void
1294 virtio_child_attach_set_vqs(struct virtio_softc *sc,
1295     struct virtqueue *vqs, int nvq_pairs)
1296 {
1297 
1298 	KASSERT(nvq_pairs == 1 ||
1299 	    (sc->sc_flags & VIRTIO_F_INTR_SOFTINT) == 0);
1300 	if (nvq_pairs > 1)
1301 		sc->sc_child_mq = true;
1302 
1303 	sc->sc_vqs = vqs;
1304 }
1305 
1306 int
1307 virtio_child_attach_finish(struct virtio_softc *sc)
1308 {
1309 	int r;
1310 
1311 	sc->sc_finished_called = true;
1312 	r = sc->sc_ops->alloc_interrupts(sc);
1313 	if (r != 0) {
1314 		aprint_error_dev(sc->sc_dev,
1315 		    "failed to allocate interrupts\n");
1316 		goto fail;
1317 	}
1318 
1319 	r = sc->sc_ops->setup_interrupts(sc, 0);
1320 	if (r != 0) {
1321 		aprint_error_dev(sc->sc_dev, "failed to setup interrupts\n");
1322 		goto fail;
1323 	}
1324 
1325 	KASSERT(sc->sc_soft_ih == NULL);
1326 	if (sc->sc_flags & VIRTIO_F_INTR_SOFTINT) {
1327 		u_int flags = SOFTINT_NET;
1328 		if (sc->sc_flags & VIRTIO_F_INTR_MPSAFE)
1329 			flags |= SOFTINT_MPSAFE;
1330 
1331 		sc->sc_soft_ih = softint_establish(flags, virtio_soft_intr,
1332 		    sc);
1333 		if (sc->sc_soft_ih == NULL) {
1334 			sc->sc_ops->free_interrupts(sc);
1335 			aprint_error_dev(sc->sc_dev,
1336 			    "failed to establish soft interrupt\n");
1337 			goto fail;
1338 		}
1339 	}
1340 
1341 	virtio_set_status(sc, VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK);
1342 	return 0;
1343 
1344 fail:
1345 	if (sc->sc_soft_ih) {
1346 		softint_disestablish(sc->sc_soft_ih);
1347 		sc->sc_soft_ih = NULL;
1348 	}
1349 
1350 	sc->sc_ops->free_interrupts(sc);
1351 
1352 	virtio_set_status(sc, VIRTIO_CONFIG_DEVICE_STATUS_FAILED);
1353 	return 1;
1354 }
1355 
1356 void
1357 virtio_child_detach(struct virtio_softc *sc)
1358 {
1359 	sc->sc_child = NULL;
1360 	sc->sc_vqs = NULL;
1361 
1362 	virtio_device_reset(sc);
1363 
1364 	sc->sc_ops->free_interrupts(sc);
1365 
1366 	if (sc->sc_soft_ih) {
1367 		softint_disestablish(sc->sc_soft_ih);
1368 		sc->sc_soft_ih = NULL;
1369 	}
1370 }
1371 
1372 void
1373 virtio_child_attach_failed(struct virtio_softc *sc)
1374 {
1375 	virtio_child_detach(sc);
1376 
1377 	virtio_set_status(sc, VIRTIO_CONFIG_DEVICE_STATUS_FAILED);
1378 
1379 	sc->sc_child = VIRTIO_CHILD_FAILED;
1380 }
1381 
1382 bus_dma_tag_t
1383 virtio_dmat(struct virtio_softc *sc)
1384 {
1385 	return sc->sc_dmat;
1386 }
1387 
1388 device_t
1389 virtio_child(struct virtio_softc *sc)
1390 {
1391 	return sc->sc_child;
1392 }
1393 
1394 int
1395 virtio_intrhand(struct virtio_softc *sc)
1396 {
1397 	return (*sc->sc_intrhand)(sc);
1398 }
1399 
1400 uint64_t
1401 virtio_features(struct virtio_softc *sc)
1402 {
1403 	return sc->sc_active_features;
1404 }
1405 
1406 int
1407 virtio_attach_failed(struct virtio_softc *sc)
1408 {
1409 	device_t self = sc->sc_dev;
1410 
1411 	/* no error if its not connected, but its failed */
1412 	if (sc->sc_childdevid == 0)
1413 		return 1;
1414 
1415 	if (sc->sc_child == NULL) {
1416 		aprint_error_dev(self,
1417 		    "no matching child driver; not configured\n");
1418 		return 1;
1419 	}
1420 
1421 	if (sc->sc_child == VIRTIO_CHILD_FAILED) {
1422 		aprint_error_dev(self, "virtio configuration failed\n");
1423 		return 1;
1424 	}
1425 
1426 	/* sanity check */
1427 	if (!sc->sc_finished_called) {
1428 		aprint_error_dev(self, "virtio internal error, child driver "
1429 		    "signaled OK but didn't initialize interrupts\n");
1430 		return 1;
1431 	}
1432 
1433 	return 0;
1434 }
1435 
1436 void
1437 virtio_print_device_type(device_t self, int id, int revision)
1438 {
1439 	aprint_normal_dev(self, "%s device (id %d, rev. 0x%02x)\n",
1440 	    (id < NDEVNAMES ? virtio_device_name[id] : "Unknown"),
1441 	    id,
1442 	    revision);
1443 }
1444 
1445 
1446 MODULE(MODULE_CLASS_DRIVER, virtio, NULL);
1447 
1448 #ifdef _MODULE
1449 #include "ioconf.c"
1450 #endif
1451 
1452 static int
1453 virtio_modcmd(modcmd_t cmd, void *opaque)
1454 {
1455 	int error = 0;
1456 
1457 #ifdef _MODULE
1458 	switch (cmd) {
1459 	case MODULE_CMD_INIT:
1460 		error = config_init_component(cfdriver_ioconf_virtio,
1461 		    cfattach_ioconf_virtio, cfdata_ioconf_virtio);
1462 		break;
1463 	case MODULE_CMD_FINI:
1464 		error = config_fini_component(cfdriver_ioconf_virtio,
1465 		    cfattach_ioconf_virtio, cfdata_ioconf_virtio);
1466 		break;
1467 	default:
1468 		error = ENOTTY;
1469 		break;
1470 	}
1471 #endif
1472 
1473 	return error;
1474 }
1475