xref: /netbsd-src/sys/dev/pci/virtio.c (revision ccd9df534e375a4366c5b55f23782053c7a98d82)
1 /*	$NetBSD: virtio.c,v 1.81 2024/02/10 02:25:15 isaki Exp $	*/
2 
3 /*
4  * Copyright (c) 2020 The NetBSD Foundation, Inc.
5  * Copyright (c) 2012 Stefan Fritsch, Alexander Fiveg.
6  * Copyright (c) 2010 Minoura Makoto.
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  */
29 
30 #include <sys/cdefs.h>
31 __KERNEL_RCSID(0, "$NetBSD: virtio.c,v 1.81 2024/02/10 02:25:15 isaki Exp $");
32 
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/kernel.h>
36 #include <sys/atomic.h>
37 #include <sys/bus.h>
38 #include <sys/device.h>
39 #include <sys/kmem.h>
40 #include <sys/module.h>
41 
42 #define VIRTIO_PRIVATE
43 
44 #include <dev/pci/virtioreg.h> /* XXX: move to non-pci */
45 #include <dev/pci/virtiovar.h> /* XXX: move to non-pci */
46 
47 #define MINSEG_INDIRECT		2 /* use indirect if nsegs >= this value */
48 
/*
 * The maximum virtqueue size is 2^15 descriptors, so valid descriptor
 * indices are 0..32767. Use 32768 as the end-of-chain terminator since it
 * can never be a valid index into the descriptor table.
 */
54 #define VRING_DESC_CHAIN_END		32768
55 
56 /* incomplete list */
57 static const char *virtio_device_name[] = {
58 	"unknown (0)",			/*  0 */
59 	"network",			/*  1 */
60 	"block",			/*  2 */
61 	"console",			/*  3 */
62 	"entropy",			/*  4 */
63 	"memory balloon",		/*  5 */
64 	"I/O memory",			/*  6 */
65 	"remote processor messaging",	/*  7 */
66 	"SCSI",				/*  8 */
67 	"9P transport",			/*  9 */
68 };
69 #define NDEVNAMES	__arraycount(virtio_device_name)
70 
71 static void	virtio_reset_vq(struct virtio_softc *,
72 		    struct virtqueue *);
73 
74 void
75 virtio_set_status(struct virtio_softc *sc, int status)
76 {
77 	sc->sc_ops->set_status(sc, status);
78 }
79 
80 /*
81  * Reset the device.
82  */
83 /*
84  * To reset the device to a known state, do following:
85  *	virtio_reset(sc);	     // this will stop the device activity
86  *	<dequeue finished requests>; // virtio_dequeue() still can be called
87  *	<revoke pending requests in the vqs if any>;
88  *	virtio_reinit_start(sc);     // dequeue prohibited
89  *	newfeatures = virtio_negotiate_features(sc, requestedfeatures);
90  *	<some other initialization>;
91  *	virtio_reinit_end(sc);	     // device activated; enqueue allowed
92  * Once attached, feature negotiation can only be allowed after virtio_reset.
93  */
94 void
95 virtio_reset(struct virtio_softc *sc)
96 {
97 	virtio_device_reset(sc);
98 }
99 
100 int
101 virtio_reinit_start(struct virtio_softc *sc)
102 {
103 	int i, r;
104 
105 	virtio_set_status(sc, VIRTIO_CONFIG_DEVICE_STATUS_ACK);
106 	virtio_set_status(sc, VIRTIO_CONFIG_DEVICE_STATUS_DRIVER);
107 	for (i = 0; i < sc->sc_nvqs; i++) {
108 		int n;
109 		struct virtqueue *vq = &sc->sc_vqs[i];
110 		n = sc->sc_ops->read_queue_size(sc, vq->vq_index);
111 		if (n == 0)	/* vq disappeared */
112 			continue;
113 		if (n != vq->vq_num) {
114 			panic("%s: virtqueue size changed, vq index %d\n",
115 			    device_xname(sc->sc_dev),
116 			    vq->vq_index);
117 		}
118 		virtio_reset_vq(sc, vq);
119 		sc->sc_ops->setup_queue(sc, vq->vq_index,
120 		    vq->vq_dmamap->dm_segs[0].ds_addr);
121 	}
122 
123 	r = sc->sc_ops->setup_interrupts(sc, 1);
124 	if (r != 0)
125 		goto fail;
126 
127 	return 0;
128 
129 fail:
130 	virtio_set_status(sc, VIRTIO_CONFIG_DEVICE_STATUS_FAILED);
131 
132 	return 1;
133 }
134 
135 void
136 virtio_reinit_end(struct virtio_softc *sc)
137 {
138 	virtio_set_status(sc, VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK);
139 }
140 
141 /*
142  * Feature negotiation.
143  */
144 void
145 virtio_negotiate_features(struct virtio_softc *sc, uint64_t guest_features)
146 {
147 	if (!(device_cfdata(sc->sc_dev)->cf_flags & 1) &&
148 	    !(device_cfdata(sc->sc_child)->cf_flags & 1)) /* XXX */
149 		guest_features |= VIRTIO_F_RING_INDIRECT_DESC;
150 	sc->sc_ops->neg_features(sc, guest_features);
151 	if (sc->sc_active_features & VIRTIO_F_RING_INDIRECT_DESC)
152 		sc->sc_indirect = true;
153 	else
154 		sc->sc_indirect = false;
155 }
156 
157 
158 /*
159  * Device configuration registers readers/writers
160  */
161 #if 0
162 #define DPRINTFR(n, fmt, val, index, num) \
163 	printf("\n%s (", n); \
164 	for (int i = 0; i < num; i++) \
165 		printf("%02x ", bus_space_read_1(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, index+i)); \
166 	printf(") -> "); printf(fmt, val); printf("\n");
167 #define DPRINTFR2(n, fmt, val_s, val_n) \
168 	printf("%s ", n); \
169 	printf("\n        stream "); printf(fmt, val_s); printf(" norm "); printf(fmt, val_n); printf("\n");
170 #else
171 #define DPRINTFR(n, fmt, val, index, num)
172 #define DPRINTFR2(n, fmt, val_s, val_n)
173 #endif
174 
175 
176 uint8_t
177 virtio_read_device_config_1(struct virtio_softc *sc, int index)
178 {
179 	bus_space_tag_t	   iot = sc->sc_devcfg_iot;
180 	bus_space_handle_t ioh = sc->sc_devcfg_ioh;
181 	uint8_t val;
182 
183 	val = bus_space_read_1(iot, ioh, index);
184 
185 	DPRINTFR("read_1", "%02x", val, index, 1);
186 	return val;
187 }
188 
189 uint16_t
190 virtio_read_device_config_2(struct virtio_softc *sc, int index)
191 {
192 	bus_space_tag_t	   iot = sc->sc_devcfg_iot;
193 	bus_space_handle_t ioh = sc->sc_devcfg_ioh;
194 	uint16_t val;
195 
196 	val = bus_space_read_2(iot, ioh, index);
197 	if (BYTE_ORDER != sc->sc_bus_endian)
198 		val = bswap16(val);
199 
200 	DPRINTFR("read_2", "%04x", val, index, 2);
201 	DPRINTFR2("read_2", "%04x",
202 	    bus_space_read_stream_2(sc->sc_devcfg_iot, sc->sc_devcfg_ioh,
203 		index),
204 	    bus_space_read_2(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, index));
205 	return val;
206 }
207 
208 uint32_t
209 virtio_read_device_config_4(struct virtio_softc *sc, int index)
210 {
211 	bus_space_tag_t	   iot = sc->sc_devcfg_iot;
212 	bus_space_handle_t ioh = sc->sc_devcfg_ioh;
213 	uint32_t val;
214 
215 	val = bus_space_read_4(iot, ioh, index);
216 	if (BYTE_ORDER != sc->sc_bus_endian)
217 		val = bswap32(val);
218 
219 	DPRINTFR("read_4", "%08x", val, index, 4);
220 	DPRINTFR2("read_4", "%08x",
221 	    bus_space_read_stream_4(sc->sc_devcfg_iot, sc->sc_devcfg_ioh,
222 		index),
223 	    bus_space_read_4(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, index));
224 	return val;
225 }
226 
/*
 * The Virtio spec explicitly states that 8-byte reads and writes are not
 * considered atomic and that no triggers may be connected to reading or
 * writing them. We access the value using two 32-bit reads. See virtio
 * spec 4.1.3.1.
 */
232 uint64_t
233 virtio_read_device_config_8(struct virtio_softc *sc, int index)
234 {
235 	bus_space_tag_t	   iot = sc->sc_devcfg_iot;
236 	bus_space_handle_t ioh = sc->sc_devcfg_ioh;
237 	union {
238 		uint64_t u64;
239 		uint32_t l[2];
240 	} v;
241 	uint64_t val;
242 
243 	v.l[0] = bus_space_read_4(iot, ioh, index);
244 	v.l[1] = bus_space_read_4(iot, ioh, index + 4);
245 	if (sc->sc_bus_endian != sc->sc_struct_endian) {
246 		v.l[0] = bswap32(v.l[0]);
247 		v.l[1] = bswap32(v.l[1]);
248 	}
249 	val = v.u64;
250 
251 	if (BYTE_ORDER != sc->sc_struct_endian)
252 		val = bswap64(val);
253 
254 	DPRINTFR("read_8", "%08"PRIx64, val, index, 8);
255 	DPRINTFR2("read_8 low ", "%08x",
256 	    bus_space_read_stream_4(sc->sc_devcfg_iot, sc->sc_devcfg_ioh,
257 		index),
258 	    bus_space_read_4(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, index));
259 	DPRINTFR2("read_8 high ", "%08x",
260 	    bus_space_read_stream_4(sc->sc_devcfg_iot, sc->sc_devcfg_ioh,
261 		index + 4),
262 	    bus_space_read_4(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, index + 4));
263 	return val;
264 }
265 
266 /*
267  * In the older virtio spec, device config registers are host endian. On newer
268  * they are little endian. Some newer devices however explicitly specify their
269  * register to always be little endian. These functions cater for these.
270  */
271 uint16_t
272 virtio_read_device_config_le_2(struct virtio_softc *sc, int index)
273 {
274 	bus_space_tag_t	   iot = sc->sc_devcfg_iot;
275 	bus_space_handle_t ioh = sc->sc_devcfg_ioh;
276 	uint16_t val;
277 
278 	val = bus_space_read_2(iot, ioh, index);
279 #if !defined(__aarch64__) && !defined(__arm__)
280 	/*
281 	 * For big-endian aarch64/armv7, bus endian is always LSB, but
282 	 * byte-order is automatically swapped by bus_space(9) (see also
283 	 * comments in virtio_pci.c). Therefore, no need to swap here.
284 	 */
285 	if (sc->sc_bus_endian != LITTLE_ENDIAN)
286 		val = bswap16(val);
287 #endif
288 
289 	DPRINTFR("read_le_2", "%04x", val, index, 2);
290 	DPRINTFR2("read_le_2", "%04x",
291 	    bus_space_read_stream_2(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, 0),
292 	    bus_space_read_2(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, 0));
293 	return val;
294 }
295 
296 uint32_t
297 virtio_read_device_config_le_4(struct virtio_softc *sc, int index)
298 {
299 	bus_space_tag_t	   iot = sc->sc_devcfg_iot;
300 	bus_space_handle_t ioh = sc->sc_devcfg_ioh;
301 	uint32_t val;
302 
303 	val = bus_space_read_4(iot, ioh, index);
304 #if !defined(__aarch64__) && !defined(__arm__)
305 	/* See virtio_read_device_config_le_2() above. */
306 	if (sc->sc_bus_endian != LITTLE_ENDIAN)
307 		val = bswap32(val);
308 #endif
309 
310 	DPRINTFR("read_le_4", "%08x", val, index, 4);
311 	DPRINTFR2("read_le_4", "%08x",
312 	    bus_space_read_stream_4(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, 0),
313 	    bus_space_read_4(sc->sc_devcfg_iot, sc->sc_devcfg_ioh, 0));
314 	return val;
315 }
316 
317 void
318 virtio_write_device_config_1(struct virtio_softc *sc, int index, uint8_t value)
319 {
320 	bus_space_tag_t	   iot = sc->sc_devcfg_iot;
321 	bus_space_handle_t ioh = sc->sc_devcfg_ioh;
322 
323 	bus_space_write_1(iot, ioh, index, value);
324 }
325 
326 void
327 virtio_write_device_config_2(struct virtio_softc *sc, int index,
328     uint16_t value)
329 {
330 	bus_space_tag_t	   iot = sc->sc_devcfg_iot;
331 	bus_space_handle_t ioh = sc->sc_devcfg_ioh;
332 
333 	if (BYTE_ORDER != sc->sc_bus_endian)
334 		value = bswap16(value);
335 	bus_space_write_2(iot, ioh, index, value);
336 }
337 
338 void
339 virtio_write_device_config_4(struct virtio_softc *sc, int index,
340     uint32_t value)
341 {
342 	bus_space_tag_t	   iot = sc->sc_devcfg_iot;
343 	bus_space_handle_t ioh = sc->sc_devcfg_ioh;
344 
345 	if (BYTE_ORDER != sc->sc_bus_endian)
346 		value = bswap32(value);
347 	bus_space_write_4(iot, ioh, index, value);
348 }
349 
/*
 * The Virtio spec explicitly states that 8-byte reads and writes are not
 * considered atomic and that no triggers may be connected to reading or
 * writing them. We access the value using two 32-bit writes. For good
 * measure it is stated to always write the lsb first, just in case of a
 * hypervisor bug. See virtio spec 4.1.3.1.
 */
357 void
358 virtio_write_device_config_8(struct virtio_softc *sc, int index,
359     uint64_t value)
360 {
361 	bus_space_tag_t	   iot = sc->sc_devcfg_iot;
362 	bus_space_handle_t ioh = sc->sc_devcfg_ioh;
363 	union {
364 		uint64_t u64;
365 		uint32_t l[2];
366 	} v;
367 
368 	if (BYTE_ORDER != sc->sc_struct_endian)
369 		value = bswap64(value);
370 
371 	v.u64 = value;
372 	if (sc->sc_bus_endian != sc->sc_struct_endian) {
373 		v.l[0] = bswap32(v.l[0]);
374 		v.l[1] = bswap32(v.l[1]);
375 	}
376 
377 	if (sc->sc_struct_endian == LITTLE_ENDIAN) {
378 		bus_space_write_4(iot, ioh, index,     v.l[0]);
379 		bus_space_write_4(iot, ioh, index + 4, v.l[1]);
380 	} else {
381 		bus_space_write_4(iot, ioh, index + 4, v.l[1]);
382 		bus_space_write_4(iot, ioh, index,     v.l[0]);
383 	}
384 }
385 
386 /*
387  * In the older virtio spec, device config registers are host endian. On newer
388  * they are little endian. Some newer devices however explicitly specify their
389  * register to always be little endian. These functions cater for these.
390  */
391 void
392 virtio_write_device_config_le_2(struct virtio_softc *sc, int index,
393     uint16_t value)
394 {
395 	bus_space_tag_t	   iot = sc->sc_devcfg_iot;
396 	bus_space_handle_t ioh = sc->sc_devcfg_ioh;
397 
398 	if (sc->sc_bus_endian != LITTLE_ENDIAN)
399 		value = bswap16(value);
400 	bus_space_write_2(iot, ioh, index, value);
401 }
402 
403 void
404 virtio_write_device_config_le_4(struct virtio_softc *sc, int index,
405     uint32_t value)
406 {
407 	bus_space_tag_t	   iot = sc->sc_devcfg_iot;
408 	bus_space_handle_t ioh = sc->sc_devcfg_ioh;
409 
410 	if (sc->sc_bus_endian != LITTLE_ENDIAN)
411 		value = bswap32(value);
412 	bus_space_write_4(iot, ioh, index, value);
413 }
414 
415 
416 /*
417  * data structures endian helpers
418  */
419 uint16_t
420 virtio_rw16(struct virtio_softc *sc, uint16_t val)
421 {
422 	KASSERT(sc);
423 	return BYTE_ORDER != sc->sc_struct_endian ? bswap16(val) : val;
424 }
425 
426 uint32_t
427 virtio_rw32(struct virtio_softc *sc, uint32_t val)
428 {
429 	KASSERT(sc);
430 	return BYTE_ORDER != sc->sc_struct_endian ? bswap32(val) : val;
431 }
432 
433 uint64_t
434 virtio_rw64(struct virtio_softc *sc, uint64_t val)
435 {
436 	KASSERT(sc);
437 	return BYTE_ORDER != sc->sc_struct_endian ? bswap64(val) : val;
438 }
439 
440 
441 /*
442  * Interrupt handler.
443  */
444 static void
445 virtio_soft_intr(void *arg)
446 {
447 	struct virtio_softc *sc = arg;
448 
449 	KASSERT(sc->sc_intrhand != NULL);
450 
451 	(*sc->sc_intrhand)(sc);
452 }
453 
454 /* set to vq->vq_intrhand in virtio_init_vq_vqdone() */
455 static int
456 virtio_vq_done(void *xvq)
457 {
458 	struct virtqueue *vq = xvq;
459 
460 	return vq->vq_done(vq);
461 }
462 
463 static int
464 virtio_vq_intr(struct virtio_softc *sc)
465 {
466 	struct virtqueue *vq;
467 	int i, r = 0;
468 
469 	for (i = 0; i < sc->sc_nvqs; i++) {
470 		vq = &sc->sc_vqs[i];
471 		if (virtio_vq_is_enqueued(sc, vq) == 1) {
472 			r |= (*vq->vq_intrhand)(vq->vq_intrhand_arg);
473 		}
474 	}
475 
476 	return r;
477 }
478 
479 /*
480  * dmamap sync operations for a virtqueue.
481  */
482 static inline void
483 vq_sync_descs(struct virtio_softc *sc, struct virtqueue *vq, int ops)
484 {
485 
486 	/* availoffset == sizeof(vring_desc) * vq_num */
487 	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap, 0, vq->vq_availoffset,
488 	    ops);
489 }
490 
491 static inline void
492 vq_sync_aring_all(struct virtio_softc *sc, struct virtqueue *vq, int ops)
493 {
494 	uint16_t hdrlen = offsetof(struct vring_avail, ring);
495 	size_t payloadlen = vq->vq_num * sizeof(uint16_t);
496 	size_t usedlen = 0;
497 
498 	if (sc->sc_active_features & VIRTIO_F_RING_EVENT_IDX)
499 		usedlen = sizeof(uint16_t);
500 	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
501 	    vq->vq_availoffset, hdrlen + payloadlen + usedlen, ops);
502 }
503 
504 static inline void
505 vq_sync_aring_header(struct virtio_softc *sc, struct virtqueue *vq, int ops)
506 {
507 	uint16_t hdrlen = offsetof(struct vring_avail, ring);
508 
509 	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
510 	    vq->vq_availoffset, hdrlen, ops);
511 }
512 
513 static inline void
514 vq_sync_aring_payload(struct virtio_softc *sc, struct virtqueue *vq, int ops)
515 {
516 	uint16_t hdrlen = offsetof(struct vring_avail, ring);
517 	size_t payloadlen = vq->vq_num * sizeof(uint16_t);
518 
519 	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
520 	    vq->vq_availoffset + hdrlen, payloadlen, ops);
521 }
522 
523 static inline void
524 vq_sync_aring_used(struct virtio_softc *sc, struct virtqueue *vq, int ops)
525 {
526 	uint16_t hdrlen = offsetof(struct vring_avail, ring);
527 	size_t payloadlen = vq->vq_num * sizeof(uint16_t);
528 	size_t usedlen = sizeof(uint16_t);
529 
530 	if ((sc->sc_active_features & VIRTIO_F_RING_EVENT_IDX) == 0)
531 		return;
532 	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
533 	    vq->vq_availoffset + hdrlen + payloadlen, usedlen, ops);
534 }
535 
536 static inline void
537 vq_sync_uring_all(struct virtio_softc *sc, struct virtqueue *vq, int ops)
538 {
539 	uint16_t hdrlen = offsetof(struct vring_used, ring);
540 	size_t payloadlen = vq->vq_num * sizeof(struct vring_used_elem);
541 	size_t availlen = 0;
542 
543 	if (sc->sc_active_features & VIRTIO_F_RING_EVENT_IDX)
544 		availlen = sizeof(uint16_t);
545 	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
546 	    vq->vq_usedoffset, hdrlen + payloadlen + availlen, ops);
547 }
548 
549 static inline void
550 vq_sync_uring_header(struct virtio_softc *sc, struct virtqueue *vq, int ops)
551 {
552 	uint16_t hdrlen = offsetof(struct vring_used, ring);
553 
554 	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
555 	    vq->vq_usedoffset, hdrlen, ops);
556 }
557 
558 static inline void
559 vq_sync_uring_payload(struct virtio_softc *sc, struct virtqueue *vq, int ops)
560 {
561 	uint16_t hdrlen = offsetof(struct vring_used, ring);
562 	size_t payloadlen = vq->vq_num * sizeof(struct vring_used_elem);
563 
564 	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
565 	    vq->vq_usedoffset + hdrlen, payloadlen, ops);
566 }
567 
568 static inline void
569 vq_sync_uring_avail(struct virtio_softc *sc, struct virtqueue *vq, int ops)
570 {
571 	uint16_t hdrlen = offsetof(struct vring_used, ring);
572 	size_t payloadlen = vq->vq_num * sizeof(struct vring_used_elem);
573 	size_t availlen = sizeof(uint16_t);
574 
575 	if ((sc->sc_active_features & VIRTIO_F_RING_EVENT_IDX) == 0)
576 		return;
577 	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
578 	    vq->vq_usedoffset + hdrlen + payloadlen, availlen, ops);
579 }
580 
581 static inline void
582 vq_sync_indirect(struct virtio_softc *sc, struct virtqueue *vq, int slot,
583     int ops)
584 {
585 	int offset = vq->vq_indirectoffset +
586 	    sizeof(struct vring_desc) * vq->vq_maxnsegs * slot;
587 
588 	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap,
589 	    offset, sizeof(struct vring_desc) * vq->vq_maxnsegs, ops);
590 }
591 
592 bool
593 virtio_vq_is_enqueued(struct virtio_softc *sc, struct virtqueue *vq)
594 {
595 
596 	if (vq->vq_queued) {
597 		vq->vq_queued = 0;
598 		vq_sync_aring_all(sc, vq, BUS_DMASYNC_POSTWRITE);
599 	}
600 
601 	vq_sync_uring_header(sc, vq, BUS_DMASYNC_POSTREAD);
602 	if (vq->vq_used_idx == virtio_rw16(sc, vq->vq_used->idx))
603 		return 0;
604 	vq_sync_uring_payload(sc, vq, BUS_DMASYNC_POSTREAD);
605 	return 1;
606 }
607 
608 /*
609  * Increase the event index in order to delay interrupts.
610  */
611 int
612 virtio_postpone_intr(struct virtio_softc *sc, struct virtqueue *vq,
613     uint16_t nslots)
614 {
615 	uint16_t	idx, nused;
616 
617 	idx = vq->vq_used_idx + nslots;
618 
619 	/* set the new event index: avail_ring->used_event = idx */
620 	*vq->vq_used_event = virtio_rw16(sc, idx);
621 	vq_sync_aring_used(vq->vq_owner, vq, BUS_DMASYNC_PREWRITE);
622 	vq->vq_queued++;
623 
624 	nused = (uint16_t)
625 	    (virtio_rw16(sc, vq->vq_used->idx) - vq->vq_used_idx);
626 	KASSERT(nused <= vq->vq_num);
627 
628 	return nslots < nused;
629 }
630 
631 /*
632  * Postpone interrupt until 3/4 of the available descriptors have been
633  * consumed.
634  */
635 int
636 virtio_postpone_intr_smart(struct virtio_softc *sc, struct virtqueue *vq)
637 {
638 	uint16_t	nslots;
639 
640 	nslots = (uint16_t)
641 	    (virtio_rw16(sc, vq->vq_avail->idx) - vq->vq_used_idx) * 3 / 4;
642 
643 	return virtio_postpone_intr(sc, vq, nslots);
644 }
645 
646 /*
647  * Postpone interrupt until all of the available descriptors have been
648  * consumed.
649  */
650 int
651 virtio_postpone_intr_far(struct virtio_softc *sc, struct virtqueue *vq)
652 {
653 	uint16_t	nslots;
654 
655 	nslots = (uint16_t)
656 	    (virtio_rw16(sc, vq->vq_avail->idx) - vq->vq_used_idx);
657 
658 	return virtio_postpone_intr(sc, vq, nslots);
659 }
660 
661 /*
662  * Start/stop vq interrupt.  No guarantee.
663  */
664 void
665 virtio_stop_vq_intr(struct virtio_softc *sc, struct virtqueue *vq)
666 {
667 
668 	if (sc->sc_active_features & VIRTIO_F_RING_EVENT_IDX) {
669 		/*
670 		 * No way to disable the interrupt completely with
671 		 * RingEventIdx. Instead advance used_event by half the
672 		 * possible value. This won't happen soon and is far enough in
673 		 * the past to not trigger a spurious interrupt.
674 		 */
675 		*vq->vq_used_event = virtio_rw16(sc, vq->vq_used_idx + 0x8000);
676 		vq_sync_aring_used(sc, vq, BUS_DMASYNC_PREWRITE);
677 	} else {
678 		vq->vq_avail->flags |=
679 		    virtio_rw16(sc, VRING_AVAIL_F_NO_INTERRUPT);
680 		vq_sync_aring_header(sc, vq, BUS_DMASYNC_PREWRITE);
681 	}
682 	vq->vq_queued++;
683 }
684 
685 int
686 virtio_start_vq_intr(struct virtio_softc *sc, struct virtqueue *vq)
687 {
688 
689 	if (sc->sc_active_features & VIRTIO_F_RING_EVENT_IDX) {
690 		/*
691 		 * If event index feature is negotiated, enabling interrupts
692 		 * is done through setting the latest consumed index in the
693 		 * used_event field
694 		 */
695 		*vq->vq_used_event = virtio_rw16(sc, vq->vq_used_idx);
696 		vq_sync_aring_used(sc, vq, BUS_DMASYNC_PREWRITE);
697 	} else {
698 		vq->vq_avail->flags &=
699 		    ~virtio_rw16(sc, VRING_AVAIL_F_NO_INTERRUPT);
700 		vq_sync_aring_header(sc, vq, BUS_DMASYNC_PREWRITE);
701 	}
702 	vq->vq_queued++;
703 
704 	vq_sync_uring_header(sc, vq, BUS_DMASYNC_POSTREAD);
705 	if (vq->vq_used_idx == virtio_rw16(sc, vq->vq_used->idx))
706 		return 0;
707 	vq_sync_uring_payload(sc, vq, BUS_DMASYNC_POSTREAD);
708 	return 1;
709 }
710 
711 /*
712  * Initialize vq structure.
713  */
714 /*
715  * Reset virtqueue parameters
716  */
717 static void
718 virtio_reset_vq(struct virtio_softc *sc, struct virtqueue *vq)
719 {
720 	struct vring_desc *vds;
721 	int i, j;
722 	int vq_size = vq->vq_num;
723 
724 	memset(vq->vq_vaddr, 0, vq->vq_bytesize);
725 
726 	/* build the descriptor chain for free slot management */
727 	vds = vq->vq_desc;
728 	for (i = 0; i < vq_size - 1; i++) {
729 		vds[i].next = virtio_rw16(sc, i + 1);
730 	}
731 	vds[i].next = virtio_rw16(sc, VRING_DESC_CHAIN_END);
732 	vq->vq_free_idx = 0;
733 
734 	/* build the indirect descriptor chain */
735 	if (vq->vq_indirect != NULL) {
736 		struct vring_desc *vd;
737 
738 		for (i = 0; i < vq_size; i++) {
739 			vd = vq->vq_indirect;
740 			vd += vq->vq_maxnsegs * i;
741 			for (j = 0; j < vq->vq_maxnsegs - 1; j++) {
742 				vd[j].next = virtio_rw16(sc, j + 1);
743 			}
744 		}
745 	}
746 
747 	/* enqueue/dequeue status */
748 	vq->vq_avail_idx = 0;
749 	vq->vq_used_idx = 0;
750 	vq->vq_queued = 0;
751 	vq_sync_uring_all(sc, vq, BUS_DMASYNC_PREREAD);
752 	vq->vq_queued++;
753 }
754 
755 /* Initialize vq */
756 void
757 virtio_init_vq_vqdone(struct virtio_softc *sc, struct virtqueue *vq,
758     int index, int (*vq_done)(struct virtqueue *))
759 {
760 
761 	virtio_init_vq(sc, vq, index, virtio_vq_done, vq);
762 	vq->vq_done = vq_done;
763 }
764 
765 void
766 virtio_init_vq(struct virtio_softc *sc, struct virtqueue *vq, int index,
767    int (*func)(void *), void *arg)
768 {
769 
770 	memset(vq, 0, sizeof(*vq));
771 
772 	vq->vq_owner = sc;
773 	vq->vq_num = sc->sc_ops->read_queue_size(sc, index);
774 	vq->vq_index = index;
775 	vq->vq_intrhand = func;
776 	vq->vq_intrhand_arg = arg;
777 }
778 
779 /*
780  * Allocate/free a vq.
781  */
782 int
783 virtio_alloc_vq(struct virtio_softc *sc, struct virtqueue *vq,
784     int maxsegsize, int maxnsegs, const char *name)
785 {
786 	bus_size_t size_desc, size_avail, size_used, size_indirect;
787 	bus_size_t allocsize = 0, size_desc_avail;
788 	int rsegs, r, hdrlen;
789 	unsigned int vq_num;
790 #define VIRTQUEUE_ALIGN(n)	roundup(n, VIRTIO_PAGE_SIZE)
791 
792 	vq_num = vq->vq_num;
793 
794 	if (vq_num == 0) {
795 		aprint_error_dev(sc->sc_dev,
796 		    "virtqueue not exist, index %d for %s\n",
797 		    vq->vq_index, name);
798 		goto err;
799 	}
800 
801 	hdrlen = sc->sc_active_features & VIRTIO_F_RING_EVENT_IDX ? 3 : 2;
802 
803 	size_desc = sizeof(vq->vq_desc[0]) * vq_num;
804 	size_avail = sizeof(uint16_t) * hdrlen
805 	    + sizeof(vq->vq_avail[0].ring[0]) * vq_num;
806 	size_used = sizeof(uint16_t) *hdrlen
807 	    + sizeof(vq->vq_used[0].ring[0]) * vq_num;
808 	size_indirect = (sc->sc_indirect && maxnsegs >= MINSEG_INDIRECT) ?
809 	    sizeof(struct vring_desc) * maxnsegs * vq_num : 0;
810 
811 	size_desc_avail = VIRTQUEUE_ALIGN(size_desc + size_avail);
812 	size_used = VIRTQUEUE_ALIGN(size_used);
813 
814 	allocsize = size_desc_avail + size_used + size_indirect;
815 
816 	/* alloc and map the memory */
817 	r = bus_dmamem_alloc(sc->sc_dmat, allocsize, VIRTIO_PAGE_SIZE, 0,
818 	    &vq->vq_segs[0], 1, &rsegs, BUS_DMA_WAITOK);
819 	if (r != 0) {
820 		aprint_error_dev(sc->sc_dev,
821 		    "virtqueue %d for %s allocation failed, "
822 		    "error code %d\n", vq->vq_index, name, r);
823 		goto err;
824 	}
825 
826 	r = bus_dmamem_map(sc->sc_dmat, &vq->vq_segs[0], rsegs, allocsize,
827 	    &vq->vq_vaddr, BUS_DMA_WAITOK);
828 	if (r != 0) {
829 		aprint_error_dev(sc->sc_dev,
830 		    "virtqueue %d for %s map failed, "
831 		    "error code %d\n", vq->vq_index, name, r);
832 		goto err;
833 	}
834 
835 	r = bus_dmamap_create(sc->sc_dmat, allocsize, 1, allocsize, 0,
836 	    BUS_DMA_WAITOK, &vq->vq_dmamap);
837 	if (r != 0) {
838 		aprint_error_dev(sc->sc_dev,
839 		    "virtqueue %d for %s dmamap creation failed, "
840 		    "error code %d\n", vq->vq_index, name, r);
841 		goto err;
842 	}
843 
844 	r = bus_dmamap_load(sc->sc_dmat, vq->vq_dmamap,
845 	    vq->vq_vaddr, allocsize, NULL, BUS_DMA_WAITOK);
846 	if (r != 0) {
847 		aprint_error_dev(sc->sc_dev,
848 		    "virtqueue %d for %s dmamap load failed, "
849 		    "error code %d\n", vq->vq_index, name, r);
850 		goto err;
851 	}
852 
853 	vq->vq_bytesize = allocsize;
854 	vq->vq_maxsegsize = maxsegsize;
855 	vq->vq_maxnsegs = maxnsegs;
856 
857 #define VIRTIO_PTR(base, offset)	(void *)((intptr_t)(base) + (offset))
858 	/* initialize vring pointers */
859 	vq->vq_desc = VIRTIO_PTR(vq->vq_vaddr, 0);
860 	vq->vq_availoffset = size_desc;
861 	vq->vq_avail = VIRTIO_PTR(vq->vq_vaddr, vq->vq_availoffset);
862 	vq->vq_used_event = VIRTIO_PTR(vq->vq_avail,
863 	    offsetof(struct vring_avail, ring[vq_num]));
864 	vq->vq_usedoffset = size_desc_avail;
865 	vq->vq_used = VIRTIO_PTR(vq->vq_vaddr, vq->vq_usedoffset);
866 	vq->vq_avail_event = VIRTIO_PTR(vq->vq_used,
867 	    offsetof(struct vring_used, ring[vq_num]));
868 
869 	if (size_indirect > 0) {
870 		vq->vq_indirectoffset = size_desc_avail + size_used;
871 		vq->vq_indirect = VIRTIO_PTR(vq->vq_vaddr,
872 		    vq->vq_indirectoffset);
873 	}
874 #undef VIRTIO_PTR
875 
876 	vq->vq_descx = kmem_zalloc(sizeof(vq->vq_descx[0]) * vq_num,
877 	    KM_SLEEP);
878 
879 	mutex_init(&vq->vq_freedesc_lock, MUTEX_SPIN, sc->sc_ipl);
880 	mutex_init(&vq->vq_aring_lock, MUTEX_SPIN, sc->sc_ipl);
881 	mutex_init(&vq->vq_uring_lock, MUTEX_SPIN, sc->sc_ipl);
882 
883 	virtio_reset_vq(sc, vq);
884 
885 	aprint_verbose_dev(sc->sc_dev,
886 	    "allocated %" PRIuBUSSIZE " byte for virtqueue %d for %s, "
887 	    "size %d\n", allocsize, vq->vq_index, name, vq_num);
888 	if (size_indirect > 0)
889 		aprint_verbose_dev(sc->sc_dev,
890 		    "using %" PRIuBUSSIZE " byte (%d entries) indirect "
891 		    "descriptors\n", size_indirect, maxnsegs * vq_num);
892 
893 	return 0;
894 
895 err:
896 	sc->sc_ops->setup_queue(sc, vq->vq_index, 0);
897 	if (vq->vq_dmamap)
898 		bus_dmamap_destroy(sc->sc_dmat, vq->vq_dmamap);
899 	if (vq->vq_vaddr)
900 		bus_dmamem_unmap(sc->sc_dmat, vq->vq_vaddr, allocsize);
901 	if (vq->vq_segs[0].ds_addr)
902 		bus_dmamem_free(sc->sc_dmat, &vq->vq_segs[0], 1);
903 	memset(vq, 0, sizeof(*vq));
904 
905 	return -1;
906 }
907 
908 int
909 virtio_free_vq(struct virtio_softc *sc, struct virtqueue *vq)
910 {
911 	uint16_t s;
912 	size_t i;
913 
914 	if (vq->vq_vaddr == NULL)
915 		return 0;
916 
917 	/* device must be already deactivated */
918 	/* confirm the vq is empty */
919 	s = vq->vq_free_idx;
920 	i = 0;
921 	while (s != virtio_rw16(sc, VRING_DESC_CHAIN_END)) {
922 		s = vq->vq_desc[s].next;
923 		i++;
924 	}
925 	if (i != vq->vq_num) {
926 		printf("%s: freeing non-empty vq, index %d\n",
927 		    device_xname(sc->sc_dev), vq->vq_index);
928 		return EBUSY;
929 	}
930 
931 	/* tell device that there's no virtqueue any longer */
932 	sc->sc_ops->setup_queue(sc, vq->vq_index, 0);
933 
934 	vq_sync_aring_all(sc, vq, BUS_DMASYNC_POSTWRITE);
935 
936 	kmem_free(vq->vq_descx, sizeof(vq->vq_descx[0]) * vq->vq_num);
937 	bus_dmamap_unload(sc->sc_dmat, vq->vq_dmamap);
938 	bus_dmamap_destroy(sc->sc_dmat, vq->vq_dmamap);
939 	bus_dmamem_unmap(sc->sc_dmat, vq->vq_vaddr, vq->vq_bytesize);
940 	bus_dmamem_free(sc->sc_dmat, &vq->vq_segs[0], 1);
941 	mutex_destroy(&vq->vq_freedesc_lock);
942 	mutex_destroy(&vq->vq_uring_lock);
943 	mutex_destroy(&vq->vq_aring_lock);
944 	memset(vq, 0, sizeof(*vq));
945 
946 	return 0;
947 }
948 
949 /*
950  * Free descriptor management.
951  */
952 static int
953 vq_alloc_slot_locked(struct virtio_softc *sc, struct virtqueue *vq,
954     size_t nslots)
955 {
956 	struct vring_desc *vd;
957 	uint16_t head, tail;
958 	size_t i;
959 
960 	KASSERT(mutex_owned(&vq->vq_freedesc_lock));
961 
962 	head = tail = virtio_rw16(sc, vq->vq_free_idx);
963 	for (i = 0; i < nslots - 1; i++) {
964 		if (tail == VRING_DESC_CHAIN_END)
965 			return VRING_DESC_CHAIN_END;
966 
967 		vd = &vq->vq_desc[tail];
968 		vd->flags = virtio_rw16(sc, VRING_DESC_F_NEXT);
969 		tail = virtio_rw16(sc, vd->next);
970 	}
971 
972 	if (tail == VRING_DESC_CHAIN_END)
973 		return VRING_DESC_CHAIN_END;
974 
975 	vd = &vq->vq_desc[tail];
976 	vd->flags = virtio_rw16(sc, 0);
977 	vq->vq_free_idx = vd->next;
978 
979 	return head;
980 }
981 static uint16_t
982 vq_alloc_slot(struct virtio_softc *sc, struct virtqueue *vq, size_t nslots)
983 {
984 	uint16_t rv;
985 
986 	mutex_enter(&vq->vq_freedesc_lock);
987 	rv = vq_alloc_slot_locked(sc, vq, nslots);
988 	mutex_exit(&vq->vq_freedesc_lock);
989 
990 	return rv;
991 }
992 
993 static void
994 vq_free_slot(struct virtio_softc *sc, struct virtqueue *vq, uint16_t slot)
995 {
996 	struct vring_desc *vd;
997 	uint16_t s;
998 
999 	mutex_enter(&vq->vq_freedesc_lock);
1000 	vd = &vq->vq_desc[slot];
1001 	while ((vd->flags & virtio_rw16(sc, VRING_DESC_F_NEXT)) != 0) {
1002 		s = virtio_rw16(sc, vd->next);
1003 		vd = &vq->vq_desc[s];
1004 	}
1005 	vd->next = vq->vq_free_idx;
1006 	vq->vq_free_idx = virtio_rw16(sc, slot);
1007 	mutex_exit(&vq->vq_freedesc_lock);
1008 }
1009 
1010 /*
1011  * Enqueue several dmamaps as a single request.
1012  */
1013 /*
1014  * Typical usage:
1015  *  <queue size> number of followings are stored in arrays
1016  *  - command blocks (in dmamem) should be pre-allocated and mapped
1017  *  - dmamaps for command blocks should be pre-allocated and loaded
1018  *  - dmamaps for payload should be pre-allocated
1019  *      r = virtio_enqueue_prep(sc, vq, &slot);		// allocate a slot
1020  *	if (r)		// currently 0 or EAGAIN
1021  *		return r;
1022  *	r = bus_dmamap_load(dmat, dmamap_payload[slot], data, count, ..);
1023  *	if (r) {
1024  *		virtio_enqueue_abort(sc, vq, slot);
1025  *		return r;
1026  *	}
1027  *	r = virtio_enqueue_reserve(sc, vq, slot,
1028  *	    dmamap_payload[slot]->dm_nsegs + 1);
1029  *							// ^ +1 for command
1030  *	if (r) {	// currently 0 or EAGAIN
1031  *		bus_dmamap_unload(dmat, dmamap_payload[slot]);
1032  *		return r;				// do not call abort()
1033  *	}
1034  *	<setup and prepare commands>
1035  *	bus_dmamap_sync(dmat, dmamap_cmd[slot],... BUS_DMASYNC_PREWRITE);
1036  *	bus_dmamap_sync(dmat, dmamap_payload[slot],...);
1037  *	virtio_enqueue(sc, vq, slot, dmamap_cmd[slot], false);
1038  *	virtio_enqueue(sc, vq, slot, dmamap_payload[slot], iswrite);
1039  *	virtio_enqueue_commit(sc, vq, slot, true);
1040  */
1041 
1042 /*
1043  * enqueue_prep: allocate a slot number
1044  */
1045 int
1046 virtio_enqueue_prep(struct virtio_softc *sc, struct virtqueue *vq, int *slotp)
1047 {
1048 	uint16_t slot;
1049 
1050 	KASSERT(slotp != NULL);
1051 
1052 	slot = vq_alloc_slot(sc, vq, 1);
1053 	if (slot == VRING_DESC_CHAIN_END)
1054 		return EAGAIN;
1055 
1056 	*slotp = slot;
1057 
1058 	return 0;
1059 }
1060 
1061 /*
1062  * enqueue_reserve: allocate remaining slots and build the descriptor chain.
1063  */
1064 int
1065 virtio_enqueue_reserve(struct virtio_softc *sc, struct virtqueue *vq,
1066     int slot, int nsegs)
1067 {
1068 	struct vring_desc *vd;
1069 	struct vring_desc_extra *vdx;
1070 	int i;
1071 
1072 	KASSERT(1 <= nsegs);
1073 	KASSERT(nsegs <= vq->vq_num);
1074 
1075 	vdx = &vq->vq_descx[slot];
1076 	vd = &vq->vq_desc[slot];
1077 
1078 	KASSERT((vd->flags & virtio_rw16(sc, VRING_DESC_F_NEXT)) == 0);
1079 
1080 	if ((vq->vq_indirect != NULL) &&
1081 	    (nsegs >= MINSEG_INDIRECT) &&
1082 	    (nsegs <= vq->vq_maxnsegs))
1083 		vdx->use_indirect = true;
1084 	else
1085 		vdx->use_indirect = false;
1086 
1087 	if (vdx->use_indirect) {
1088 		uint64_t addr;
1089 
1090 		addr = vq->vq_dmamap->dm_segs[0].ds_addr
1091 		    + vq->vq_indirectoffset;
1092 		addr += sizeof(struct vring_desc)
1093 		    * vq->vq_maxnsegs * slot;
1094 
1095 		vd->addr  = virtio_rw64(sc, addr);
1096 		vd->len   = virtio_rw32(sc, sizeof(struct vring_desc) * nsegs);
1097 		vd->flags = virtio_rw16(sc, VRING_DESC_F_INDIRECT);
1098 
1099 		vd = &vq->vq_indirect[vq->vq_maxnsegs * slot];
1100 		vdx->desc_base = vd;
1101 		vdx->desc_free_idx = 0;
1102 
1103 		for (i = 0; i < nsegs - 1; i++) {
1104 			vd[i].flags = virtio_rw16(sc, VRING_DESC_F_NEXT);
1105 		}
1106 		vd[i].flags  = virtio_rw16(sc, 0);
1107 	} else {
1108 		if (nsegs > 1) {
1109 			uint16_t s;
1110 
1111 			s = vq_alloc_slot(sc, vq, nsegs - 1);
1112 			if (s == VRING_DESC_CHAIN_END) {
1113 				vq_free_slot(sc, vq, slot);
1114 				return EAGAIN;
1115 			}
1116 			vd->next = virtio_rw16(sc, s);
1117 			vd->flags = virtio_rw16(sc, VRING_DESC_F_NEXT);
1118 		}
1119 
1120 		vdx->desc_base = &vq->vq_desc[0];
1121 		vdx->desc_free_idx = slot;
1122 	}
1123 
1124 	return 0;
1125 }
1126 
1127 /*
1128  * enqueue: enqueue a single dmamap.
1129  */
1130 int
1131 virtio_enqueue(struct virtio_softc *sc, struct virtqueue *vq, int slot,
1132     bus_dmamap_t dmamap, bool write)
1133 {
1134 	struct vring_desc *vds;
1135 	struct vring_desc_extra *vdx;
1136 	uint16_t s;
1137 	int i;
1138 
1139 	KASSERT(dmamap->dm_nsegs > 0);
1140 
1141 	vdx = &vq->vq_descx[slot];
1142 	vds = vdx->desc_base;
1143 	s = vdx->desc_free_idx;
1144 
1145 	KASSERT(vds != NULL);
1146 
1147 	for (i = 0; i < dmamap->dm_nsegs; i++) {
1148 		KASSERT(s != VRING_DESC_CHAIN_END);
1149 
1150 		vds[s].addr = virtio_rw64(sc, dmamap->dm_segs[i].ds_addr);
1151 		vds[s].len  = virtio_rw32(sc, dmamap->dm_segs[i].ds_len);
1152 		if (!write)
1153 			vds[s].flags |= virtio_rw16(sc, VRING_DESC_F_WRITE);
1154 
1155 		if ((vds[s].flags & virtio_rw16(sc, VRING_DESC_F_NEXT)) == 0) {
1156 			s = VRING_DESC_CHAIN_END;
1157 		} else {
1158 			s = virtio_rw16(sc, vds[s].next);
1159 		}
1160 	}
1161 
1162 	vdx->desc_free_idx = s;
1163 
1164 	return 0;
1165 }
1166 
1167 int
1168 virtio_enqueue_p(struct virtio_softc *sc, struct virtqueue *vq, int slot,
1169     bus_dmamap_t dmamap, bus_addr_t start, bus_size_t len,
1170     bool write)
1171 {
1172 	struct vring_desc_extra *vdx;
1173 	struct vring_desc *vds;
1174 	uint16_t s;
1175 
1176 	vdx = &vq->vq_descx[slot];
1177 	vds = vdx->desc_base;
1178 	s = vdx->desc_free_idx;
1179 
1180 	KASSERT(s != VRING_DESC_CHAIN_END);
1181 	KASSERT(vds != NULL);
1182 	KASSERT(dmamap->dm_nsegs == 1); /* XXX */
1183 	KASSERT(dmamap->dm_segs[0].ds_len > start);
1184 	KASSERT(dmamap->dm_segs[0].ds_len >= start + len);
1185 
1186 	vds[s].addr = virtio_rw64(sc, dmamap->dm_segs[0].ds_addr + start);
1187 	vds[s].len  = virtio_rw32(sc, len);
1188 	if (!write)
1189 		vds[s].flags |= virtio_rw16(sc, VRING_DESC_F_WRITE);
1190 
1191 	if ((vds[s].flags & virtio_rw16(sc, VRING_DESC_F_NEXT)) == 0) {
1192 		s = VRING_DESC_CHAIN_END;
1193 	} else {
1194 		s = virtio_rw16(sc, vds[s].next);
1195 	}
1196 
1197 	vdx->desc_free_idx = s;
1198 
1199 	return 0;
1200 }
1201 
1202 /*
1203  * enqueue_commit: add it to the aring.
1204  */
1205 int
1206 virtio_enqueue_commit(struct virtio_softc *sc, struct virtqueue *vq, int slot,
1207     bool notifynow)
1208 {
1209 
1210 	if (slot < 0) {
1211 		mutex_enter(&vq->vq_aring_lock);
1212 		goto notify;
1213 	}
1214 
1215 	vq_sync_descs(sc, vq, BUS_DMASYNC_PREWRITE);
1216 	if (vq->vq_descx[slot].use_indirect)
1217 		vq_sync_indirect(sc, vq, slot, BUS_DMASYNC_PREWRITE);
1218 
1219 	mutex_enter(&vq->vq_aring_lock);
1220 	vq->vq_avail->ring[(vq->vq_avail_idx++) % vq->vq_num] =
1221 	    virtio_rw16(sc, slot);
1222 
1223 notify:
1224 	if (notifynow) {
1225 		uint16_t o, n, t;
1226 		uint16_t flags;
1227 
1228 		o = virtio_rw16(sc, vq->vq_avail->idx) - 1;
1229 		n = vq->vq_avail_idx;
1230 
1231 		/*
1232 		 * Prepare for `device->CPU' (host->guest) transfer
1233 		 * into the buffer.  This must happen before we commit
1234 		 * the vq->vq_avail->idx update to ensure we're not
1235 		 * still using the buffer in case program-prior loads
1236 		 * or stores in it get delayed past the store to
1237 		 * vq->vq_avail->idx.
1238 		 */
1239 		vq_sync_uring_all(sc, vq, BUS_DMASYNC_PREREAD);
1240 
1241 		/* ensure payload is published, then avail idx */
1242 		vq_sync_aring_payload(sc, vq, BUS_DMASYNC_PREWRITE);
1243 		vq->vq_avail->idx = virtio_rw16(sc, vq->vq_avail_idx);
1244 		vq_sync_aring_header(sc, vq, BUS_DMASYNC_PREWRITE);
1245 		vq->vq_queued++;
1246 
1247 		if (sc->sc_active_features & VIRTIO_F_RING_EVENT_IDX) {
1248 			vq_sync_uring_avail(sc, vq, BUS_DMASYNC_POSTREAD);
1249 			t = virtio_rw16(sc, *vq->vq_avail_event) + 1;
1250 			if ((uint16_t) (n - t) < (uint16_t) (n - o))
1251 				sc->sc_ops->kick(sc, vq->vq_index);
1252 		} else {
1253 			vq_sync_uring_header(sc, vq, BUS_DMASYNC_POSTREAD);
1254 			flags = virtio_rw16(sc, vq->vq_used->flags);
1255 			if (!(flags & VRING_USED_F_NO_NOTIFY))
1256 				sc->sc_ops->kick(sc, vq->vq_index);
1257 		}
1258 	}
1259 	mutex_exit(&vq->vq_aring_lock);
1260 
1261 	return 0;
1262 }
1263 
1264 /*
1265  * enqueue_abort: rollback.
1266  */
1267 int
1268 virtio_enqueue_abort(struct virtio_softc *sc, struct virtqueue *vq, int slot)
1269 {
1270 	struct vring_desc_extra *vdx;
1271 
1272 	vdx = &vq->vq_descx[slot];
1273 	vdx->desc_free_idx = VRING_DESC_CHAIN_END;
1274 	vdx->desc_base = NULL;
1275 
1276 	vq_free_slot(sc, vq, slot);
1277 
1278 	return 0;
1279 }
1280 
1281 /*
1282  * Dequeue a request.
1283  */
1284 /*
1285  * dequeue: dequeue a request from uring; dmamap_sync for uring is
1286  *	    already done in the interrupt handler.
1287  */
1288 int
1289 virtio_dequeue(struct virtio_softc *sc, struct virtqueue *vq,
1290     int *slotp, int *lenp)
1291 {
1292 	uint16_t slot, usedidx;
1293 
1294 	if (vq->vq_used_idx == virtio_rw16(sc, vq->vq_used->idx))
1295 		return ENOENT;
1296 	mutex_enter(&vq->vq_uring_lock);
1297 	usedidx = vq->vq_used_idx++;
1298 	mutex_exit(&vq->vq_uring_lock);
1299 	usedidx %= vq->vq_num;
1300 	slot = virtio_rw32(sc, vq->vq_used->ring[usedidx].id);
1301 
1302 	if (vq->vq_descx[slot].use_indirect)
1303 		vq_sync_indirect(sc, vq, slot, BUS_DMASYNC_POSTWRITE);
1304 
1305 	if (slotp)
1306 		*slotp = slot;
1307 	if (lenp)
1308 		*lenp = virtio_rw32(sc, vq->vq_used->ring[usedidx].len);
1309 
1310 	return 0;
1311 }
1312 
1313 /*
1314  * dequeue_commit: complete dequeue; the slot is recycled for future use.
1315  *                 if you forget to call this the slot will be leaked.
1316  */
1317 int
1318 virtio_dequeue_commit(struct virtio_softc *sc, struct virtqueue *vq, int slot)
1319 {
1320 	struct vring_desc_extra *vdx;
1321 
1322 	vdx = &vq->vq_descx[slot];
1323 	vdx->desc_base = NULL;
1324 	vdx->desc_free_idx = VRING_DESC_CHAIN_END;
1325 
1326 	vq_free_slot(sc, vq, slot);
1327 
1328 	return 0;
1329 }
1330 
1331 /*
1332  * Attach a child, fill all the members.
1333  */
1334 void
1335 virtio_child_attach_start(struct virtio_softc *sc, device_t child, int ipl,
1336     uint64_t req_features, const char *feat_bits)
1337 {
1338 	char buf[1024];
1339 
1340 	KASSERT(sc->sc_child == NULL);
1341 	KASSERT(sc->sc_child_state == VIRTIO_NO_CHILD);
1342 
1343 	sc->sc_child = child;
1344 	sc->sc_ipl = ipl;
1345 
1346 	virtio_negotiate_features(sc, req_features);
1347 	snprintb(buf, sizeof(buf), feat_bits, sc->sc_active_features);
1348 	aprint_normal(": features: %s\n", buf);
1349 	aprint_naive("\n");
1350 }
1351 
1352 int
1353 virtio_child_attach_finish(struct virtio_softc *sc,
1354     struct virtqueue *vqs, size_t nvqs,
1355     virtio_callback config_change,
1356     int req_flags)
1357 {
1358 	size_t i;
1359 	int r;
1360 
1361 #ifdef DIAGNOSTIC
1362 	KASSERT(nvqs > 0);
1363 #define VIRTIO_ASSERT_FLAGS	(VIRTIO_F_INTR_SOFTINT | VIRTIO_F_INTR_PERVQ)
1364 	KASSERT((req_flags & VIRTIO_ASSERT_FLAGS) != VIRTIO_ASSERT_FLAGS);
1365 #undef VIRTIO_ASSERT_FLAGS
1366 
1367 	for (i = 0; i < nvqs; i++){
1368 		KASSERT(vqs[i].vq_index == i);
1369 		KASSERT(vqs[i].vq_intrhand != NULL);
1370 		KASSERT(vqs[i].vq_done == NULL ||
1371 		    vqs[i].vq_intrhand == virtio_vq_done);
1372 	}
1373 #endif
1374 
1375 
1376 	sc->sc_vqs = vqs;
1377 	sc->sc_nvqs = nvqs;
1378 	sc->sc_config_change = config_change;
1379 	sc->sc_intrhand = virtio_vq_intr;
1380 	sc->sc_flags = req_flags;
1381 
1382 	/* set the vq address */
1383 	for (i = 0; i < nvqs; i++) {
1384 		sc->sc_ops->setup_queue(sc, vqs[i].vq_index,
1385 		    vqs[i].vq_dmamap->dm_segs[0].ds_addr);
1386 	}
1387 
1388 	r = sc->sc_ops->alloc_interrupts(sc);
1389 	if (r != 0) {
1390 		aprint_error_dev(sc->sc_dev,
1391 		    "failed to allocate interrupts\n");
1392 		goto fail;
1393 	}
1394 
1395 	r = sc->sc_ops->setup_interrupts(sc, 0);
1396 	if (r != 0) {
1397 		aprint_error_dev(sc->sc_dev, "failed to setup interrupts\n");
1398 		goto fail;
1399 	}
1400 
1401 	KASSERT(sc->sc_soft_ih == NULL);
1402 	if (sc->sc_flags & VIRTIO_F_INTR_SOFTINT) {
1403 		u_int flags = SOFTINT_NET;
1404 		if (sc->sc_flags & VIRTIO_F_INTR_MPSAFE)
1405 			flags |= SOFTINT_MPSAFE;
1406 
1407 		sc->sc_soft_ih = softint_establish(flags, virtio_soft_intr,
1408 		    sc);
1409 		if (sc->sc_soft_ih == NULL) {
1410 			sc->sc_ops->free_interrupts(sc);
1411 			aprint_error_dev(sc->sc_dev,
1412 			    "failed to establish soft interrupt\n");
1413 			goto fail;
1414 		}
1415 	}
1416 
1417 	sc->sc_child_state = VIRTIO_CHILD_ATTACH_FINISHED;
1418 	virtio_set_status(sc, VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK);
1419 	return 0;
1420 
1421 fail:
1422 	if (sc->sc_soft_ih) {
1423 		softint_disestablish(sc->sc_soft_ih);
1424 		sc->sc_soft_ih = NULL;
1425 	}
1426 
1427 	sc->sc_ops->free_interrupts(sc);
1428 
1429 	virtio_set_status(sc, VIRTIO_CONFIG_DEVICE_STATUS_FAILED);
1430 	return 1;
1431 }
1432 
1433 void
1434 virtio_child_detach(struct virtio_softc *sc)
1435 {
1436 
1437 	/* already detached */
1438 	if (sc->sc_child == NULL)
1439 		return;
1440 
1441 
1442 	virtio_device_reset(sc);
1443 
1444 	sc->sc_ops->free_interrupts(sc);
1445 
1446 	if (sc->sc_soft_ih) {
1447 		softint_disestablish(sc->sc_soft_ih);
1448 		sc->sc_soft_ih = NULL;
1449 	}
1450 
1451 	sc->sc_vqs = NULL;
1452 	sc->sc_child = NULL;
1453 }
1454 
1455 void
1456 virtio_child_attach_failed(struct virtio_softc *sc)
1457 {
1458 	virtio_child_detach(sc);
1459 
1460 	virtio_set_status(sc, VIRTIO_CONFIG_DEVICE_STATUS_FAILED);
1461 
1462 	sc->sc_child_state = VIRTIO_CHILD_ATTACH_FAILED;
1463 }
1464 
1465 bus_dma_tag_t
1466 virtio_dmat(struct virtio_softc *sc)
1467 {
1468 	return sc->sc_dmat;
1469 }
1470 
1471 device_t
1472 virtio_child(struct virtio_softc *sc)
1473 {
1474 	return sc->sc_child;
1475 }
1476 
1477 int
1478 virtio_intrhand(struct virtio_softc *sc)
1479 {
1480 	return (*sc->sc_intrhand)(sc);
1481 }
1482 
1483 uint64_t
1484 virtio_features(struct virtio_softc *sc)
1485 {
1486 	return sc->sc_active_features;
1487 }
1488 
1489 int
1490 virtio_attach_failed(struct virtio_softc *sc)
1491 {
1492 	device_t self = sc->sc_dev;
1493 
1494 	/* no error if its not connected, but its failed */
1495 	if (sc->sc_childdevid == 0)
1496 		return 1;
1497 
1498 	if (sc->sc_child == NULL) {
1499 		switch (sc->sc_child_state) {
1500 		case VIRTIO_CHILD_ATTACH_FAILED:
1501 			aprint_error_dev(self,
1502 			    "virtio configuration failed\n");
1503 			break;
1504 		case VIRTIO_NO_CHILD:
1505 			aprint_error_dev(self,
1506 			    "no matching child driver; not configured\n");
1507 			break;
1508 		default:
1509 			/* sanity check */
1510 			aprint_error_dev(self,
1511 			    "virtio internal error, "
1512 			    "child driver is not configured\n");
1513 			break;
1514 		}
1515 
1516 		return 1;
1517 	}
1518 
1519 	/* sanity check */
1520 	if (sc->sc_child_state != VIRTIO_CHILD_ATTACH_FINISHED) {
1521 		aprint_error_dev(self, "virtio internal error, child driver "
1522 		    "signaled OK but didn't initialize interrupts\n");
1523 		return 1;
1524 	}
1525 
1526 	return 0;
1527 }
1528 
1529 void
1530 virtio_print_device_type(device_t self, int id, int revision)
1531 {
1532 	aprint_normal_dev(self, "%s device (id %d, rev. 0x%02x)\n",
1533 	    (id < NDEVNAMES ? virtio_device_name[id] : "Unknown"),
1534 	    id,
1535 	    revision);
1536 }
1537 
1538 
1539 MODULE(MODULE_CLASS_DRIVER, virtio, NULL);
1540 
1541 #ifdef _MODULE
1542 #include "ioconf.c"
1543 #endif
1544 
1545 static int
1546 virtio_modcmd(modcmd_t cmd, void *opaque)
1547 {
1548 	int error = 0;
1549 
1550 #ifdef _MODULE
1551 	switch (cmd) {
1552 	case MODULE_CMD_INIT:
1553 		error = config_init_component(cfdriver_ioconf_virtio,
1554 		    cfattach_ioconf_virtio, cfdata_ioconf_virtio);
1555 		break;
1556 	case MODULE_CMD_FINI:
1557 		error = config_fini_component(cfdriver_ioconf_virtio,
1558 		    cfattach_ioconf_virtio, cfdata_ioconf_virtio);
1559 		break;
1560 	default:
1561 		error = ENOTTY;
1562 		break;
1563 	}
1564 #endif
1565 
1566 	return error;
1567 }
1568