xref: /openbsd-src/sys/dev/pv/virtio.c (revision 1ad61ae0a79a724d2d3ec69e69c8e1d1ff6b53a0)
1 /*	$OpenBSD: virtio.c,v 1.23 2023/07/07 10:23:39 patrick Exp $	*/
2 /*	$NetBSD: virtio.c,v 1.3 2011/11/02 23:05:52 njoly Exp $	*/
3 
4 /*
5  * Copyright (c) 2012 Stefan Fritsch, Alexander Fiveg.
6  * Copyright (c) 2010 Minoura Makoto.
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  */
29 
30 #include <sys/param.h>
31 #include <sys/systm.h>
32 #include <sys/kernel.h>
33 #include <sys/device.h>
34 #include <sys/mutex.h>
35 #include <sys/atomic.h>
36 #include <sys/malloc.h>
37 
38 #include <dev/pv/virtioreg.h>
39 #include <dev/pv/virtiovar.h>
40 
41 #if VIRTIO_DEBUG
42 #define VIRTIO_ASSERT(x)	KASSERT(x)
43 #else
44 #define VIRTIO_ASSERT(x)
45 #endif
46 
47 void		 virtio_init_vq(struct virtio_softc *,
48 				struct virtqueue *);
49 void		 vq_free_entry(struct virtqueue *, struct vq_entry *);
50 struct vq_entry	*vq_alloc_entry(struct virtqueue *);
51 
52 struct cfdriver virtio_cd = {
53 	NULL, "virtio", DV_DULL
54 };
55 
56 static const char * const virtio_device_name[] = {
57 	"Unknown (0)",		/* 0 */
58 	"Network",		/* 1 */
59 	"Block",		/* 2 */
60 	"Console",		/* 3 */
61 	"Entropy",		/* 4 */
62 	"Memory Balloon",	/* 5 */
63 	"IO Memory",		/* 6 */
64 	"Rpmsg",		/* 7 */
65 	"SCSI host",		/* 8 */
66 	"9P Transport",		/* 9 */
67 	"mac80211 wlan",	/* 10 */
68 	NULL,			/* 11 */
69 	NULL,			/* 12 */
70 	NULL,			/* 13 */
71 	NULL,			/* 14 */
72 	NULL,			/* 15 */
73 	"GPU",			/* 16 */
74 };
75 #define NDEVNAMES	(sizeof(virtio_device_name)/sizeof(char*))
76 
77 const char *
78 virtio_device_string(int id)
79 {
80 	return (id < NDEVNAMES && virtio_device_name[id] != NULL) ? virtio_device_name[id] : "Unknown";
81 }
82 
83 #if VIRTIO_DEBUG
84 static const struct virtio_feature_name transport_feature_names[] = {
85 	{ VIRTIO_F_NOTIFY_ON_EMPTY,	"NotifyOnEmpty"},
86 	{ VIRTIO_F_RING_INDIRECT_DESC,	"RingIndirectDesc"},
87 	{ VIRTIO_F_RING_EVENT_IDX,	"RingEventIdx"},
88 	{ VIRTIO_F_BAD_FEATURE,		"BadFeature"},
89 	{ VIRTIO_F_VERSION_1,		"Version1"},
90 	{ 0,				NULL}
91 };
92 
93 void
94 virtio_log_features(uint64_t host, uint64_t neg,
95     const struct virtio_feature_name *guest_feature_names)
96 {
97 	const struct virtio_feature_name *namep;
98 	int i;
99 	char c;
100 	uint64_t bit;
101 
102 	for (i = 0; i < 64; i++) {
103 		if (i == 30) {
104 			/*
105 			 * VIRTIO_F_BAD_FEATURE is only used for
106 			 * checking correct negotiation
107 			 */
108 			continue;
109 		}
110 		bit = 1ULL << i;
111 		if ((host&bit) == 0)
112 			continue;
113 		namep = (i < 24 || i > 37) ? guest_feature_names :
114 		    transport_feature_names;
115 		while (namep->bit && namep->bit != bit)
116 			namep++;
117 		c = (neg&bit) ? '+' : '-';
118 		if (namep->name)
119 			printf(" %c%s", c, namep->name);
120 		else
121 			printf(" %cUnknown(%d)", c, i);
122 	}
123 }
124 #endif
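/*
 * Example (sketch): a driver-side feature name table as consumed by
 * virtio_log_features().  The macro VIODEV_F_EXAMPLE and the table name
 * are hypothetical; each driver defines its own names, and the transport
 * code normally calls virtio_log_features() during feature negotiation
 * with the host and negotiated feature masks.
 *
 *	static const struct virtio_feature_name viodev_feature_names[] = {
 *		{ VIODEV_F_EXAMPLE,	"Example" },
 *		{ 0,			NULL }
 *	};
 *
 *	virtio_log_features(host_features, sc->sc_active_features,
 *	    viodev_feature_names);
 */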
125 
126 /*
127  * Reset the device.
128  */
129 /*
130  * To reset the device to a known state, do the following:
131  *	virtio_reset(sc);	     // this will stop the device activity
132  *	<dequeue finished requests>; // virtio_dequeue() can still be called
133  *	<revoke pending requests in the vqs if any>;
134  *	virtio_reinit_start(sc);     // dequeue prohibited
135  *	<some other initialization>;
136  *	virtio_reinit_end(sc);	     // device activated; enqueue allowed
137  * Once attached, features are assumed not to change again.
138  */
139 void
140 virtio_reset(struct virtio_softc *sc)
141 {
142 	virtio_device_reset(sc);
143 	sc->sc_active_features = 0;
144 }
145 
146 void
147 virtio_reinit_start(struct virtio_softc *sc)
148 {
149 	int i;
150 
151 	virtio_set_status(sc, VIRTIO_CONFIG_DEVICE_STATUS_ACK);
152 	virtio_set_status(sc, VIRTIO_CONFIG_DEVICE_STATUS_DRIVER);
153 	virtio_negotiate_features(sc, NULL);
154 	for (i = 0; i < sc->sc_nvqs; i++) {
155 		int n;
156 		struct virtqueue *vq = &sc->sc_vqs[i];
157 		n = virtio_read_queue_size(sc, vq->vq_index);
158 		if (n == 0)	/* vq disappeared */
159 			continue;
160 		if (n != vq->vq_num) {
161 			panic("%s: virtqueue size changed, vq index %d",
162 			    sc->sc_dev.dv_xname, vq->vq_index);
163 		}
164 		virtio_init_vq(sc, vq);
165 		virtio_setup_queue(sc, vq, vq->vq_dmamap->dm_segs[0].ds_addr);
166 	}
167 }
168 
169 void
170 virtio_reinit_end(struct virtio_softc *sc)
171 {
172 	virtio_set_status(sc, VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK);
173 }
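/*
 * Example (sketch): a driver's reinitialization path (e.g. after resume),
 * following the sequence described above.  vioex_drain() and the softc
 * names are hypothetical.
 *
 *	virtio_reset(vsc);		// stop device activity
 *	vioex_drain(sc);		// complete/revoke pending requests
 *	virtio_reinit_start(vsc);	// re-negotiate features, re-init vqs
 *	// reload driver-specific device configuration here
 *	virtio_reinit_end(vsc);		// DRIVER_OK; enqueue allowed again
 */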
174 
175 /*
176  * dmamap sync operations for a virtqueue.
177  */
178 static inline void
179 vq_sync_descs(struct virtio_softc *sc, struct virtqueue *vq, int ops)
180 {
181 	/* availoffset == sizeof(vring_desc)*vq_num */
182 	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap, 0, vq->vq_availoffset,
183 	    ops);
184 }
185 
186 static inline void
187 vq_sync_aring(struct virtio_softc *sc, struct virtqueue *vq, int ops)
188 {
189 	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap, vq->vq_availoffset,
190 	    offsetof(struct vring_avail, ring) + vq->vq_num * sizeof(uint16_t),
191 	    ops);
192 }
193 
194 static inline void
195 vq_sync_uring(struct virtio_softc *sc, struct virtqueue *vq, int ops)
196 {
197 	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap, vq->vq_usedoffset,
198 	    offsetof(struct vring_used, ring) + vq->vq_num *
199 	    sizeof(struct vring_used_elem), ops);
200 }
201 
202 static inline void
203 vq_sync_indirect(struct virtio_softc *sc, struct virtqueue *vq, int slot,
204     int ops)
205 {
206 	int offset = vq->vq_indirectoffset +
207 	    sizeof(struct vring_desc) * vq->vq_maxnsegs * slot;
208 
209 	bus_dmamap_sync(sc->sc_dmat, vq->vq_dmamap, offset,
210 	    sizeof(struct vring_desc) * vq->vq_maxnsegs, ops);
211 }
212 
213 /*
214  * Scan the vqs, doing bus_dmamap_sync for the rings (not for the payload),
215  * and call (*vq_done)() if some entries have been consumed.
216  * For use in transport-specific interrupt handlers.
217  */
218 int
219 virtio_check_vqs(struct virtio_softc *sc)
220 {
221 	int i, r = 0;
222 
223 	/* going backwards is better for if_vio */
224 	for (i = sc->sc_nvqs - 1; i >= 0; i--)
225 		r |= virtio_check_vq(sc, &sc->sc_vqs[i]);
226 
227 	return r;
228 }
229 
230 int
231 virtio_check_vq(struct virtio_softc *sc, struct virtqueue *vq)
232 {
233 	if (vq->vq_queued) {
234 		vq->vq_queued = 0;
235 		vq_sync_aring(sc, vq, BUS_DMASYNC_POSTWRITE);
236 	}
237 	vq_sync_uring(sc, vq, BUS_DMASYNC_POSTREAD);
238 	if (vq->vq_used_idx != vq->vq_used->idx) {
239 		if (vq->vq_done)
240 			return (vq->vq_done)(vq);
241 	}
242 
243 	return 0;
244 }
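/*
 * Example (sketch): a transport-level interrupt handler.  Reading and
 * acknowledging the interrupt status is transport specific (see
 * virtio_pci.c / virtio_mmio.c); only the vq scanning is shown here.
 *
 *	int
 *	example_virtio_intr(void *arg)
 *	{
 *		struct virtio_softc *vsc = arg;
 *
 *		// ... read/ack transport interrupt status register ...
 *		return virtio_check_vqs(vsc);	// calls vq_done() for vqs
 *						// with new used entries
 *	}
 */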
245 
246 /*
247  * Initialize vq structure.
248  */
249 void
250 virtio_init_vq(struct virtio_softc *sc, struct virtqueue *vq)
251 {
252 	int i, j;
253 	int vq_size = vq->vq_num;
254 
255 	memset(vq->vq_vaddr, 0, vq->vq_bytesize);
256 
257 	/* build the indirect descriptor chain */
258 	if (vq->vq_indirect != NULL) {
259 		struct vring_desc *vd;
260 
261 		for (i = 0; i < vq_size; i++) {
262 			vd = vq->vq_indirect;
263 			vd += vq->vq_maxnsegs * i;
264 			for (j = 0; j < vq->vq_maxnsegs-1; j++)
265 				vd[j].next = j + 1;
266 		}
267 	}
268 
269 	/* free slot management */
270 	SLIST_INIT(&vq->vq_freelist);
271 	/*
272 	 * virtio_enqueue_trim needs monotonically increasing entries,
273 	 * therefore initialize in reverse order
274 	 */
275 	for (i = vq_size - 1; i >= 0; i--) {
276 		SLIST_INSERT_HEAD(&vq->vq_freelist, &vq->vq_entries[i],
277 		    qe_list);
278 		vq->vq_entries[i].qe_index = i;
279 	}
280 
281 	/* enqueue/dequeue status */
282 	vq->vq_avail_idx = 0;
283 	vq->vq_used_idx = 0;
284 	vq_sync_aring(sc, vq, BUS_DMASYNC_PREWRITE);
285 	vq_sync_uring(sc, vq, BUS_DMASYNC_PREREAD);
286 	vq->vq_queued = 1;
287 }
288 
289 /*
290  * Allocate/free a vq.
291  *
292  * maxnsegs denotes how much space should be allocated for indirect
293  * descriptors. maxnsegs == 1 can be used to disable the use of indirect
294  * descriptors for this queue.
295  */
296 int
297 virtio_alloc_vq(struct virtio_softc *sc, struct virtqueue *vq, int index,
298     int maxsegsize, int maxnsegs, const char *name)
299 {
300 	int vq_size, allocsize1, allocsize2, allocsize3, allocsize = 0;
301 	int rsegs, r, hdrlen;
302 #define VIRTQUEUE_ALIGN(n)	(((n)+(VIRTIO_PAGE_SIZE-1))&	\
303 				 ~(VIRTIO_PAGE_SIZE-1))
304 
305 	memset(vq, 0, sizeof(*vq));
306 
307 	vq_size = virtio_read_queue_size(sc, index);
308 	if (vq_size == 0) {
309 		printf("virtqueue does not exist, index %d for %s\n", index, name);
310 		goto err;
311 	}
312 	if (((vq_size - 1) & vq_size) != 0)
313 		panic("vq_size not power of two: %d", vq_size);
314 
315 	hdrlen = virtio_has_feature(sc, VIRTIO_F_RING_EVENT_IDX) ? 3 : 2;
316 
317 	/* allocsize1: descriptor table + avail ring + pad */
318 	allocsize1 = VIRTQUEUE_ALIGN(sizeof(struct vring_desc) * vq_size
319 	    + sizeof(uint16_t) * (hdrlen + vq_size));
320 	/* allocsize2: used ring + pad */
321 	allocsize2 = VIRTQUEUE_ALIGN(sizeof(uint16_t) * hdrlen
322 	    + sizeof(struct vring_used_elem) * vq_size);
323 	/* allocsize3: indirect table */
324 	if (sc->sc_indirect && maxnsegs > 1)
325 		allocsize3 = sizeof(struct vring_desc) * maxnsegs * vq_size;
326 	else
327 		allocsize3 = 0;
328 	allocsize = allocsize1 + allocsize2 + allocsize3;
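	/*
	 * Worked example (assuming VIRTIO_PAGE_SIZE == 4096): a 256-entry
	 * queue with event index negotiated (hdrlen == 3) and maxnsegs == 8
	 * gives allocsize1 = align(16*256 + 2*(3+256)) = 8192,
	 * allocsize2 = align(2*3 + 8*256) = 4096 and
	 * allocsize3 = 16*8*256 = 32768.
	 */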
329 
330 	/* alloc and map the memory */
331 	r = bus_dmamem_alloc(sc->sc_dmat, allocsize, VIRTIO_PAGE_SIZE, 0,
332 	    &vq->vq_segs[0], 1, &rsegs, BUS_DMA_NOWAIT);
333 	if (r != 0) {
334 		printf("virtqueue %d for %s allocation failed, error %d\n",
335 		       index, name, r);
336 		goto err;
337 	}
338 	r = bus_dmamem_map(sc->sc_dmat, &vq->vq_segs[0], 1, allocsize,
339 	    (caddr_t*)&vq->vq_vaddr, BUS_DMA_NOWAIT);
340 	if (r != 0) {
341 		printf("virtqueue %d for %s map failed, error %d\n", index,
342 		    name, r);
343 		goto err;
344 	}
345 	r = bus_dmamap_create(sc->sc_dmat, allocsize, 1, allocsize, 0,
346 	    BUS_DMA_NOWAIT, &vq->vq_dmamap);
347 	if (r != 0) {
348 		printf("virtqueue %d for %s dmamap creation failed, "
349 		    "error %d\n", index, name, r);
350 		goto err;
351 	}
352 	r = bus_dmamap_load(sc->sc_dmat, vq->vq_dmamap, vq->vq_vaddr,
353 	    allocsize, NULL, BUS_DMA_NOWAIT);
354 	if (r != 0) {
355 		printf("virtqueue %d for %s dmamap load failed, error %d\n",
356 		    index, name, r);
357 		goto err;
358 	}
359 
360 	/* remember addresses and offsets for later use */
361 	vq->vq_owner = sc;
362 	vq->vq_num = vq_size;
363 	vq->vq_mask = vq_size - 1;
364 	vq->vq_index = index;
365 	vq->vq_desc = vq->vq_vaddr;
366 	vq->vq_availoffset = sizeof(struct vring_desc)*vq_size;
367 	vq->vq_avail = (struct vring_avail*)(((char*)vq->vq_desc) +
368 	    vq->vq_availoffset);
369 	vq->vq_usedoffset = allocsize1;
370 	vq->vq_used = (struct vring_used*)(((char*)vq->vq_desc) +
371 	    vq->vq_usedoffset);
372 	if (allocsize3 > 0) {
373 		vq->vq_indirectoffset = allocsize1 + allocsize2;
374 		vq->vq_indirect = (void*)(((char*)vq->vq_desc)
375 		    + vq->vq_indirectoffset);
376 	}
377 	vq->vq_bytesize = allocsize;
378 	vq->vq_maxnsegs = maxnsegs;
379 
380 	/* free slot management */
381 	vq->vq_entries = mallocarray(vq_size, sizeof(struct vq_entry),
382 	    M_DEVBUF, M_NOWAIT | M_ZERO);
383 	if (vq->vq_entries == NULL) {
384 		r = ENOMEM;
385 		goto err;
386 	}
387 
388 	virtio_init_vq(sc, vq);
389 	virtio_setup_queue(sc, vq, vq->vq_dmamap->dm_segs[0].ds_addr);
390 
391 #if VIRTIO_DEBUG
392 	printf("\nallocated %d bytes for virtqueue %d for %s, size %d\n",
393 	    allocsize, index, name, vq_size);
394 	if (allocsize3 > 0)
395 		printf("using %d bytes (%d entries) for indirect descriptors\n",
396 		    allocsize3, maxnsegs * vq_size);
397 #endif
398 	return 0;
399 
400 err:
401 	if (vq->vq_dmamap)
402 		bus_dmamap_destroy(sc->sc_dmat, vq->vq_dmamap);
403 	if (vq->vq_vaddr)
404 		bus_dmamem_unmap(sc->sc_dmat, vq->vq_vaddr, allocsize);
405 	if (vq->vq_segs[0].ds_addr)
406 		bus_dmamem_free(sc->sc_dmat, &vq->vq_segs[0], 1);
407 	memset(vq, 0, sizeof(*vq));
408 
409 	return -1;
410 }
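/*
 * Example (sketch): allocating a queue from a driver's attach routine.
 * The softc members and example_vq_done() are hypothetical.
 *
 *	vsc->sc_vqs = &sc->sc_vq;
 *	vsc->sc_nvqs = 1;
 *	if (virtio_alloc_vq(vsc, &sc->sc_vq, 0, MAXPHYS, 2, "requests") != 0)
 *		goto fail;
 *	sc->sc_vq.vq_done = example_vq_done;	// called by virtio_check_vq()
 *	virtio_start_vq_intr(vsc, &sc->sc_vq);
 */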
411 
412 int
413 virtio_free_vq(struct virtio_softc *sc, struct virtqueue *vq)
414 {
415 	struct vq_entry *qe;
416 	int i = 0;
417 
418 	/* device must already be deactivated */
419 	/* confirm the vq is empty */
420 	SLIST_FOREACH(qe, &vq->vq_freelist, qe_list) {
421 		i++;
422 	}
423 	if (i != vq->vq_num) {
424 		printf("%s: freeing non-empty vq, index %d\n",
425 		    sc->sc_dev.dv_xname, vq->vq_index);
426 		return EBUSY;
427 	}
428 
429 	/* tell device that there's no virtqueue any longer */
430 	virtio_setup_queue(sc, vq, 0);
431 
432 	free(vq->vq_entries, M_DEVBUF, 0);
433 	bus_dmamap_unload(sc->sc_dmat, vq->vq_dmamap);
434 	bus_dmamap_destroy(sc->sc_dmat, vq->vq_dmamap);
435 	bus_dmamem_unmap(sc->sc_dmat, vq->vq_vaddr, vq->vq_bytesize);
436 	bus_dmamem_free(sc->sc_dmat, &vq->vq_segs[0], 1);
437 	memset(vq, 0, sizeof(*vq));
438 
439 	return 0;
440 }
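/*
 * Example (sketch): tearing down the queues on detach, after the device
 * has been reset so it no longer accesses the rings.
 *
 *	virtio_reset(vsc);
 *	for (i = 0; i < vsc->sc_nvqs; i++)
 *		virtio_free_vq(vsc, &vsc->sc_vqs[i]);
 *	vsc->sc_nvqs = 0;
 */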
441 
442 /*
443  * Free descriptor management.
444  */
445 struct vq_entry *
446 vq_alloc_entry(struct virtqueue *vq)
447 {
448 	struct vq_entry *qe;
449 
450 	if (SLIST_EMPTY(&vq->vq_freelist))
451 		return NULL;
452 	qe = SLIST_FIRST(&vq->vq_freelist);
453 	SLIST_REMOVE_HEAD(&vq->vq_freelist, qe_list);
454 
455 	return qe;
456 }
457 
458 void
459 vq_free_entry(struct virtqueue *vq, struct vq_entry *qe)
460 {
461 	SLIST_INSERT_HEAD(&vq->vq_freelist, qe, qe_list);
462 }
463 
464 /*
465  * Enqueue several dmamaps as a single request.
466  */
467 /*
468  * Typical usage:
469  *  <queue size> instances of the following are stored in arrays:
470  *  - command blocks (in dmamem) should be pre-allocated and mapped
471  *  - dmamaps for command blocks should be pre-allocated and loaded
472  *  - dmamaps for payload should be pre-allocated
473  *	r = virtio_enqueue_prep(vq, &slot);		// allocate a slot
474  *	if (r)		// currently 0 or EAGAIN
475  *	  return r;
476  *	r = bus_dmamap_load(dmat, dmamap_payload[slot], data, count, ..);
477  *	if (r) {
478  *	  virtio_enqueue_abort(vq, slot);
479  *	  bus_dmamap_unload(dmat, dmamap_payload[slot]);
480  *	  return r;
481  *	}
482  *	r = virtio_enqueue_reserve(vq, slot,
483  *				   dmamap_payload[slot]->dm_nsegs+1);
484  *							// ^ +1 for command
485  *	if (r) {	// currently 0 or EAGAIN
486  *	  bus_dmamap_unload(dmat, dmamap_payload[slot]);
487  *	  return r;					// do not call abort()
488  *	}
489  *	<setup and prepare commands>
490  *	bus_dmamap_sync(dmat, dmamap_cmd[slot],... BUS_DMASYNC_PREWRITE);
491  *	bus_dmamap_sync(dmat, dmamap_payload[slot],...);
492  *	virtio_enqueue(vq, slot, dmamap_cmd[slot], 0);
493  *	virtio_enqueue(vq, slot, dmamap_payload[slot], iswrite);
494  *	virtio_enqueue_commit(sc, vq, slot, 1);
495  *
496  * Alternative usage with statically allocated slots:
497  *	<during initialization>
498  *	// while not out of slots, do
499  *	virtio_enqueue_prep(vq, &slot);			// allocate a slot
500  *	virtio_enqueue_reserve(vq, slot, max_segs);	// reserve all slots
501  *						that may ever be needed
502  *
503  *	<when enqueuing a request>
504  *	// Don't call virtio_enqueue_prep()
505  *	bus_dmamap_load(dmat, dmamap_payload[slot], data, count, ..);
506  *	bus_dmamap_sync(dmat, dmamap_cmd[slot],... BUS_DMASYNC_PREWRITE);
507  *	bus_dmamap_sync(dmat, dmamap_payload[slot],...);
508  *	virtio_enqueue_trim(vq, slot, num_segs_needed);
509  *	virtio_enqueue(vq, slot, dmamap_cmd[slot], 0);
510  *	virtio_enqueue(vq, slot, dmamap_payload[slot], iswrite);
511  *	virtio_enqueue_commit(sc, vq, slot, 1);
512  *
513  *	<when dequeuing>
514  *	// don't call virtio_dequeue_commit()
515  */
516 
517 /*
518  * enqueue_prep: allocate a slot number
519  */
520 int
521 virtio_enqueue_prep(struct virtqueue *vq, int *slotp)
522 {
523 	struct vq_entry *qe1;
524 
525 	VIRTIO_ASSERT(slotp != NULL);
526 
527 	qe1 = vq_alloc_entry(vq);
528 	if (qe1 == NULL)
529 		return EAGAIN;
530 	/* next slot is not allocated yet */
531 	qe1->qe_next = -1;
532 	*slotp = qe1->qe_index;
533 
534 	return 0;
535 }
536 
537 /*
538  * enqueue_reserve: allocate remaining slots and build the descriptor chain.
539  * Calls virtio_enqueue_abort() on failure.
540  */
541 int
542 virtio_enqueue_reserve(struct virtqueue *vq, int slot, int nsegs)
543 {
544 	struct vq_entry *qe1 = &vq->vq_entries[slot];
545 
546 	VIRTIO_ASSERT(qe1->qe_next == -1);
547 	VIRTIO_ASSERT(1 <= nsegs && nsegs <= vq->vq_num);
548 
549 	if (vq->vq_indirect != NULL && nsegs > 1 && nsegs <= vq->vq_maxnsegs) {
550 		struct vring_desc *vd;
551 		int i;
552 
553 		qe1->qe_indirect = 1;
554 
555 		vd = &vq->vq_desc[qe1->qe_index];
556 		vd->addr = vq->vq_dmamap->dm_segs[0].ds_addr +
557 		    vq->vq_indirectoffset;
558 		vd->addr += sizeof(struct vring_desc) * vq->vq_maxnsegs *
559 		    qe1->qe_index;
560 		vd->len = sizeof(struct vring_desc) * nsegs;
561 		vd->flags = VRING_DESC_F_INDIRECT;
562 
563 		vd = vq->vq_indirect;
564 		vd += vq->vq_maxnsegs * qe1->qe_index;
565 		qe1->qe_desc_base = vd;
566 
567 		for (i = 0; i < nsegs-1; i++)
568 			vd[i].flags = VRING_DESC_F_NEXT;
569 		vd[i].flags = 0;
570 		qe1->qe_next = 0;
571 
572 		return 0;
573 	} else {
574 		struct vring_desc *vd;
575 		struct vq_entry *qe;
576 		int i, s;
577 
578 		qe1->qe_indirect = 0;
579 
580 		vd = &vq->vq_desc[0];
581 		qe1->qe_desc_base = vd;
582 		qe1->qe_next = qe1->qe_index;
583 		s = slot;
584 		for (i = 0; i < nsegs - 1; i++) {
585 			qe = vq_alloc_entry(vq);
586 			if (qe == NULL) {
587 				vd[s].flags = 0;
588 				virtio_enqueue_abort(vq, slot);
589 				return EAGAIN;
590 			}
591 			vd[s].flags = VRING_DESC_F_NEXT;
592 			vd[s].next = qe->qe_index;
593 			s = qe->qe_index;
594 		}
595 		vd[s].flags = 0;
596 
597 		return 0;
598 	}
599 }
600 
601 /*
602  * enqueue: enqueue a single dmamap.
603  */
604 int
605 virtio_enqueue(struct virtqueue *vq, int slot, bus_dmamap_t dmamap, int write)
606 {
607 	struct vq_entry *qe1 = &vq->vq_entries[slot];
608 	struct vring_desc *vd = qe1->qe_desc_base;
609 	int i;
610 	int s = qe1->qe_next;
611 
612 	VIRTIO_ASSERT(s >= 0);
613 	VIRTIO_ASSERT(dmamap->dm_nsegs > 0);
614 	if (dmamap->dm_nsegs > vq->vq_maxnsegs) {
615 #if VIRTIO_DEBUG
616 		for (i = 0; i < dmamap->dm_nsegs; i++) {
617 			printf(" %d (%d): %p %lx \n", i, write,
618 			    (void *)dmamap->dm_segs[i].ds_addr,
619 			    dmamap->dm_segs[i].ds_len);
620 		}
621 #endif
622 		panic("dmamap->dm_nsegs %d > vq->vq_maxnsegs %d",
623 		    dmamap->dm_nsegs, vq->vq_maxnsegs);
624 	}
625 
626 	for (i = 0; i < dmamap->dm_nsegs; i++) {
627 		vd[s].addr = dmamap->dm_segs[i].ds_addr;
628 		vd[s].len = dmamap->dm_segs[i].ds_len;
629 		if (!write)
630 			vd[s].flags |= VRING_DESC_F_WRITE;
631 		s = vd[s].next;
632 	}
633 	qe1->qe_next = s;
634 
635 	return 0;
636 }
637 
638 int
639 virtio_enqueue_p(struct virtqueue *vq, int slot, bus_dmamap_t dmamap,
640     bus_addr_t start, bus_size_t len, int write)
641 {
642 	struct vq_entry *qe1 = &vq->vq_entries[slot];
643 	struct vring_desc *vd = qe1->qe_desc_base;
644 	int s = qe1->qe_next;
645 
646 	VIRTIO_ASSERT(s >= 0);
647 	/* XXX todo: handle more segments */
648 	VIRTIO_ASSERT(dmamap->dm_nsegs == 1);
649 	VIRTIO_ASSERT((dmamap->dm_segs[0].ds_len > start) &&
650 	    (dmamap->dm_segs[0].ds_len >= start + len));
651 
652 	vd[s].addr = dmamap->dm_segs[0].ds_addr + start;
653 	vd[s].len = len;
654 	if (!write)
655 		vd[s].flags |= VRING_DESC_F_WRITE;
656 	qe1->qe_next = vd[s].next;
657 
658 	return 0;
659 }
660 
661 static void
662 publish_avail_idx(struct virtio_softc *sc, struct virtqueue *vq)
663 {
664 	vq_sync_aring(sc, vq, BUS_DMASYNC_PREWRITE);
665 
666 	virtio_membar_producer();
667 	vq->vq_avail->idx = vq->vq_avail_idx;
668 	vq_sync_aring(sc, vq, BUS_DMASYNC_POSTWRITE);
669 	vq->vq_queued = 1;
670 }
671 
672 /*
673  * enqueue_commit: add the request to the avail ring.
674  */
675 void
676 virtio_enqueue_commit(struct virtio_softc *sc, struct virtqueue *vq, int slot,
677     int notifynow)
678 {
679 	struct vq_entry *qe1;
680 
681 	if (slot < 0)
682 		goto notify;
683 	vq_sync_descs(sc, vq, BUS_DMASYNC_PREWRITE);
684 	qe1 = &vq->vq_entries[slot];
685 	if (qe1->qe_indirect)
686 		vq_sync_indirect(sc, vq, slot, BUS_DMASYNC_PREWRITE);
687 	vq->vq_avail->ring[(vq->vq_avail_idx++) & vq->vq_mask] = slot;
688 
689 notify:
690 	if (notifynow) {
691 		if (virtio_has_feature(vq->vq_owner, VIRTIO_F_RING_EVENT_IDX)) {
692 			uint16_t o = vq->vq_avail->idx;
693 			uint16_t n = vq->vq_avail_idx;
694 			uint16_t t;
695 			publish_avail_idx(sc, vq);
696 
697 			virtio_membar_sync();
698 			t = VQ_AVAIL_EVENT(vq) + 1;
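			/*
			 * Kick only if the avail_event index the device
			 * requested was crossed while moving the avail index
			 * from o to n; the 16-bit subtractions make this
			 * comparison safe across index wrap-around.
			 */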
699 			if ((uint16_t)(n - t) < (uint16_t)(n - o))
700 				sc->sc_ops->kick(sc, vq->vq_index);
701 		} else {
702 			publish_avail_idx(sc, vq);
703 
704 			virtio_membar_sync();
705 			if (!(vq->vq_used->flags & VRING_USED_F_NO_NOTIFY))
706 				sc->sc_ops->kick(sc, vq->vq_index);
707 		}
708 	}
709 }
710 
711 /*
712  * enqueue_abort: rollback.
713  */
714 int
715 virtio_enqueue_abort(struct virtqueue *vq, int slot)
716 {
717 	struct vq_entry *qe = &vq->vq_entries[slot];
718 	struct vring_desc *vd;
719 	int s;
720 
721 	if (qe->qe_next < 0) {
722 		vq_free_entry(vq, qe);
723 		return 0;
724 	}
725 
726 	s = slot;
727 	vd = &vq->vq_desc[0];
728 	while (vd[s].flags & VRING_DESC_F_NEXT) {
729 		s = vd[s].next;
730 		vq_free_entry(vq, qe);
731 		qe = &vq->vq_entries[s];
732 	}
733 	vq_free_entry(vq, qe);
734 	return 0;
735 }
736 
737 /*
738  * enqueue_trim: adjust the descriptor chain to the given number of
739  * segments (descriptors).
740  */
741 void
742 virtio_enqueue_trim(struct virtqueue *vq, int slot, int nsegs)
743 {
744 	struct vq_entry *qe1 = &vq->vq_entries[slot];
745 	struct vring_desc *vd = &vq->vq_desc[0];
746 	int i;
747 
748 	if ((vd[slot].flags & VRING_DESC_F_INDIRECT) == 0) {
749 		qe1->qe_next = qe1->qe_index;
750 		/*
751 		 * N.B.: the vq_entries are ASSUMED to be a contiguous
752 		 *       block with slot being the index to the first one.
753 		 */
754 	} else {
755 		qe1->qe_next = 0;
756 		vd = &vq->vq_desc[qe1->qe_index];
757 		vd->len = sizeof(struct vring_desc) * nsegs;
758 		vd = qe1->qe_desc_base;
759 		slot = 0;
760 	}
761 
762 	for (i = 0; i < nsegs - 1; i++) {
763 		vd[slot].flags = VRING_DESC_F_NEXT;
764 		slot++;
765 	}
766 	vd[slot].flags = 0;
767 }
768 
769 /*
770  * Dequeue a request.
771  */
772 /*
773  * dequeue: dequeue a request from uring; dmamap_sync for uring is
774  *	    already done in the interrupt handler.
775  */
776 int
777 virtio_dequeue(struct virtio_softc *sc, struct virtqueue *vq,
778     int *slotp, int *lenp)
779 {
780 	uint16_t slot, usedidx;
781 	struct vq_entry *qe;
782 
783 	if (vq->vq_used_idx == vq->vq_used->idx)
784 		return ENOENT;
785 	usedidx = vq->vq_used_idx++;
786 	usedidx &= vq->vq_mask;
787 
788 	virtio_membar_consumer();
789 	slot = vq->vq_used->ring[usedidx].id;
790 	qe = &vq->vq_entries[slot];
791 
792 	if (qe->qe_indirect)
793 		vq_sync_indirect(sc, vq, slot, BUS_DMASYNC_POSTWRITE);
794 
795 	if (slotp)
796 		*slotp = slot;
797 	if (lenp)
798 		*lenp = vq->vq_used->ring[usedidx].len;
799 
800 	return 0;
801 }
802 
803 /*
804  * dequeue_commit: complete dequeue; the slot is recycled for future use.
805  *                 If you forget to call this, the slot will be leaked.
806  *
807  *                 Don't call this if you use statically allocated slots
808  *                 and virtio_dequeue_trim().
809  */
810 int
811 virtio_dequeue_commit(struct virtqueue *vq, int slot)
812 {
813 	struct vq_entry *qe = &vq->vq_entries[slot];
814 	struct vring_desc *vd = &vq->vq_desc[0];
815 	int s = slot;
816 
817 	while (vd[s].flags & VRING_DESC_F_NEXT) {
818 		s = vd[s].next;
819 		vq_free_entry(vq, qe);
820 		qe = &vq->vq_entries[s];
821 	}
822 	vq_free_entry(vq, qe);
823 
824 	return 0;
825 }
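/*
 * Example (sketch): a vq_done handler draining the used ring.  The
 * dmamap_payload[] array is hypothetical; the used ring itself has
 * already been synced by virtio_check_vq().
 *
 *	int
 *	example_vq_done(struct virtqueue *vq)
 *	{
 *		struct virtio_softc *vsc = vq->vq_owner;
 *		int slot, len, r = 0;
 *
 *		while (virtio_dequeue(vsc, vq, &slot, &len) == 0) {
 *			bus_dmamap_sync(vsc->sc_dmat, dmamap_payload[slot],
 *			    0, len, BUS_DMASYNC_POSTREAD);
 *			// ... complete the request associated with slot ...
 *			virtio_dequeue_commit(vq, slot);
 *			r = 1;
 *		}
 *		return r;
 *	}
 */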
826 
827 /*
828  * Increase the event index in order to delay interrupts.
829  * Returns 0 on success; returns 1 if the used ring has already advanced
830  * too far, and the caller must process the queue again (otherwise, no
831  * more interrupts will happen).
832  */
833 int
834 virtio_postpone_intr(struct virtqueue *vq, uint16_t nslots)
835 {
836 	uint16_t	idx;
837 
838 	idx = vq->vq_used_idx + nslots;
839 
840 	/* set the new event index: avail_ring->used_event = idx */
841 	VQ_USED_EVENT(vq) = idx;
842 	virtio_membar_sync();
843 
844 	vq_sync_aring(vq->vq_owner, vq, BUS_DMASYNC_PREWRITE);
845 	vq->vq_queued++;
846 
847 	if (nslots < virtio_nused(vq))
848 		return 1;
849 
850 	return 0;
851 }
852 
853 /*
854  * Postpone interrupt until 3/4 of the available descriptors have been
855  * consumed.
856  */
857 int
858 virtio_postpone_intr_smart(struct virtqueue *vq)
859 {
860 	uint16_t	nslots;
861 
862 	nslots = (uint16_t)(vq->vq_avail->idx - vq->vq_used_idx) * 3 / 4;
863 
864 	return virtio_postpone_intr(vq, nslots);
865 }
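/*
 * Example (sketch): interrupt mitigation in a vq_done handler.  If
 * postponing reports that the used ring already advanced past the chosen
 * event index, process the queue again instead of waiting for an
 * interrupt that will never arrive.
 *
 *	do {
 *		// dequeue and complete everything currently in the used ring
 *	} while (virtio_postpone_intr_smart(vq));
 */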
866 
867 /*
868  * Postpone interrupt until all of the available descriptors have been
869  * consumed.
870  */
871 int
872 virtio_postpone_intr_far(struct virtqueue *vq)
873 {
874 	uint16_t	nslots;
875 
876 	nslots = (uint16_t)(vq->vq_avail->idx - vq->vq_used_idx);
877 
878 	return virtio_postpone_intr(vq, nslots);
879 }
880 
881 
882 /*
883  * Start/stop vq interrupt.  No guarantee.
884  */
885 void
886 virtio_stop_vq_intr(struct virtio_softc *sc, struct virtqueue *vq)
887 {
888 	if (virtio_has_feature(sc, VIRTIO_F_RING_EVENT_IDX)) {
889 		/*
890 		 * No way to disable the interrupt completely with
891 		 * RingEventIdx. Instead advance used_event by half
892 		 * the index range (0x8000). That index will not be
893 		 * reached any time soon and is far enough away not to
894 		 * trigger a spurious interrupt.
895 		 */
896 		VQ_USED_EVENT(vq) = vq->vq_used_idx + 0x8000;
897 	} else {
898 		vq->vq_avail->flags |= VRING_AVAIL_F_NO_INTERRUPT;
899 	}
900 	vq_sync_aring(sc, vq, BUS_DMASYNC_PREWRITE);
901 	vq->vq_queued++;
902 }
903 
904 int
905 virtio_start_vq_intr(struct virtio_softc *sc, struct virtqueue *vq)
906 {
907 	/*
908 	 * If the event index feature is negotiated, interrupts
909 	 * are enabled by writing the latest consumed index
910 	 * into the used_event field.
911 	 */
912 	if (virtio_has_feature(sc, VIRTIO_F_RING_EVENT_IDX))
913 		VQ_USED_EVENT(vq) = vq->vq_used_idx;
914 	else
915 		vq->vq_avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT;
916 
917 	virtio_membar_sync();
918 
919 	vq_sync_aring(sc, vq, BUS_DMASYNC_PREWRITE);
920 	vq->vq_queued++;
921 
922 	if (vq->vq_used_idx != vq->vq_used->idx)
923 		return 1;
924 
925 	return 0;
926 }
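/*
 * Example (sketch): the usual race-free pattern around vq interrupts in
 * a completion path.  virtio_start_vq_intr() returning 1 means new used
 * entries appeared while interrupts were off, so polling must continue.
 *
 *	virtio_stop_vq_intr(vsc, vq);
 *	for (;;) {
 *		// drain the used ring with virtio_dequeue()/_commit()
 *		if (virtio_start_vq_intr(vsc, vq) == 0)
 *			break;
 *		virtio_stop_vq_intr(vsc, vq);
 *	}
 */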
927 
928 /*
929  * Returns the number of entries in the used ring that have not
930  * yet been dequeued by the driver.
931  */
932 int
933 virtio_nused(struct virtqueue *vq)
934 {
935 	uint16_t	n;
936 
937 	n = (uint16_t)(vq->vq_used->idx - vq->vq_used_idx);
938 	VIRTIO_ASSERT(n <= vq->vq_num);
939 
940 	return n;
941 }
942 
943 #if VIRTIO_DEBUG
944 void
945 virtio_vq_dump(struct virtqueue *vq)
946 {
947 	/* Common fields */
948 	printf(" + vq num: %d\n", vq->vq_num);
949 	printf(" + vq mask: 0x%X\n", vq->vq_mask);
950 	printf(" + vq index: %d\n", vq->vq_index);
951 	printf(" + vq used idx: %d\n", vq->vq_used_idx);
952 	printf(" + vq avail idx: %d\n", vq->vq_avail_idx);
953 	printf(" + vq queued: %d\n", vq->vq_queued);
954 	/* Avail ring fields */
955 	printf(" + avail flags: 0x%X\n", vq->vq_avail->flags);
956 	printf(" + avail idx: %d\n", vq->vq_avail->idx);
957 	printf(" + avail event: %d\n", VQ_AVAIL_EVENT(vq));
958 	/* Used ring fields */
959 	printf(" + used flags: 0x%X\n", vq->vq_used->flags);
960 	printf(" + used idx: %d\n", vq->vq_used->idx);
961 	printf(" + used event: %d\n", VQ_USED_EVENT(vq));
962 	printf(" +++++++++++++++++++++++++++\n");
963 }
964 #endif
965