xref: /netbsd-src/sys/arch/xen/xen/xbdback_xenbus.c (revision d55161bb9f08bdbecc38fc10cede7a9a4fcc9f7d)
1*d55161bbSbouyer /*      $NetBSD: xbdback_xenbus.c,v 1.107 2024/06/20 15:17:27 bouyer Exp $      */
2b96fedacSbouyer 
3b96fedacSbouyer /*
4*d55161bbSbouyer  * Copyright (c) 2006,2024 Manuel Bouyer.
5b96fedacSbouyer  *
6b96fedacSbouyer  * Redistribution and use in source and binary forms, with or without
7b96fedacSbouyer  * modification, are permitted provided that the following conditions
8b96fedacSbouyer  * are met:
9b96fedacSbouyer  * 1. Redistributions of source code must retain the above copyright
10b96fedacSbouyer  *    notice, this list of conditions and the following disclaimer.
11b96fedacSbouyer  * 2. Redistributions in binary form must reproduce the above copyright
12b96fedacSbouyer  *    notice, this list of conditions and the following disclaimer in the
13b96fedacSbouyer  *    documentation and/or other materials provided with the distribution.
14b96fedacSbouyer  *
15b96fedacSbouyer  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16b96fedacSbouyer  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17b96fedacSbouyer  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18b96fedacSbouyer  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19b96fedacSbouyer  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20b96fedacSbouyer  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21b96fedacSbouyer  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22b96fedacSbouyer  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23b96fedacSbouyer  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24b96fedacSbouyer  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25b96fedacSbouyer  *
26b96fedacSbouyer  */
27b96fedacSbouyer 
28a9cd1764Sbouyer #include <sys/cdefs.h>
29*d55161bbSbouyer __KERNEL_RCSID(0, "$NetBSD: xbdback_xenbus.c,v 1.107 2024/06/20 15:17:27 bouyer Exp $");
30a9cd1764Sbouyer 
3154f95b14Sjym #include <sys/buf.h>
3254f95b14Sjym #include <sys/condvar.h>
33b96fedacSbouyer #include <sys/conf.h>
34b96fedacSbouyer #include <sys/disk.h>
3596b6da24Schristos #include <sys/device.h>
36b96fedacSbouyer #include <sys/fcntl.h>
37b96fedacSbouyer #include <sys/kauth.h>
3854f95b14Sjym #include <sys/kernel.h>
3954f95b14Sjym #include <sys/kmem.h>
4054f95b14Sjym #include <sys/kthread.h>
4154f95b14Sjym #include <sys/mutex.h>
4254f95b14Sjym #include <sys/param.h>
4354f95b14Sjym #include <sys/queue.h>
4454f95b14Sjym #include <sys/systm.h>
4554f95b14Sjym #include <sys/time.h>
4654f95b14Sjym #include <sys/types.h>
4754f95b14Sjym #include <sys/vnode.h>
48b96fedacSbouyer 
495dac1986Sbouyer #include <xen/intr.h>
504dbd32ceSbouyer #include <xen/hypervisor.h>
514e541343Sbouyer #include <xen/xen.h>
524e541343Sbouyer #include <xen/xen_shm.h>
534e541343Sbouyer #include <xen/evtchn.h>
544e541343Sbouyer #include <xen/xenbus.h>
55ac8432e2Scherry #include <xen/xenring.h>
56ac8432e2Scherry #include <xen/include/public/io/protocols.h>
57b96fedacSbouyer 
58b96fedacSbouyer /* #define XENDEBUG_VBD */
59b96fedacSbouyer #ifdef XENDEBUG_VBD
60b96fedacSbouyer #define XENPRINTF(x) printf x
61b96fedacSbouyer #else
62b96fedacSbouyer #define XENPRINTF(x)
63b96fedacSbouyer #endif
64b96fedacSbouyer 
658d1b8859Sjdolecek #define BLKIF_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE)
66b96fedacSbouyer 
67b96fedacSbouyer /*
68b96fedacSbouyer  * Backend block device driver for Xen
69b96fedacSbouyer  */
70b96fedacSbouyer 
71b96fedacSbouyer /* Values are expressed in 512-byte sectors */
72b96fedacSbouyer #define VBD_BSIZE 512
73b96fedacSbouyer #define VBD_MAXSECT ((PAGE_SIZE / VBD_BSIZE) - 1)
74b96fedacSbouyer 
75848819d9Sjdolecek #define VBD_VA_SIZE			MAXPHYS
7680da1c02Sbouyer #define VBD_MAX_INDIRECT_SEGMENTS	(VBD_VA_SIZE >> PAGE_SHIFT)
77c8d41590Sjdolecek 
78c8d41590Sjdolecek CTASSERT(XENSHM_MAX_PAGES_PER_REQUEST >= VBD_MAX_INDIRECT_SEGMENTS);
798d1b8859Sjdolecek 
80b96fedacSbouyer struct xbdback_instance;
81b96fedacSbouyer 
8254f95b14Sjym /*
8354f95b14Sjym  * status of a xbdback instance:
8454f95b14Sjym  * WAITING: xbdback instance is connected, waiting for requests
8554f95b14Sjym  * RUN: xbdi thread must be woken up, I/Os have to be processed
8654f95b14Sjym  * DISCONNECTING: the instance is closing, no more I/Os can be scheduled
8754f95b14Sjym  * DISCONNECTED: no I/Os, no ring, the thread should terminate.
8854f95b14Sjym  */
8954f95b14Sjym typedef enum {WAITING, RUN, DISCONNECTING, DISCONNECTED} xbdback_state_t;
90b96fedacSbouyer 
91b96fedacSbouyer /*
9254f95b14Sjym  * Each xbdback instance is managed by a single thread that handles all
9354f95b14Sjym  * the I/O processing. As there are a variety of conditions that can block,
9454f95b14Sjym  * everything will be done in a sort of continuation-passing style.
95b96fedacSbouyer  *
9654f95b14Sjym  * When the execution has to block to delay processing, for example to
9754f95b14Sjym  * allow system to recover because of memory shortage (via shared memory
9854f95b14Sjym  * callback), the return value of a continuation can be set to NULL. In that
9954f95b14Sjym  * case, the thread will go back to sleeping and wait for the proper
10054f95b14Sjym  * condition before it starts processing requests again from where it left.
10170fd7422Sjdolecek  * Continuation state is "stored" in the xbdback instance (xbdi_cont),
10270fd7422Sjdolecek  * and should only be manipulated by the instance thread.
10380da1c02Sbouyer  * If a continuation has to be restarted from a specific point,
10480da1c02Sbouyer  * the callback and argument can be stored in xbdi_cont_restart and
10580da1c02Sbouyer  * xbdi_cont_restart_obj
10680da1c02Sbouyer  *
10754f95b14Sjym  *
10854f95b14Sjym  * As xbdback(4) has to handle different sort of asynchronous events (Xen
10954f95b14Sjym  * event channels, biointr() soft interrupts, xenbus commands), the xbdi_lock
11054f95b14Sjym  * mutex is used to protect specific elements of the xbdback instance from
11154f95b14Sjym  * concurrent access: thread status and ring access (when pushing responses).
11277822551Sjym  *
113db95bc7fSbouyer  * Here's how the call graph is supposed to be for a single I/O:
11454f95b14Sjym  *
115db95bc7fSbouyer  * xbdback_co_main()
11670fd7422Sjdolecek  *        |               --> xbdback_co_cache_flush()
117db95bc7fSbouyer  *        |               |    |
11880da1c02Sbouyer  *        |               |    -> xbdback_co_do_io() or NULL
11977822551Sjym  * xbdback_co_main_loop()-|
12070fd7422Sjdolecek  *        |               |-> xbdback_co_main_done2() or NULL
121db95bc7fSbouyer  *        |               |
12270fd7422Sjdolecek  *        |               --> xbdback_co_main_incr() -> xbdback_co_main_loop()
12377822551Sjym  *        |
124db95bc7fSbouyer  *     xbdback_co_io() -> xbdback_co_main_incr() -> xbdback_co_main_loop()
125db95bc7fSbouyer  *        |
12680da1c02Sbouyer  *     xbdback_co_io_gotio() -> xbdback_co_main_incr() -> xbdback_co_main_loop()
12770fd7422Sjdolecek  *        |
12870fd7422Sjdolecek  *     xbdback_co_do_io()
12977822551Sjym  *        |
13077822551Sjym  *     xbdback_co_main_incr() -> xbdback_co_main_loop()
131b96fedacSbouyer  */
132b96fedacSbouyer typedef void *(* xbdback_cont_t)(struct xbdback_instance *, void *);
133b96fedacSbouyer 
134d3cd2576Sbouyer enum xbdi_proto {
135d3cd2576Sbouyer 	XBDIP_NATIVE,
136d3cd2576Sbouyer 	XBDIP_32,
137d3cd2576Sbouyer 	XBDIP_64
138d3cd2576Sbouyer };
139d3cd2576Sbouyer 
1408d1b8859Sjdolecek struct xbdback_va {
1418d1b8859Sjdolecek 	SLIST_ENTRY(xbdback_va) xv_next;
1428d1b8859Sjdolecek 	vaddr_t xv_vaddr;
1438d1b8859Sjdolecek };
1448d1b8859Sjdolecek 
1452d80bedaSjdolecek /*
1462d80bedaSjdolecek  * For each I/O operation associated with one of those requests, an
1472d80bedaSjdolecek  * xbdback_io is allocated from a pool.  It may correspond to multiple
1482d80bedaSjdolecek  * Xen disk requests, or parts of them, if several arrive at once that
1492d80bedaSjdolecek  * can be coalesced.
1502d80bedaSjdolecek  */
1512d80bedaSjdolecek struct xbdback_io {
1522d80bedaSjdolecek 	SLIST_ENTRY(xbdback_io) xio_next;
1532d80bedaSjdolecek 	/* The instance pointer is duplicated for convenience. */
1542d80bedaSjdolecek 	struct xbdback_instance *xio_xbdi; /* our xbd instance */
15580da1c02Sbouyer 	/* _request state: track requests fetched from ring */
15680da1c02Sbouyer 	blkif_request_t xio_xen_req;
15780da1c02Sbouyer 	/* array of segments[VBD_MAX_INDIRECT_SEGMENTS] allocated separately */
15880da1c02Sbouyer 	struct blkif_request_segment *xio_seg;
15980da1c02Sbouyer 	bus_dmamap_t xio_seg_dmamap;
16080da1c02Sbouyer 	/* internal states */
1612d80bedaSjdolecek 	union {
1622d80bedaSjdolecek 		struct {
1632d80bedaSjdolecek 			struct buf xio_buf; /* our I/O */
1642d80bedaSjdolecek 			/* the virtual address to map the request at */
1652d80bedaSjdolecek 			vaddr_t xio_vaddr;
1662d80bedaSjdolecek 			struct xbdback_va *xio_xv;
1672d80bedaSjdolecek 			vaddr_t xio_start_offset;	/* I/O start offset */
1682d80bedaSjdolecek 			/* grants to map */
1692d80bedaSjdolecek 			grant_ref_t xio_gref[VBD_MAX_INDIRECT_SEGMENTS];
1702d80bedaSjdolecek 			/* grants release */
1712d80bedaSjdolecek 			grant_handle_t xio_gh[VBD_MAX_INDIRECT_SEGMENTS];
172*d55161bbSbouyer 			bool xio_need_bounce; /* request is not contiguous */
1732d80bedaSjdolecek 		} xio_rw;
1742d80bedaSjdolecek 	} u;
1752d80bedaSjdolecek };
1762d80bedaSjdolecek #define xio_buf		u.xio_rw.xio_buf
1772d80bedaSjdolecek #define xio_vaddr	u.xio_rw.xio_vaddr
1782d80bedaSjdolecek #define xio_start_offset	u.xio_rw.xio_start_offset
1792d80bedaSjdolecek #define xio_xv		u.xio_rw.xio_xv
1802d80bedaSjdolecek #define xio_gref	u.xio_rw.xio_gref
1812d80bedaSjdolecek #define xio_gh		u.xio_rw.xio_gh
18280da1c02Sbouyer #define xio_need_bounce	u.xio_rw.xio_need_bounce
1832d80bedaSjdolecek 
184b96fedacSbouyer /* we keep the xbdback instances in a linked list */
185b96fedacSbouyer struct xbdback_instance {
186b96fedacSbouyer 	SLIST_ENTRY(xbdback_instance) next;
187b96fedacSbouyer 	struct xenbus_device *xbdi_xbusd; /* our xenstore entry */
188b96fedacSbouyer 	struct xenbus_watch xbdi_watch; /* to watch our store */
189b96fedacSbouyer 	domid_t xbdi_domid;	/* attached to this domain */
190b96fedacSbouyer 	uint32_t xbdi_handle;	/* domain-specific handle */
19154f95b14Sjym 	char xbdi_name[16];	/* name of this instance */
19254f95b14Sjym 	/* mutex that protects concurrent access to the xbdback instance */
19354f95b14Sjym 	kmutex_t xbdi_lock;
19454f95b14Sjym 	kcondvar_t xbdi_cv;	/* wait channel for thread work */
19554f95b14Sjym 	xbdback_state_t xbdi_status; /* thread's status */
1962d80bedaSjdolecek 	/* context and KVA for mapping transfers */
1972d80bedaSjdolecek 	struct xbdback_io xbdi_io[BLKIF_RING_SIZE];
1982d80bedaSjdolecek 	SLIST_HEAD(, xbdback_io) xbdi_io_free;
1998d1b8859Sjdolecek 	struct xbdback_va xbdi_va[BLKIF_RING_SIZE];
2008d1b8859Sjdolecek 	SLIST_HEAD(, xbdback_va) xbdi_va_free;
20180da1c02Sbouyer 	/* segments structure allocated in page-aligned chunks */
20280da1c02Sbouyer 	struct blkif_request_segment *xbdi_segs;
203*d55161bbSbouyer 	/* bounce buffer in case a transfer is not contiguous */
20480da1c02Sbouyer 	vaddr_t xbdi_bouncebuf;
20580da1c02Sbouyer 	int xbdi_bouncebuf_use; /* is bounce buffer in use? */
206b96fedacSbouyer 	/* backing device parameters */
207b96fedacSbouyer 	dev_t xbdi_dev;
208b96fedacSbouyer 	const struct bdevsw *xbdi_bdevsw; /* pointer to the device's bdevsw */
209b96fedacSbouyer 	struct vnode *xbdi_vp;
2109cde455fSbouyer 	uint64_t xbdi_size;
21154f95b14Sjym 	bool xbdi_ro; /* is device read-only ? */
212b96fedacSbouyer 	/* parameters for the communication */
213b96fedacSbouyer 	unsigned int xbdi_evtchn;
214e5c2d6aaScherry 	struct intrhand *xbdi_ih;
215b96fedacSbouyer 	/* private parameters for communication */
216d3cd2576Sbouyer 	blkif_back_ring_proto_t xbdi_ring;
217d3cd2576Sbouyer 	enum xbdi_proto xbdi_proto;
218b96fedacSbouyer 	grant_handle_t xbdi_ring_handle; /* to unmap the ring */
219b96fedacSbouyer 	vaddr_t xbdi_ring_va; /* to unmap the ring */
220b96fedacSbouyer 	/* disconnection must be postponed until all I/O is done */
221f4853d4dSjym 	int xbdi_refcnt;
222b96fedacSbouyer 	/*
223b96fedacSbouyer 	 * State for I/O processing/coalescing follows; this has to
224b96fedacSbouyer 	 * live here instead of on the stack because of the
225b96fedacSbouyer 	 * continuation-ness (see above).
226b96fedacSbouyer 	 */
227b96fedacSbouyer 	RING_IDX xbdi_req_prod; /* limit on request indices */
22870fd7422Sjdolecek 	xbdback_cont_t xbdi_cont;
22980da1c02Sbouyer 	/* if not NULL, will restart here after thread wakes up */
23080da1c02Sbouyer 	xbdback_cont_t xbdi_cont_restart;
23180da1c02Sbouyer 	void *xbdi_cont_restart_obj;
232b96fedacSbouyer 	/* other state */
233db95bc7fSbouyer 	uint xbdi_pendingreqs; /* number of I/O in fly */
23454f95b14Sjym 	struct timeval xbdi_lasterr_time;    /* error time tracking */
235b96fedacSbouyer };
236b96fedacSbouyer /* Manipulation of the above reference count. */
23780da1c02Sbouyer #define xbdi_get(xbdip) 					\
23880da1c02Sbouyer do {								\
23980da1c02Sbouyer 	KASSERT(mutex_owned(&xbdip->xbdi_lock));		\
24080da1c02Sbouyer 	(xbdip)->xbdi_refcnt++;					\
241*d55161bbSbouyer } while (0)
24280da1c02Sbouyer 
243b96fedacSbouyer #define xbdi_put(xbdip)						\
244b96fedacSbouyer do {								\
24580da1c02Sbouyer 	KASSERT(mutex_owned(&xbdip->xbdi_lock));		\
24658477cb8Sjdolecek 	if (--((xbdip)->xbdi_refcnt) == 0)  			\
247b96fedacSbouyer                xbdback_finish_disconnect(xbdip);		\
248*d55161bbSbouyer } while (0)
249b96fedacSbouyer 
250555482faSjdolecek static SLIST_HEAD(, xbdback_instance) xbdback_instances;
251555482faSjdolecek static kmutex_t xbdback_lock;
252b96fedacSbouyer 
25354f95b14Sjym /* Interval between reports of I/O errors from frontend */
2548d1b8859Sjdolecek static const struct timeval xbdback_err_intvl = { 1, 0 };
25554f95b14Sjym 
256b96fedacSbouyer        void xbdbackattach(int);
257b96fedacSbouyer static int  xbdback_xenbus_create(struct xenbus_device *);
258b96fedacSbouyer static int  xbdback_xenbus_destroy(void *);
259b96fedacSbouyer static void xbdback_frontend_changed(void *, XenbusState);
260b96fedacSbouyer static void xbdback_backend_changed(struct xenbus_watch *,
261b96fedacSbouyer     const char **, unsigned int);
262b96fedacSbouyer static int  xbdback_evthandler(void *);
26354f95b14Sjym 
26454f95b14Sjym static int  xbdback_connect(struct xbdback_instance *);
2654d61ee8dSbouyer static void xbdback_disconnect(struct xbdback_instance *);
266b96fedacSbouyer static void xbdback_finish_disconnect(struct xbdback_instance *);
267b96fedacSbouyer 
268555482faSjdolecek static bool xbdif_lookup(domid_t, uint32_t);
269b96fedacSbouyer 
270b96fedacSbouyer static void *xbdback_co_main(struct xbdback_instance *, void *);
271b96fedacSbouyer static void *xbdback_co_main_loop(struct xbdback_instance *, void *);
272b96fedacSbouyer static void *xbdback_co_main_incr(struct xbdback_instance *, void *);
273b96fedacSbouyer static void *xbdback_co_main_done2(struct xbdback_instance *, void *);
274b96fedacSbouyer 
275db95bc7fSbouyer static void *xbdback_co_cache_flush(struct xbdback_instance *, void *);
276db95bc7fSbouyer 
277b96fedacSbouyer static void *xbdback_co_io(struct xbdback_instance *, void *);
278b96fedacSbouyer static void *xbdback_co_io_gotio(struct xbdback_instance *, void *);
279b96fedacSbouyer 
28054f95b14Sjym static void *xbdback_co_do_io(struct xbdback_instance *, void *);
28154f95b14Sjym 
282b96fedacSbouyer static void xbdback_io_error(struct xbdback_io *, int);
283b96fedacSbouyer static void xbdback_iodone(struct buf *);
28458477cb8Sjdolecek static void xbdback_iodone_locked(struct xbdback_instance *,
28558477cb8Sjdolecek 		struct xbdback_io *, struct buf *);
286b96fedacSbouyer static void xbdback_send_reply(struct xbdback_instance *, uint64_t , int , int);
287b96fedacSbouyer 
28880da1c02Sbouyer static int  xbdback_map_shm(struct xbdback_io *);
289b96fedacSbouyer static void xbdback_unmap_shm(struct xbdback_io *);
290b96fedacSbouyer 
2912d80bedaSjdolecek static struct xbdback_io *xbdback_io_get(struct xbdback_instance *);
2922d80bedaSjdolecek static void xbdback_io_put(struct xbdback_instance *, struct xbdback_io *);
29354f95b14Sjym static void xbdback_thread(void *);
29454f95b14Sjym static void xbdback_wakeup_thread(struct xbdback_instance *);
295b96fedacSbouyer static void xbdback_trampoline(struct xbdback_instance *, void *);
296b96fedacSbouyer 
297b96fedacSbouyer static struct xenbus_backend_driver xbd_backend_driver = {
298b96fedacSbouyer 	.xbakd_create = xbdback_xenbus_create,
299b96fedacSbouyer 	.xbakd_type = "vbd"
300b96fedacSbouyer };
301b96fedacSbouyer 
302b96fedacSbouyer void
xbdbackattach(int n)303b96fedacSbouyer xbdbackattach(int n)
304b96fedacSbouyer {
305b96fedacSbouyer 	XENPRINTF(("xbdbackattach\n"));
306b96fedacSbouyer 
307b96fedacSbouyer 	/*
308b96fedacSbouyer 	 * initialize the backend driver, register the control message handler
309b96fedacSbouyer 	 * and send driver up message.
310b96fedacSbouyer 	 */
311b96fedacSbouyer 	SLIST_INIT(&xbdback_instances);
312555482faSjdolecek 	mutex_init(&xbdback_lock, MUTEX_DEFAULT, IPL_NONE);
31354f95b14Sjym 
314b96fedacSbouyer 	xenbus_backend_register(&xbd_backend_driver);
315b96fedacSbouyer }
316b96fedacSbouyer 
317b96fedacSbouyer static int
xbdback_xenbus_create(struct xenbus_device * xbusd)318b96fedacSbouyer xbdback_xenbus_create(struct xenbus_device *xbusd)
319b96fedacSbouyer {
320b96fedacSbouyer 	struct xbdback_instance *xbdi;
321b96fedacSbouyer 	long domid, handle;
322593919aaSbouyer 	int error, i;
32380da1c02Sbouyer 	int segalloc = 0;
324593919aaSbouyer 	char *ep;
325b96fedacSbouyer 
326b96fedacSbouyer 	if ((error = xenbus_read_ul(NULL, xbusd->xbusd_path,
327b96fedacSbouyer 	    "frontend-id", &domid, 10)) != 0) {
328d0c2dda1Scegger 		aprint_error("xbdback: can't read %s/frontend-id: %d\n",
329b96fedacSbouyer 		    xbusd->xbusd_path, error);
330b96fedacSbouyer 		return error;
331b96fedacSbouyer 	}
332593919aaSbouyer 
333593919aaSbouyer 	/*
334593919aaSbouyer 	 * get handle: this is the last component of the path; which is
335593919aaSbouyer 	 * a decimal number. $path/dev contains the device name, which is not
336593919aaSbouyer 	 * appropriate.
337593919aaSbouyer 	 */
338593919aaSbouyer 	for (i = strlen(xbusd->xbusd_path); i > 0; i--) {
339593919aaSbouyer 		if (xbusd->xbusd_path[i] == '/')
340593919aaSbouyer 			break;
341593919aaSbouyer 	}
342593919aaSbouyer 	if (i == 0) {
343593919aaSbouyer 		aprint_error("xbdback: can't parse %s\n",
344593919aaSbouyer 		    xbusd->xbusd_path);
345593919aaSbouyer 		return EFTYPE;
346593919aaSbouyer 	}
347593919aaSbouyer 	handle = strtoul(&xbusd->xbusd_path[i+1], &ep, 10);
348593919aaSbouyer 	if (*ep != '\0') {
349593919aaSbouyer 		aprint_error("xbdback: can't parse %s\n",
350593919aaSbouyer 		    xbusd->xbusd_path);
351593919aaSbouyer 		return EFTYPE;
352b96fedacSbouyer 	}
353b96fedacSbouyer 
35454f95b14Sjym 	xbdi = kmem_zalloc(sizeof(*xbdi), KM_SLEEP);
35554f95b14Sjym 
356b96fedacSbouyer 	xbdi->xbdi_domid = domid;
357b96fedacSbouyer 	xbdi->xbdi_handle = handle;
35854f95b14Sjym 	snprintf(xbdi->xbdi_name, sizeof(xbdi->xbdi_name), "xbdb%di%d",
35954f95b14Sjym 	    xbdi->xbdi_domid, xbdi->xbdi_handle);
36054f95b14Sjym 
36158477cb8Sjdolecek 	mutex_enter(&xbdback_lock);
36258477cb8Sjdolecek 	if (xbdif_lookup(domid, handle)) {
36358477cb8Sjdolecek 		mutex_exit(&xbdback_lock);
36458477cb8Sjdolecek 		kmem_free(xbdi, sizeof(*xbdi));
36558477cb8Sjdolecek 		return EEXIST;
36658477cb8Sjdolecek 	}
36758477cb8Sjdolecek 	SLIST_INSERT_HEAD(&xbdback_instances, xbdi, next);
36858477cb8Sjdolecek 	mutex_exit(&xbdback_lock);
36958477cb8Sjdolecek 
37054f95b14Sjym 	/* initialize status and reference counter */
371b96fedacSbouyer 	xbdi->xbdi_status = DISCONNECTED;
37254f95b14Sjym 
37354f95b14Sjym 	mutex_init(&xbdi->xbdi_lock, MUTEX_DEFAULT, IPL_BIO);
37454f95b14Sjym 	cv_init(&xbdi->xbdi_cv, xbdi->xbdi_name);
37554f95b14Sjym 
37680da1c02Sbouyer 	mutex_enter(&xbdi->xbdi_lock);
37780da1c02Sbouyer 	xbdi_get(xbdi);
37880da1c02Sbouyer 	mutex_exit(&xbdi->xbdi_lock);
37980da1c02Sbouyer 
380b96fedacSbouyer 	xbusd->xbusd_u.b.b_cookie = xbdi;
381b96fedacSbouyer 	xbusd->xbusd_u.b.b_detach = xbdback_xenbus_destroy;
382b96fedacSbouyer 	xbusd->xbusd_otherend_changed = xbdback_frontend_changed;
383b96fedacSbouyer 	xbdi->xbdi_xbusd = xbusd;
384b96fedacSbouyer 
385ca27aaadSjdolecek 	SLIST_INIT(&xbdi->xbdi_va_free);
3868d1b8859Sjdolecek 	for (i = 0; i < BLKIF_RING_SIZE; i++) {
3878d1b8859Sjdolecek 		xbdi->xbdi_va[i].xv_vaddr = uvm_km_alloc(kernel_map,
3888d1b8859Sjdolecek 		    VBD_VA_SIZE, 0, UVM_KMF_VAONLY|UVM_KMF_WAITVA);
3898d1b8859Sjdolecek 		SLIST_INSERT_HEAD(&xbdi->xbdi_va_free, &xbdi->xbdi_va[i],
3908d1b8859Sjdolecek 		    xv_next);
3918d1b8859Sjdolecek 	}
3928d1b8859Sjdolecek 
39380da1c02Sbouyer 	/*
39480da1c02Sbouyer 	 * allocate page-aligned memory for segments, so that for each
39580da1c02Sbouyer 	 * xbdback_io its segments are in a single page.
39680da1c02Sbouyer 	 * sizeof(struct blkif_request_segment) * VBD_MAX_INDIRECT_SEGMENTS
397*d55161bbSbouyer 	 * is 128 so this helps us avoiding a page boundary withing a
398*d55161bbSbouyer 	 * block of VBD_MAX_INDIRECT_SEGMENTS segments.
39980da1c02Sbouyer 	 */
40080da1c02Sbouyer 	CTASSERT(sizeof(struct blkif_request_segment) * VBD_MAX_INDIRECT_SEGMENTS == 128);
40180da1c02Sbouyer 	xbdi->xbdi_segs = (void *)uvm_km_alloc(kernel_map, round_page(
40280da1c02Sbouyer 	    sizeof(struct blkif_request_segment) * VBD_MAX_INDIRECT_SEGMENTS * BLKIF_RING_SIZE),
40380da1c02Sbouyer 	    PAGE_SIZE, UVM_KMF_WIRED | UVM_KMF_WAITVA);
40480da1c02Sbouyer 
4052d80bedaSjdolecek 	SLIST_INIT(&xbdi->xbdi_io_free);
4062d80bedaSjdolecek 	for (i = 0; i < BLKIF_RING_SIZE; i++) {
40780da1c02Sbouyer 		struct xbdback_io *xbd_io = &xbdi->xbdi_io[i];
40880da1c02Sbouyer 		xbd_io->xio_seg =
40980da1c02Sbouyer 		    &xbdi->xbdi_segs[i * VBD_MAX_INDIRECT_SEGMENTS];
41080da1c02Sbouyer 		error = bus_dmamap_create(xbdi->xbdi_xbusd->xbusd_dmat,
41180da1c02Sbouyer 		    PAGE_SIZE, 1, PAGE_SIZE, PAGE_SIZE,
41280da1c02Sbouyer 		    BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW,
41380da1c02Sbouyer 		    &xbd_io->xio_seg_dmamap);
41480da1c02Sbouyer 		if (error != 0) {
41580da1c02Sbouyer 			printf("%s: can't create dma map for indirect segments %d\n",
41680da1c02Sbouyer 			    xbdi->xbdi_name, i);
41780da1c02Sbouyer 			goto fail;
41880da1c02Sbouyer 		}
41980da1c02Sbouyer 		error = bus_dmamap_load(xbdi->xbdi_xbusd->xbusd_dmat,
42080da1c02Sbouyer 		    xbd_io->xio_seg_dmamap, xbd_io->xio_seg,
42180da1c02Sbouyer 		    sizeof(struct blkif_request_segment) * VBD_MAX_INDIRECT_SEGMENTS,
42280da1c02Sbouyer 		    NULL, BUS_DMA_WAITOK);
42380da1c02Sbouyer 		if (error != 0) {
4245f6de192Smartin 			printf("%s: can't load dma map for indirect segments %d @%p (%d, %zu)\n",
42580da1c02Sbouyer 			    xbdi->xbdi_name, i, xbd_io->xio_seg, error, sizeof(xbd_io->xio_seg));
42680da1c02Sbouyer 			bus_dmamap_destroy(xbdi->xbdi_xbusd->xbusd_dmat,
42780da1c02Sbouyer 			    xbd_io->xio_seg_dmamap);
42880da1c02Sbouyer 			goto fail;
42980da1c02Sbouyer 		}
43080da1c02Sbouyer 		KASSERT(xbd_io->xio_seg_dmamap->dm_nsegs == 1);
43180da1c02Sbouyer 		segalloc = i;
43280da1c02Sbouyer 		SLIST_INSERT_HEAD(&xbdi->xbdi_io_free, xbd_io, xio_next);
4332d80bedaSjdolecek 	}
4342d80bedaSjdolecek 
435b96fedacSbouyer 	error = xenbus_watch_path2(xbusd, xbusd->xbusd_path, "physical-device",
436b96fedacSbouyer 	    &xbdi->xbdi_watch, xbdback_backend_changed);
437b96fedacSbouyer 	if (error) {
438b96fedacSbouyer 		printf("failed to watch on %s/physical-device: %d\n",
439b96fedacSbouyer 		    xbusd->xbusd_path, error);
440b96fedacSbouyer 		goto fail;
441b96fedacSbouyer 	}
442b96fedacSbouyer 	xbdi->xbdi_watch.xbw_dev = xbusd;
443b96fedacSbouyer 	error = xenbus_switch_state(xbusd, NULL, XenbusStateInitWait);
444b96fedacSbouyer 	if (error) {
445b96fedacSbouyer 		printf("failed to switch state on %s: %d\n",
446b96fedacSbouyer 		    xbusd->xbusd_path, error);
447b96fedacSbouyer 		goto fail2;
448b96fedacSbouyer 	}
44980da1c02Sbouyer 
45080da1c02Sbouyer 	xbdi->xbdi_bouncebuf = uvm_km_alloc(kernel_map, MAXPHYS, PAGE_SIZE,
45180da1c02Sbouyer 	    UVM_KMF_WIRED | UVM_KMF_WAITVA);
452b96fedacSbouyer 	return 0;
453b96fedacSbouyer fail2:
454b96fedacSbouyer 	unregister_xenbus_watch(&xbdi->xbdi_watch);
455b96fedacSbouyer fail:
45680da1c02Sbouyer 	for (i = 0; i < segalloc; i++) {
45780da1c02Sbouyer 		struct xbdback_io *xbd_io = &xbdi->xbdi_io[i];
45880da1c02Sbouyer 		bus_dmamap_unload(xbdi->xbdi_xbusd->xbusd_dmat,
45980da1c02Sbouyer 		    xbd_io->xio_seg_dmamap);
46080da1c02Sbouyer 		bus_dmamap_destroy(xbdi->xbdi_xbusd->xbusd_dmat,
46180da1c02Sbouyer 		    xbd_io->xio_seg_dmamap);
46280da1c02Sbouyer 	}
46380da1c02Sbouyer 	mutex_enter(&xbdback_lock);
46480da1c02Sbouyer 	SLIST_REMOVE(&xbdback_instances, xbdi, xbdback_instance, next);
46580da1c02Sbouyer 	mutex_exit(&xbdback_lock);
46654f95b14Sjym 	kmem_free(xbdi, sizeof(*xbdi));
467b96fedacSbouyer 	return error;
468b96fedacSbouyer }
469b96fedacSbouyer 
470b96fedacSbouyer static int
xbdback_xenbus_destroy(void * arg)471b96fedacSbouyer xbdback_xenbus_destroy(void *arg)
472b96fedacSbouyer {
473b96fedacSbouyer 	struct xbdback_instance *xbdi = arg;
474b96fedacSbouyer 
475b96fedacSbouyer 	XENPRINTF(("xbdback_xenbus_destroy state %d\n", xbdi->xbdi_status));
476b96fedacSbouyer 
47754f95b14Sjym 	xbdback_disconnect(xbdi);
47854f95b14Sjym 
479b96fedacSbouyer 	/* unregister watch */
480418f6d13Sjdolecek 	if (xbdi->xbdi_watch.node)
481418f6d13Sjdolecek 		xenbus_unwatch_path(&xbdi->xbdi_watch);
482b96fedacSbouyer 	/* unmap ring */
4834c8adaa4Sbouyer 	if (xbdi->xbdi_ring_handle) {
4844c8adaa4Sbouyer 		xen_shm_unmap(xbdi->xbdi_ring_va, 1, &xbdi->xbdi_ring_handle);
4855dac1986Sbouyer 	}
4865dac1986Sbouyer 
4875dac1986Sbouyer 	if (xbdi->xbdi_ring_va != 0) {
488b96fedacSbouyer 		uvm_km_free(kernel_map, xbdi->xbdi_ring_va,
489b96fedacSbouyer 		    PAGE_SIZE, UVM_KMF_VAONLY);
490b96fedacSbouyer 	}
4915dac1986Sbouyer 
492b96fedacSbouyer 	/* close device */
493b96fedacSbouyer 	if (xbdi->xbdi_size) {
494ac2d876cSchristos 		const char *name;
495ac2d876cSchristos 		struct dkwedge_info wi;
496ac2d876cSchristos 		if (getdiskinfo(xbdi->xbdi_vp, &wi) == 0)
497ac2d876cSchristos 			name = wi.dkw_devname;
498ac2d876cSchristos 		else
499ac2d876cSchristos 			name = "*unknown*";
500ac2d876cSchristos 		printf("xbd backend: detach device %s for domain %d\n",
501ac2d876cSchristos 		    name, xbdi->xbdi_domid);
5023685a310Sad 		vn_close(xbdi->xbdi_vp, FREAD, NOCRED);
503b96fedacSbouyer 	}
504555482faSjdolecek 	mutex_enter(&xbdback_lock);
505b96fedacSbouyer 	SLIST_REMOVE(&xbdback_instances, xbdi, xbdback_instance, next);
506555482faSjdolecek 	mutex_exit(&xbdback_lock);
5078d1b8859Sjdolecek 
5088d1b8859Sjdolecek 	for (int i = 0; i < BLKIF_RING_SIZE; i++) {
50980da1c02Sbouyer 		struct xbdback_io *xbd_io = &xbdi->xbdi_io[i];
51080da1c02Sbouyer 		bus_dmamap_unload(xbdi->xbdi_xbusd->xbusd_dmat,
51180da1c02Sbouyer 		    xbd_io->xio_seg_dmamap);
51280da1c02Sbouyer 		bus_dmamap_destroy(xbdi->xbdi_xbusd->xbusd_dmat,
51380da1c02Sbouyer 		    xbd_io->xio_seg_dmamap);
5148d1b8859Sjdolecek 		if (xbdi->xbdi_va[i].xv_vaddr != 0) {
5158d1b8859Sjdolecek 			uvm_km_free(kernel_map, xbdi->xbdi_va[i].xv_vaddr,
5168d1b8859Sjdolecek 			    VBD_VA_SIZE, UVM_KMF_VAONLY);
5178d1b8859Sjdolecek 			xbdi->xbdi_va[i].xv_vaddr = 0;
5188d1b8859Sjdolecek 		}
5198d1b8859Sjdolecek 	}
5208d1b8859Sjdolecek 
521c8d41590Sjdolecek 
522b2cdc990Scegger 	mutex_destroy(&xbdi->xbdi_lock);
523b2cdc990Scegger 	cv_destroy(&xbdi->xbdi_cv);
52454f95b14Sjym 	kmem_free(xbdi, sizeof(*xbdi));
525b96fedacSbouyer 	return 0;
526b96fedacSbouyer }
527b96fedacSbouyer 
528bf743748Sjym static int
xbdback_connect(struct xbdback_instance * xbdi)529bf743748Sjym xbdback_connect(struct xbdback_instance *xbdi)
530b96fedacSbouyer {
531782d8289Sjdolecek 	int err;
532b96fedacSbouyer 	evtchn_op_t evop;
5334c8adaa4Sbouyer 	grant_ref_t gring_ref;
534bf743748Sjym 	u_long ring_ref, revtchn;
535782d8289Sjdolecek 	char xsproto[32];
536d3cd2576Sbouyer 	const char *proto;
537bf743748Sjym 	struct xenbus_device *xbusd = xbdi->xbdi_xbusd;
538b96fedacSbouyer 
539d5cd140eSbouyer 	XENPRINTF(("xbdback %s: connect\n", xbusd->xbusd_path));
540b96fedacSbouyer 	/* read communication information */
541b96fedacSbouyer 	err = xenbus_read_ul(NULL, xbusd->xbusd_otherend,
542b96fedacSbouyer 	    "ring-ref", &ring_ref, 10);
543b96fedacSbouyer 	if (err) {
544b96fedacSbouyer 		xenbus_dev_fatal(xbusd, err, "reading %s/ring-ref",
545b96fedacSbouyer 		    xbusd->xbusd_otherend);
546bf743748Sjym 		return -1;
547b96fedacSbouyer 	}
548d5cd140eSbouyer 	XENPRINTF(("xbdback %s: connect ring-ref %lu\n", xbusd->xbusd_path, ring_ref));
549b96fedacSbouyer 	err = xenbus_read_ul(NULL, xbusd->xbusd_otherend,
550b96fedacSbouyer 	    "event-channel", &revtchn, 10);
551b96fedacSbouyer 	if (err) {
552b96fedacSbouyer 		xenbus_dev_fatal(xbusd, err, "reading %s/event-channel",
553b96fedacSbouyer 		    xbusd->xbusd_otherend);
554bf743748Sjym 		return -1;
555b96fedacSbouyer 	}
556d5cd140eSbouyer 	XENPRINTF(("xbdback %s: connect revtchn %lu\n", xbusd->xbusd_path, revtchn));
557d3cd2576Sbouyer 	err = xenbus_read(NULL, xbusd->xbusd_otherend, "protocol",
558782d8289Sjdolecek 	    xsproto, sizeof(xsproto));
559d3cd2576Sbouyer 	if (err) {
560d3cd2576Sbouyer 		xbdi->xbdi_proto = XBDIP_NATIVE;
561bf743748Sjym 		proto = "unspecified";
562d5cd140eSbouyer 		XENPRINTF(("xbdback %s: connect no xsproto\n", xbusd->xbusd_path));
563d3cd2576Sbouyer 	} else {
564d5cd140eSbouyer 		XENPRINTF(("xbdback %s: connect xsproto %s\n", xbusd->xbusd_path, xsproto));
565d3cd2576Sbouyer 		if (strcmp(xsproto, XEN_IO_PROTO_ABI_NATIVE) == 0) {
566d3cd2576Sbouyer 			xbdi->xbdi_proto = XBDIP_NATIVE;
567d3cd2576Sbouyer 			proto = XEN_IO_PROTO_ABI_NATIVE;
568d3cd2576Sbouyer 		} else if (strcmp(xsproto, XEN_IO_PROTO_ABI_X86_32) == 0) {
569d3cd2576Sbouyer 			xbdi->xbdi_proto = XBDIP_32;
570d3cd2576Sbouyer 			proto = XEN_IO_PROTO_ABI_X86_32;
571d3cd2576Sbouyer 		} else if (strcmp(xsproto, XEN_IO_PROTO_ABI_X86_64) == 0) {
572d3cd2576Sbouyer 			xbdi->xbdi_proto = XBDIP_64;
573dca35a7eSbouyer 			proto = XEN_IO_PROTO_ABI_X86_64;
574d3cd2576Sbouyer 		} else {
575bf743748Sjym 			aprint_error("xbd domain %d: unknown proto %s\n",
576d3cd2576Sbouyer 			    xbdi->xbdi_domid, xsproto);
577bf743748Sjym 			return -1;
578bf743748Sjym 		}
579d5cd140eSbouyer 	}
580bf743748Sjym 
581b96fedacSbouyer 	/* allocate VA space and map rings */
582b96fedacSbouyer 	xbdi->xbdi_ring_va = uvm_km_alloc(kernel_map, PAGE_SIZE, 0,
583b96fedacSbouyer 	    UVM_KMF_VAONLY);
584b96fedacSbouyer 	if (xbdi->xbdi_ring_va == 0) {
585b96fedacSbouyer 		xenbus_dev_fatal(xbusd, ENOMEM,
586b96fedacSbouyer 		    "can't get VA for ring", xbusd->xbusd_otherend);
587bf743748Sjym 		return -1;
588b96fedacSbouyer 	}
589d5cd140eSbouyer 	XENPRINTF(("xbdback %s: connect va 0x%" PRIxVADDR "\n", xbusd->xbusd_path, xbdi->xbdi_ring_va));
5904c8adaa4Sbouyer 
5914c8adaa4Sbouyer 	gring_ref = ring_ref;
5924c8adaa4Sbouyer 	if (xen_shm_map(1, xbdi->xbdi_domid, &gring_ref, xbdi->xbdi_ring_va,
5934c8adaa4Sbouyer 	    &xbdi->xbdi_ring_handle, 0) != 0) {
5944c8adaa4Sbouyer 		aprint_error("xbdback %s: can't map grant ref\n",
5954c8adaa4Sbouyer 		    xbusd->xbusd_path);
596b96fedacSbouyer 		xenbus_dev_fatal(xbusd, EINVAL,
597b96fedacSbouyer 		    "can't map ring", xbusd->xbusd_otherend);
5985dac1986Sbouyer 		goto err1;
599b96fedacSbouyer 	}
6004c8adaa4Sbouyer 	XENPRINTF(("xbdback %s: connect grhandle %d\n", xbusd->xbusd_path, xbdi->xbdi_ring_handle));
601bf743748Sjym 
602d3cd2576Sbouyer 	switch(xbdi->xbdi_proto) {
603d3cd2576Sbouyer 	case XBDIP_NATIVE:
604d3cd2576Sbouyer 	{
605d3cd2576Sbouyer 		blkif_sring_t *sring = (void *)xbdi->xbdi_ring_va;
606bf743748Sjym 		BACK_RING_INIT(&xbdi->xbdi_ring.ring_n, sring, PAGE_SIZE);
607d3cd2576Sbouyer 		break;
608d3cd2576Sbouyer 	}
609d3cd2576Sbouyer 	case XBDIP_32:
610d3cd2576Sbouyer 	{
611bf743748Sjym 		blkif_x86_32_sring_t *sring = (void *)xbdi->xbdi_ring_va;
612bf743748Sjym 		BACK_RING_INIT(&xbdi->xbdi_ring.ring_32, sring, PAGE_SIZE);
613d3cd2576Sbouyer 		break;
614d3cd2576Sbouyer 	}
615d3cd2576Sbouyer 	case XBDIP_64:
616d3cd2576Sbouyer 	{
617bf743748Sjym 		blkif_x86_64_sring_t *sring = (void *)xbdi->xbdi_ring_va;
618bf743748Sjym 		BACK_RING_INIT(&xbdi->xbdi_ring.ring_64, sring, PAGE_SIZE);
619d3cd2576Sbouyer 		break;
620d3cd2576Sbouyer 	}
621d3cd2576Sbouyer 	}
622bf743748Sjym 
623b96fedacSbouyer 	evop.cmd = EVTCHNOP_bind_interdomain;
624b96fedacSbouyer 	evop.u.bind_interdomain.remote_dom = xbdi->xbdi_domid;
625b96fedacSbouyer 	evop.u.bind_interdomain.remote_port = revtchn;
626b96fedacSbouyer 	err = HYPERVISOR_event_channel_op(&evop);
627b96fedacSbouyer 	if (err) {
628507459d7Sjym 		aprint_error("blkback %s: "
629507459d7Sjym 		    "can't get event channel: %d\n",
630b96fedacSbouyer 		    xbusd->xbusd_otherend, err);
631b96fedacSbouyer 		xenbus_dev_fatal(xbusd, err,
632dca35a7eSbouyer 		    "can't bind event channel", xbusd->xbusd_otherend);
633b96fedacSbouyer 		goto err2;
634b96fedacSbouyer 	}
635b96fedacSbouyer 	xbdi->xbdi_evtchn = evop.u.bind_interdomain.local_port;
6365dac1986Sbouyer 	XENPRINTF(("xbdback %s: connect evchannel %d\n", xbusd->xbusd_path, xbdi->xbdi_evtchn));
637bf743748Sjym 
6385dac1986Sbouyer 	xbdi->xbdi_ih = xen_intr_establish_xname(-1, &xen_pic,
6395dac1986Sbouyer 	    xbdi->xbdi_evtchn, IST_LEVEL, IPL_BIO, xbdback_evthandler, xbdi,
6405dac1986Sbouyer 	    true, xbdi->xbdi_name);
641e5c2d6aaScherry 	KASSERT(xbdi->xbdi_ih != NULL);
642422a28e0Sjym 	aprint_verbose("xbd backend domain %d handle %#x (%d) "
643422a28e0Sjym 	    "using event channel %d, protocol %s\n", xbdi->xbdi_domid,
644422a28e0Sjym 	    xbdi->xbdi_handle, xbdi->xbdi_handle, xbdi->xbdi_evtchn, proto);
64554f95b14Sjym 
64654f95b14Sjym 	/* enable the xbdback event handler machinery */
64754f95b14Sjym 	xbdi->xbdi_status = WAITING;
6481fe45bddScherry 	hypervisor_unmask_event(xbdi->xbdi_evtchn);
649b96fedacSbouyer 	hypervisor_notify_via_evtchn(xbdi->xbdi_evtchn);
65054f95b14Sjym 
651d4845d00Sbouyer 	if (kthread_create(PRI_NONE, KTHREAD_MPSAFE, NULL,
652f5c3f346Sjoerg 	    xbdback_thread, xbdi, NULL, "%s", xbdi->xbdi_name) == 0)
653bf743748Sjym 		return 0;
654bf743748Sjym 
655bf743748Sjym err2:
656bf743748Sjym 	/* unmap ring */
6574c8adaa4Sbouyer 	xen_shm_unmap(xbdi->xbdi_ring_va, 1, &xbdi->xbdi_ring_handle);
6585dac1986Sbouyer err1:
659bf743748Sjym 	/* free ring VA space */
660bf743748Sjym 	uvm_km_free(kernel_map, xbdi->xbdi_ring_va, PAGE_SIZE, UVM_KMF_VAONLY);
661bf743748Sjym 	return -1;
662bf743748Sjym }
663bf743748Sjym 
66454f95b14Sjym /*
66554f95b14Sjym  * Signal a xbdback thread to disconnect. Done in 'xenwatch' thread context.
66654f95b14Sjym  */
static void
xbdback_disconnect(struct xbdback_instance *xbdi)
{

	mutex_enter(&xbdi->xbdi_lock);
	/* Already fully disconnected: nothing to do. */
	if (xbdi->xbdi_status == DISCONNECTED) {
		mutex_exit(&xbdi->xbdi_lock);
		return;
	}
	/* Stop further event delivery from the frontend's event channel. */
	hypervisor_mask_event(xbdi->xbdi_evtchn);

	/* signal thread that we want to disconnect, then wait for it */
	xbdi->xbdi_status = DISCONNECTING;
	cv_signal(&xbdi->xbdi_cv);

	/* the worker thread broadcasts xbdi_cv once it reaches DISCONNECTED */
	while (xbdi->xbdi_status != DISCONNECTED)
		cv_wait(&xbdi->xbdi_cv, &xbdi->xbdi_lock);

	mutex_exit(&xbdi->xbdi_lock);
	/* thread is done with the ring; safe to tear down the interrupt */
	xen_intr_disestablish(xbdi->xbdi_ih);

	xenbus_switch_state(xbdi->xbdi_xbusd, NULL, XenbusStateClosing);
}
690cf4b804eSjym 
691bf743748Sjym static void
xbdback_frontend_changed(void * arg,XenbusState new_state)692bf743748Sjym xbdback_frontend_changed(void *arg, XenbusState new_state)
693bf743748Sjym {
694bf743748Sjym 	struct xbdback_instance *xbdi = arg;
695bf743748Sjym 	struct xenbus_device *xbusd = xbdi->xbdi_xbusd;
696bf743748Sjym 
697bf743748Sjym 	XENPRINTF(("xbdback %s: new state %d\n", xbusd->xbusd_path, new_state));
698bf743748Sjym 	switch(new_state) {
699bf743748Sjym 	case XenbusStateInitialising:
700bf743748Sjym 		break;
701bf743748Sjym 	case XenbusStateInitialised:
702bf743748Sjym 	case XenbusStateConnected:
70354f95b14Sjym 		if (xbdi->xbdi_status == WAITING || xbdi->xbdi_status == RUN)
704bf743748Sjym 			break;
705bf743748Sjym 		xbdback_connect(xbdi);
706b96fedacSbouyer 		break;
707b96fedacSbouyer 	case XenbusStateClosing:
708cf4b804eSjym 		xbdback_disconnect(xbdi);
709b96fedacSbouyer 		break;
710b96fedacSbouyer 	case XenbusStateClosed:
711b96fedacSbouyer 		/* otherend_changed() should handle it for us */
712b96fedacSbouyer 		panic("xbdback_frontend_changed: closed\n");
713b96fedacSbouyer 	case XenbusStateUnknown:
714b96fedacSbouyer 	case XenbusStateInitWait:
715b96fedacSbouyer 	default:
716b96fedacSbouyer 		aprint_error("xbdback %s: invalid frontend state %d\n",
717b96fedacSbouyer 		    xbusd->xbusd_path, new_state);
718b96fedacSbouyer 	}
719b96fedacSbouyer 	return;
720b96fedacSbouyer }
721b96fedacSbouyer 
/*
 * Watch callback on our backend xenstore directory: when 'physical-device'
 * appears, open the corresponding local block device and publish its
 * parameters (sectors, sector-size, features) back to xenstore, then
 * switch the device to Connected.
 */
static void
xbdback_backend_changed(struct xenbus_watch *watch,
    const char **vec, unsigned int len)
{
	struct xenbus_device *xbusd = watch->xbw_dev;
	struct xbdback_instance *xbdi = xbusd->xbusd_u.b.b_cookie;
	int err;
	long dev;
	char mode[32];
	struct xenbus_transaction *xbt;
	const char *devname;
	int major;

	err = xenbus_read_ul(NULL, xbusd->xbusd_path, "physical-device",
	    &dev, 10);
	/*
	 * An error can occur as the watch can fire up just after being
	 * registered, before 'physical-device' is written; in that case
	 * simply ignore the error and wait for the next firing.
	 */
	if (err)
		return;
	/*
	 * we can also fire up after having opened the device, don't try
	 * to do it twice.
	 */
	if (xbdi->xbdi_vp != NULL) {
		if (xbdi->xbdi_status == WAITING || xbdi->xbdi_status == RUN) {
			/* changing the device while active is not supported */
			if (xbdi->xbdi_dev != dev) {
				printf("xbdback %s: changing physical device "
				    "from %#"PRIx64" to %#lx not supported\n",
				    xbusd->xbusd_path, xbdi->xbdi_dev, dev);
			}
		}
		return;
	}
	xbdi->xbdi_dev = dev;
	/* "mode" starting with 'w' means writable, anything else read-only */
	err = xenbus_read(NULL, xbusd->xbusd_path, "mode", mode, sizeof(mode));
	if (err) {
		printf("xbdback: failed to read %s/mode: %d\n",
		    xbusd->xbusd_path, err);
		return;
	}
	if (mode[0] == 'w')
		xbdi->xbdi_ro = false;
	else
		xbdi->xbdi_ro = true;
	/* resolve the device number to a block device driver */
	major = major(xbdi->xbdi_dev);
	devname = devsw_blk2name(major);
	if (devname == NULL) {
		printf("xbdback %s: unknown device 0x%"PRIx64"\n",
		    xbusd->xbusd_path, xbdi->xbdi_dev);
		return;
	}
	xbdi->xbdi_bdevsw = bdevsw_lookup(xbdi->xbdi_dev);
	if (xbdi->xbdi_bdevsw == NULL) {
		printf("xbdback %s: no bdevsw for device 0x%"PRIx64"\n",
		    xbusd->xbusd_path, xbdi->xbdi_dev);
		return;
	}
	/* get a vnode for the device and open it read-only (I/O uses VOP_STRATEGY) */
	err = bdevvp(xbdi->xbdi_dev, &xbdi->xbdi_vp);
	if (err) {
		printf("xbdback %s: can't open device 0x%"PRIx64": %d\n",
		    xbusd->xbusd_path, xbdi->xbdi_dev, err);
		return;
	}
	err = vn_lock(xbdi->xbdi_vp, LK_EXCLUSIVE | LK_RETRY);
	if (err) {
		printf("xbdback %s: can't vn_lock device 0x%"PRIx64": %d\n",
		    xbusd->xbusd_path, xbdi->xbdi_dev, err);
		vrele(xbdi->xbdi_vp);
		return;
	}
	err  = VOP_OPEN(xbdi->xbdi_vp, FREAD, NOCRED);
	if (err) {
		printf("xbdback %s: can't VOP_OPEN device 0x%"PRIx64": %d\n",
		    xbusd->xbusd_path, xbdi->xbdi_dev, err);
		vput(xbdi->xbdi_vp);
		return;
	}
	VOP_UNLOCK(xbdi->xbdi_vp);

	/* dk device; get wedge data */
	struct dkwedge_info wi;
	if ((err = getdiskinfo(xbdi->xbdi_vp, &wi)) == 0) {
		xbdi->xbdi_size = wi.dkw_size;
		printf("xbd backend: attach device %s (size %" PRIu64 ") "
		    "for domain %d\n", wi.dkw_devname, xbdi->xbdi_size,
		    xbdi->xbdi_domid);
	} else {
		/* could not get disk info: reset state, close and bail out */
		printf("xbdback %s: can't DIOCGWEDGEINFO device "
		    "0x%"PRIx64": %d\n", xbusd->xbusd_path,
		    xbdi->xbdi_dev, err);
		xbdi->xbdi_size = xbdi->xbdi_dev = 0;
		vn_close(xbdi->xbdi_vp, FREAD, NOCRED);
		xbdi->xbdi_vp = NULL;
		return;
	}
again:
	/* publish device parameters to xenstore in a single transaction */
	xbt = xenbus_transaction_start();
	if (xbt == NULL) {
		printf("xbdback %s: can't start transaction\n",
		    xbusd->xbusd_path);
		    return;
	}
	err = xenbus_printf(xbt, xbusd->xbusd_path, "sectors", "%" PRIu64 ,
	    xbdi->xbdi_size);
	if (err) {
		printf("xbdback: failed to write %s/sectors: %d\n",
		    xbusd->xbusd_path, err);
		goto abort;
	}
	err = xenbus_printf(xbt, xbusd->xbusd_path, "info", "%u",
	    xbdi->xbdi_ro ? VDISK_READONLY : 0);
	if (err) {
		printf("xbdback: failed to write %s/info: %d\n",
		    xbusd->xbusd_path, err);
		goto abort;
	}
	err = xenbus_printf(xbt, xbusd->xbusd_path, "sector-size", "%lu",
	    (u_long)DEV_BSIZE);
	if (err) {
		printf("xbdback: failed to write %s/sector-size: %d\n",
		    xbusd->xbusd_path, err);
		goto abort;
	}
	err = xenbus_printf(xbt, xbusd->xbusd_path, "feature-flush-cache",
	    "%u", 1);
	if (err) {
		printf("xbdback: failed to write %s/feature-flush-cache: %d\n",
		    xbusd->xbusd_path, err);
		goto abort;
	}
	err = xenbus_printf(xbt, xbusd->xbusd_path,
	    "feature-max-indirect-segments", "%u", VBD_MAX_INDIRECT_SEGMENTS);
	if (err) {
		printf("xbdback: failed to write %s/feature-indirect: %d\n",
		    xbusd->xbusd_path, err);
		goto abort;
	}
	/* EAGAIN means the transaction raced with another writer: retry */
	err = xenbus_transaction_end(xbt, 0);
	if (err == EAGAIN)
		goto again;
	if (err) {
		printf("xbdback %s: can't end transaction: %d\n",
		    xbusd->xbusd_path, err);
	}
	err = xenbus_switch_state(xbusd, NULL, XenbusStateConnected);
	if (err) {
		printf("xbdback %s: can't switch state: %d\n",
		    xbusd->xbusd_path, err);
	}
	return;
abort:
	xenbus_transaction_end(xbt, 1);
}
878b96fedacSbouyer 
87954f95b14Sjym /*
88054f95b14Sjym  * Used by a xbdi thread to signal that it is now disconnected.
88154f95b14Sjym  */
static void
xbdback_finish_disconnect(struct xbdback_instance *xbdi)
{
	/* must be called with the instance lock held, mid-disconnect */
	KASSERT(mutex_owned(&xbdi->xbdi_lock));
	KASSERT(xbdi->xbdi_status == DISCONNECTING);

	xbdi->xbdi_status = DISCONNECTED;

	/* wake xbdback_disconnect(), which waits for this state change */
	cv_broadcast(&xbdi->xbdi_cv);
}
892b96fedacSbouyer 
893555482faSjdolecek static bool
xbdif_lookup(domid_t dom,uint32_t handle)894b96fedacSbouyer xbdif_lookup(domid_t dom , uint32_t handle)
895b96fedacSbouyer {
896b96fedacSbouyer 	struct xbdback_instance *xbdi;
897555482faSjdolecek 	bool found = false;
898b96fedacSbouyer 
89958477cb8Sjdolecek 	KASSERT(mutex_owned(&xbdback_lock));
90058477cb8Sjdolecek 
901b96fedacSbouyer 	SLIST_FOREACH(xbdi, &xbdback_instances, next) {
902555482faSjdolecek 		if (xbdi->xbdi_domid == dom && xbdi->xbdi_handle == handle) {
903555482faSjdolecek 			found = true;
904555482faSjdolecek 			break;
905b96fedacSbouyer 		}
906555482faSjdolecek 	}
907555482faSjdolecek 
908555482faSjdolecek 	return found;
909b96fedacSbouyer }
910b96fedacSbouyer 
911b96fedacSbouyer static int
xbdback_evthandler(void * arg)912b96fedacSbouyer xbdback_evthandler(void *arg)
913b96fedacSbouyer {
914b96fedacSbouyer 	struct xbdback_instance *xbdi = arg;
915b96fedacSbouyer 
916b96fedacSbouyer 	XENPRINTF(("xbdback_evthandler domain %d: cont %p\n",
917b96fedacSbouyer 	    xbdi->xbdi_domid, xbdi->xbdi_cont));
918b96fedacSbouyer 
91958477cb8Sjdolecek 	mutex_enter(&xbdi->xbdi_lock);
92054f95b14Sjym 	xbdback_wakeup_thread(xbdi);
92158477cb8Sjdolecek 	mutex_exit(&xbdi->xbdi_lock);
92277822551Sjym 
923b96fedacSbouyer 	return 1;
924b96fedacSbouyer }
925b96fedacSbouyer 
92654f95b14Sjym /*
92754f95b14Sjym  * Main thread routine for one xbdback instance. Woken up by
92854f95b14Sjym  * xbdback_evthandler when a domain has I/O work scheduled in a I/O ring.
92954f95b14Sjym  */
static void
xbdback_thread(void *arg)
{
	struct xbdback_instance *xbdi = arg;
	void *obj;

	mutex_enter(&xbdi->xbdi_lock);
	for (;;) {
		switch (xbdi->xbdi_status) {
		case WAITING:
			/* no work; sleep until an event or disconnect wakes us */
			cv_wait(&xbdi->xbdi_cv, &xbdi->xbdi_lock);
			break;
		case RUN:
			xbdi->xbdi_status = WAITING; /* reset state */
			obj = xbdi;
			/*
			 * A continuation may have been suspended waiting for
			 * pending I/Os to drain; resume it with its saved
			 * argument instead of starting from the top.
			 */
			if (xbdi->xbdi_cont_restart != NULL) {
				KASSERT(xbdi->xbdi_cont == NULL);
				xbdi->xbdi_cont = xbdi->xbdi_cont_restart;
				obj = xbdi->xbdi_cont_restart_obj;
				xbdi->xbdi_cont_restart = NULL;
				xbdi->xbdi_cont_restart_obj = NULL;
			}
			if (xbdi->xbdi_cont == NULL) {
				/* fresh start: process the ring from the top */
				xbdi->xbdi_cont = xbdback_co_main;
			}

			xbdback_trampoline(xbdi, obj);
			break;
		case DISCONNECTING:
			if (xbdi->xbdi_pendingreqs > 0) {
				/* there are pending I/Os. Wait for them. */
				cv_wait(&xbdi->xbdi_cv, &xbdi->xbdi_lock);
				continue;
			}

			/* All I/Os should have been processed by now,
			 * xbdi_refcnt should drop to 0 */
			xbdi_put(xbdi);
			KASSERT(xbdi->xbdi_refcnt == 0);
			goto out;
			/* NOTREACHED */
		default:
			panic("%s: invalid state %d",
			    xbdi->xbdi_name, xbdi->xbdi_status);
		}
	}
out:
	mutex_exit(&xbdi->xbdi_lock);

	kthread_exit(0);
}
98154f95b14Sjym 
/* First continuation: snapshot the frontend's producer index. */
static void *
xbdback_co_main(struct xbdback_instance *xbdi, void *obj)
{
	(void)obj;

	/* read req_prod once; the loop below consumes up to this point */
	xbdi->xbdi_req_prod = xbdi->xbdi_ring.ring_n.sring->req_prod;
	xen_rmb(); /* ensure we see all requests up to req_prod */
	/*
	 * note that we'll eventually get a full ring of request.
	 * in this case, MASK_BLKIF_IDX(req_cons) == MASK_BLKIF_IDX(req_prod)
	 */
	xbdi->xbdi_cont = xbdback_co_main_loop;
	return xbdi;
}
996b96fedacSbouyer 
99777822551Sjym /*
99877822551Sjym  * Fetch a blkif request from the ring, and pass control to the appropriate
99977822551Sjym  * continuation.
100054f95b14Sjym  * If someone asked for disconnection, do not fetch any more request from
100154f95b14Sjym  * the ring.
100277822551Sjym  */
static void *
xbdback_co_main_loop(struct xbdback_instance *xbdi, void *obj __unused)
{
	blkif_request_t *req, *reqn;
	blkif_x86_32_request_t *req32;
	blkif_x86_64_request_t *req64;
	blkif_request_indirect_t *rinn;
	blkif_x86_64_request_indirect_t *rin64;
	blkif_x86_32_request_indirect_t *rin32;

	if (xbdi->xbdi_ring.ring_n.req_cons != xbdi->xbdi_req_prod) {
		struct xbdback_io *xbd_io = xbdback_io_get(xbdi);
		/* 0xff: invalid op, overwritten from the ring below */
		uint8_t real_op = 0xff;

		if (xbd_io == NULL) {
			/* retry after iodone */
			xbdi->xbdi_cont = NULL;
			return NULL;
		}
		memset(&xbd_io->u, 0, sizeof(xbd_io->u));

		buf_init(&xbd_io->xio_buf);
		xbd_io->xio_xbdi = xbdi;

		req = &xbd_io->xio_xen_req;
		memset(req, 0, sizeof(*req));

		/*
		 * Fetch operation and id from the ring, converting from
		 * the frontend's ABI. For indirect requests, real_op is
		 * the embedded operation the request will perform.
		 */
		switch(xbdi->xbdi_proto) {
		case XBDIP_NATIVE:
			reqn = RING_GET_REQUEST(&xbdi->xbdi_ring.ring_n,
			    xbdi->xbdi_ring.ring_n.req_cons);
			real_op = req->operation = reqn->operation;
			if (real_op == BLKIF_OP_INDIRECT) {
				rinn = (blkif_request_indirect_t *)reqn;
				real_op = rinn->indirect_op;
			}
			req->id = reqn->id;
			break;
		case XBDIP_32:
			req32 = RING_GET_REQUEST(&xbdi->xbdi_ring.ring_32,
			    xbdi->xbdi_ring.ring_n.req_cons);
			real_op = req->operation = req32->operation;
			if (real_op == BLKIF_OP_INDIRECT) {
				rin32 = (blkif_x86_32_request_indirect_t*)req32;
				real_op = rin32->indirect_op;
			}
			req->id = req32->id;
			break;
		case XBDIP_64:
			req64 = RING_GET_REQUEST(&xbdi->xbdi_ring.ring_64,
			    xbdi->xbdi_ring.ring_n.req_cons);
			real_op = req->operation = req64->operation;
			if (real_op == BLKIF_OP_INDIRECT) {
				rin64 = (blkif_x86_64_request_indirect_t*)req64;
				real_op = rin64->indirect_op;
			}
			req->id = req64->id;
			break;
		}
		/* compiler barrier: fields above were copied off the shared ring */
		__insn_barrier();
		XENPRINTF(("xbdback op %d req_cons 0x%x req_prod 0x%x "
		    "resp_prod 0x%x id %" PRIu64 "\n", req->operation,
			xbdi->xbdi_ring.ring_n.req_cons,
			xbdi->xbdi_req_prod,
			xbdi->xbdi_ring.ring_n.rsp_prod_pvt,
			req->id));
		switch (req->operation) {
		case BLKIF_OP_INDIRECT:
			/* just check indirect_op, rest is handled later */
			if (real_op != BLKIF_OP_READ &&
			    real_op != BLKIF_OP_WRITE) {
				if (ratecheck(&xbdi->xbdi_lasterr_time,
				    &xbdback_err_intvl)) {
					printf("%s: unknown ind operation %d\n",
					    xbdi->xbdi_name,
					    real_op);
				}
				goto fail;
			}
			/* FALLTHROUGH */
		case BLKIF_OP_READ:
		case BLKIF_OP_WRITE:
			xbdi->xbdi_cont = xbdback_co_io;
			return xbd_io;
		case BLKIF_OP_FLUSH_DISKCACHE:
			xbdi->xbdi_cont = xbdback_co_cache_flush;
			return xbd_io;
		default:
			if (ratecheck(&xbdi->xbdi_lasterr_time,
			    &xbdback_err_intvl)) {
				printf("%s: unknown operation %d\n",
				    xbdi->xbdi_name, req->operation);
			}
fail:
			/* report the error and move on to the next request */
			xbdback_send_reply(xbdi, req->id, real_op,
			    BLKIF_RSP_ERROR);
			xbdi->xbdi_cont = xbdback_co_main_incr;
			return xbdi;
		}
	} else {
		/* ring drained; do the final check for late requests */
		xbdi->xbdi_cont = xbdback_co_main_done2;
		return xbdi;
	}
}
1107b96fedacSbouyer 
110877822551Sjym /*
110954f95b14Sjym  * Increment consumer index and move on to the next request. In case
111054f95b14Sjym  * we want to disconnect, leave continuation now.
111177822551Sjym  */
1112b96fedacSbouyer static void *
xbdback_co_main_incr(struct xbdback_instance * xbdi,void * obj __unused)111358477cb8Sjdolecek xbdback_co_main_incr(struct xbdback_instance *xbdi, void *obj __unused)
1114b96fedacSbouyer {
111558477cb8Sjdolecek 	KASSERT(mutex_owned(&xbdi->xbdi_lock));
111658477cb8Sjdolecek 
1117b1c4de01Sjym 	blkif_back_ring_t *ring = &xbdi->xbdi_ring.ring_n;
1118b1c4de01Sjym 
1119b1c4de01Sjym 	ring->req_cons++;
112054f95b14Sjym 
112154f95b14Sjym 	if (xbdi->xbdi_status == DISCONNECTING)
1122b1c4de01Sjym 		xbdi->xbdi_cont = NULL;
1123b1c4de01Sjym 	else
1124b96fedacSbouyer 		xbdi->xbdi_cont = xbdback_co_main_loop;
1125b1c4de01Sjym 
1126b96fedacSbouyer 	return xbdi;
1127b96fedacSbouyer }
1128b96fedacSbouyer 
112977822551Sjym /*
113077822551Sjym  * Check for requests in the instance's ring. In case there are, start again
113177822551Sjym  * from the beginning. If not, stall.
113277822551Sjym  */
static void *
xbdback_co_main_done2(struct xbdback_instance *xbdi, void *obj)
{
	int work_to_do;

	/* write barrier before the final check on the shared ring */
	xen_wmb();
	RING_FINAL_CHECK_FOR_REQUESTS(&xbdi->xbdi_ring.ring_n, work_to_do);
	if (work_to_do)
		xbdi->xbdi_cont = xbdback_co_main;
	else
		xbdi->xbdi_cont = NULL;

	return xbdi;
}
1147b96fedacSbouyer 
114877822551Sjym /*
114977822551Sjym  * Frontend requested a cache flush operation.
115077822551Sjym  */
static void *
xbdback_co_cache_flush(struct xbdback_instance *xbdi, void *obj)
{
	struct xbdback_io *xbd_io = obj;
	KASSERT(xbd_io->xio_xen_req.operation == BLKIF_OP_FLUSH_DISKCACHE);
	if (xbdi->xbdi_pendingreqs > 0) {
		/*
		 * There are pending requests.
		 * Event or iodone() will restart processing
		 */
		/* save this continuation and its argument for the restart */
		xbdi->xbdi_cont_restart = xbdback_co_cache_flush;
		xbdi->xbdi_cont_restart_obj = xbd_io;
		xbdi->xbdi_cont = NULL;
		return NULL;
	}
	/* no more in-flight I/O: proceed with issuing the flush */
	xbdi_get(xbdi);
	xbdi->xbdi_cont = xbdback_co_do_io;
	return xbd_io;
}
1170db95bc7fSbouyer 
117177822551Sjym /*
117277822551Sjym  * A read or write I/O request must be processed. Do some checks first,
117377822551Sjym  * then get the segment information directly from the ring request.
117477822551Sjym  */
1175db95bc7fSbouyer static void *
xbdback_co_io(struct xbdback_instance * xbdi,void * obj)117680da1c02Sbouyer xbdback_co_io(struct xbdback_instance *xbdi, void *obj)
1177b96fedacSbouyer {
117899c9ae6dSjym 	int i, error;
1179c8d41590Sjdolecek 	blkif_request_t *req, *reqn;
118099c9ae6dSjym 	blkif_x86_32_request_t *req32;
118199c9ae6dSjym 	blkif_x86_64_request_t *req64;
1182c8d41590Sjdolecek 	blkif_request_indirect_t *rinn;
1183c8d41590Sjdolecek 	blkif_x86_32_request_indirect_t *rin32;
1184c8d41590Sjdolecek 	blkif_x86_64_request_indirect_t *rin64;
118580da1c02Sbouyer 	const char *errstr;
118680da1c02Sbouyer 	struct xbdback_io *xbd_io = obj;
118780da1c02Sbouyer 	grant_ref_t in_gntref = 0;
1188b96fedacSbouyer 
118980da1c02Sbouyer 	req = &xbd_io->xio_xen_req;
119099c9ae6dSjym 
119199c9ae6dSjym 	/* some sanity checks */
119248ed379bSbouyer 	KASSERT(req->operation == BLKIF_OP_READ ||
1193c8d41590Sjdolecek 	    req->operation == BLKIF_OP_WRITE ||
1194c8d41590Sjdolecek 	    req->operation == BLKIF_OP_INDIRECT);
1195b96fedacSbouyer 
119699c9ae6dSjym 	/* copy request segments */
119799c9ae6dSjym 	switch (xbdi->xbdi_proto) {
119899c9ae6dSjym 	case XBDIP_NATIVE:
1199c8d41590Sjdolecek 		reqn = RING_GET_REQUEST(&xbdi->xbdi_ring.ring_n,
1200c8d41590Sjdolecek 		    xbdi->xbdi_ring.ring_n.req_cons);
120180da1c02Sbouyer 		req->handle = reqn->handle;
120280da1c02Sbouyer 		req->sector_number = reqn->sector_number;
1203c8d41590Sjdolecek 		if (reqn->operation == BLKIF_OP_INDIRECT) {
1204c8d41590Sjdolecek 			rinn = (blkif_request_indirect_t *)reqn;
1205c8d41590Sjdolecek 			req->operation = rinn->indirect_op;
12067da93351Sbouyer 			req->nr_segments = (uint8_t)rinn->nr_segments;
120780da1c02Sbouyer 			if (req->nr_segments > VBD_MAX_INDIRECT_SEGMENTS) {
1208*d55161bbSbouyer 				errstr = "too many indirect segments";
120980da1c02Sbouyer 				goto bad_segments;
121080da1c02Sbouyer 			}
121180da1c02Sbouyer 			in_gntref = rinn->indirect_grefs[0];
1212c8d41590Sjdolecek 			/* first_sect and segment grefs fetched later */
1213c8d41590Sjdolecek 		} else {
1214c8d41590Sjdolecek 			req->nr_segments = reqn->nr_segments;
121580da1c02Sbouyer 			if (req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST) {
1216*d55161bbSbouyer 				errstr = "too many segments";
121780da1c02Sbouyer 				goto bad_segments;
121880da1c02Sbouyer 			}
1219c8d41590Sjdolecek 			for (i = 0; i < req->nr_segments; i++)
122080da1c02Sbouyer 				xbd_io->xio_seg[i] = reqn->seg[i];
1221c8d41590Sjdolecek 		}
122299c9ae6dSjym 		break;
122399c9ae6dSjym 	case XBDIP_32:
122499c9ae6dSjym 		req32 = RING_GET_REQUEST(&xbdi->xbdi_ring.ring_32,
122599c9ae6dSjym 		    xbdi->xbdi_ring.ring_n.req_cons);
122680da1c02Sbouyer 		req->handle = req32->handle;
122780da1c02Sbouyer 		req->sector_number = req32->sector_number;
1228c8d41590Sjdolecek 		if (req32->operation == BLKIF_OP_INDIRECT) {
1229c8d41590Sjdolecek 			rin32 = (blkif_x86_32_request_indirect_t *)req32;
1230c8d41590Sjdolecek 			req->operation = rin32->indirect_op;
12317da93351Sbouyer 			req->nr_segments = (uint8_t)rin32->nr_segments;
123280da1c02Sbouyer 			if (req->nr_segments > VBD_MAX_INDIRECT_SEGMENTS) {
1233*d55161bbSbouyer 				errstr = "too many indirect segments";
123480da1c02Sbouyer 				goto bad_segments;
123580da1c02Sbouyer 			}
123680da1c02Sbouyer 			in_gntref = rin32->indirect_grefs[0];
1237c8d41590Sjdolecek 			/* first_sect and segment grefs fetched later */
1238c8d41590Sjdolecek 		} else {
1239c8d41590Sjdolecek 			req->nr_segments = req32->nr_segments;
124080da1c02Sbouyer 			if (req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST) {
1241*d55161bbSbouyer 				errstr = "too many segments";
124280da1c02Sbouyer 				goto bad_segments;
124380da1c02Sbouyer 			}
124499c9ae6dSjym 			for (i = 0; i < req->nr_segments; i++)
124580da1c02Sbouyer 				xbd_io->xio_seg[i] = req32->seg[i];
1246c8d41590Sjdolecek 		}
124799c9ae6dSjym 		break;
124899c9ae6dSjym 	case XBDIP_64:
124999c9ae6dSjym 		req64 = RING_GET_REQUEST(&xbdi->xbdi_ring.ring_64,
125099c9ae6dSjym 		    xbdi->xbdi_ring.ring_n.req_cons);
125180da1c02Sbouyer 		req->handle = req64->handle;
125280da1c02Sbouyer 		req->sector_number = req64->sector_number;
1253c8d41590Sjdolecek 		if (req64->operation == BLKIF_OP_INDIRECT) {
1254c8d41590Sjdolecek 			rin64 = (blkif_x86_64_request_indirect_t *)req64;
12557da93351Sbouyer 			req->nr_segments = (uint8_t)rin64->nr_segments;
125680da1c02Sbouyer 			if (req->nr_segments > VBD_MAX_INDIRECT_SEGMENTS) {
1257*d55161bbSbouyer 				errstr = "too many indirect segments";
125880da1c02Sbouyer 				goto bad_segments;
125980da1c02Sbouyer 			}
126080da1c02Sbouyer 			in_gntref = rin64->indirect_grefs[0];
1261c8d41590Sjdolecek 			/* first_sect and segment grefs fetched later */
1262c8d41590Sjdolecek 		} else {
1263c8d41590Sjdolecek 			req->nr_segments = req64->nr_segments;
126480da1c02Sbouyer 			if (req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST) {
1265*d55161bbSbouyer 				errstr = "too many segments";
126680da1c02Sbouyer 				goto bad_segments;
126780da1c02Sbouyer 			}
126899c9ae6dSjym 			for (i = 0; i < req->nr_segments; i++)
126980da1c02Sbouyer 				xbd_io->xio_seg[i] = req64->seg[i];
1270c8d41590Sjdolecek 		}
127199c9ae6dSjym 		break;
127299c9ae6dSjym 	}
127399c9ae6dSjym 
1274f64904dbSjdolecek 	if (req->operation == BLKIF_OP_WRITE) {
1275f64904dbSjdolecek 		if (xbdi->xbdi_ro) {
1276f64904dbSjdolecek 			error = EROFS;
1277f64904dbSjdolecek 			goto end;
1278f64904dbSjdolecek 		}
1279f64904dbSjdolecek 	}
1280f64904dbSjdolecek 
1281c8d41590Sjdolecek 	/* Max value checked already earlier */
128280da1c02Sbouyer 	if (req->nr_segments < 1) {
128380da1c02Sbouyer 		errstr = "invalid number of segments";
128480da1c02Sbouyer 		goto bad_segments;
128580da1c02Sbouyer 	}
1286c8d41590Sjdolecek 
128780da1c02Sbouyer 	/* If segments are on an indirect page, copy them now */
128880da1c02Sbouyer 	if (in_gntref) {
128980da1c02Sbouyer 		gnttab_copy_t gop;
129080da1c02Sbouyer 		paddr_t ma;
129180da1c02Sbouyer 
129280da1c02Sbouyer 		gop.flags = GNTCOPY_source_gref;
129380da1c02Sbouyer 		gop.len = req->nr_segments
129480da1c02Sbouyer 		    * sizeof(struct blkif_request_segment);
129580da1c02Sbouyer 
129680da1c02Sbouyer 		gop.source.u.ref = in_gntref;
129780da1c02Sbouyer 		gop.source.offset = 0;
129880da1c02Sbouyer 		gop.source.domid = xbdi->xbdi_domid;
129980da1c02Sbouyer 
130080da1c02Sbouyer 		ma = xbd_io->xio_seg_dmamap->dm_segs[0].ds_addr;
130180da1c02Sbouyer 		gop.dest.offset = ma & PAGE_MASK;
130280da1c02Sbouyer 		gop.dest.domid = DOMID_SELF;
130380da1c02Sbouyer 		gop.dest.u.gmfn = ma >> PAGE_SHIFT;
130480da1c02Sbouyer 
130580da1c02Sbouyer 		if (HYPERVISOR_grant_table_op(GNTTABOP_copy, &gop, 1) != 0) {
130680da1c02Sbouyer 			errstr = "GNTTABOP_copy failed";
130780da1c02Sbouyer 			goto bad_segments;
130880da1c02Sbouyer 		}
130980da1c02Sbouyer 	}
131080da1c02Sbouyer 
131180da1c02Sbouyer 	xbdi_get(xbdi);
131270fd7422Sjdolecek 	xbdi->xbdi_cont = xbdback_co_io_gotio;
131380da1c02Sbouyer 	return xbd_io;
131477822551Sjym 
131580da1c02Sbouyer  bad_segments:
1316c8d41590Sjdolecek 	if (ratecheck(&xbdi->xbdi_lasterr_time, &xbdback_err_intvl)) {
131780da1c02Sbouyer 		printf("%s: %s\n", xbdi->xbdi_name, errstr);
1318c8d41590Sjdolecek 	}
1319c8d41590Sjdolecek 	error = EINVAL;
1320c8d41590Sjdolecek 	/* FALLTHROUGH */
1321c8d41590Sjdolecek 
1322b96fedacSbouyer  end:
1323f64904dbSjdolecek 	xbdback_send_reply(xbdi, req->id, req->operation,
132470fd7422Sjdolecek 	    (error == EROFS) ? BLKIF_RSP_EOPNOTSUPP : BLKIF_RSP_ERROR);
1325b96fedacSbouyer 	xbdi->xbdi_cont = xbdback_co_main_incr;
1326b96fedacSbouyer 	return xbdi;
1327b96fedacSbouyer }
1328b96fedacSbouyer 
132977822551Sjym /* Prepare an I/O buffer for a xbdback instance */
1330b96fedacSbouyer static void *
xbdback_co_io_gotio(struct xbdback_instance * xbdi,void * obj)1331b96fedacSbouyer xbdback_co_io_gotio(struct xbdback_instance *xbdi, void *obj)
1332b96fedacSbouyer {
133380da1c02Sbouyer 	struct xbdback_io *xbd_io = obj;
1334b96fedacSbouyer 	int buf_flags;
133570fd7422Sjdolecek 	size_t bcount;
133680da1c02Sbouyer 	blkif_request_t *req = &xbd_io->xio_xen_req;
133780da1c02Sbouyer 	uint8_t last_sect;
133880da1c02Sbouyer 	int error;
1339b96fedacSbouyer 
134058477cb8Sjdolecek 	KASSERT(mutex_owned(&xbdi->xbdi_lock));
134180da1c02Sbouyer 	KASSERT(xbdi->xbdi_refcnt > 0);
1342c8d41590Sjdolecek 
134370fd7422Sjdolecek 	/* Process segments */
134470fd7422Sjdolecek 	bcount = 0;
134570fd7422Sjdolecek 	for (int i = 0; i < req->nr_segments; i++) {
134680da1c02Sbouyer 		struct blkif_request_segment *seg = &xbd_io->xio_seg[i];
134780da1c02Sbouyer 		if (seg->last_sect > VBD_MAXSECT ||
134880da1c02Sbouyer 		    seg->first_sect > VBD_MAXSECT) {
134980da1c02Sbouyer 			if (ratecheck(&xbdi->xbdi_lasterr_time,
135080da1c02Sbouyer 			    &xbdback_err_intvl)) {
135180da1c02Sbouyer 				printf("%s: invalid segment sectors %d %d\n",
135280da1c02Sbouyer 				    xbdi->xbdi_name,
135380da1c02Sbouyer 				    seg->first_sect, seg->last_sect);
135480da1c02Sbouyer 			}
135580da1c02Sbouyer 			xbdi->xbdi_pendingreqs++; /* xbdback_io_error will -- */
135680da1c02Sbouyer 			xbdback_io_error(xbd_io, EINVAL);
135780da1c02Sbouyer 			/* do not retry */
135880da1c02Sbouyer 			xbdi->xbdi_cont = xbdback_co_main_incr;
135980da1c02Sbouyer 			return xbdi;
136080da1c02Sbouyer 		}
136180da1c02Sbouyer 
136280da1c02Sbouyer 		if (i > 0) {
136380da1c02Sbouyer 			if (last_sect != VBD_MAXSECT ||
136480da1c02Sbouyer 			    seg->first_sect != 0) {
136580da1c02Sbouyer 				xbd_io->xio_need_bounce = 1;
136680da1c02Sbouyer 			}
136780da1c02Sbouyer 		}
136880da1c02Sbouyer 		last_sect = seg->last_sect;
1369c8d41590Sjdolecek 		xbd_io->xio_gref[i] = seg->gref;
1370c8d41590Sjdolecek 		bcount += (seg->last_sect - seg->first_sect + 1)
137170fd7422Sjdolecek 			* VBD_BSIZE;
137270fd7422Sjdolecek 	}
137380da1c02Sbouyer 	xbd_io->xio_start_offset = xbd_io->xio_seg[0].first_sect * VBD_BSIZE;
137470fd7422Sjdolecek 
1375c8d41590Sjdolecek 	KASSERT(bcount <= MAXPHYS);
137670fd7422Sjdolecek 	KASSERT(xbd_io->xio_start_offset < PAGE_SIZE);
137774676f87Sjdolecek 	KASSERT(bcount + xbd_io->xio_start_offset <= VBD_VA_SIZE);
1378b96fedacSbouyer 
1379c8d41590Sjdolecek 	/* Fill-in the buf */
138080da1c02Sbouyer 	if (req->operation == BLKIF_OP_WRITE) {
13814a780c9aSad 		buf_flags = B_WRITE;
1382b96fedacSbouyer 	} else {
13834a780c9aSad 		buf_flags = B_READ;
1384b96fedacSbouyer 	}
1385b96fedacSbouyer 
1386b96fedacSbouyer 	xbd_io->xio_buf.b_flags = buf_flags;
13874a780c9aSad 	xbd_io->xio_buf.b_cflags = 0;
13884a780c9aSad 	xbd_io->xio_buf.b_oflags = 0;
1389b96fedacSbouyer 	xbd_io->xio_buf.b_iodone = xbdback_iodone;
1390b96fedacSbouyer 	xbd_io->xio_buf.b_proc = NULL;
1391b96fedacSbouyer 	xbd_io->xio_buf.b_vp = xbdi->xbdi_vp;
1392e225b7bdSrmind 	xbd_io->xio_buf.b_objlock = xbdi->xbdi_vp->v_interlock;
1393b96fedacSbouyer 	xbd_io->xio_buf.b_dev = xbdi->xbdi_dev;
139470fd7422Sjdolecek 	xbd_io->xio_buf.b_blkno = req->sector_number;
139570fd7422Sjdolecek 	xbd_io->xio_buf.b_bcount = bcount;
139680da1c02Sbouyer 	if (__predict_false(xbd_io->xio_need_bounce)) {
139780da1c02Sbouyer 		if (__predict_false(xbdi->xbdi_bouncebuf_use)) {
139880da1c02Sbouyer 			KASSERT(xbdi->xbdi_pendingreqs > 1);
139980da1c02Sbouyer 			/* retry later */
140080da1c02Sbouyer 			xbdi->xbdi_cont_restart = xbdback_co_io_gotio;
140180da1c02Sbouyer 			xbdi->xbdi_cont_restart_obj = xbd_io;
140280da1c02Sbouyer 			xbdi->xbdi_cont = NULL;
140380da1c02Sbouyer 			return NULL;
140480da1c02Sbouyer 		}
140580da1c02Sbouyer 		xbdi->xbdi_bouncebuf_use++;
140680da1c02Sbouyer 		KASSERT(xbdi->xbdi_bouncebuf_use == 1);
140780da1c02Sbouyer 		xbd_io->xio_buf.b_data = (void *)xbdi->xbdi_bouncebuf;
140880da1c02Sbouyer 	}
140980da1c02Sbouyer 	xbdi->xbdi_pendingreqs++;
141080da1c02Sbouyer 	if ((error = xbdback_map_shm(xbd_io)) != 0) {
141180da1c02Sbouyer 		xbdback_io_error(xbd_io, error);
141280da1c02Sbouyer 		/* do not retry */
141380da1c02Sbouyer 		xbdi->xbdi_cont = xbdback_co_main_incr;
141480da1c02Sbouyer 		return xbdi;
141580da1c02Sbouyer 	}
141680da1c02Sbouyer 	if (__predict_true(xbd_io->xio_need_bounce == 0)) {
141780da1c02Sbouyer 		xbd_io->xio_buf.b_data = (void *)
141880da1c02Sbouyer 		    (xbd_io->xio_vaddr + xbd_io->xio_start_offset);
141980da1c02Sbouyer 	}
142080da1c02Sbouyer 
142180da1c02Sbouyer 
1422b96fedacSbouyer 	xbd_io->xio_buf.b_private = xbd_io;
1423b96fedacSbouyer 
142454f95b14Sjym 	xbdi->xbdi_cont = xbdback_co_do_io;
142580da1c02Sbouyer 	return xbd_io;
1426b96fedacSbouyer }
1427b96fedacSbouyer 
1428b96fedacSbouyer static void
xbdback_io_error(struct xbdback_io * xbd_io,int error)1429b96fedacSbouyer xbdback_io_error(struct xbdback_io *xbd_io, int error)
1430b96fedacSbouyer {
143158477cb8Sjdolecek 	KASSERT(mutex_owned(&xbd_io->xio_xbdi->xbdi_lock));
143258477cb8Sjdolecek 
143358477cb8Sjdolecek 	struct buf *bp = &xbd_io->xio_buf;
143458477cb8Sjdolecek 
143558477cb8Sjdolecek 	bp->b_error = error;
143658477cb8Sjdolecek 	xbdback_iodone_locked(xbd_io->xio_xbdi, xbd_io, bp);
1437b96fedacSbouyer }
1438b96fedacSbouyer 
143977822551Sjym /*
144054f95b14Sjym  * Main xbdback I/O routine. It can either perform a flush operation or
144154f95b14Sjym  * schedule a read/write operation.
144277822551Sjym  */
144354f95b14Sjym static void *
xbdback_co_do_io(struct xbdback_instance * xbdi,void * obj)144454f95b14Sjym xbdback_co_do_io(struct xbdback_instance *xbdi, void *obj)
1445b96fedacSbouyer {
144678af9293Sjdolecek 	struct xbdback_io *xbd_io = obj;
144780da1c02Sbouyer 	blkif_request_t *req = &xbd_io->xio_xen_req;
14483d5ae4c8Sbouyer 
144980da1c02Sbouyer 	KASSERT(xbdi->xbdi_refcnt > 0);
145080da1c02Sbouyer 
145180da1c02Sbouyer 	switch (req->operation) {
145277822551Sjym 	case BLKIF_OP_FLUSH_DISKCACHE:
145377822551Sjym 	{
1454db95bc7fSbouyer 		int error;
1455e4821a51Sbouyer 		int force = 1;
1456db95bc7fSbouyer 
145758477cb8Sjdolecek 		KASSERT(mutex_owned(&xbdi->xbdi_lock));
145858477cb8Sjdolecek 		mutex_exit(&xbdi->xbdi_lock);
1459e4821a51Sbouyer 		error = VOP_IOCTL(xbdi->xbdi_vp, DIOCCACHESYNC, &force, FWRITE,
1460db95bc7fSbouyer 		    kauth_cred_get());
146158477cb8Sjdolecek 		mutex_enter(&xbdi->xbdi_lock);
1462db95bc7fSbouyer 		if (error) {
1463db95bc7fSbouyer 			aprint_error("xbdback %s: DIOCCACHESYNC returned %d\n",
1464db95bc7fSbouyer 			    xbdi->xbdi_xbusd->xbusd_path, error);
1465db95bc7fSbouyer 			 if (error == EOPNOTSUPP || error == ENOTTY)
1466db95bc7fSbouyer 				error = BLKIF_RSP_EOPNOTSUPP;
1467db95bc7fSbouyer 			 else
1468db95bc7fSbouyer 				error = BLKIF_RSP_ERROR;
1469db95bc7fSbouyer 		} else
1470db95bc7fSbouyer 			error = BLKIF_RSP_OKAY;
147180da1c02Sbouyer 		xbdback_send_reply(xbdi, req->id, req->operation, error);
14722d80bedaSjdolecek 		xbdback_io_put(xbdi, xbd_io);
1473db95bc7fSbouyer 		xbdi_put(xbdi);
1474db95bc7fSbouyer 		xbdi->xbdi_cont = xbdback_co_main_incr;
147554f95b14Sjym 		return xbdi;
1476db95bc7fSbouyer 	}
147777822551Sjym 	case BLKIF_OP_READ:
147877822551Sjym 	case BLKIF_OP_WRITE:
147980da1c02Sbouyer 		if (__predict_false(xbd_io->xio_need_bounce) &&
148080da1c02Sbouyer 		    req->operation == BLKIF_OP_WRITE) {
148180da1c02Sbouyer 			vaddr_t boffset = 0;
148280da1c02Sbouyer 			for (int i = 0; i < req->nr_segments; i++) {
148380da1c02Sbouyer 				struct blkif_request_segment *seg =
148480da1c02Sbouyer 				    &xbd_io->xio_seg[i];
148580da1c02Sbouyer 				vaddr_t segoffset = seg->first_sect * VBD_BSIZE;
148680da1c02Sbouyer 				size_t segbcount =
148780da1c02Sbouyer 				   (seg->last_sect - seg->first_sect + 1) *
148880da1c02Sbouyer 				    VBD_BSIZE;
148980da1c02Sbouyer 				KASSERT(segoffset + segbcount <= PAGE_SIZE);
149080da1c02Sbouyer 				KASSERT(boffset + segbcount < MAXPHYS);
149180da1c02Sbouyer 				segoffset += PAGE_SIZE * i;
149280da1c02Sbouyer 				memcpy(
149380da1c02Sbouyer 				    (void *)(xbdi->xbdi_bouncebuf + boffset),
149480da1c02Sbouyer 				    (void *)(xbd_io->xio_vaddr + segoffset),
149580da1c02Sbouyer 				    segbcount);
149680da1c02Sbouyer 				boffset += segbcount;
149780da1c02Sbouyer 			}
149880da1c02Sbouyer 		}
14997cfb6126Sbouyer 		KASSERT(mutex_owned(&xbdi->xbdi_lock));
15007cfb6126Sbouyer 		mutex_exit(&xbdi->xbdi_lock);
15013d5ae4c8Sbouyer 		if ((xbd_io->xio_buf.b_flags & B_READ) == 0) {
1502e225b7bdSrmind 			mutex_enter(xbd_io->xio_buf.b_vp->v_interlock);
1503b96fedacSbouyer 			xbd_io->xio_buf.b_vp->v_numoutput++;
1504e225b7bdSrmind 			mutex_exit(xbd_io->xio_buf.b_vp->v_interlock);
15053d5ae4c8Sbouyer 		}
150654f95b14Sjym 		/* will call xbdback_iodone() asynchronously when done */
150770fd7422Sjdolecek 		bdev_strategy(&xbd_io->xio_buf);
15087cfb6126Sbouyer 		mutex_enter(&xbdi->xbdi_lock);
150970fd7422Sjdolecek 		xbdi->xbdi_cont = xbdback_co_main_incr;
151054f95b14Sjym 		return xbdi;
151177822551Sjym 	default:
151277822551Sjym 		/* Should never happen */
151354f95b14Sjym 		panic("xbdback_co_do_io: unsupported operation %d",
151480da1c02Sbouyer 		    req->operation);
151577822551Sjym 	}
1516b96fedacSbouyer }
1517b96fedacSbouyer 
151854f95b14Sjym /*
151954f95b14Sjym  * Called from softint(9) context when an I/O is done: for each request, send
152054f95b14Sjym  * back the associated reply to the domain.
152154f95b14Sjym  */
1522b96fedacSbouyer static void
xbdback_iodone(struct buf * bp)1523b96fedacSbouyer xbdback_iodone(struct buf *bp)
1524b96fedacSbouyer {
1525b96fedacSbouyer 	struct xbdback_io *xbd_io;
1526b96fedacSbouyer 	struct xbdback_instance *xbdi;
1527bdf8ebffSad 
1528b96fedacSbouyer 	xbd_io = bp->b_private;
152958477cb8Sjdolecek 	KASSERT(bp == &xbd_io->xio_buf);
1530b96fedacSbouyer 	xbdi = xbd_io->xio_xbdi;
1531b96fedacSbouyer 
153258477cb8Sjdolecek 	mutex_enter(&xbdi->xbdi_lock);
153358477cb8Sjdolecek 	xbdback_iodone_locked(xbdi, xbd_io, bp);
153458477cb8Sjdolecek 	mutex_exit(&xbdi->xbdi_lock);
153558477cb8Sjdolecek }
153658477cb8Sjdolecek 
153758477cb8Sjdolecek /*
153858477cb8Sjdolecek  * This gets reused by xbdback_io_error to report errors from other sources.
153958477cb8Sjdolecek  */
154058477cb8Sjdolecek static void
xbdback_iodone_locked(struct xbdback_instance * xbdi,struct xbdback_io * xbd_io,struct buf * bp)154158477cb8Sjdolecek xbdback_iodone_locked(struct xbdback_instance *xbdi, struct xbdback_io *xbd_io,
154258477cb8Sjdolecek     struct buf *bp)
154358477cb8Sjdolecek {
154458477cb8Sjdolecek 	int status;
154580da1c02Sbouyer 	blkif_request_t *req = &xbd_io->xio_xen_req;
154658477cb8Sjdolecek 
1547b96fedacSbouyer 	XENPRINTF(("xbdback_io domain %d: iodone ptr 0x%lx\n",
1548b96fedacSbouyer 		   xbdi->xbdi_domid, (long)xbd_io));
1549b96fedacSbouyer 
155058477cb8Sjdolecek 	KASSERT(mutex_owned(&xbdi->xbdi_lock));
155158477cb8Sjdolecek 
155270fd7422Sjdolecek 	KASSERT(bp->b_error != 0 || xbd_io->xio_xv != NULL);
155380da1c02Sbouyer 	if (__predict_false(xbd_io->xio_need_bounce)) {
155480da1c02Sbouyer 		KASSERT(xbd_io->xio_buf.b_data == (void *)xbdi->xbdi_bouncebuf);
155580da1c02Sbouyer 
155680da1c02Sbouyer 		KASSERT(req->operation == BLKIF_OP_WRITE ||
155780da1c02Sbouyer 		    req->operation == BLKIF_OP_READ);
155880da1c02Sbouyer 
155980da1c02Sbouyer 		if (req->operation == BLKIF_OP_READ && bp->b_error == 0) {
156080da1c02Sbouyer 			vaddr_t boffset = 0;
156180da1c02Sbouyer 			for (int i = 0; i < req->nr_segments; i++) {
156280da1c02Sbouyer 				struct blkif_request_segment *seg =
156380da1c02Sbouyer 				    &xbd_io->xio_seg[i];
156480da1c02Sbouyer 				vaddr_t segoffset = seg->first_sect * VBD_BSIZE;
156580da1c02Sbouyer 				size_t segbcount =
156680da1c02Sbouyer 				   (seg->last_sect - seg->first_sect + 1) *
156780da1c02Sbouyer 				    VBD_BSIZE;
156880da1c02Sbouyer 				KASSERT(segoffset + segbcount <= PAGE_SIZE);
156980da1c02Sbouyer 				KASSERT(boffset + segbcount < MAXPHYS);
157080da1c02Sbouyer 				segoffset += PAGE_SIZE * i;
157180da1c02Sbouyer 				memcpy(
157280da1c02Sbouyer 				    (void *)(xbd_io->xio_vaddr + segoffset),
157380da1c02Sbouyer 				    (void *)(xbdi->xbdi_bouncebuf + boffset),
157480da1c02Sbouyer 				    segbcount);
157580da1c02Sbouyer 				boffset += segbcount;
157680da1c02Sbouyer 			}
157780da1c02Sbouyer 		}
157880da1c02Sbouyer 		KASSERT(xbdi->xbdi_bouncebuf_use == 1);
157980da1c02Sbouyer 		xbdi->xbdi_bouncebuf_use--;
158080da1c02Sbouyer 	}
158170fd7422Sjdolecek 	if (xbd_io->xio_xv != NULL)
1582b96fedacSbouyer 		xbdback_unmap_shm(xbd_io);
1583b96fedacSbouyer 
158466fefd11Sad 	if (bp->b_error != 0) {
1585b96fedacSbouyer 		printf("xbd IO domain %d: error %d\n",
1586b96fedacSbouyer 		       xbdi->xbdi_domid, bp->b_error);
158770fd7422Sjdolecek 		status = BLKIF_RSP_ERROR;
1588b96fedacSbouyer 	} else
158970fd7422Sjdolecek 		status = BLKIF_RSP_OKAY;
1590b96fedacSbouyer 
159180da1c02Sbouyer 	xbdback_send_reply(xbdi, req->id, req->operation, status);
1592b96fedacSbouyer 
1593b96fedacSbouyer 	xbdi_put(xbdi);
159458477cb8Sjdolecek 	KASSERT(xbdi->xbdi_pendingreqs > 0);
159558477cb8Sjdolecek 	xbdi->xbdi_pendingreqs--;
15964a780c9aSad 	buf_destroy(&xbd_io->xio_buf);
15972d80bedaSjdolecek 	xbdback_io_put(xbdi, xbd_io);
159854f95b14Sjym 
159954f95b14Sjym 	xbdback_wakeup_thread(xbdi);
1600db95bc7fSbouyer }
160154f95b14Sjym 
160254f95b14Sjym /*
160354f95b14Sjym  * Wake up the per xbdback instance thread.
160454f95b14Sjym  */
160554f95b14Sjym static void
xbdback_wakeup_thread(struct xbdback_instance * xbdi)160654f95b14Sjym xbdback_wakeup_thread(struct xbdback_instance *xbdi)
160754f95b14Sjym {
160858477cb8Sjdolecek 	KASSERT(mutex_owned(&xbdi->xbdi_lock));
160954f95b14Sjym 
161054f95b14Sjym 	/* only set RUN state when we are WAITING for work */
161154f95b14Sjym 	if (xbdi->xbdi_status == WAITING)
161254f95b14Sjym 	       xbdi->xbdi_status = RUN;
161358477cb8Sjdolecek 	cv_signal(&xbdi->xbdi_cv);
1614b96fedacSbouyer }
1615b96fedacSbouyer 
1616b96fedacSbouyer /*
1617b96fedacSbouyer  * called once a request has completed. Place the reply in the ring and
161854f95b14Sjym  * notify the guest OS.
1619b96fedacSbouyer  */
1620b96fedacSbouyer static void
xbdback_send_reply(struct xbdback_instance * xbdi,uint64_t id,int op,int status)1621b96fedacSbouyer xbdback_send_reply(struct xbdback_instance *xbdi, uint64_t id,
1622b96fedacSbouyer     int op, int status)
1623b96fedacSbouyer {
1624d3cd2576Sbouyer 	blkif_response_t *resp_n;
1625d3cd2576Sbouyer 	blkif_x86_32_response_t *resp32;
1626d3cd2576Sbouyer 	blkif_x86_64_response_t *resp64;
1627b96fedacSbouyer 	int notify;
1628b96fedacSbouyer 
162958477cb8Sjdolecek 	KASSERT(mutex_owned(&xbdi->xbdi_lock));
163058477cb8Sjdolecek 
163154f95b14Sjym 	/*
163254f95b14Sjym 	 * The ring can be accessed by the xbdback thread, xbdback_iodone()
163354f95b14Sjym 	 * handler, or any handler that triggered the shm callback. So
163454f95b14Sjym 	 * protect ring access via the xbdi_lock mutex.
163554f95b14Sjym 	 */
1636d3cd2576Sbouyer 	switch (xbdi->xbdi_proto) {
1637d3cd2576Sbouyer 	case XBDIP_NATIVE:
1638d3cd2576Sbouyer 		resp_n = RING_GET_RESPONSE(&xbdi->xbdi_ring.ring_n,
1639d3cd2576Sbouyer 		    xbdi->xbdi_ring.ring_n.rsp_prod_pvt);
1640d3cd2576Sbouyer 		resp_n->id        = id;
1641d3cd2576Sbouyer 		resp_n->operation = op;
1642d3cd2576Sbouyer 		resp_n->status    = status;
1643e6190ce3Sbouyer 		break;
1644d3cd2576Sbouyer 	case XBDIP_32:
1645d3cd2576Sbouyer 		resp32 = RING_GET_RESPONSE(&xbdi->xbdi_ring.ring_32,
1646d3cd2576Sbouyer 		    xbdi->xbdi_ring.ring_n.rsp_prod_pvt);
1647d3cd2576Sbouyer 		resp32->id        = id;
1648d3cd2576Sbouyer 		resp32->operation = op;
1649d3cd2576Sbouyer 		resp32->status    = status;
1650e6190ce3Sbouyer 		break;
1651d3cd2576Sbouyer 	case XBDIP_64:
1652d3cd2576Sbouyer 		resp64 = RING_GET_RESPONSE(&xbdi->xbdi_ring.ring_64,
1653d3cd2576Sbouyer 		    xbdi->xbdi_ring.ring_n.rsp_prod_pvt);
1654d3cd2576Sbouyer 		resp64->id        = id;
1655d3cd2576Sbouyer 		resp64->operation = op;
1656d3cd2576Sbouyer 		resp64->status    = status;
1657e6190ce3Sbouyer 		break;
1658d3cd2576Sbouyer 	}
1659d3cd2576Sbouyer 	xbdi->xbdi_ring.ring_n.rsp_prod_pvt++;
1660d3cd2576Sbouyer 	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xbdi->xbdi_ring.ring_n, notify);
166154f95b14Sjym 
1662b96fedacSbouyer 	if (notify) {
1663b96fedacSbouyer 		XENPRINTF(("xbdback_send_reply notify %d\n", xbdi->xbdi_domid));
1664b96fedacSbouyer 		hypervisor_notify_via_evtchn(xbdi->xbdi_evtchn);
1665b96fedacSbouyer 	}
1666b96fedacSbouyer }
1667b96fedacSbouyer 
1668b96fedacSbouyer /*
166977822551Sjym  * Map multiple entries of an I/O request into backend's VA space.
167077822551Sjym  * The xbd_io->xio_gref array has to be filled out by the caller.
1671b96fedacSbouyer  */
167280da1c02Sbouyer static int
xbdback_map_shm(struct xbdback_io * xbd_io)1673b96fedacSbouyer xbdback_map_shm(struct xbdback_io *xbd_io)
1674b96fedacSbouyer {
16758d1b8859Sjdolecek 	struct xbdback_instance *xbdi = xbd_io->xio_xbdi;
167680da1c02Sbouyer 	blkif_request_t *req = &xbd_io->xio_xen_req;
167758477cb8Sjdolecek 	int error;
1678b96fedacSbouyer 
1679b96fedacSbouyer #ifdef XENDEBUG_VBD
1680b96fedacSbouyer 	int i;
1681b96fedacSbouyer 	printf("xbdback_map_shm map grant ");
168280da1c02Sbouyer 	for (i = 0; i < req->nr_segments; i++) {
1683b96fedacSbouyer 		printf("%u ", (u_int)xbd_io->xio_gref[i]);
1684b96fedacSbouyer 	}
1685b96fedacSbouyer #endif
1686b96fedacSbouyer 
168758477cb8Sjdolecek 	KASSERT(mutex_owned(&xbdi->xbdi_lock));
168880da1c02Sbouyer 	KASSERT(xbd_io->xio_xv == NULL);
168958477cb8Sjdolecek 
16908d1b8859Sjdolecek 	xbd_io->xio_xv = SLIST_FIRST(&xbdi->xbdi_va_free);
16918d1b8859Sjdolecek 	KASSERT(xbd_io->xio_xv != NULL);
16928d1b8859Sjdolecek 	SLIST_REMOVE_HEAD(&xbdi->xbdi_va_free, xv_next);
16938d1b8859Sjdolecek 	xbd_io->xio_vaddr = xbd_io->xio_xv->xv_vaddr;
16948d1b8859Sjdolecek 
169580da1c02Sbouyer 	error = xen_shm_map(req->nr_segments, xbdi->xbdi_domid,
16968d1b8859Sjdolecek 	    xbd_io->xio_gref, xbd_io->xio_vaddr, xbd_io->xio_gh,
169780da1c02Sbouyer 	    (req->operation == BLKIF_OP_WRITE) ? XSHM_RO : 0);
1698b96fedacSbouyer 
1699b96fedacSbouyer 	switch(error) {
1700b96fedacSbouyer 	case 0:
1701b96fedacSbouyer #ifdef XENDEBUG_VBD
1702b96fedacSbouyer 		printf("handle");
170380da1c02Sbouyer 		for (i = 0; i < req->nr_segments; i++) {
1704b96fedacSbouyer 			printf(" %u ", (u_int)xbd_io->xio_gh[i]);
1705b96fedacSbouyer 		}
1706b96fedacSbouyer 		printf("\n");
1707b96fedacSbouyer #endif
170880da1c02Sbouyer 		return 0;
1709b96fedacSbouyer 	default:
1710c144ae48Sjdolecek 		/* reset xio_xv so error handling won't try to unmap it */
17118d1b8859Sjdolecek 		SLIST_INSERT_HEAD(&xbdi->xbdi_va_free, xbd_io->xio_xv, xv_next);
17128d1b8859Sjdolecek 		xbd_io->xio_xv = NULL;
171380da1c02Sbouyer 		return error;
1714b96fedacSbouyer 	}
1715b96fedacSbouyer }
1716b96fedacSbouyer 
1717b96fedacSbouyer /* unmap a request from our virtual address space (request is done) */
1718b96fedacSbouyer static void
xbdback_unmap_shm(struct xbdback_io * xbd_io)1719b96fedacSbouyer xbdback_unmap_shm(struct xbdback_io *xbd_io)
1720b96fedacSbouyer {
17218d1b8859Sjdolecek 	struct xbdback_instance *xbdi = xbd_io->xio_xbdi;
172280da1c02Sbouyer 	blkif_request_t *req = &xbd_io->xio_xen_req;
17238d1b8859Sjdolecek 
1724b96fedacSbouyer #ifdef XENDEBUG_VBD
1725b96fedacSbouyer 	int i;
1726b96fedacSbouyer 	printf("xbdback_unmap_shm handle ");
172780da1c02Sbouyer 	for (i = 0; i < req->nr_segments; i++) {
1728b96fedacSbouyer 		printf("%u ", (u_int)xbd_io->xio_gh[i]);
1729b96fedacSbouyer 	}
1730b96fedacSbouyer 	printf("\n");
1731b96fedacSbouyer #endif
1732b96fedacSbouyer 
173370fd7422Sjdolecek 	KASSERT(xbd_io->xio_xv != NULL);
173480da1c02Sbouyer 	xen_shm_unmap(xbd_io->xio_vaddr, req->nr_segments,
1735b96fedacSbouyer 	    xbd_io->xio_gh);
17368d1b8859Sjdolecek 	SLIST_INSERT_HEAD(&xbdi->xbdi_va_free, xbd_io->xio_xv, xv_next);
17378d1b8859Sjdolecek 	xbd_io->xio_xv = NULL;
1738b96fedacSbouyer 	xbd_io->xio_vaddr = -1;
1739b96fedacSbouyer }
1740b96fedacSbouyer 
174154f95b14Sjym /* Obtain memory from a pool */
17422d80bedaSjdolecek static struct xbdback_io *
xbdback_io_get(struct xbdback_instance * xbdi)17432d80bedaSjdolecek xbdback_io_get(struct xbdback_instance *xbdi)
1744b96fedacSbouyer {
17452d80bedaSjdolecek 	struct xbdback_io *xbd_io = SLIST_FIRST(&xbdi->xbdi_io_free);
17462d80bedaSjdolecek 	SLIST_REMOVE_HEAD(&xbdi->xbdi_io_free, xio_next);
17472d80bedaSjdolecek 	return xbd_io;
1748b96fedacSbouyer }
1749b96fedacSbouyer 
175054f95b14Sjym /* Restore memory to a pool */
175154f95b14Sjym static void
xbdback_io_put(struct xbdback_instance * xbdi,struct xbdback_io * xbd_io)17522d80bedaSjdolecek xbdback_io_put(struct xbdback_instance *xbdi, struct xbdback_io *xbd_io)
1753b96fedacSbouyer {
175480da1c02Sbouyer 	KASSERT(xbd_io->xio_xv == NULL);
17552d80bedaSjdolecek 	KASSERT(xbd_io != NULL);
17562d80bedaSjdolecek 	SLIST_INSERT_HEAD(&xbdi->xbdi_io_free, xbd_io, xio_next);
1757b96fedacSbouyer }
1758b96fedacSbouyer 
175977822551Sjym /*
176077822551Sjym  * Trampoline routine. Calls continuations in a loop and only exits when
176177822551Sjym  * either the returned object or the next callback is NULL.
176277822551Sjym  */
1763b96fedacSbouyer static void
xbdback_trampoline(struct xbdback_instance * xbdi,void * obj)1764b96fedacSbouyer xbdback_trampoline(struct xbdback_instance *xbdi, void *obj)
1765b96fedacSbouyer {
1766b96fedacSbouyer 	xbdback_cont_t cont;
1767b96fedacSbouyer 
1768b96fedacSbouyer 	while(obj != NULL && xbdi->xbdi_cont != NULL) {
176980da1c02Sbouyer 		KASSERT(xbdi->xbdi_cont_restart == NULL);
177080da1c02Sbouyer 		KASSERT(xbdi->xbdi_cont_restart_obj == NULL);
1771b96fedacSbouyer 		cont = xbdi->xbdi_cont;
1772b96fedacSbouyer #ifdef DIAGNOSTIC
1773b96fedacSbouyer 		xbdi->xbdi_cont = (xbdback_cont_t)0xDEADBEEF;
1774b96fedacSbouyer #endif
1775b96fedacSbouyer 		obj = (*cont)(xbdi, obj);
1776b96fedacSbouyer #ifdef DIAGNOSTIC
1777b96fedacSbouyer 		if (xbdi->xbdi_cont == (xbdback_cont_t)0xDEADBEEF) {
1778b96fedacSbouyer 			printf("xbdback_trampoline: 0x%lx didn't set "
177977822551Sjym 			       "xbdi->xbdi_cont!\n", (long)cont);
1780b96fedacSbouyer 			panic("xbdback_trampoline: bad continuation");
1781b96fedacSbouyer 		}
178280da1c02Sbouyer 		if (xbdi->xbdi_cont_restart != NULL ||
178380da1c02Sbouyer 		    xbdi->xbdi_cont_restart_obj != NULL) {
178480da1c02Sbouyer 			KASSERT(xbdi->xbdi_cont_restart != NULL);
178580da1c02Sbouyer 			KASSERT(xbdi->xbdi_cont_restart_obj != NULL);
178680da1c02Sbouyer 			KASSERT(xbdi->xbdi_cont == NULL);
178780da1c02Sbouyer 			KASSERT(obj == NULL);
178880da1c02Sbouyer 		}
1789b96fedacSbouyer #endif
1790b96fedacSbouyer 	}
1791b96fedacSbouyer }
1792