/*	$NetBSD: xbdback_xenbus.c,v 1.107 2024/06/20 15:17:27 bouyer Exp $	*/

/*
 * Copyright (c) 2006,2024 Manuel Bouyer.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: xbdback_xenbus.c,v 1.107 2024/06/20 15:17:27 bouyer Exp $");

#include <sys/buf.h>
#include <sys/condvar.h>
#include <sys/conf.h>
#include <sys/disk.h>
#include <sys/device.h>
#include <sys/fcntl.h>
#include <sys/kauth.h>
#include <sys/kernel.h>
#include <sys/kmem.h>
#include <sys/kthread.h>
#include <sys/mutex.h>
#include <sys/param.h>
#include <sys/queue.h>
#include <sys/systm.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/vnode.h>

#include <xen/intr.h>
#include <xen/hypervisor.h>
#include <xen/xen.h>
#include <xen/xen_shm.h>
#include <xen/evtchn.h>
#include <xen/xenbus.h>
#include <xen/xenring.h>
#include <xen/include/public/io/protocols.h>

/* #define XENDEBUG_VBD */
#ifdef XENDEBUG_VBD
#define XENPRINTF(x) printf x
#else
#define XENPRINTF(x)
#endif

#define BLKIF_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE)

/*
 * Backend block device driver for Xen
 */

/* Values are expressed in 512-byte sectors */
#define VBD_BSIZE 512
#define VBD_MAXSECT ((PAGE_SIZE / VBD_BSIZE) - 1)

#define VBD_VA_SIZE	MAXPHYS
#define VBD_MAX_INDIRECT_SEGMENTS	(VBD_VA_SIZE >> PAGE_SHIFT)

CTASSERT(XENSHM_MAX_PAGES_PER_REQUEST >= VBD_MAX_INDIRECT_SEGMENTS);

struct xbdback_instance;

/*
 * status of a xbdback instance:
 * WAITING: xbdback instance is connected, waiting for requests
 * RUN: xbdi thread must be woken up, I/Os have to be processed
 * DISCONNECTING: the instance is closing, no more I/Os can be scheduled
 * DISCONNECTED: no I/Os, no ring, the thread should terminate.
 */
typedef enum {WAITING, RUN, DISCONNECTING, DISCONNECTED} xbdback_state_t;
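
/*
 * Rough state diagram, as driven by xbdback_connect(), xbdback_thread()
 * and xbdback_disconnect() below:
 *
 *	DISCONNECTED -> WAITING <-> RUN
 *	WAITING|RUN -> DISCONNECTING -> DISCONNECTED
 */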

/*
 * Each xbdback instance is managed by a single thread that handles all
 * the I/O processing. As there are a variety of conditions that can block,
 * everything will be done in a sort of continuation-passing style.
 *
 * When execution has to block to delay processing, for example to
 * allow the system to recover from a memory shortage (via the shared memory
 * callback), the return value of a continuation can be set to NULL. In that
 * case, the thread will go back to sleeping and wait for the proper
 * condition before it starts processing requests again from where it left
 * off. Continuation state is "stored" in the xbdback instance (xbdi_cont),
 * and should only be manipulated by the instance thread.
 * If a continuation has to be restarted from a specific point,
 * the callback and argument can be stored in xbdi_cont_restart and
 * xbdi_cont_restart_obj.
 *
 *
 * As xbdback(4) has to handle different sorts of asynchronous events (Xen
 * event channels, biointr() soft interrupts, xenbus commands), the xbdi_lock
 * mutex is used to protect specific elements of the xbdback instance from
 * concurrent access: thread status and ring access (when pushing responses).
 *
 * Here's how the call graph is supposed to be for a single I/O:
 *
 * xbdback_co_main()
 *        |               --> xbdback_co_cache_flush()
 *        |               |    |
 *        |               |    -> xbdback_co_do_io() or NULL
 * xbdback_co_main_loop()-|
 *        |               |-> xbdback_co_main_done2() or NULL
 *        |               |
 *        |               --> xbdback_co_main_incr() -> xbdback_co_main_loop()
 *        |
 * xbdback_co_io() -> xbdback_co_main_incr() -> xbdback_co_main_loop()
 *        |
 * xbdback_co_io_gotio() -> xbdback_co_main_incr() -> xbdback_co_main_loop()
 *        |
 * xbdback_co_do_io()
 *        |
 * xbdback_co_main_incr() -> xbdback_co_main_loop()
 */
typedef void *(* xbdback_cont_t)(struct xbdback_instance *, void *);

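/*
 * In outline, xbdback_trampoline() drives the continuation chain
 * (a sketch of the idea, not the exact code):
 *
 *	while (obj != NULL && xbdi->xbdi_cont != NULL) {
 *		xbdback_cont_t cont = xbdi->xbdi_cont;
 *		obj = (*cont)(xbdi, obj);
 *	}
 *
 * so a continuation that returns NULL (or clears xbdi_cont) parks the
 * instance thread until xbdback_wakeup_thread() schedules it again.
 */
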
enum xbdi_proto {
	XBDIP_NATIVE,
	XBDIP_32,
	XBDIP_64
};

struct xbdback_va {
	SLIST_ENTRY(xbdback_va) xv_next;
	vaddr_t xv_vaddr;
};

/*
 * For each I/O operation associated with one of those requests, an
 * xbdback_io is allocated from a pool. It may correspond to multiple
 * Xen disk requests, or parts of them, if several arrive at once that
 * can be coalesced.
 */
struct xbdback_io {
	SLIST_ENTRY(xbdback_io) xio_next;
	/* The instance pointer is duplicated for convenience. */
	struct xbdback_instance *xio_xbdi; /* our xbd instance */
	/* _request state: track requests fetched from ring */
	blkif_request_t xio_xen_req;
	/* array of segments[VBD_MAX_INDIRECT_SEGMENTS] allocated separately */
	struct blkif_request_segment *xio_seg;
	bus_dmamap_t xio_seg_dmamap;
	/* internal states */
	union {
		struct {
			struct buf xio_buf; /* our I/O */
			/* the virtual address to map the request at */
			vaddr_t xio_vaddr;
			struct xbdback_va *xio_xv;
			vaddr_t xio_start_offset; /* I/O start offset */
			/* grants to map */
			grant_ref_t xio_gref[VBD_MAX_INDIRECT_SEGMENTS];
			/* grants to release */
			grant_handle_t xio_gh[VBD_MAX_INDIRECT_SEGMENTS];
			bool xio_need_bounce; /* request is not contiguous */
		} xio_rw;
	} u;
};
#define xio_buf		u.xio_rw.xio_buf
#define xio_vaddr	u.xio_rw.xio_vaddr
#define xio_start_offset	u.xio_rw.xio_start_offset
#define xio_xv		u.xio_rw.xio_xv
#define xio_gref	u.xio_rw.xio_gref
#define xio_gh		u.xio_rw.xio_gh
#define xio_need_bounce	u.xio_rw.xio_need_bounce

/* we keep the xbdback instances in a linked list */
struct xbdback_instance {
	SLIST_ENTRY(xbdback_instance) next;
	struct xenbus_device *xbdi_xbusd; /* our xenstore entry */
	struct xenbus_watch xbdi_watch; /* to watch our store */
	domid_t xbdi_domid;	/* attached to this domain */
	uint32_t xbdi_handle;	/* domain-specific handle */
	char xbdi_name[16];	/* name of this instance */
	/* mutex that protects concurrent access to the xbdback instance */
	kmutex_t xbdi_lock;
	kcondvar_t xbdi_cv;	/* wait channel for thread work */
	xbdback_state_t xbdi_status; /* thread's status */
	/* context and KVA for mapping transfers */
	struct xbdback_io xbdi_io[BLKIF_RING_SIZE];
	SLIST_HEAD(, xbdback_io) xbdi_io_free;
	struct xbdback_va xbdi_va[BLKIF_RING_SIZE];
	SLIST_HEAD(, xbdback_va) xbdi_va_free;
	/* segments structure allocated in page-aligned chunks */
	struct blkif_request_segment *xbdi_segs;
	/* bounce buffer in case a transfer is not contiguous */
	vaddr_t xbdi_bouncebuf;
	int xbdi_bouncebuf_use; /* is bounce buffer in use? */
	/* backing device parameters */
	dev_t xbdi_dev;
	const struct bdevsw *xbdi_bdevsw; /* pointer to the device's bdevsw */
	struct vnode *xbdi_vp;
	uint64_t xbdi_size;
	bool xbdi_ro; /* is device read-only ? */
	/* parameters for the communication */
	unsigned int xbdi_evtchn;
	struct intrhand *xbdi_ih;
	/* private parameters for communication */
	blkif_back_ring_proto_t xbdi_ring;
	enum xbdi_proto xbdi_proto;
	grant_handle_t xbdi_ring_handle; /* to unmap the ring */
	vaddr_t xbdi_ring_va; /* to unmap the ring */
	/* disconnection must be postponed until all I/O is done */
	int xbdi_refcnt;
	/*
	 * State for I/O processing/coalescing follows; this has to
	 * live here instead of on the stack because of the
	 * continuation-ness (see above).
	 */
	RING_IDX xbdi_req_prod; /* limit on request indices */
	xbdback_cont_t xbdi_cont;
	/* if not NULL, will restart here after thread wakes up */
	xbdback_cont_t xbdi_cont_restart;
	void *xbdi_cont_restart_obj;
	/* other state */
	uint xbdi_pendingreqs; /* number of I/Os in flight */
	struct timeval xbdi_lasterr_time; /* error time tracking */
};
/* Manipulation of the above reference count. */
#define xbdi_get(xbdip)						\
do {								\
	KASSERT(mutex_owned(&xbdip->xbdi_lock));		\
	(xbdip)->xbdi_refcnt++;					\
} while (0)

#define xbdi_put(xbdip)						\
do {								\
	KASSERT(mutex_owned(&xbdip->xbdi_lock));		\
	if (--((xbdip)->xbdi_refcnt) == 0)			\
		xbdback_finish_disconnect(xbdip);		\
} while (0)
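
/*
 * Reference discipline in brief (a summary of existing usage, not new
 * rules): xbdback_xenbus_create() takes the initial reference,
 * xbdback_co_cache_flush() takes an extra one around a flush, and the
 * final xbdi_put() from xbdback_thread() ends up calling
 * xbdback_finish_disconnect().
 */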

static SLIST_HEAD(, xbdback_instance) xbdback_instances;
static kmutex_t xbdback_lock;

/* Interval between reports of I/O errors from frontend */
static const struct timeval xbdback_err_intvl = { 1, 0 };

void xbdbackattach(int);
static int  xbdback_xenbus_create(struct xenbus_device *);
static int  xbdback_xenbus_destroy(void *);
static void xbdback_frontend_changed(void *, XenbusState);
static void xbdback_backend_changed(struct xenbus_watch *,
    const char **, unsigned int);
static int  xbdback_evthandler(void *);

static int  xbdback_connect(struct xbdback_instance *);
static void xbdback_disconnect(struct xbdback_instance *);
static void xbdback_finish_disconnect(struct xbdback_instance *);

static bool xbdif_lookup(domid_t, uint32_t);

static void *xbdback_co_main(struct xbdback_instance *, void *);
static void *xbdback_co_main_loop(struct xbdback_instance *, void *);
static void *xbdback_co_main_incr(struct xbdback_instance *, void *);
static void *xbdback_co_main_done2(struct xbdback_instance *, void *);

static void *xbdback_co_cache_flush(struct xbdback_instance *, void *);

static void *xbdback_co_io(struct xbdback_instance *, void *);
static void *xbdback_co_io_gotio(struct xbdback_instance *, void *);

static void *xbdback_co_do_io(struct xbdback_instance *, void *);

static void xbdback_io_error(struct xbdback_io *, int);
static void xbdback_iodone(struct buf *);
static void xbdback_iodone_locked(struct xbdback_instance *,
    struct xbdback_io *, struct buf *);
static void xbdback_send_reply(struct xbdback_instance *, uint64_t, int, int);

static int  xbdback_map_shm(struct xbdback_io *);
static void xbdback_unmap_shm(struct xbdback_io *);

static struct xbdback_io *xbdback_io_get(struct xbdback_instance *);
static void xbdback_io_put(struct xbdback_instance *, struct xbdback_io *);
static void xbdback_thread(void *);
static void xbdback_wakeup_thread(struct xbdback_instance *);
static void xbdback_trampoline(struct xbdback_instance *, void *);

static struct xenbus_backend_driver xbd_backend_driver = {
	.xbakd_create = xbdback_xenbus_create,
	.xbakd_type = "vbd"
};

void
xbdbackattach(int n)
{
	XENPRINTF(("xbdbackattach\n"));

	/*
	 * initialize the backend driver, register the control message handler
	 * and send driver up message.
	 */
	SLIST_INIT(&xbdback_instances);
	mutex_init(&xbdback_lock, MUTEX_DEFAULT, IPL_NONE);

	xenbus_backend_register(&xbd_backend_driver);
}

static int
xbdback_xenbus_create(struct xenbus_device *xbusd)
{
	struct xbdback_instance *xbdi;
	long domid, handle;
	int error, i;
	int segalloc = 0;
	char *ep;

	if ((error = xenbus_read_ul(NULL, xbusd->xbusd_path,
	    "frontend-id", &domid, 10)) != 0) {
		aprint_error("xbdback: can't read %s/frontend-id: %d\n",
		    xbusd->xbusd_path, error);
		return error;
	}

	/*
	 * get handle: this is the last component of the path; which is
	 * a decimal number. $path/dev contains the device name, which is not
	 * appropriate.
	 */
	for (i = strlen(xbusd->xbusd_path); i > 0; i--) {
		if (xbusd->xbusd_path[i] == '/')
			break;
	}
	if (i == 0) {
		aprint_error("xbdback: can't parse %s\n",
		    xbusd->xbusd_path);
		return EFTYPE;
	}
	handle = strtoul(&xbusd->xbusd_path[i+1], &ep, 10);
	if (*ep != '\0') {
		aprint_error("xbdback: can't parse %s\n",
		    xbusd->xbusd_path);
		return EFTYPE;
	}

	xbdi = kmem_zalloc(sizeof(*xbdi), KM_SLEEP);

	xbdi->xbdi_domid = domid;
	xbdi->xbdi_handle = handle;
	snprintf(xbdi->xbdi_name, sizeof(xbdi->xbdi_name), "xbdb%di%d",
	    xbdi->xbdi_domid, xbdi->xbdi_handle);

	mutex_enter(&xbdback_lock);
	if (xbdif_lookup(domid, handle)) {
		mutex_exit(&xbdback_lock);
		kmem_free(xbdi, sizeof(*xbdi));
		return EEXIST;
	}
	SLIST_INSERT_HEAD(&xbdback_instances, xbdi, next);
	mutex_exit(&xbdback_lock);

	/* initialize status and reference counter */
	xbdi->xbdi_status = DISCONNECTED;

	mutex_init(&xbdi->xbdi_lock, MUTEX_DEFAULT, IPL_BIO);
	cv_init(&xbdi->xbdi_cv, xbdi->xbdi_name);

	mutex_enter(&xbdi->xbdi_lock);
	xbdi_get(xbdi);
	mutex_exit(&xbdi->xbdi_lock);

	xbusd->xbusd_u.b.b_cookie = xbdi;
	xbusd->xbusd_u.b.b_detach = xbdback_xenbus_destroy;
	xbusd->xbusd_otherend_changed = xbdback_frontend_changed;
	xbdi->xbdi_xbusd = xbusd;

	SLIST_INIT(&xbdi->xbdi_va_free);
	for (i = 0; i < BLKIF_RING_SIZE; i++) {
		xbdi->xbdi_va[i].xv_vaddr = uvm_km_alloc(kernel_map,
		    VBD_VA_SIZE, 0, UVM_KMF_VAONLY|UVM_KMF_WAITVA);
		SLIST_INSERT_HEAD(&xbdi->xbdi_va_free, &xbdi->xbdi_va[i],
		    xv_next);
	}

	/*
	 * allocate page-aligned memory for segments, so that for each
	 * xbdback_io its segments are in a single page.
	 * sizeof(struct blkif_request_segment) * VBD_MAX_INDIRECT_SEGMENTS
	 * is 128 so this helps us avoid a page boundary within a
	 * block of VBD_MAX_INDIRECT_SEGMENTS segments.
	 */
	CTASSERT(sizeof(struct blkif_request_segment) * VBD_MAX_INDIRECT_SEGMENTS == 128);
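	/*
	 * Worked out with the usual x86 values (an illustration, assuming
	 * 4KB pages and MAXPHYS == 64KB): VBD_MAX_INDIRECT_SEGMENTS is 16,
	 * and 16 segments of 8 bytes each give the 128 bytes asserted
	 * above, i.e. 32 such blocks per page.
	 */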
	xbdi->xbdi_segs = (void *)uvm_km_alloc(kernel_map, round_page(
	    sizeof(struct blkif_request_segment) * VBD_MAX_INDIRECT_SEGMENTS * BLKIF_RING_SIZE),
	    PAGE_SIZE, UVM_KMF_WIRED | UVM_KMF_WAITVA);

	SLIST_INIT(&xbdi->xbdi_io_free);
	for (i = 0; i < BLKIF_RING_SIZE; i++) {
		struct xbdback_io *xbd_io = &xbdi->xbdi_io[i];
		xbd_io->xio_seg =
		    &xbdi->xbdi_segs[i * VBD_MAX_INDIRECT_SEGMENTS];
		error = bus_dmamap_create(xbdi->xbdi_xbusd->xbusd_dmat,
		    PAGE_SIZE, 1, PAGE_SIZE, PAGE_SIZE,
		    BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW,
		    &xbd_io->xio_seg_dmamap);
		if (error != 0) {
			printf("%s: can't create dma map for indirect segments %d\n",
			    xbdi->xbdi_name, i);
			goto fail;
		}
		error = bus_dmamap_load(xbdi->xbdi_xbusd->xbusd_dmat,
		    xbd_io->xio_seg_dmamap, xbd_io->xio_seg,
		    sizeof(struct blkif_request_segment) * VBD_MAX_INDIRECT_SEGMENTS,
		    NULL, BUS_DMA_WAITOK);
		if (error != 0) {
			printf("%s: can't load dma map for indirect segments %d @%p (%d, %zu)\n",
			    xbdi->xbdi_name, i, xbd_io->xio_seg, error, sizeof(xbd_io->xio_seg));
			bus_dmamap_destroy(xbdi->xbdi_xbusd->xbusd_dmat,
			    xbd_io->xio_seg_dmamap);
			goto fail;
		}
		KASSERT(xbd_io->xio_seg_dmamap->dm_nsegs == 1);
		segalloc = i;
		SLIST_INSERT_HEAD(&xbdi->xbdi_io_free, xbd_io, xio_next);
	}

	error = xenbus_watch_path2(xbusd, xbusd->xbusd_path, "physical-device",
	    &xbdi->xbdi_watch, xbdback_backend_changed);
	if (error) {
		printf("failed to watch on %s/physical-device: %d\n",
		    xbusd->xbusd_path, error);
		goto fail;
	}
	xbdi->xbdi_watch.xbw_dev = xbusd;
	error = xenbus_switch_state(xbusd, NULL, XenbusStateInitWait);
	if (error) {
		printf("failed to switch state on %s: %d\n",
		    xbusd->xbusd_path, error);
		goto fail2;
	}

	xbdi->xbdi_bouncebuf = uvm_km_alloc(kernel_map, MAXPHYS, PAGE_SIZE,
	    UVM_KMF_WIRED | UVM_KMF_WAITVA);
	return 0;
fail2:
	unregister_xenbus_watch(&xbdi->xbdi_watch);
fail:
	for (i = 0; i < segalloc; i++) {
		struct xbdback_io *xbd_io = &xbdi->xbdi_io[i];
		bus_dmamap_unload(xbdi->xbdi_xbusd->xbusd_dmat,
		    xbd_io->xio_seg_dmamap);
		bus_dmamap_destroy(xbdi->xbdi_xbusd->xbusd_dmat,
		    xbd_io->xio_seg_dmamap);
	}
	mutex_enter(&xbdback_lock);
	SLIST_REMOVE(&xbdback_instances, xbdi, xbdback_instance, next);
	mutex_exit(&xbdback_lock);
	kmem_free(xbdi, sizeof(*xbdi));
	return error;
}

static int
xbdback_xenbus_destroy(void *arg)
{
	struct xbdback_instance *xbdi = arg;

	XENPRINTF(("xbdback_xenbus_destroy state %d\n", xbdi->xbdi_status));

	xbdback_disconnect(xbdi);

	/* unregister watch */
	if (xbdi->xbdi_watch.node)
		xenbus_unwatch_path(&xbdi->xbdi_watch);
	/* unmap ring */
	if (xbdi->xbdi_ring_handle) {
		xen_shm_unmap(xbdi->xbdi_ring_va, 1, &xbdi->xbdi_ring_handle);
	}

	if (xbdi->xbdi_ring_va != 0) {
		uvm_km_free(kernel_map, xbdi->xbdi_ring_va,
		    PAGE_SIZE, UVM_KMF_VAONLY);
	}

	/* close device */
	if (xbdi->xbdi_size) {
		const char *name;
		struct dkwedge_info wi;
		if (getdiskinfo(xbdi->xbdi_vp, &wi) == 0)
			name = wi.dkw_devname;
		else
			name = "*unknown*";
		printf("xbd backend: detach device %s for domain %d\n",
		    name, xbdi->xbdi_domid);
		vn_close(xbdi->xbdi_vp, FREAD, NOCRED);
	}
	mutex_enter(&xbdback_lock);
	SLIST_REMOVE(&xbdback_instances, xbdi, xbdback_instance, next);
	mutex_exit(&xbdback_lock);

	for (int i = 0; i < BLKIF_RING_SIZE; i++) {
		struct xbdback_io *xbd_io = &xbdi->xbdi_io[i];
		bus_dmamap_unload(xbdi->xbdi_xbusd->xbusd_dmat,
		    xbd_io->xio_seg_dmamap);
		bus_dmamap_destroy(xbdi->xbdi_xbusd->xbusd_dmat,
		    xbd_io->xio_seg_dmamap);
		if (xbdi->xbdi_va[i].xv_vaddr != 0) {
			uvm_km_free(kernel_map, xbdi->xbdi_va[i].xv_vaddr,
			    VBD_VA_SIZE, UVM_KMF_VAONLY);
			xbdi->xbdi_va[i].xv_vaddr = 0;
		}
	}

	mutex_destroy(&xbdi->xbdi_lock);
	cv_destroy(&xbdi->xbdi_cv);
	kmem_free(xbdi, sizeof(*xbdi));
	return 0;
}

static int
xbdback_connect(struct xbdback_instance *xbdi)
{
	int err;
	evtchn_op_t evop;
	grant_ref_t gring_ref;
	u_long ring_ref, revtchn;
	char xsproto[32];
	const char *proto;
	struct xenbus_device *xbusd = xbdi->xbdi_xbusd;

	XENPRINTF(("xbdback %s: connect\n", xbusd->xbusd_path));
	/* read communication information */
	err = xenbus_read_ul(NULL, xbusd->xbusd_otherend,
	    "ring-ref", &ring_ref, 10);
	if (err) {
		xenbus_dev_fatal(xbusd, err, "reading %s/ring-ref",
		    xbusd->xbusd_otherend);
		return -1;
	}
	XENPRINTF(("xbdback %s: connect ring-ref %lu\n", xbusd->xbusd_path, ring_ref));
	err = xenbus_read_ul(NULL, xbusd->xbusd_otherend,
	    "event-channel", &revtchn, 10);
	if (err) {
		xenbus_dev_fatal(xbusd, err, "reading %s/event-channel",
		    xbusd->xbusd_otherend);
		return -1;
	}
	XENPRINTF(("xbdback %s: connect revtchn %lu\n", xbusd->xbusd_path, revtchn));
	err = xenbus_read(NULL, xbusd->xbusd_otherend, "protocol",
	    xsproto, sizeof(xsproto));
	if (err) {
		xbdi->xbdi_proto = XBDIP_NATIVE;
		proto = "unspecified";
		XENPRINTF(("xbdback %s: connect no xsproto\n", xbusd->xbusd_path));
	} else {
		XENPRINTF(("xbdback %s: connect xsproto %s\n", xbusd->xbusd_path, xsproto));
		if (strcmp(xsproto, XEN_IO_PROTO_ABI_NATIVE) == 0) {
			xbdi->xbdi_proto = XBDIP_NATIVE;
			proto = XEN_IO_PROTO_ABI_NATIVE;
		} else if (strcmp(xsproto, XEN_IO_PROTO_ABI_X86_32) == 0) {
			xbdi->xbdi_proto = XBDIP_32;
			proto = XEN_IO_PROTO_ABI_X86_32;
		} else if (strcmp(xsproto, XEN_IO_PROTO_ABI_X86_64) == 0) {
			xbdi->xbdi_proto = XBDIP_64;
			proto = XEN_IO_PROTO_ABI_X86_64;
		} else {
			aprint_error("xbd domain %d: unknown proto %s\n",
			    xbdi->xbdi_domid, xsproto);
			return -1;
		}
	}

	/* allocate VA space and map rings */
	xbdi->xbdi_ring_va = uvm_km_alloc(kernel_map, PAGE_SIZE, 0,
	    UVM_KMF_VAONLY);
	if (xbdi->xbdi_ring_va == 0) {
		xenbus_dev_fatal(xbusd, ENOMEM,
		    "can't get VA for ring", xbusd->xbusd_otherend);
		return -1;
	}
	XENPRINTF(("xbdback %s: connect va 0x%" PRIxVADDR "\n", xbusd->xbusd_path, xbdi->xbdi_ring_va));

	gring_ref = ring_ref;
	if (xen_shm_map(1, xbdi->xbdi_domid, &gring_ref, xbdi->xbdi_ring_va,
	    &xbdi->xbdi_ring_handle, 0) != 0) {
		aprint_error("xbdback %s: can't map grant ref\n",
		    xbusd->xbusd_path);
		xenbus_dev_fatal(xbusd, EINVAL,
		    "can't map ring", xbusd->xbusd_otherend);
		goto err1;
	}
	XENPRINTF(("xbdback %s: connect grhandle %d\n", xbusd->xbusd_path, xbdi->xbdi_ring_handle));

	switch(xbdi->xbdi_proto) {
	case XBDIP_NATIVE:
	{
		blkif_sring_t *sring = (void *)xbdi->xbdi_ring_va;
		BACK_RING_INIT(&xbdi->xbdi_ring.ring_n, sring, PAGE_SIZE);
		break;
	}
	case XBDIP_32:
	{
		blkif_x86_32_sring_t *sring = (void *)xbdi->xbdi_ring_va;
		BACK_RING_INIT(&xbdi->xbdi_ring.ring_32, sring, PAGE_SIZE);
		break;
	}
	case XBDIP_64:
	{
		blkif_x86_64_sring_t *sring = (void *)xbdi->xbdi_ring_va;
		BACK_RING_INIT(&xbdi->xbdi_ring.ring_64, sring, PAGE_SIZE);
		break;
	}
	}

	evop.cmd = EVTCHNOP_bind_interdomain;
	evop.u.bind_interdomain.remote_dom = xbdi->xbdi_domid;
	evop.u.bind_interdomain.remote_port = revtchn;
	err = HYPERVISOR_event_channel_op(&evop);
	if (err) {
		aprint_error("blkback %s: "
		    "can't get event channel: %d\n",
		    xbusd->xbusd_otherend, err);
		xenbus_dev_fatal(xbusd, err,
		    "can't bind event channel", xbusd->xbusd_otherend);
		goto err2;
	}
	xbdi->xbdi_evtchn = evop.u.bind_interdomain.local_port;
	XENPRINTF(("xbdback %s: connect evchannel %d\n", xbusd->xbusd_path, xbdi->xbdi_evtchn));

	xbdi->xbdi_ih = xen_intr_establish_xname(-1, &xen_pic,
	    xbdi->xbdi_evtchn, IST_LEVEL, IPL_BIO, xbdback_evthandler, xbdi,
	    true, xbdi->xbdi_name);
	KASSERT(xbdi->xbdi_ih != NULL);
	aprint_verbose("xbd backend domain %d handle %#x (%d) "
	    "using event channel %d, protocol %s\n", xbdi->xbdi_domid,
	    xbdi->xbdi_handle, xbdi->xbdi_handle, xbdi->xbdi_evtchn, proto);

	/* enable the xbdback event handler machinery */
	xbdi->xbdi_status = WAITING;
	hypervisor_unmask_event(xbdi->xbdi_evtchn);
	hypervisor_notify_via_evtchn(xbdi->xbdi_evtchn);

	if (kthread_create(PRI_NONE, KTHREAD_MPSAFE, NULL,
	    xbdback_thread, xbdi, NULL, "%s", xbdi->xbdi_name) == 0)
		return 0;

err2:
	/* unmap ring */
	xen_shm_unmap(xbdi->xbdi_ring_va, 1, &xbdi->xbdi_ring_handle);
err1:
	/* free ring VA space */
	uvm_km_free(kernel_map, xbdi->xbdi_ring_va, PAGE_SIZE, UVM_KMF_VAONLY);
	return -1;
}

/*
 * Signal a xbdback thread to disconnect. Done in 'xenwatch' thread context.
 */
static void
xbdback_disconnect(struct xbdback_instance *xbdi)
{

	mutex_enter(&xbdi->xbdi_lock);
	if (xbdi->xbdi_status == DISCONNECTED) {
		mutex_exit(&xbdi->xbdi_lock);
		return;
	}
	hypervisor_mask_event(xbdi->xbdi_evtchn);

	/* signal thread that we want to disconnect, then wait for it */
	xbdi->xbdi_status = DISCONNECTING;
	cv_signal(&xbdi->xbdi_cv);

	while (xbdi->xbdi_status != DISCONNECTED)
		cv_wait(&xbdi->xbdi_cv, &xbdi->xbdi_lock);

	mutex_exit(&xbdi->xbdi_lock);
	xen_intr_disestablish(xbdi->xbdi_ih);

	xenbus_switch_state(xbdi->xbdi_xbusd, NULL, XenbusStateClosing);
}

static void
xbdback_frontend_changed(void *arg, XenbusState new_state)
{
	struct xbdback_instance *xbdi = arg;
	struct xenbus_device *xbusd = xbdi->xbdi_xbusd;

	XENPRINTF(("xbdback %s: new state %d\n", xbusd->xbusd_path, new_state));
	switch(new_state) {
	case XenbusStateInitialising:
		break;
	case XenbusStateInitialised:
	case XenbusStateConnected:
		if (xbdi->xbdi_status == WAITING || xbdi->xbdi_status == RUN)
			break;
		xbdback_connect(xbdi);
		break;
	case XenbusStateClosing:
		xbdback_disconnect(xbdi);
		break;
	case XenbusStateClosed:
		/* otherend_changed() should handle it for us */
		panic("xbdback_frontend_changed: closed\n");
	case XenbusStateUnknown:
	case XenbusStateInitWait:
	default:
		aprint_error("xbdback %s: invalid frontend state %d\n",
		    xbusd->xbusd_path, new_state);
	}
	return;
}

static void
xbdback_backend_changed(struct xenbus_watch *watch,
    const char **vec, unsigned int len)
{
	struct xenbus_device *xbusd = watch->xbw_dev;
	struct xbdback_instance *xbdi = xbusd->xbusd_u.b.b_cookie;
	int err;
	long dev;
	char mode[32];
	struct xenbus_transaction *xbt;
	const char *devname;
	int major;

	err = xenbus_read_ul(NULL, xbusd->xbusd_path, "physical-device",
	    &dev, 10);
	/*
	 * An error can occur as the watch can fire up just after being
	 * registered. So we have to ignore the error :(
	 */
	if (err)
		return;
	/*
	 * we can also fire up after having opened the device; don't try
	 * to do it twice.
	 */
	if (xbdi->xbdi_vp != NULL) {
		if (xbdi->xbdi_status == WAITING || xbdi->xbdi_status == RUN) {
			if (xbdi->xbdi_dev != dev) {
				printf("xbdback %s: changing physical device "
				    "from %#"PRIx64" to %#lx not supported\n",
				    xbusd->xbusd_path, xbdi->xbdi_dev, dev);
			}
		}
		return;
	}
	xbdi->xbdi_dev = dev;
	err = xenbus_read(NULL, xbusd->xbusd_path, "mode", mode, sizeof(mode));
	if (err) {
		printf("xbdback: failed to read %s/mode: %d\n",
		    xbusd->xbusd_path, err);
		return;
	}
	if (mode[0] == 'w')
		xbdi->xbdi_ro = false;
	else
		xbdi->xbdi_ro = true;
	major = major(xbdi->xbdi_dev);
	devname = devsw_blk2name(major);
	if (devname == NULL) {
		printf("xbdback %s: unknown device 0x%"PRIx64"\n",
		    xbusd->xbusd_path, xbdi->xbdi_dev);
		return;
	}
	xbdi->xbdi_bdevsw = bdevsw_lookup(xbdi->xbdi_dev);
	if (xbdi->xbdi_bdevsw == NULL) {
		printf("xbdback %s: no bdevsw for device 0x%"PRIx64"\n",
		    xbusd->xbusd_path, xbdi->xbdi_dev);
		return;
	}
	err = bdevvp(xbdi->xbdi_dev, &xbdi->xbdi_vp);
	if (err) {
		printf("xbdback %s: can't open device 0x%"PRIx64": %d\n",
		    xbusd->xbusd_path, xbdi->xbdi_dev, err);
		return;
	}
	err = vn_lock(xbdi->xbdi_vp, LK_EXCLUSIVE | LK_RETRY);
	if (err) {
		printf("xbdback %s: can't vn_lock device 0x%"PRIx64": %d\n",
		    xbusd->xbusd_path, xbdi->xbdi_dev, err);
		vrele(xbdi->xbdi_vp);
		return;
	}
	err = VOP_OPEN(xbdi->xbdi_vp, FREAD, NOCRED);
	if (err) {
		printf("xbdback %s: can't VOP_OPEN device 0x%"PRIx64": %d\n",
		    xbusd->xbusd_path, xbdi->xbdi_dev, err);
		vput(xbdi->xbdi_vp);
		return;
	}
	VOP_UNLOCK(xbdi->xbdi_vp);

	/* dk device; get wedge data */
	struct dkwedge_info wi;
	if ((err = getdiskinfo(xbdi->xbdi_vp, &wi)) == 0) {
		xbdi->xbdi_size = wi.dkw_size;
		printf("xbd backend: attach device %s (size %" PRIu64 ") "
		    "for domain %d\n", wi.dkw_devname, xbdi->xbdi_size,
		    xbdi->xbdi_domid);
	} else {
		/* If getdiskinfo() failed, set device size to 0 and return */
		printf("xbdback %s: can't DIOCGWEDGEINFO device "
		    "0x%"PRIx64": %d\n", xbusd->xbusd_path,
		    xbdi->xbdi_dev, err);
		xbdi->xbdi_size = xbdi->xbdi_dev = 0;
		vn_close(xbdi->xbdi_vp, FREAD, NOCRED);
		xbdi->xbdi_vp = NULL;
		return;
	}
again:
	xbt = xenbus_transaction_start();
	if (xbt == NULL) {
		printf("xbdback %s: can't start transaction\n",
		    xbusd->xbusd_path);
		return;
	}
	err = xenbus_printf(xbt, xbusd->xbusd_path, "sectors", "%" PRIu64,
	    xbdi->xbdi_size);
	if (err) {
		printf("xbdback: failed to write %s/sectors: %d\n",
		    xbusd->xbusd_path, err);
		goto abort;
	}
	err = xenbus_printf(xbt, xbusd->xbusd_path, "info", "%u",
	    xbdi->xbdi_ro ? VDISK_READONLY : 0);
	if (err) {
		printf("xbdback: failed to write %s/info: %d\n",
		    xbusd->xbusd_path, err);
		goto abort;
	}
	err = xenbus_printf(xbt, xbusd->xbusd_path, "sector-size", "%lu",
	    (u_long)DEV_BSIZE);
	if (err) {
		printf("xbdback: failed to write %s/sector-size: %d\n",
		    xbusd->xbusd_path, err);
		goto abort;
	}
	err = xenbus_printf(xbt, xbusd->xbusd_path, "feature-flush-cache",
	    "%u", 1);
	if (err) {
		printf("xbdback: failed to write %s/feature-flush-cache: %d\n",
		    xbusd->xbusd_path, err);
		goto abort;
	}
	err = xenbus_printf(xbt, xbusd->xbusd_path,
	    "feature-max-indirect-segments", "%u", VBD_MAX_INDIRECT_SEGMENTS);
	if (err) {
		printf("xbdback: failed to write %s/feature-indirect: %d\n",
		    xbusd->xbusd_path, err);
		goto abort;
	}
	err = xenbus_transaction_end(xbt, 0);
	if (err == EAGAIN)
		goto again;
	if (err) {
		printf("xbdback %s: can't end transaction: %d\n",
		    xbusd->xbusd_path, err);
	}
	err = xenbus_switch_state(xbusd, NULL, XenbusStateConnected);
	if (err) {
		printf("xbdback %s: can't switch state: %d\n",
		    xbusd->xbusd_path, err);
	}
	return;
abort:
	xenbus_transaction_end(xbt, 1);
}

/*
 * Used by a xbdi thread to signal that it is now disconnected.
 */
static void
xbdback_finish_disconnect(struct xbdback_instance *xbdi)
{
	KASSERT(mutex_owned(&xbdi->xbdi_lock));
	KASSERT(xbdi->xbdi_status == DISCONNECTING);

	xbdi->xbdi_status = DISCONNECTED;

	cv_broadcast(&xbdi->xbdi_cv);
}

static bool
xbdif_lookup(domid_t dom, uint32_t handle)
{
	struct xbdback_instance *xbdi;
	bool found = false;

	KASSERT(mutex_owned(&xbdback_lock));

	SLIST_FOREACH(xbdi, &xbdback_instances, next) {
		if (xbdi->xbdi_domid == dom && xbdi->xbdi_handle == handle) {
			found = true;
			break;
		}
	}

	return found;
}

static int
xbdback_evthandler(void *arg)
{
	struct xbdback_instance *xbdi = arg;

	XENPRINTF(("xbdback_evthandler domain %d: cont %p\n",
	    xbdi->xbdi_domid, xbdi->xbdi_cont));

	mutex_enter(&xbdi->xbdi_lock);
	xbdback_wakeup_thread(xbdi);
	mutex_exit(&xbdi->xbdi_lock);

	return 1;
}

/*
 * Main thread routine for one xbdback instance. Woken up by
 * xbdback_evthandler when a domain has I/O work scheduled in an I/O ring.
 */
static void
xbdback_thread(void *arg)
{
	struct xbdback_instance *xbdi = arg;
	void *obj;

	mutex_enter(&xbdi->xbdi_lock);
	for (;;) {
		switch (xbdi->xbdi_status) {
		case WAITING:
			cv_wait(&xbdi->xbdi_cv, &xbdi->xbdi_lock);
			break;
		case RUN:
			xbdi->xbdi_status = WAITING; /* reset state */
			obj = xbdi;
			if (xbdi->xbdi_cont_restart != NULL) {
				KASSERT(xbdi->xbdi_cont == NULL);
				xbdi->xbdi_cont = xbdi->xbdi_cont_restart;
				obj = xbdi->xbdi_cont_restart_obj;
				xbdi->xbdi_cont_restart = NULL;
				xbdi->xbdi_cont_restart_obj = NULL;
			}
			if (xbdi->xbdi_cont == NULL) {
				xbdi->xbdi_cont = xbdback_co_main;
			}

			xbdback_trampoline(xbdi, obj);
			break;
		case DISCONNECTING:
			if (xbdi->xbdi_pendingreqs > 0) {
				/* there are pending I/Os. Wait for them. */
				cv_wait(&xbdi->xbdi_cv, &xbdi->xbdi_lock);
				continue;
			}

			/* All I/Os should have been processed by now,
			 * xbdi_refcnt should drop to 0 */
			xbdi_put(xbdi);
			KASSERT(xbdi->xbdi_refcnt == 0);
			goto out;
			/* NOTREACHED */
		default:
			panic("%s: invalid state %d",
			    xbdi->xbdi_name, xbdi->xbdi_status);
		}
	}
out:
	mutex_exit(&xbdi->xbdi_lock);

	kthread_exit(0);
}
static void *
xbdback_co_main(struct xbdback_instance *xbdi, void *obj)
{
	(void)obj;

	xbdi->xbdi_req_prod = xbdi->xbdi_ring.ring_n.sring->req_prod;
	xen_rmb(); /* ensure we see all requests up to req_prod */
	/*
	 * note that we'll eventually get a full ring of requests.
	 * in this case, MASK_BLKIF_IDX(req_cons) == MASK_BLKIF_IDX(req_prod)
	 */
	xbdi->xbdi_cont = xbdback_co_main_loop;
	return xbdi;
}

/*
 * Fetch a blkif request from the ring, and pass control to the appropriate
 * continuation.
 * If someone asked for disconnection, do not fetch any more requests from
 * the ring.
 */
static void *
xbdback_co_main_loop(struct xbdback_instance *xbdi, void *obj __unused)
{
	blkif_request_t *req, *reqn;
	blkif_x86_32_request_t *req32;
	blkif_x86_64_request_t *req64;
	blkif_request_indirect_t *rinn;
	blkif_x86_32_request_indirect_t *rin32;
	blkif_x86_64_request_indirect_t *rin64;

	if (xbdi->xbdi_ring.ring_n.req_cons != xbdi->xbdi_req_prod) {
		struct xbdback_io *xbd_io = xbdback_io_get(xbdi);
		uint8_t real_op = 0xff;

		if (xbd_io == NULL) {
			/* retry after iodone */
			xbdi->xbdi_cont = NULL;
			return NULL;
		}
		memset(&xbd_io->u, 0, sizeof(xbd_io->u));

		buf_init(&xbd_io->xio_buf);
		xbd_io->xio_xbdi = xbdi;

		req = &xbd_io->xio_xen_req;
		memset(req, 0, sizeof(*req));

		switch(xbdi->xbdi_proto) {
		case XBDIP_NATIVE:
			reqn = RING_GET_REQUEST(&xbdi->xbdi_ring.ring_n,
			    xbdi->xbdi_ring.ring_n.req_cons);
			real_op = req->operation = reqn->operation;
			if (real_op == BLKIF_OP_INDIRECT) {
				rinn = (blkif_request_indirect_t *)reqn;
				real_op = rinn->indirect_op;
			}
			req->id = reqn->id;
			break;
		case XBDIP_32:
			req32 = RING_GET_REQUEST(&xbdi->xbdi_ring.ring_32,
			    xbdi->xbdi_ring.ring_n.req_cons);
			real_op = req->operation = req32->operation;
			if (real_op == BLKIF_OP_INDIRECT) {
				rin32 = (blkif_x86_32_request_indirect_t*)req32;
				real_op = rin32->indirect_op;
			}
			req->id = req32->id;
			break;
		case XBDIP_64:
			req64 = RING_GET_REQUEST(&xbdi->xbdi_ring.ring_64,
			    xbdi->xbdi_ring.ring_n.req_cons);
			real_op = req->operation = req64->operation;
			if (real_op == BLKIF_OP_INDIRECT) {
				rin64 = (blkif_x86_64_request_indirect_t*)req64;
				real_op = rin64->indirect_op;
			}
			req->id = req64->id;
			break;
		}
		__insn_barrier();
		XENPRINTF(("xbdback op %d req_cons 0x%x req_prod 0x%x "
		    "resp_prod 0x%x id %" PRIu64 "\n", req->operation,
		    xbdi->xbdi_ring.ring_n.req_cons,
		    xbdi->xbdi_req_prod,
		    xbdi->xbdi_ring.ring_n.rsp_prod_pvt,
		    req->id));
		switch (req->operation) {
		case BLKIF_OP_INDIRECT:
			/* just check indirect_op, rest is handled later */
			if (real_op != BLKIF_OP_READ &&
			    real_op != BLKIF_OP_WRITE) {
				if (ratecheck(&xbdi->xbdi_lasterr_time,
				    &xbdback_err_intvl)) {
					printf("%s: unknown ind operation %d\n",
					    xbdi->xbdi_name,
					    real_op);
				}
				goto fail;
			}
			/* FALLTHROUGH */
		case BLKIF_OP_READ:
		case BLKIF_OP_WRITE:
			xbdi->xbdi_cont = xbdback_co_io;
			return xbd_io;
		case BLKIF_OP_FLUSH_DISKCACHE:
			xbdi->xbdi_cont = xbdback_co_cache_flush;
			return xbd_io;
		default:
			if (ratecheck(&xbdi->xbdi_lasterr_time,
			    &xbdback_err_intvl)) {
				printf("%s: unknown operation %d\n",
				    xbdi->xbdi_name, req->operation);
			}
fail:
			xbdback_send_reply(xbdi, req->id, real_op,
			    BLKIF_RSP_ERROR);
			xbdi->xbdi_cont = xbdback_co_main_incr;
			return xbdi;
		}
	} else {
		xbdi->xbdi_cont = xbdback_co_main_done2;
		return xbdi;
	}
}

/*
 * Increment consumer index and move on to the next request. In case
 * we want to disconnect, leave continuation now.
 */
static void *
xbdback_co_main_incr(struct xbdback_instance *xbdi, void *obj __unused)
{
	KASSERT(mutex_owned(&xbdi->xbdi_lock));

	blkif_back_ring_t *ring = &xbdi->xbdi_ring.ring_n;

	ring->req_cons++;

	if (xbdi->xbdi_status == DISCONNECTING)
		xbdi->xbdi_cont = NULL;
	else
		xbdi->xbdi_cont = xbdback_co_main_loop;

	return xbdi;
}

/*
 * Check for requests in the instance's ring. In case there are, start again
 * from the beginning. If not, stall.
 */
static void *
xbdback_co_main_done2(struct xbdback_instance *xbdi, void *obj)
{
	int work_to_do;

	xen_wmb();
	RING_FINAL_CHECK_FOR_REQUESTS(&xbdi->xbdi_ring.ring_n, work_to_do);
	if (work_to_do)
		xbdi->xbdi_cont = xbdback_co_main;
	else
		xbdi->xbdi_cont = NULL;

	return xbdi;
}

/*
 * Frontend requested a cache flush operation.
 */
static void *
xbdback_co_cache_flush(struct xbdback_instance *xbdi, void *obj)
{
	struct xbdback_io *xbd_io = obj;
	KASSERT(xbd_io->xio_xen_req.operation == BLKIF_OP_FLUSH_DISKCACHE);
	if (xbdi->xbdi_pendingreqs > 0) {
		/*
		 * There are pending requests.
		 * Event or iodone() will restart processing
		 */
		xbdi->xbdi_cont_restart = xbdback_co_cache_flush;
		xbdi->xbdi_cont_restart_obj = xbd_io;
		xbdi->xbdi_cont = NULL;
		return NULL;
	}
	xbdi_get(xbdi);
	xbdi->xbdi_cont = xbdback_co_do_io;
	return xbd_io;
}

/*
 * A read or write I/O request must be processed. Do some checks first,
 * then get the segment information directly from the ring request.
 */
static void *
xbdback_co_io(struct xbdback_instance *xbdi, void *obj)
{
	int i, error;
	blkif_request_t *req, *reqn;
	blkif_x86_32_request_t *req32;
	blkif_x86_64_request_t *req64;
	blkif_request_indirect_t *rinn;
	blkif_x86_32_request_indirect_t *rin32;
	blkif_x86_64_request_indirect_t *rin64;
	const char *errstr;
	struct xbdback_io *xbd_io = obj;
	grant_ref_t in_gntref = 0;

	req = &xbd_io->xio_xen_req;

	/* some sanity checks */
	KASSERT(req->operation == BLKIF_OP_READ ||
	    req->operation == BLKIF_OP_WRITE ||
	    req->operation == BLKIF_OP_INDIRECT);

	/* copy request segments */
	switch (xbdi->xbdi_proto) {
	case XBDIP_NATIVE:
		reqn = RING_GET_REQUEST(&xbdi->xbdi_ring.ring_n,
		    xbdi->xbdi_ring.ring_n.req_cons);
		req->handle = reqn->handle;
		req->sector_number = reqn->sector_number;
		if (reqn->operation == BLKIF_OP_INDIRECT) {
			rinn = (blkif_request_indirect_t *)reqn;
			req->operation = rinn->indirect_op;
			req->nr_segments = (uint8_t)rinn->nr_segments;
			if (req->nr_segments > VBD_MAX_INDIRECT_SEGMENTS) {
				errstr = "too many indirect segments";
				goto bad_segments;
			}
			in_gntref = rinn->indirect_grefs[0];
			/* first_sect and segment grefs fetched later */
		} else {
			req->nr_segments = reqn->nr_segments;
			if (req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST) {
				errstr = "too many segments";
				goto bad_segments;
			}
			for (i = 0; i < req->nr_segments; i++)
				xbd_io->xio_seg[i] = reqn->seg[i];
		}
		break;
	case XBDIP_32:
		req32 = RING_GET_REQUEST(&xbdi->xbdi_ring.ring_32,
		    xbdi->xbdi_ring.ring_n.req_cons);
		req->handle = req32->handle;
		req->sector_number = req32->sector_number;
		if (req32->operation == BLKIF_OP_INDIRECT) {
			rin32 = (blkif_x86_32_request_indirect_t *)req32;
			req->operation = rin32->indirect_op;
			req->nr_segments = (uint8_t)rin32->nr_segments;
			if (req->nr_segments > VBD_MAX_INDIRECT_SEGMENTS) {
				errstr = "too many indirect segments";
				goto bad_segments;
			}
			in_gntref = rin32->indirect_grefs[0];
			/* first_sect and segment grefs fetched later */
		} else {
			req->nr_segments = req32->nr_segments;
			if (req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST) {
				errstr = "too many segments";
				goto bad_segments;
			}
			for (i = 0; i < req->nr_segments; i++)
				xbd_io->xio_seg[i] = req32->seg[i];
		}
		break;
	case XBDIP_64:
		req64 = RING_GET_REQUEST(&xbdi->xbdi_ring.ring_64,
		    xbdi->xbdi_ring.ring_n.req_cons);
		req->handle = req64->handle;
		req->sector_number = req64->sector_number;
		if (req64->operation == BLKIF_OP_INDIRECT) {
			rin64 = (blkif_x86_64_request_indirect_t *)req64;
			req->nr_segments = (uint8_t)rin64->nr_segments;
			if (req->nr_segments > VBD_MAX_INDIRECT_SEGMENTS) {
				errstr = "too many indirect segments";
				goto bad_segments;
125980da1c02Sbouyer }
126080da1c02Sbouyer in_gntref = rin64->indirect_grefs[0];
1261c8d41590Sjdolecek /* first_sect and segment grefs fetched later */
1262c8d41590Sjdolecek } else {
1263c8d41590Sjdolecek req->nr_segments = req64->nr_segments;
126480da1c02Sbouyer if (req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST) {
1265*d55161bbSbouyer errstr = "too many segments";
126680da1c02Sbouyer goto bad_segments;
126780da1c02Sbouyer }
126899c9ae6dSjym for (i = 0; i < req->nr_segments; i++)
126980da1c02Sbouyer xbd_io->xio_seg[i] = req64->seg[i];
1270c8d41590Sjdolecek }
127199c9ae6dSjym break;
127299c9ae6dSjym }
127399c9ae6dSjym
1274f64904dbSjdolecek if (req->operation == BLKIF_OP_WRITE) {
1275f64904dbSjdolecek if (xbdi->xbdi_ro) {
1276f64904dbSjdolecek error = EROFS;
1277f64904dbSjdolecek goto end;
1278f64904dbSjdolecek }
1279f64904dbSjdolecek }
1280f64904dbSjdolecek
1281c8d41590Sjdolecek /* Maximum value was already checked earlier in this function */
128280da1c02Sbouyer if (req->nr_segments < 1) {
128380da1c02Sbouyer errstr = "invalid number of segments";
128480da1c02Sbouyer goto bad_segments;
128580da1c02Sbouyer }
1286c8d41590Sjdolecek
128780da1c02Sbouyer /* If segments are on an indirect page, copy them now */
128880da1c02Sbouyer if (in_gntref) {
128980da1c02Sbouyer gnttab_copy_t gop;
129080da1c02Sbouyer paddr_t ma;
129180da1c02Sbouyer
129280da1c02Sbouyer gop.flags = GNTCOPY_source_gref;
129380da1c02Sbouyer gop.len = req->nr_segments
129480da1c02Sbouyer * sizeof(struct blkif_request_segment);
129580da1c02Sbouyer
129680da1c02Sbouyer gop.source.u.ref = in_gntref;
129780da1c02Sbouyer gop.source.offset = 0;
129880da1c02Sbouyer gop.source.domid = xbdi->xbdi_domid;
129980da1c02Sbouyer
130080da1c02Sbouyer ma = xbd_io->xio_seg_dmamap->dm_segs[0].ds_addr;
130180da1c02Sbouyer gop.dest.offset = ma & PAGE_MASK;
130280da1c02Sbouyer gop.dest.domid = DOMID_SELF;
130380da1c02Sbouyer gop.dest.u.gmfn = ma >> PAGE_SHIFT;
130480da1c02Sbouyer
130580da1c02Sbouyer if (HYPERVISOR_grant_table_op(GNTTABOP_copy, &gop, 1) != 0) {
130680da1c02Sbouyer errstr = "GNTTABOP_copy failed";
130780da1c02Sbouyer goto bad_segments;
130880da1c02Sbouyer }
130980da1c02Sbouyer }
131080da1c02Sbouyer
131180da1c02Sbouyer xbdi_get(xbdi);
131270fd7422Sjdolecek xbdi->xbdi_cont = xbdback_co_io_gotio;
131380da1c02Sbouyer return xbd_io;
131477822551Sjym
131580da1c02Sbouyer bad_segments:
1316c8d41590Sjdolecek if (ratecheck(&xbdi->xbdi_lasterr_time, &xbdback_err_intvl)) {
131780da1c02Sbouyer printf("%s: %s\n", xbdi->xbdi_name, errstr);
1318c8d41590Sjdolecek }
1319c8d41590Sjdolecek error = EINVAL;
1320c8d41590Sjdolecek /* FALLTHROUGH */
1321c8d41590Sjdolecek
1322b96fedacSbouyer end:
1323f64904dbSjdolecek xbdback_send_reply(xbdi, req->id, req->operation,
132470fd7422Sjdolecek (error == EROFS) ? BLKIF_RSP_EOPNOTSUPP : BLKIF_RSP_ERROR);
1325b96fedacSbouyer xbdi->xbdi_cont = xbdback_co_main_incr;
1326b96fedacSbouyer return xbdi;
1327b96fedacSbouyer }
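
/*
 * With BLKIF_OP_INDIRECT the segment array does not fit in the
 * fixed-size ring slot, so the frontend grants extra page(s) holding the
 * blkif_request_segment entries and only the grant references travel in
 * the ring. Schematically (one indirect page assumed, which is all this
 * backend consumes via indirect_grefs[0]):
 *
 *	ring slot:	{ indirect_op, nr_segments, id, sector_number,
 *			  indirect_grefs[] }
 *	granted page:	struct blkif_request_segment seg[nr_segments];
 *
 * The GNTTABOP_copy above pulls that segment array into the local
 * xio_seg[] through xio_seg_dmamap, so the rest of the code can treat
 * direct and indirect requests identically.
 */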
1328b96fedacSbouyer
132977822551Sjym /* Prepare an I/O buffer for a xbdback instance */
1330b96fedacSbouyer static void *
1331b96fedacSbouyer xbdback_co_io_gotio(struct xbdback_instance *xbdi, void *obj)
1332b96fedacSbouyer {
133380da1c02Sbouyer struct xbdback_io *xbd_io = obj;
1334b96fedacSbouyer int buf_flags;
133570fd7422Sjdolecek size_t bcount;
133680da1c02Sbouyer blkif_request_t *req = &xbd_io->xio_xen_req;
133780da1c02Sbouyer uint8_t last_sect;
133880da1c02Sbouyer int error;
1339b96fedacSbouyer
134058477cb8Sjdolecek KASSERT(mutex_owned(&xbdi->xbdi_lock));
134180da1c02Sbouyer KASSERT(xbdi->xbdi_refcnt > 0);
1342c8d41590Sjdolecek
134370fd7422Sjdolecek /* Process segments */
134470fd7422Sjdolecek bcount = 0;
134570fd7422Sjdolecek for (int i = 0; i < req->nr_segments; i++) {
134680da1c02Sbouyer struct blkif_request_segment *seg = &xbd_io->xio_seg[i];
134780da1c02Sbouyer if (seg->last_sect > VBD_MAXSECT ||
134880da1c02Sbouyer seg->first_sect > VBD_MAXSECT) {
134980da1c02Sbouyer if (ratecheck(&xbdi->xbdi_lasterr_time,
135080da1c02Sbouyer &xbdback_err_intvl)) {
135180da1c02Sbouyer printf("%s: invalid segment sectors %d %d\n",
135280da1c02Sbouyer xbdi->xbdi_name,
135380da1c02Sbouyer seg->first_sect, seg->last_sect);
135480da1c02Sbouyer }
135580da1c02Sbouyer xbdi->xbdi_pendingreqs++; /* xbdback_io_error() will decrement it */
135680da1c02Sbouyer xbdback_io_error(xbd_io, EINVAL);
135780da1c02Sbouyer /* do not retry */
135880da1c02Sbouyer xbdi->xbdi_cont = xbdback_co_main_incr;
135980da1c02Sbouyer return xbdi;
136080da1c02Sbouyer }
136180da1c02Sbouyer
136280da1c02Sbouyer if (i > 0) {
136380da1c02Sbouyer if (last_sect != VBD_MAXSECT ||
136480da1c02Sbouyer seg->first_sect != 0) {
136580da1c02Sbouyer xbd_io->xio_need_bounce = 1;
136680da1c02Sbouyer }
136780da1c02Sbouyer }
136880da1c02Sbouyer last_sect = seg->last_sect;
1369c8d41590Sjdolecek xbd_io->xio_gref[i] = seg->gref;
1370c8d41590Sjdolecek bcount += (seg->last_sect - seg->first_sect + 1)
137170fd7422Sjdolecek * VBD_BSIZE;
137270fd7422Sjdolecek }
137380da1c02Sbouyer xbd_io->xio_start_offset = xbd_io->xio_seg[0].first_sect * VBD_BSIZE;
137470fd7422Sjdolecek
1375c8d41590Sjdolecek KASSERT(bcount <= MAXPHYS);
137670fd7422Sjdolecek KASSERT(xbd_io->xio_start_offset < PAGE_SIZE);
137774676f87Sjdolecek KASSERT(bcount + xbd_io->xio_start_offset <= VBD_VA_SIZE);
1378b96fedacSbouyer
1379c8d41590Sjdolecek /* Fill-in the buf */
138080da1c02Sbouyer if (req->operation == BLKIF_OP_WRITE) {
13814a780c9aSad buf_flags = B_WRITE;
1382b96fedacSbouyer } else {
13834a780c9aSad buf_flags = B_READ;
1384b96fedacSbouyer }
1385b96fedacSbouyer
1386b96fedacSbouyer xbd_io->xio_buf.b_flags = buf_flags;
13874a780c9aSad xbd_io->xio_buf.b_cflags = 0;
13884a780c9aSad xbd_io->xio_buf.b_oflags = 0;
1389b96fedacSbouyer xbd_io->xio_buf.b_iodone = xbdback_iodone;
1390b96fedacSbouyer xbd_io->xio_buf.b_proc = NULL;
1391b96fedacSbouyer xbd_io->xio_buf.b_vp = xbdi->xbdi_vp;
1392e225b7bdSrmind xbd_io->xio_buf.b_objlock = xbdi->xbdi_vp->v_interlock;
1393b96fedacSbouyer xbd_io->xio_buf.b_dev = xbdi->xbdi_dev;
139470fd7422Sjdolecek xbd_io->xio_buf.b_blkno = req->sector_number;
139570fd7422Sjdolecek xbd_io->xio_buf.b_bcount = bcount;
139680da1c02Sbouyer if (__predict_false(xbd_io->xio_need_bounce)) {
139780da1c02Sbouyer if (__predict_false(xbdi->xbdi_bouncebuf_use)) {
139880da1c02Sbouyer KASSERT(xbdi->xbdi_pendingreqs > 1);
139980da1c02Sbouyer /* retry later */
140080da1c02Sbouyer xbdi->xbdi_cont_restart = xbdback_co_io_gotio;
140180da1c02Sbouyer xbdi->xbdi_cont_restart_obj = xbd_io;
140280da1c02Sbouyer xbdi->xbdi_cont = NULL;
140380da1c02Sbouyer return NULL;
140480da1c02Sbouyer }
140580da1c02Sbouyer xbdi->xbdi_bouncebuf_use++;
140680da1c02Sbouyer KASSERT(xbdi->xbdi_bouncebuf_use == 1);
140780da1c02Sbouyer xbd_io->xio_buf.b_data = (void *)xbdi->xbdi_bouncebuf;
140880da1c02Sbouyer }
140980da1c02Sbouyer xbdi->xbdi_pendingreqs++;
141080da1c02Sbouyer if ((error = xbdback_map_shm(xbd_io)) != 0) {
141180da1c02Sbouyer xbdback_io_error(xbd_io, error);
141280da1c02Sbouyer /* do not retry */
141380da1c02Sbouyer xbdi->xbdi_cont = xbdback_co_main_incr;
141480da1c02Sbouyer return xbdi;
141580da1c02Sbouyer }
141680da1c02Sbouyer if (__predict_true(xbd_io->xio_need_bounce == 0)) {
141780da1c02Sbouyer xbd_io->xio_buf.b_data = (void *)
141880da1c02Sbouyer (xbd_io->xio_vaddr + xbd_io->xio_start_offset);
141980da1c02Sbouyer }
142080da1c02Sbouyer
1422b96fedacSbouyer xbd_io->xio_buf.b_private = xbd_io;
1423b96fedacSbouyer
142454f95b14Sjym xbdi->xbdi_cont = xbdback_co_do_io;
142580da1c02Sbouyer return xbd_io;
1426b96fedacSbouyer }
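
/*
 * A worked example of the bounce test above, assuming 4k pages and
 * 512-byte sectors (so VBD_MAXSECT == 7):
 *
 *	seg[0] = { .first_sect = 2, .last_sect = 7 }	end of page 0
 *	seg[1] = { .first_sect = 0, .last_sect = 4 }	start of page 1
 *
 * The granted pages are mapped back to back at xio_vaddr, so this pair
 * is virtually contiguous (last_sect == VBD_MAXSECT, next first_sect ==
 * 0) and the buf can point straight into the mapping. If seg[1] instead
 * began at first_sect == 1 there would be a hole between the payloads,
 * xio_need_bounce would be set, and the I/O would go through
 * xbdi_bouncebuf.
 */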
1427b96fedacSbouyer
1428b96fedacSbouyer static void
1429b96fedacSbouyer xbdback_io_error(struct xbdback_io *xbd_io, int error)
1430b96fedacSbouyer {
143158477cb8Sjdolecek KASSERT(mutex_owned(&xbd_io->xio_xbdi->xbdi_lock));
143258477cb8Sjdolecek
143358477cb8Sjdolecek struct buf *bp = &xbd_io->xio_buf;
143458477cb8Sjdolecek
143558477cb8Sjdolecek bp->b_error = error;
143658477cb8Sjdolecek xbdback_iodone_locked(xbd_io->xio_xbdi, xbd_io, bp);
1437b96fedacSbouyer }
1438b96fedacSbouyer
143977822551Sjym /*
144054f95b14Sjym * Main xbdback I/O routine. It can either perform a flush operation or
144154f95b14Sjym * schedule a read/write operation.
144277822551Sjym */
144354f95b14Sjym static void *
144454f95b14Sjym xbdback_co_do_io(struct xbdback_instance *xbdi, void *obj)
1445b96fedacSbouyer {
144678af9293Sjdolecek struct xbdback_io *xbd_io = obj;
144780da1c02Sbouyer blkif_request_t *req = &xbd_io->xio_xen_req;
14483d5ae4c8Sbouyer
144980da1c02Sbouyer KASSERT(xbdi->xbdi_refcnt > 0);
145080da1c02Sbouyer
145180da1c02Sbouyer switch (req->operation) {
145277822551Sjym case BLKIF_OP_FLUSH_DISKCACHE:
145377822551Sjym {
1454db95bc7fSbouyer int error;
1455e4821a51Sbouyer int force = 1;
1456db95bc7fSbouyer
145758477cb8Sjdolecek KASSERT(mutex_owned(&xbdi->xbdi_lock));
145858477cb8Sjdolecek mutex_exit(&xbdi->xbdi_lock);
1459e4821a51Sbouyer error = VOP_IOCTL(xbdi->xbdi_vp, DIOCCACHESYNC, &force, FWRITE,
1460db95bc7fSbouyer kauth_cred_get());
146158477cb8Sjdolecek mutex_enter(&xbdi->xbdi_lock);
1462db95bc7fSbouyer if (error) {
1463db95bc7fSbouyer aprint_error("xbdback %s: DIOCCACHESYNC returned %d\n",
1464db95bc7fSbouyer xbdi->xbdi_xbusd->xbusd_path, error);
1465db95bc7fSbouyer if (error == EOPNOTSUPP || error == ENOTTY)
1466db95bc7fSbouyer error = BLKIF_RSP_EOPNOTSUPP;
1467db95bc7fSbouyer else
1468db95bc7fSbouyer error = BLKIF_RSP_ERROR;
1469db95bc7fSbouyer } else
1470db95bc7fSbouyer error = BLKIF_RSP_OKAY;
147180da1c02Sbouyer xbdback_send_reply(xbdi, req->id, req->operation, error);
14722d80bedaSjdolecek xbdback_io_put(xbdi, xbd_io);
1473db95bc7fSbouyer xbdi_put(xbdi);
1474db95bc7fSbouyer xbdi->xbdi_cont = xbdback_co_main_incr;
147554f95b14Sjym return xbdi;
1476db95bc7fSbouyer }
147777822551Sjym case BLKIF_OP_READ:
147877822551Sjym case BLKIF_OP_WRITE:
147980da1c02Sbouyer if (__predict_false(xbd_io->xio_need_bounce) &&
148080da1c02Sbouyer req->operation == BLKIF_OP_WRITE) {
148180da1c02Sbouyer vaddr_t boffset = 0;
148280da1c02Sbouyer for (int i = 0; i < req->nr_segments; i++) {
148380da1c02Sbouyer struct blkif_request_segment *seg =
148480da1c02Sbouyer &xbd_io->xio_seg[i];
148580da1c02Sbouyer vaddr_t segoffset = seg->first_sect * VBD_BSIZE;
148680da1c02Sbouyer size_t segbcount =
148780da1c02Sbouyer (seg->last_sect - seg->first_sect + 1) *
148880da1c02Sbouyer VBD_BSIZE;
148980da1c02Sbouyer KASSERT(segoffset + segbcount <= PAGE_SIZE);
149080da1c02Sbouyer KASSERT(boffset + segbcount < MAXPHYS);
149180da1c02Sbouyer segoffset += PAGE_SIZE * i;
149280da1c02Sbouyer memcpy(
149380da1c02Sbouyer (void *)(xbdi->xbdi_bouncebuf + boffset),
149480da1c02Sbouyer (void *)(xbd_io->xio_vaddr + segoffset),
149580da1c02Sbouyer segbcount);
149680da1c02Sbouyer boffset += segbcount;
149780da1c02Sbouyer }
149880da1c02Sbouyer }
14997cfb6126Sbouyer KASSERT(mutex_owned(&xbdi->xbdi_lock));
15007cfb6126Sbouyer mutex_exit(&xbdi->xbdi_lock);
15013d5ae4c8Sbouyer if ((xbd_io->xio_buf.b_flags & B_READ) == 0) {
1502e225b7bdSrmind mutex_enter(xbd_io->xio_buf.b_vp->v_interlock);
1503b96fedacSbouyer xbd_io->xio_buf.b_vp->v_numoutput++;
1504e225b7bdSrmind mutex_exit(xbd_io->xio_buf.b_vp->v_interlock);
15053d5ae4c8Sbouyer }
150654f95b14Sjym /* will call xbdback_iodone() asynchronously when done */
150770fd7422Sjdolecek bdev_strategy(&xbd_io->xio_buf);
15087cfb6126Sbouyer mutex_enter(&xbdi->xbdi_lock);
150970fd7422Sjdolecek xbdi->xbdi_cont = xbdback_co_main_incr;
151054f95b14Sjym return xbdi;
151177822551Sjym default:
151277822551Sjym /* Should never happen */
151354f95b14Sjym panic("xbdback_co_do_io: unsupported operation %d",
151480da1c02Sbouyer req->operation);
151577822551Sjym }
1516b96fedacSbouyer }
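
/*
 * A worked example of the write-path gather above, again assuming 4k
 * pages and 512-byte sectors, with two non-contiguous segments:
 *
 *	seg[0] = { .first_sect = 0, .last_sect = 3 }	src xio_vaddr + 0x0
 *	seg[1] = { .first_sect = 2, .last_sect = 7 }	src xio_vaddr + 0x1400
 *
 * (0x1400 being 1 * PAGE_SIZE + 2 * VBD_BSIZE). The two copies land at
 * bounce offsets 0x0 and 0x800, so bdev_strategy() sees one dense 5k
 * buffer even though the granted pages contain holes.
 */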
1517b96fedacSbouyer
151854f95b14Sjym /*
151954f95b14Sjym * Called from softint(9) context when an I/O is done: for each request, send
152054f95b14Sjym * back the associated reply to the domain.
152154f95b14Sjym */
1522b96fedacSbouyer static void
1523b96fedacSbouyer xbdback_iodone(struct buf *bp)
1524b96fedacSbouyer {
1525b96fedacSbouyer struct xbdback_io *xbd_io;
1526b96fedacSbouyer struct xbdback_instance *xbdi;
1527bdf8ebffSad
1528b96fedacSbouyer xbd_io = bp->b_private;
152958477cb8Sjdolecek KASSERT(bp == &xbd_io->xio_buf);
1530b96fedacSbouyer xbdi = xbd_io->xio_xbdi;
1531b96fedacSbouyer
153258477cb8Sjdolecek mutex_enter(&xbdi->xbdi_lock);
153358477cb8Sjdolecek xbdback_iodone_locked(xbdi, xbd_io, bp);
153458477cb8Sjdolecek mutex_exit(&xbdi->xbdi_lock);
153558477cb8Sjdolecek }
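
/*
 * How control reaches the callback above, schematically:
 *
 *	bdev_strategy(bp) -> disk driver -> biodone(bp)
 *	    -> softint -> bp->b_iodone(bp) == xbdback_iodone(bp)
 *
 * biodone(9) typically dispatches b_iodone from softint context, which
 * is why this wrapper only takes xbdi_lock and delegates to
 * xbdback_iodone_locked().
 */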
153658477cb8Sjdolecek
153758477cb8Sjdolecek /*
153858477cb8Sjdolecek * This gets reused by xbdback_io_error to report errors from other sources.
153958477cb8Sjdolecek */
154058477cb8Sjdolecek static void
154158477cb8Sjdolecek xbdback_iodone_locked(struct xbdback_instance *xbdi, struct xbdback_io *xbd_io,
154258477cb8Sjdolecek struct buf *bp)
154358477cb8Sjdolecek {
154458477cb8Sjdolecek int status;
154580da1c02Sbouyer blkif_request_t *req = &xbd_io->xio_xen_req;
154658477cb8Sjdolecek
1547b96fedacSbouyer XENPRINTF(("xbdback_io domain %d: iodone ptr 0x%lx\n",
1548b96fedacSbouyer xbdi->xbdi_domid, (long)xbd_io));
1549b96fedacSbouyer
155058477cb8Sjdolecek KASSERT(mutex_owned(&xbdi->xbdi_lock));
155158477cb8Sjdolecek
155270fd7422Sjdolecek KASSERT(bp->b_error != 0 || xbd_io->xio_xv != NULL);
155380da1c02Sbouyer if (__predict_false(xbd_io->xio_need_bounce)) {
155480da1c02Sbouyer KASSERT(xbd_io->xio_buf.b_data == (void *)xbdi->xbdi_bouncebuf);
155580da1c02Sbouyer
155680da1c02Sbouyer KASSERT(req->operation == BLKIF_OP_WRITE ||
155780da1c02Sbouyer req->operation == BLKIF_OP_READ);
155880da1c02Sbouyer
155980da1c02Sbouyer if (req->operation == BLKIF_OP_READ && bp->b_error == 0) {
156080da1c02Sbouyer vaddr_t boffset = 0;
156180da1c02Sbouyer for (int i = 0; i < req->nr_segments; i++) {
156280da1c02Sbouyer struct blkif_request_segment *seg =
156380da1c02Sbouyer &xbd_io->xio_seg[i];
156480da1c02Sbouyer vaddr_t segoffset = seg->first_sect * VBD_BSIZE;
156580da1c02Sbouyer size_t segbcount =
156680da1c02Sbouyer (seg->last_sect - seg->first_sect + 1) *
156780da1c02Sbouyer VBD_BSIZE;
156880da1c02Sbouyer KASSERT(segoffset + segbcount <= PAGE_SIZE);
156980da1c02Sbouyer KASSERT(boffset + segbcount < MAXPHYS);
157080da1c02Sbouyer segoffset += PAGE_SIZE * i;
157180da1c02Sbouyer memcpy(
157280da1c02Sbouyer (void *)(xbd_io->xio_vaddr + segoffset),
157380da1c02Sbouyer (void *)(xbdi->xbdi_bouncebuf + boffset),
157480da1c02Sbouyer segbcount);
157580da1c02Sbouyer boffset += segbcount;
157680da1c02Sbouyer }
157780da1c02Sbouyer }
157880da1c02Sbouyer KASSERT(xbdi->xbdi_bouncebuf_use == 1);
157980da1c02Sbouyer xbdi->xbdi_bouncebuf_use--;
158080da1c02Sbouyer }
158170fd7422Sjdolecek if (xbd_io->xio_xv != NULL)
1582b96fedacSbouyer xbdback_unmap_shm(xbd_io);
1583b96fedacSbouyer
158466fefd11Sad if (bp->b_error != 0) {
1585b96fedacSbouyer printf("xbd IO domain %d: error %d\n",
1586b96fedacSbouyer xbdi->xbdi_domid, bp->b_error);
158770fd7422Sjdolecek status = BLKIF_RSP_ERROR;
1588b96fedacSbouyer } else
158970fd7422Sjdolecek status = BLKIF_RSP_OKAY;
1590b96fedacSbouyer
159180da1c02Sbouyer xbdback_send_reply(xbdi, req->id, req->operation, status);
1592b96fedacSbouyer
1593b96fedacSbouyer xbdi_put(xbdi);
159458477cb8Sjdolecek KASSERT(xbdi->xbdi_pendingreqs > 0);
159558477cb8Sjdolecek xbdi->xbdi_pendingreqs--;
15964a780c9aSad buf_destroy(&xbd_io->xio_buf);
15972d80bedaSjdolecek xbdback_io_put(xbdi, xbd_io);
159854f95b14Sjym
159954f95b14Sjym xbdback_wakeup_thread(xbdi);
1600db95bc7fSbouyer }
160154f95b14Sjym
160254f95b14Sjym /*
160354f95b14Sjym * Wake up the per xbdback instance thread.
160454f95b14Sjym */
160554f95b14Sjym static void
160654f95b14Sjym xbdback_wakeup_thread(struct xbdback_instance *xbdi)
160754f95b14Sjym {
160858477cb8Sjdolecek KASSERT(mutex_owned(&xbdi->xbdi_lock));
160954f95b14Sjym
161054f95b14Sjym /* only set RUN state when we are WAITING for work */
161154f95b14Sjym if (xbdi->xbdi_status == WAITING)
161254f95b14Sjym xbdi->xbdi_status = RUN;
161358477cb8Sjdolecek cv_signal(&xbdi->xbdi_cv);
1614b96fedacSbouyer }
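
/*
 * The matching wait, as a minimal sketch of the consumer side (the real
 * per-instance thread is more involved; this is only the shape of the
 * handshake):
 *
 *	mutex_enter(&xbdi->xbdi_lock);
 *	while (xbdi->xbdi_status == WAITING)
 *		cv_wait(&xbdi->xbdi_cv, &xbdi->xbdi_lock);
 *	... process the ring, then set WAITING again ...
 *	mutex_exit(&xbdi->xbdi_lock);
 *
 * Because xbdi_status is only written under xbdi_lock, a wakeup cannot
 * slip in between the flag test and the cv_wait().
 */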
1615b96fedacSbouyer
1616b96fedacSbouyer /*
1617b96fedacSbouyer * Called once a request has completed: place the reply in the ring and
161854f95b14Sjym * notify the guest OS.
1619b96fedacSbouyer */
1620b96fedacSbouyer static void
1621b96fedacSbouyer xbdback_send_reply(struct xbdback_instance *xbdi, uint64_t id,
1622b96fedacSbouyer int op, int status)
1623b96fedacSbouyer {
1624d3cd2576Sbouyer blkif_response_t *resp_n;
1625d3cd2576Sbouyer blkif_x86_32_response_t *resp32;
1626d3cd2576Sbouyer blkif_x86_64_response_t *resp64;
1627b96fedacSbouyer int notify;
1628b96fedacSbouyer
162958477cb8Sjdolecek KASSERT(mutex_owned(&xbdi->xbdi_lock));
163058477cb8Sjdolecek
163154f95b14Sjym /*
163254f95b14Sjym * The ring can be accessed by the xbdback thread, xbdback_iodone()
163354f95b14Sjym * handler, or any handler that triggered the shm callback. So
163454f95b14Sjym * protect ring access via the xbdi_lock mutex.
163554f95b14Sjym */
1636d3cd2576Sbouyer switch (xbdi->xbdi_proto) {
1637d3cd2576Sbouyer case XBDIP_NATIVE:
1638d3cd2576Sbouyer resp_n = RING_GET_RESPONSE(&xbdi->xbdi_ring.ring_n,
1639d3cd2576Sbouyer xbdi->xbdi_ring.ring_n.rsp_prod_pvt);
1640d3cd2576Sbouyer resp_n->id = id;
1641d3cd2576Sbouyer resp_n->operation = op;
1642d3cd2576Sbouyer resp_n->status = status;
1643e6190ce3Sbouyer break;
1644d3cd2576Sbouyer case XBDIP_32:
1645d3cd2576Sbouyer resp32 = RING_GET_RESPONSE(&xbdi->xbdi_ring.ring_32,
1646d3cd2576Sbouyer xbdi->xbdi_ring.ring_n.rsp_prod_pvt);
1647d3cd2576Sbouyer resp32->id = id;
1648d3cd2576Sbouyer resp32->operation = op;
1649d3cd2576Sbouyer resp32->status = status;
1650e6190ce3Sbouyer break;
1651d3cd2576Sbouyer case XBDIP_64:
1652d3cd2576Sbouyer resp64 = RING_GET_RESPONSE(&xbdi->xbdi_ring.ring_64,
1653d3cd2576Sbouyer xbdi->xbdi_ring.ring_n.rsp_prod_pvt);
1654d3cd2576Sbouyer resp64->id = id;
1655d3cd2576Sbouyer resp64->operation = op;
1656d3cd2576Sbouyer resp64->status = status;
1657e6190ce3Sbouyer break;
1658d3cd2576Sbouyer }
1659d3cd2576Sbouyer xbdi->xbdi_ring.ring_n.rsp_prod_pvt++;
1660d3cd2576Sbouyer RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xbdi->xbdi_ring.ring_n, notify);
166154f95b14Sjym
1662b96fedacSbouyer if (notify) {
1663b96fedacSbouyer XENPRINTF(("xbdback_send_reply notify %d\n", xbdi->xbdi_domid));
1664b96fedacSbouyer hypervisor_notify_via_evtchn(xbdi->xbdi_evtchn);
1665b96fedacSbouyer }
1666b96fedacSbouyer }
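
/*
 * A sketch of what RING_PUSH_RESPONSES_AND_CHECK_NOTIFY() boils down to,
 * per the Xen io/ring.h conventions (see that header for the
 * authoritative definition):
 *
 *	RING_IDX old = ring->sring->rsp_prod;
 *	RING_IDX new = ring->rsp_prod_pvt;
 *	xen_wmb();		make responses visible before the index
 *	ring->sring->rsp_prod = new;
 *	xen_mb();
 *	notify = ((RING_IDX)(new - ring->sring->rsp_event) <
 *	    (RING_IDX)(new - old));
 *
 * The event channel is only kicked when the frontend's rsp_event mark
 * falls within the batch just published, so a burst of completions does
 * not cost one notification per response.
 */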
1667b96fedacSbouyer
1668b96fedacSbouyer /*
166977822551Sjym * Map multiple entries of an I/O request into backend's VA space.
167077822551Sjym * The xbd_io->xio_gref array has to be filled out by the caller.
1671b96fedacSbouyer */
167280da1c02Sbouyer static int
1673b96fedacSbouyer xbdback_map_shm(struct xbdback_io *xbd_io)
1674b96fedacSbouyer {
16758d1b8859Sjdolecek struct xbdback_instance *xbdi = xbd_io->xio_xbdi;
167680da1c02Sbouyer blkif_request_t *req = &xbd_io->xio_xen_req;
167758477cb8Sjdolecek int error;
1678b96fedacSbouyer
1679b96fedacSbouyer #ifdef XENDEBUG_VBD
1680b96fedacSbouyer int i;
1681b96fedacSbouyer printf("xbdback_map_shm map grant ");
168280da1c02Sbouyer for (i = 0; i < req->nr_segments; i++) {
1683b96fedacSbouyer printf("%u ", (u_int)xbd_io->xio_gref[i]);
1684b96fedacSbouyer }
1685b96fedacSbouyer #endif
1686b96fedacSbouyer
168758477cb8Sjdolecek KASSERT(mutex_owned(&xbdi->xbdi_lock));
168880da1c02Sbouyer KASSERT(xbd_io->xio_xv == NULL);
168958477cb8Sjdolecek
16908d1b8859Sjdolecek xbd_io->xio_xv = SLIST_FIRST(&xbdi->xbdi_va_free);
16918d1b8859Sjdolecek KASSERT(xbd_io->xio_xv != NULL);
16928d1b8859Sjdolecek SLIST_REMOVE_HEAD(&xbdi->xbdi_va_free, xv_next);
16938d1b8859Sjdolecek xbd_io->xio_vaddr = xbd_io->xio_xv->xv_vaddr;
16948d1b8859Sjdolecek
169580da1c02Sbouyer error = xen_shm_map(req->nr_segments, xbdi->xbdi_domid,
16968d1b8859Sjdolecek xbd_io->xio_gref, xbd_io->xio_vaddr, xbd_io->xio_gh,
169780da1c02Sbouyer (req->operation == BLKIF_OP_WRITE) ? XSHM_RO : 0);
1698b96fedacSbouyer
1699b96fedacSbouyer switch(error) {
1700b96fedacSbouyer case 0:
1701b96fedacSbouyer #ifdef XENDEBUG_VBD
1702b96fedacSbouyer printf("handle");
170380da1c02Sbouyer for (i = 0; i < req->nr_segments; i++) {
1704b96fedacSbouyer printf(" %u ", (u_int)xbd_io->xio_gh[i]);
1705b96fedacSbouyer }
1706b96fedacSbouyer printf("\n");
1707b96fedacSbouyer #endif
170880da1c02Sbouyer return 0;
1709b96fedacSbouyer default:
1710c144ae48Sjdolecek /* reset xio_xv so error handling won't try to unmap it */
17118d1b8859Sjdolecek SLIST_INSERT_HEAD(&xbdi->xbdi_va_free, xbd_io->xio_xv, xv_next);
17128d1b8859Sjdolecek xbd_io->xio_xv = NULL;
171380da1c02Sbouyer return error;
1714b96fedacSbouyer }
1715b96fedacSbouyer }
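
/*
 * The VA slots come from a fixed per-instance free list (xbdi_va_free),
 * so mapping never sleeps here. Condensed usage of the map/unmap pair
 * around each I/O, as this file does it:
 *
 *	if ((error = xbdback_map_shm(xbd_io)) != 0)
 *		fail the request; xio_xv was reset, so no unmap needed
 *	... bdev_strategy() runs against xio_vaddr + xio_start_offset ...
 *	xbdback_unmap_shm(xbd_io);	in iodone, before the reply
 *
 * xen_shm_map() maps nr_segments grant references contiguously at
 * xio_vaddr; XSHM_RO is used for writes because the backend then only
 * reads the frontend's pages.
 */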
1716b96fedacSbouyer
1717b96fedacSbouyer /* unmap a request from our virtual address space (request is done) */
1718b96fedacSbouyer static void
1719b96fedacSbouyer xbdback_unmap_shm(struct xbdback_io *xbd_io)
1720b96fedacSbouyer {
17218d1b8859Sjdolecek struct xbdback_instance *xbdi = xbd_io->xio_xbdi;
172280da1c02Sbouyer blkif_request_t *req = &xbd_io->xio_xen_req;
17238d1b8859Sjdolecek
1724b96fedacSbouyer #ifdef XENDEBUG_VBD
1725b96fedacSbouyer int i;
1726b96fedacSbouyer printf("xbdback_unmap_shm handle ");
172780da1c02Sbouyer for (i = 0; i < req->nr_segments; i++) {
1728b96fedacSbouyer printf("%u ", (u_int)xbd_io->xio_gh[i]);
1729b96fedacSbouyer }
1730b96fedacSbouyer printf("\n");
1731b96fedacSbouyer #endif
1732b96fedacSbouyer
173370fd7422Sjdolecek KASSERT(xbd_io->xio_xv != NULL);
173480da1c02Sbouyer xen_shm_unmap(xbd_io->xio_vaddr, req->nr_segments,
1735b96fedacSbouyer xbd_io->xio_gh);
17368d1b8859Sjdolecek SLIST_INSERT_HEAD(&xbdi->xbdi_va_free, xbd_io->xio_xv, xv_next);
17378d1b8859Sjdolecek xbd_io->xio_xv = NULL;
1738b96fedacSbouyer xbd_io->xio_vaddr = -1;
1739b96fedacSbouyer }
1740b96fedacSbouyer
174154f95b14Sjym /* Obtain an I/O structure from the instance's free list */
17422d80bedaSjdolecek static struct xbdback_io *
17432d80bedaSjdolecek xbdback_io_get(struct xbdback_instance *xbdi)
1744b96fedacSbouyer {
17452d80bedaSjdolecek struct xbdback_io *xbd_io = SLIST_FIRST(&xbdi->xbdi_io_free);
17462d80bedaSjdolecek SLIST_REMOVE_HEAD(&xbdi->xbdi_io_free, xio_next);
17472d80bedaSjdolecek return xbd_io;
1748b96fedacSbouyer }
1749b96fedacSbouyer
175054f95b14Sjym /* Return an I/O structure to the instance's free list */
175154f95b14Sjym static void
17522d80bedaSjdolecek xbdback_io_put(struct xbdback_instance *xbdi, struct xbdback_io *xbd_io)
1753b96fedacSbouyer {
175480da1c02Sbouyer KASSERT(xbd_io != NULL);
17552d80bedaSjdolecek KASSERT(xbd_io->xio_xv == NULL);
17562d80bedaSjdolecek SLIST_INSERT_HEAD(&xbdi->xbdi_io_free, xbd_io, xio_next);
1757b96fedacSbouyer }
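
/*
 * xbdback_io_get()/xbdback_io_put() implement a LIFO free list guarded
 * by xbdi_lock rather than a pool(9) cache. The pattern, condensed
 * (both calls assume the caller holds xbdi_lock, as asserted throughout
 * this file):
 *
 *	struct xbdback_io *io = xbdback_io_get(xbdi);
 *	... fill in io, map grants, submit ...
 *	xbdback_io_put(xbdi, io);	once the request is replied to
 *
 * The list is presumably pre-populated with as many entries as the ring
 * has slots, which is why xbdback_io_get() does not have to handle an
 * empty list.
 */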
1758b96fedacSbouyer
175977822551Sjym /*
176077822551Sjym * Trampoline routine. Calls continuations in a loop and only exits when
176177822551Sjym * either the returned object or the next callback is NULL.
176277822551Sjym */
1763b96fedacSbouyer static void
1764b96fedacSbouyer xbdback_trampoline(struct xbdback_instance *xbdi, void *obj)
1765b96fedacSbouyer {
1766b96fedacSbouyer xbdback_cont_t cont;
1767b96fedacSbouyer
1768b96fedacSbouyer while (obj != NULL && xbdi->xbdi_cont != NULL) {
176980da1c02Sbouyer KASSERT(xbdi->xbdi_cont_restart == NULL);
177080da1c02Sbouyer KASSERT(xbdi->xbdi_cont_restart_obj == NULL);
1771b96fedacSbouyer cont = xbdi->xbdi_cont;
1772b96fedacSbouyer #ifdef DIAGNOSTIC
1773b96fedacSbouyer xbdi->xbdi_cont = (xbdback_cont_t)0xDEADBEEF;
1774b96fedacSbouyer #endif
1775b96fedacSbouyer obj = (*cont)(xbdi, obj);
1776b96fedacSbouyer #ifdef DIAGNOSTIC
1777b96fedacSbouyer if (xbdi->xbdi_cont == (xbdback_cont_t)0xDEADBEEF) {
1778b96fedacSbouyer printf("xbdback_trampoline: 0x%lx didn't set "
177977822551Sjym "xbdi->xbdi_cont!\n", (long)cont);
1780b96fedacSbouyer panic("xbdback_trampoline: bad continuation");
1781b96fedacSbouyer }
178280da1c02Sbouyer if (xbdi->xbdi_cont_restart != NULL ||
178380da1c02Sbouyer xbdi->xbdi_cont_restart_obj != NULL) {
178480da1c02Sbouyer KASSERT(xbdi->xbdi_cont_restart != NULL);
178580da1c02Sbouyer KASSERT(xbdi->xbdi_cont_restart_obj != NULL);
178680da1c02Sbouyer KASSERT(xbdi->xbdi_cont == NULL);
178780da1c02Sbouyer KASSERT(obj == NULL);
178880da1c02Sbouyer }
1789b96fedacSbouyer #endif
1790b96fedacSbouyer }
1791b96fedacSbouyer }
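
/*
 * The continuation scheme in miniature. Each step does a bounded amount
 * of work, records the next step in xbdi_cont and returns the object to
 * pass along; returning NULL parks the machine until an event (ring
 * interrupt, iodone) restarts it. A hypothetical two-step chain:
 *
 *	static void *
 *	step_a(struct xbdback_instance *x, void *o)
 *	{
 *		x->xbdi_cont = step_b;		continue immediately
 *		return o;
 *	}
 *
 *	static void *
 *	step_b(struct xbdback_instance *x, void *o)
 *	{
 *		x->xbdi_cont = NULL;		park: wait for an event
 *		return NULL;
 *	}
 *
 *	xbdback_trampoline(xbdi, obj);	runs step_a, then step_b, stops
 *
 * This keeps the kernel stack flat across suspension points and makes
 * every blocking point explicit.
 */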