1 /* $NetBSD: xbdback_xenbus.c,v 1.107 2024/06/20 15:17:27 bouyer Exp $ */
2
3 /*
4 * Copyright (c) 2006,2024 Manuel Bouyer.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 *
26 */
27
28 #include <sys/cdefs.h>
29 __KERNEL_RCSID(0, "$NetBSD: xbdback_xenbus.c,v 1.107 2024/06/20 15:17:27 bouyer Exp $");
30
31 #include <sys/buf.h>
32 #include <sys/condvar.h>
33 #include <sys/conf.h>
34 #include <sys/disk.h>
35 #include <sys/device.h>
36 #include <sys/fcntl.h>
37 #include <sys/kauth.h>
38 #include <sys/kernel.h>
39 #include <sys/kmem.h>
40 #include <sys/kthread.h>
41 #include <sys/mutex.h>
42 #include <sys/param.h>
43 #include <sys/queue.h>
44 #include <sys/systm.h>
45 #include <sys/time.h>
46 #include <sys/types.h>
47 #include <sys/vnode.h>
48
49 #include <xen/intr.h>
50 #include <xen/hypervisor.h>
51 #include <xen/xen.h>
52 #include <xen/xen_shm.h>
53 #include <xen/evtchn.h>
54 #include <xen/xenbus.h>
55 #include <xen/xenring.h>
56 #include <xen/include/public/io/protocols.h>
57
58 /* #define XENDEBUG_VBD */
59 #ifdef XENDEBUG_VBD
60 #define XENPRINTF(x) printf x
61 #else
62 #define XENPRINTF(x)
63 #endif
64
65 #define BLKIF_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE)
66
67 /*
68 * Backend block device driver for Xen
69 */
70
71 /* Values are expressed in 512-byte sectors */
72 #define VBD_BSIZE 512
73 #define VBD_MAXSECT ((PAGE_SIZE / VBD_BSIZE) - 1)
74
75 #define VBD_VA_SIZE MAXPHYS
76 #define VBD_MAX_INDIRECT_SEGMENTS (VBD_VA_SIZE >> PAGE_SHIFT)
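/*
 * Illustrative arithmetic (values depend on the platform): with a
 * MAXPHYS of 64KB and 4KB pages this yields 16 indirect segments.
 */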
77
78 CTASSERT(XENSHM_MAX_PAGES_PER_REQUEST >= VBD_MAX_INDIRECT_SEGMENTS);
79
80 struct xbdback_instance;
81
82 /*
83 * status of a xbdback instance:
84 * WAITING: xbdback instance is connected, waiting for requests
85 * RUN: xbdi thread must be woken up, I/Os have to be processed
86 * DISCONNECTING: the instance is closing, no more I/Os can be scheduled
87 * DISCONNECTED: no I/Os, no ring, the thread should terminate.
88 */
89 typedef enum {WAITING, RUN, DISCONNECTING, DISCONNECTED} xbdback_state_t;
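/*
 * Sketch of the transitions as implemented below: xbdback_connect()
 * moves DISCONNECTED -> WAITING, xbdback_wakeup_thread() moves
 * WAITING -> RUN, the instance thread resets RUN -> WAITING once it has
 * picked up the work, xbdback_disconnect() requests DISCONNECTING, and
 * xbdback_finish_disconnect() completes DISCONNECTING -> DISCONNECTED.
 */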
90
91 /*
92 * Each xbdback instance is managed by a single thread that handles all
93 * the I/O processing. As a variety of conditions can block processing,
94 * everything is done in a continuation-passing style.
95 *
96 * When execution has to block to delay processing, for example to
97 * allow the system to recover from a memory shortage (via the shared
98 * memory callback), a continuation can return NULL. In that
99 * case, the thread goes back to sleep and waits for the proper
100 * condition before it resumes processing requests where it left off.
101 * Continuation state is "stored" in the xbdback instance (xbdi_cont),
102 * and should only be manipulated by the instance thread.
103 * If a continuation has to be restarted from a specific point,
104 * the callback and its argument can be stored in xbdi_cont_restart and
105 * xbdi_cont_restart_obj.
106 *
107 *
108 * As xbdback(4) has to handle different sorts of asynchronous events (Xen
109 * event channels, biointr() soft interrupts, xenbus commands), the xbdi_lock
110 * mutex is used to protect specific elements of the xbdback instance from
111 * concurrent access: thread status and ring access (when pushing responses).
112 *
113 * Here's how the call graph is supposed to be for a single I/O:
114 *
115 * xbdback_co_main()
116 * | --> xbdback_co_cache_flush()
117 * | | |
118 * | | -> xbdback_co_do_io() or NULL
119 * xbdback_co_main_loop()-|
120 * | |-> xbdback_co_main_done2() or NULL
121 * | |
122 * | --> xbdback_co_main_incr() -> xbdback_co_main_loop()
123 * |
124 * xbdback_co_io() -> xbdback_co_main_incr() -> xbdback_co_main_loop()
125 * |
126 * xbdback_co_io_gotio() -> xbdback_co_main_incr() -> xbdback_co_main_loop()
127 * |
128 * xbdback_co_do_io()
129 * |
130 * xbdback_co_main_incr() -> xbdback_co_main_loop()
131 */
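/*
 * Roughly, the instance thread drives this chain as follows (a sketch of
 * what xbdback_trampoline() below actually does):
 *
 *	while (obj != NULL && xbdi->xbdi_cont != NULL) {
 *		xbdback_cont_t cont = xbdi->xbdi_cont;
 *		obj = (*cont)(xbdi, obj);
 *	}
 *
 * so each continuation selects its successor by setting xbdi_cont and
 * hands over the next object through its return value.
 */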
132 typedef void *(* xbdback_cont_t)(struct xbdback_instance *, void *);
133
134 enum xbdi_proto {
135 XBDIP_NATIVE,
136 XBDIP_32,
137 XBDIP_64
138 };
139
140 struct xbdback_va {
141 SLIST_ENTRY(xbdback_va) xv_next;
142 vaddr_t xv_vaddr;
143 };
144
145 /*
146 * For each I/O operation requested by the frontend, an
147 * xbdback_io is allocated from a per-instance pool. It may correspond to
148 * multiple Xen disk requests, or parts of them, if several arrive at once
149 * and can be coalesced.
150 */
151 struct xbdback_io {
152 SLIST_ENTRY(xbdback_io) xio_next;
153 /* The instance pointer is duplicated for convenience. */
154 struct xbdback_instance *xio_xbdi; /* our xbd instance */
155 /* _request state: track requests fetched from ring */
156 blkif_request_t xio_xen_req;
157 /* array of segments[VBD_MAX_INDIRECT_SEGMENTS] allocated separately */
158 struct blkif_request_segment *xio_seg;
159 bus_dmamap_t xio_seg_dmamap;
160 /* internal states */
161 union {
162 struct {
163 struct buf xio_buf; /* our I/O */
164 /* the virtual address to map the request at */
165 vaddr_t xio_vaddr;
166 struct xbdback_va *xio_xv;
167 vaddr_t xio_start_offset; /* I/O start offset */
168 /* grants to map */
169 grant_ref_t xio_gref[VBD_MAX_INDIRECT_SEGMENTS];
170 /* grant handles to release */
171 grant_handle_t xio_gh[VBD_MAX_INDIRECT_SEGMENTS];
172 bool xio_need_bounce; /* request is not contiguous */
173 } xio_rw;
174 } u;
175 };
176 #define xio_buf u.xio_rw.xio_buf
177 #define xio_vaddr u.xio_rw.xio_vaddr
178 #define xio_start_offset u.xio_rw.xio_start_offset
179 #define xio_xv u.xio_rw.xio_xv
180 #define xio_gref u.xio_rw.xio_gref
181 #define xio_gh u.xio_rw.xio_gh
182 #define xio_need_bounce u.xio_rw.xio_need_bounce
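/*
 * The defines above are shorthands so the read/write code can say e.g.
 * xbd_io->xio_buf instead of xbd_io->u.xio_rw.xio_buf.
 */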
183
184 /* we keep the xbdback instances in a linked list */
185 struct xbdback_instance {
186 SLIST_ENTRY(xbdback_instance) next;
187 struct xenbus_device *xbdi_xbusd; /* our xenstore entry */
188 struct xenbus_watch xbdi_watch; /* to watch our store */
189 domid_t xbdi_domid; /* attached to this domain */
190 uint32_t xbdi_handle; /* domain-specific handle */
191 char xbdi_name[16]; /* name of this instance */
192 /* mutex that protects concurrent access to the xbdback instance */
193 kmutex_t xbdi_lock;
194 kcondvar_t xbdi_cv; /* wait channel for thread work */
195 xbdback_state_t xbdi_status; /* thread's status */
196 /* context and KVA for mapping transfers */
197 struct xbdback_io xbdi_io[BLKIF_RING_SIZE];
198 SLIST_HEAD(, xbdback_io) xbdi_io_free;
199 struct xbdback_va xbdi_va[BLKIF_RING_SIZE];
200 SLIST_HEAD(, xbdback_va) xbdi_va_free;
201 /* segments structure allocated in page-aligned chunks */
202 struct blkif_request_segment *xbdi_segs;
203 /* bounce buffer in case a transfer is not contiguous */
204 vaddr_t xbdi_bouncebuf;
205 int xbdi_bouncebuf_use; /* is bounce buffer in use? */
206 /* backing device parameters */
207 dev_t xbdi_dev;
208 const struct bdevsw *xbdi_bdevsw; /* pointer to the device's bdevsw */
209 struct vnode *xbdi_vp;
210 uint64_t xbdi_size;
211 bool xbdi_ro; /* is device read-only ? */
212 /* parameters for the communication */
213 unsigned int xbdi_evtchn;
214 struct intrhand *xbdi_ih;
215 /* private parameters for communication */
216 blkif_back_ring_proto_t xbdi_ring;
217 enum xbdi_proto xbdi_proto;
218 grant_handle_t xbdi_ring_handle; /* to unmap the ring */
219 vaddr_t xbdi_ring_va; /* to unmap the ring */
220 /* disconnection must be postponed until all I/O is done */
221 int xbdi_refcnt;
222 /*
223 * State for I/O processing/coalescing follows; this has to
224 * live here instead of on the stack because of the
225 * continuation-ness (see above).
226 */
227 RING_IDX xbdi_req_prod; /* limit on request indices */
228 xbdback_cont_t xbdi_cont;
229 /* if not NULL, will restart here after thread wakes up */
230 xbdback_cont_t xbdi_cont_restart;
231 void *xbdi_cont_restart_obj;
232 /* other state */
233 uint xbdi_pendingreqs; /* number of I/Os in flight */
234 struct timeval xbdi_lasterr_time; /* error time tracking */
235 };
236 /* Manipulation of the above reference count. */
237 #define xbdi_get(xbdip) \
238 do { \
239 KASSERT(mutex_owned(&xbdip->xbdi_lock)); \
240 (xbdip)->xbdi_refcnt++; \
241 } while (0)
242
243 #define xbdi_put(xbdip) \
244 do { \
245 KASSERT(mutex_owned(&xbdip->xbdi_lock)); \
246 if (--((xbdip)->xbdi_refcnt) == 0) \
247 xbdback_finish_disconnect(xbdip); \
248 } while (0)
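/*
 * Usage sketch: an initial reference taken at create time is dropped by
 * the instance thread when it disconnects; an additional reference is
 * taken, with xbdi_lock held, before an I/O or cache flush is handed to
 * xbdback_co_do_io(), and dropped in xbdback_iodone_locked() (or once
 * the flush reply has been sent), so xbdback_finish_disconnect() only
 * runs after the last I/O completes.
 */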
249
250 static SLIST_HEAD(, xbdback_instance) xbdback_instances;
251 static kmutex_t xbdback_lock;
252
253 /* Interval between reports of I/O errors from frontend */
254 static const struct timeval xbdback_err_intvl = { 1, 0 };
255
256 void xbdbackattach(int);
257 static int xbdback_xenbus_create(struct xenbus_device *);
258 static int xbdback_xenbus_destroy(void *);
259 static void xbdback_frontend_changed(void *, XenbusState);
260 static void xbdback_backend_changed(struct xenbus_watch *,
261 const char **, unsigned int);
262 static int xbdback_evthandler(void *);
263
264 static int xbdback_connect(struct xbdback_instance *);
265 static void xbdback_disconnect(struct xbdback_instance *);
266 static void xbdback_finish_disconnect(struct xbdback_instance *);
267
268 static bool xbdif_lookup(domid_t, uint32_t);
269
270 static void *xbdback_co_main(struct xbdback_instance *, void *);
271 static void *xbdback_co_main_loop(struct xbdback_instance *, void *);
272 static void *xbdback_co_main_incr(struct xbdback_instance *, void *);
273 static void *xbdback_co_main_done2(struct xbdback_instance *, void *);
274
275 static void *xbdback_co_cache_flush(struct xbdback_instance *, void *);
276
277 static void *xbdback_co_io(struct xbdback_instance *, void *);
278 static void *xbdback_co_io_gotio(struct xbdback_instance *, void *);
279
280 static void *xbdback_co_do_io(struct xbdback_instance *, void *);
281
282 static void xbdback_io_error(struct xbdback_io *, int);
283 static void xbdback_iodone(struct buf *);
284 static void xbdback_iodone_locked(struct xbdback_instance *,
285 struct xbdback_io *, struct buf *);
286 static void xbdback_send_reply(struct xbdback_instance *, uint64_t, int, int);
287
288 static int xbdback_map_shm(struct xbdback_io *);
289 static void xbdback_unmap_shm(struct xbdback_io *);
290
291 static struct xbdback_io *xbdback_io_get(struct xbdback_instance *);
292 static void xbdback_io_put(struct xbdback_instance *, struct xbdback_io *);
293 static void xbdback_thread(void *);
294 static void xbdback_wakeup_thread(struct xbdback_instance *);
295 static void xbdback_trampoline(struct xbdback_instance *, void *);
296
297 static struct xenbus_backend_driver xbd_backend_driver = {
298 .xbakd_create = xbdback_xenbus_create,
299 .xbakd_type = "vbd"
300 };
301
302 void
303 xbdbackattach(int n)
304 {
305 XENPRINTF(("xbdbackattach\n"));
306
307 /*
308 * initialize the list of instances and the lock protecting it, and
309 * register the backend driver with xenbus.
310 */
311 SLIST_INIT(&xbdback_instances);
312 mutex_init(&xbdback_lock, MUTEX_DEFAULT, IPL_NONE);
313
314 xenbus_backend_register(&xbd_backend_driver);
315 }
316
317 static int
318 xbdback_xenbus_create(struct xenbus_device *xbusd)
319 {
320 struct xbdback_instance *xbdi;
321 long domid, handle;
322 int error, i;
323 int segalloc = 0;
324 char *ep;
325
326 if ((error = xenbus_read_ul(NULL, xbusd->xbusd_path,
327 "frontend-id", &domid, 10)) != 0) {
328 aprint_error("xbdback: can't read %s/frontend-id: %d\n",
329 xbusd->xbusd_path, error);
330 return error;
331 }
332
333 /*
334 * get handle: this is the last component of the path, which is
335 * a decimal number. $path/dev contains the device name, which is not
336 * appropriate.
337 */
338 for (i = strlen(xbusd->xbusd_path); i > 0; i--) {
339 if (xbusd->xbusd_path[i] == '/')
340 break;
341 }
342 if (i == 0) {
343 aprint_error("xbdback: can't parse %s\n",
344 xbusd->xbusd_path);
345 return EFTYPE;
346 }
347 handle = strtoul(&xbusd->xbusd_path[i+1], &ep, 10);
348 if (*ep != '\0') {
349 aprint_error("xbdback: can't parse %s\n",
350 xbusd->xbusd_path);
351 return EFTYPE;
352 }
353
354 xbdi = kmem_zalloc(sizeof(*xbdi), KM_SLEEP);
355
356 xbdi->xbdi_domid = domid;
357 xbdi->xbdi_handle = handle;
358 snprintf(xbdi->xbdi_name, sizeof(xbdi->xbdi_name), "xbdb%di%d",
359 xbdi->xbdi_domid, xbdi->xbdi_handle);
360
361 mutex_enter(&xbdback_lock);
362 if (xbdif_lookup(domid, handle)) {
363 mutex_exit(&xbdback_lock);
364 kmem_free(xbdi, sizeof(*xbdi));
365 return EEXIST;
366 }
367 SLIST_INSERT_HEAD(&xbdback_instances, xbdi, next);
368 mutex_exit(&xbdback_lock);
369
370 /* initialize status and reference counter */
371 xbdi->xbdi_status = DISCONNECTED;
372
373 mutex_init(&xbdi->xbdi_lock, MUTEX_DEFAULT, IPL_BIO);
374 cv_init(&xbdi->xbdi_cv, xbdi->xbdi_name);
375
376 mutex_enter(&xbdi->xbdi_lock);
377 xbdi_get(xbdi);
378 mutex_exit(&xbdi->xbdi_lock);
379
380 xbusd->xbusd_u.b.b_cookie = xbdi;
381 xbusd->xbusd_u.b.b_detach = xbdback_xenbus_destroy;
382 xbusd->xbusd_otherend_changed = xbdback_frontend_changed;
383 xbdi->xbdi_xbusd = xbusd;
384
385 SLIST_INIT(&xbdi->xbdi_va_free);
386 for (i = 0; i < BLKIF_RING_SIZE; i++) {
387 xbdi->xbdi_va[i].xv_vaddr = uvm_km_alloc(kernel_map,
388 VBD_VA_SIZE, 0, UVM_KMF_VAONLY|UVM_KMF_WAITVA);
389 SLIST_INSERT_HEAD(&xbdi->xbdi_va_free, &xbdi->xbdi_va[i],
390 xv_next);
391 }
392
393 /*
394 * allocate page-aligned memory for segments, so that for each
395 * xbdback_io its segments are in a single page.
396 * sizeof(struct blkif_request_segment) * VBD_MAX_INDIRECT_SEGMENTS
397 * is 128, so this helps us avoid a page boundary within a
398 * block of VBD_MAX_INDIRECT_SEGMENTS segments.
399 */
400 CTASSERT(sizeof(struct blkif_request_segment) * VBD_MAX_INDIRECT_SEGMENTS == 128);
401 xbdi->xbdi_segs = (void *)uvm_km_alloc(kernel_map, round_page(
402 sizeof(struct blkif_request_segment) * VBD_MAX_INDIRECT_SEGMENTS * BLKIF_RING_SIZE),
403 PAGE_SIZE, UVM_KMF_WIRED | UVM_KMF_WAITVA);
404
405 SLIST_INIT(&xbdi->xbdi_io_free);
406 for (i = 0; i < BLKIF_RING_SIZE; i++) {
407 struct xbdback_io *xbd_io = &xbdi->xbdi_io[i];
408 xbd_io->xio_seg =
409 &xbdi->xbdi_segs[i * VBD_MAX_INDIRECT_SEGMENTS];
410 error = bus_dmamap_create(xbdi->xbdi_xbusd->xbusd_dmat,
411 PAGE_SIZE, 1, PAGE_SIZE, PAGE_SIZE,
412 BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW,
413 &xbd_io->xio_seg_dmamap);
414 if (error != 0) {
415 printf("%s: can't create dma map for indirect segments %d\n",
416 xbdi->xbdi_name, i);
417 goto fail;
418 }
419 error = bus_dmamap_load(xbdi->xbdi_xbusd->xbusd_dmat,
420 xbd_io->xio_seg_dmamap, xbd_io->xio_seg,
421 sizeof(struct blkif_request_segment) * VBD_MAX_INDIRECT_SEGMENTS,
422 NULL, BUS_DMA_WAITOK);
423 if (error != 0) {
424 printf("%s: can't load dma map for indirect segments %d @%p (%d, %zu)\n",
425 xbdi->xbdi_name, i, xbd_io->xio_seg, error, sizeof(xbd_io->xio_seg));
426 bus_dmamap_destroy(xbdi->xbdi_xbusd->xbusd_dmat,
427 xbd_io->xio_seg_dmamap);
428 goto fail;
429 }
430 KASSERT(xbd_io->xio_seg_dmamap->dm_nsegs == 1);
431 segalloc = i;
432 SLIST_INSERT_HEAD(&xbdi->xbdi_io_free, xbd_io, xio_next);
433 }
434
435 error = xenbus_watch_path2(xbusd, xbusd->xbusd_path, "physical-device",
436 &xbdi->xbdi_watch, xbdback_backend_changed);
437 if (error) {
438 printf("failed to watch on %s/physical-device: %d\n",
439 xbusd->xbusd_path, error);
440 goto fail;
441 }
442 xbdi->xbdi_watch.xbw_dev = xbusd;
443 error = xenbus_switch_state(xbusd, NULL, XenbusStateInitWait);
444 if (error) {
445 printf("failed to switch state on %s: %d\n",
446 xbusd->xbusd_path, error);
447 goto fail2;
448 }
449
450 xbdi->xbdi_bouncebuf = uvm_km_alloc(kernel_map, MAXPHYS, PAGE_SIZE,
451 UVM_KMF_WIRED | UVM_KMF_WAITVA);
452 return 0;
453 fail2:
454 unregister_xenbus_watch(&xbdi->xbdi_watch);
455 fail:
456 for (i = 0; i < segalloc; i++) {
457 struct xbdback_io *xbd_io = &xbdi->xbdi_io[i];
458 bus_dmamap_unload(xbdi->xbdi_xbusd->xbusd_dmat,
459 xbd_io->xio_seg_dmamap);
460 bus_dmamap_destroy(xbdi->xbdi_xbusd->xbusd_dmat,
461 xbd_io->xio_seg_dmamap);
462 }
463 mutex_enter(&xbdback_lock);
464 SLIST_REMOVE(&xbdback_instances, xbdi, xbdback_instance, next);
465 mutex_exit(&xbdback_lock);
466 kmem_free(xbdi, sizeof(*xbdi));
467 return error;
468 }
469
470 static int
471 xbdback_xenbus_destroy(void *arg)
472 {
473 struct xbdback_instance *xbdi = arg;
474
475 XENPRINTF(("xbdback_xenbus_destroy state %d\n", xbdi->xbdi_status));
476
477 xbdback_disconnect(xbdi);
478
479 /* unregister watch */
480 if (xbdi->xbdi_watch.node)
481 xenbus_unwatch_path(&xbdi->xbdi_watch);
482 /* unmap ring */
483 if (xbdi->xbdi_ring_handle) {
484 xen_shm_unmap(xbdi->xbdi_ring_va, 1, &xbdi->xbdi_ring_handle);
485 }
486
487 if (xbdi->xbdi_ring_va != 0) {
488 uvm_km_free(kernel_map, xbdi->xbdi_ring_va,
489 PAGE_SIZE, UVM_KMF_VAONLY);
490 }
491
492 /* close device */
493 if (xbdi->xbdi_size) {
494 const char *name;
495 struct dkwedge_info wi;
496 if (getdiskinfo(xbdi->xbdi_vp, &wi) == 0)
497 name = wi.dkw_devname;
498 else
499 name = "*unknown*";
500 printf("xbd backend: detach device %s for domain %d\n",
501 name, xbdi->xbdi_domid);
502 vn_close(xbdi->xbdi_vp, FREAD, NOCRED);
503 }
504 mutex_enter(&xbdback_lock);
505 SLIST_REMOVE(&xbdback_instances, xbdi, xbdback_instance, next);
506 mutex_exit(&xbdback_lock);
507
508 for (int i = 0; i < BLKIF_RING_SIZE; i++) {
509 struct xbdback_io *xbd_io = &xbdi->xbdi_io[i];
510 bus_dmamap_unload(xbdi->xbdi_xbusd->xbusd_dmat,
511 xbd_io->xio_seg_dmamap);
512 bus_dmamap_destroy(xbdi->xbdi_xbusd->xbusd_dmat,
513 xbd_io->xio_seg_dmamap);
514 if (xbdi->xbdi_va[i].xv_vaddr != 0) {
515 uvm_km_free(kernel_map, xbdi->xbdi_va[i].xv_vaddr,
516 VBD_VA_SIZE, UVM_KMF_VAONLY);
517 xbdi->xbdi_va[i].xv_vaddr = 0;
518 }
519 }
520
521
522 mutex_destroy(&xbdi->xbdi_lock);
523 cv_destroy(&xbdi->xbdi_cv);
524 kmem_free(xbdi, sizeof(*xbdi));
525 return 0;
526 }
527
528 static int
529 xbdback_connect(struct xbdback_instance *xbdi)
530 {
531 int err;
532 evtchn_op_t evop;
533 grant_ref_t gring_ref;
534 u_long ring_ref, revtchn;
535 char xsproto[32];
536 const char *proto;
537 struct xenbus_device *xbusd = xbdi->xbdi_xbusd;
538
539 XENPRINTF(("xbdback %s: connect\n", xbusd->xbusd_path));
540 /* read communication information */
541 err = xenbus_read_ul(NULL, xbusd->xbusd_otherend,
542 "ring-ref", &ring_ref, 10);
543 if (err) {
544 xenbus_dev_fatal(xbusd, err, "reading %s/ring-ref",
545 xbusd->xbusd_otherend);
546 return -1;
547 }
548 XENPRINTF(("xbdback %s: connect ring-ref %lu\n", xbusd->xbusd_path, ring_ref));
549 err = xenbus_read_ul(NULL, xbusd->xbusd_otherend,
550 "event-channel", &revtchn, 10);
551 if (err) {
552 xenbus_dev_fatal(xbusd, err, "reading %s/event-channel",
553 xbusd->xbusd_otherend);
554 return -1;
555 }
556 XENPRINTF(("xbdback %s: connect revtchn %lu\n", xbusd->xbusd_path, revtchn));
557 err = xenbus_read(NULL, xbusd->xbusd_otherend, "protocol",
558 xsproto, sizeof(xsproto));
559 if (err) {
560 xbdi->xbdi_proto = XBDIP_NATIVE;
561 proto = "unspecified";
562 XENPRINTF(("xbdback %s: connect no xsproto\n", xbusd->xbusd_path));
563 } else {
564 XENPRINTF(("xbdback %s: connect xsproto %s\n", xbusd->xbusd_path, xsproto));
565 if (strcmp(xsproto, XEN_IO_PROTO_ABI_NATIVE) == 0) {
566 xbdi->xbdi_proto = XBDIP_NATIVE;
567 proto = XEN_IO_PROTO_ABI_NATIVE;
568 } else if (strcmp(xsproto, XEN_IO_PROTO_ABI_X86_32) == 0) {
569 xbdi->xbdi_proto = XBDIP_32;
570 proto = XEN_IO_PROTO_ABI_X86_32;
571 } else if (strcmp(xsproto, XEN_IO_PROTO_ABI_X86_64) == 0) {
572 xbdi->xbdi_proto = XBDIP_64;
573 proto = XEN_IO_PROTO_ABI_X86_64;
574 } else {
575 aprint_error("xbd domain %d: unknown proto %s\n",
576 xbdi->xbdi_domid, xsproto);
577 return -1;
578 }
579 }
580
581 /* allocate VA space and map rings */
582 xbdi->xbdi_ring_va = uvm_km_alloc(kernel_map, PAGE_SIZE, 0,
583 UVM_KMF_VAONLY);
584 if (xbdi->xbdi_ring_va == 0) {
585 xenbus_dev_fatal(xbusd, ENOMEM,
586 "can't get VA for ring", xbusd->xbusd_otherend);
587 return -1;
588 }
589 XENPRINTF(("xbdback %s: connect va 0x%" PRIxVADDR "\n", xbusd->xbusd_path, xbdi->xbdi_ring_va));
590
591 gring_ref = ring_ref;
592 if (xen_shm_map(1, xbdi->xbdi_domid, &gring_ref, xbdi->xbdi_ring_va,
593 &xbdi->xbdi_ring_handle, 0) != 0) {
594 aprint_error("xbdback %s: can't map grant ref\n",
595 xbusd->xbusd_path);
596 xenbus_dev_fatal(xbusd, EINVAL,
597 "can't map ring", xbusd->xbusd_otherend);
598 goto err1;
599 }
600 XENPRINTF(("xbdback %s: connect grhandle %d\n", xbusd->xbusd_path, xbdi->xbdi_ring_handle));
601
602 switch(xbdi->xbdi_proto) {
603 case XBDIP_NATIVE:
604 {
605 blkif_sring_t *sring = (void *)xbdi->xbdi_ring_va;
606 BACK_RING_INIT(&xbdi->xbdi_ring.ring_n, sring, PAGE_SIZE);
607 break;
608 }
609 case XBDIP_32:
610 {
611 blkif_x86_32_sring_t *sring = (void *)xbdi->xbdi_ring_va;
612 BACK_RING_INIT(&xbdi->xbdi_ring.ring_32, sring, PAGE_SIZE);
613 break;
614 }
615 case XBDIP_64:
616 {
617 blkif_x86_64_sring_t *sring = (void *)xbdi->xbdi_ring_va;
618 BACK_RING_INIT(&xbdi->xbdi_ring.ring_64, sring, PAGE_SIZE);
619 break;
620 }
621 }
622
623 evop.cmd = EVTCHNOP_bind_interdomain;
624 evop.u.bind_interdomain.remote_dom = xbdi->xbdi_domid;
625 evop.u.bind_interdomain.remote_port = revtchn;
626 err = HYPERVISOR_event_channel_op(&evop);
627 if (err) {
628 aprint_error("blkback %s: "
629 "can't get event channel: %d\n",
630 xbusd->xbusd_otherend, err);
631 xenbus_dev_fatal(xbusd, err,
632 "can't bind event channel", xbusd->xbusd_otherend);
633 goto err2;
634 }
635 xbdi->xbdi_evtchn = evop.u.bind_interdomain.local_port;
636 XENPRINTF(("xbdback %s: connect evchannel %d\n", xbusd->xbusd_path, xbdi->xbdi_evtchn));
637
638 xbdi->xbdi_ih = xen_intr_establish_xname(-1, &xen_pic,
639 xbdi->xbdi_evtchn, IST_LEVEL, IPL_BIO, xbdback_evthandler, xbdi,
640 true, xbdi->xbdi_name);
641 KASSERT(xbdi->xbdi_ih != NULL);
642 aprint_verbose("xbd backend domain %d handle %#x (%d) "
643 "using event channel %d, protocol %s\n", xbdi->xbdi_domid,
644 xbdi->xbdi_handle, xbdi->xbdi_handle, xbdi->xbdi_evtchn, proto);
645
646 /* enable the xbdback event handler machinery */
647 xbdi->xbdi_status = WAITING;
648 hypervisor_unmask_event(xbdi->xbdi_evtchn);
649 hypervisor_notify_via_evtchn(xbdi->xbdi_evtchn);
650
651 if (kthread_create(PRI_NONE, KTHREAD_MPSAFE, NULL,
652 xbdback_thread, xbdi, NULL, "%s", xbdi->xbdi_name) == 0)
653 return 0;
654
655 err2:
656 /* unmap ring */
657 xen_shm_unmap(xbdi->xbdi_ring_va, 1, &xbdi->xbdi_ring_handle);
658 err1:
659 /* free ring VA space */
660 uvm_km_free(kernel_map, xbdi->xbdi_ring_va, PAGE_SIZE, UVM_KMF_VAONLY);
661 return -1;
662 }
663
664 /*
665 * Signal an xbdback thread to disconnect. Done in 'xenwatch' thread context.
666 */
667 static void
668 xbdback_disconnect(struct xbdback_instance *xbdi)
669 {
670
671 mutex_enter(&xbdi->xbdi_lock);
672 if (xbdi->xbdi_status == DISCONNECTED) {
673 mutex_exit(&xbdi->xbdi_lock);
674 return;
675 }
676 hypervisor_mask_event(xbdi->xbdi_evtchn);
677
678 /* signal thread that we want to disconnect, then wait for it */
679 xbdi->xbdi_status = DISCONNECTING;
680 cv_signal(&xbdi->xbdi_cv);
681
682 while (xbdi->xbdi_status != DISCONNECTED)
683 cv_wait(&xbdi->xbdi_cv, &xbdi->xbdi_lock);
684
685 mutex_exit(&xbdi->xbdi_lock);
686 xen_intr_disestablish(xbdi->xbdi_ih);
687
688 xenbus_switch_state(xbdi->xbdi_xbusd, NULL, XenbusStateClosing);
689 }
690
691 static void
692 xbdback_frontend_changed(void *arg, XenbusState new_state)
693 {
694 struct xbdback_instance *xbdi = arg;
695 struct xenbus_device *xbusd = xbdi->xbdi_xbusd;
696
697 XENPRINTF(("xbdback %s: new state %d\n", xbusd->xbusd_path, new_state));
698 switch(new_state) {
699 case XenbusStateInitialising:
700 break;
701 case XenbusStateInitialised:
702 case XenbusStateConnected:
703 if (xbdi->xbdi_status == WAITING || xbdi->xbdi_status == RUN)
704 break;
705 xbdback_connect(xbdi);
706 break;
707 case XenbusStateClosing:
708 xbdback_disconnect(xbdi);
709 break;
710 case XenbusStateClosed:
711 /* otherend_changed() should handle it for us */
712 panic("xbdback_frontend_changed: closed\n");
713 case XenbusStateUnknown:
714 case XenbusStateInitWait:
715 default:
716 aprint_error("xbdback %s: invalid frontend state %d\n",
717 xbusd->xbusd_path, new_state);
718 }
719 return;
720 }
721
722 static void
723 xbdback_backend_changed(struct xenbus_watch *watch,
724 const char **vec, unsigned int len)
725 {
726 struct xenbus_device *xbusd = watch->xbw_dev;
727 struct xbdback_instance *xbdi = xbusd->xbusd_u.b.b_cookie;
728 int err;
729 long dev;
730 char mode[32];
731 struct xenbus_transaction *xbt;
732 const char *devname;
733 int major;
734
735 err = xenbus_read_ul(NULL, xbusd->xbusd_path, "physical-device",
736 &dev, 10);
737 /*
738 * An error can occur as the watch can fire just after being
739 * registered, so we have to ignore the error :(
740 */
741 if (err)
742 return;
743 /*
744 * the watch can also fire after the device has been opened;
745 * don't try to open it twice.
746 */
747 if (xbdi->xbdi_vp != NULL) {
748 if (xbdi->xbdi_status == WAITING || xbdi->xbdi_status == RUN) {
749 if (xbdi->xbdi_dev != dev) {
750 printf("xbdback %s: changing physical device "
751 "from %#"PRIx64" to %#lx not supported\n",
752 xbusd->xbusd_path, xbdi->xbdi_dev, dev);
753 }
754 }
755 return;
756 }
757 xbdi->xbdi_dev = dev;
758 err = xenbus_read(NULL, xbusd->xbusd_path, "mode", mode, sizeof(mode));
759 if (err) {
760 printf("xbdback: failed to read %s/mode: %d\n",
761 xbusd->xbusd_path, err);
762 return;
763 }
764 if (mode[0] == 'w')
765 xbdi->xbdi_ro = false;
766 else
767 xbdi->xbdi_ro = true;
768 major = major(xbdi->xbdi_dev);
769 devname = devsw_blk2name(major);
770 if (devname == NULL) {
771 printf("xbdback %s: unknown device 0x%"PRIx64"\n",
772 xbusd->xbusd_path, xbdi->xbdi_dev);
773 return;
774 }
775 xbdi->xbdi_bdevsw = bdevsw_lookup(xbdi->xbdi_dev);
776 if (xbdi->xbdi_bdevsw == NULL) {
777 printf("xbdback %s: no bdevsw for device 0x%"PRIx64"\n",
778 xbusd->xbusd_path, xbdi->xbdi_dev);
779 return;
780 }
781 err = bdevvp(xbdi->xbdi_dev, &xbdi->xbdi_vp);
782 if (err) {
783 printf("xbdback %s: can't open device 0x%"PRIx64": %d\n",
784 xbusd->xbusd_path, xbdi->xbdi_dev, err);
785 return;
786 }
787 err = vn_lock(xbdi->xbdi_vp, LK_EXCLUSIVE | LK_RETRY);
788 if (err) {
789 printf("xbdback %s: can't vn_lock device 0x%"PRIx64": %d\n",
790 xbusd->xbusd_path, xbdi->xbdi_dev, err);
791 vrele(xbdi->xbdi_vp);
792 return;
793 }
794 err = VOP_OPEN(xbdi->xbdi_vp, FREAD, NOCRED);
795 if (err) {
796 printf("xbdback %s: can't VOP_OPEN device 0x%"PRIx64": %d\n",
797 xbusd->xbusd_path, xbdi->xbdi_dev, err);
798 vput(xbdi->xbdi_vp);
799 return;
800 }
801 VOP_UNLOCK(xbdi->xbdi_vp);
802
803 /* dk device; get wedge data */
804 struct dkwedge_info wi;
805 if ((err = getdiskinfo(xbdi->xbdi_vp, &wi)) == 0) {
806 xbdi->xbdi_size = wi.dkw_size;
807 printf("xbd backend: attach device %s (size %" PRIu64 ") "
808 "for domain %d\n", wi.dkw_devname, xbdi->xbdi_size,
809 xbdi->xbdi_domid);
810 } else {
811 /* The ioctl failed: set device size to 0 and return */
812 printf("xbdback %s: can't DIOCGWEDGEINFO device "
813 "0x%"PRIx64": %d\n", xbusd->xbusd_path,
814 xbdi->xbdi_dev, err);
815 xbdi->xbdi_size = xbdi->xbdi_dev = 0;
816 vn_close(xbdi->xbdi_vp, FREAD, NOCRED);
817 xbdi->xbdi_vp = NULL;
818 return;
819 }
820 again:
821 xbt = xenbus_transaction_start();
822 if (xbt == NULL) {
823 printf("xbdback %s: can't start transaction\n",
824 xbusd->xbusd_path);
825 return;
826 }
827 err = xenbus_printf(xbt, xbusd->xbusd_path, "sectors", "%" PRIu64 ,
828 xbdi->xbdi_size);
829 if (err) {
830 printf("xbdback: failed to write %s/sectors: %d\n",
831 xbusd->xbusd_path, err);
832 goto abort;
833 }
834 err = xenbus_printf(xbt, xbusd->xbusd_path, "info", "%u",
835 xbdi->xbdi_ro ? VDISK_READONLY : 0);
836 if (err) {
837 printf("xbdback: failed to write %s/info: %d\n",
838 xbusd->xbusd_path, err);
839 goto abort;
840 }
841 err = xenbus_printf(xbt, xbusd->xbusd_path, "sector-size", "%lu",
842 (u_long)DEV_BSIZE);
843 if (err) {
844 printf("xbdback: failed to write %s/sector-size: %d\n",
845 xbusd->xbusd_path, err);
846 goto abort;
847 }
848 err = xenbus_printf(xbt, xbusd->xbusd_path, "feature-flush-cache",
849 "%u", 1);
850 if (err) {
851 printf("xbdback: failed to write %s/feature-flush-cache: %d\n",
852 xbusd->xbusd_path, err);
853 goto abort;
854 }
855 err = xenbus_printf(xbt, xbusd->xbusd_path,
856 "feature-max-indirect-segments", "%u", VBD_MAX_INDIRECT_SEGMENTS);
857 if (err) {
858 printf("xbdback: failed to write %s/feature-indirect: %d\n",
859 xbusd->xbusd_path, err);
860 goto abort;
861 }
862 err = xenbus_transaction_end(xbt, 0);
863 if (err == EAGAIN)
864 goto again;
865 if (err) {
866 printf("xbdback %s: can't end transaction: %d\n",
867 xbusd->xbusd_path, err);
868 }
869 err = xenbus_switch_state(xbusd, NULL, XenbusStateConnected);
870 if (err) {
871 printf("xbdback %s: can't switch state: %d\n",
872 xbusd->xbusd_path, err);
873 }
874 return;
875 abort:
876 xenbus_transaction_end(xbt, 1);
877 }
878
879 /*
880 * Used by an xbdi thread to signal that it is now disconnected.
881 */
882 static void
883 xbdback_finish_disconnect(struct xbdback_instance *xbdi)
884 {
885 KASSERT(mutex_owned(&xbdi->xbdi_lock));
886 KASSERT(xbdi->xbdi_status == DISCONNECTING);
887
888 xbdi->xbdi_status = DISCONNECTED;
889
890 cv_broadcast(&xbdi->xbdi_cv);
891 }
892
893 static bool
894 xbdif_lookup(domid_t dom, uint32_t handle)
895 {
896 struct xbdback_instance *xbdi;
897 bool found = false;
898
899 KASSERT(mutex_owned(&xbdback_lock));
900
901 SLIST_FOREACH(xbdi, &xbdback_instances, next) {
902 if (xbdi->xbdi_domid == dom && xbdi->xbdi_handle == handle) {
903 found = true;
904 break;
905 }
906 }
907
908 return found;
909 }
910
911 static int
912 xbdback_evthandler(void *arg)
913 {
914 struct xbdback_instance *xbdi = arg;
915
916 XENPRINTF(("xbdback_evthandler domain %d: cont %p\n",
917 xbdi->xbdi_domid, xbdi->xbdi_cont));
918
919 mutex_enter(&xbdi->xbdi_lock);
920 xbdback_wakeup_thread(xbdi);
921 mutex_exit(&xbdi->xbdi_lock);
922
923 return 1;
924 }
925
926 /*
927 * Main thread routine for one xbdback instance. Woken up by
928 * xbdback_evthandler when a domain has I/O work scheduled in an I/O ring.
929 */
930 static void
931 xbdback_thread(void *arg)
932 {
933 struct xbdback_instance *xbdi = arg;
934 void *obj;
935
936 mutex_enter(&xbdi->xbdi_lock);
937 for (;;) {
938 switch (xbdi->xbdi_status) {
939 case WAITING:
940 cv_wait(&xbdi->xbdi_cv, &xbdi->xbdi_lock);
941 break;
942 case RUN:
943 xbdi->xbdi_status = WAITING; /* reset state */
944 obj = xbdi;
945 if (xbdi->xbdi_cont_restart != NULL) {
946 KASSERT(xbdi->xbdi_cont == NULL);
947 xbdi->xbdi_cont = xbdi->xbdi_cont_restart;
948 obj = xbdi->xbdi_cont_restart_obj;
949 xbdi->xbdi_cont_restart = NULL;
950 xbdi->xbdi_cont_restart_obj = NULL;
951 }
952 if (xbdi->xbdi_cont == NULL) {
953 xbdi->xbdi_cont = xbdback_co_main;
954 }
955
956 xbdback_trampoline(xbdi, obj);
957 break;
958 case DISCONNECTING:
959 if (xbdi->xbdi_pendingreqs > 0) {
960 /* there are pending I/Os. Wait for them. */
961 cv_wait(&xbdi->xbdi_cv, &xbdi->xbdi_lock);
962 continue;
963 }
964
965 /* All I/Os should have been processed by now,
966 * xbdi_refcnt should drop to 0 */
967 xbdi_put(xbdi);
968 KASSERT(xbdi->xbdi_refcnt == 0);
969 goto out;
970 /* NOTREACHED */
971 default:
972 panic("%s: invalid state %d",
973 xbdi->xbdi_name, xbdi->xbdi_status);
974 }
975 }
976 out:
977 mutex_exit(&xbdi->xbdi_lock);
978
979 kthread_exit(0);
980 }
981
982 static void *
983 xbdback_co_main(struct xbdback_instance *xbdi, void *obj)
984 {
985 (void)obj;
986
987 xbdi->xbdi_req_prod = xbdi->xbdi_ring.ring_n.sring->req_prod;
988 xen_rmb(); /* ensure we see all requests up to req_prod */
989 /*
990 * note that we'll eventually get a full ring of requests.
991 * In this case, MASK_BLKIF_IDX(req_cons) == MASK_BLKIF_IDX(req_prod)
992 */
993 xbdi->xbdi_cont = xbdback_co_main_loop;
994 return xbdi;
995 }
996
997 /*
998 * Fetch a blkif request from the ring, and pass control to the appropriate
999 * continuation.
1000 * If someone asked for disconnection, do not fetch any more requests from
1001 * the ring.
1002 */
1003 static void *
1004 xbdback_co_main_loop(struct xbdback_instance *xbdi, void *obj __unused)
1005 {
1006 blkif_request_t *req, *reqn;
1007 blkif_x86_32_request_t *req32;
1008 blkif_x86_64_request_t *req64;
1009 blkif_request_indirect_t *rinn;
1010 blkif_x86_32_request_indirect_t *rin32;
1011 blkif_x86_64_request_indirect_t *rin64;
1012
1013 if (xbdi->xbdi_ring.ring_n.req_cons != xbdi->xbdi_req_prod) {
1014 struct xbdback_io *xbd_io = xbdback_io_get(xbdi);
1015 uint8_t real_op = 0xff;
1016
1017 if (xbd_io == NULL) {
1018 /* retry after iodone */
1019 xbdi->xbdi_cont = NULL;
1020 return NULL;
1021 }
1022 memset(&xbd_io->u, 0, sizeof(xbd_io->u));
1023
1024 buf_init(&xbd_io->xio_buf);
1025 xbd_io->xio_xbdi = xbdi;
1026
1027 req = &xbd_io->xio_xen_req;
1028 memset(req, 0, sizeof(*req));
1029
1030 switch(xbdi->xbdi_proto) {
1031 case XBDIP_NATIVE:
1032 reqn = RING_GET_REQUEST(&xbdi->xbdi_ring.ring_n,
1033 xbdi->xbdi_ring.ring_n.req_cons);
1034 real_op = req->operation = reqn->operation;
1035 if (real_op == BLKIF_OP_INDIRECT) {
1036 rinn = (blkif_request_indirect_t *)reqn;
1037 real_op = rinn->indirect_op;
1038 }
1039 req->id = reqn->id;
1040 break;
1041 case XBDIP_32:
1042 req32 = RING_GET_REQUEST(&xbdi->xbdi_ring.ring_32,
1043 xbdi->xbdi_ring.ring_n.req_cons);
1044 real_op = req->operation = req32->operation;
1045 if (real_op == BLKIF_OP_INDIRECT) {
1046 rin32 = (blkif_x86_32_request_indirect_t*)req32;
1047 real_op = rin32->indirect_op;
1048 }
1049 req->id = req32->id;
1050 break;
1051 case XBDIP_64:
1052 req64 = RING_GET_REQUEST(&xbdi->xbdi_ring.ring_64,
1053 xbdi->xbdi_ring.ring_n.req_cons);
1054 real_op = req->operation = req64->operation;
1055 if (real_op == BLKIF_OP_INDIRECT) {
1056 rin64 = (blkif_x86_64_request_indirect_t*)req64;
1057 real_op = rin64->indirect_op;
1058 }
1059 req->id = req64->id;
1060 break;
1061 }
1062 __insn_barrier();
1063 XENPRINTF(("xbdback op %d req_cons 0x%x req_prod 0x%x "
1064 "resp_prod 0x%x id %" PRIu64 "\n", req->operation,
1065 xbdi->xbdi_ring.ring_n.req_cons,
1066 xbdi->xbdi_req_prod,
1067 xbdi->xbdi_ring.ring_n.rsp_prod_pvt,
1068 req->id));
1069 switch (req->operation) {
1070 case BLKIF_OP_INDIRECT:
1071 /* just check indirect_op, rest is handled later */
1072 if (real_op != BLKIF_OP_READ &&
1073 real_op != BLKIF_OP_WRITE) {
1074 if (ratecheck(&xbdi->xbdi_lasterr_time,
1075 &xbdback_err_intvl)) {
1076 printf("%s: unknown ind operation %d\n",
1077 xbdi->xbdi_name,
1078 real_op);
1079 }
1080 goto fail;
1081 }
1082 /* FALLTHROUGH */
1083 case BLKIF_OP_READ:
1084 case BLKIF_OP_WRITE:
1085 xbdi->xbdi_cont = xbdback_co_io;
1086 return xbd_io;
1087 case BLKIF_OP_FLUSH_DISKCACHE:
1088 xbdi->xbdi_cont = xbdback_co_cache_flush;
1089 return xbd_io;
1090 default:
1091 if (ratecheck(&xbdi->xbdi_lasterr_time,
1092 &xbdback_err_intvl)) {
1093 printf("%s: unknown operation %d\n",
1094 xbdi->xbdi_name, req->operation);
1095 }
1096 fail:
1097 xbdback_send_reply(xbdi, req->id, real_op,
1098 BLKIF_RSP_ERROR);
1099 xbdi->xbdi_cont = xbdback_co_main_incr;
1100 return xbdi;
1101 }
1102 } else {
1103 xbdi->xbdi_cont = xbdback_co_main_done2;
1104 return xbdi;
1105 }
1106 }
1107
1108 /*
1109 * Increment the consumer index and move on to the next request. If
1110 * we want to disconnect, leave the continuation chain now.
1111 */
1112 static void *
1113 xbdback_co_main_incr(struct xbdback_instance *xbdi, void *obj __unused)
1114 {
1115 KASSERT(mutex_owned(&xbdi->xbdi_lock));
1116
1117 blkif_back_ring_t *ring = &xbdi->xbdi_ring.ring_n;
1118
1119 ring->req_cons++;
1120
1121 if (xbdi->xbdi_status == DISCONNECTING)
1122 xbdi->xbdi_cont = NULL;
1123 else
1124 xbdi->xbdi_cont = xbdback_co_main_loop;
1125
1126 return xbdi;
1127 }
1128
1129 /*
1130 * Check for requests in the instance's ring. If there are any, start again
1131 * from the beginning. If not, stall.
1132 */
1133 static void *
1134 xbdback_co_main_done2(struct xbdback_instance *xbdi, void *obj)
1135 {
1136 int work_to_do;
1137
1138 xen_wmb();
1139 RING_FINAL_CHECK_FOR_REQUESTS(&xbdi->xbdi_ring.ring_n, work_to_do);
1140 if (work_to_do)
1141 xbdi->xbdi_cont = xbdback_co_main;
1142 else
1143 xbdi->xbdi_cont = NULL;
1144
1145 return xbdi;
1146 }
1147
1148 /*
1149 * Frontend requested a cache flush operation.
1150 */
1151 static void *
1152 xbdback_co_cache_flush(struct xbdback_instance *xbdi, void *obj)
1153 {
1154 struct xbdback_io *xbd_io = obj;
1155 KASSERT(xbd_io->xio_xen_req.operation == BLKIF_OP_FLUSH_DISKCACHE);
1156 if (xbdi->xbdi_pendingreqs > 0) {
1157 /*
1158 * There are pending requests.
1159 * Event or iodone() will restart processing
1160 */
1161 xbdi->xbdi_cont_restart = xbdback_co_cache_flush;
1162 xbdi->xbdi_cont_restart_obj = xbd_io;
1163 xbdi->xbdi_cont = NULL;
1164 return NULL;
1165 }
1166 xbdi_get(xbdi);
1167 xbdi->xbdi_cont = xbdback_co_do_io;
1168 return xbd_io;
1169 }
1170
1171 /*
1172 * A read or write I/O request must be processed. Do some checks first,
1173 * then get the segment information directly from the ring request.
1174 */
1175 static void *
1176 xbdback_co_io(struct xbdback_instance *xbdi, void *obj)
1177 {
1178 int i, error;
1179 blkif_request_t *req, *reqn;
1180 blkif_x86_32_request_t *req32;
1181 blkif_x86_64_request_t *req64;
1182 blkif_request_indirect_t *rinn;
1183 blkif_x86_32_request_indirect_t *rin32;
1184 blkif_x86_64_request_indirect_t *rin64;
1185 const char *errstr;
1186 struct xbdback_io *xbd_io = obj;
1187 grant_ref_t in_gntref = 0;
1188
1189 req = &xbd_io->xio_xen_req;
1190
1191 /* some sanity checks */
1192 KASSERT(req->operation == BLKIF_OP_READ ||
1193 req->operation == BLKIF_OP_WRITE ||
1194 req->operation == BLKIF_OP_INDIRECT);
1195
1196 /* copy request segments */
1197 switch (xbdi->xbdi_proto) {
1198 case XBDIP_NATIVE:
1199 reqn = RING_GET_REQUEST(&xbdi->xbdi_ring.ring_n,
1200 xbdi->xbdi_ring.ring_n.req_cons);
1201 req->handle = reqn->handle;
1202 req->sector_number = reqn->sector_number;
1203 if (reqn->operation == BLKIF_OP_INDIRECT) {
1204 rinn = (blkif_request_indirect_t *)reqn;
1205 req->operation = rinn->indirect_op;
1206 req->nr_segments = (uint8_t)rinn->nr_segments;
1207 if (req->nr_segments > VBD_MAX_INDIRECT_SEGMENTS) {
1208 errstr = "too many indirect segments";
1209 goto bad_segments;
1210 }
1211 in_gntref = rinn->indirect_grefs[0];
1212 /* first_sect and segment grefs fetched later */
1213 } else {
1214 req->nr_segments = reqn->nr_segments;
1215 if (req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST) {
1216 errstr = "too many segments";
1217 goto bad_segments;
1218 }
1219 for (i = 0; i < req->nr_segments; i++)
1220 xbd_io->xio_seg[i] = reqn->seg[i];
1221 }
1222 break;
1223 case XBDIP_32:
1224 req32 = RING_GET_REQUEST(&xbdi->xbdi_ring.ring_32,
1225 xbdi->xbdi_ring.ring_n.req_cons);
1226 req->handle = req32->handle;
1227 req->sector_number = req32->sector_number;
1228 if (req32->operation == BLKIF_OP_INDIRECT) {
1229 rin32 = (blkif_x86_32_request_indirect_t *)req32;
1230 req->operation = rin32->indirect_op;
1231 req->nr_segments = (uint8_t)rin32->nr_segments;
1232 if (req->nr_segments > VBD_MAX_INDIRECT_SEGMENTS) {
1233 errstr = "too many indirect segments";
1234 goto bad_segments;
1235 }
1236 in_gntref = rin32->indirect_grefs[0];
1237 /* first_sect and segment grefs fetched later */
1238 } else {
1239 req->nr_segments = req32->nr_segments;
1240 if (req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST) {
1241 errstr = "too many segments";
1242 goto bad_segments;
1243 }
1244 for (i = 0; i < req->nr_segments; i++)
1245 xbd_io->xio_seg[i] = req32->seg[i];
1246 }
1247 break;
1248 case XBDIP_64:
1249 req64 = RING_GET_REQUEST(&xbdi->xbdi_ring.ring_64,
1250 xbdi->xbdi_ring.ring_n.req_cons);
1251 req->handle = req64->handle;
1252 req->sector_number = req64->sector_number;
1253 if (req64->operation == BLKIF_OP_INDIRECT) {
1254 rin64 = (blkif_x86_64_request_indirect_t *)req64;
1255 req->nr_segments = (uint8_t)rin64->nr_segments;
1256 if (req->nr_segments > VBD_MAX_INDIRECT_SEGMENTS) {
1257 errstr = "too many indirect segments";
1258 goto bad_segments;
1259 }
1260 in_gntref = rin64->indirect_grefs[0];
1261 /* first_sect and segment grefs fetched later */
1262 } else {
1263 req->nr_segments = req64->nr_segments;
1264 if (req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST) {
1265 errstr = "too many segments";
1266 goto bad_segments;
1267 }
1268 for (i = 0; i < req->nr_segments; i++)
1269 xbd_io->xio_seg[i] = req64->seg[i];
1270 }
1271 break;
1272 }
1273
1274 if (req->operation == BLKIF_OP_WRITE) {
1275 if (xbdi->xbdi_ro) {
1276 error = EROFS;
1277 goto end;
1278 }
1279 }
1280
1281 /* Max value checked already earlier */
1282 if (req->nr_segments < 1) {
1283 errstr = "invalid number of segments";
1284 goto bad_segments;
1285 }
1286
1287 /* If segments are on an indirect page, copy them now */
1288 if (in_gntref) {
1289 gnttab_copy_t gop;
1290 paddr_t ma;
1291
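/*
 * Copy the segment array from the frontend's indirect page straight
 * into xio_seg: the source is the guest's grant reference, the
 * destination is the DMA address of the preloaded xio_seg_dmamap,
 * split into a frame number and a page offset.
 */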
1292 gop.flags = GNTCOPY_source_gref;
1293 gop.len = req->nr_segments
1294 * sizeof(struct blkif_request_segment);
1295
1296 gop.source.u.ref = in_gntref;
1297 gop.source.offset = 0;
1298 gop.source.domid = xbdi->xbdi_domid;
1299
1300 ma = xbd_io->xio_seg_dmamap->dm_segs[0].ds_addr;
1301 gop.dest.offset = ma & PAGE_MASK;
1302 gop.dest.domid = DOMID_SELF;
1303 gop.dest.u.gmfn = ma >> PAGE_SHIFT;
1304
1305 if (HYPERVISOR_grant_table_op(GNTTABOP_copy, &gop, 1) != 0) {
1306 errstr = "GNTTABOP_copy failed";
1307 goto bad_segments;
1308 }
1309 }
1310
1311 xbdi_get(xbdi);
1312 xbdi->xbdi_cont = xbdback_co_io_gotio;
1313 return xbd_io;
1314
1315 bad_segments:
1316 if (ratecheck(&xbdi->xbdi_lasterr_time, &xbdback_err_intvl)) {
1317 printf("%s: %s\n", xbdi->xbdi_name, errstr);
1318 }
1319 error = EINVAL;
1320 /* FALLTHROUGH */
1321
1322 end:
1323 xbdback_send_reply(xbdi, req->id, req->operation,
1324 (error == EROFS) ? BLKIF_RSP_EOPNOTSUPP : BLKIF_RSP_ERROR);
1325 xbdi->xbdi_cont = xbdback_co_main_incr;
1326 return xbdi;
1327 }
1328
1329 /* Prepare an I/O buffer for an xbdback instance */
1330 static void *
1331 xbdback_co_io_gotio(struct xbdback_instance *xbdi, void *obj)
1332 {
1333 struct xbdback_io *xbd_io = obj;
1334 int buf_flags;
1335 size_t bcount;
1336 blkif_request_t *req = &xbd_io->xio_xen_req;
1337 uint8_t last_sect;
1338 int error;
1339
1340 KASSERT(mutex_owned(&xbdi->xbdi_lock));
1341 KASSERT(xbdi->xbdi_refcnt > 0);
1342
1343 /* Process segments */
1344 bcount = 0;
1345 for (int i = 0; i < req->nr_segments; i++) {
1346 struct blkif_request_segment *seg = &xbd_io->xio_seg[i];
1347 if (seg->last_sect > VBD_MAXSECT ||
1348 seg->first_sect > VBD_MAXSECT) {
1349 if (ratecheck(&xbdi->xbdi_lasterr_time,
1350 &xbdback_err_intvl)) {
1351 printf("%s: invalid segment sectors %d %d\n",
1352 xbdi->xbdi_name,
1353 seg->first_sect, seg->last_sect);
1354 }
1355 xbdi->xbdi_pendingreqs++; /* xbdback_io_error will -- */
1356 xbdback_io_error(xbd_io, EINVAL);
1357 /* do not retry */
1358 xbdi->xbdi_cont = xbdback_co_main_incr;
1359 return xbdi;
1360 }
1361
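/*
 * Each grant maps a full page: unless every segment but the last ends
 * at VBD_MAXSECT and every segment but the first starts at sector 0,
 * the data is not contiguous in the mapped VA range and the transfer
 * has to go through the bounce buffer.
 */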
1362 if (i > 0) {
1363 if (last_sect != VBD_MAXSECT ||
1364 seg->first_sect != 0) {
1365 xbd_io->xio_need_bounce = 1;
1366 }
1367 }
1368 last_sect = seg->last_sect;
1369 xbd_io->xio_gref[i] = seg->gref;
1370 bcount += (seg->last_sect - seg->first_sect + 1)
1371 * VBD_BSIZE;
1372 }
1373 xbd_io->xio_start_offset = xbd_io->xio_seg[0].first_sect * VBD_BSIZE;
1374
1375 KASSERT(bcount <= MAXPHYS);
1376 KASSERT(xbd_io->xio_start_offset < PAGE_SIZE);
1377 KASSERT(bcount + xbd_io->xio_start_offset <= VBD_VA_SIZE);
1378
1379 /* Fill-in the buf */
1380 if (req->operation == BLKIF_OP_WRITE) {
1381 buf_flags = B_WRITE;
1382 } else {
1383 buf_flags = B_READ;
1384 }
1385
1386 xbd_io->xio_buf.b_flags = buf_flags;
1387 xbd_io->xio_buf.b_cflags = 0;
1388 xbd_io->xio_buf.b_oflags = 0;
1389 xbd_io->xio_buf.b_iodone = xbdback_iodone;
1390 xbd_io->xio_buf.b_proc = NULL;
1391 xbd_io->xio_buf.b_vp = xbdi->xbdi_vp;
1392 xbd_io->xio_buf.b_objlock = xbdi->xbdi_vp->v_interlock;
1393 xbd_io->xio_buf.b_dev = xbdi->xbdi_dev;
1394 xbd_io->xio_buf.b_blkno = req->sector_number;
1395 xbd_io->xio_buf.b_bcount = bcount;
1396 if (__predict_false(xbd_io->xio_need_bounce)) {
1397 if (__predict_false(xbdi->xbdi_bouncebuf_use)) {
1398 KASSERT(xbdi->xbdi_pendingreqs > 1);
1399 /* retry later */
1400 xbdi->xbdi_cont_restart = xbdback_co_io_gotio;
1401 xbdi->xbdi_cont_restart_obj = xbd_io;
1402 xbdi->xbdi_cont = NULL;
1403 return NULL;
1404 }
1405 xbdi->xbdi_bouncebuf_use++;
1406 KASSERT(xbdi->xbdi_bouncebuf_use == 1);
1407 xbd_io->xio_buf.b_data = (void *)xbdi->xbdi_bouncebuf;
1408 }
1409 xbdi->xbdi_pendingreqs++;
1410 if ((error = xbdback_map_shm(xbd_io)) != 0) {
1411 xbdback_io_error(xbd_io, error);
1412 /* do not retry */
1413 xbdi->xbdi_cont = xbdback_co_main_incr;
1414 return xbdi;
1415 }
1416 if (__predict_true(xbd_io->xio_need_bounce == 0)) {
1417 xbd_io->xio_buf.b_data = (void *)
1418 (xbd_io->xio_vaddr + xbd_io->xio_start_offset);
1419 }
1420
1421
1422 xbd_io->xio_buf.b_private = xbd_io;
1423
1424 xbdi->xbdi_cont = xbdback_co_do_io;
1425 return xbd_io;
1426 }
1427
1428 static void
1429 xbdback_io_error(struct xbdback_io *xbd_io, int error)
1430 {
1431 KASSERT(mutex_owned(&xbd_io->xio_xbdi->xbdi_lock));
1432
1433 struct buf *bp = &xbd_io->xio_buf;
1434
1435 bp->b_error = error;
1436 xbdback_iodone_locked(xbd_io->xio_xbdi, xbd_io, bp);
1437 }
1438
1439 /*
1440 * Main xbdback I/O routine. It can either perform a flush operation or
1441 * schedule a read/write operation.
1442 */
1443 static void *
1444 xbdback_co_do_io(struct xbdback_instance *xbdi, void *obj)
1445 {
1446 struct xbdback_io *xbd_io = obj;
1447 blkif_request_t *req = &xbd_io->xio_xen_req;
1448
1449 KASSERT(xbdi->xbdi_refcnt > 0);
1450
1451 switch (req->operation) {
1452 case BLKIF_OP_FLUSH_DISKCACHE:
1453 {
1454 int error;
1455 int force = 1;
1456
1457 KASSERT(mutex_owned(&xbdi->xbdi_lock));
1458 mutex_exit(&xbdi->xbdi_lock);
1459 error = VOP_IOCTL(xbdi->xbdi_vp, DIOCCACHESYNC, &force, FWRITE,
1460 kauth_cred_get());
1461 mutex_enter(&xbdi->xbdi_lock);
1462 if (error) {
1463 aprint_error("xbdback %s: DIOCCACHESYNC returned %d\n",
1464 xbdi->xbdi_xbusd->xbusd_path, error);
1465 if (error == EOPNOTSUPP || error == ENOTTY)
1466 error = BLKIF_RSP_EOPNOTSUPP;
1467 else
1468 error = BLKIF_RSP_ERROR;
1469 } else
1470 error = BLKIF_RSP_OKAY;
1471 xbdback_send_reply(xbdi, req->id, req->operation, error);
1472 xbdback_io_put(xbdi, xbd_io);
1473 xbdi_put(xbdi);
1474 xbdi->xbdi_cont = xbdback_co_main_incr;
1475 return xbdi;
1476 }
1477 case BLKIF_OP_READ:
1478 case BLKIF_OP_WRITE:
1479 if (__predict_false(xbd_io->xio_need_bounce) &&
1480 req->operation == BLKIF_OP_WRITE) {
1481 vaddr_t boffset = 0;
1482 for (int i = 0; i < req->nr_segments; i++) {
1483 struct blkif_request_segment *seg =
1484 &xbd_io->xio_seg[i];
1485 vaddr_t segoffset = seg->first_sect * VBD_BSIZE;
1486 size_t segbcount =
1487 (seg->last_sect - seg->first_sect + 1) *
1488 VBD_BSIZE;
1489 KASSERT(segoffset + segbcount <= PAGE_SIZE);
1490 KASSERT(boffset + segbcount < MAXPHYS);
1491 segoffset += PAGE_SIZE * i;
1492 memcpy(
1493 (void *)(xbdi->xbdi_bouncebuf + boffset),
1494 (void *)(xbd_io->xio_vaddr + segoffset),
1495 segbcount);
1496 boffset += segbcount;
1497 }
1498 }
1499 KASSERT(mutex_owned(&xbdi->xbdi_lock));
1500 mutex_exit(&xbdi->xbdi_lock);
1501 if ((xbd_io->xio_buf.b_flags & B_READ) == 0) {
1502 mutex_enter(xbd_io->xio_buf.b_vp->v_interlock);
1503 xbd_io->xio_buf.b_vp->v_numoutput++;
1504 mutex_exit(xbd_io->xio_buf.b_vp->v_interlock);
1505 }
1506 /* will call xbdback_iodone() asynchronously when done */
1507 bdev_strategy(&xbd_io->xio_buf);
1508 mutex_enter(&xbdi->xbdi_lock);
1509 xbdi->xbdi_cont = xbdback_co_main_incr;
1510 return xbdi;
1511 default:
1512 /* Should never happen */
1513 panic("xbdback_co_do_io: unsupported operation %d",
1514 req->operation);
1515 }
1516 }
1517
1518 /*
1519 * Called from softint(9) context when an I/O is done: for each request, send
1520 * back the associated reply to the domain.
1521 */
1522 static void
1523 xbdback_iodone(struct buf *bp)
1524 {
1525 struct xbdback_io *xbd_io;
1526 struct xbdback_instance *xbdi;
1527
1528 xbd_io = bp->b_private;
1529 KASSERT(bp == &xbd_io->xio_buf);
1530 xbdi = xbd_io->xio_xbdi;
1531
1532 mutex_enter(&xbdi->xbdi_lock);
1533 xbdback_iodone_locked(xbdi, xbd_io, bp);
1534 mutex_exit(&xbdi->xbdi_lock);
1535 }
1536
1537 /*
1538 * This gets reused by xbdback_io_error to report errors from other sources.
1539 */
1540 static void
1541 xbdback_iodone_locked(struct xbdback_instance *xbdi, struct xbdback_io *xbd_io,
1542 struct buf *bp)
1543 {
1544 int status;
1545 blkif_request_t *req = &xbd_io->xio_xen_req;
1546
1547 XENPRINTF(("xbdback_io domain %d: iodone ptr 0x%lx\n",
1548 xbdi->xbdi_domid, (long)xbd_io));
1549
1550 KASSERT(mutex_owned(&xbdi->xbdi_lock));
1551
1552 KASSERT(bp->b_error != 0 || xbd_io->xio_xv != NULL);
1553 if (__predict_false(xbd_io->xio_need_bounce)) {
1554 KASSERT(xbd_io->xio_buf.b_data == (void *)xbdi->xbdi_bouncebuf);
1555
1556 KASSERT(req->operation == BLKIF_OP_WRITE ||
1557 req->operation == BLKIF_OP_READ);
1558
1559 if (req->operation == BLKIF_OP_READ && bp->b_error == 0) {
1560 vaddr_t boffset = 0;
1561 for (int i = 0; i < req->nr_segments; i++) {
1562 struct blkif_request_segment *seg =
1563 &xbd_io->xio_seg[i];
1564 vaddr_t segoffset = seg->first_sect * VBD_BSIZE;
1565 size_t segbcount =
1566 (seg->last_sect - seg->first_sect + 1) *
1567 VBD_BSIZE;
1568 KASSERT(segoffset + segbcount <= PAGE_SIZE);
1569 KASSERT(boffset + segbcount < MAXPHYS);
1570 segoffset += PAGE_SIZE * i;
1571 memcpy(
1572 (void *)(xbd_io->xio_vaddr + segoffset),
1573 (void *)(xbdi->xbdi_bouncebuf + boffset),
1574 segbcount);
1575 boffset += segbcount;
1576 }
1577 }
1578 KASSERT(xbdi->xbdi_bouncebuf_use == 1);
1579 xbdi->xbdi_bouncebuf_use--;
1580 }
1581 if (xbd_io->xio_xv != NULL)
1582 xbdback_unmap_shm(xbd_io);
1583
1584 if (bp->b_error != 0) {
1585 printf("xbd IO domain %d: error %d\n",
1586 xbdi->xbdi_domid, bp->b_error);
1587 status = BLKIF_RSP_ERROR;
1588 } else
1589 status = BLKIF_RSP_OKAY;
1590
1591 xbdback_send_reply(xbdi, req->id, req->operation, status);
1592
1593 xbdi_put(xbdi);
1594 KASSERT(xbdi->xbdi_pendingreqs > 0);
1595 xbdi->xbdi_pendingreqs--;
1596 buf_destroy(&xbd_io->xio_buf);
1597 xbdback_io_put(xbdi, xbd_io);
1598
1599 xbdback_wakeup_thread(xbdi);
1600 }
1601
1602 /*
1603 * Wake up the per-instance xbdback thread.
1604 */
1605 static void
1606 xbdback_wakeup_thread(struct xbdback_instance *xbdi)
1607 {
1608 KASSERT(mutex_owned(&xbdi->xbdi_lock));
1609
1610 /* only set RUN state when we are WAITING for work */
1611 if (xbdi->xbdi_status == WAITING)
1612 xbdi->xbdi_status = RUN;
1613 cv_signal(&xbdi->xbdi_cv);
1614 }
1615
1616 /*
1617 * Called once a request has completed: place the reply in the ring and
1618 * notify the guest OS.
1619 */
1620 static void
1621 xbdback_send_reply(struct xbdback_instance *xbdi, uint64_t id,
1622 int op, int status)
1623 {
1624 blkif_response_t *resp_n;
1625 blkif_x86_32_response_t *resp32;
1626 blkif_x86_64_response_t *resp64;
1627 int notify;
1628
1629 KASSERT(mutex_owned(&xbdi->xbdi_lock));
1630
1631 /*
1632 * The ring can be accessed by the xbdback thread, xbdback_iodone()
1633 * handler, or any handler that triggered the shm callback. So
1634 * protect ring access via the xbdi_lock mutex.
1635 */
1636 switch (xbdi->xbdi_proto) {
1637 case XBDIP_NATIVE:
1638 resp_n = RING_GET_RESPONSE(&xbdi->xbdi_ring.ring_n,
1639 xbdi->xbdi_ring.ring_n.rsp_prod_pvt);
1640 resp_n->id = id;
1641 resp_n->operation = op;
1642 resp_n->status = status;
1643 break;
1644 case XBDIP_32:
1645 resp32 = RING_GET_RESPONSE(&xbdi->xbdi_ring.ring_32,
1646 xbdi->xbdi_ring.ring_n.rsp_prod_pvt);
1647 resp32->id = id;
1648 resp32->operation = op;
1649 resp32->status = status;
1650 break;
1651 case XBDIP_64:
1652 resp64 = RING_GET_RESPONSE(&xbdi->xbdi_ring.ring_64,
1653 xbdi->xbdi_ring.ring_n.rsp_prod_pvt);
1654 resp64->id = id;
1655 resp64->operation = op;
1656 resp64->status = status;
1657 break;
1658 }
1659 xbdi->xbdi_ring.ring_n.rsp_prod_pvt++;
1660 RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xbdi->xbdi_ring.ring_n, notify);
1661
1662 if (notify) {
1663 XENPRINTF(("xbdback_send_reply notify %d\n", xbdi->xbdi_domid));
1664 hypervisor_notify_via_evtchn(xbdi->xbdi_evtchn);
1665 }
1666 }
1667
1668 /*
1669 * Map the multiple segments of an I/O request into the backend's VA space.
1670 * The xbd_io->xio_gref array has to be filled out by the caller.
1671 */
1672 static int
1673 xbdback_map_shm(struct xbdback_io *xbd_io)
1674 {
1675 struct xbdback_instance *xbdi = xbd_io->xio_xbdi;
1676 blkif_request_t *req = &xbd_io->xio_xen_req;
1677 int error;
1678
1679 #ifdef XENDEBUG_VBD
1680 int i;
1681 printf("xbdback_map_shm map grant ");
1682 for (i = 0; i < req->nr_segments; i++) {
1683 printf("%u ", (u_int)xbd_io->xio_gref[i]);
1684 }
1685 #endif
1686
1687 KASSERT(mutex_owned(&xbdi->xbdi_lock));
1688 KASSERT(xbd_io->xio_xv == NULL);
1689
1690 xbd_io->xio_xv = SLIST_FIRST(&xbdi->xbdi_va_free);
1691 KASSERT(xbd_io->xio_xv != NULL);
1692 SLIST_REMOVE_HEAD(&xbdi->xbdi_va_free, xv_next);
1693 xbd_io->xio_vaddr = xbd_io->xio_xv->xv_vaddr;
1694
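/*
 * For a BLKIF_OP_WRITE the frontend's pages are only read from, so they
 * are mapped read-only (XSHM_RO); reads need a writable mapping.
 */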
1695 error = xen_shm_map(req->nr_segments, xbdi->xbdi_domid,
1696 xbd_io->xio_gref, xbd_io->xio_vaddr, xbd_io->xio_gh,
1697 (req->operation == BLKIF_OP_WRITE) ? XSHM_RO : 0);
1698
1699 switch(error) {
1700 case 0:
1701 #ifdef XENDEBUG_VBD
1702 printf("handle");
1703 for (i = 0; i < req->nr_segments; i++) {
1704 printf(" %u ", (u_int)xbd_io->xio_gh[i]);
1705 }
1706 printf("\n");
1707 #endif
1708 return 0;
1709 default:
1710 /* reset xio_xv so error handling won't try to unmap it */
1711 SLIST_INSERT_HEAD(&xbdi->xbdi_va_free, xbd_io->xio_xv, xv_next);
1712 xbd_io->xio_xv = NULL;
1713 return error;
1714 }
1715 }
1716
1717 /* unmap a request from our virtual address space (request is done) */
1718 static void
1719 xbdback_unmap_shm(struct xbdback_io *xbd_io)
1720 {
1721 struct xbdback_instance *xbdi = xbd_io->xio_xbdi;
1722 blkif_request_t *req = &xbd_io->xio_xen_req;
1723
1724 #ifdef XENDEBUG_VBD
1725 int i;
1726 printf("xbdback_unmap_shm handle ");
1727 for (i = 0; i < req->nr_segments; i++) {
1728 printf("%u ", (u_int)xbd_io->xio_gh[i]);
1729 }
1730 printf("\n");
1731 #endif
1732
1733 KASSERT(xbd_io->xio_xv != NULL);
1734 xen_shm_unmap(xbd_io->xio_vaddr, req->nr_segments,
1735 xbd_io->xio_gh);
1736 SLIST_INSERT_HEAD(&xbdi->xbdi_va_free, xbd_io->xio_xv, xv_next);
1737 xbd_io->xio_xv = NULL;
1738 xbd_io->xio_vaddr = -1;
1739 }
1740
1741 /* Obtain an xbdback_io from the instance's free list */
1742 static struct xbdback_io *
1743 xbdback_io_get(struct xbdback_instance *xbdi)
1744 {
1745 struct xbdback_io *xbd_io = SLIST_FIRST(&xbdi->xbdi_io_free);
1746 if (xbd_io != NULL) /* may be empty; caller retries after iodone */ SLIST_REMOVE_HEAD(&xbdi->xbdi_io_free, xio_next);
1747 return xbd_io;
1748 }
1749
1750 /* Return an xbdback_io to the instance's free list */
1751 static void
1752 xbdback_io_put(struct xbdback_instance *xbdi, struct xbdback_io *xbd_io)
1753 {
1754 KASSERT(xbd_io != NULL);
1755 KASSERT(xbd_io->xio_xv == NULL);
1756 SLIST_INSERT_HEAD(&xbdi->xbdi_io_free, xbd_io, xio_next);
1757 }
1758
1759 /*
1760 * Trampoline routine. Calls continuations in a loop and only exits when
1761 * either the returned object or the next callback is NULL.
1762 */
1763 static void
1764 xbdback_trampoline(struct xbdback_instance *xbdi, void *obj)
1765 {
1766 xbdback_cont_t cont;
1767
1768 while(obj != NULL && xbdi->xbdi_cont != NULL) {
1769 KASSERT(xbdi->xbdi_cont_restart == NULL);
1770 KASSERT(xbdi->xbdi_cont_restart_obj == NULL);
1771 cont = xbdi->xbdi_cont;
1772 #ifdef DIAGNOSTIC
1773 xbdi->xbdi_cont = (xbdback_cont_t)0xDEADBEEF;
1774 #endif
1775 obj = (*cont)(xbdi, obj);
1776 #ifdef DIAGNOSTIC
1777 if (xbdi->xbdi_cont == (xbdback_cont_t)0xDEADBEEF) {
1778 printf("xbdback_trampoline: 0x%lx didn't set "
1779 "xbdi->xbdi_cont!\n", (long)cont);
1780 panic("xbdback_trampoline: bad continuation");
1781 }
1782 if (xbdi->xbdi_cont_restart != NULL ||
1783 xbdi->xbdi_cont_restart_obj != NULL) {
1784 KASSERT(xbdi->xbdi_cont_restart != NULL);
1785 KASSERT(xbdi->xbdi_cont_restart_obj != NULL);
1786 KASSERT(xbdi->xbdi_cont == NULL);
1787 KASSERT(obj == NULL);
1788 }
1789 #endif
1790 }
1791 }
1792