xref: /netbsd-src/sys/arch/xen/xen/xbd_xenbus.c (revision f82d7874c259b2a6cc59b714f844919f32bf7b51)
1 /*      $NetBSD: xbd_xenbus.c,v 1.27 2008/05/03 08:23:41 plunky Exp $      */
2 
3 /*
4  * Copyright (c) 2006 Manuel Bouyer.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. All advertising materials mentioning features or use of this software
15  *    must display the following acknowledgement:
16  *      This product includes software developed by Manuel Bouyer.
17  * 4. The name of the author may not be used to endorse or promote products
18  *    derived from this software without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
21  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
22  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
23  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
24  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
25  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
29  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30  *
31  */
32 
33 #include <sys/cdefs.h>
34 __KERNEL_RCSID(0, "$NetBSD: xbd_xenbus.c,v 1.27 2008/05/03 08:23:41 plunky Exp $");
35 
36 #include "opt_xen.h"
37 #include "rnd.h"
38 
39 #include <sys/param.h>
40 #include <sys/buf.h>
41 #include <sys/bufq.h>
42 #include <sys/device.h>
43 #include <sys/disk.h>
44 #include <sys/disklabel.h>
45 #include <sys/conf.h>
46 #include <sys/fcntl.h>
47 #include <sys/kernel.h>
48 #include <sys/malloc.h>
49 #include <sys/proc.h>
50 #include <sys/systm.h>
51 #include <sys/stat.h>
52 #include <sys/vnode.h>
53 
54 #include <dev/dkvar.h>
55 
56 #include <uvm/uvm.h>
57 
58 #include <xen/xen3-public/io/ring.h>
59 #include <xen/xen3-public/io/blkif.h>
60 
61 #include <xen/granttables.h>
62 #include <xen/xenbus.h>
63 #include "locators.h"
64 
65 #undef XBD_DEBUG
66 #ifdef XBD_DEBUG
67 #define DPRINTF(x) printf x
68 #else
69 #define DPRINTF(x)
70 #endif
71 
72 #define GRANT_INVALID_REF -1
73 
74 #define XBD_RING_SIZE __RING_SIZE((blkif_sring_t *)0, PAGE_SIZE)
75 
76 #define XEN_BSHIFT      9               /* log2(XEN_BSIZE) */
77 #define XEN_BSIZE       (1 << XEN_BSHIFT)
78 
79 struct xbd_req {
80 	SLIST_ENTRY(xbd_req) req_next;
81 	uint16_t req_id; /* ID passed to backend */
82 	grant_ref_t req_gntref[BLKIF_MAX_SEGMENTS_PER_REQUEST];
83 	int req_nr_segments; /* number of segments in this request */
84 	struct buf *req_bp; /* buffer associated with this request */
85 	void *req_data; /* pointer to the data buffer */
86 };
87 
88 struct xbd_xenbus_softc {
89 	device_t sc_dev;
90 	struct dk_softc sc_dksc;
91 	struct dk_intf *sc_di;
92 	struct xenbus_device *sc_xbusd;
93 
94 	blkif_front_ring_t sc_ring;
95 
96 	unsigned int sc_evtchn;
97 
98 	grant_ref_t sc_ring_gntref;
99 
100 	struct xbd_req sc_reqs[XBD_RING_SIZE];
101 	SLIST_HEAD(,xbd_req) sc_xbdreq_head; /* list of free requests */
102 
103 	int sc_backend_status; /* our status with backend */
104 #define BLKIF_STATE_DISCONNECTED 0
105 #define BLKIF_STATE_CONNECTED    1
106 #define BLKIF_STATE_SUSPENDED    2
107 	int sc_shutdown;
108 
109 	uint64_t sc_sectors; /* number of sectors for this device */
110 	u_long sc_secsize; /* sector size */
111 	uint64_t sc_xbdsize; /* size of disk in DEV_BSIZE */
112 	u_long sc_info; /* VDISK_* */
113 	u_long sc_handle; /* from backend */
114 };
115 
116 #if 0
117 /* too big to be on stack */
118 static multicall_entry_t rq_mcl[XBD_RING_SIZE+1];
119 static paddr_t rq_pages[XBD_RING_SIZE];
120 #endif
121 
122 static int  xbd_xenbus_match(device_t, cfdata_t, void *);
123 static void xbd_xenbus_attach(device_t, device_t, void *);
124 static int  xbd_xenbus_detach(device_t, int);
125 
126 static int  xbd_xenbus_resume(void *);
127 static int  xbd_handler(void *);
128 static int  xbdstart(struct dk_softc *, struct buf *);
129 static void xbd_backend_changed(void *, XenbusState);
130 static void xbd_connect(struct xbd_xenbus_softc *);
131 
132 static int  xbd_map_align(struct xbd_req *);
133 static void xbd_unmap_align(struct xbd_req *);
134 
135 CFATTACH_DECL_NEW(xbd_xenbus, sizeof(struct xbd_xenbus_softc),
136    xbd_xenbus_match, xbd_xenbus_attach, xbd_xenbus_detach, NULL);
137 
138 dev_type_open(xbdopen);
139 dev_type_close(xbdclose);
140 dev_type_read(xbdread);
141 dev_type_write(xbdwrite);
142 dev_type_ioctl(xbdioctl);
143 dev_type_strategy(xbdstrategy);
144 dev_type_dump(xbddump);
145 dev_type_size(xbdsize);
146 
147 const struct bdevsw xbd_bdevsw = {
148 	xbdopen, xbdclose, xbdstrategy, xbdioctl,
149 	xbddump, xbdsize, D_DISK
150 };
151 
152 const struct cdevsw xbd_cdevsw = {
153 	xbdopen, xbdclose, xbdread, xbdwrite, xbdioctl,
154 	nostop, notty, nopoll, nommap, nokqfilter, D_DISK
155 };
156 
157 extern struct cfdriver xbd_cd;
158 
159 /* Pseudo-disk Interface */
160 static struct dk_intf dkintf_esdi = {
161 	DTYPE_ESDI,
162 	"Xen Virtual ESDI",
163 	xbdopen,
164 	xbdclose,
165 	xbdstrategy,
166 	xbdstart,
167 };
168 
169 static struct dkdriver xbddkdriver = {
170 	.d_strategy = xbdstrategy,
171 	.d_minphys = minphys,
172 };
173 
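/*
 * Autoconf glue: match any xenbus "vbd" node, honouring an explicit id
 * locator from the kernel configuration if one was given.
 */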
174 static int
175 xbd_xenbus_match(device_t parent, cfdata_t match, void *aux)
176 {
177 	struct xenbusdev_attach_args *xa = aux;
178 
179 	if (strcmp(xa->xa_type, "vbd") != 0)
180 		return 0;
181 
182 	if (match->cf_loc[XENBUSCF_ID] != XENBUSCF_ID_DEFAULT &&
183 	    match->cf_loc[XENBUSCF_ID] != xa->xa_id)
184 		return 0;
185 
186 	return 1;
187 }
188 
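/*
 * Attach the frontend: hook up the dk(4) pseudo-disk glue, initialize
 * the list of free ring requests and call xbd_xenbus_resume() to set up
 * the shared ring and advertise ourselves to the backend.
 */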
189 static void
190 xbd_xenbus_attach(device_t parent, device_t self, void *aux)
191 {
192 	struct xbd_xenbus_softc *sc = device_private(self);
193 	struct xenbusdev_attach_args *xa = aux;
194 	RING_IDX i;
195 #ifdef XBD_DEBUG
196 	char **dir, *val;
197 	int dir_n = 0;
198 	char id_str[20];
199 	int err;
200 #endif
201 
202 	config_pending_incr();
203 	printf(": Xen Virtual Block Device Interface\n");
204 
205 	sc->sc_dev = self;
206 
207 #ifdef XBD_DEBUG
208 	printf("path: %s\n", xa->xa_xbusd->xbusd_path);
209 	snprintf(id_str, sizeof(id_str), "%d", xa->xa_id);
210 	err = xenbus_directory(NULL, "device/vbd", id_str, &dir_n, &dir);
211 	if (err) {
212 		aprint_error_dev(self, "xenbus_directory err %d\n", err);
213 	} else {
214 		printf("%s/\n", xa->xa_xbusd->xbusd_path);
215 		for (i = 0; i < dir_n; i++) {
216 			printf("\t/%s", dir[i]);
217 			err = xenbus_read(NULL, xa->xa_xbusd->xbusd_path, dir[i],
218 			    NULL, &val);
219 			if (err) {
220 				aprint_error_dev(self, "xenbus_read err %d\n", err);
221 			} else {
222 				printf(" = %s\n", val);
223 				free(val, M_DEVBUF);
224 			}
225 		}
226 	}
227 #endif /* XBD_DEBUG */
228 	sc->sc_xbusd = xa->xa_xbusd;
229 	sc->sc_xbusd->xbusd_otherend_changed = xbd_backend_changed;
230 
231 	dk_sc_init(&sc->sc_dksc, sc, device_xname(self));
232 	disk_init(&sc->sc_dksc.sc_dkdev, device_xname(self), &xbddkdriver);
233 	sc->sc_di = &dkintf_esdi;
234 	/* initialize free requests list */
235 	SLIST_INIT(&sc->sc_xbdreq_head);
236 	for (i = 0; i < XBD_RING_SIZE; i++) {
237 		sc->sc_reqs[i].req_id = i;
238 		SLIST_INSERT_HEAD(&sc->sc_xbdreq_head, &sc->sc_reqs[i],
239 		    req_next);
240 	}
241 
242 	sc->sc_backend_status = BLKIF_STATE_DISCONNECTED;
243 	sc->sc_shutdown = 1;
244 	/* initialise shared structures and tell backend that we are ready */
245 	xbd_xenbus_resume(sc);
246 }
247 
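/*
 * Detach: wait for pending I/O to drain, get rid of the vnodes for any
 * open instances, delete the wedges and detach the disk, then tear down
 * the event channel and the grant reference backing the shared ring.
 */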
248 static int
249 xbd_xenbus_detach(device_t dev, int flags)
250 {
251 	struct xbd_xenbus_softc *sc = device_private(dev);
252 	int s, bmaj, cmaj, i, mn;
253 	s = splbio();
254 	DPRINTF(("%s: xbd_detach\n", device_xname(dev)));
255 	if (sc->sc_shutdown == 0) {
256 		sc->sc_shutdown = 1;
257 		/* wait for requests to complete */
258 		while (sc->sc_backend_status == BLKIF_STATE_CONNECTED &&
259 		    sc->sc_dksc.sc_dkdev.dk_stats->io_busy > 0)
260 			tsleep(xbd_xenbus_detach, PRIBIO, "xbddetach", hz/2);
261 	}
262 	splx(s);
263 
264 	/* locate the major number */
265 	bmaj = bdevsw_lookup_major(&xbd_bdevsw);
266 	cmaj = cdevsw_lookup_major(&xbd_cdevsw);
267 
268 	/* Nuke the vnodes for any open instances. */
269 	for (i = 0; i < MAXPARTITIONS; i++) {
270 		mn = DISKMINOR(device_unit(dev), i);
271 		vdevgone(bmaj, mn, mn, VBLK);
272 		vdevgone(cmaj, mn, mn, VCHR);
273 	}
274 	if (sc->sc_backend_status == BLKIF_STATE_CONNECTED) {
275 		/* Delete all of our wedges. */
276 		dkwedge_delall(&sc->sc_dksc.sc_dkdev);
277 
278 		s = splbio();
279 		/* Kill off any queued buffers. */
280 		bufq_drain(sc->sc_dksc.sc_bufq);
281 		bufq_free(sc->sc_dksc.sc_bufq);
282 		splx(s);
283 
284 		/* detach disk */
285 		disk_detach(&sc->sc_dksc.sc_dkdev);
286 		disk_destroy(&sc->sc_dksc.sc_dkdev);
287 	}
288 
289 	event_remove_handler(sc->sc_evtchn, &xbd_handler, sc);
290 	while (xengnt_status(sc->sc_ring_gntref)) {
291 		tsleep(xbd_xenbus_detach, PRIBIO, "xbd_ref", hz/2);
292 	}
293 	xengnt_revoke_access(sc->sc_ring_gntref);
294 	uvm_km_free(kernel_map, (vaddr_t)sc->sc_ring.sring,
295 	    PAGE_SIZE, UVM_KMF_WIRED);
296 	return 0;
297 }
298 
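/*
 * (Re)connect to the backend: allocate the shared request/response
 * ring, grant it to the backend, bind an event channel, then publish
 * ring-ref and event-channel in the xenstore and switch the frontend
 * to XenbusStateInitialised.
 */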
299 static int
300 xbd_xenbus_resume(void *p)
301 {
302 	struct xbd_xenbus_softc *sc = p;
303 	struct xenbus_transaction *xbt;
304 	int error;
305 	blkif_sring_t *ring;
306 	paddr_t ma;
307 	const char *errmsg;
308 
309 	sc->sc_ring_gntref = GRANT_INVALID_REF;
310 
311 
312 	/* setup device: alloc event channel and shared ring */
313 	ring = (void *)uvm_km_alloc(kernel_map, PAGE_SIZE, 0,
314 		UVM_KMF_ZERO | UVM_KMF_WIRED);
315 	if (ring == NULL)
316 		panic("xbd_xenbus_resume: can't alloc rings");
317 
318 	SHARED_RING_INIT(ring);
319 	FRONT_RING_INIT(&sc->sc_ring, ring, PAGE_SIZE);
320 
321 	(void)pmap_extract_ma(pmap_kernel(), (vaddr_t)ring, &ma);
322 	error = xenbus_grant_ring(sc->sc_xbusd, ma, &sc->sc_ring_gntref);
323 	if (error)
324 		return error;
325 	error = xenbus_alloc_evtchn(sc->sc_xbusd, &sc->sc_evtchn);
326 	if (error)
327 		return error;
328 	aprint_verbose_dev(sc->sc_dev, "using event channel %d\n",
329 	    sc->sc_evtchn);
330 	event_set_handler(sc->sc_evtchn, &xbd_handler, sc,
331 	    IPL_BIO, device_xname(sc->sc_dev));
332 
333 again:
334 	xbt = xenbus_transaction_start();
335 	if (xbt == NULL)
336 		return ENOMEM;
337 	error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
338 	    "ring-ref", "%u", sc->sc_ring_gntref);
339 	if (error) {
340 		errmsg = "writing ring-ref";
341 		goto abort_transaction;
342 	}
343 	error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
344 	    "event-channel", "%u", sc->sc_evtchn);
345 	if (error) {
346 		errmsg = "writing event channel";
347 		goto abort_transaction;
348 	}
349 	error = xenbus_switch_state(sc->sc_xbusd, xbt, XenbusStateInitialised);
350 	if (error) {
351 		errmsg = "writing frontend XenbusStateInitialised";
352 		goto abort_transaction;
353 	}
354 	error = xenbus_transaction_end(xbt, 0);
355 	if (error == EAGAIN)
356 		goto again;
357 	if (error) {
358 		xenbus_dev_fatal(sc->sc_xbusd, error, "completing transaction");
359 		return -1;
360 	}
361 	return 0;
362 
363 abort_transaction:
364 	xenbus_transaction_end(xbt, 1);
365 	xenbus_dev_fatal(sc->sc_xbusd, error, "%s", errmsg);
366 	return error;
367 }
368 
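/*
 * Watch callback invoked by the xenbus thread when the backend changes
 * state.  On Closing we drain pending I/O and acknowledge with
 * XenbusStateClosed; on Connected we fetch the disk parameters, attach
 * the disk, read the disklabel and discover wedges.
 */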
369 static void xbd_backend_changed(void *arg, XenbusState new_state)
370 {
371 	struct xbd_xenbus_softc *sc = device_private((device_t)arg);
372 	struct dk_geom *pdg;
373 	char buf[9];
374 	int s;
375 	DPRINTF(("%s: new backend state %d\n", device_xname(sc->sc_dev), new_state));
376 
377 	switch (new_state) {
378 	case XenbusStateUnknown:
379 	case XenbusStateInitialising:
380 	case XenbusStateInitWait:
381 	case XenbusStateInitialised:
382 		break;
383 	case XenbusStateClosing:
384 		s = splbio();
385 		sc->sc_shutdown = 1;
386 		/* wait for requests to complete */
387 		while (sc->sc_backend_status == BLKIF_STATE_CONNECTED &&
388 		    sc->sc_dksc.sc_dkdev.dk_stats->io_busy > 0)
389 			tsleep(xbd_xenbus_detach, PRIBIO, "xbddetach",
390 			    hz/2);
391 		splx(s);
392 		xenbus_switch_state(sc->sc_xbusd, NULL, XenbusStateClosed);
393 		break;
394 	case XenbusStateConnected:
395 		/*
396 		 * note that xbd_backend_changed() can only be called by
397 		 * the xenbus thread.
398 		 */
399 
400 		if (sc->sc_backend_status == BLKIF_STATE_CONNECTED)
401 			/* already connected */
402 			return;
403 
404 		xbd_connect(sc);
405 		sc->sc_shutdown = 0;
406 		hypervisor_enable_event(sc->sc_evtchn);
407 
408 		sc->sc_xbdsize =
409 		    sc->sc_sectors * (uint64_t)sc->sc_secsize / DEV_BSIZE;
410 		sc->sc_dksc.sc_size = sc->sc_xbdsize;
411 		pdg = &sc->sc_dksc.sc_geom;
412 		pdg->pdg_secsize = DEV_BSIZE;
413 		pdg->pdg_ntracks = 1;
414 		pdg->pdg_nsectors = 1024 * (1024 / pdg->pdg_secsize);
415 		pdg->pdg_ncylinders = sc->sc_dksc.sc_size / pdg->pdg_nsectors;
416 
417 		bufq_alloc(&sc->sc_dksc.sc_bufq, "fcfs", 0);
418 		sc->sc_dksc.sc_flags |= DKF_INITED;
419 		disk_attach(&sc->sc_dksc.sc_dkdev);
420 
421 		sc->sc_backend_status = BLKIF_STATE_CONNECTED;
422 
423 		/* try to read the disklabel */
424 		dk_getdisklabel(sc->sc_di, &sc->sc_dksc, 0 /* XXX ? */);
425 		format_bytes(buf, sizeof(buf), sc->sc_sectors * sc->sc_secsize);
426 		printf("%s: %s, %d bytes/sect x %" PRIu64 " sectors\n",
427 		    device_xname(sc->sc_dev), buf, (int)pdg->pdg_secsize,
428 		    sc->sc_xbdsize);
429 		/* Discover wedges on this disk. */
430 		dkwedge_discover(&sc->sc_dksc.sc_dkdev);
431 
432 		/* the disk should be working now */
433 		config_pending_decr();
434 		break;
435 	default:
436 		panic("bad backend state %d", new_state);
437 	}
438 }
439 
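/*
 * Read the device handle from our own xenstore node and the disk
 * parameters (sectors, info flags, sector size) from the backend's
 * node, then announce XenbusStateConnected.
 */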
440 static void
441 xbd_connect(struct xbd_xenbus_softc *sc)
442 {
443 	int err;
444 	unsigned long long sectors;
445 
446 	err = xenbus_read_ul(NULL,
447 	    sc->sc_xbusd->xbusd_path, "virtual-device", &sc->sc_handle, 10);
448 	if (err)
449 		panic("%s: can't read number from %s/virtual-device\n",
450 		    device_xname(sc->sc_dev), sc->sc_xbusd->xbusd_path);
451 	err = xenbus_read_ull(NULL,
452 	    sc->sc_xbusd->xbusd_otherend, "sectors", &sectors, 10);
453 	if (err)
454 		panic("%s: can't read number from %s/sectors\n",
455 		    device_xname(sc->sc_dev), sc->sc_xbusd->xbusd_otherend);
456 	sc->sc_sectors = sectors;
457 
458 	err = xenbus_read_ul(NULL,
459 	    sc->sc_xbusd->xbusd_otherend, "info", &sc->sc_info, 10);
460 	if (err)
461 		panic("%s: can't read number from %s/info\n",
462 		    device_xname(sc->sc_dev), sc->sc_xbusd->xbusd_otherend);
463 	err = xenbus_read_ul(NULL,
464 	    sc->sc_xbusd->xbusd_otherend, "sector-size", &sc->sc_secsize, 10);
465 	if (err)
466 		panic("%s: can't read number from %s/sector-size\n",
467 		    device_xname(sc->sc_dev), sc->sc_xbusd->xbusd_otherend);
468 
469 	xenbus_switch_state(sc->sc_xbusd, NULL, XenbusStateConnected);
470 }
471 
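/*
 * Event channel interrupt handler: walk the response ring, revoke the
 * data grants of each completed request, finish the associated struct
 * buf and put the request back on the free list, then let dk(4) push
 * any queued buffers through xbdstart() again.
 */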
472 static int
473 xbd_handler(void *arg)
474 {
475 	struct xbd_xenbus_softc *sc = arg;
476 	struct buf *bp;
477 	RING_IDX resp_prod, i;
478 	int more_to_do;
479 	int seg;
480 
481 	DPRINTF(("xbd_handler(%s)\n", device_xname(sc->sc_dev)));
482 
483 	if (__predict_false(sc->sc_backend_status != BLKIF_STATE_CONNECTED))
484 		return 0;
485 again:
486 	resp_prod = sc->sc_ring.sring->rsp_prod;
487 	x86_lfence(); /* ensure we see replies up to resp_prod */
488 	for (i = sc->sc_ring.rsp_cons; i != resp_prod; i++) {
489 		blkif_response_t *rep = RING_GET_RESPONSE(&sc->sc_ring, i);
490 		struct xbd_req *xbdreq = &sc->sc_reqs[rep->id];
491 		bp = xbdreq->req_bp;
492 		DPRINTF(("xbd_handler(%p): b_bcount = %ld\n",
493 		    bp, (long)bp->b_bcount));
494 		for (seg = xbdreq->req_nr_segments - 1; seg >= 0; seg--) {
495 			if (__predict_false(
496 			    xengnt_status(xbdreq->req_gntref[seg]))) {
497 				printf("%s: grant still used by backend\n",
498 				    device_xname(sc->sc_dev));
499 				sc->sc_ring.rsp_cons = i;
500 				xbdreq->req_nr_segments = seg + 1;
501 				goto done;
502 			}
503 			xengnt_revoke_access(
504 			    xbdreq->req_gntref[seg]);
505 			xbdreq->req_nr_segments--;
506 		}
507 		if (rep->operation != BLKIF_OP_READ &&
508 		    rep->operation != BLKIF_OP_WRITE) {
509 			printf("%s: bad operation %d from backend\n",
510 			     device_xname(sc->sc_dev), rep->operation);
511 			bp->b_error = EIO;
512 			bp->b_resid = bp->b_bcount;
513 			goto next;
514 		}
515 		if (rep->status != BLKIF_RSP_OKAY) {
516 			bp->b_error = EIO;
517 			bp->b_resid = bp->b_bcount;
518 			goto next;
519 		}
520 		/* b_resid was set in xbdstart */
521 next:
522 		if (bp->b_data != xbdreq->req_data)
523 			xbd_unmap_align(xbdreq);
524 		disk_unbusy(&sc->sc_dksc.sc_dkdev,
525 		    (bp->b_bcount - bp->b_resid),
526 		    (bp->b_flags & B_READ));
527 		biodone(bp);
528 		SLIST_INSERT_HEAD(&sc->sc_xbdreq_head, xbdreq, req_next);
529 	}
530 	x86_lfence();
531 	sc->sc_ring.rsp_cons = i;
532 	RING_FINAL_CHECK_FOR_RESPONSES(&sc->sc_ring, more_to_do);
533 	if (more_to_do)
534 		goto again;
535 done:
536 	dk_iodone(sc->sc_di, &sc->sc_dksc);
537 	return 1;
538 }
539 
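/*
 * Block/character device entry points.  Most of these just look up the
 * softc from the unit encoded in the minor number and defer to the
 * dk(4) pseudo-disk layer (or to physio() for raw read/write).
 */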
540 int
541 xbdopen(dev_t dev, int flags, int fmt, struct lwp *l)
542 {
543 	struct	xbd_xenbus_softc *sc;
544 
545 	sc = device_lookup_private(&xbd_cd, DISKUNIT(dev));
546 	if (sc == NULL)
547 		return (ENXIO);
548 	if ((flags & FWRITE) && (sc->sc_info & VDISK_READONLY))
549 		return EROFS;
550 
551 	DPRINTF(("xbdopen(0x%04x, %d)\n", dev, flags));
552 	return dk_open(sc->sc_di, &sc->sc_dksc, dev, flags, fmt, l);
553 }
554 
555 int
556 xbdclose(dev_t dev, int flags, int fmt, struct lwp *l)
557 {
558 	struct xbd_xenbus_softc *sc;
559 
560 	sc = device_lookup_private(&xbd_cd, DISKUNIT(dev));
561 
562 	DPRINTF(("xbdclose(%d, %d)\n", dev, flags));
563 	return dk_close(sc->sc_di, &sc->sc_dksc, dev, flags, fmt, l);
564 }
565 
566 void
567 xbdstrategy(struct buf *bp)
568 {
569 	struct xbd_xenbus_softc *sc;
570 
571 	sc = device_lookup_private(&xbd_cd, DISKUNIT(bp->b_dev));
572 
573 	DPRINTF(("xbdstrategy(%p): b_bcount = %ld\n", bp,
574 	    (long)bp->b_bcount));
575 
576 	if (sc == NULL || sc->sc_shutdown) {
577 		bp->b_error = EIO;
578 		biodone(bp);
579 		return;
580 	}
581 	if (__predict_false((sc->sc_info & VDISK_READONLY) &&
582 	    (bp->b_flags & B_READ) == 0)) {
583 		bp->b_error = EROFS;
584 		biodone(bp);
585 		return;
586 	}
587 
588 	dk_strategy(sc->sc_di, &sc->sc_dksc, bp);
589 	return;
590 }
591 
592 int
593 xbdsize(dev_t dev)
594 {
595 	struct	xbd_xenbus_softc *sc;
596 
597 	DPRINTF(("xbdsize(%d)\n", dev));
598 
599 	sc = device_lookup_private(&xbd_cd, DISKUNIT(dev));
600 	if (sc == NULL || sc->sc_shutdown)
601 		return -1;
602 	return dk_size(sc->sc_di, &sc->sc_dksc, dev);
603 }
604 
605 int
606 xbdread(dev_t dev, struct uio *uio, int flags)
607 {
608 	struct xbd_xenbus_softc *sc = device_lookup_private(&xbd_cd, DISKUNIT(dev));
609 	struct  dk_softc *dksc = &sc->sc_dksc;
610 
611 	if ((dksc->sc_flags & DKF_INITED) == 0)
612 		return ENXIO;
613 	return physio(xbdstrategy, NULL, dev, B_READ, minphys, uio);
614 }
615 
616 int
617 xbdwrite(dev_t dev, struct uio *uio, int flags)
618 {
619 	struct xbd_xenbus_softc *sc = device_lookup_private(&xbd_cd, DISKUNIT(dev));
620 	struct  dk_softc *dksc = &sc->sc_dksc;
621 
622 	if ((dksc->sc_flags & DKF_INITED) == 0)
623 		return ENXIO;
624 	if (__predict_false(sc->sc_info & VDISK_READONLY))
625 		return EROFS;
626 	return physio(xbdstrategy, NULL, dev, B_WRITE, minphys, uio);
627 }
628 
629 int
630 xbdioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
631 {
632 	struct xbd_xenbus_softc *sc = device_lookup_private(&xbd_cd, DISKUNIT(dev));
633 	struct	dk_softc *dksc;
634 	int	error;
635 	struct	disk *dk;
636 
637 	DPRINTF(("xbdioctl(%d, %08lx, %p, %d, %p)\n",
638 	    dev, cmd, data, flag, l));
639 	dksc = &sc->sc_dksc;
640 	dk = &dksc->sc_dkdev;
641 
642 	switch (cmd) {
643 	case DIOCSSTRATEGY:
644 		error = EOPNOTSUPP;
645 		break;
646 	default:
647 		error = dk_ioctl(sc->sc_di, dksc, dev, cmd, data, flag, l);
648 		break;
649 	}
650 
651 	return error;
652 }
653 
654 int
655 xbddump(dev_t dev, daddr_t blkno, void *va, size_t size)
656 {
657 	struct xbd_xenbus_softc *sc = device_lookup_private(&xbd_cd, DISKUNIT(dev));
658 
659 	if (sc == NULL)
660 		return (ENXIO);
664 
665 	DPRINTF(("xbddump(%d, %" PRId64 ", %p, %lu)\n", dev, blkno, va,
666 	    (unsigned long)size));
667 	return dk_dump(sc->sc_di, &sc->sc_dksc, dev, blkno, va, size);
668 }
669 
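/*
 * Queue one buffer to the backend: take a free xbd_req, bounce the data
 * through an aligned buffer if needed, grant each data page to the
 * backend and fill in the segment descriptors of a ring slot.  The ring
 * is only pushed (and the backend notified) once the bufq is empty, so
 * back-to-back buffers are batched under a single notification.
 */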
670 static int
671 xbdstart(struct dk_softc *dksc, struct buf *bp)
672 {
673 	struct xbd_xenbus_softc *sc;
674 	struct xbd_req *xbdreq;
675 	blkif_request_t *req;
676 	int ret = 0, runqueue = 1;
677 	size_t bcount, off;
678 	paddr_t ma;
679 	vaddr_t va;
680 	int nsects, nbytes, seg;
681 	int notify;
682 
683 	DPRINTF(("xbdstart(%p): b_bcount = %ld\n", bp, (long)bp->b_bcount));
684 
685 	sc = device_lookup_private(&xbd_cd, DISKUNIT(bp->b_dev));
686 	if (sc == NULL || sc->sc_shutdown) {
687 		bp->b_error = EIO;
688 		goto err;
689 	}
690 
691 	if (bp->b_rawblkno < 0 || bp->b_rawblkno > sc->sc_xbdsize) {
692 		/* invalid block number */
693 		bp->b_error = EINVAL;
694 		goto err;
695 	}
696 
697 	if (bp->b_rawblkno == sc->sc_xbdsize) {
698 		/* at end of disk; return short read */
699 		bp->b_resid = bp->b_bcount;
700 		biodone(bp);
701 		return 0;
702 	}
703 
704 
705 	if (RING_FULL(&sc->sc_ring)) {
706 		DPRINTF(("xbdstart: ring_full\n"));
707 		ret = -1;
708 		goto out;
709 	}
710 
711 	dksc = &sc->sc_dksc;
712 
713 	xbdreq = SLIST_FIRST(&sc->sc_xbdreq_head);
714 	if (__predict_false(xbdreq == NULL)) {
715 		DPRINTF(("xbdstart: no req\n"));
716 		ret = -1; /* dk_start should not remove bp from queue */
717 		goto out;
718 	}
719 
720 	xbdreq->req_bp = bp;
721 	xbdreq->req_data = bp->b_data;
722 	if ((vaddr_t)bp->b_data & (XEN_BSIZE - 1)) {
723 		if (__predict_false(xbd_map_align(xbdreq) != 0)) {
724 			ret = -1;
725 			goto out;
726 		}
727 	}
728 	/* now we're sure we'll send this buf */
729 	disk_busy(&dksc->sc_dkdev);
730 	SLIST_REMOVE_HEAD(&sc->sc_xbdreq_head, req_next);
731 	req = RING_GET_REQUEST(&sc->sc_ring, sc->sc_ring.req_prod_pvt);
732 	req->id = xbdreq->req_id;
733 	req->operation = bp->b_flags & B_READ ? BLKIF_OP_READ : BLKIF_OP_WRITE;
734 	req->sector_number = bp->b_rawblkno;
735 	req->handle = sc->sc_handle;
736 
737 	va = (vaddr_t)xbdreq->req_data & ~PAGE_MASK;
738 	off = (vaddr_t)xbdreq->req_data & PAGE_MASK;
739 	if (bp->b_rawblkno + bp->b_bcount / DEV_BSIZE >= sc->sc_xbdsize) {
740 		bcount = (sc->sc_xbdsize - bp->b_rawblkno) * DEV_BSIZE;
741 		bp->b_resid = bp->b_bcount - bcount;
742 	} else {
743 		bcount = bp->b_bcount;
744 		bp->b_resid = 0;
745 	}
746 	for (seg = 0; bcount > 0;) {
747 		pmap_extract_ma(pmap_kernel(), va, &ma);
748 		KASSERT((ma & (XEN_BSIZE - 1)) == 0);
749 		if (bcount > PAGE_SIZE - off)
750 			nbytes = PAGE_SIZE - off;
751 		else
752 			nbytes = bcount;
753 		nsects = nbytes >> XEN_BSHIFT;
754 		req->seg[seg].first_sect = off >> XEN_BSHIFT;
755 		req->seg[seg].last_sect = (off >> XEN_BSHIFT) + nsects - 1;
756 		KASSERT(req->seg[seg].first_sect <= req->seg[seg].last_sect);
757 		KASSERT(req->seg[seg].last_sect < 8);
758 		if (__predict_false(xengnt_grant_access(
759 		    sc->sc_xbusd->xbusd_otherend_id, ma,
760 		    (bp->b_flags & B_READ) == 0, &xbdreq->req_gntref[seg])))
761 			panic("xbdstart: xengnt_grant_access"); /* XXX XXX !!! */
762 		req->seg[seg].gref = xbdreq->req_gntref[seg];
763 		seg++;
764 		KASSERT(seg <= BLKIF_MAX_SEGMENTS_PER_REQUEST);
765 		va += PAGE_SIZE;
766 		off = 0;
767 		bcount -= nbytes;
768 	}
769 	xbdreq->req_nr_segments = req->nr_segments = seg;
770 	sc->sc_ring.req_prod_pvt++;
771 	if (BUFQ_PEEK(sc->sc_dksc.sc_bufq)) {
772 		/* we will be called again; don't notify the backend yet */
773 		runqueue = 0;
774 	}
775 
776 out:
777 	if (runqueue) {
778 		RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->sc_ring, notify);
779 		if (notify)
780 			hypervisor_notify_via_evtchn(sc->sc_evtchn);
781 	}
782 	return ret;
783 
784 err:
785 	bp->b_resid = bp->b_bcount;
786 	biodone(bp);
787 	return 0;
788 }
789 
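/*
 * The backend expects XEN_BSIZE-aligned data; bounce misaligned buffers
 * through a temporary kernel allocation, copying in before a write and
 * back out after a read (see xbd_unmap_align()).
 */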
790 static int
791 xbd_map_align(struct xbd_req *req)
792 {
793 	int s = splvm();
794 
795 	req->req_data = (void *)uvm_km_alloc(kmem_map, req->req_bp->b_bcount,
796 	    PAGE_SIZE, UVM_KMF_WIRED);
797 	splx(s);
798 	if (__predict_false(req->req_data == NULL))
799 		return ENOMEM;
800 	if ((req->req_bp->b_flags & B_READ) == 0)
801 		memcpy(req->req_data, req->req_bp->b_data,
802 		    req->req_bp->b_bcount);
803 	return 0;
804 }
805 
806 static void
807 xbd_unmap_align(struct xbd_req *req)
808 {
809 	int s;
810 	if (req->req_bp->b_flags & B_READ)
811 		memcpy(req->req_bp->b_data, req->req_data,
812 		    req->req_bp->b_bcount);
813 	s = splvm();
814 	uvm_km_free(kmem_map, (vaddr_t)req->req_data, req->req_bp->b_bcount,
815 	    UVM_KMF_WIRED);
816 	splx(s);
817 }
818