/*	$OpenBSD: xen.c,v 1.90 2017/08/10 20:13:57 mikeb Exp $	*/

/*
 * Copyright (c) 2015, 2016, 2017 Mike Belopuhov
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/param.h>

/* Xen requires locked atomic operations */
#ifndef MULTIPROCESSOR
#define _XENMPATOMICS
#define MULTIPROCESSOR
#endif
#include <sys/atomic.h>
#ifdef _XENMPATOMICS
#undef MULTIPROCESSOR
#undef _XENMPATOMICS
#endif

#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/signal.h>
#include <sys/signalvar.h>
#include <sys/refcnt.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/stdint.h>
#include <sys/device.h>
#include <sys/task.h>
#include <sys/syslog.h>

#include <machine/bus.h>
#include <machine/cpu.h>
#include <machine/cpufunc.h>

#include <uvm/uvm_extern.h>

#include <machine/i82489var.h>

#include <dev/rndvar.h>

#include <dev/pv/pvvar.h>
#include <dev/pv/pvreg.h>
#include <dev/pv/xenreg.h>
#include <dev/pv/xenvar.h>

/* #define XEN_DEBUG */

#ifdef XEN_DEBUG
#define DPRINTF(x...)		printf(x)
#else
#define DPRINTF(x...)
#endif

struct xen_softc *xen_sc;

int	xen_init_hypercall(struct xen_softc *);
int	xen_getfeatures(struct xen_softc *);
int	xen_init_info_page(struct xen_softc *);
int	xen_init_cbvec(struct xen_softc *);
int	xen_init_interrupts(struct xen_softc *);
void	xen_intr_dispatch(void *);
int	xen_init_grant_tables(struct xen_softc *);
struct xen_gntent *
	xen_grant_table_grow(struct xen_softc *);
int	xen_grant_table_alloc(struct xen_softc *, grant_ref_t *);
void	xen_grant_table_free(struct xen_softc *, grant_ref_t);
void	xen_grant_table_enter(struct xen_softc *, grant_ref_t, paddr_t,
	    int, int);
void	xen_grant_table_remove(struct xen_softc *, grant_ref_t);
void	xen_disable_emulated_devices(struct xen_softc *);

int 	xen_match(struct device *, void *, void *);
void	xen_attach(struct device *, struct device *, void *);
void	xen_deferred(struct device *);
void	xen_control(void *);
void	xen_hotplug(void *);
void	xen_resume(struct device *);
int	xen_activate(struct device *, int);
int	xen_attach_device(struct xen_softc *, struct xen_devlist *,
	    const char *, const char *);
int	xen_probe_devices(struct xen_softc *);

int	xen_bus_dmamap_create(bus_dma_tag_t, bus_size_t, int, bus_size_t,
	    bus_size_t, int, bus_dmamap_t *);
void	xen_bus_dmamap_destroy(bus_dma_tag_t, bus_dmamap_t);
int	xen_bus_dmamap_load(bus_dma_tag_t, bus_dmamap_t, void *, bus_size_t,
	    struct proc *, int);
int	xen_bus_dmamap_load_mbuf(bus_dma_tag_t, bus_dmamap_t, struct mbuf *,
	    int);
void	xen_bus_dmamap_unload(bus_dma_tag_t, bus_dmamap_t);
void	xen_bus_dmamap_sync(bus_dma_tag_t, bus_dmamap_t, bus_addr_t,
	    bus_size_t, int);

int	xs_attach(struct xen_softc *);

struct cfdriver xen_cd = {
	NULL, "xen", DV_DULL
};

const struct cfattach xen_ca = {
	sizeof(struct xen_softc), xen_match, xen_attach, NULL, xen_activate
};

struct bus_dma_tag xen_bus_dma_tag = {
	NULL,
	xen_bus_dmamap_create,
	xen_bus_dmamap_destroy,
	xen_bus_dmamap_load,
	xen_bus_dmamap_load_mbuf,
	NULL,
	NULL,
	xen_bus_dmamap_unload,
	xen_bus_dmamap_sync,
	_bus_dmamem_alloc,
	NULL,
	_bus_dmamem_free,
	_bus_dmamem_map,
	_bus_dmamem_unmap,
	NULL,
};

int
xen_match(struct device *parent, void *match, void *aux)
{
	struct pv_attach_args *pva = aux;
	struct pvbus_hv *hv = &pva->pva_hv[PVBUS_XEN];

	if (hv->hv_base == 0)
		return (0);

	return (1);
}

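/*
 * Attach the Xen nexus: register the hypercall page, query hypervisor
 * features, map the shared info page, set up the callback vector and
 * event channel interrupts, bring up grant tables and the XenStore
 * interface, and finally enumerate the paravirtual devices.
 */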
void
xen_attach(struct device *parent, struct device *self, void *aux)
{
	struct pv_attach_args *pva = (struct pv_attach_args *)aux;
	struct pvbus_hv *hv = &pva->pva_hv[PVBUS_XEN];
	struct xen_softc *sc = (struct xen_softc *)self;

	sc->sc_base = hv->hv_base;
	sc->sc_dmat = pva->pva_dmat;

	if (xen_init_hypercall(sc))
		return;

	/* Wire it up to the global */
	xen_sc = sc;

	if (xen_getfeatures(sc))
		return;

	if (xen_init_info_page(sc))
		return;

	xen_init_cbvec(sc);

	if (xen_init_interrupts(sc))
		return;

	if (xen_init_grant_tables(sc))
		return;

	if (xs_attach(sc))
		return;

	xen_probe_devices(sc);

	/* pvbus(4) key/value interface */
	hv->hv_kvop = xs_kvop;
	hv->hv_arg = sc;

	xen_disable_emulated_devices(sc);

	config_mountroot(self, xen_deferred);
}

void
xen_deferred(struct device *self)
{
	struct xen_softc *sc = (struct xen_softc *)self;

	if (!(sc->sc_flags & XSF_CBVEC)) {
		DPRINTF("%s: callback vector hasn't been established\n",
		    sc->sc_dev.dv_xname);
		return;
	}

	xen_intr_enable();

	if (xs_watch(sc, "control", "shutdown", &sc->sc_ctltsk,
	    xen_control, sc))
		printf("%s: failed to setup shutdown control watch\n",
		    sc->sc_dev.dv_xname);
}

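/*
 * Process a "control/shutdown" event: the toolstack writes the
 * requested action into the XenStore node and we acknowledge it by
 * clearing the node before acting on the request.
 */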
void
xen_control(void *arg)
{
	struct xen_softc *sc = arg;
	struct xs_transaction xst;
	char action[128];
	int error;

	memset(&xst, 0, sizeof(xst));
	xst.xst_id = 0;
	xst.xst_cookie = sc->sc_xs;

	error = xs_getprop(sc, "control", "shutdown", action, sizeof(action));
	if (error) {
		if (error != ENOENT)
			printf("%s: failed to process control event\n",
			    sc->sc_dev.dv_xname);
		return;
	}

	if (strlen(action) == 0)
		return;

	/* Acknowledge the event */
	xs_setprop(sc, "control", "shutdown", "", 0);

	if (strcmp(action, "halt") == 0 || strcmp(action, "poweroff") == 0) {
		pvbus_shutdown(&sc->sc_dev);
	} else if (strcmp(action, "reboot") == 0) {
		pvbus_reboot(&sc->sc_dev);
	} else if (strcmp(action, "crash") == 0) {
		panic("xen told us to do this");
	} else if (strcmp(action, "suspend") == 0) {
		/* Not implemented yet */
	} else {
		printf("%s: unknown shutdown event \"%s\"\n",
		    sc->sc_dev.dv_xname, action);
	}
}

void
xen_resume(struct device *self)
{
}

int
xen_activate(struct device *self, int act)
{
	int rv = 0;

	switch (act) {
	case DVACT_RESUME:
		xen_resume(self);
		break;
	}
	return (rv);
}

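/*
 * The hypercall CPUID leaf reports the number of hypercall pages the
 * guest must register (regs[0]) and the MSR to register them with
 * (regs[1]).  Writing a page's physical address to that MSR prompts
 * the hypervisor to fill the page with hypercall trampolines,
 * 32 bytes per hypercall number.
 */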
int
xen_init_hypercall(struct xen_softc *sc)
{
	extern void *xen_hypercall_page;
	uint32_t regs[4];
	paddr_t pa;

	/* Get hypercall page configuration MSR */
	CPUID(sc->sc_base + CPUID_OFFSET_XEN_HYPERCALL,
	    regs[0], regs[1], regs[2], regs[3]);

	/* We don't support more than one hypercall page */
	if (regs[0] != 1) {
		printf(": requested %u hypercall pages\n", regs[0]);
		return (-1);
	}

	sc->sc_hc = &xen_hypercall_page;

	if (!pmap_extract(pmap_kernel(), (vaddr_t)sc->sc_hc, &pa)) {
		printf(": hypercall page PA extraction failed\n");
		return (-1);
	}
	wrmsr(regs[1], pa);

	return (0);
}

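/*
 * Marshal up to five arguments from the varargs list into an array
 * and hand them to xen_hypercallv().
 */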
int
xen_hypercall(struct xen_softc *sc, int op, int argc, ...)
{
	va_list ap;
	ulong argv[5];
	int i;

	if (argc < 0 || argc > 5)
		return (-1);
	va_start(ap, argc);
	for (i = 0; i < argc; i++)
		argv[i] = (ulong)va_arg(ap, ulong);
	va_end(ap);
	return (xen_hypercallv(sc, op, argc, argv));
}

int
xen_hypercallv(struct xen_softc *sc, int op, int argc, ulong *argv)
{
	ulong hcall;
	int rv = 0;

	hcall = (ulong)sc->sc_hc + op * 32;

#if defined(XEN_DEBUG) && disabled
	{
		int i;

		printf("hypercall %d", op);
		if (argc > 0) {
			printf(", args {");
			for (i = 0; i < argc; i++)
				printf(" %#lx", argv[i]);
			printf(" }\n");
		} else
			printf("\n");
	}
#endif

	switch (argc) {
	case 0: {
		HYPERCALL_RES1;
		__asm__ volatile (			\
			  HYPERCALL_LABEL		\
			: HYPERCALL_OUT1		\
			: HYPERCALL_PTR(hcall)		\
			: HYPERCALL_CLOBBER		\
		);
		HYPERCALL_RET(rv);
		break;
	}
	case 1: {
		HYPERCALL_RES1; HYPERCALL_RES2;
		HYPERCALL_ARG1(argv[0]);
		__asm__ volatile (			\
			  HYPERCALL_LABEL		\
			: HYPERCALL_OUT1 HYPERCALL_OUT2	\
			: HYPERCALL_IN1			\
			, HYPERCALL_PTR(hcall)		\
			: HYPERCALL_CLOBBER		\
		);
		HYPERCALL_RET(rv);
		break;
	}
	case 2: {
		HYPERCALL_RES1; HYPERCALL_RES2; HYPERCALL_RES3;
		HYPERCALL_ARG1(argv[0]); HYPERCALL_ARG2(argv[1]);
		__asm__ volatile (			\
			  HYPERCALL_LABEL		\
			: HYPERCALL_OUT1 HYPERCALL_OUT2	\
			  HYPERCALL_OUT3		\
			: HYPERCALL_IN1	HYPERCALL_IN2	\
			, HYPERCALL_PTR(hcall)		\
			: HYPERCALL_CLOBBER		\
		);
		HYPERCALL_RET(rv);
		break;
	}
	case 3: {
		HYPERCALL_RES1; HYPERCALL_RES2; HYPERCALL_RES3;
		HYPERCALL_RES4;
		HYPERCALL_ARG1(argv[0]); HYPERCALL_ARG2(argv[1]);
		HYPERCALL_ARG3(argv[2]);
		__asm__ volatile (			\
			  HYPERCALL_LABEL		\
			: HYPERCALL_OUT1 HYPERCALL_OUT2	\
			  HYPERCALL_OUT3 HYPERCALL_OUT4	\
			: HYPERCALL_IN1	HYPERCALL_IN2	\
			  HYPERCALL_IN3			\
			, HYPERCALL_PTR(hcall)		\
			: HYPERCALL_CLOBBER		\
		);
		HYPERCALL_RET(rv);
		break;
	}
	case 4: {
		HYPERCALL_RES1; HYPERCALL_RES2; HYPERCALL_RES3;
		HYPERCALL_RES4; HYPERCALL_RES5;
		HYPERCALL_ARG1(argv[0]); HYPERCALL_ARG2(argv[1]);
		HYPERCALL_ARG3(argv[2]); HYPERCALL_ARG4(argv[3]);
		__asm__ volatile (			\
			  HYPERCALL_LABEL		\
			: HYPERCALL_OUT1 HYPERCALL_OUT2	\
			  HYPERCALL_OUT3 HYPERCALL_OUT4	\
			  HYPERCALL_OUT5		\
			: HYPERCALL_IN1	HYPERCALL_IN2	\
			  HYPERCALL_IN3	HYPERCALL_IN4	\
			, HYPERCALL_PTR(hcall)		\
			: HYPERCALL_CLOBBER		\
		);
		HYPERCALL_RET(rv);
		break;
	}
	case 5: {
		HYPERCALL_RES1; HYPERCALL_RES2; HYPERCALL_RES3;
		HYPERCALL_RES4; HYPERCALL_RES5; HYPERCALL_RES6;
		HYPERCALL_ARG1(argv[0]); HYPERCALL_ARG2(argv[1]);
		HYPERCALL_ARG3(argv[2]); HYPERCALL_ARG4(argv[3]);
		HYPERCALL_ARG5(argv[4]);
		__asm__ volatile (			\
			  HYPERCALL_LABEL		\
			: HYPERCALL_OUT1 HYPERCALL_OUT2	\
			  HYPERCALL_OUT3 HYPERCALL_OUT4	\
			  HYPERCALL_OUT5 HYPERCALL_OUT6	\
			: HYPERCALL_IN1	HYPERCALL_IN2	\
			  HYPERCALL_IN3	HYPERCALL_IN4	\
			  HYPERCALL_IN5			\
			, HYPERCALL_PTR(hcall)		\
			: HYPERCALL_CLOBBER		\
		);
		HYPERCALL_RET(rv);
		break;
	}
	default:
		DPRINTF("%s: wrong number of arguments: %d\n", __func__, argc);
		rv = -1;
		break;
	}
	return (rv);
}

int
xen_getfeatures(struct xen_softc *sc)
{
	struct xen_feature_info xfi;

	memset(&xfi, 0, sizeof(xfi));
	if (xen_hypercall(sc, XC_VERSION, 2, XENVER_get_features, &xfi) < 0) {
		printf(": failed to fetch features\n");
		return (-1);
	}
	sc->sc_features = xfi.submap;
#ifdef XEN_DEBUG
	printf(": features %b", sc->sc_features,
	    "\20\014DOM0\013PIRQ\012PVCLOCK\011CBVEC\010GNTFLAGS\007HMA"
	    "\006PTUPD\005PAE4G\004SUPERVISOR\003AUTOPMAP\002WDT\001WPT");
#else
	printf(": features %#x", sc->sc_features);
#endif
	return (0);
}

#ifdef XEN_DEBUG
void
xen_print_info_page(void)
{
	struct xen_softc *sc = xen_sc;
	struct shared_info *s = sc->sc_ipg;
	struct vcpu_info *v;
	int i;

	virtio_membar_sync();
	for (i = 0; i < XEN_LEGACY_MAX_VCPUS; i++) {
		v = &s->vcpu_info[i];
		if (!v->evtchn_upcall_pending && !v->evtchn_upcall_mask &&
		    !v->evtchn_pending_sel && !v->time.version &&
		    !v->time.tsc_timestamp && !v->time.system_time &&
		    !v->time.tsc_to_system_mul && !v->time.tsc_shift)
			continue;
		printf("vcpu%d:\n"
		    "   upcall_pending=%02x upcall_mask=%02x pending_sel=%#lx\n"
		    "   time version=%u tsc=%llu system=%llu\n"
		    "   time mul=%u shift=%d\n",
		    i, v->evtchn_upcall_pending, v->evtchn_upcall_mask,
		    v->evtchn_pending_sel, v->time.version,
		    v->time.tsc_timestamp, v->time.system_time,
		    v->time.tsc_to_system_mul, v->time.tsc_shift);
	}
	printf("pending events: ");
	for (i = 0; i < nitems(s->evtchn_pending); i++) {
		if (s->evtchn_pending[i] == 0)
			continue;
		printf(" %d:%#lx", i, s->evtchn_pending[i]);
	}
	printf("\nmasked events: ");
	for (i = 0; i < nitems(s->evtchn_mask); i++) {
		if (s->evtchn_mask[i] == 0xffffffffffffffffULL)
			continue;
		printf(" %d:%#lx", i, s->evtchn_mask[i]);
	}
	printf("\nwc ver=%u sec=%u nsec=%u\n", s->wc_version, s->wc_sec,
	    s->wc_nsec);
	printf("arch maxpfn=%lu framelist=%lu nmi=%lu\n", s->arch.max_pfn,
	    s->arch.pfn_to_mfn_frame_list, s->arch.nmi_reason);
}
#endif	/* XEN_DEBUG */

int
xen_init_info_page(struct xen_softc *sc)
{
	struct xen_add_to_physmap xatp;
	paddr_t pa;

	sc->sc_ipg = malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT | M_ZERO);
	if (sc->sc_ipg == NULL) {
		printf(": failed to allocate shared info page\n");
		return (-1);
	}
	if (!pmap_extract(pmap_kernel(), (vaddr_t)sc->sc_ipg, &pa)) {
		printf(": shared info page PA extraction failed\n");
		free(sc->sc_ipg, M_DEVBUF, PAGE_SIZE);
		return (-1);
	}
	xatp.domid = DOMID_SELF;
	xatp.idx = 0;
	xatp.space = XENMAPSPACE_shared_info;
	xatp.gpfn = atop(pa);
	if (xen_hypercall(sc, XC_MEMORY, 2, XENMEM_add_to_physmap, &xatp)) {
		printf(": failed to register shared info page\n");
		free(sc->sc_ipg, M_DEVBUF, PAGE_SIZE);
		return (-1);
	}
	return (0);
}

int
xen_init_cbvec(struct xen_softc *sc)
{
	struct xen_hvm_param xhp;

	if ((sc->sc_features & XENFEAT_CBVEC) == 0)
		return (ENOENT);

	xhp.domid = DOMID_SELF;
	xhp.index = HVM_PARAM_CALLBACK_IRQ;
	xhp.value = HVM_CALLBACK_VECTOR(LAPIC_XEN_VECTOR);
	if (xen_hypercall(sc, XC_HVM, 2, HVMOP_set_param, &xhp)) {
		/* Will retry with the xspd(4) PCI interrupt */
		return (ENOENT);
	}
	DPRINTF(", idtvec %d", LAPIC_XEN_VECTOR);

	sc->sc_flags |= XSF_CBVEC;

	return (0);
}

int
xen_init_interrupts(struct xen_softc *sc)
{
	int i;

	sc->sc_irq = LAPIC_XEN_VECTOR;

	/*
	 * Clear all pending events and mask all interrupts
	 */
	for (i = 0; i < nitems(sc->sc_ipg->evtchn_pending); i++) {
		sc->sc_ipg->evtchn_pending[i] = 0;
		sc->sc_ipg->evtchn_mask[i] = ~0UL;
	}

	SLIST_INIT(&sc->sc_intrs);

	mtx_init(&sc->sc_islck, IPL_NET);

	return (0);
}

static int
xen_evtchn_hypercall(struct xen_softc *sc, int cmd, void *arg, size_t len)
{
	struct evtchn_op compat;
	int error;

	error = xen_hypercall(sc, XC_EVTCHN, 2, cmd, arg);
	if (error == -ENOXENSYS) {
		memset(&compat, 0, sizeof(compat));
		compat.cmd = cmd;
		memcpy(&compat.u, arg, len);
		error = xen_hypercall(sc, XC_OEVTCHN, 1, &compat);
	}
	return (error);
}

static inline void
xen_intsrc_add(struct xen_softc *sc, struct xen_intsrc *xi)
{
	refcnt_init(&xi->xi_refcnt);
	mtx_enter(&sc->sc_islck);
	SLIST_INSERT_HEAD(&sc->sc_intrs, xi, xi_entry);
	mtx_leave(&sc->sc_islck);
}

static inline struct xen_intsrc *
xen_intsrc_acquire(struct xen_softc *sc, evtchn_port_t port)
{
	struct xen_intsrc *xi;

	mtx_enter(&sc->sc_islck);
	SLIST_FOREACH(xi, &sc->sc_intrs, xi_entry) {
		if (xi->xi_port == port) {
			refcnt_take(&xi->xi_refcnt);
			break;
		}
	}
	mtx_leave(&sc->sc_islck);
	return (xi);
}

static inline void
xen_intsrc_release(struct xen_softc *sc, struct xen_intsrc *xi)
{
	refcnt_rele_wake(&xi->xi_refcnt);
}

static inline struct xen_intsrc *
xen_intsrc_remove(struct xen_softc *sc, evtchn_port_t port)
{
	struct xen_intsrc *xi;

	mtx_enter(&sc->sc_islck);
	SLIST_FOREACH(xi, &sc->sc_intrs, xi_entry) {
		if (xi->xi_port == port) {
			SLIST_REMOVE(&sc->sc_intrs, xi, xen_intsrc, xi_entry);
			break;
		}
	}
	mtx_leave(&sc->sc_islck);
	if (xi != NULL)
		refcnt_finalize(&xi->xi_refcnt, "xenisrm");
	return (xi);
}

static inline void
xen_intr_mask_acquired(struct xen_softc *sc, struct xen_intsrc *xi)
{
	xi->xi_masked = 1;
	set_bit(xi->xi_port, &sc->sc_ipg->evtchn_mask[0]);
}

static inline int
xen_intr_unmask_release(struct xen_softc *sc, struct xen_intsrc *xi)
{
	struct evtchn_unmask eu;

	xi->xi_masked = 0;
	if (!test_bit(xi->xi_port, &sc->sc_ipg->evtchn_mask[0]))
		return (0);
	eu.port = xi->xi_port;
	xen_intsrc_release(sc, xi);
	return (xen_evtchn_hypercall(sc, EVTCHNOP_unmask, &eu, sizeof(eu)));
}

void
xen_intr_ack(void)
{
	struct xen_softc *sc = xen_sc;
	struct shared_info *s = sc->sc_ipg;
	struct cpu_info *ci = curcpu();
	struct vcpu_info *v = &s->vcpu_info[CPU_INFO_UNIT(ci)];

	v->evtchn_upcall_pending = 0;
	virtio_membar_sync();
}

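/*
 * Scan the two-level pending bitmap in the shared info page: the
 * per-vcpu selector word has a bit set for every "row" of the
 * evtchn_pending array that needs servicing, and each row is a
 * bitmask of event channel ports (port = row * LONG_BIT + bit).
 * Pending, unmasked ports are masked and their handler tasks are
 * scheduled onto the per-source taskq.
 */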
void
xen_intr(void)
{
	struct xen_softc *sc = xen_sc;
	struct xen_intsrc *xi;
	struct shared_info *s = sc->sc_ipg;
	struct cpu_info *ci = curcpu();
	struct vcpu_info *v = &s->vcpu_info[CPU_INFO_UNIT(ci)];
	ulong pending, selector;
	int port, bit, row;

	v->evtchn_upcall_pending = 0;
	selector = atomic_swap_ulong(&v->evtchn_pending_sel, 0);

	for (row = 0; selector > 0; selector >>= 1, row++) {
		if ((selector & 1) == 0)
			continue;
		if ((sc->sc_ipg->evtchn_pending[row] &
		    ~(sc->sc_ipg->evtchn_mask[row])) == 0)
			continue;
		pending = atomic_swap_ulong(&sc->sc_ipg->evtchn_pending[row],
		    0) & ~(sc->sc_ipg->evtchn_mask[row]);
		for (bit = 0; pending > 0; pending >>= 1, bit++) {
			if ((pending & 1) == 0)
				continue;
			port = (row * LONG_BIT) + bit;
			if ((xi = xen_intsrc_acquire(sc, port)) == NULL) {
				printf("%s: unhandled interrupt on port %d\n",
				    sc->sc_dev.dv_xname, port);
				continue;
			}
			xi->xi_evcnt.ec_count++;
			xen_intr_mask_acquired(sc, xi);
			task_add(xi->xi_taskq, &xi->xi_task);
		}
	}
}

void
xen_intr_schedule(xen_intr_handle_t xih)
{
	struct xen_softc *sc = xen_sc;
	struct xen_intsrc *xi;

	if ((xi = xen_intsrc_acquire(sc, (evtchn_port_t)xih)) != NULL)
		task_add(xi->xi_taskq, &xi->xi_task);
}

static void
xen_barrier_task(void *arg)
{
	int *notdone = arg;

	*notdone = 0;
	wakeup_one(notdone);
}

/*
 * This code achieves two goals: 1) it makes sure that *after* masking
 * the interrupt source we're not getting any more task_adds:
 * intr_barrier takes care of that, and 2) it makes sure that the
 * interrupt task has finished executing the current task and won't be
 * called again: it queues a barrier task to await completion of the
 * current task and relies on the interrupt masking to prevent
 * submission of new tasks in the future.
 */
void
xen_intr_barrier(xen_intr_handle_t xih)
{
	struct xen_softc *sc = xen_sc;
	struct xen_intsrc *xi;
	struct sleep_state sls;
	int notdone = 1;
	struct task t = TASK_INITIALIZER(xen_barrier_task, &notdone);

	/*
	 * XXX This will need to be revised once intr_barrier starts
	 * using its argument.
	 */
	intr_barrier(NULL);

	if ((xi = xen_intsrc_acquire(sc, (evtchn_port_t)xih)) != NULL) {
		task_add(xi->xi_taskq, &t);
		while (notdone) {
			sleep_setup(&sls, &notdone, PWAIT, "xenbar");
			sleep_finish(&sls, notdone);
		}
		xen_intsrc_release(sc, xi);
	}
}

void
xen_intr_signal(xen_intr_handle_t xih)
{
	struct xen_softc *sc = xen_sc;
	struct xen_intsrc *xi;
	struct evtchn_send es;

	if ((xi = xen_intsrc_acquire(sc, (evtchn_port_t)xih)) != NULL) {
		es.port = xi->xi_port;
		xen_intsrc_release(sc, xi);
		xen_evtchn_hypercall(sc, EVTCHNOP_send, &es, sizeof(es));
	}
}

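/*
 * Bind an event channel port to a handler running in its own taskq.
 * With port 0 a fresh unbound channel is allocated for the remote
 * "domain"; a non-zero port (e.g. one obtained from XenStore) is used
 * as-is and flagged so that it won't be closed on disestablish.  A
 * frontend driver might use this roughly as follows (hypothetical
 * sketch, names are illustrative):
 *
 *	xen_intr_handle_t xih = 0;
 *	if (xen_intr_establish(0, &xih, sc->sc_domid, myfront_intr,
 *	    sc, sc->sc_dev.dv_xname) == 0) {
 *		(write the port number (evtchn_port_t)xih to XenStore)
 *		xen_intr_unmask(xih);
 *	}
 */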
int
xen_intr_establish(evtchn_port_t port, xen_intr_handle_t *xih, int domain,
    void (*handler)(void *), void *arg, char *name)
{
	struct xen_softc *sc = xen_sc;
	struct xen_intsrc *xi;
	struct evtchn_alloc_unbound eau;
#ifdef notyet
	struct evtchn_bind_vcpu ebv;
#endif
#if defined(XEN_DEBUG) && disabled
	struct evtchn_status es;
#endif

	if (port && (xi = xen_intsrc_acquire(sc, port)) != NULL) {
		xen_intsrc_release(sc, xi);
		DPRINTF("%s: interrupt handler has already been established "
		    "for port %u\n", sc->sc_dev.dv_xname, port);
		return (-1);
	}

	xi = malloc(sizeof(*xi), M_DEVBUF, M_NOWAIT | M_ZERO);
	if (xi == NULL)
		return (-1);

	xi->xi_port = (evtchn_port_t)*xih;

	xi->xi_handler = handler;
	xi->xi_ctx = arg;

	xi->xi_taskq = taskq_create(name, 1, IPL_NET, TASKQ_MPSAFE);
	if (!xi->xi_taskq) {
		printf("%s: failed to create interrupt task for %s\n",
		    sc->sc_dev.dv_xname, name);
		free(xi, M_DEVBUF, sizeof(*xi));
		return (-1);
	}
	task_set(&xi->xi_task, xen_intr_dispatch, xi);

	if (port == 0) {
		/* We're being asked to allocate a new event port */
		memset(&eau, 0, sizeof(eau));
		eau.dom = DOMID_SELF;
		eau.remote_dom = domain;
		if (xen_evtchn_hypercall(sc, EVTCHNOP_alloc_unbound, &eau,
		    sizeof(eau)) != 0) {
			DPRINTF("%s: failed to allocate new event port\n",
			    sc->sc_dev.dv_xname);
			free(xi, M_DEVBUF, sizeof(*xi));
			return (-1);
		}
		*xih = xi->xi_port = eau.port;
	} else {
		*xih = xi->xi_port = port;
		/*
		 * The Event Channel API didn't open this port, so it is not
		 * responsible for closing it automatically on unbind.
		 */
		xi->xi_noclose = 1;
	}

#ifdef notyet
	/* Bind interrupt to VCPU#0 */
	memset(&ebv, 0, sizeof(ebv));
	ebv.port = xi->xi_port;
	ebv.vcpu = 0;
	if (xen_evtchn_hypercall(sc, EVTCHNOP_bind_vcpu, &ebv, sizeof(ebv))) {
		printf("%s: failed to bind interrupt on port %u to vcpu%d\n",
		    sc->sc_dev.dv_xname, ebv.port, ebv.vcpu);
	}
#endif

	evcount_attach(&xi->xi_evcnt, name, &sc->sc_irq);

	xen_intsrc_add(sc, xi);

	/* Mask the event port */
	set_bit(xi->xi_port, &sc->sc_ipg->evtchn_mask[0]);

#if defined(XEN_DEBUG) && disabled
	memset(&es, 0, sizeof(es));
	es.dom = DOMID_SELF;
	es.port = xi->xi_port;
	if (xen_evtchn_hypercall(sc, EVTCHNOP_status, &es, sizeof(es))) {
		printf("%s: failed to obtain status for port %d\n",
		    sc->sc_dev.dv_xname, es.port);
	}
	printf("%s: port %u bound to vcpu%u", sc->sc_dev.dv_xname,
	    es.port, es.vcpu);
	if (es.status == EVTCHNSTAT_interdomain)
		printf(": domain %d port %u\n", es.u.interdomain.dom,
		    es.u.interdomain.port);
	else if (es.status == EVTCHNSTAT_unbound)
		printf(": domain %d\n", es.u.unbound.dom);
	else if (es.status == EVTCHNSTAT_pirq)
		printf(": pirq %u\n", es.u.pirq);
	else if (es.status == EVTCHNSTAT_virq)
		printf(": virq %u\n", es.u.virq);
	else
		printf("\n");
#endif

	return (0);
}

int
xen_intr_disestablish(xen_intr_handle_t xih)
{
	struct xen_softc *sc = xen_sc;
	evtchn_port_t port = (evtchn_port_t)xih;
	struct evtchn_close ec;
	struct xen_intsrc *xi;

	if ((xi = xen_intsrc_remove(sc, port)) == NULL)
		return (-1);

	evcount_detach(&xi->xi_evcnt);

	taskq_destroy(xi->xi_taskq);

	set_bit(xi->xi_port, &sc->sc_ipg->evtchn_mask[0]);
	clear_bit(xi->xi_port, &sc->sc_ipg->evtchn_pending[0]);

	if (!xi->xi_noclose) {
		ec.port = xi->xi_port;
		if (xen_evtchn_hypercall(sc, EVTCHNOP_close, &ec, sizeof(ec))) {
			DPRINTF("%s: failed to close event port %u\n",
			    sc->sc_dev.dv_xname, xi->xi_port);
		}
	}

	free(xi, M_DEVBUF, sizeof(*xi));
	return (0);
}

void
xen_intr_dispatch(void *arg)
{
	struct xen_softc *sc = xen_sc;
	struct xen_intsrc *xi = arg;

	if (xi->xi_handler)
		xi->xi_handler(xi->xi_ctx);

	xen_intr_unmask_release(sc, xi);
}

void
xen_intr_enable(void)
{
	struct xen_softc *sc = xen_sc;
	struct xen_intsrc *xi;
	struct evtchn_unmask eu;

	mtx_enter(&sc->sc_islck);
	SLIST_FOREACH(xi, &sc->sc_intrs, xi_entry) {
		if (!xi->xi_masked) {
			eu.port = xi->xi_port;
			if (xen_evtchn_hypercall(sc, EVTCHNOP_unmask, &eu,
			    sizeof(eu)))
				printf("%s: unmasking port %u failed\n",
				    sc->sc_dev.dv_xname, xi->xi_port);
			virtio_membar_sync();
			if (test_bit(xi->xi_port, &sc->sc_ipg->evtchn_mask[0]))
				printf("%s: port %u is still masked\n",
				    sc->sc_dev.dv_xname, xi->xi_port);
		}
	}
	mtx_leave(&sc->sc_islck);
}

void
xen_intr_mask(xen_intr_handle_t xih)
{
	struct xen_softc *sc = xen_sc;
	evtchn_port_t port = (evtchn_port_t)xih;
	struct xen_intsrc *xi;

	if ((xi = xen_intsrc_acquire(sc, port)) != NULL) {
		xen_intr_mask_acquired(sc, xi);
		xen_intsrc_release(sc, xi);
	}
}

int
xen_intr_unmask(xen_intr_handle_t xih)
{
	struct xen_softc *sc = xen_sc;
	evtchn_port_t port = (evtchn_port_t)xih;
	struct xen_intsrc *xi;

	if ((xi = xen_intsrc_acquire(sc, port)) != NULL)
		return (xen_intr_unmask_release(sc, xi));

	return (0);
}

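/*
 * Ask the hypervisor how many grant table frames this domain may use,
 * allocate a lookup table entry per frame and map the first frame.
 * Additional frames are mapped on demand by xen_grant_table_grow().
 */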
int
xen_init_grant_tables(struct xen_softc *sc)
{
	struct gnttab_query_size gqs;

	gqs.dom = DOMID_SELF;
	if (xen_hypercall(sc, XC_GNTTAB, 3, GNTTABOP_query_size, &gqs, 1)) {
		printf(": failed the query for grant table pages\n");
		return (-1);
	}
	if (gqs.nr_frames == 0 || gqs.nr_frames > gqs.max_nr_frames) {
		printf(": invalid number of grant table pages: %u/%u\n",
		    gqs.nr_frames, gqs.max_nr_frames);
		return (-1);
	}

	sc->sc_gntmax = gqs.max_nr_frames;

	sc->sc_gnt = mallocarray(sc->sc_gntmax + 1, sizeof(struct xen_gntent),
	    M_DEVBUF, M_ZERO | M_NOWAIT);
	if (sc->sc_gnt == NULL) {
		printf(": failed to allocate grant table lookup table\n");
		return (-1);
	}

	mtx_init(&sc->sc_gntlck, IPL_NET);

	if (xen_grant_table_grow(sc) == NULL) {
		free(sc->sc_gnt, M_DEVBUF, (sc->sc_gntmax + 1) *
		    sizeof(struct xen_gntent));
		return (-1);
	}

	printf(", %d grant table frames", sc->sc_gntmax);

	xen_bus_dma_tag._cookie = sc;

	return (0);
}

struct xen_gntent *
xen_grant_table_grow(struct xen_softc *sc)
{
	struct xen_add_to_physmap xatp;
	struct xen_gntent *ge;
	void *va;
	paddr_t pa;

	if (sc->sc_gntcnt == sc->sc_gntmax) {
		printf("%s: grant table frame allotment limit reached\n",
		    sc->sc_dev.dv_xname);
		return (NULL);
	}

	va = km_alloc(PAGE_SIZE, &kv_any, &kp_zero, &kd_nowait);
	if (va == NULL)
		return (NULL);
	if (!pmap_extract(pmap_kernel(), (vaddr_t)va, &pa)) {
		printf("%s: grant table page PA extraction failed\n",
		    sc->sc_dev.dv_xname);
		km_free(va, PAGE_SIZE, &kv_any, &kp_zero);
		return (NULL);
	}

	mtx_enter(&sc->sc_gntlck);

	ge = &sc->sc_gnt[sc->sc_gntcnt];
	ge->ge_table = va;

	xatp.domid = DOMID_SELF;
	xatp.idx = sc->sc_gntcnt;
	xatp.space = XENMAPSPACE_grant_table;
	xatp.gpfn = atop(pa);
	if (xen_hypercall(sc, XC_MEMORY, 2, XENMEM_add_to_physmap, &xatp)) {
		printf("%s: failed to add a grant table page\n",
		    sc->sc_dev.dv_xname);
		km_free(ge->ge_table, PAGE_SIZE, &kv_any, &kp_zero);
		mtx_leave(&sc->sc_gntlck);
		return (NULL);
	}
	ge->ge_start = sc->sc_gntcnt * GNTTAB_NEPG;
	/* First page has 8 reserved entries */
	ge->ge_reserved = ge->ge_start == 0 ? GNTTAB_NR_RESERVED_ENTRIES : 0;
	ge->ge_free = GNTTAB_NEPG - ge->ge_reserved;
	ge->ge_next = ge->ge_reserved;
	mtx_init(&ge->ge_lock, IPL_NET);

	sc->sc_gntcnt++;
	mtx_leave(&sc->sc_gntlck);

	return (ge);
}

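/*
 * Hand out a free grant table reference.  Try the most recently
 * mapped frame first, then all existing frames, and grow the table
 * only when every entry is in use.  A frame value of 0 marks a free
 * entry, so a claimed entry is stamped with a bogus frame number
 * until xen_grant_table_enter() fills in the real one.
 */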
int
xen_grant_table_alloc(struct xen_softc *sc, grant_ref_t *ref)
{
	struct xen_gntent *ge;
	int i;

	/* Start with the most recently allocated table page */
	ge = &sc->sc_gnt[sc->sc_gntcnt - 1];
	if (ge->ge_free > 0) {
		mtx_enter(&ge->ge_lock);
		if (ge->ge_free > 0)
			goto search;
		mtx_leave(&ge->ge_lock);
	}

	/* Try other existing table pages */
	for (i = 0; i < sc->sc_gntcnt; i++) {
		ge = &sc->sc_gnt[i];
		if (ge->ge_free == 0)
			continue;
		mtx_enter(&ge->ge_lock);
		if (ge->ge_free > 0)
			goto search;
		mtx_leave(&ge->ge_lock);
	}

 alloc:
	/* Allocate a new table page */
	if ((ge = xen_grant_table_grow(sc)) == NULL)
		return (-1);

	mtx_enter(&ge->ge_lock);
	if (ge->ge_free == 0) {
		/* We were not fast enough... */
		mtx_leave(&ge->ge_lock);
		goto alloc;
	}

 search:
	for (i = ge->ge_next;
	     /* Math works here because GNTTAB_NEPG is a power of 2 */
	     i != ((ge->ge_next + GNTTAB_NEPG - 1) & (GNTTAB_NEPG - 1));
	     i++) {
		if (i == GNTTAB_NEPG)
			i = 0;
		if (ge->ge_reserved && i < ge->ge_reserved)
			continue;
		if (ge->ge_table[i].frame != 0)
			continue;
		*ref = ge->ge_start + i;
		ge->ge_table[i].flags = GTF_invalid;
		ge->ge_table[i].frame = 0xffffffff; /* Mark as taken */
		if ((ge->ge_next = i + 1) == GNTTAB_NEPG)
			ge->ge_next = ge->ge_reserved;
		ge->ge_free--;
		mtx_leave(&ge->ge_lock);
		return (0);
	}
	mtx_leave(&ge->ge_lock);

	panic("page full, sc %p gnt %p (%d) ge %p", sc, sc->sc_gnt,
	    sc->sc_gntcnt, ge);
	return (-1);
}

void
xen_grant_table_free(struct xen_softc *sc, grant_ref_t ref)
{
	struct xen_gntent *ge;

#ifdef XEN_DEBUG
	if (ref >= sc->sc_gntcnt * GNTTAB_NEPG)
		panic("unmanaged ref %u sc %p gnt %p (%d)", ref, sc,
		    sc->sc_gnt, sc->sc_gntcnt);
#endif
	ge = &sc->sc_gnt[ref / GNTTAB_NEPG];
	mtx_enter(&ge->ge_lock);
#ifdef XEN_DEBUG
	if (ref < ge->ge_start || ref >= ge->ge_start + GNTTAB_NEPG) {
		mtx_leave(&ge->ge_lock);
		panic("out of bounds ref %u ge %p start %u sc %p gnt %p",
		    ref, ge, ge->ge_start, sc, sc->sc_gnt);
	}
#endif
	ref -= ge->ge_start;
	if (ge->ge_table[ref].flags != GTF_invalid) {
		mtx_leave(&ge->ge_lock);
		panic("reference %u is still in use, flags %#x frame %#x",
		    ref + ge->ge_start, ge->ge_table[ref].flags,
		    ge->ge_table[ref].frame);
	}
	ge->ge_table[ref].frame = 0;
	ge->ge_next = ref;
	ge->ge_free++;
	mtx_leave(&ge->ge_lock);
}

void
xen_grant_table_enter(struct xen_softc *sc, grant_ref_t ref, paddr_t pa,
    int domain, int flags)
{
	struct xen_gntent *ge;

#ifdef XEN_DEBUG
	if (ref >= sc->sc_gntcnt * GNTTAB_NEPG)
		panic("unmanaged ref %u sc %p gnt %p (%d)", ref, sc,
		    sc->sc_gnt, sc->sc_gntcnt);
#endif
	ge = &sc->sc_gnt[ref / GNTTAB_NEPG];
#ifdef XEN_DEBUG
	if (ref < ge->ge_start || ref >= ge->ge_start + GNTTAB_NEPG) {
		panic("out of bounds ref %u ge %p start %u sc %p gnt %p",
		    ref, ge, ge->ge_start, sc, sc->sc_gnt);
	}
#endif
	ref -= ge->ge_start;
	if (ge->ge_table[ref].flags != GTF_invalid) {
		panic("reference %u is still in use, flags %#x frame %#x",
		    ref + ge->ge_start, ge->ge_table[ref].flags,
		    ge->ge_table[ref].frame);
	}
	ge->ge_table[ref].frame = atop(pa);
	ge->ge_table[ref].domid = domain;
	virtio_membar_sync();
	ge->ge_table[ref].flags = GTF_permit_access | flags;
	virtio_membar_sync();
}

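/*
 * Invalidate a grant table entry.  The flags word is cleared with a
 * compare-and-swap so that an entry still held by the remote domain
 * (GTF_reading/GTF_writing set by the hypervisor) is never revoked
 * mid-transfer; we spin briefly and panic if the peer won't let go.
 */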
void
xen_grant_table_remove(struct xen_softc *sc, grant_ref_t ref)
{
	struct xen_gntent *ge;
	uint32_t flags, *ptr;
	int loop;

#ifdef XEN_DEBUG
	if (ref >= sc->sc_gntcnt * GNTTAB_NEPG)
		panic("unmanaged ref %u sc %p gnt %p (%d)", ref, sc,
		    sc->sc_gnt, sc->sc_gntcnt);
#endif
	ge = &sc->sc_gnt[ref / GNTTAB_NEPG];
#ifdef XEN_DEBUG
	if (ref < ge->ge_start || ref >= ge->ge_start + GNTTAB_NEPG) {
		panic("out of bounds ref %u ge %p start %u sc %p gnt %p",
		    ref, ge, ge->ge_start, sc, sc->sc_gnt);
	}
#endif
	ref -= ge->ge_start;
	/* Invalidate the grant reference */
	virtio_membar_sync();
	ptr = (uint32_t *)&ge->ge_table[ref];
	flags = (ge->ge_table[ref].flags & ~(GTF_reading|GTF_writing)) |
	    (ge->ge_table[ref].domid << 16);
	loop = 0;
	while (atomic_cas_uint(ptr, flags, GTF_invalid) != flags) {
		if (loop++ > 10) {
			panic("%s: grant table reference %u is held "
			    "by domain %d: frame %#x flags %#x",
			    sc->sc_dev.dv_xname, ref + ge->ge_start,
			    ge->ge_table[ref].domid, ge->ge_table[ref].frame,
			    ge->ge_table[ref].flags);
		}
#if (defined(__amd64__) || defined(__i386__))
		__asm volatile("pause": : : "memory");
#endif
	}
	ge->ge_table[ref].frame = 0xffffffff;
}

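/*
 * The Xen bus_dma(9) wrappers pair every DMA segment with a grant
 * table reference.  On map load each segment's physical address is
 * entered into the grant table and replaced with its grant reference,
 * which is what ring-based frontends hand to the backend domain.
 */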
int
xen_bus_dmamap_create(bus_dma_tag_t t, bus_size_t size, int nsegments,
    bus_size_t maxsegsz, bus_size_t boundary, int flags, bus_dmamap_t *dmamp)
{
	struct xen_softc *sc = t->_cookie;
	struct xen_gntmap *gm;
	int i, error;

	if (maxsegsz < PAGE_SIZE)
		return (EINVAL);

	/* Allocate a dma map structure */
	error = bus_dmamap_create(sc->sc_dmat, size, nsegments, maxsegsz,
	    boundary, flags, dmamp);
	if (error)
		return (error);
	/* Allocate an array of grant table pa<->ref maps */
	gm = mallocarray(nsegments, sizeof(struct xen_gntmap), M_DEVBUF,
	    M_ZERO | ((flags & BUS_DMA_NOWAIT) ? M_NOWAIT : M_WAITOK));
	if (gm == NULL) {
		bus_dmamap_destroy(sc->sc_dmat, *dmamp);
		*dmamp = NULL;
		return (ENOMEM);
	}
	/* Wire it to the dma map */
	(*dmamp)->_dm_cookie = gm;
	/* Claim references from the grant table */
	for (i = 0; i < (*dmamp)->_dm_segcnt; i++) {
		if (xen_grant_table_alloc(sc, &gm[i].gm_ref)) {
			xen_bus_dmamap_destroy(t, *dmamp);
			*dmamp = NULL;
			return (ENOBUFS);
		}
	}
	return (0);
}

void
xen_bus_dmamap_destroy(bus_dma_tag_t t, bus_dmamap_t map)
{
	struct xen_softc *sc = t->_cookie;
	struct xen_gntmap *gm;
	int i;

	gm = map->_dm_cookie;
	for (i = 0; i < map->_dm_segcnt; i++) {
		if (gm[i].gm_ref == 0)
			continue;
		xen_grant_table_free(sc, gm[i].gm_ref);
	}
	free(gm, M_DEVBUF, map->_dm_segcnt * sizeof(struct xen_gntmap));
	bus_dmamap_destroy(sc->sc_dmat, map);
}

int
xen_bus_dmamap_load(bus_dma_tag_t t, bus_dmamap_t map, void *buf,
    bus_size_t buflen, struct proc *p, int flags)
{
	struct xen_softc *sc = t->_cookie;
	struct xen_gntmap *gm = map->_dm_cookie;
	int i, domain, error;

	domain = flags >> 16;
	flags &= 0xffff;
	error = bus_dmamap_load(sc->sc_dmat, map, buf, buflen, p, flags);
	if (error)
		return (error);
	for (i = 0; i < map->dm_nsegs; i++) {
		xen_grant_table_enter(sc, gm[i].gm_ref, map->dm_segs[i].ds_addr,
		    domain, flags & BUS_DMA_WRITE ? GTF_readonly : 0);
		gm[i].gm_paddr = map->dm_segs[i].ds_addr;
		map->dm_segs[i].ds_addr = gm[i].gm_ref;
	}
	return (0);
}

int
xen_bus_dmamap_load_mbuf(bus_dma_tag_t t, bus_dmamap_t map, struct mbuf *m0,
    int flags)
{
	struct xen_softc *sc = t->_cookie;
	struct xen_gntmap *gm = map->_dm_cookie;
	int i, domain, error;

	domain = flags >> 16;
	flags &= 0xffff;
	error = bus_dmamap_load_mbuf(sc->sc_dmat, map, m0, flags);
	if (error)
		return (error);
	for (i = 0; i < map->dm_nsegs; i++) {
		xen_grant_table_enter(sc, gm[i].gm_ref, map->dm_segs[i].ds_addr,
		    domain, flags & BUS_DMA_WRITE ? GTF_readonly : 0);
		gm[i].gm_paddr = map->dm_segs[i].ds_addr;
		map->dm_segs[i].ds_addr = gm[i].gm_ref;
	}
	return (0);
}

void
xen_bus_dmamap_unload(bus_dma_tag_t t, bus_dmamap_t map)
{
	struct xen_softc *sc = t->_cookie;
	struct xen_gntmap *gm = map->_dm_cookie;
	int i;

	for (i = 0; i < map->dm_nsegs; i++) {
		if (gm[i].gm_paddr == 0)
			continue;
		xen_grant_table_remove(sc, gm[i].gm_ref);
		map->dm_segs[i].ds_addr = gm[i].gm_paddr;
		gm[i].gm_paddr = 0;
	}
	bus_dmamap_unload(sc->sc_dmat, map);
}

void
xen_bus_dmamap_sync(bus_dma_tag_t t, bus_dmamap_t map, bus_addr_t addr,
    bus_size_t size, int op)
{
	if ((op == (BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE)) ||
	    (op == (BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE)))
		virtio_membar_sync();
}

static int
xen_attach_print(void *aux, const char *name)
{
	struct xen_attach_args *xa = aux;

	if (name)
		printf("\"%s\" at %s: %s", xa->xa_name, name, xa->xa_node);

	return (UNCONF);
}

int
xen_attach_device(struct xen_softc *sc, struct xen_devlist *xdl,
    const char *name, const char *unit)
{
	struct xen_attach_args xa;
	struct xen_device *xdv;
	unsigned long long res;

	xa.xa_dmat = &xen_bus_dma_tag;

	strlcpy(xa.xa_name, name, sizeof(xa.xa_name));
	snprintf(xa.xa_node, sizeof(xa.xa_node), "device/%s/%s", name, unit);

	if (xs_getprop(sc, xa.xa_node, "backend", xa.xa_backend,
	    sizeof(xa.xa_backend))) {
		DPRINTF("%s: failed to identify \"backend\" for "
		    "\"%s\"\n", sc->sc_dev.dv_xname, xa.xa_node);
		return (EIO);
	}

	if (xs_getnum(sc, xa.xa_node, "backend-id", &res) || res > UINT16_MAX) {
		DPRINTF("%s: invalid \"backend-id\" for \"%s\"\n",
		    sc->sc_dev.dv_xname, xa.xa_node);
		return (EIO);
	}
	xa.xa_domid = (uint16_t)res;

	xdv = malloc(sizeof(struct xen_device), M_DEVBUF, M_ZERO | M_NOWAIT);
	if (xdv == NULL)
		return (ENOMEM);

	strlcpy(xdv->dv_unit, unit, sizeof(xdv->dv_unit));
	LIST_INSERT_HEAD(&xdl->dl_devs, xdv, dv_entry);

	xdv->dv_dev = config_found((struct device *)sc, &xa, xen_attach_print);

	return (0);
}

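/*
 * Enumerate the "device" subtree of XenStore: each child is a device
 * class ("vif", "vbd", ...) whose children in turn are unit names.
 * Attach a driver for every unit and set up a watch per class so that
 * xen_hotplug() is notified of later additions and removals.
 */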
int
xen_probe_devices(struct xen_softc *sc)
{
	struct xen_devlist *xdl;
	struct xs_transaction xst;
	struct iovec *iovp1 = NULL, *iovp2 = NULL;
	int i, j, error, iov1_cnt = 0, iov2_cnt = 0;
	char path[256];

	memset(&xst, 0, sizeof(xst));
	xst.xst_id = 0;
	xst.xst_cookie = sc->sc_xs;

	if ((error = xs_cmd(&xst, XS_LIST, "device", &iovp1, &iov1_cnt)) != 0)
		return (error);

	for (i = 0; i < iov1_cnt; i++) {
		if (strcmp("suspend", (char *)iovp1[i].iov_base) == 0)
			continue;
		snprintf(path, sizeof(path), "device/%s",
		    (char *)iovp1[i].iov_base);
		if ((error = xs_cmd(&xst, XS_LIST, path, &iovp2,
		    &iov2_cnt)) != 0)
			goto out;
		if ((xdl = malloc(sizeof(struct xen_devlist), M_DEVBUF,
		    M_ZERO | M_NOWAIT)) == NULL) {
			error = ENOMEM;
			goto out;
		}
		xdl->dl_xen = sc;
		strlcpy(xdl->dl_node, (const char *)iovp1[i].iov_base,
		    XEN_MAX_NODE_LEN);
		for (j = 0; j < iov2_cnt; j++) {
			error = xen_attach_device(sc, xdl,
			    (const char *)iovp1[i].iov_base,
			    (const char *)iovp2[j].iov_base);
			if (error) {
				printf("%s: failed to attach \"%s/%s\"\n",
				    sc->sc_dev.dv_xname, path,
				    (const char *)iovp2[j].iov_base);
				goto out;
			}
		}
		/* Setup a watch for every device subtree */
		if (xs_watch(sc, "device", (char *)iovp1[i].iov_base,
		    &xdl->dl_task, xen_hotplug, xdl))
			printf("%s: failed to setup hotplug watch for \"%s\"\n",
			    sc->sc_dev.dv_xname, (char *)iovp1[i].iov_base);
		SLIST_INSERT_HEAD(&sc->sc_devlists, xdl, dl_entry);
		xs_resfree(&xst, iovp2, iov2_cnt);
		iovp2 = NULL;
		iov2_cnt = 0;
	}

 out:
	if (iovp2)
		xs_resfree(&xst, iovp2, iov2_cnt);
	xs_resfree(&xst, iovp1, iov1_cnt);
	return (error);
}

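/*
 * A device class watch has fired: re-list the class in XenStore and
 * reconcile it with our bookkeeping, detaching devices that have
 * disappeared from the tree and attaching the ones that are new.
 */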
void
xen_hotplug(void *arg)
{
	struct xen_devlist *xdl = arg;
	struct xen_softc *sc = xdl->dl_xen;
	struct xen_device *xdv, *xvdn;
	struct xs_transaction xst;
	struct iovec *iovp = NULL;
	int error, i, keep, iov_cnt = 0;
	char path[256];
	int8_t *seen;

	memset(&xst, 0, sizeof(xst));
	xst.xst_id = 0;
	xst.xst_cookie = sc->sc_xs;

	snprintf(path, sizeof(path), "device/%s", xdl->dl_node);
	if ((error = xs_cmd(&xst, XS_LIST, path, &iovp, &iov_cnt)) != 0)
		return;

	seen = malloc(iov_cnt, M_TEMP, M_ZERO | M_WAITOK);

	/* Detect all removed and kept devices */
	LIST_FOREACH_SAFE(xdv, &xdl->dl_devs, dv_entry, xvdn) {
		for (i = 0, keep = 0; i < iov_cnt; i++) {
			if (!seen[i] &&
			    !strcmp(xdv->dv_unit, (char *)iovp[i].iov_base)) {
				seen[i]++;
				keep++;
				break;
			}
		}
		if (!keep) {
			DPRINTF("%s: removing \"%s/%s\"\n", sc->sc_dev.dv_xname,
			    xdl->dl_node, xdv->dv_unit);
			LIST_REMOVE(xdv, dv_entry);
			config_detach(xdv->dv_dev, 0);
			free(xdv, M_DEVBUF, sizeof(struct xen_device));
		}
	}

	/* Attach all new devices */
	for (i = 0; i < iov_cnt; i++) {
		if (seen[i])
			continue;
		DPRINTF("%s: attaching \"%s/%s\"\n", sc->sc_dev.dv_xname,
		    xdl->dl_node, (const char *)iovp[i].iov_base);
		error = xen_attach_device(sc, xdl, xdl->dl_node,
		    (const char *)iovp[i].iov_base);
		if (error) {
			printf("%s: failed to attach \"%s/%s\"\n",
			    sc->sc_dev.dv_xname, path,
			    (const char *)iovp[i].iov_base);
			continue;
		}
	}

	free(seen, M_TEMP, iov_cnt);

	xs_resfree(&xst, iovp, iov_cnt);
}

#include <machine/pio.h>

#define	XMI_PORT		0x10
#define XMI_MAGIC		0x49d2
#define XMI_UNPLUG_IDE		0x01
#define XMI_UNPLUG_NIC		0x02
#define XMI_UNPLUG_IDESEC	0x04

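/*
 * QEMU's unplug protocol: a guest that reads the magic value back
 * from I/O port 0x10 may write a mask of XMI_UNPLUG_* bits there to
 * ask for the emulated IDE disks and/or NICs to be unplugged, leaving
 * only their paravirtual counterparts visible.
 */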
void
xen_disable_emulated_devices(struct xen_softc *sc)
{
#if defined(__i386__) || defined(__amd64__)
	ushort unplug = 0;

	if (inw(XMI_PORT) != XMI_MAGIC) {
		printf("%s: failed to disable emulated devices\n",
		    sc->sc_dev.dv_xname);
		return;
	}
	if (sc->sc_unplug & XEN_UNPLUG_IDE)
		unplug |= XMI_UNPLUG_IDE;
	if (sc->sc_unplug & XEN_UNPLUG_IDESEC)
		unplug |= XMI_UNPLUG_IDESEC;
	if (sc->sc_unplug & XEN_UNPLUG_NIC)
		unplug |= XMI_UNPLUG_NIC;
	if (unplug)
		outw(XMI_PORT, unplug);
#endif	/* __i386__ || __amd64__ */
}

void
xen_unplug_emulated(void *xsc, int what)
{
	struct xen_softc *sc = xsc;

	sc->sc_unplug |= what;
}
1570