/*	$OpenBSD: xen.c,v 1.97 2020/06/29 06:50:52 jsg Exp $	*/

/*
 * Copyright (c) 2015, 2016, 2017 Mike Belopuhov
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/param.h>

/* Xen requires locked atomic operations */
#ifndef MULTIPROCESSOR
#define _XENMPATOMICS
#define MULTIPROCESSOR
#endif
#include <sys/atomic.h>
#ifdef _XENMPATOMICS
#undef MULTIPROCESSOR
#undef _XENMPATOMICS
#endif

#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/signal.h>
#include <sys/signalvar.h>
#include <sys/refcnt.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/stdint.h>
#include <sys/device.h>
#include <sys/task.h>
#include <sys/syslog.h>

#include <machine/bus.h>
#include <machine/cpu.h>
#include <machine/cpufunc.h>

#include <uvm/uvm_extern.h>

#include <machine/i82489var.h>

#include <dev/pv/pvvar.h>
#include <dev/pv/pvreg.h>
#include <dev/pv/xenreg.h>
#include <dev/pv/xenvar.h>

/* #define XEN_DEBUG */

#ifdef XEN_DEBUG
#define DPRINTF(x...)		printf(x)
#else
#define DPRINTF(x...)
#endif

struct xen_softc *xen_sc;

int	xen_init_hypercall(struct xen_softc *);
int	xen_getfeatures(struct xen_softc *);
int	xen_init_info_page(struct xen_softc *);
int	xen_init_cbvec(struct xen_softc *);
int	xen_init_interrupts(struct xen_softc *);
void	xen_intr_dispatch(void *);
int	xen_init_grant_tables(struct xen_softc *);
struct xen_gntent *
	xen_grant_table_grow(struct xen_softc *);
int	xen_grant_table_alloc(struct xen_softc *, grant_ref_t *);
void	xen_grant_table_free(struct xen_softc *, grant_ref_t);
void	xen_grant_table_enter(struct xen_softc *, grant_ref_t, paddr_t,
	    int, int);
void	xen_grant_table_remove(struct xen_softc *, grant_ref_t);
void	xen_disable_emulated_devices(struct xen_softc *);

int	xen_match(struct device *, void *, void *);
void	xen_attach(struct device *, struct device *, void *);
void	xen_deferred(struct device *);
void	xen_control(void *);
void	xen_hotplug(void *);
void	xen_resume(struct device *);
int	xen_activate(struct device *, int);
int	xen_attach_device(struct xen_softc *, struct xen_devlist *,
	    const char *, const char *);
int	xen_probe_devices(struct xen_softc *);

int	xen_bus_dmamap_create(bus_dma_tag_t, bus_size_t, int, bus_size_t,
	    bus_size_t, int, bus_dmamap_t *);
void	xen_bus_dmamap_destroy(bus_dma_tag_t, bus_dmamap_t);
int	xen_bus_dmamap_load(bus_dma_tag_t, bus_dmamap_t, void *, bus_size_t,
	    struct proc *, int);
int	xen_bus_dmamap_load_mbuf(bus_dma_tag_t, bus_dmamap_t, struct mbuf *,
	    int);
void	xen_bus_dmamap_unload(bus_dma_tag_t, bus_dmamap_t);
void	xen_bus_dmamap_sync(bus_dma_tag_t, bus_dmamap_t, bus_addr_t,
	    bus_size_t, int);

int	xs_attach(struct xen_softc *);

struct cfdriver xen_cd = {
	NULL, "xen", DV_DULL
};

const struct cfattach xen_ca = {
	sizeof(struct xen_softc), xen_match, xen_attach, NULL, xen_activate
};

struct bus_dma_tag xen_bus_dma_tag = {
	NULL,
	xen_bus_dmamap_create,
	xen_bus_dmamap_destroy,
	xen_bus_dmamap_load,
	xen_bus_dmamap_load_mbuf,
	NULL,
	NULL,
	xen_bus_dmamap_unload,
	xen_bus_dmamap_sync,
	_bus_dmamem_alloc,
	NULL,
	_bus_dmamem_free,
	_bus_dmamem_map,
	_bus_dmamem_unmap,
	NULL,
};

int
xen_match(struct device *parent, void *match, void *aux)
{
	struct pv_attach_args *pva = aux;
	struct pvbus_hv *hv = &pva->pva_hv[PVBUS_XEN];

	if (hv->hv_base == 0)
		return (0);

	return (1);
}

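/*
 * Attach the Xen guest framework: install the hypercall page, query
 * hypervisor features, map the shared info page, set up the callback
 * interrupt vector, event channel interrupts and grant tables, attach
 * the XenStore interface and probe the XenStore "device" tree for
 * paravirtualized devices.
 */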
void
xen_attach(struct device *parent, struct device *self, void *aux)
{
	struct pv_attach_args *pva = (struct pv_attach_args *)aux;
	struct pvbus_hv *hv = &pva->pva_hv[PVBUS_XEN];
	struct xen_softc *sc = (struct xen_softc *)self;

	sc->sc_base = hv->hv_base;
	sc->sc_dmat = pva->pva_dmat;

	if (xen_init_hypercall(sc))
		return;

	/* Wire it up to the global */
	xen_sc = sc;

	if (xen_getfeatures(sc))
		return;

	if (xen_init_info_page(sc))
		return;

	xen_init_cbvec(sc);

	if (xen_init_interrupts(sc))
		return;

	if (xen_init_grant_tables(sc))
		return;

	if (xs_attach(sc))
		return;

	xen_probe_devices(sc);

	/* pvbus(4) key/value interface */
	hv->hv_kvop = xs_kvop;
	hv->hv_arg = sc;

	xen_disable_emulated_devices(sc);

	config_mountroot(self, xen_deferred);
}

void
xen_deferred(struct device *self)
{
	struct xen_softc *sc = (struct xen_softc *)self;

	if (!(sc->sc_flags & XSF_CBVEC)) {
		DPRINTF("%s: callback vector hasn't been established\n",
		    sc->sc_dev.dv_xname);
		return;
	}

	xen_intr_enable();

	if (xs_watch(sc, "control", "shutdown", &sc->sc_ctltsk,
	    xen_control, sc))
		printf("%s: failed to setup shutdown control watch\n",
		    sc->sc_dev.dv_xname);
}

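/*
 * Handle a request written by the control domain to the
 * "control/shutdown" XenStore node: power off, reboot, crash or
 * (not yet implemented) suspend.  The node is cleared to acknowledge
 * the event before the action is taken.
 */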
void
xen_control(void *arg)
{
	struct xen_softc *sc = arg;
	struct xs_transaction xst;
	char action[128];
	int error;

	memset(&xst, 0, sizeof(xst));
	xst.xst_id = 0;
	xst.xst_cookie = sc->sc_xs;

	error = xs_getprop(sc, "control", "shutdown", action, sizeof(action));
	if (error) {
		if (error != ENOENT)
			printf("%s: failed to process control event\n",
			    sc->sc_dev.dv_xname);
		return;
	}

	if (strlen(action) == 0)
		return;

	/* Acknowledge the event */
	xs_setprop(sc, "control", "shutdown", "", 0);

	if (strcmp(action, "halt") == 0 || strcmp(action, "poweroff") == 0) {
		pvbus_shutdown(&sc->sc_dev);
	} else if (strcmp(action, "reboot") == 0) {
		pvbus_reboot(&sc->sc_dev);
	} else if (strcmp(action, "crash") == 0) {
		panic("xen told us to do this");
	} else if (strcmp(action, "suspend") == 0) {
		/* Not implemented yet */
	} else {
		printf("%s: unknown shutdown event \"%s\"\n",
		    sc->sc_dev.dv_xname, action);
	}
}

void
xen_resume(struct device *self)
{
}

int
xen_activate(struct device *self, int act)
{
	int rv = 0;

	switch (act) {
	case DVACT_RESUME:
		xen_resume(self);
		break;
	}
	return (rv);
}

int
xen_init_hypercall(struct xen_softc *sc)
{
	extern void *xen_hypercall_page;
	uint32_t regs[4];
	paddr_t pa;

	/* Get hypercall page configuration MSR */
	CPUID(sc->sc_base + CPUID_OFFSET_XEN_HYPERCALL,
	    regs[0], regs[1], regs[2], regs[3]);

	/* We don't support more than one hypercall page */
	if (regs[0] != 1) {
		printf(": requested %u hypercall pages\n", regs[0]);
		return (-1);
	}

	sc->sc_hc = &xen_hypercall_page;

	if (!pmap_extract(pmap_kernel(), (vaddr_t)sc->sc_hc, &pa)) {
		printf(": hypercall page PA extraction failed\n");
		return (-1);
	}
	wrmsr(regs[1], pa);

	return (0);
}

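/*
 * Issue a hypercall with up to 5 arguments: the variadic arguments
 * are collected into an array and passed to xen_hypercallv().  For
 * example, the feature query in xen_getfeatures() below is issued as
 *
 *	xen_hypercall(sc, XC_VERSION, 2, XENVER_get_features, &xfi);
 */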
int
xen_hypercall(struct xen_softc *sc, int op, int argc, ...)
{
	va_list ap;
	ulong argv[5];
	int i;

	if (argc < 0 || argc > 5)
		return (-1);
	va_start(ap, argc);
	for (i = 0; i < argc; i++)
		argv[i] = (ulong)va_arg(ap, ulong);
	va_end(ap);
	return (xen_hypercallv(sc, op, argc, argv));
}

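/*
 * The hypercall page is an array of 32-byte call stubs, one per
 * hypercall number, hence the entry point for operation "op" lives at
 * sc_hc + op * 32.  The architecture-specific register setup and
 * clobbers are hidden behind the HYPERCALL_* macros.
 */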
311 xen_hypercallv(struct xen_softc *sc, int op, int argc, ulong *argv)
312 {
313 	ulong hcall;
314 	int rv = 0;
315 
316 	hcall = (ulong)sc->sc_hc + op * 32;
317 
318 #if defined(XEN_DEBUG) && disabled
319 	{
320 		int i;
321 
322 		printf("hypercall %d", op);
323 		if (argc > 0) {
324 			printf(", args {");
325 			for (i = 0; i < argc; i++)
326 				printf(" %#lx", argv[i]);
327 			printf(" }\n");
328 		} else
329 			printf("\n");
330 	}
331 #endif
332 
333 	switch (argc) {
334 	case 0: {
335 		HYPERCALL_RES1;
336 		__asm__ volatile (			\
337 			  HYPERCALL_LABEL		\
338 			: HYPERCALL_OUT1		\
339 			: HYPERCALL_PTR(hcall)		\
340 			: HYPERCALL_CLOBBER		\
341 		);
342 		HYPERCALL_RET(rv);
343 		break;
344 	}
345 	case 1: {
346 		HYPERCALL_RES1; HYPERCALL_RES2;
347 		HYPERCALL_ARG1(argv[0]);
348 		__asm__ volatile (			\
349 			  HYPERCALL_LABEL		\
350 			: HYPERCALL_OUT1 HYPERCALL_OUT2	\
351 			: HYPERCALL_IN1			\
352 			, HYPERCALL_PTR(hcall)		\
353 			: HYPERCALL_CLOBBER		\
354 		);
355 		HYPERCALL_RET(rv);
356 		break;
357 	}
358 	case 2: {
359 		HYPERCALL_RES1; HYPERCALL_RES2; HYPERCALL_RES3;
360 		HYPERCALL_ARG1(argv[0]); HYPERCALL_ARG2(argv[1]);
361 		__asm__ volatile (			\
362 			  HYPERCALL_LABEL		\
363 			: HYPERCALL_OUT1 HYPERCALL_OUT2	\
364 			  HYPERCALL_OUT3		\
365 			: HYPERCALL_IN1	HYPERCALL_IN2	\
366 			, HYPERCALL_PTR(hcall)		\
367 			: HYPERCALL_CLOBBER		\
368 		);
369 		HYPERCALL_RET(rv);
370 		break;
371 	}
372 	case 3: {
373 		HYPERCALL_RES1; HYPERCALL_RES2; HYPERCALL_RES3;
374 		HYPERCALL_RES4;
375 		HYPERCALL_ARG1(argv[0]); HYPERCALL_ARG2(argv[1]);
376 		HYPERCALL_ARG3(argv[2]);
377 		__asm__ volatile (			\
378 			  HYPERCALL_LABEL		\
379 			: HYPERCALL_OUT1 HYPERCALL_OUT2	\
380 			  HYPERCALL_OUT3 HYPERCALL_OUT4	\
381 			: HYPERCALL_IN1	HYPERCALL_IN2	\
382 			  HYPERCALL_IN3			\
383 			, HYPERCALL_PTR(hcall)		\
384 			: HYPERCALL_CLOBBER		\
385 		);
386 		HYPERCALL_RET(rv);
387 		break;
388 	}
389 	case 4: {
390 		HYPERCALL_RES1; HYPERCALL_RES2; HYPERCALL_RES3;
391 		HYPERCALL_RES4; HYPERCALL_RES5;
392 		HYPERCALL_ARG1(argv[0]); HYPERCALL_ARG2(argv[1]);
393 		HYPERCALL_ARG3(argv[2]); HYPERCALL_ARG4(argv[3]);
394 		__asm__ volatile (			\
395 			  HYPERCALL_LABEL		\
396 			: HYPERCALL_OUT1 HYPERCALL_OUT2	\
397 			  HYPERCALL_OUT3 HYPERCALL_OUT4	\
398 			  HYPERCALL_OUT5		\
399 			: HYPERCALL_IN1	HYPERCALL_IN2	\
400 			  HYPERCALL_IN3	HYPERCALL_IN4	\
401 			, HYPERCALL_PTR(hcall)		\
402 			: HYPERCALL_CLOBBER		\
403 		);
404 		HYPERCALL_RET(rv);
405 		break;
406 	}
407 	case 5: {
408 		HYPERCALL_RES1; HYPERCALL_RES2; HYPERCALL_RES3;
409 		HYPERCALL_RES4; HYPERCALL_RES5; HYPERCALL_RES6;
410 		HYPERCALL_ARG1(argv[0]); HYPERCALL_ARG2(argv[1]);
411 		HYPERCALL_ARG3(argv[2]); HYPERCALL_ARG4(argv[3]);
412 		HYPERCALL_ARG5(argv[4]);
413 		__asm__ volatile (			\
414 			  HYPERCALL_LABEL		\
415 			: HYPERCALL_OUT1 HYPERCALL_OUT2	\
416 			  HYPERCALL_OUT3 HYPERCALL_OUT4	\
417 			  HYPERCALL_OUT5 HYPERCALL_OUT6	\
418 			: HYPERCALL_IN1	HYPERCALL_IN2	\
419 			  HYPERCALL_IN3	HYPERCALL_IN4	\
420 			  HYPERCALL_IN5			\
421 			, HYPERCALL_PTR(hcall)		\
422 			: HYPERCALL_CLOBBER		\
423 		);
424 		HYPERCALL_RET(rv);
425 		break;
426 	}
427 	default:
428 		DPRINTF("%s: wrong number of arguments: %d\n", __func__, argc);
429 		rv = -1;
430 		break;
431 	}
432 	return (rv);
433 }
434 
435 int
436 xen_getfeatures(struct xen_softc *sc)
437 {
438 	struct xen_feature_info xfi;
439 
440 	memset(&xfi, 0, sizeof(xfi));
441 	if (xen_hypercall(sc, XC_VERSION, 2, XENVER_get_features, &xfi) < 0) {
442 		printf(": failed to fetch features\n");
443 		return (-1);
444 	}
445 	sc->sc_features = xfi.submap;
446 #ifdef XEN_DEBUG
447 	printf(": features %b", sc->sc_features,
448 	    "\20\014DOM0\013PIRQ\012PVCLOCK\011CBVEC\010GNTFLAGS\007HMA"
449 	    "\006PTUPD\005PAE4G\004SUPERVISOR\003AUTOPMAP\002WDT\001WPT");
450 #else
451 	printf(": features %#x", sc->sc_features);
452 #endif
453 	return (0);
454 }
455 
456 #ifdef XEN_DEBUG
457 void
458 xen_print_info_page(void)
459 {
460 	struct xen_softc *sc = xen_sc;
461 	struct shared_info *s = sc->sc_ipg;
462 	struct vcpu_info *v;
463 	int i;
464 
465 	virtio_membar_sync();
466 	for (i = 0; i < XEN_LEGACY_MAX_VCPUS; i++) {
467 		v = &s->vcpu_info[i];
468 		if (!v->evtchn_upcall_pending && !v->evtchn_upcall_mask &&
469 		    !v->evtchn_pending_sel && !v->time.version &&
470 		    !v->time.tsc_timestamp && !v->time.system_time &&
471 		    !v->time.tsc_to_system_mul && !v->time.tsc_shift)
472 			continue;
473 		printf("vcpu%d:\n"
474 		    "   upcall_pending=%02x upcall_mask=%02x pending_sel=%#lx\n"
475 		    "   time version=%u tsc=%llu system=%llu\n"
476 		    "   time mul=%u shift=%d\n",
477 		    i, v->evtchn_upcall_pending, v->evtchn_upcall_mask,
478 		    v->evtchn_pending_sel, v->time.version,
479 		    v->time.tsc_timestamp, v->time.system_time,
480 		    v->time.tsc_to_system_mul, v->time.tsc_shift);
481 	}
482 	printf("pending events: ");
483 	for (i = 0; i < nitems(s->evtchn_pending); i++) {
484 		if (s->evtchn_pending[i] == 0)
485 			continue;
486 		printf(" %d:%#lx", i, s->evtchn_pending[i]);
487 	}
488 	printf("\nmasked events: ");
489 	for (i = 0; i < nitems(s->evtchn_mask); i++) {
490 		if (s->evtchn_mask[i] == 0xffffffffffffffffULL)
491 			continue;
492 		printf(" %d:%#lx", i, s->evtchn_mask[i]);
493 	}
494 	printf("\nwc ver=%u sec=%u nsec=%u\n", s->wc_version, s->wc_sec,
495 	    s->wc_nsec);
496 	printf("arch maxpfn=%lu framelist=%lu nmi=%lu\n", s->arch.max_pfn,
497 	    s->arch.pfn_to_mfn_frame_list, s->arch.nmi_reason);
498 }
499 #endif	/* XEN_DEBUG */
500 
501 int
502 xen_init_info_page(struct xen_softc *sc)
503 {
504 	struct xen_add_to_physmap xatp;
505 	paddr_t pa;
506 
507 	sc->sc_ipg = malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT | M_ZERO);
508 	if (sc->sc_ipg == NULL) {
509 		printf(": failed to allocate shared info page\n");
510 		return (-1);
511 	}
512 	if (!pmap_extract(pmap_kernel(), (vaddr_t)sc->sc_ipg, &pa)) {
513 		printf(": shared info page PA extraction failed\n");
514 		free(sc->sc_ipg, M_DEVBUF, PAGE_SIZE);
515 		return (-1);
516 	}
517 	xatp.domid = DOMID_SELF;
518 	xatp.idx = 0;
519 	xatp.space = XENMAPSPACE_shared_info;
520 	xatp.gpfn = atop(pa);
521 	if (xen_hypercall(sc, XC_MEMORY, 2, XENMEM_add_to_physmap, &xatp)) {
522 		printf(": failed to register shared info page\n");
523 		free(sc->sc_ipg, M_DEVBUF, PAGE_SIZE);
524 		return (-1);
525 	}
526 	return (0);
527 }
528 
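/*
 * Ask the hypervisor to deliver event channel upcalls through a
 * dedicated IDT vector (LAPIC_XEN_VECTOR) instead of an emulated PCI
 * interrupt.  If the hypervisor doesn't accept the parameter,
 * interrupt delivery will be retried with the xspd(4) PCI interrupt.
 */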
int
xen_init_cbvec(struct xen_softc *sc)
{
	struct xen_hvm_param xhp;

	if ((sc->sc_features & XENFEAT_CBVEC) == 0)
		return (ENOENT);

	xhp.domid = DOMID_SELF;
	xhp.index = HVM_PARAM_CALLBACK_IRQ;
	xhp.value = HVM_CALLBACK_VECTOR(LAPIC_XEN_VECTOR);
	if (xen_hypercall(sc, XC_HVM, 2, HVMOP_set_param, &xhp)) {
		/* Will retry with the xspd(4) PCI interrupt */
		return (ENOENT);
	}
	DPRINTF(", idtvec %d", LAPIC_XEN_VECTOR);

	sc->sc_flags |= XSF_CBVEC;

	return (0);
}

int
xen_init_interrupts(struct xen_softc *sc)
{
	int i;

	sc->sc_irq = LAPIC_XEN_VECTOR;

	/*
	 * Clear all pending events and mask all interrupts
	 */
	for (i = 0; i < nitems(sc->sc_ipg->evtchn_pending); i++) {
		sc->sc_ipg->evtchn_pending[i] = 0;
		sc->sc_ipg->evtchn_mask[i] = ~0UL;
	}

	SLIST_INIT(&sc->sc_intrs);

	mtx_init(&sc->sc_islck, IPL_NET);

	return (0);
}

static int
xen_evtchn_hypercall(struct xen_softc *sc, int cmd, void *arg, size_t len)
{
	struct evtchn_op compat;
	int error;

	error = xen_hypercall(sc, XC_EVTCHN, 2, cmd, arg);
	if (error == -ENOXENSYS) {
		memset(&compat, 0, sizeof(compat));
		compat.cmd = cmd;
		memcpy(&compat.u, arg, len);
		error = xen_hypercall(sc, XC_OEVTCHN, 1, &compat);
	}
	return (error);
}

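/*
 * Interrupt sources are kept on an SLIST protected by sc_islck and
 * are reference counted: xen_intsrc_acquire() looks a source up by
 * port and takes a reference, xen_intsrc_release() drops it, and
 * xen_intsrc_remove() unlinks the source and waits in
 * refcnt_finalize() for all outstanding references to drain.
 */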
590 xen_intsrc_add(struct xen_softc *sc, struct xen_intsrc *xi)
591 {
592 	refcnt_init(&xi->xi_refcnt);
593 	mtx_enter(&sc->sc_islck);
594 	SLIST_INSERT_HEAD(&sc->sc_intrs, xi, xi_entry);
595 	mtx_leave(&sc->sc_islck);
596 }
597 
598 static inline struct xen_intsrc *
599 xen_intsrc_acquire(struct xen_softc *sc, evtchn_port_t port)
600 {
601 	struct xen_intsrc *xi = NULL;
602 
603 	mtx_enter(&sc->sc_islck);
604 	SLIST_FOREACH(xi, &sc->sc_intrs, xi_entry) {
605 		if (xi->xi_port == port) {
606 			refcnt_take(&xi->xi_refcnt);
607 			break;
608 		}
609 	}
610 	mtx_leave(&sc->sc_islck);
611 	return (xi);
612 }
613 
614 static inline void
615 xen_intsrc_release(struct xen_softc *sc, struct xen_intsrc *xi)
616 {
617 	refcnt_rele_wake(&xi->xi_refcnt);
618 }
619 
620 static inline struct xen_intsrc *
621 xen_intsrc_remove(struct xen_softc *sc, evtchn_port_t port)
622 {
623 	struct xen_intsrc *xi;
624 
625 	mtx_enter(&sc->sc_islck);
626 	SLIST_FOREACH(xi, &sc->sc_intrs, xi_entry) {
627 		if (xi->xi_port == port) {
628 			SLIST_REMOVE(&sc->sc_intrs, xi, xen_intsrc, xi_entry);
629 			break;
630 		}
631 	}
632 	mtx_leave(&sc->sc_islck);
633 	if (xi != NULL)
634 		refcnt_finalize(&xi->xi_refcnt, "xenisrm");
635 	return (xi);
636 }
637 
638 static inline void
639 xen_intr_mask_acquired(struct xen_softc *sc, struct xen_intsrc *xi)
640 {
641 	xi->xi_masked = 1;
642 	set_bit(xi->xi_port, &sc->sc_ipg->evtchn_mask[0]);
643 }
644 
645 static inline int
646 xen_intr_unmask_release(struct xen_softc *sc, struct xen_intsrc *xi)
647 {
648 	struct evtchn_unmask eu;
649 
650 	xi->xi_masked = 0;
651 	if (!test_bit(xi->xi_port, &sc->sc_ipg->evtchn_mask[0])) {
652 		xen_intsrc_release(sc, xi);
653 		return (0);
654 	}
655 	eu.port = xi->xi_port;
656 	xen_intsrc_release(sc, xi);
657 	return (xen_evtchn_hypercall(sc, EVTCHNOP_unmask, &eu, sizeof(eu)));
658 }
659 
660 void
661 xen_intr_ack(void)
662 {
663 	struct xen_softc *sc = xen_sc;
664 	struct shared_info *s = sc->sc_ipg;
665 	struct cpu_info *ci = curcpu();
666 	struct vcpu_info *v = &s->vcpu_info[CPU_INFO_UNIT(ci)];
667 
668 	v->evtchn_upcall_pending = 0;
669 	virtio_membar_sync();
670 }
671 
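/*
 * Event channel upcall handler.  Pending events are tracked in a
 * two-level bitmap in the shared info page: evtchn_pending_sel in the
 * vcpu info selects rows of evtchn_pending[], and every set bit in an
 * unmasked row maps to port = row * LONG_BIT + bit.  Each pending port
 * is masked and its handler is scheduled on the interrupt taskq; the
 * port gets unmasked again in xen_intr_dispatch().
 */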
void
xen_intr(void)
{
	struct xen_softc *sc = xen_sc;
	struct xen_intsrc *xi;
	struct shared_info *s = sc->sc_ipg;
	struct cpu_info *ci = curcpu();
	struct vcpu_info *v = &s->vcpu_info[CPU_INFO_UNIT(ci)];
	ulong pending, selector;
	int port, bit, row;

	v->evtchn_upcall_pending = 0;
	selector = atomic_swap_ulong(&v->evtchn_pending_sel, 0);

	for (row = 0; selector > 0; selector >>= 1, row++) {
		if ((selector & 1) == 0)
			continue;
		if ((sc->sc_ipg->evtchn_pending[row] &
		    ~(sc->sc_ipg->evtchn_mask[row])) == 0)
			continue;
		pending = atomic_swap_ulong(&sc->sc_ipg->evtchn_pending[row],
		    0) & ~(sc->sc_ipg->evtchn_mask[row]);
		for (bit = 0; pending > 0; pending >>= 1, bit++) {
			if ((pending & 1) == 0)
				continue;
			port = (row * LONG_BIT) + bit;
			if ((xi = xen_intsrc_acquire(sc, port)) == NULL) {
				printf("%s: unhandled interrupt on port %d\n",
				    sc->sc_dev.dv_xname, port);
				continue;
			}
			xi->xi_evcnt.ec_count++;
			xen_intr_mask_acquired(sc, xi);
			if (!task_add(xi->xi_taskq, &xi->xi_task))
				xen_intsrc_release(sc, xi);
		}
	}
}

void
xen_intr_schedule(xen_intr_handle_t xih)
{
	struct xen_softc *sc = xen_sc;
	struct xen_intsrc *xi;

	if ((xi = xen_intsrc_acquire(sc, (evtchn_port_t)xih)) != NULL) {
		xen_intr_mask_acquired(sc, xi);
		if (!task_add(xi->xi_taskq, &xi->xi_task))
			xen_intsrc_release(sc, xi);
	}
}

/*
 * This code achieves two goals: 1) it makes sure that *after* masking
 * the interrupt source no new task_adds can come in: sched_barrier
 * takes care of that; and 2) it makes sure that a handler currently
 * running on the interrupt taskq has finished and won't be called
 * again: taskq_barrier awaits completion of the current task, while
 * the interrupt masking prevents submission of new tasks in the
 * future.
 */
void
xen_intr_barrier(xen_intr_handle_t xih)
{
	struct xen_softc *sc = xen_sc;
	struct xen_intsrc *xi;

	sched_barrier(NULL);

	if ((xi = xen_intsrc_acquire(sc, (evtchn_port_t)xih)) != NULL) {
		taskq_barrier(xi->xi_taskq);
		xen_intsrc_release(sc, xi);
	}
}

void
xen_intr_signal(xen_intr_handle_t xih)
{
	struct xen_softc *sc = xen_sc;
	struct xen_intsrc *xi;
	struct evtchn_send es;

	if ((xi = xen_intsrc_acquire(sc, (evtchn_port_t)xih)) != NULL) {
		es.port = xi->xi_port;
		xen_intsrc_release(sc, xi);
		xen_evtchn_hypercall(sc, EVTCHNOP_send, &es, sizeof(es));
	}
}

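/*
 * Establish an interrupt handler for an event channel port.  With a
 * port of 0 a new inter-domain port is allocated via
 * EVTCHNOP_alloc_unbound; otherwise the existing port is adopted and
 * marked xi_noclose so that it won't be closed on disestablish.  The
 * port starts out masked.  A minimal caller sketch (hypothetical
 * frontend driver):
 *
 *	xen_intr_handle_t xih = 0;
 *	if (xen_intr_establish(0, &xih, sc->sc_domid, myfe_intr, sc,
 *	    sc->sc_dev.dv_xname))
 *		return (-1);
 *	xen_intr_unmask(xih);
 */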
int
xen_intr_establish(evtchn_port_t port, xen_intr_handle_t *xih, int domain,
    void (*handler)(void *), void *arg, char *name)
{
	struct xen_softc *sc = xen_sc;
	struct xen_intsrc *xi;
	struct evtchn_alloc_unbound eau;
#ifdef notyet
	struct evtchn_bind_vcpu ebv;
#endif
#if defined(XEN_DEBUG) && disabled
	struct evtchn_status es;
#endif

	if (port && (xi = xen_intsrc_acquire(sc, port)) != NULL) {
		xen_intsrc_release(sc, xi);
		DPRINTF("%s: interrupt handler has already been established "
		    "for port %u\n", sc->sc_dev.dv_xname, port);
		return (-1);
	}

	xi = malloc(sizeof(*xi), M_DEVBUF, M_NOWAIT | M_ZERO);
	if (xi == NULL)
		return (-1);

	xi->xi_port = (evtchn_port_t)*xih;

	xi->xi_handler = handler;
	xi->xi_ctx = arg;

	xi->xi_taskq = taskq_create(name, 1, IPL_NET, TASKQ_MPSAFE);
	if (!xi->xi_taskq) {
		printf("%s: failed to create interrupt task for %s\n",
		    sc->sc_dev.dv_xname, name);
		free(xi, M_DEVBUF, sizeof(*xi));
		return (-1);
	}
	task_set(&xi->xi_task, xen_intr_dispatch, xi);

	if (port == 0) {
		/* We're being asked to allocate a new event port */
		memset(&eau, 0, sizeof(eau));
		eau.dom = DOMID_SELF;
		eau.remote_dom = domain;
		if (xen_evtchn_hypercall(sc, EVTCHNOP_alloc_unbound, &eau,
		    sizeof(eau)) != 0) {
			DPRINTF("%s: failed to allocate new event port\n",
			    sc->sc_dev.dv_xname);
			free(xi, M_DEVBUF, sizeof(*xi));
			return (-1);
		}
		*xih = xi->xi_port = eau.port;
	} else {
		*xih = xi->xi_port = port;
		/*
		 * The Event Channel API didn't open this port, so it is not
		 * responsible for closing it automatically on unbind.
		 */
		xi->xi_noclose = 1;
	}

#ifdef notyet
	/* Bind interrupt to VCPU#0 */
	memset(&ebv, 0, sizeof(ebv));
	ebv.port = xi->xi_port;
	ebv.vcpu = 0;
	if (xen_evtchn_hypercall(sc, EVTCHNOP_bind_vcpu, &ebv, sizeof(ebv))) {
		printf("%s: failed to bind interrupt on port %u to vcpu%d\n",
		    sc->sc_dev.dv_xname, ebv.port, ebv.vcpu);
	}
#endif

	evcount_attach(&xi->xi_evcnt, name, &sc->sc_irq);

	xen_intsrc_add(sc, xi);

	/* Mask the event port */
	set_bit(xi->xi_port, &sc->sc_ipg->evtchn_mask[0]);

#if defined(XEN_DEBUG) && disabled
	memset(&es, 0, sizeof(es));
	es.dom = DOMID_SELF;
	es.port = xi->xi_port;
	if (xen_evtchn_hypercall(sc, EVTCHNOP_status, &es, sizeof(es))) {
		printf("%s: failed to obtain status for port %d\n",
		    sc->sc_dev.dv_xname, es.port);
	}
	printf("%s: port %u bound to vcpu%u", sc->sc_dev.dv_xname,
	    es.port, es.vcpu);
	if (es.status == EVTCHNSTAT_interdomain)
		printf(": domain %d port %u\n", es.u.interdomain.dom,
		    es.u.interdomain.port);
	else if (es.status == EVTCHNSTAT_unbound)
		printf(": domain %d\n", es.u.unbound.dom);
	else if (es.status == EVTCHNSTAT_pirq)
		printf(": pirq %u\n", es.u.pirq);
	else if (es.status == EVTCHNSTAT_virq)
		printf(": virq %u\n", es.u.virq);
	else
		printf("\n");
#endif

	return (0);
}

int
xen_intr_disestablish(xen_intr_handle_t xih)
{
	struct xen_softc *sc = xen_sc;
	evtchn_port_t port = (evtchn_port_t)xih;
	struct evtchn_close ec;
	struct xen_intsrc *xi;

	if ((xi = xen_intsrc_remove(sc, port)) == NULL)
		return (-1);

	evcount_detach(&xi->xi_evcnt);

	taskq_destroy(xi->xi_taskq);

	set_bit(xi->xi_port, &sc->sc_ipg->evtchn_mask[0]);
	clear_bit(xi->xi_port, &sc->sc_ipg->evtchn_pending[0]);

	if (!xi->xi_noclose) {
		ec.port = xi->xi_port;
		if (xen_evtchn_hypercall(sc, EVTCHNOP_close, &ec, sizeof(ec))) {
			DPRINTF("%s: failed to close event port %u\n",
			    sc->sc_dev.dv_xname, xi->xi_port);
		}
	}

	free(xi, M_DEVBUF, sizeof(*xi));
	return (0);
}

void
xen_intr_dispatch(void *arg)
{
	struct xen_softc *sc = xen_sc;
	struct xen_intsrc *xi = arg;

	if (xi->xi_handler)
		xi->xi_handler(xi->xi_ctx);

	xen_intr_unmask_release(sc, xi);
}

void
xen_intr_enable(void)
{
	struct xen_softc *sc = xen_sc;
	struct xen_intsrc *xi;
	struct evtchn_unmask eu;

	mtx_enter(&sc->sc_islck);
	SLIST_FOREACH(xi, &sc->sc_intrs, xi_entry) {
		if (!xi->xi_masked) {
			eu.port = xi->xi_port;
			if (xen_evtchn_hypercall(sc, EVTCHNOP_unmask, &eu,
			    sizeof(eu)))
				printf("%s: unmasking port %u failed\n",
				    sc->sc_dev.dv_xname, xi->xi_port);
			virtio_membar_sync();
			if (test_bit(xi->xi_port, &sc->sc_ipg->evtchn_mask[0]))
				printf("%s: port %u is still masked\n",
				    sc->sc_dev.dv_xname, xi->xi_port);
		}
	}
	mtx_leave(&sc->sc_islck);
}

void
xen_intr_mask(xen_intr_handle_t xih)
{
	struct xen_softc *sc = xen_sc;
	evtchn_port_t port = (evtchn_port_t)xih;
	struct xen_intsrc *xi;

	if ((xi = xen_intsrc_acquire(sc, port)) != NULL) {
		xen_intr_mask_acquired(sc, xi);
		xen_intsrc_release(sc, xi);
	}
}

int
xen_intr_unmask(xen_intr_handle_t xih)
{
	struct xen_softc *sc = xen_sc;
	evtchn_port_t port = (evtchn_port_t)xih;
	struct xen_intsrc *xi;

	if ((xi = xen_intsrc_acquire(sc, port)) != NULL)
		return (xen_intr_unmask_release(sc, xi));

	return (0);
}

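/*
 * Grant tables let a backend domain access frontend pages.  Every
 * table frame is a page of GNTTAB_NEPG entries mapped into the guest
 * physical address space with XENMEM_add_to_physmap; sc_gnt keeps one
 * xen_gntent bookkeeping record per frame.  Frames are added on demand
 * by xen_grant_table_grow() up to the limit reported by
 * GNTTABOP_query_size.
 */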
int
xen_init_grant_tables(struct xen_softc *sc)
{
	struct gnttab_query_size gqs;

	gqs.dom = DOMID_SELF;
	if (xen_hypercall(sc, XC_GNTTAB, 3, GNTTABOP_query_size, &gqs, 1)) {
		printf(": failed the query for grant table pages\n");
		return (-1);
	}
	if (gqs.nr_frames == 0 || gqs.nr_frames > gqs.max_nr_frames) {
		printf(": invalid number of grant table pages: %u/%u\n",
		    gqs.nr_frames, gqs.max_nr_frames);
		return (-1);
	}

	sc->sc_gntmax = gqs.max_nr_frames;

	sc->sc_gnt = mallocarray(sc->sc_gntmax + 1, sizeof(struct xen_gntent),
	    M_DEVBUF, M_ZERO | M_NOWAIT);
	if (sc->sc_gnt == NULL) {
		printf(": failed to allocate grant table lookup table\n");
		return (-1);
	}

	mtx_init(&sc->sc_gntlck, IPL_NET);

	if (xen_grant_table_grow(sc) == NULL) {
		free(sc->sc_gnt, M_DEVBUF, (sc->sc_gntmax + 1) *
		    sizeof(struct xen_gntent));
		return (-1);
	}

	printf(", %d grant table frames", sc->sc_gntmax);

	xen_bus_dma_tag._cookie = sc;

	return (0);
}

struct xen_gntent *
xen_grant_table_grow(struct xen_softc *sc)
{
	struct xen_add_to_physmap xatp;
	struct xen_gntent *ge;
	void *va;
	paddr_t pa;

	if (sc->sc_gntcnt == sc->sc_gntmax) {
		printf("%s: grant table frame allotment limit reached\n",
		    sc->sc_dev.dv_xname);
		return (NULL);
	}

	va = km_alloc(PAGE_SIZE, &kv_any, &kp_zero, &kd_nowait);
	if (va == NULL)
		return (NULL);
	if (!pmap_extract(pmap_kernel(), (vaddr_t)va, &pa)) {
		printf("%s: grant table page PA extraction failed\n",
		    sc->sc_dev.dv_xname);
		km_free(va, PAGE_SIZE, &kv_any, &kp_zero);
		return (NULL);
	}

	mtx_enter(&sc->sc_gntlck);

	ge = &sc->sc_gnt[sc->sc_gntcnt];
	ge->ge_table = va;

	xatp.domid = DOMID_SELF;
	xatp.idx = sc->sc_gntcnt;
	xatp.space = XENMAPSPACE_grant_table;
	xatp.gpfn = atop(pa);
	if (xen_hypercall(sc, XC_MEMORY, 2, XENMEM_add_to_physmap, &xatp)) {
		printf("%s: failed to add a grant table page\n",
		    sc->sc_dev.dv_xname);
		km_free(ge->ge_table, PAGE_SIZE, &kv_any, &kp_zero);
		mtx_leave(&sc->sc_gntlck);
		return (NULL);
	}
	ge->ge_start = sc->sc_gntcnt * GNTTAB_NEPG;
	/* First page has 8 reserved entries */
	ge->ge_reserved = ge->ge_start == 0 ? GNTTAB_NR_RESERVED_ENTRIES : 0;
	ge->ge_free = GNTTAB_NEPG - ge->ge_reserved;
	ge->ge_next = ge->ge_reserved;
	mtx_init(&ge->ge_lock, IPL_NET);

	sc->sc_gntcnt++;
	mtx_leave(&sc->sc_gntlck);

	return (ge);
}

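/*
 * Hand out a free grant table reference.  The most recently grown
 * frame is tried first, then all existing frames, and as a last
 * resort a new frame is grown.  A free entry is claimed by setting
 * its frame to 0xffffffff; it stays GTF_invalid until it is actually
 * entered with xen_grant_table_enter().
 */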
int
xen_grant_table_alloc(struct xen_softc *sc, grant_ref_t *ref)
{
	struct xen_gntent *ge;
	int i;

	/* Start with a previously allocated table page */
	ge = &sc->sc_gnt[sc->sc_gntcnt - 1];
	if (ge->ge_free > 0) {
		mtx_enter(&ge->ge_lock);
		if (ge->ge_free > 0)
			goto search;
		mtx_leave(&ge->ge_lock);
	}

	/* Try other existing table pages */
	for (i = 0; i < sc->sc_gntcnt; i++) {
		ge = &sc->sc_gnt[i];
		if (ge->ge_free == 0)
			continue;
		mtx_enter(&ge->ge_lock);
		if (ge->ge_free > 0)
			goto search;
		mtx_leave(&ge->ge_lock);
	}

 alloc:
	/* Allocate a new table page */
	if ((ge = xen_grant_table_grow(sc)) == NULL)
		return (-1);

	mtx_enter(&ge->ge_lock);
	if (ge->ge_free == 0) {
		/* We were not fast enough... */
		mtx_leave(&ge->ge_lock);
		goto alloc;
	}

 search:
	for (i = ge->ge_next;
	     /* Math works here because GNTTAB_NEPG is a power of 2 */
	     i != ((ge->ge_next + GNTTAB_NEPG - 1) & (GNTTAB_NEPG - 1));
	     i++) {
		if (i == GNTTAB_NEPG)
			i = 0;
		if (ge->ge_reserved && i < ge->ge_reserved)
			continue;
		if (ge->ge_table[i].frame != 0)
			continue;
		*ref = ge->ge_start + i;
		ge->ge_table[i].flags = GTF_invalid;
		ge->ge_table[i].frame = 0xffffffff; /* Mark as taken */
		if ((ge->ge_next = i + 1) == GNTTAB_NEPG)
			ge->ge_next = ge->ge_reserved;
		ge->ge_free--;
		mtx_leave(&ge->ge_lock);
		return (0);
	}
	mtx_leave(&ge->ge_lock);

	panic("page full, sc %p gnt %p (%d) ge %p", sc, sc->sc_gnt,
	    sc->sc_gntcnt, ge);
	return (-1);
}

void
xen_grant_table_free(struct xen_softc *sc, grant_ref_t ref)
{
	struct xen_gntent *ge;

#ifdef XEN_DEBUG
	if (ref > sc->sc_gntcnt * GNTTAB_NEPG)
		panic("unmanaged ref %u sc %p gnt %p (%d)", ref, sc,
		    sc->sc_gnt, sc->sc_gntcnt);
#endif
	ge = &sc->sc_gnt[ref / GNTTAB_NEPG];
	mtx_enter(&ge->ge_lock);
#ifdef XEN_DEBUG
	if (ref < ge->ge_start || ref > ge->ge_start + GNTTAB_NEPG) {
		mtx_leave(&ge->ge_lock);
		panic("out of bounds ref %u ge %p start %u sc %p gnt %p",
		    ref, ge, ge->ge_start, sc, sc->sc_gnt);
	}
#endif
	ref -= ge->ge_start;
	if (ge->ge_table[ref].flags != GTF_invalid) {
		mtx_leave(&ge->ge_lock);
		panic("reference %u is still in use, flags %#x frame %#x",
		    ref + ge->ge_start, ge->ge_table[ref].flags,
		    ge->ge_table[ref].frame);
	}
	ge->ge_table[ref].frame = 0;
	ge->ge_next = ref;
	ge->ge_free++;
	mtx_leave(&ge->ge_lock);
}

void
xen_grant_table_enter(struct xen_softc *sc, grant_ref_t ref, paddr_t pa,
    int domain, int flags)
{
	struct xen_gntent *ge;

#ifdef XEN_DEBUG
	if (ref > sc->sc_gntcnt * GNTTAB_NEPG)
		panic("unmanaged ref %u sc %p gnt %p (%d)", ref, sc,
		    sc->sc_gnt, sc->sc_gntcnt);
#endif
	ge = &sc->sc_gnt[ref / GNTTAB_NEPG];
#ifdef XEN_DEBUG
	if (ref < ge->ge_start || ref > ge->ge_start + GNTTAB_NEPG) {
		panic("out of bounds ref %u ge %p start %u sc %p gnt %p",
		    ref, ge, ge->ge_start, sc, sc->sc_gnt);
	}
#endif
	ref -= ge->ge_start;
	if (ge->ge_table[ref].flags != GTF_invalid) {
		panic("reference %u is still in use, flags %#x frame %#x",
		    ref + ge->ge_start, ge->ge_table[ref].flags,
		    ge->ge_table[ref].frame);
	}
	ge->ge_table[ref].frame = atop(pa);
	ge->ge_table[ref].domid = domain;
	virtio_membar_sync();
	ge->ge_table[ref].flags = GTF_permit_access | flags;
	virtio_membar_sync();
}

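/*
 * Invalidate a grant table entry.  The 16-bit flags and domid halves
 * of the entry are compare-and-swapped to GTF_invalid in a single
 * 32-bit operation so that the update doesn't race with the
 * hypervisor setting the GTF_reading/GTF_writing busy bits; if the
 * backend still holds the page after a few retries there's nothing
 * left to do but panic.
 */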
void
xen_grant_table_remove(struct xen_softc *sc, grant_ref_t ref)
{
	struct xen_gntent *ge;
	uint32_t flags, *ptr;
	int loop;

#ifdef XEN_DEBUG
	if (ref > sc->sc_gntcnt * GNTTAB_NEPG)
		panic("unmanaged ref %u sc %p gnt %p (%d)", ref, sc,
		    sc->sc_gnt, sc->sc_gntcnt);
#endif
	ge = &sc->sc_gnt[ref / GNTTAB_NEPG];
#ifdef XEN_DEBUG
	if (ref < ge->ge_start || ref > ge->ge_start + GNTTAB_NEPG) {
		panic("out of bounds ref %u ge %p start %u sc %p gnt %p",
		    ref, ge, ge->ge_start, sc, sc->sc_gnt);
	}
#endif
	ref -= ge->ge_start;
	/* Invalidate the grant reference */
	virtio_membar_sync();
	ptr = (uint32_t *)&ge->ge_table[ref];
	flags = (ge->ge_table[ref].flags & ~(GTF_reading|GTF_writing)) |
	    (ge->ge_table[ref].domid << 16);
	loop = 0;
	while (atomic_cas_uint(ptr, flags, GTF_invalid) != flags) {
		if (loop++ > 10) {
			panic("grant table reference %u is held "
			    "by domain %d: frame %#x flags %#x",
			    ref + ge->ge_start, ge->ge_table[ref].domid,
			    ge->ge_table[ref].frame, ge->ge_table[ref].flags);
		}
#if (defined(__amd64__) || defined(__i386__))
		__asm volatile("pause": : : "memory");
#endif
	}
	ge->ge_table[ref].frame = 0xffffffff;
}

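/*
 * bus_dma(9) glue: these wrappers sit in front of the parent tag and
 * substitute grant references for physical segment addresses so that
 * buffers can be shared with the backend domain.  Grant references
 * for all possible segments are claimed up front at map creation
 * time.
 */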
int
xen_bus_dmamap_create(bus_dma_tag_t t, bus_size_t size, int nsegments,
    bus_size_t maxsegsz, bus_size_t boundary, int flags, bus_dmamap_t *dmamp)
{
	struct xen_softc *sc = t->_cookie;
	struct xen_gntmap *gm;
	int i, error;

	if (maxsegsz < PAGE_SIZE)
		return (EINVAL);

	/* Allocate a dma map structure */
	error = bus_dmamap_create(sc->sc_dmat, size, nsegments, maxsegsz,
	    boundary, flags, dmamp);
	if (error)
		return (error);
	/* Allocate an array of grant table pa<->ref maps */
	gm = mallocarray(nsegments, sizeof(struct xen_gntmap), M_DEVBUF,
	    M_ZERO | ((flags & BUS_DMA_NOWAIT) ? M_NOWAIT : M_WAITOK));
	if (gm == NULL) {
		bus_dmamap_destroy(sc->sc_dmat, *dmamp);
		*dmamp = NULL;
		return (ENOMEM);
	}
	/* Wire it to the dma map */
	(*dmamp)->_dm_cookie = gm;
	/* Claim references from the grant table */
	for (i = 0; i < (*dmamp)->_dm_segcnt; i++) {
		if (xen_grant_table_alloc(sc, &gm[i].gm_ref)) {
			xen_bus_dmamap_destroy(t, *dmamp);
			*dmamp = NULL;
			return (ENOBUFS);
		}
	}
	return (0);
}

void
xen_bus_dmamap_destroy(bus_dma_tag_t t, bus_dmamap_t map)
{
	struct xen_softc *sc = t->_cookie;
	struct xen_gntmap *gm;
	int i;

	gm = map->_dm_cookie;
	for (i = 0; i < map->_dm_segcnt; i++) {
		if (gm[i].gm_ref == 0)
			continue;
		xen_grant_table_free(sc, gm[i].gm_ref);
	}
	free(gm, M_DEVBUF, map->_dm_segcnt * sizeof(struct xen_gntmap));
	bus_dmamap_destroy(sc->sc_dmat, map);
}

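/*
 * On load, every DMA segment address is entered into the grant table
 * and replaced with its grant reference; the backend domain id rides
 * in the upper 16 bits of the flags argument.  A frontend might issue
 * (sketch):
 *
 *	bus_dmamap_load(xa->xa_dmat, map, buf, len, NULL,
 *	    BUS_DMA_NOWAIT | BUS_DMA_READ | (xa->xa_domid << 16));
 *
 * BUS_DMA_WRITE (the device reads from memory) maps the grant
 * read-only.
 */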
1274 xen_bus_dmamap_load(bus_dma_tag_t t, bus_dmamap_t map, void *buf,
1275     bus_size_t buflen, struct proc *p, int flags)
1276 {
1277 	struct xen_softc *sc = t->_cookie;
1278 	struct xen_gntmap *gm = map->_dm_cookie;
1279 	int i, domain, error;
1280 
1281 	domain = flags >> 16;
1282 	flags &= 0xffff;
1283 	error = bus_dmamap_load(sc->sc_dmat, map, buf, buflen, p, flags);
1284 	if (error)
1285 		return (error);
1286 	for (i = 0; i < map->dm_nsegs; i++) {
1287 		xen_grant_table_enter(sc, gm[i].gm_ref, map->dm_segs[i].ds_addr,
1288 		    domain, flags & BUS_DMA_WRITE ? GTF_readonly : 0);
1289 		gm[i].gm_paddr = map->dm_segs[i].ds_addr;
1290 		map->dm_segs[i].ds_addr = gm[i].gm_ref;
1291 	}
1292 	return (0);
1293 }
1294 
1295 int
1296 xen_bus_dmamap_load_mbuf(bus_dma_tag_t t, bus_dmamap_t map, struct mbuf *m0,
1297     int flags)
1298 {
1299 	struct xen_softc *sc = t->_cookie;
1300 	struct xen_gntmap *gm = map->_dm_cookie;
1301 	int i, domain, error;
1302 
1303 	domain = flags >> 16;
1304 	flags &= 0xffff;
1305 	error = bus_dmamap_load_mbuf(sc->sc_dmat, map, m0, flags);
1306 	if (error)
1307 		return (error);
1308 	for (i = 0; i < map->dm_nsegs; i++) {
1309 		xen_grant_table_enter(sc, gm[i].gm_ref, map->dm_segs[i].ds_addr,
1310 		    domain, flags & BUS_DMA_WRITE ? GTF_readonly : 0);
1311 		gm[i].gm_paddr = map->dm_segs[i].ds_addr;
1312 		map->dm_segs[i].ds_addr = gm[i].gm_ref;
1313 	}
1314 	return (0);
1315 }
1316 
1317 void
1318 xen_bus_dmamap_unload(bus_dma_tag_t t, bus_dmamap_t map)
1319 {
1320 	struct xen_softc *sc = t->_cookie;
1321 	struct xen_gntmap *gm = map->_dm_cookie;
1322 	int i;
1323 
1324 	for (i = 0; i < map->dm_nsegs; i++) {
1325 		if (gm[i].gm_paddr == 0)
1326 			continue;
1327 		xen_grant_table_remove(sc, gm[i].gm_ref);
1328 		map->dm_segs[i].ds_addr = gm[i].gm_paddr;
1329 		gm[i].gm_paddr = 0;
1330 	}
1331 	bus_dmamap_unload(sc->sc_dmat, map);
1332 }
1333 
1334 void
1335 xen_bus_dmamap_sync(bus_dma_tag_t t, bus_dmamap_t map, bus_addr_t addr,
1336     bus_size_t size, int op)
1337 {
1338 	if ((op == (BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE)) ||
1339 	    (op == (BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE)))
1340 		virtio_membar_sync();
1341 }
1342 
1343 static int
1344 xen_attach_print(void *aux, const char *name)
1345 {
1346 	struct xen_attach_args *xa = aux;
1347 
1348 	if (name)
1349 		printf("\"%s\" at %s: %s", xa->xa_name, name, xa->xa_node);
1350 
1351 	return (UNCONF);
1352 }
1353 
1354 int
1355 xen_attach_device(struct xen_softc *sc, struct xen_devlist *xdl,
1356     const char *name, const char *unit)
1357 {
1358 	struct xen_attach_args xa;
1359 	struct xen_device *xdv;
1360 	unsigned long long res;
1361 
1362 	xa.xa_dmat = &xen_bus_dma_tag;
1363 
1364 	strlcpy(xa.xa_name, name, sizeof(xa.xa_name));
1365 	snprintf(xa.xa_node, sizeof(xa.xa_node), "device/%s/%s", name, unit);
1366 
1367 	if (xs_getprop(sc, xa.xa_node, "backend", xa.xa_backend,
1368 	    sizeof(xa.xa_backend))) {
1369 		DPRINTF("%s: failed to identify \"backend\" for "
1370 		    "\"%s\"\n", sc->sc_dev.dv_xname, xa.xa_node);
1371 		return (EIO);
1372 	}
1373 
1374 	if (xs_getnum(sc, xa.xa_node, "backend-id", &res) || res > UINT16_MAX) {
1375 		DPRINTF("%s: invalid \"backend-id\" for \"%s\"\n",
1376 		    sc->sc_dev.dv_xname, xa.xa_node);
1377 		return (EIO);
1378 	}
1379 	xa.xa_domid = (uint16_t)res;
1380 
1381 	xdv = malloc(sizeof(struct xen_device), M_DEVBUF, M_ZERO | M_NOWAIT);
1382 	if (xdv == NULL)
1383 		return (ENOMEM);
1384 
1385 	strlcpy(xdv->dv_unit, unit, sizeof(xdv->dv_unit));
1386 	LIST_INSERT_HEAD(&xdl->dl_devs, xdv, dv_entry);
1387 
1388 	xdv->dv_dev = config_found((struct device *)sc, &xa, xen_attach_print);
1389 
1390 	return (0);
1391 }
1392 
1393 int
1394 xen_probe_devices(struct xen_softc *sc)
1395 {
1396 	struct xen_devlist *xdl;
1397 	struct xs_transaction xst;
1398 	struct iovec *iovp1 = NULL, *iovp2 = NULL;
1399 	int i, j, error, iov1_cnt = 0, iov2_cnt = 0;
1400 	char path[256];
1401 
1402 	memset(&xst, 0, sizeof(xst));
1403 	xst.xst_id = 0;
1404 	xst.xst_cookie = sc->sc_xs;
1405 
1406 	if ((error = xs_cmd(&xst, XS_LIST, "device", &iovp1, &iov1_cnt)) != 0)
1407 		return (error);
1408 
1409 	for (i = 0; i < iov1_cnt; i++) {
1410 		if (strcmp("suspend", (char *)iovp1[i].iov_base) == 0)
1411 			continue;
1412 		snprintf(path, sizeof(path), "device/%s",
1413 		    (char *)iovp1[i].iov_base);
1414 		if ((error = xs_cmd(&xst, XS_LIST, path, &iovp2,
1415 		    &iov2_cnt)) != 0)
1416 			goto out;
1417 		if ((xdl = malloc(sizeof(struct xen_devlist), M_DEVBUF,
1418 		    M_ZERO | M_NOWAIT)) == NULL) {
1419 			error = ENOMEM;
1420 			goto out;
1421 		}
1422 		xdl->dl_xen = sc;
1423 		strlcpy(xdl->dl_node, (const char *)iovp1[i].iov_base,
1424 		    XEN_MAX_NODE_LEN);
1425 		for (j = 0; j < iov2_cnt; j++) {
1426 			error = xen_attach_device(sc, xdl,
1427 			    (const char *)iovp1[i].iov_base,
1428 			    (const char *)iovp2[j].iov_base);
1429 			if (error) {
1430 				printf("%s: failed to attach \"%s/%s\"\n",
1431 				    sc->sc_dev.dv_xname, path,
1432 				    (const char *)iovp2[j].iov_base);
1433 				goto out;
1434 			}
1435 		}
1436 		/* Setup a watch for every device subtree */
1437 		if (xs_watch(sc, "device", (char *)iovp1[i].iov_base,
1438 		    &xdl->dl_task, xen_hotplug, xdl))
1439 			printf("%s: failed to setup hotplug watch for \"%s\"\n",
1440 			    sc->sc_dev.dv_xname, (char *)iovp1[i].iov_base);
1441 		SLIST_INSERT_HEAD(&sc->sc_devlists, xdl, dl_entry);
1442 		xs_resfree(&xst, iovp2, iov2_cnt);
1443 		iovp2 = NULL;
1444 		iov2_cnt = 0;
1445 	}
1446 
1447  out:
1448 	if (iovp2)
1449 		xs_resfree(&xst, iovp2, iov2_cnt);
1450 	xs_resfree(&xst, iovp1, iov1_cnt);
1451 	return (error);
1452 }
1453 
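/*
 * XenStore watch handler for a device subtree.  The current directory
 * listing is diffed against the list of attached devices: units that
 * have disappeared are detached, units not seen before are attached.
 */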
void
xen_hotplug(void *arg)
{
	struct xen_devlist *xdl = arg;
	struct xen_softc *sc = xdl->dl_xen;
	struct xen_device *xdv, *xvdn;
	struct xs_transaction xst;
	struct iovec *iovp = NULL;
	int error, i, keep, iov_cnt = 0;
	char path[256];
	int8_t *seen;

	memset(&xst, 0, sizeof(xst));
	xst.xst_id = 0;
	xst.xst_cookie = sc->sc_xs;

	snprintf(path, sizeof(path), "device/%s", xdl->dl_node);
	if ((error = xs_cmd(&xst, XS_LIST, path, &iovp, &iov_cnt)) != 0)
		return;

	seen = malloc(iov_cnt, M_TEMP, M_ZERO | M_WAITOK);

	/* Detect all removed and kept devices */
	LIST_FOREACH_SAFE(xdv, &xdl->dl_devs, dv_entry, xvdn) {
		for (i = 0, keep = 0; i < iov_cnt; i++) {
			if (!seen[i] &&
			    !strcmp(xdv->dv_unit, (char *)iovp[i].iov_base)) {
				seen[i]++;
				keep++;
				break;
			}
		}
		if (!keep) {
			DPRINTF("%s: removing \"%s/%s\"\n", sc->sc_dev.dv_xname,
			    xdl->dl_node, xdv->dv_unit);
			LIST_REMOVE(xdv, dv_entry);
			config_detach(xdv->dv_dev, 0);
			free(xdv, M_DEVBUF, sizeof(struct xen_device));
		}
	}

	/* Attach all new devices */
	for (i = 0; i < iov_cnt; i++) {
		if (seen[i])
			continue;
		DPRINTF("%s: attaching \"%s/%s\"\n", sc->sc_dev.dv_xname,
		    xdl->dl_node, (const char *)iovp[i].iov_base);
		error = xen_attach_device(sc, xdl, xdl->dl_node,
		    (const char *)iovp[i].iov_base);
		if (error) {
			printf("%s: failed to attach \"%s/%s\"\n",
			    sc->sc_dev.dv_xname, path,
			    (const char *)iovp[i].iov_base);
			continue;
		}
	}

	free(seen, M_TEMP, iov_cnt);

	xs_resfree(&xst, iovp, iov_cnt);
}

#include <machine/pio.h>

#define	XMI_PORT		0x10
#define XMI_MAGIC		0x49d2
#define XMI_UNPLUG_IDE		0x01
#define XMI_UNPLUG_NIC		0x02
#define XMI_UNPLUG_IDESEC	0x04

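/*
 * Tell the Xen platform device to unplug emulated devices that would
 * otherwise shadow their paravirtualized counterparts: reading the
 * magic value from I/O port 0x10 confirms the protocol is available,
 * then the unplug mask is written back to the same port.
 */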
void
xen_disable_emulated_devices(struct xen_softc *sc)
{
#if defined(__i386__) || defined(__amd64__)
	ushort unplug = 0;

	if (inw(XMI_PORT) != XMI_MAGIC) {
		printf("%s: failed to disable emulated devices\n",
		    sc->sc_dev.dv_xname);
		return;
	}
	if (sc->sc_unplug & XEN_UNPLUG_IDE)
		unplug |= XMI_UNPLUG_IDE;
	if (sc->sc_unplug & XEN_UNPLUG_IDESEC)
		unplug |= XMI_UNPLUG_IDESEC;
	if (sc->sc_unplug & XEN_UNPLUG_NIC)
		unplug |= XMI_UNPLUG_NIC;
	if (unplug)
		outw(XMI_PORT, unplug);
#endif	/* __i386__ || __amd64__ */
}

void
xen_unplug_emulated(void *xsc, int what)
{
	struct xen_softc *sc = xsc;

	sc->sc_unplug |= what;
}
1553