xref: /netbsd-src/sys/arch/xen/x86/hypervisor_machdep.c (revision 49e7470743413517cdd7fa97d7acfb0cae351e7a)
1*49e74707Sriastradh /*	$NetBSD: hypervisor_machdep.c,v 1.46 2023/03/01 08:13:44 riastradh Exp $	*/
24e541343Sbouyer 
34e541343Sbouyer /*
44e541343Sbouyer  *
54e541343Sbouyer  * Copyright (c) 2004 Christian Limpach.
64e541343Sbouyer  * All rights reserved.
74e541343Sbouyer  *
84e541343Sbouyer  * Redistribution and use in source and binary forms, with or without
94e541343Sbouyer  * modification, are permitted provided that the following conditions
104e541343Sbouyer  * are met:
114e541343Sbouyer  * 1. Redistributions of source code must retain the above copyright
124e541343Sbouyer  *    notice, this list of conditions and the following disclaimer.
134e541343Sbouyer  * 2. Redistributions in binary form must reproduce the above copyright
144e541343Sbouyer  *    notice, this list of conditions and the following disclaimer in the
154e541343Sbouyer  *    documentation and/or other materials provided with the distribution.
164e541343Sbouyer  *
174e541343Sbouyer  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
184e541343Sbouyer  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
194e541343Sbouyer  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
204e541343Sbouyer  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
214e541343Sbouyer  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
224e541343Sbouyer  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
234e541343Sbouyer  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
244e541343Sbouyer  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
254e541343Sbouyer  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
264e541343Sbouyer  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
274e541343Sbouyer  */
284e541343Sbouyer 
294e541343Sbouyer /******************************************************************************
304e541343Sbouyer  * hypervisor.c
314e541343Sbouyer  *
324e541343Sbouyer  * Communication to/from hypervisor.
334e541343Sbouyer  *
344e541343Sbouyer  * Copyright (c) 2002-2004, K A Fraser
354e541343Sbouyer  *
364e541343Sbouyer  * Permission is hereby granted, free of charge, to any person obtaining a copy
374e541343Sbouyer  * of this software and associated documentation files (the "Software"), to
384e541343Sbouyer  * deal in the Software without restriction, including without limitation the
394e541343Sbouyer  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
404e541343Sbouyer  * sell copies of the Software, and to permit persons to whom the Software is
414e541343Sbouyer  * furnished to do so, subject to the following conditions:
424e541343Sbouyer  *
434e541343Sbouyer  * The above copyright notice and this permission notice shall be included in
444e541343Sbouyer  * all copies or substantial portions of the Software.
454e541343Sbouyer  *
464e541343Sbouyer  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
474e541343Sbouyer  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
484e541343Sbouyer  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
494e541343Sbouyer  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
504e541343Sbouyer  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
514e541343Sbouyer  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
524e541343Sbouyer  * DEALINGS IN THE SOFTWARE.
534e541343Sbouyer  */
544e541343Sbouyer 
554e541343Sbouyer 
564e541343Sbouyer #include <sys/cdefs.h>
57*49e74707Sriastradh __KERNEL_RCSID(0, "$NetBSD: hypervisor_machdep.c,v 1.46 2023/03/01 08:13:44 riastradh Exp $");
584e541343Sbouyer 
594e541343Sbouyer #include <sys/param.h>
604e541343Sbouyer #include <sys/systm.h>
616004aef4Sbouyer #include <sys/kmem.h>
62c24c993fSbouyer #include <sys/cpu.h>
63d5c9d50fSbouyer #include <sys/ksyms.h>
646004aef4Sbouyer 
656004aef4Sbouyer #include <uvm/uvm_extern.h>
666004aef4Sbouyer 
676004aef4Sbouyer #include <machine/vmparam.h>
686004aef4Sbouyer #include <machine/pmap.h>
698f18579dSriastradh #include <machine/pmap_private.h>
704e541343Sbouyer 
71c24c993fSbouyer #include <x86/machdep.h>
72c24c993fSbouyer #include <x86/cpuvar.h>
73c24c993fSbouyer 
744e541343Sbouyer #include <xen/xen.h>
75c24c993fSbouyer #include <xen/intr.h>
764e541343Sbouyer #include <xen/hypervisor.h>
774e541343Sbouyer #include <xen/evtchn.h>
786004aef4Sbouyer #include <xen/xenpmap.h>
794e541343Sbouyer 
804e541343Sbouyer #include "opt_xen.h"
81d5c9d50fSbouyer #include "opt_modular.h"
82d5c9d50fSbouyer #include "opt_ddb.h"
831fe45bddScherry #include "isa.h"
841fe45bddScherry #include "pci.h"
85d5c9d50fSbouyer #include "ksyms.h"
86d5c9d50fSbouyer 
87d5c9d50fSbouyer #ifdef DDB
88d5c9d50fSbouyer #include <machine/db_machdep.h>
89d5c9d50fSbouyer #include <ddb/db_extern.h>
90d5c9d50fSbouyer #include <ddb/db_output.h>
91d5c9d50fSbouyer #include <ddb/db_interface.h>
92d5c9d50fSbouyer #endif
934e541343Sbouyer 
94b4bf0ca2Scherry #ifdef XENPV
956004aef4Sbouyer /*
966004aef4Sbouyer  * arch-dependent p2m frame lists list (L3 and L2)
976004aef4Sbouyer  * used by Xen for save/restore mappings
986004aef4Sbouyer  */
996004aef4Sbouyer static unsigned long * l3_p2m_page;
1006004aef4Sbouyer static unsigned long * l2_p2m_page;
1016004aef4Sbouyer static int l2_p2m_page_size; /* size of L2 page, in pages */
1026004aef4Sbouyer 
1036004aef4Sbouyer static void build_p2m_frame_list_list(void);
1046004aef4Sbouyer static void update_p2m_frame_list_list(void);
1056004aef4Sbouyer 
106b4bf0ca2Scherry #endif
107b4bf0ca2Scherry 
1084e541343Sbouyer // #define PORT_DEBUG 4
1094e541343Sbouyer // #define EARLY_DEBUG_EVENT
1104e541343Sbouyer 
11115504847Scherry /* callback function type */
11292f2ec54Sbouyer typedef void (*iterate_func_t)(unsigned int, unsigned int,
11392f2ec54Sbouyer 			       unsigned int, void *);
11415504847Scherry 
/*
 * evt_iterate_bits(pendingl1, pendingl2, mask, iterate_pending, iterate_args)
 *
 *	Walk the two-level Xen event bitmap and invoke the callback
 *	once per set bit.  pendingl1 is the level-1 selector word (one
 *	bit per level-2 word); pendingl2 is the array of level-2 words.
 *	If mask is non-NULL, bits already masked there are skipped and
 *	the harvested bits are set in mask before being cleared from
 *	pendingl2.  Only events routed to this CPU (ci_evtmask) are
 *	delivered.  The callback receives (port, l1 index, l2 index,
 *	iterate_args).
 *
 *	The level-1 word is consumed atomically up front (xchg with 0),
 *	so concurrent setters re-raise rather than race with us.
 */
static inline void
evt_iterate_bits(volatile unsigned long *pendingl1,
		 volatile unsigned long *pendingl2,
		 volatile unsigned long *mask,
		 iterate_func_t iterate_pending, void *iterate_args)
{

	KASSERT(pendingl1 != NULL);
	KASSERT(pendingl2 != NULL);

	unsigned long l1, l2;
	unsigned int l1i, l2i, port;

	/* Atomically claim the whole level-1 selector. */
	l1 = xen_atomic_xchg(pendingl1, 0);
	while ((l1i = xen_ffs(l1)) != 0) {
		l1i--;			/* xen_ffs() is 1-based */
		l1 &= ~(1UL << l1i);

		/* Pending, not masked, and targeted at this CPU. */
		l2 = pendingl2[l1i] & (mask != NULL ? ~mask[l1i] : -1UL);
		l2 &= curcpu()->ci_evtmask[l1i];

		/* Mask before clearing pending so we can't lose a re-raise. */
		if (mask != NULL) xen_atomic_setbits_l(&mask[l1i], l2);
		xen_atomic_clearbits_l(&pendingl2[l1i], l2);

		while ((l2i = xen_ffs(l2)) != 0) {
			l2i--;
			l2 &= ~(1UL << l2i);

			/* Global event-channel port number. */
			port = (l1i << LONG_SHIFT) + l2i;

			iterate_pending(port, l1i, l2i, iterate_args);
		}
	}
}
14915504847Scherry 
15015504847Scherry /*
15115504847Scherry  * Set per-cpu "pending" information for outstanding events that
15215504847Scherry  * cannot be processed now.
15315504847Scherry  */
15415504847Scherry 
/*
 * evt_set_pending(port, l1i, l2i, args)
 *
 *	evt_iterate_bits() callback used by stipending(): record the
 *	event as software-pending instead of dispatching it now.
 *	args points to stipending()'s int result; it is set to 1 when
 *	at least one deferred event's max IPL exceeds the current
 *	ci_ilevel (i.e. a handler should run soon).
 */
static inline void
evt_set_pending(unsigned int port, unsigned int l1i,
		unsigned int l2i, void *args)
{

	KASSERT(args != NULL);

	int *ret = args;
	struct intrhand *ih;

	if (evtsource[port]) {
		/* Mark all IPLs interested in this event source pending. */
		hypervisor_set_ipending(evtsource[port]->ev_imask, l1i, l2i);
		evtsource[port]->ev_evcnt.ev_count++;
		/* Flag every handler on this event as pending. */
		ih = evtsource[port]->ev_handlers;
		while (ih != NULL) {
			ih->ih_pending++;
			ih = ih->ih_evt_next;
		}

		/* Tell the caller a handler outranks the current IPL. */
		if (*ret == 0 && curcpu()->ci_ilevel <
		    evtsource[port]->ev_maxlevel)
			*ret = 1;
	}
#ifdef DOM0OPS
	else  {
		/* No in-kernel source: defer to the xenevt pseudo-device. */
		xenevt_setipending(l1i, l2i);
	}
#endif
}
18515504847Scherry 
int stipending(void);
/*
 * stipending()
 *
 *	Harvest pending Xen events for this CPU and record them as
 *	software-pending (via evt_set_pending) without running any
 *	handlers.  Returns nonzero when a recorded event's IPL exceeds
 *	the current ci_ilevel, i.e. the caller should arrange for
 *	interrupt processing.  Runs with preemption disabled so the
 *	CPU (and its vcpu_info) cannot change underneath us.
 */
int
stipending(void)
{
	volatile shared_info_t *s = HYPERVISOR_shared_info;
	struct cpu_info *ci;
	volatile struct vcpu_info *vci;
	int ret;

	kpreempt_disable();

	ret = 0;
	ci = curcpu();
	vci = ci->ci_vcpu;

#if 0
	if (HYPERVISOR_shared_info->events)
		printf("stipending events %08lx mask %08lx ilevel %d\n",
		    HYPERVISOR_shared_info->events,
		    HYPERVISOR_shared_info->events_mask, ci->ci_ilevel);
#endif

#ifdef EARLY_DEBUG_EVENT
	if (xen_atomic_test_bit(&s->evtchn_pending[0], debug_port)) {
		xen_debug_handler(NULL);
		xen_atomic_clear_bit(&s->evtchn_pending[0], debug_port);
	}
#endif

	/*
	 * we're only called after STIC, so we know that we'll have to
	 * STI at the end
	 */

	/*
	 * Re-check evtchn_upcall_pending each pass: the hypervisor may
	 * raise new events while we are draining the bitmap.
	 */
	while (vci->evtchn_upcall_pending) {
		x86_disable_intr();

		vci->evtchn_upcall_pending = 0;

		evt_iterate_bits(&vci->evtchn_pending_sel,
		    s->evtchn_pending, s->evtchn_mask,
		    evt_set_pending, &ret);

		x86_enable_intr();
	}

	kpreempt_enable();

	return (ret);
}
2364e541343Sbouyer 
23715504847Scherry /* Iterate through pending events and call the event handler */
23815504847Scherry 
/*
 * evt_do_hypervisor_callback(port, l1i, l2i, args)
 *
 *	evt_iterate_bits() callback used by do_hypervisor_callback():
 *	dispatch one pending event immediately.  args is the interrupt
 *	frame passed through to the handler.
 */
static inline void
evt_do_hypervisor_callback(unsigned int port, unsigned int l1i,
			   unsigned int l2i, void *args)
{
	KASSERT(args != NULL);

#ifdef DOM0OPS
	struct cpu_info *ci = curcpu();
#endif
	struct intrframe *regs = args;

#ifdef PORT_DEBUG
	if (port == PORT_DEBUG)
		printf("do_hypervisor_callback event %d\n", port);
#endif
	if (evtsource[port]) {
		/* Registered kernel event source: normal dispatch. */
		KASSERT(cpu_intr_p());
		evtchn_do_event(port, regs);
	}
#ifdef DOM0OPS
	else  {
		if (ci->ci_ilevel < IPL_HIGH) {
			/*
			 * fast path: run the xenevt handler at IPL_HIGH,
			 * then restore the previous level.
			 */
			int oipl = ci->ci_ilevel;
			ci->ci_ilevel = IPL_HIGH;
			KASSERT(cpu_intr_p());
			xenevt_event(port);
			ci->ci_ilevel = oipl;
		} else {
			/* Already at IPL_HIGH: just mark the event pending. */
			xenevt_setipending(l1i, l2i);
		}
	}
#endif
}
27415504847Scherry 
/*
 * do_hypervisor_callback(regs)
 *
 *	Main Xen event upcall entry point: drain this CPU's pending
 *	event bitmap and dispatch each event through
 *	evt_do_hypervisor_callback().  Also stashes the caller's
 *	CS/IP from the interrupt frame so the clock handler can
 *	attribute time to user vs. kernel mode.
 */
void
do_hypervisor_callback(struct intrframe *regs)
{
	volatile shared_info_t *s = HYPERVISOR_shared_info;
	struct cpu_info *ci;
	volatile struct vcpu_info *vci;
	uint64_t level __diagused;	/* for the DIAGNOSTIC check below */

	ci = curcpu();
	vci = ci->ci_vcpu;
	level = ci->ci_ilevel;

	/* Save trapframe for clock handler */
	KASSERT(regs != NULL);
	ci->ci_xen_clockf_usermode = USERMODE(regs->_INTRFRAME_CS);
	ci->ci_xen_clockf_pc = regs->_INTRFRAME_IP;

	// DDD printf("do_hypervisor_callback\n");

#ifdef EARLY_DEBUG_EVENT
	if (xen_atomic_test_bit(&s->evtchn_pending[0], debug_port)) {
		xen_debug_handler(NULL);
		xen_atomic_clear_bit(&s->evtchn_pending[0], debug_port);
	}
#endif

	/* Loop: the hypervisor may raise new events while we dispatch. */
	while (vci->evtchn_upcall_pending) {
		vci->evtchn_upcall_pending = 0;

		evt_iterate_bits(&vci->evtchn_pending_sel,
		    s->evtchn_pending, s->evtchn_mask,
		    evt_do_hypervisor_callback, regs);
	}

#ifdef DIAGNOSTIC
	/* Handlers must restore the IPL they found on entry. */
	if (level != ci->ci_ilevel)
		printf("hypervisor done %08x level %" PRIu64 "/%" PRIu64 " ipending %0" PRIx64 "\n",
		    (uint)vci->evtchn_pending_sel,
		    level, (uint64_t)ci->ci_ilevel, (uint64_t)ci->ci_ipending);
#endif
}
3164e541343Sbouyer 
#if 0
/*
 * hypervisor_send_event(ci, ev)
 *
 *	Raise event channel ev for CPU ci: set the pending bit in the
 *	shared page, unmask it, and either force a local callback or
 *	IPI the remote CPU.  Currently compiled out (#if 0).
 *
 *	NOTE(review): the two ci == curcpu() comparisons carry opposite
 *	__predict_false/__predict_true hints — at most one can be the
 *	common case; verify intent before re-enabling this code.
 */
void
hypervisor_send_event(struct cpu_info *ci, unsigned int ev)
{
	KASSERT(ci != NULL);

	volatile shared_info_t *s = HYPERVISOR_shared_info;
	volatile struct vcpu_info *vci = ci->ci_vcpu;

#ifdef PORT_DEBUG
	if (ev == PORT_DEBUG)
		printf("hypervisor_send_event %d\n", ev);
#endif

	xen_atomic_set_bit(&s->evtchn_pending[0], ev);

	/* Local delivery: set the selector and upcall-pending bits. */
	if (__predict_false(ci == curcpu())) {
		xen_atomic_set_bit(&vci->evtchn_pending_sel,
		    ev >> LONG_SHIFT);
		xen_atomic_set_bit(&vci->evtchn_upcall_pending, 0);
	}

	xen_atomic_clear_bit(&s->evtchn_mask[0], ev);

	if (__predict_true(ci == curcpu())) {
		hypervisor_force_callback();
	} else {
		/* Remote CPU: kick it with the hypervisor-callback IPI. */
		if (__predict_false(xen_send_ipi(ci, XEN_IPI_HVCB))) {
			panic("xen_send_ipi(cpu%d id %d, XEN_IPI_HVCB) failed\n",
			    (int) ci->ci_cpuid, ci->ci_vcpuid);
		}
	}
}
#endif
351de4e5faeScherry 
352de4e5faeScherry void
hypervisor_unmask_event(unsigned int ev)3534e541343Sbouyer hypervisor_unmask_event(unsigned int ev)
3544e541343Sbouyer {
35526c2e0b1Scherry 
35626c2e0b1Scherry 	KASSERT(ev > 0 && ev < NR_EVENT_CHANNELS);
35720161b72Scegger 
3584e541343Sbouyer #ifdef PORT_DEBUG
3594e541343Sbouyer 	if (ev == PORT_DEBUG)
3604e541343Sbouyer 		printf("hypervisor_unmask_event %d\n", ev);
3614e541343Sbouyer #endif
3624e541343Sbouyer 
36326c2e0b1Scherry 	/* Xen unmasks the evtchn_mask[0]:ev bit for us. */
36426c2e0b1Scherry 	evtchn_op_t op;
36526c2e0b1Scherry 	op.cmd = EVTCHNOP_unmask;
36626c2e0b1Scherry 	op.u.unmask.port = ev;
36726c2e0b1Scherry 	if (HYPERVISOR_event_channel_op(&op) != 0)
36826c2e0b1Scherry 		panic("Failed to unmask event %d\n", ev);
369ad7affb1Sbouyer 
37026c2e0b1Scherry 	return;
3714e541343Sbouyer }
3724e541343Sbouyer 
3734e541343Sbouyer void
hypervisor_mask_event(unsigned int ev)3744e541343Sbouyer hypervisor_mask_event(unsigned int ev)
3754e541343Sbouyer {
3764e541343Sbouyer 	volatile shared_info_t *s = HYPERVISOR_shared_info;
3774e541343Sbouyer #ifdef PORT_DEBUG
3784e541343Sbouyer 	if (ev == PORT_DEBUG)
3794e541343Sbouyer 		printf("hypervisor_mask_event %d\n", ev);
3804e541343Sbouyer #endif
3814e541343Sbouyer 
3824e541343Sbouyer 	xen_atomic_set_bit(&s->evtchn_mask[0], ev);
3834e541343Sbouyer }
3844e541343Sbouyer 
3854e541343Sbouyer void
hypervisor_clear_event(unsigned int ev)3864e541343Sbouyer hypervisor_clear_event(unsigned int ev)
3874e541343Sbouyer {
3884e541343Sbouyer 	volatile shared_info_t *s = HYPERVISOR_shared_info;
3894e541343Sbouyer #ifdef PORT_DEBUG
3904e541343Sbouyer 	if (ev == PORT_DEBUG)
3914e541343Sbouyer 		printf("hypervisor_clear_event %d\n", ev);
3924e541343Sbouyer #endif
3934e541343Sbouyer 
3944e541343Sbouyer 	xen_atomic_clear_bit(&s->evtchn_pending[0], ev);
3954e541343Sbouyer }
3964e541343Sbouyer 
/*
 * evt_enable_event(port, l1i, l2i, args)
 *
 *	evt_iterate_bits() callback used by hypervisor_enable_sir():
 *	re-enable (unmask) one event channel, and on PV dom0 with
 *	PCI/ISA also ack the corresponding physical IRQ.
 */
static inline void
evt_enable_event(unsigned int port, unsigned int l1i,
		 unsigned int l2i, void *args)
{
	KASSERT(args == NULL);
	hypervisor_unmask_event(port);
#if defined(XENPV) && (NPCI > 0 || NISA > 0)
	hypervisor_ack_pirq_event(port);
#endif /* NPCI > 0 || NISA > 0 */
}
40715504847Scherry 
/*
 * hypervisor_enable_sir(sir)
 *
 *	Re-enable every event channel recorded as pending for software
 *	interrupt level sir on this CPU (walking the per-source
 *	ipl_evt_mask1/ipl_evt_mask2 bitmap with evt_enable_event).
 */
void
hypervisor_enable_sir(unsigned int sir)
{
	struct cpu_info *ci = curcpu();

	/*
	 * enable all events for ipl. As we only set an event in ipl_evt_mask
	 * for its lowest IPL, and pending IPLs are processed high to low,
	 * we know that all callback for this event have been processed.
	 */

	evt_iterate_bits(&ci->ci_isources[sir]->ipl_evt_mask1,
	    ci->ci_isources[sir]->ipl_evt_mask2, NULL,
	    evt_enable_event, NULL);

}
4244e541343Sbouyer 
/*
 * hypervisor_set_ipending(imask, l1, l2)
 *
 *	Mark the soft-interrupt levels in imask pending on this CPU,
 *	and record event bitmap position (l1, l2) against the lowest
 *	such level so the event can be re-enabled when that level is
 *	processed.  Must be called with interrupts off (not
 *	re-entrant, as asserted below).
 */
void
hypervisor_set_ipending(uint64_t imask, int l1, int l2)
{

	/* This function is not re-entrant */
	KASSERT(x86_read_psl() != 0);

	int sir;
	struct cpu_info *ci = curcpu();

	/* set pending bit for the appropriate IPLs */
	ci->ci_ipending |= imask;

	/*
	 * And set event pending bit for the lowest IPL. As IPL are handled
	 * from high to low, this ensure that all callbacks will have been
	 * called when we ack the event
	 */
	/*
	 * NOTE(review): imask is 64-bit but ffs() takes an int;
	 * presumably all SIR_XENIPL_* bits live in the low 32 bits of
	 * ci_ipending (the KASSERTs below bound sir) — confirm.
	 */
	sir = ffs(imask);
	KASSERT(sir > SIR_XENIPL_VM);
	sir--;		/* ffs() is 1-based */
	KASSERT(sir <= SIR_XENIPL_HIGH);
	KASSERT(ci->ci_isources[sir] != NULL);
	ci->ci_isources[sir]->ipl_evt_mask1 |= 1UL << l1;
	ci->ci_isources[sir]->ipl_evt_mask2[l1] |= 1UL << l2;
	KASSERT(ci == curcpu());
#if 0
	if (__predict_false(ci != curcpu())) {
		if (xen_send_ipi(ci, XEN_IPI_HVCB)) {
			panic("hypervisor_set_ipending: "
			    "xen_send_ipi(cpu%d id %d, XEN_IPI_HVCB) failed\n",
			    (int) ci->ci_cpuid, ci->ci_vcpuid);
		}
	}
#endif
}
4616004aef4Sbouyer 
/*
 * hypervisor_machdep_attach()
 *
 *	Machine-dependent hypervisor attach hook.  On PV domU guests,
 *	build the P2M frame-list-list used by Xen for save/restore and
 *	register the suspend sysctl; dom0 needs neither.
 */
void
hypervisor_machdep_attach(void)
{
#ifdef XENPV
	/* dom0 does not require the arch-dependent P2M translation table */
	if (xendomain_is_dom0())
		return;

	build_p2m_frame_list_list();
	sysctl_xen_suspend_setup();
#endif
}
4736004aef4Sbouyer 
/*
 * hypervisor_machdep_resume()
 *
 *	Machine-dependent resume hook: on PV domU guests, refresh the
 *	P2M frame-list-list mappings after a restore; dom0 has none.
 */
void
hypervisor_machdep_resume(void)
{
#ifdef XENPV
	/* dom0 does not require the arch-dependent P2M translation table */
	if (xendomain_is_dom0())
		return;

	update_p2m_frame_list_list();
#endif
}
483eba16022Sjym 
484c24c993fSbouyer /*
485c24c993fSbouyer  * idle_block()
486c24c993fSbouyer  *
487c24c993fSbouyer  *	Called from the idle loop when we have nothing to do but wait
488c24c993fSbouyer  *	for an interrupt.
489c24c993fSbouyer  */
static void
idle_block(void)
{
	/* Nothing may be soft-pending when we yield to the hypervisor. */
	KASSERT(curcpu()->ci_ipending == 0);
	/* Yield the vCPU until the next event arrives. */
	HYPERVISOR_block();
	KASSERT(curcpu()->ci_ipending == 0);
}
497c24c993fSbouyer 
/*
 * x86_cpu_idle_xen()
 *
 *	Xen flavour of the idle-loop body: with interrupts disabled,
 *	block the vCPU in the hypervisor unless a reschedule is
 *	already wanted.
 *
 *	NOTE(review): the HYPERVISOR_block() path does not re-run
 *	x86_enable_intr() here — presumably the block hypercall
 *	re-enables event delivery itself; confirm.  Also,
 *	__predict_false on !ci_want_resched treats "nothing to do"
 *	as the unlikely case — verify that hint is intentional.
 */
void
x86_cpu_idle_xen(void)
{
	struct cpu_info *ci = curcpu();

	KASSERT(ci->ci_ilevel == IPL_NONE);

	x86_disable_intr();
	if (__predict_false(!ci->ci_want_resched)) {
		idle_block();
	} else {
		x86_enable_intr();
	}
}
512c24c993fSbouyer 
513b4bf0ca2Scherry #ifdef XENPV
5146004aef4Sbouyer /*
5156004aef4Sbouyer  * Generate the p2m_frame_list_list table,
5166004aef4Sbouyer  * needed for guest save/restore
5176004aef4Sbouyer  */
/*
 * build_p2m_frame_list_list()
 *
 *	Allocate the L3 and L2 levels of the arch-dependent
 *	PFN-to-MFN frame table (used by Xen for guest save/restore)
 *	and populate them via update_p2m_frame_list_list().
 *	Panics if the wired allocations fail.
 */
static void
build_p2m_frame_list_list(void)
{
        int fpp; /* number of page (frame) pointer per page */
        unsigned long max_pfn;
        /*
         * The p2m list is composed of three levels of indirection,
         * each layer containing MFNs pointing to lower level pages
         * The indirection is used to convert a given PFN to its MFN
         * Each N level page can point to @fpp (N-1) level pages
         * For example, for x86 32bit, we have:
         * - PAGE_SIZE: 4096 bytes
         * - fpp: 1024 (one L3 page can address 1024 L2 pages)
         * A L1 page contains the list of MFN we are looking for
         */
        max_pfn = xen_start_info.nr_pages;
        fpp = PAGE_SIZE / sizeof(xen_pfn_t);

        /* we only need one L3 page */
        l3_p2m_page = (vaddr_t *)uvm_km_alloc(kernel_map, PAGE_SIZE,
	    PAGE_SIZE, UVM_KMF_WIRED | UVM_KMF_NOWAIT);
        if (l3_p2m_page == NULL)
                panic("could not allocate memory for l3_p2m_page");

        /*
         * Determine how many L2 pages we need for the mapping
         * Each L2 can map a total of @fpp L1 pages
         */
        l2_p2m_page_size = howmany(max_pfn, fpp);

        l2_p2m_page = (vaddr_t *)uvm_km_alloc(kernel_map,
	    l2_p2m_page_size * PAGE_SIZE,
	    PAGE_SIZE, UVM_KMF_WIRED | UVM_KMF_NOWAIT);
        if (l2_p2m_page == NULL)
                panic("could not allocate memory for l2_p2m_page");

        /* We now have L3 and L2 pages ready, update L1 mapping */
        update_p2m_frame_list_list();

}
5586004aef4Sbouyer 
5596004aef4Sbouyer /*
5606004aef4Sbouyer  * Update the L1 p2m_frame_list_list mapping (during guest boot or resume)
5616004aef4Sbouyer  */
/*
 * update_p2m_frame_list_list()
 *
 *	Fill in the L3 and L2 p2m tables with current MFNs and publish
 *	them to the hypervisor through the shared-info arch fields.
 *	Called at boot (from build_p2m_frame_list_list) and again on
 *	resume, when MFNs have changed.
 */
static void
update_p2m_frame_list_list(void)
{
        int i;
        int fpp; /* number of page (frame) pointer per page */
        unsigned long max_pfn;

        max_pfn = xen_start_info.nr_pages;
        fpp = PAGE_SIZE / sizeof(xen_pfn_t);

        for (i = 0; i < l2_p2m_page_size; i++) {
                /*
                 * Each time we start a new L2 page,
                 * store its MFN in the L3 page
                 */
                if ((i % fpp) == 0) {
                        l3_p2m_page[i/fpp] = vtomfn(
                                (vaddr_t)&l2_p2m_page[i]);
                }
                /*
                 * we use a shortcut
                 * since @xpmap_phys_to_machine_mapping array
                 * already contains PFN to MFN mapping, we just
                 * set the l2_p2m_page MFN pointer to the MFN of the
                 * according frame of @xpmap_phys_to_machine_mapping
                 */
                l2_p2m_page[i] = vtomfn((vaddr_t)
                        &xpmap_phys_to_machine_mapping[i*fpp]);
        }

        /* Publish the table root and size to Xen. */
        HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
                                        vtomfn((vaddr_t)l3_p2m_page);
        HYPERVISOR_shared_info->arch.max_pfn = max_pfn;

}
597b4bf0ca2Scherry #endif /* XENPV */
598d5c9d50fSbouyer 
/*
 * xen_init_ksyms()
 *
 *	Load the kernel symbol table for ddb/ksyms/modules.  On PV,
 *	the symbol-table end pointer comes from xen_start_info
 *	(mod_start when present, else mfn_list); on PVH, esym was
 *	already set up in locore.S.  Compiled out entirely when no
 *	ksyms consumer is configured.
 */
void
xen_init_ksyms(void)
{
#if NKSYMS || defined(DDB) || defined(MODULAR)
	extern int end;		/* end of the kernel image (linker symbol) */
	extern int *esym;	/* end of loaded symbols */
#ifdef DDB
	db_machine_init();
#endif

#ifdef XENPV
	esym = xen_start_info.mod_start ?
	    (void *)xen_start_info.mod_start :
	    (void *)xen_start_info.mfn_list;
#endif /* XENPV */
	/* for PVH, esym is set in locore.S */
	ksyms_addsyms_elf(*(int *)(void *)&end,
	    ((int *)(void *)&end) + 1, esym);
#endif
}
619