1*49e74707Sriastradh /* $NetBSD: hypervisor_machdep.c,v 1.46 2023/03/01 08:13:44 riastradh Exp $ */
24e541343Sbouyer
34e541343Sbouyer /*
44e541343Sbouyer *
54e541343Sbouyer * Copyright (c) 2004 Christian Limpach.
64e541343Sbouyer * All rights reserved.
74e541343Sbouyer *
84e541343Sbouyer * Redistribution and use in source and binary forms, with or without
94e541343Sbouyer * modification, are permitted provided that the following conditions
104e541343Sbouyer * are met:
114e541343Sbouyer * 1. Redistributions of source code must retain the above copyright
124e541343Sbouyer * notice, this list of conditions and the following disclaimer.
134e541343Sbouyer * 2. Redistributions in binary form must reproduce the above copyright
144e541343Sbouyer * notice, this list of conditions and the following disclaimer in the
154e541343Sbouyer * documentation and/or other materials provided with the distribution.
164e541343Sbouyer *
174e541343Sbouyer * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
184e541343Sbouyer * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
194e541343Sbouyer * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
204e541343Sbouyer * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
214e541343Sbouyer * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
224e541343Sbouyer * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
234e541343Sbouyer * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
244e541343Sbouyer * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
254e541343Sbouyer * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
264e541343Sbouyer * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
274e541343Sbouyer */
284e541343Sbouyer
294e541343Sbouyer /******************************************************************************
304e541343Sbouyer * hypervisor.c
314e541343Sbouyer *
324e541343Sbouyer * Communication to/from hypervisor.
334e541343Sbouyer *
344e541343Sbouyer * Copyright (c) 2002-2004, K A Fraser
354e541343Sbouyer *
364e541343Sbouyer * Permission is hereby granted, free of charge, to any person obtaining a copy
374e541343Sbouyer * of this software and associated documentation files (the "Software"), to
384e541343Sbouyer * deal in the Software without restriction, including without limitation the
394e541343Sbouyer * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
404e541343Sbouyer * sell copies of the Software, and to permit persons to whom the Software is
414e541343Sbouyer * furnished to do so, subject to the following conditions:
424e541343Sbouyer *
434e541343Sbouyer * The above copyright notice and this permission notice shall be included in
444e541343Sbouyer * all copies or substantial portions of the Software.
454e541343Sbouyer *
464e541343Sbouyer * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
474e541343Sbouyer * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
484e541343Sbouyer * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
494e541343Sbouyer * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
504e541343Sbouyer * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
514e541343Sbouyer * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
524e541343Sbouyer * DEALINGS IN THE SOFTWARE.
534e541343Sbouyer */
544e541343Sbouyer
554e541343Sbouyer
564e541343Sbouyer #include <sys/cdefs.h>
57*49e74707Sriastradh __KERNEL_RCSID(0, "$NetBSD: hypervisor_machdep.c,v 1.46 2023/03/01 08:13:44 riastradh Exp $");
584e541343Sbouyer
594e541343Sbouyer #include <sys/param.h>
604e541343Sbouyer #include <sys/systm.h>
616004aef4Sbouyer #include <sys/kmem.h>
62c24c993fSbouyer #include <sys/cpu.h>
63d5c9d50fSbouyer #include <sys/ksyms.h>
646004aef4Sbouyer
656004aef4Sbouyer #include <uvm/uvm_extern.h>
666004aef4Sbouyer
676004aef4Sbouyer #include <machine/vmparam.h>
686004aef4Sbouyer #include <machine/pmap.h>
698f18579dSriastradh #include <machine/pmap_private.h>
704e541343Sbouyer
71c24c993fSbouyer #include <x86/machdep.h>
72c24c993fSbouyer #include <x86/cpuvar.h>
73c24c993fSbouyer
744e541343Sbouyer #include <xen/xen.h>
75c24c993fSbouyer #include <xen/intr.h>
764e541343Sbouyer #include <xen/hypervisor.h>
774e541343Sbouyer #include <xen/evtchn.h>
786004aef4Sbouyer #include <xen/xenpmap.h>
794e541343Sbouyer
804e541343Sbouyer #include "opt_xen.h"
81d5c9d50fSbouyer #include "opt_modular.h"
82d5c9d50fSbouyer #include "opt_ddb.h"
831fe45bddScherry #include "isa.h"
841fe45bddScherry #include "pci.h"
85d5c9d50fSbouyer #include "ksyms.h"
86d5c9d50fSbouyer
87d5c9d50fSbouyer #ifdef DDB
88d5c9d50fSbouyer #include <machine/db_machdep.h>
89d5c9d50fSbouyer #include <ddb/db_extern.h>
90d5c9d50fSbouyer #include <ddb/db_output.h>
91d5c9d50fSbouyer #include <ddb/db_interface.h>
92d5c9d50fSbouyer #endif
934e541343Sbouyer
94b4bf0ca2Scherry #ifdef XENPV
956004aef4Sbouyer /*
966004aef4Sbouyer * arch-dependent p2m frame lists list (L3 and L2)
976004aef4Sbouyer * used by Xen for save/restore mappings
986004aef4Sbouyer */
996004aef4Sbouyer static unsigned long * l3_p2m_page;
1006004aef4Sbouyer static unsigned long * l2_p2m_page;
1016004aef4Sbouyer static int l2_p2m_page_size; /* size of L2 page, in pages */
1026004aef4Sbouyer
1036004aef4Sbouyer static void build_p2m_frame_list_list(void);
1046004aef4Sbouyer static void update_p2m_frame_list_list(void);
1056004aef4Sbouyer
106b4bf0ca2Scherry #endif
107b4bf0ca2Scherry
1084e541343Sbouyer // #define PORT_DEBUG 4
1094e541343Sbouyer // #define EARLY_DEBUG_EVENT
1104e541343Sbouyer
/* callback function type */
typedef void (*iterate_func_t)(unsigned int, unsigned int,
    unsigned int, void *);

/*
 * Walk Xen's two-level pending-event bitmap and invoke a callback for
 * every pending, unmasked event channel enabled on this CPU.
 *
 * pendingl1:	level-1 selector word; each set bit l1i says that word
 *		pendingl2[l1i] has pending bits.  It is atomically
 *		exchanged with 0 so concurrent setters are not lost.
 * pendingl2:	level-2 bitmap, one bit per event-channel port.
 * mask:	optional event mask.  When non-NULL, already-masked ports
 *		are skipped and the ports about to be delivered are masked
 *		first, so they cannot re-fire while being handled.
 * iterate_pending: called as (port, l1i, l2i, iterate_args) for each
 *		selected port.
 *
 * Only ports enabled in curcpu()->ci_evtmask[] are considered, so each
 * CPU handles just the events routed to it.
 */
static inline void
evt_iterate_bits(volatile unsigned long *pendingl1,
    volatile unsigned long *pendingl2,
    volatile unsigned long *mask,
    iterate_func_t iterate_pending, void *iterate_args)
{

	KASSERT(pendingl1 != NULL);
	KASSERT(pendingl2 != NULL);

	unsigned long l1, l2;
	unsigned int l1i, l2i, port;

	/* Atomically claim the level-1 selector bits. */
	l1 = xen_atomic_xchg(pendingl1, 0);
	while ((l1i = xen_ffs(l1)) != 0) {
		l1i--;
		l1 &= ~(1UL << l1i);

		/* Pending and not masked (mask == NULL means "no mask"). */
		l2 = pendingl2[l1i] & (mask != NULL ? ~mask[l1i] : -1UL);
		l2 &= curcpu()->ci_evtmask[l1i];

		/* Mask then clear-pending before delivery, in that order. */
		if (mask != NULL) xen_atomic_setbits_l(&mask[l1i], l2);
		xen_atomic_clearbits_l(&pendingl2[l1i], l2);

		while ((l2i = xen_ffs(l2)) != 0) {
			l2i--;
			l2 &= ~(1UL << l2i);

			/* Reconstruct the global port number. */
			port = (l1i << LONG_SHIFT) + l2i;

			iterate_pending(port, l1i, l2i, iterate_args);
		}
	}
}
14915504847Scherry
/*
 * Set per-cpu "pending" information for outstanding events that
 * cannot be processed now.
 */

/*
 * evt_iterate_bits() callback used by stipending(): record the event as
 * software-pending instead of delivering it.
 *
 * args is an int *; it is set to 1 when at least one recorded event has
 * a handler above the current IPL (i.e. the caller should trigger
 * processing).
 */
static inline void
evt_set_pending(unsigned int port, unsigned int l1i,
    unsigned int l2i, void *args)
{

	KASSERT(args != NULL);

	int *ret = args;
	struct intrhand *ih;

	if (evtsource[port]) {
		/* Mark the event pending at its IPLs and count it. */
		hypervisor_set_ipending(evtsource[port]->ev_imask, l1i, l2i);
		evtsource[port]->ev_evcnt.ev_count++;
		/* Flag every handler on this port as having work. */
		ih = evtsource[port]->ev_handlers;
		while (ih != NULL) {
			ih->ih_pending++;
			ih = ih->ih_evt_next;
		}

		/* Tell the caller if this event outranks the current IPL. */
		if (*ret == 0 && curcpu()->ci_ilevel <
		    evtsource[port]->ev_maxlevel)
			*ret = 1;
	}
#ifdef DOM0OPS
	else {
		/* set pending event */
		xenevt_setipending(l1i, l2i);
	}
#endif
}
18515504847Scherry
int stipending(void);

/*
 * Scan the shared-info event bitmaps and record any pending events as
 * software-pending (via evt_set_pending) without running handlers.
 *
 * Returns 1 when at least one recorded event has a handler above the
 * current IPL, 0 otherwise.  Runs with kernel preemption disabled so the
 * CPU (and its vcpu_info) cannot change underneath us.
 */
int
stipending(void)
{
	volatile shared_info_t *s = HYPERVISOR_shared_info;
	struct cpu_info *ci;
	volatile struct vcpu_info *vci;
	int ret;

	kpreempt_disable();

	ret = 0;
	ci = curcpu();
	vci = ci->ci_vcpu;

#if 0
	if (HYPERVISOR_shared_info->events)
		printf("stipending events %08lx mask %08lx ilevel %d\n",
		    HYPERVISOR_shared_info->events,
		    HYPERVISOR_shared_info->events_mask, ci->ci_ilevel);
#endif

#ifdef EARLY_DEBUG_EVENT
	if (xen_atomic_test_bit(&s->evtchn_pending[0], debug_port)) {
		xen_debug_handler(NULL);
		xen_atomic_clear_bit(&s->evtchn_pending[0], debug_port);
	}
#endif

	/*
	 * we're only called after STIC, so we know that we'll have to
	 * STI at the end
	 */

	/* Re-check the upcall flag: new events may arrive while scanning. */
	while (vci->evtchn_upcall_pending) {
		x86_disable_intr();

		vci->evtchn_upcall_pending = 0;

		evt_iterate_bits(&vci->evtchn_pending_sel,
		    s->evtchn_pending, s->evtchn_mask,
		    evt_set_pending, &ret);

		x86_enable_intr();
	}

	kpreempt_enable();

	return (ret);
}
2364e541343Sbouyer
/* Iterate through pending events and call the event handler */

/*
 * evt_iterate_bits() callback used by do_hypervisor_callback(): deliver
 * one pending event immediately.  args is the interrupt frame passed
 * through to the event handler.
 */
static inline void
evt_do_hypervisor_callback(unsigned int port, unsigned int l1i,
    unsigned int l2i, void *args)
{
	KASSERT(args != NULL);

#ifdef DOM0OPS
	struct cpu_info *ci = curcpu();
#endif
	struct intrframe *regs = args;

#ifdef PORT_DEBUG
	if (port == PORT_DEBUG)
		printf("do_hypervisor_callback event %d\n", port);
#endif
	if (evtsource[port]) {
		KASSERT(cpu_intr_p());
		evtchn_do_event(port, regs);
	}
#ifdef DOM0OPS
	else {
		/* Port owned by the dom0 /dev/xenevt backend. */
		if (ci->ci_ilevel < IPL_HIGH) {
			/* fast path */
			/* Block further events while xenevt_event() runs. */
			int oipl = ci->ci_ilevel;
			ci->ci_ilevel = IPL_HIGH;
			KASSERT(cpu_intr_p());
			xenevt_event(port);
			ci->ci_ilevel = oipl;
		} else {
			/* set pending event */
			xenevt_setipending(l1i, l2i);
		}
	}
#endif
}
27415504847Scherry
/*
 * Main Xen event upcall entry point: drain all pending event channels
 * for this vCPU, delivering each via evt_do_hypervisor_callback().
 *
 * regs is the interrupt frame at upcall time; its CS/IP are saved in
 * ci_xen_clockf_* so the clock handler can attribute the interrupted
 * context (user vs kernel, pc).
 */
void
do_hypervisor_callback(struct intrframe *regs)
{
	volatile shared_info_t *s = HYPERVISOR_shared_info;
	struct cpu_info *ci;
	volatile struct vcpu_info *vci;
	uint64_t level __diagused;	/* entry IPL, for the DIAGNOSTIC check */

	ci = curcpu();
	vci = ci->ci_vcpu;
	level = ci->ci_ilevel;

	/* Save trapframe for clock handler */
	KASSERT(regs != NULL);
	ci->ci_xen_clockf_usermode = USERMODE(regs->_INTRFRAME_CS);
	ci->ci_xen_clockf_pc = regs->_INTRFRAME_IP;

	// DDD printf("do_hypervisor_callback\n");

#ifdef EARLY_DEBUG_EVENT
	if (xen_atomic_test_bit(&s->evtchn_pending[0], debug_port)) {
		xen_debug_handler(NULL);
		xen_atomic_clear_bit(&s->evtchn_pending[0], debug_port);
	}
#endif

	/* Clear the upcall flag before scanning; re-check for new arrivals. */
	while (vci->evtchn_upcall_pending) {
		vci->evtchn_upcall_pending = 0;

		evt_iterate_bits(&vci->evtchn_pending_sel,
		    s->evtchn_pending, s->evtchn_mask,
		    evt_do_hypervisor_callback, regs);
	}

#ifdef DIAGNOSTIC
	/* Handlers must restore the IPL they were entered at. */
	if (level != ci->ci_ilevel)
		printf("hypervisor done %08x level %" PRIu64 "/%" PRIu64 " ipending %0" PRIx64 "\n",
		    (uint)vci->evtchn_pending_sel,
		    level, (uint64_t)ci->ci_ilevel, (uint64_t)ci->ci_ipending);
#endif
}
3164e541343Sbouyer
#if 0
/*
 * NOTE: this function is compiled out (#if 0) — kept for reference.
 *
 * Inject event @ev at @ci by setting its pending bit in the shared info
 * page, unmasking it, and forcing a callback (directly on the local CPU,
 * via XEN_IPI_HVCB on a remote one).
 */
void
hypervisor_send_event(struct cpu_info *ci, unsigned int ev)
{
	KASSERT(ci != NULL);

	volatile shared_info_t *s = HYPERVISOR_shared_info;
	volatile struct vcpu_info *vci = ci->ci_vcpu;

#ifdef PORT_DEBUG
	if (ev == PORT_DEBUG)
		printf("hypervisor_send_event %d\n", ev);
#endif

	xen_atomic_set_bit(&s->evtchn_pending[0], ev);

	if (__predict_false(ci == curcpu())) {
		xen_atomic_set_bit(&vci->evtchn_pending_sel,
		    ev >> LONG_SHIFT);
		xen_atomic_set_bit(&vci->evtchn_upcall_pending, 0);
	}

	xen_atomic_clear_bit(&s->evtchn_mask[0], ev);

	if (__predict_true(ci == curcpu())) {
		hypervisor_force_callback();
	} else {
		if (__predict_false(xen_send_ipi(ci, XEN_IPI_HVCB))) {
			panic("xen_send_ipi(cpu%d id %d, XEN_IPI_HVCB) failed\n",
			    (int) ci->ci_cpuid, ci->ci_vcpuid);
		}
	}
}
#endif
351de4e5faeScherry
352de4e5faeScherry void
hypervisor_unmask_event(unsigned int ev)3534e541343Sbouyer hypervisor_unmask_event(unsigned int ev)
3544e541343Sbouyer {
35526c2e0b1Scherry
35626c2e0b1Scherry KASSERT(ev > 0 && ev < NR_EVENT_CHANNELS);
35720161b72Scegger
3584e541343Sbouyer #ifdef PORT_DEBUG
3594e541343Sbouyer if (ev == PORT_DEBUG)
3604e541343Sbouyer printf("hypervisor_unmask_event %d\n", ev);
3614e541343Sbouyer #endif
3624e541343Sbouyer
36326c2e0b1Scherry /* Xen unmasks the evtchn_mask[0]:ev bit for us. */
36426c2e0b1Scherry evtchn_op_t op;
36526c2e0b1Scherry op.cmd = EVTCHNOP_unmask;
36626c2e0b1Scherry op.u.unmask.port = ev;
36726c2e0b1Scherry if (HYPERVISOR_event_channel_op(&op) != 0)
36826c2e0b1Scherry panic("Failed to unmask event %d\n", ev);
369ad7affb1Sbouyer
37026c2e0b1Scherry return;
3714e541343Sbouyer }
3724e541343Sbouyer
3734e541343Sbouyer void
hypervisor_mask_event(unsigned int ev)3744e541343Sbouyer hypervisor_mask_event(unsigned int ev)
3754e541343Sbouyer {
3764e541343Sbouyer volatile shared_info_t *s = HYPERVISOR_shared_info;
3774e541343Sbouyer #ifdef PORT_DEBUG
3784e541343Sbouyer if (ev == PORT_DEBUG)
3794e541343Sbouyer printf("hypervisor_mask_event %d\n", ev);
3804e541343Sbouyer #endif
3814e541343Sbouyer
3824e541343Sbouyer xen_atomic_set_bit(&s->evtchn_mask[0], ev);
3834e541343Sbouyer }
3844e541343Sbouyer
3854e541343Sbouyer void
hypervisor_clear_event(unsigned int ev)3864e541343Sbouyer hypervisor_clear_event(unsigned int ev)
3874e541343Sbouyer {
3884e541343Sbouyer volatile shared_info_t *s = HYPERVISOR_shared_info;
3894e541343Sbouyer #ifdef PORT_DEBUG
3904e541343Sbouyer if (ev == PORT_DEBUG)
3914e541343Sbouyer printf("hypervisor_clear_event %d\n", ev);
3924e541343Sbouyer #endif
3934e541343Sbouyer
3944e541343Sbouyer xen_atomic_clear_bit(&s->evtchn_pending[0], ev);
3954e541343Sbouyer }
3964e541343Sbouyer
/*
 * evt_iterate_bits() callback used by hypervisor_enable_sir(): re-enable
 * (unmask) one event channel, and on PV dom0 with PCI/ISA also ack the
 * corresponding pirq so the interrupt line can fire again.
 * l1i/l2i are unused; args must be NULL.
 */
static inline void
evt_enable_event(unsigned int port, unsigned int l1i,
    unsigned int l2i, void *args)
{
	KASSERT(args == NULL);
	hypervisor_unmask_event(port);
#if defined(XENPV) && (NPCI > 0 || NISA > 0)
	hypervisor_ack_pirq_event(port);
#endif /* NPCI > 0 || NISA > 0 */
}
40715504847Scherry
/*
 * Re-enable every event channel that was masked while software interrupt
 * level @sir was pending on this CPU, by iterating this SIR's saved
 * ipl_evt_mask1/ipl_evt_mask2 bitmaps with evt_enable_event().
 */
void
hypervisor_enable_sir(unsigned int sir)
{
	struct cpu_info *ci = curcpu();

	/*
	 * enable all events for ipl. As we only set an event in ipl_evt_mask
	 * for its lowest IPL, and pending IPLs are processed high to low,
	 * we know that all callback for this event have been processed.
	 */

	evt_iterate_bits(&ci->ci_isources[sir]->ipl_evt_mask1,
	    ci->ci_isources[sir]->ipl_evt_mask2, NULL,
	    evt_enable_event, NULL);

}
4244e541343Sbouyer
/*
 * Record an event as software-pending on the current CPU.
 *
 * imask: bitmask of SIR levels the event's handlers run at (OR'ed into
 *	ci_ipending).
 * l1/l2: the event's position in the two-level event bitmap; remembered
 *	in the lowest SIR's ipl_evt_mask1/ipl_evt_mask2 so the port can be
 *	re-enabled once that level has been processed.
 *
 * Must be called with upcalls/interrupts blocked (see the KASSERT).
 */
void
hypervisor_set_ipending(uint64_t imask, int l1, int l2)
{

	/* This function is not re-entrant */
	KASSERT(x86_read_psl() != 0);

	int sir;
	struct cpu_info *ci = curcpu();

	/* set pending bit for the appropriate IPLs */
	ci->ci_ipending |= imask;

	/*
	 * And set event pending bit for the lowest IPL. As IPL are handled
	 * from high to low, this ensure that all callbacks will have been
	 * called when we ack the event
	 */
	/*
	 * NOTE(review): ffs() takes an int while imask is uint64_t; this
	 * assumes all SIR bits of imask live in the low 32 bits (the
	 * KASSERTs below would trip otherwise) — confirm.
	 */
	sir = ffs(imask);
	KASSERT(sir > SIR_XENIPL_VM);
	sir--;
	KASSERT(sir <= SIR_XENIPL_HIGH);
	KASSERT(ci->ci_isources[sir] != NULL);
	ci->ci_isources[sir]->ipl_evt_mask1 |= 1UL << l1;
	ci->ci_isources[sir]->ipl_evt_mask2[l1] |= 1UL << l2;
	KASSERT(ci == curcpu());
#if 0
	if (__predict_false(ci != curcpu())) {
		if (xen_send_ipi(ci, XEN_IPI_HVCB)) {
			panic("hypervisor_set_ipending: "
			    "xen_send_ipi(cpu%d id %d, XEN_IPI_HVCB) failed\n",
			    (int) ci->ci_cpuid, ci->ci_vcpuid);
		}
	}
#endif
}
4616004aef4Sbouyer
/*
 * Machine-dependent hypervisor attach hook.  On PV domU, build the
 * arch-dependent P2M frame-list-list (needed for save/restore) and set
 * up the suspend sysctl; dom0 and non-PV need neither.
 */
void
hypervisor_machdep_attach(void)
{
#ifdef XENPV
	/* dom0 does not require the arch-dependent P2M translation table */
	if (!xendomain_is_dom0()) {
		build_p2m_frame_list_list();
		sysctl_xen_suspend_setup();
	}
#endif
}
4736004aef4Sbouyer
/*
 * Machine-dependent resume hook: on PV domU, refresh the P2M
 * frame-list-list mappings, whose MFNs change across save/restore.
 */
void
hypervisor_machdep_resume(void)
{
#ifdef XENPV
	/* dom0 does not require the arch-dependent P2M translation table */
	if (!xendomain_is_dom0())
		update_p2m_frame_list_list();
#endif
}
483eba16022Sjym
/*
 * idle_block()
 *
 *	Called from the idle loop when we have nothing to do but wait
 *	for an interrupt.  Blocks the vCPU in the hypervisor until an
 *	event arrives.
 */
static void
idle_block(void)
{
	/* Nothing may be software-pending around the block. */
	KASSERT(curcpu()->ci_ipending == 0);
	HYPERVISOR_block();
	KASSERT(curcpu()->ci_ipending == 0);
}
497c24c993fSbouyer
/*
 * Xen idle-loop entry point: block in the hypervisor unless a resched
 * is already wanted.  Interrupts are disabled before the ci_want_resched
 * check to close the wakeup race.
 * NOTE(review): the non-block path re-enables interrupts explicitly;
 * the block path presumably relies on HYPERVISOR_block() to re-enable
 * event delivery — confirm against the hypercall's semantics.
 */
void
x86_cpu_idle_xen(void)
{
	struct cpu_info *ci = curcpu();

	KASSERT(ci->ci_ilevel == IPL_NONE);

	x86_disable_intr();
	if (__predict_false(!ci->ci_want_resched)) {
		idle_block();
	} else {
		x86_enable_intr();
	}
}
512c24c993fSbouyer
513b4bf0ca2Scherry #ifdef XENPV
/*
 * Generate the p2m_frame_list_list table,
 * needed for guest save/restore
 */
static void
build_p2m_frame_list_list(void)
{
	int fpp; /* number of page (frame) pointer per page */
	unsigned long max_pfn;
	/*
	 * The p2m list is composed of three levels of indirection,
	 * each layer containing MFNs pointing to lower level pages
	 * The indirection is used to convert a given PFN to its MFN
	 * Each N level page can point to @fpp (N-1) level pages
	 * For example, for x86 32bit, we have:
	 * - PAGE_SIZE: 4096 bytes
	 * - fpp: 1024 (one L3 page can address 1024 L2 pages)
	 * A L1 page contains the list of MFN we are looking for
	 */
	max_pfn = xen_start_info.nr_pages;
	fpp = PAGE_SIZE / sizeof(xen_pfn_t);

	/* we only need one L3 page */
	l3_p2m_page = (vaddr_t *)uvm_km_alloc(kernel_map, PAGE_SIZE,
	    PAGE_SIZE, UVM_KMF_WIRED | UVM_KMF_NOWAIT);
	if (l3_p2m_page == NULL)
		panic("could not allocate memory for l3_p2m_page");

	/*
	 * Determine how many L2 pages we need for the mapping
	 * Each L2 can map a total of @fpp L1 pages
	 */
	l2_p2m_page_size = howmany(max_pfn, fpp);

	l2_p2m_page = (vaddr_t *)uvm_km_alloc(kernel_map,
	    l2_p2m_page_size * PAGE_SIZE,
	    PAGE_SIZE, UVM_KMF_WIRED | UVM_KMF_NOWAIT);
	if (l2_p2m_page == NULL)
		panic("could not allocate memory for l2_p2m_page");

	/* We now have L3 and L2 pages ready, update L1 mapping */
	update_p2m_frame_list_list();

}
5586004aef4Sbouyer
/*
 * Update the L1 p2m_frame_list_list mapping (during guest boot or resume)
 */
static void
update_p2m_frame_list_list(void)
{
	int i;
	int fpp; /* number of page (frame) pointer per page */
	unsigned long max_pfn;

	max_pfn = xen_start_info.nr_pages;
	fpp = PAGE_SIZE / sizeof(xen_pfn_t);

	for (i = 0; i < l2_p2m_page_size; i++) {
		/*
		 * Each time we start a new L2 page,
		 * store its MFN in the L3 page
		 */
		if ((i % fpp) == 0) {
			l3_p2m_page[i/fpp] = vtomfn(
			    (vaddr_t)&l2_p2m_page[i]);
		}
		/*
		 * we use a shortcut
		 * since @xpmap_phys_to_machine_mapping array
		 * already contains PFN to MFN mapping, we just
		 * set the l2_p2m_page MFN pointer to the MFN of the
		 * according frame of @xpmap_phys_to_machine_mapping
		 */
		l2_p2m_page[i] = vtomfn((vaddr_t)
		    &xpmap_phys_to_machine_mapping[i*fpp]);
	}

	/* Publish the table to Xen via the shared info page. */
	HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
	    vtomfn((vaddr_t)l3_p2m_page);
	HYPERVISOR_shared_info->arch.max_pfn = max_pfn;

}
597b4bf0ca2Scherry #endif /* XENPV */
598d5c9d50fSbouyer
/*
 * Load the kernel symbol table for ksyms/DDB/modules.  On PV, the end
 * of the symbol area is taken from xen_start_info (mod_start when the
 * bootloader passed modules, mfn_list otherwise); on PVH, esym was
 * already set in locore.S.  No-op unless ksyms/DDB/MODULAR are built in.
 */
void
xen_init_ksyms(void)
{
#if NKSYMS || defined(DDB) || defined(MODULAR)
	extern int end;
	extern int *esym;
#ifdef DDB
	db_machine_init();
#endif

#ifdef XENPV
	esym = xen_start_info.mod_start ?
	    (void *)xen_start_info.mod_start :
	    (void *)xen_start_info.mfn_list;
#endif /* XENPV */
	/* for PVH, esym is set in locore.S */
	ksyms_addsyms_elf(*(int *)(void *)&end,
	    ((int *)(void *)&end) + 1, esym);
#endif
}
619