xref: /onnv-gate/usr/src/uts/i86xpv/os/evtchn.c (revision 10453:87a0767e80c3)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * evtchn.c
 *
 * Communication via hypervisor event channels.
 *
 * Copyright (c) 2002-2005, K A Fraser
 *
 * This file may be distributed separately from the Linux kernel, or
 * incorporated into other software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
/* some parts derived from netbsd's hypervisor_machdep.c 1.2.2.2 */

/*
 *
 * Copyright (c) 2004 Christian Limpach.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. This section intentionally left blank.
 * 4. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
/*
 * Section 3 of the above license was updated in response to bug 6379571.
 */

#include <sys/types.h>
#include <sys/hypervisor.h>
#include <sys/machsystm.h>
#include <sys/mutex.h>
#include <sys/evtchn_impl.h>
#include <sys/ddi_impldefs.h>
#include <sys/avintr.h>
#include <sys/cpuvar.h>
#include <sys/smp_impldefs.h>
#include <sys/archsystm.h>
#include <sys/sysmacros.h>
#include <sys/cmn_err.h>
#include <sys/promif.h>
#include <sys/debug.h>
#include <sys/psm.h>
#include <sys/privregs.h>
#include <sys/trap.h>
#include <sys/atomic.h>
#include <sys/cpu.h>
#include <sys/psw.h>
#include <sys/traptrace.h>
#include <sys/stack.h>
#include <sys/x_call.h>
#include <xen/public/physdev.h>

/*
 * This file manages our association between hypervisor event channels and
 * Solaris's IRQs.  This is a one-to-one mapping, with three exceptions:
 * IPI IRQs, for which there is one event channel per CPU participating in
 * the IPI; the clock VIRQ, which likewise has an event channel per CPU;
 * and the IRQ for /dev/xen/evtchn, which maps many event channels to a
 * single IRQ.  The IRQ types are:
 *
 * IRQT_VIRQ:
 *	The hypervisor's standard virtual IRQ, used for the clock timer, for
 *	example.  This code allows any cpu to bind to one of these, although
 *	some are treated specially (e.g. VIRQ_DEBUG).
 *	Event channel binding is done via EVTCHNOP_bind_virq.
 *
 * IRQT_PIRQ:
 *	These associate a physical IRQ with an event channel via
 *	EVTCHNOP_bind_pirq.
 *
 * IRQT_IPI:
 *	A cross-call IRQ. Maps to "ncpus" event channels, each of which is
 *	bound to exactly one of the vcpus.  We do not currently support
 *	unbinding of IPIs (since Solaris doesn't need it). Uses
 *	EVTCHNOP_bind_ipi.
 *
 * IRQT_EVTCHN:
 *	A "normal" binding to an event channel, typically used by the frontend
 *	drivers to bind to their backend event channels.
 *
 * IRQT_DEV_EVTCHN:
 *	This is a one-time IRQ used by /dev/xen/evtchn. Unlike other IRQs, we
 *	have a one-IRQ to many-evtchn mapping. We only track evtchn->irq for
 *	these event channels, which are managed via ec_irq_add/rm_evtchn().
 *	We enforce that IRQT_DEV_EVTCHN's representative evtchn (->ii_evtchn)
 *	is zero, and make any calls to irq_evtchn() an error, to prevent
 *	accidentally attempting to use the illegal evtchn 0.
 *
 * Suspend/resume
 *
 *	During a suspend/resume cycle, we need to tear down the event channels.
 *	All other mapping data is kept. The drivers will remove their own event
 *	channels via xendev on receiving a DDI_SUSPEND.  This leaves us with
 *	the IPIs and VIRQs, which we handle in ec_suspend() and ec_resume()
 *	below.
 *
 * CPU binding
 *
 *	When an event channel is bound to a CPU, we set a bit in a mask present
 *	in the machcpu (evt_affinity) to indicate that this CPU can accept this
 *	event channel.  For both IPIs and VIRQs, this binding is fixed at
 *	allocation time and we never modify it.  All other event channels are
 *	bound via the PSM either as part of add_avintr(), or interrupt
 *	redistribution (xen_psm_dis/enable_intr()) as a result of CPU
 *	offline/online.
 *
 * Locking
 *
 *	Updates are done holding the ec_lock.  The xen_callback_handler()
 *	routine reads the mapping data in a lockless fashion.  Additionally,
 *	suspend takes ec_lock to prevent update races during a suspend/resume
 *	cycle.  The IPI info is also examined without the lock; this is OK
 *	since we only ever change IPI info during initial setup and resume.
 */

#define	IRQ_IS_CPUPOKE(irq) (ipi_info[XC_CPUPOKE_PIL].mi_irq == (irq))

#define	EVTCHN_MASKED(ev) \
	(HYPERVISOR_shared_info->evtchn_mask[(ev) >> EVTCHN_SHIFT] & \
	(1ul << ((ev) & ((1ul << EVTCHN_SHIFT) - 1))))
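/*
 * Worked example (assuming EVTCHN_SHIFT is 6, as on a 64-bit kernel,
 * per evtchn_impl.h): port 130 is tested as bit 130 & 63 == 2 of
 * evtchn_mask word 130 >> 6 == 2.  With EVTCHN_SHIFT of 5 (32-bit),
 * the same port is bit 2 of word 4.
 */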

static short evtchn_to_irq[NR_EVENT_CHANNELS];
static cpuset_t evtchn_cpus[NR_EVENT_CHANNELS];
static int	evtchn_owner[NR_EVENT_CHANNELS];
#ifdef DEBUG
static kthread_t *evtchn_owner_thread[NR_EVENT_CHANNELS];
#endif

static irq_info_t irq_info[NR_IRQS];
static mec_info_t ipi_info[MAXIPL];
static mec_info_t virq_info[NR_VIRQS];

/*
 * See the locking description above.
 */
kmutex_t ec_lock;

/*
 * Bitmap indicating which PIRQs require the hypervisor to be notified
 * on unmask.
 */
static unsigned long pirq_needs_eoi[NR_PIRQS / (sizeof (unsigned long) * NBBY)];

static int ec_debug_irq = INVALID_IRQ;
int ec_dev_irq = INVALID_IRQ;

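/*
 * Bind the given VIRQ to the given vcpu, returning the new event channel
 * port via *port.  Hypervisor error codes are translated to errnos.
 */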
int
xen_bind_virq(unsigned int virq, processorid_t cpu, int *port)
{
	evtchn_bind_virq_t bind;
	int err;

	bind.virq = virq;
	bind.vcpu = cpu;
	if ((err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, &bind)) == 0)
		*port = bind.port;
	else
		err = xen_xlate_errcode(err);
	return (err);
}

int
xen_bind_interdomain(int domid, int remote_port, int *port)
{
	evtchn_bind_interdomain_t bind;
	int err;

	bind.remote_dom  = domid;
	bind.remote_port = remote_port;
	if ((err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain,
	    &bind)) == 0)
		*port = bind.local_port;
	else
		err = xen_xlate_errcode(err);
	return (err);
}

int
xen_alloc_unbound_evtchn(int domid, int *evtchnp)
{
	evtchn_alloc_unbound_t alloc;
	int err;

	alloc.dom = DOMID_SELF;
	alloc.remote_dom = domid;

	if ((err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound,
	    &alloc)) == 0) {
		*evtchnp = alloc.port;
		/* ensure evtchn is masked till we're ready to use it */
		(void) ec_mask_evtchn(*evtchnp);
	} else {
		err = xen_xlate_errcode(err);
	}

	return (err);
}

static int
xen_close_evtchn(int evtchn)
{
	evtchn_close_t close;
	int err;

	close.port = evtchn;
	err = HYPERVISOR_event_channel_op(EVTCHNOP_close, &close);
	if (err)
		err = xen_xlate_errcode(err);
	return (err);
}

static int
xen_bind_ipi(processorid_t cpu)
{
	evtchn_bind_ipi_t bind;

	ASSERT(MUTEX_HELD(&ec_lock));

	bind.vcpu = cpu;
	if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi, &bind) != 0)
		panic("xen_bind_ipi() failed");
	return (bind.port);
}

/* Send future instances of this interrupt to another vcpu. */
static void
xen_bind_vcpu(int evtchn, int cpu)
{
	evtchn_bind_vcpu_t bind;

	ASSERT(MUTEX_HELD(&ec_lock));

	bind.port = evtchn;
	bind.vcpu = cpu;
	if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind) != 0)
		panic("xen_bind_vcpu() failed");
}

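/*
 * Bind a physical IRQ to an event channel, advertising willingness to
 * share the IRQ, and return the new port.  Like the other boot-critical
 * binds above, failure is fatal.
 */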
static int
xen_bind_pirq(int pirq)
{
	evtchn_bind_pirq_t bind;
	int ret;

	bind.pirq = pirq;
	bind.flags = BIND_PIRQ__WILL_SHARE;
	if ((ret = HYPERVISOR_event_channel_op(EVTCHNOP_bind_pirq, &bind)) != 0)
		panic("xen_bind_pirq() failed (err %d)", ret);
	return (bind.port);
}

/* unmask an evtchn and send upcall to appropriate vcpu if pending bit is set */
static void
xen_evtchn_unmask(int evtchn)
{
	evtchn_unmask_t unmask;

	unmask.port = evtchn;
	if (HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask) != 0)
		panic("xen_evtchn_unmask() failed");
}

static void
update_evtchn_affinity(int evtchn)
{
	cpu_t *cp;
	struct xen_evt_data *cpe;

	ASSERT(evtchn_to_irq[evtchn] != INVALID_IRQ);
	ASSERT(MUTEX_HELD(&ec_lock));

	/*
	 * Use lockless search of cpu_list, similar to mutex_vector_enter().
	 */
	kpreempt_disable();
	cp = cpu_list;
	do {
		cpe = cp->cpu_m.mcpu_evt_pend;
		if (CPU_IN_SET(evtchn_cpus[evtchn], cp->cpu_id))
			SET_EVTCHN_BIT(evtchn, cpe->evt_affinity);
		else
			CLEAR_EVTCHN_BIT(evtchn, cpe->evt_affinity);
	} while ((cp = cp->cpu_next) != cpu_list);
	kpreempt_enable();
}

static void
bind_evtchn_to_cpuset(int evtchn, cpuset_t cpus)
{
	ASSERT(evtchn_to_irq[evtchn] != INVALID_IRQ);

	CPUSET_ZERO(evtchn_cpus[evtchn]);
	CPUSET_OR(evtchn_cpus[evtchn], cpus);
	update_evtchn_affinity(evtchn);
}

static void
clear_evtchn_affinity(int evtchn)
{
	CPUSET_ZERO(evtchn_cpus[evtchn]);
	update_evtchn_affinity(evtchn);
}

static void
alloc_irq_evtchn(int irq, int index, int evtchn, int cpu)
{
	irq_info_t *irqp = &irq_info[irq];

	switch (irqp->ii_type) {
	case IRQT_IPI:
		ipi_info[index].mi_evtchns[cpu] = evtchn;
		irqp->ii_u.index = index;
		break;
	case IRQT_VIRQ:
		virq_info[index].mi_evtchns[cpu] = evtchn;
		irqp->ii_u.index = index;
		break;
	default:
		irqp->ii_u.evtchn = evtchn;
		break;
	}

	evtchn_to_irq[evtchn] = irq;

	/*
	 * If a CPU is not specified, we expect to bind it to a CPU later via
	 * the PSM.
	 */
	if (cpu != -1) {
		cpuset_t tcpus;
		CPUSET_ONLY(tcpus, cpu);
		bind_evtchn_to_cpuset(evtchn, tcpus);
	}
}

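/*
 * Allocate the first free IRQ slot of the given type and record its event
 * channel via alloc_irq_evtchn().  Panics if the IRQ space is exhausted.
 * Called with ec_lock held.
 */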
static int
alloc_irq(int type, int index, int evtchn, int cpu)
{
	int irq;
	irq_info_t *irqp;

	ASSERT(MUTEX_HELD(&ec_lock));
	ASSERT(type != IRQT_IPI || cpu != -1);

	for (irq = 0; irq < NR_IRQS; irq++) {
		if (irq_info[irq].ii_type == IRQT_UNBOUND)
			break;
	}

	if (irq == NR_IRQS)
		panic("No available IRQ to bind to: increase NR_IRQS!\n");

	irqp = &irq_info[irq];

	irqp->ii_type = type;
	/*
	 * Clear the has_handler field to indicate that no handler has been
	 * installed yet.
	 */
	irqp->ii_u2.has_handler = 0;

	alloc_irq_evtchn(irq, index, evtchn, cpu);
	return (irq);
}

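/*
 * Return the event channel currently backing this IRQ.  For the per-CPU
 * types (IPI and VIRQ) this is the channel bound to the current CPU.
 */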
static int
irq_evtchn(irq_info_t *irqp)
{
	int evtchn;

	ASSERT(irqp->ii_type != IRQT_DEV_EVTCHN);

	switch (irqp->ii_type) {
	case IRQT_IPI:
		ASSERT(irqp->ii_u.index != 0);
		evtchn = ipi_info[irqp->ii_u.index].mi_evtchns[CPU->cpu_id];
		break;
	case IRQT_VIRQ:
		evtchn = virq_info[irqp->ii_u.index].mi_evtchns[CPU->cpu_id];
		break;
	default:
		evtchn = irqp->ii_u.evtchn;
		break;
	}

	return (evtchn);
}

int
ec_is_edge_pirq(int irq)
{
	return (irq_info[irq].ii_type == IRQT_PIRQ &&
	    !TEST_EVTCHN_BIT(irq, &pirq_needs_eoi[0]));
}

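/*
 * Close an event channel with the hypervisor and clear all of our mapping
 * state for it: affinity, the evtchn-to-irq entry, and the caller's port
 * variable.  Called with ec_lock held.
 */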
static void
unbind_evtchn(ushort_t *evtchnp)
{
	int err;

	ASSERT(MUTEX_HELD(&ec_lock));

	ASSERT(*evtchnp != 0);

	err = xen_close_evtchn(*evtchnp);
	ASSERT(err == 0);
	clear_evtchn_affinity(*evtchnp);
	evtchn_to_irq[*evtchnp] = INVALID_IRQ;
	*evtchnp = 0;
}

static void
pirq_unmask_notify(int pirq)
{
	struct physdev_eoi eoi;

	if (TEST_EVTCHN_BIT(pirq, &pirq_needs_eoi[0])) {
		eoi.irq = pirq;
		(void) HYPERVISOR_physdev_op(PHYSDEVOP_eoi, &eoi);
	}
}

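/*
 * Ask the hypervisor whether this PIRQ requires an explicit EOI when it
 * is unmasked, and cache the answer in the pirq_needs_eoi bitmap for
 * pirq_unmask_notify() and ec_is_edge_pirq() to consult.
 */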
static void
pirq_query_unmask(int pirq)
{
	struct physdev_irq_status_query irq_status;

	irq_status.irq = pirq;
	(void) HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query, &irq_status);
	CLEAR_EVTCHN_BIT(pirq, &pirq_needs_eoi[0]);
	if (irq_status.flags & XENIRQSTAT_needs_eoi)
		SET_EVTCHN_BIT(pirq, &pirq_needs_eoi[0]);
}

static void
end_pirq(int irq)
{
	int evtchn = irq_evtchn(&irq_info[irq]);

	/*
	 * If it is an edge-triggered interrupt we have already unmasked it,
	 * so only PIRQs that need an EOI are unmasked and notified here.
	 */
	if (TEST_EVTCHN_BIT(irq, &pirq_needs_eoi[0])) {
		ec_unmask_evtchn(evtchn);
		pirq_unmask_notify(IRQ_TO_PIRQ(irq));
	}
}

/*
 * Bind an event channel to a vcpu
 */
void
ec_bind_vcpu(int evtchn, int cpu)
{
	mutex_enter(&ec_lock);
	xen_bind_vcpu(evtchn, cpu);
	mutex_exit(&ec_lock);
}

/*
 * Set up a physical device irq to be associated with an event channel.
 */
void
ec_setup_pirq(int irq, int ipl, cpuset_t *cpusp)
{
	int evtchn;
	irq_info_t *irqp = &irq_info[irq];

	/*
	 * If this PIRQ is already bound to an evtchn, it is a shared IRQ;
	 * skip the binding and initial setup that were already done for
	 * this irq on a previous trip through this code.
	 */
	if (irqp->ii_u.evtchn == INVALID_EVTCHN) {
		evtchn = xen_bind_pirq(irq);

		pirq_query_unmask(IRQ_TO_PIRQ(irq));

		irqp->ii_type = IRQT_PIRQ;
		irqp->ii_u.evtchn = evtchn;

		evtchn_to_irq[evtchn] = irq;
		irqp->ii_u2.ipl = ipl;
		ec_set_irq_affinity(irq, *cpusp);
		ec_enable_irq(irq);
		pirq_unmask_notify(IRQ_TO_PIRQ(irq));
	} else {
		ASSERT(irqp->ii_u2.ipl != 0);
		cmn_err(CE_NOTE, "!IRQ%d is shared", irq);
		if (ipl > irqp->ii_u2.ipl)
			irqp->ii_u2.ipl = ipl;
		*cpusp = evtchn_cpus[irqp->ii_u.evtchn];
	}
}

void
ec_unbind_irq(int irq)
{
	irq_info_t *irqp = &irq_info[irq];
	mec_info_t *virqp;
	int drop_lock = 0;
	int type, i;

	/*
	 * Nasty, but we need this during suspend.
	 */
	if (mutex_owner(&ec_lock) != curthread) {
		mutex_enter(&ec_lock);
		drop_lock = 1;
	}

	type = irqp->ii_type;

	ASSERT((type == IRQT_EVTCHN) || (type == IRQT_PIRQ) ||
	    (type == IRQT_VIRQ));

	if ((type == IRQT_EVTCHN) || (type == IRQT_PIRQ)) {
		/* There's only one event channel associated with this irq */
		unbind_evtchn(&irqp->ii_u.evtchn);
	} else if (type == IRQT_VIRQ) {
		/*
		 * Each cpu on the system can have its own event channel
		 * associated with a virq.  Unbind them all.
		 */
		virqp = &virq_info[irqp->ii_u.index];
		for (i = 0; i < NCPU; i++) {
			if (virqp->mi_evtchns[i] != 0)
				unbind_evtchn(&virqp->mi_evtchns[i]);
		}
		/* Mark the virq structure as invalid. */
		virqp->mi_irq = INVALID_IRQ;
	}

	bzero(irqp, sizeof (*irqp));
	/* Re-reserve PIRQ. */
	if (type == IRQT_PIRQ)
		irqp->ii_type = IRQT_PIRQ;

	if (drop_lock)
		mutex_exit(&ec_lock);
}

/*
 * Rebind an event channel for delivery to a CPU.
 */
void
ec_set_irq_affinity(int irq, cpuset_t dest)
{
	int evtchn, tcpu;
	irq_info_t *irqp = &irq_info[irq];

	mutex_enter(&ec_lock);

	ASSERT(irq < NR_IRQS);
	ASSERT(irqp->ii_type != IRQT_UNBOUND);

	/*
	 * Binding is done at allocation time for these types, so we should
	 * never modify them.
	 */
	if (irqp->ii_type == IRQT_IPI || irqp->ii_type == IRQT_VIRQ ||
	    irqp->ii_type == IRQT_DEV_EVTCHN) {
		mutex_exit(&ec_lock);
		return;
	}

	CPUSET_FIND(dest, tcpu);
	ASSERT(tcpu != CPUSET_NOTINSET);

	evtchn = irq_evtchn(irqp);

	xen_bind_vcpu(evtchn, tcpu);

	bind_evtchn_to_cpuset(evtchn, dest);

	mutex_exit(&ec_lock);

	/*
	 * Now send the new target processor a NOP IPI.
	 * It will check for any pending interrupts, and so service any that
	 * got delivered to the wrong processor by mistake.
	 */
	if (ncpus > 1)
		poke_cpu(tcpu);
}

int
ec_set_irq_priority(int irq, int pri)
{
	irq_info_t *irqp;

	if (irq >= NR_IRQS)
		return (-1);

	irqp = &irq_info[irq];

	if (irqp->ii_type == IRQT_UNBOUND)
		return (-1);

	irqp->ii_u2.ipl = pri;

	return (0);
}

void
ec_clear_irq_priority(int irq)
{
	irq_info_t *irqp = &irq_info[irq];

	ASSERT(irq < NR_IRQS);
	ASSERT(irqp->ii_type != IRQT_UNBOUND);

	irqp->ii_u2.ipl = 0;
}

int
ec_bind_evtchn_to_irq(int evtchn)
{
	mutex_enter(&ec_lock);

	ASSERT(evtchn_to_irq[evtchn] == INVALID_IRQ);

	(void) alloc_irq(IRQT_EVTCHN, 0, evtchn, -1);

	mutex_exit(&ec_lock);
	return (evtchn_to_irq[evtchn]);
}

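/*
 * Bind a VIRQ on the given CPU, allocating the IRQ on the first call and
 * reusing it (adding another per-CPU event channel) on subsequent calls.
 */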
int
ec_bind_virq_to_irq(int virq, int cpu)
{
	int err;
	int evtchn;
	mec_info_t *virqp;

	virqp = &virq_info[virq];
	mutex_enter(&ec_lock);

	err = xen_bind_virq(virq, cpu, &evtchn);
	ASSERT(err == 0);

	ASSERT(evtchn_to_irq[evtchn] == INVALID_IRQ);

	if (virqp->mi_irq == INVALID_IRQ) {
		virqp->mi_irq = alloc_irq(IRQT_VIRQ, virq, evtchn, cpu);
	} else {
		alloc_irq_evtchn(virqp->mi_irq, virq, evtchn, cpu);
	}

	mutex_exit(&ec_lock);

	return (virqp->mi_irq);
}

int
ec_bind_ipi_to_irq(int ipl, int cpu)
{
	int evtchn;
	ulong_t flags;
	mec_info_t *ipip;

	mutex_enter(&ec_lock);

	ipip = &ipi_info[ipl];

	evtchn = xen_bind_ipi(cpu);

	ASSERT(evtchn_to_irq[evtchn] == INVALID_IRQ);

	if (ipip->mi_irq == INVALID_IRQ) {
		ipip->mi_irq = alloc_irq(IRQT_IPI, ipl, evtchn, cpu);
	} else {
		alloc_irq_evtchn(ipip->mi_irq, ipl, evtchn, cpu);
	}

	/*
	 * Unmask the new evtchn so that it can be seen by the target cpu
	 */
	flags = intr_clear();
	ec_unmask_evtchn(evtchn);
	intr_restore(flags);

	mutex_exit(&ec_lock);
	return (ipip->mi_irq);
}

/*
 * When bringing up a CPU, bind to all the IPIs that CPU0 bound.
 */
void
ec_bind_cpu_ipis(int cpu)
{
	int i;

	for (i = 0; i < MAXIPL; i++) {
		mec_info_t *ipip = &ipi_info[i];
		if (ipip->mi_irq == INVALID_IRQ)
			continue;

		(void) ec_bind_ipi_to_irq(i, cpu);
	}
}

/*
 * Can this IRQ be rebound to another CPU?
 */
int
ec_irq_rebindable(int irq)
{
	irq_info_t *irqp = &irq_info[irq];

	if (irqp->ii_u.evtchn == 0)
		return (0);

	return (irqp->ii_type == IRQT_EVTCHN || irqp->ii_type == IRQT_PIRQ);
}

/*
 * Should this IRQ be unbound from this CPU (which is being offlined) to
 * another?
 */
int
ec_irq_needs_rebind(int irq, int cpu)
{
	irq_info_t *irqp = &irq_info[irq];

	return (ec_irq_rebindable(irq) &&
	    CPU_IN_SET(evtchn_cpus[irqp->ii_u.evtchn], cpu));
}

void
ec_send_ipi(int ipl, int cpu)
{
	mec_info_t *ipip = &ipi_info[ipl];

	ASSERT(ipip->mi_irq != INVALID_IRQ);

	ec_notify_via_evtchn(ipip->mi_evtchns[cpu]);
}

void
ec_try_ipi(int ipl, int cpu)
{
	mec_info_t *ipip = &ipi_info[ipl];

	if (ipip->mi_irq == INVALID_IRQ || ipip->mi_irq == 0)
		return;

	ec_notify_via_evtchn(ipip->mi_evtchns[cpu]);
}

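/*
 * Add another event channel to the one-to-many /dev/xen/evtchn IRQ.
 */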
void
ec_irq_add_evtchn(int irq, int evtchn)
{
	mutex_enter(&ec_lock);

	/*
	 * See description of IRQT_DEV_EVTCHN above.
	 */
	ASSERT(irq == ec_dev_irq);

	alloc_irq_evtchn(irq, 0, evtchn, 0);
	/*
	 * We enforce that the representative event channel for IRQT_DEV_EVTCHN
	 * is zero, so PSM operations on it have no effect.
	 */
	irq_info[irq].ii_u.evtchn = 0;
	mutex_exit(&ec_lock);
}

void
ec_irq_rm_evtchn(int irq, int evtchn)
{
	ushort_t ec = evtchn;

	mutex_enter(&ec_lock);
	ASSERT(irq == ec_dev_irq);
	unbind_evtchn(&ec);
	mutex_exit(&ec_lock);
}

/*
 * Allocate a /dev/xen/evtchn IRQ.  See the big comment at the top
 * for an explanation.
 */
int
ec_dev_alloc_irq(void)
{
	int i;
	irq_info_t *irqp;

	for (i = 0; i < NR_IRQS; i++) {
		if (irq_info[i].ii_type == IRQT_UNBOUND)
			break;
	}

	ASSERT(i != NR_IRQS);

	irqp = &irq_info[i];
	irqp->ii_type = IRQT_DEV_EVTCHN;
	irqp->ii_u2.ipl = IPL_EVTCHN;
	/*
	 * Force the evtchn to zero for the special evtchn device irq
	 */
	irqp->ii_u.evtchn = 0;
	return (i);
}

void
ec_enable_irq(unsigned int irq)
{
	ulong_t flag;
	irq_info_t *irqp = &irq_info[irq];

	if (irqp->ii_type == IRQT_DEV_EVTCHN)
		return;

	flag = intr_clear();
	ec_unmask_evtchn(irq_evtchn(irqp));
	intr_restore(flag);
}

void
ec_disable_irq(unsigned int irq)
{
	irq_info_t *irqp = &irq_info[irq];

	if (irqp->ii_type == IRQT_DEV_EVTCHN)
		return;

	/*
	 * Spin till we are the one to mask the evtchn; this ensures no one
	 * else can be servicing it.
	 */
	while (!ec_mask_evtchn(irq_evtchn(irqp)))
		SMT_PAUSE();
}

static int
ec_evtchn_pending(uint_t ev)
{
	uint_t evi;
	shared_info_t *si = HYPERVISOR_shared_info;

	evi = ev >> EVTCHN_SHIFT;
	ev &= (1ul << EVTCHN_SHIFT) - 1;
	return ((si->evtchn_pending[evi] & (1ul << ev)) != 0);
}

int
ec_pending_irq(unsigned int irq)
{
	int evtchn = irq_evtchn(&irq_info[irq]);

	return (ec_evtchn_pending(evtchn));
}

void
ec_clear_irq(int irq)
{
	irq_info_t *irqp = &irq_info[irq];
	int evtchn;

	if (irqp->ii_type == IRQT_DEV_EVTCHN)
		return;

	ASSERT(irqp->ii_type != IRQT_UNBOUND);

	evtchn = irq_evtchn(irqp);

	ASSERT(EVTCHN_MASKED(evtchn));
	ec_clear_evtchn(evtchn);
}

void
ec_unmask_irq(int irq)
{
	ulong_t flags;
	irq_info_t *irqp = &irq_info[irq];

	flags = intr_clear();
	switch (irqp->ii_type) {
	case IRQT_PIRQ:
		end_pirq(irq);
		break;
	case IRQT_DEV_EVTCHN:
		break;
	default:
		ec_unmask_evtchn(irq_evtchn(irqp));
		break;
	}
	intr_restore(flags);
}

void
ec_try_unmask_irq(int irq)
{
	ulong_t flags;
	irq_info_t *irqp = &irq_info[irq];
	int evtchn;

	flags = intr_clear();
	switch (irqp->ii_type) {
	case IRQT_PIRQ:
		end_pirq(irq);
		break;
	case IRQT_DEV_EVTCHN:
		break;
	default:
		if ((evtchn = irq_evtchn(irqp)) != 0)
			ec_unmask_evtchn(evtchn);
		break;
	}
	intr_restore(flags);
}

/*
 * Poll until an event channel is ready or 'check_func' returns true.  This can
 * only be used in a situation where interrupts are masked, otherwise we have a
 * classic time-of-check vs. time-of-use race.
 */
void
ec_wait_on_evtchn(int evtchn, int (*check_func)(void *), void *arg)
{
	if (DOMAIN_IS_INITDOMAIN(xen_info)) {
		while (!check_func(arg))
			(void) HYPERVISOR_yield();
		return;
	}

	ASSERT(CPU->cpu_m.mcpu_vcpu_info->evtchn_upcall_mask != 0);

	for (;;) {
		evtchn_port_t ports[1];

		ports[0] = evtchn;

		ec_clear_evtchn(evtchn);

		if (check_func(arg))
			return;

		(void) HYPERVISOR_poll(ports, 1, 0);
	}
}

void
ec_wait_on_ipi(int ipl, int (*check_func)(void *), void *arg)
{
	mec_info_t *ipip = &ipi_info[ipl];

	if (ipip->mi_irq == INVALID_IRQ || ipip->mi_irq == 0)
		return;

	ec_wait_on_evtchn(ipip->mi_evtchns[CPU->cpu_id], check_func, arg);
}

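/*
 * Tear down the IPI and VIRQ event channels in preparation for suspend;
 * see the suspend/resume notes at the top of this file.  The caller
 * already holds ec_lock.
 */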
void
ec_suspend(void)
{
	irq_info_t *irqp;
	ushort_t *evtchnp;
	int i;
	int c;

	ASSERT(MUTEX_HELD(&ec_lock));

	for (i = 0; i < MAXIPL; i++) {
		if (ipi_info[i].mi_irq == INVALID_IRQ)
			continue;

		for (c = 0; c < NCPU; c++) {
			if (cpu[c] == NULL)
				continue;

			if (CPU_IN_SET(cpu_suspend_lost_set, c))
				continue;

			evtchnp = &ipi_info[i].mi_evtchns[c];
			ASSERT(*evtchnp != 0);
			unbind_evtchn(evtchnp);
		}
	}

	for (i = 0; i < NR_VIRQS; i++) {
		if (virq_info[i].mi_irq == INVALID_IRQ)
			continue;

		/*
		 * If we're sharing a single event channel across all CPUs, we
		 * should only unbind once.
		 */
		if (virq_info[i].mi_shared) {
			evtchnp = &virq_info[i].mi_evtchns[0];
			unbind_evtchn(evtchnp);
			for (c = 1; c < NCPU; c++)
				virq_info[i].mi_evtchns[c] = 0;
		} else {
			for (c = 0; c < NCPU; c++) {
				if (cpu[c] == NULL)
					continue;

				evtchnp = &virq_info[i].mi_evtchns[c];
				if (*evtchnp != 0)
					unbind_evtchn(evtchnp);
			}
		}
	}

	for (i = 0; i < NR_IRQS; i++) {
		irqp = &irq_info[i];

		switch (irqp->ii_type) {
		case IRQT_EVTCHN:
		case IRQT_DEV_EVTCHN:
			(void) HYPERVISOR_shutdown(SHUTDOWN_crash);
			break;
		case IRQT_PIRQ:
			if (irqp->ii_u.evtchn != 0)
				(void) HYPERVISOR_shutdown(SHUTDOWN_crash);
			break;
		default:
			break;
		}
	}
}

/*
 * The debug irq is special: we only have one evtchn and irq but we allow all
 * cpus to service it.  It's marked as shared and we propagate the event
 * channel into all CPUs by hand.
 */
static void
share_virq(mec_info_t *virqp)
{
	int evtchn = virqp->mi_evtchns[0];
	cpuset_t tset;
	int i;

	ASSERT(evtchn != 0);

	virqp->mi_shared = 1;

	for (i = 1; i < NCPU; i++)
		virqp->mi_evtchns[i] = evtchn;
	CPUSET_ALL(tset);
	bind_evtchn_to_cpuset(evtchn, tset);
}

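/*
 * Rebind a VIRQ's event channels after resume.  Only VIRQ_TIMER is bound
 * on every CPU; the rest are bound on a single CPU, and a shared VIRQ has
 * its one channel re-propagated by share_virq().
 */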
static void
virq_resume(int virq)
{
	mec_info_t *virqp = &virq_info[virq];
	int evtchn;
	int i, err;

	for (i = 0; i < NCPU; i++) {
		cpuset_t tcpus;

		if (cpu[i] == NULL || CPU_IN_SET(cpu_suspend_lost_set, i))
			continue;

		err = xen_bind_virq(virq, i, &evtchn);
		ASSERT(err == 0);

		virqp->mi_evtchns[i] = evtchn;
		evtchn_to_irq[evtchn] = virqp->mi_irq;
		CPUSET_ONLY(tcpus, i);
		bind_evtchn_to_cpuset(evtchn, tcpus);
		ec_unmask_evtchn(evtchn);
		/*
		 * only timer VIRQ is bound to all cpus
		 */
		if (virq != VIRQ_TIMER)
			break;
	}

	if (virqp->mi_shared)
		share_virq(virqp);
}

static void
ipi_resume(int ipl)
{
	mec_info_t *ipip = &ipi_info[ipl];
	int i;

	for (i = 0; i < NCPU; i++) {
		cpuset_t tcpus;
		int evtchn;

		if (cpu[i] == NULL || CPU_IN_SET(cpu_suspend_lost_set, i))
			continue;

		evtchn = xen_bind_ipi(i);
		ipip->mi_evtchns[i] = evtchn;
		evtchn_to_irq[evtchn] = ipip->mi_irq;
		CPUSET_ONLY(tcpus, i);
		bind_evtchn_to_cpuset(evtchn, tcpus);
		ec_unmask_evtchn(evtchn);
	}
}

void
ec_resume(void)
{
	int i;

	/* New event-channel space is not 'live' yet. */
	for (i = 0; i < NR_EVENT_CHANNELS; i++)
		(void) ec_mask_evtchn(i);

	for (i = 0; i < MAXIPL; i++) {
		if (ipi_info[i].mi_irq == INVALID_IRQ)
			continue;
		ipi_resume(i);
	}

	for (i = 0; i < NR_VIRQS; i++) {
		if (virq_info[i].mi_irq == INVALID_IRQ)
			continue;
		virq_resume(i);
	}
}

int
ec_init(void)
{
	int i;
	mutex_init(&ec_lock, NULL, MUTEX_SPIN, (void *)ipltospl(SPL7));

	for (i = 0; i < NR_EVENT_CHANNELS; i++) {
		CPUSET_ZERO(evtchn_cpus[i]);
		evtchn_to_irq[i] = INVALID_IRQ;
		(void) ec_mask_evtchn(i);
	}

	for (i = 0; i < MAXIPL; i++)
		ipi_info[i].mi_irq = INVALID_IRQ;

	for (i = 0; i < NR_VIRQS; i++)
		virq_info[i].mi_irq = INVALID_IRQ;

	/*
	 * Phys IRQ space is statically bound (1:1 mapping), grab the IRQs
	 * now.
	 */
	for (i = PIRQ_BASE; i < NR_PIRQS; i++) {
		irq_info[PIRQ_TO_IRQ(i)].ii_type = IRQT_PIRQ;
	}

	return (0);
}

void
ec_init_debug_irq()
{
	int irq;

	irq = ec_bind_virq_to_irq(VIRQ_DEBUG, 0);
	(void) add_avintr(NULL, IPL_DEBUG, (avfunc)xen_debug_handler,
	    "debug", irq, NULL, NULL, NULL, NULL);

	mutex_enter(&ec_lock);
	share_virq(&virq_info[irq_info[irq].ii_u.index]);
	mutex_exit(&ec_lock);
	ec_debug_irq = irq;
}

#define	UNBLOCKED_EVENTS(si, ix, cpe, cpu_id) \
	((si)->evtchn_pending[ix] & ~(si)->evtchn_mask[ix] & \
		(cpe)->evt_affinity[ix])
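/*
 * UNBLOCKED_EVENTS selects, within one word of the shared bitmaps, the
 * channels that are pending, not masked, and routed to this CPU by the
 * evt_affinity mask maintained in update_evtchn_affinity().
 */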
12345084Sjohnlev 
123510175SStuart.Maybee@Sun.COM 
12365084Sjohnlev /*
12375084Sjohnlev  * This is the entry point for processing events from xen
12385084Sjohnlev  *
12395084Sjohnlev  * (See the commentary associated with the shared_info_st structure
12405084Sjohnlev  * in hypervisor-if.h)
12415084Sjohnlev  *
12425084Sjohnlev  * Since the event channel mechanism doesn't really implement the
12435084Sjohnlev  * concept of priority like hardware interrupt controllers, we simulate
12445084Sjohnlev  * that in software here using the cpu priority field and the pending
12455084Sjohnlev  * interrupts field.  Events/interrupts that are not able to be serviced
12465084Sjohnlev  * now because they are at a lower priority than the current cpu priority
12475084Sjohnlev  * cause a level bit to be recorded in the pending interrupts word.  When
12485084Sjohnlev  * the priority is lowered (either by spl or interrupt exit code) the pending
12495084Sjohnlev  * levels are checked and an upcall is scheduled if there are events/interrupts
12505084Sjohnlev  * that have become deliverable.
12515084Sjohnlev  */
12525084Sjohnlev void
xen_callback_handler(struct regs * rp,trap_trace_rec_t * ttp)12535084Sjohnlev xen_callback_handler(struct regs *rp, trap_trace_rec_t *ttp)
12545084Sjohnlev {
12555084Sjohnlev 	ulong_t pending_sels, pe, selbit;
125610175SStuart.Maybee@Sun.COM 	int i, j, port, pri, curpri, irq, sipri;
125710175SStuart.Maybee@Sun.COM 	uint16_t pending_ints, sip;
12585084Sjohnlev 	struct cpu *cpu = CPU;
12595084Sjohnlev 	volatile shared_info_t *si = HYPERVISOR_shared_info;
12605084Sjohnlev 	volatile vcpu_info_t *vci = cpu->cpu_m.mcpu_vcpu_info;
12615084Sjohnlev 	volatile struct xen_evt_data *cpe = cpu->cpu_m.mcpu_evt_pend;
12625084Sjohnlev 	volatile uint16_t *cpu_ipp = &cpu->cpu_m.mcpu_intr_pending;
126310175SStuart.Maybee@Sun.COM 	extern void dosoftint(struct regs *);
12645084Sjohnlev 
12655084Sjohnlev 	ASSERT(rp->r_trapno == T_AST && rp->r_err == 0);
12665084Sjohnlev 	ASSERT(&si->vcpu_info[cpu->cpu_id] == vci);
12675084Sjohnlev 	ASSERT_STACK_ALIGNED();
12685084Sjohnlev 
12695084Sjohnlev 	vci->evtchn_upcall_pending = 0;
12705084Sjohnlev 
12715084Sjohnlev 	/*
12725084Sjohnlev 	 * To expedite scanning of pending notifications, any 0->1
12735084Sjohnlev 	 * pending transition on an unmasked channel causes a
12745084Sjohnlev 	 * corresponding bit in evtchn_pending_sel to be set.
12755084Sjohnlev 	 * Each bit in the selector covers a 32-bit word in
12765084Sjohnlev 	 * the evtchn_pending[] array.
12775084Sjohnlev 	 */
12785084Sjohnlev 	membar_enter();
12795084Sjohnlev 	do {
12805084Sjohnlev 		pending_sels = vci->evtchn_pending_sel;
12815084Sjohnlev 	} while (atomic_cas_ulong((volatile ulong_t *)&vci->evtchn_pending_sel,
12825084Sjohnlev 	    pending_sels, 0) != pending_sels);
12835084Sjohnlev 
12845084Sjohnlev 	pending_ints = *cpu_ipp;
12855084Sjohnlev 	while ((i = ffs(pending_sels)) != 0) {
12865084Sjohnlev 		i--;
12875084Sjohnlev 		selbit = 1ul << i;
12885084Sjohnlev 		pending_sels &= ~selbit;
12895084Sjohnlev 
12905084Sjohnlev 		membar_enter();
12915084Sjohnlev 		while ((pe = UNBLOCKED_EVENTS(si, i, cpe, cpu->cpu_id)) != 0) {
12925084Sjohnlev 			j = ffs(pe) - 1;
12935084Sjohnlev 			pe &= ~(1ul << j);
12945084Sjohnlev 
12955084Sjohnlev 			port = (i << EVTCHN_SHIFT) + j;
12965084Sjohnlev 
12975084Sjohnlev 			irq = evtchn_to_irq[port];
12985084Sjohnlev 
12995084Sjohnlev 			/*
13005084Sjohnlev 			 * If no irq set, just ignore the event.
13015084Sjohnlev 			 * On e.g. netbsd they call evtchn_device_upcall(port)
13025084Sjohnlev 			 * We require the evtchn driver to install a handler
13035084Sjohnlev 			 * so there will be an irq associated with user mode
13045084Sjohnlev 			 * evtchns.
13055084Sjohnlev 			 */
13065084Sjohnlev 			if (irq == INVALID_IRQ) {
13075084Sjohnlev 				ec_clear_evtchn(port);
13085084Sjohnlev 				continue;
13095084Sjohnlev 			}
13105084Sjohnlev 
13115084Sjohnlev 			/*
13125084Sjohnlev 			 * If there's no handler, it could be a poke, so just
13135084Sjohnlev 			 * accept the event and continue.
13145084Sjohnlev 			 */
13155084Sjohnlev 			if (!irq_info[irq].ii_u2.has_handler) {
13165084Sjohnlev #ifdef TRAPTRACE
13175084Sjohnlev 				ttp->ttr_ipl = 0xff;
13185084Sjohnlev 				if (IRQ_IS_CPUPOKE(irq)) {
13195084Sjohnlev 					ttp->ttr_ipl = XC_CPUPOKE_PIL;
13205084Sjohnlev 					ttp->ttr_marker = TT_INTERRUPT;
13215084Sjohnlev 				}
13225084Sjohnlev 				ttp->ttr_pri = cpu->cpu_pri;
13235084Sjohnlev 				ttp->ttr_spl = cpu->cpu_base_spl;
13245084Sjohnlev 				ttp->ttr_vector = 0xff;
13255084Sjohnlev #endif /* TRAPTRACE */
13265084Sjohnlev 				if (ec_mask_evtchn(port)) {
13275084Sjohnlev 					ec_clear_evtchn(port);
13285084Sjohnlev 					ec_unmask_evtchn(port);
13295084Sjohnlev 					continue;
13305084Sjohnlev 				}
13315084Sjohnlev 			}
13325084Sjohnlev 
13335084Sjohnlev 			pri = irq_info[irq].ii_u2.ipl;
13345084Sjohnlev 
13355084Sjohnlev 			/*
13365084Sjohnlev 			 * If we are the cpu that successfully masks
13375084Sjohnlev 			 * the event, then record it as a pending event
13385084Sjohnlev 			 * for this cpu to service
13395084Sjohnlev 			 */
13405084Sjohnlev 			if (ec_mask_evtchn(port)) {
13415084Sjohnlev 				if (ec_evtchn_pending(port)) {
13425084Sjohnlev 					cpe->pending_sel[pri] |= selbit;
13435084Sjohnlev 					cpe->pending_evts[pri][i] |= (1ul << j);
13445084Sjohnlev 					pending_ints |= 1 << pri;
134510175SStuart.Maybee@Sun.COM 					/*
134610175SStuart.Maybee@Sun.COM 					 * We have recorded a pending interrupt
134710175SStuart.Maybee@Sun.COM 					 * for this cpu.  If it is an edge
134810175SStuart.Maybee@Sun.COM 					 * triggered interrupt then we go ahead
134910175SStuart.Maybee@Sun.COM 					 * and clear the pending and mask bits
135010175SStuart.Maybee@Sun.COM 					 * from the shared info to avoid having
135110175SStuart.Maybee@Sun.COM 					 * the hypervisor see the pending event
135210175SStuart.Maybee@Sun.COM 					 * again and possibly disabling the
135310175SStuart.Maybee@Sun.COM 					 * interrupt.  This should also help
135410175SStuart.Maybee@Sun.COM 					 * keep us from missing an interrupt.
135510175SStuart.Maybee@Sun.COM 					 */
135610175SStuart.Maybee@Sun.COM 					if (ec_is_edge_pirq(irq)) {
135710175SStuart.Maybee@Sun.COM 						ec_clear_evtchn(port);
135810175SStuart.Maybee@Sun.COM 						ec_unmask_evtchn(port);
135910175SStuart.Maybee@Sun.COM 					}
13605084Sjohnlev 				} else {
13615084Sjohnlev 					/*
13625084Sjohnlev 					 * another cpu serviced this event
13635084Sjohnlev 					 * before us, clear the mask.
13645084Sjohnlev 					 */
13655084Sjohnlev 					ec_unmask_evtchn(port);
13665084Sjohnlev 				}
13675084Sjohnlev 			}
13685084Sjohnlev 		}
13695084Sjohnlev 	}
13705084Sjohnlev 	*cpu_ipp = pending_ints;
13715084Sjohnlev 	if (pending_ints == 0)
13725084Sjohnlev 		return;
	/*
	 * We have gathered all the pending events/interrupts;
	 * go service all the ones we can, from highest priority to lowest.
	 * Note: this loop may not actually complete and service all
	 * pending interrupts, since one of the interrupt threads may
	 * block, allowing the pinned thread to run.  In that case, when
	 * we exit the interrupt thread that blocked, we will check for
	 * any unserviced interrupts and re-post an upcall to process
	 * any unserviced pending events.
	 */
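	/*
	 * Worked example of the priority scan below: if *cpu_ipp is
	 * 0x0220 (PILs 5 and 9 pending) and cpu_pri is 4, bsrw_insn()
	 * returns 9 (the highest set bit), so PIL 9 events are serviced
	 * first and PIL 5 is picked up on a later pass.
	 */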
restart:
	curpri = cpu->cpu_pri;
	pri = bsrw_insn(*cpu_ipp);
	while (pri > curpri) {
		while ((pending_sels = cpe->pending_sel[pri]) != 0) {
			i = ffs(pending_sels) - 1;
			while ((pe = cpe->pending_evts[pri][i]) != 0) {
				j = ffs(pe) - 1;
				port = (i << EVTCHN_SHIFT) + j;
				pe &= ~(1ul << j);
				cpe->pending_evts[pri][i] = pe;
				if (pe == 0) {
					/*
					 * Must reload the pending selector
					 * bits here, as they could have
					 * changed on a previous trip around
					 * the inner loop while interrupts
					 * were enabled in an interrupt
					 * service routine.
					 */
					pending_sels = cpe->pending_sel[pri];
					pending_sels &= ~(1ul << i);
					cpe->pending_sel[pri] = pending_sels;
					if (pending_sels == 0)
						*cpu_ipp &= ~(1 << pri);
				}
				irq = evtchn_to_irq[port];
				if (irq == INVALID_IRQ) {
					/*
					 * No longer a handler for this
					 * event channel.  Clear and unmask
					 * the event, then ignore it.
					 */
					ec_clear_evtchn(port);
					ec_unmask_evtchn(port);
					continue;
				}
				if (irq == ec_dev_irq) {
					ASSERT(cpu->cpu_m.mcpu_ec_mbox == 0);
					cpu->cpu_m.mcpu_ec_mbox = port;
				}
				/*
				 * Set up the regs struct to
				 * look like a normal hardware int
				 * and do normal interrupt handling.
				 */
				rp->r_trapno = irq;
				do_interrupt(rp, ttp);
				/*
				 * Check for a cpu priority change,
				 * which can happen if the interrupt
				 * thread blocks.
				 */
				if (cpu->cpu_pri != curpri)
					goto restart;
			}
		}
		/*
		 * Dispatch any soft interrupts that are
		 * higher priority than any hard ones remaining.
		 */
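		/*
		 * For example, if the highest remaining hard PIL is 5
		 * and a PIL 10 soft interrupt is pending, dosoftint()
		 * runs the softint before the PIL 5 events are serviced.
		 */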
		pri = bsrw_insn(*cpu_ipp);
		sip = (uint16_t)cpu->cpu_softinfo.st_pending;
		if (sip != 0) {
			sipri = bsrw_insn(sip);
			if (sipri > pri && sipri > cpu->cpu_pri) {
				dosoftint(rp);
				/*
				 * Check for a cpu priority change,
				 * which can happen if the softint
				 * thread blocks.
				 */
				if (cpu->cpu_pri != curpri)
					goto restart;
			}
		}
	}
	/*
	 * Deliver any pending soft interrupts.
	 */
	if (cpu->cpu_softinfo.st_pending)
		dosoftint(rp);
}

void
ec_unmask_evtchn(unsigned int ev)
{
	uint_t evi, evb;
	volatile shared_info_t *si = HYPERVISOR_shared_info;
	volatile vcpu_info_t *vci = CPU->cpu_m.mcpu_vcpu_info;
	volatile ulong_t *ulp;

	ASSERT(!interrupts_enabled());
	/*
	 * Take the slow (hypercall) path if this event channel is not
	 * bound to the current cpu.
	 */
	if (!CPU_IN_SET(evtchn_cpus[ev], CPU->cpu_id)) {
		xen_evtchn_unmask(ev);
		return;
	}
	evi = ev >> EVTCHN_SHIFT;
	evb = ev & ((1ul << EVTCHN_SHIFT) - 1);
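	/*
	 * Worked example of the decomposition above (assuming a 64-bit
	 * build, where EVTCHN_SHIFT is 6 and each mask word holds 64
	 * channels): ev == 70 yields word index evi == 1 and bit
	 * position evb == 6.
	 */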
	ulp = (volatile ulong_t *)&si->evtchn_mask[evi];
	atomic_and_ulong(ulp, ~(1ul << evb));
	/*
	 * The following is basically the equivalent of
	 * 'hw_resend_irq'. Just like a real IO-APIC we 'lose the
	 * interrupt edge' if the channel is masked.
	 * XXPV - slight race if upcall was about to be set, we may get
	 * an extra upcall.
	 */
	membar_enter();
	if (si->evtchn_pending[evi] & (1ul << evb)) {
		membar_consumer();
		ulp = (volatile ulong_t *)&vci->evtchn_pending_sel;
		if (!(*ulp & (1ul << evi))) {
			atomic_or_ulong(ulp, (1ul << evi));
		}
		vci->evtchn_upcall_pending = 1;
	}
}

/*
 * Set a bit in an evtchn mask word; return true if we are the cpu that
 * set the bit.
 */
int
ec_mask_evtchn(unsigned int ev)
{
	uint_t evi, evb;
	ulong_t new, old, bit;
	volatile shared_info_t *si = HYPERVISOR_shared_info;
	volatile ulong_t *maskp;
	int masked;

	kpreempt_disable();
	evi = ev >> EVTCHN_SHIFT;
	evb = ev & ((1ul << EVTCHN_SHIFT) - 1);
	bit = 1ul << evb;
	maskp = (volatile ulong_t *)&si->evtchn_mask[evi];
	do {
		old = si->evtchn_mask[evi];
		new = old | bit;
	} while (atomic_cas_ulong(maskp, old, new) != old);
	masked = (old & bit) == 0;
	if (masked) {
		evtchn_owner[ev] = CPU->cpu_id;
#ifdef DEBUG
		evtchn_owner_thread[ev] = curthread;
#endif
	}
	kpreempt_enable();
	return (masked);
}
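
/*
 * A minimal sketch of the mask/clear/unmask idiom this primitive
 * supports (the upcall handler above uses the same pattern):
 *
 *	if (ec_mask_evtchn(port)) {
 *		ec_clear_evtchn(port);
 *		ec_unmask_evtchn(port);
 *	}
 *
 * Only the cpu that wins the race to mask the channel acks the
 * pending bit and re-enables delivery.
 */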

void
ec_clear_evtchn(unsigned int ev)
{
	uint_t evi;
	shared_info_t *si = HYPERVISOR_shared_info;
	volatile ulong_t *pendp;

	evi = ev >> EVTCHN_SHIFT;
	ev &= (1ul << EVTCHN_SHIFT) - 1;
	pendp = (volatile ulong_t *)&si->evtchn_pending[evi];
	atomic_and_ulong(pendp, ~(1ul << ev));
}

void
ec_notify_via_evtchn(unsigned int port)
{
	evtchn_send_t send;

	ASSERT(port != INVALID_EVTCHN);

	send.port = port;
	(void) HYPERVISOR_event_channel_op(EVTCHNOP_send, &send);
}
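
/*
 * Illustrative use (hypothetical; names not from this file): a
 * frontend driver that has queued a request on a shared ring and
 * holds its bound port in xdp->xd_evtchn might kick the backend with
 * ec_notify_via_evtchn(xdp->xd_evtchn).
 */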

int
ec_block_irq(int irq)
{
	irq_info_t *irqp = &irq_info[irq];
	int evtchn;

	evtchn = irq_evtchn(irqp);
	(void) ec_mask_evtchn(evtchn);
	return (evtchn_owner[evtchn]);
}

/*
 * Make an event that is pending for delivery on the current cpu "go away"
 * without servicing the interrupt.
 */
void
ec_unpend_irq(int irq)
{
	irq_info_t *irqp = &irq_info[irq];
	int pri = irqp->ii_u2.ipl;
	ulong_t flags;
	uint_t evtchn, evi, bit;
	unsigned long pe, pending_sels;
	struct xen_evt_data *cpe;

	/*
	 * The evtchn must be masked.
	 */
	evtchn = irq_evtchn(irqp);
	ASSERT(EVTCHN_MASKED(evtchn));
	evi = evtchn >> EVTCHN_SHIFT;
	bit = evtchn & ((1ul << EVTCHN_SHIFT) - 1);
	flags = intr_clear();
	cpe = CPU->cpu_m.mcpu_evt_pend;
	pe = cpe->pending_evts[pri][evi] & ~(1ul << bit);
	cpe->pending_evts[pri][evi] = pe;
	if (pe == 0) {
		pending_sels = cpe->pending_sel[pri];
		pending_sels &= ~(1ul << evi);
		cpe->pending_sel[pri] = pending_sels;
		if (pending_sels == 0)
			CPU->cpu_m.mcpu_intr_pending &= ~(1 << pri);
	}
	intr_restore(flags);
}
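
/*
 * Illustrative (hypothetical) pairing of the two routines above when
 * quiescing an irq: call ec_block_irq(irq) to mask the channel and
 * learn which cpu owns it, then have that cpu call ec_unpend_irq(irq)
 * to discard any event it recorded but has not yet serviced.
 */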