xref: /onnv-gate/usr/src/uts/i86xpv/io/psm/xpv_psm.c (revision 12004:93f274d4a367)
15084Sjohnlev /*
25084Sjohnlev  * CDDL HEADER START
35084Sjohnlev  *
45084Sjohnlev  * The contents of this file are subject to the terms of the
55084Sjohnlev  * Common Development and Distribution License (the "License").
65084Sjohnlev  * You may not use this file except in compliance with the License.
75084Sjohnlev  *
85084Sjohnlev  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
95084Sjohnlev  * or http://www.opensolaris.org/os/licensing.
105084Sjohnlev  * See the License for the specific language governing permissions
115084Sjohnlev  * and limitations under the License.
125084Sjohnlev  *
135084Sjohnlev  * When distributing Covered Code, include this CDDL HEADER in each
145084Sjohnlev  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
155084Sjohnlev  * If applicable, add the following below this CDDL HEADER, with the
165084Sjohnlev  * fields enclosed by brackets "[]" replaced with your own identifying
175084Sjohnlev  * information: Portions Copyright [yyyy] [name of copyright owner]
185084Sjohnlev  *
195084Sjohnlev  * CDDL HEADER END
205084Sjohnlev  */
215084Sjohnlev 
225084Sjohnlev /*
238925SEvan.Yan@Sun.COM  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
245084Sjohnlev  * Use is subject to license terms.
255084Sjohnlev  */
265084Sjohnlev 
27*12004Sjiang.liu@intel.com #define	PSMI_1_7
285084Sjohnlev 
295084Sjohnlev #include <sys/mutex.h>
305084Sjohnlev #include <sys/types.h>
315084Sjohnlev #include <sys/time.h>
325084Sjohnlev #include <sys/clock.h>
335084Sjohnlev #include <sys/machlock.h>
345084Sjohnlev #include <sys/smp_impldefs.h>
355084Sjohnlev #include <sys/uadmin.h>
365084Sjohnlev #include <sys/promif.h>
375084Sjohnlev #include <sys/psm.h>
385084Sjohnlev #include <sys/psm_common.h>
395084Sjohnlev #include <sys/atomic.h>
405084Sjohnlev #include <sys/apic.h>
415084Sjohnlev #include <sys/archsystm.h>
425084Sjohnlev #include <sys/mach_intr.h>
435084Sjohnlev #include <sys/hypervisor.h>
445084Sjohnlev #include <sys/evtchn_impl.h>
455084Sjohnlev #include <sys/modctl.h>
465084Sjohnlev #include <sys/trap.h>
475084Sjohnlev #include <sys/panic.h>
4810175SStuart.Maybee@Sun.COM #include <sys/sysmacros.h>
4910175SStuart.Maybee@Sun.COM #include <sys/pci_intr_lib.h>
5010175SStuart.Maybee@Sun.COM #include <vm/hat_i86.h>
515084Sjohnlev 
525084Sjohnlev #include <xen/public/vcpu.h>
535084Sjohnlev #include <xen/public/physdev.h>
545084Sjohnlev 
555084Sjohnlev 
565084Sjohnlev /*
575084Sjohnlev  * Global Data
585084Sjohnlev  */
595084Sjohnlev 
605084Sjohnlev int xen_psm_verbose = 0;
615084Sjohnlev 
627282Smishra /* As of now we don't support x2apic in xVM */
635084Sjohnlev volatile uint32_t *apicadr = NULL;	/* dummy, so common code will link */
645084Sjohnlev int apic_error = 0;
655084Sjohnlev int apic_verbose = 0;
665084Sjohnlev cpuset_t apic_cpumask;
675084Sjohnlev int apic_forceload = 0;
685084Sjohnlev uchar_t apic_vectortoipl[APIC_AVAIL_VECTOR / APIC_VECTOR_PER_IPL] = {
695084Sjohnlev 	3, 4, 5, 5, 6, 6, 9, 10, 11, 12, 13, 14, 15, 15
705084Sjohnlev };
715084Sjohnlev uchar_t apic_ipltopri[MAXIPL + 1];
725084Sjohnlev uchar_t apic_ipls[APIC_AVAIL_VECTOR];
735084Sjohnlev uint_t apic_picinit_called;
745084Sjohnlev apic_cpus_info_t *apic_cpus;
755084Sjohnlev int xen_psm_intr_policy = INTR_ROUND_ROBIN_WITH_AFFINITY;
765084Sjohnlev /* use to make sure only one cpu handles the nmi */
775084Sjohnlev static lock_t xen_psm_nmi_lock;
785084Sjohnlev int xen_psm_kmdb_on_nmi = 0;		/* 0 - no, 1 - yes enter kmdb */
795084Sjohnlev int xen_psm_panic_on_nmi = 0;
805084Sjohnlev int xen_psm_num_nmis = 0;
815084Sjohnlev 
825084Sjohnlev cpuset_t xen_psm_cpus_online;	/* online cpus */
835084Sjohnlev int xen_psm_ncpus = 1;		/* cpu count */
845084Sjohnlev int xen_psm_next_bind_cpu;	/* next cpu to bind an interrupt to */
855084Sjohnlev 
8610323SStuart.Maybee@Sun.COM int xen_support_msi = 0;
875084Sjohnlev 
885084Sjohnlev static int xen_clock_irq = INVALID_IRQ;
895084Sjohnlev 
/* flag definitions for xen_psm_verbose */
#define	XEN_PSM_VERBOSE_IRQ_FLAG		0x00000001
#define	XEN_PSM_VERBOSE_POWEROFF_FLAG		0x00000002
#define	XEN_PSM_VERBOSE_POWEROFF_PAUSE_FLAG	0x00000004

/*
 * Conditional debug output helpers.  "fmt" is a complete, parenthesized
 * argument list, e.g. XEN_PSM_VERBOSE_IRQ((CE_CONT, "msg %d", x));
 *
 * The bodies are wrapped in do { } while (0) so that each invocation is a
 * single statement: it can be used as the unbraced body of an if/else
 * without capturing the caller's "else" or leaving a stray empty statement.
 * Callers supply the trailing semicolon.
 */
#define	XEN_PSM_VERBOSE_IRQ(fmt)					\
	do {								\
		if (xen_psm_verbose & XEN_PSM_VERBOSE_IRQ_FLAG)		\
			cmn_err fmt;					\
	} while (0)

#define	XEN_PSM_VERBOSE_POWEROFF(fmt)					\
	do {								\
		if (xen_psm_verbose & XEN_PSM_VERBOSE_POWEROFF_FLAG)	\
			prom_printf fmt;				\
	} while (0)
1025084Sjohnlev 
1035084Sjohnlev /*
1045084Sjohnlev  * Dummy apic array to point common routines at that want to do some apic
1055084Sjohnlev  * manipulation.  Xen doesn't allow guest apic access so we point at these
1065084Sjohnlev  * memory locations to fake out those who want to do apic fiddling.
1075084Sjohnlev  */
1085084Sjohnlev uint32_t xen_psm_dummy_apic[APIC_IRR_REG + 1];
1095084Sjohnlev 
1105084Sjohnlev static struct psm_info xen_psm_info;
1115084Sjohnlev static void xen_psm_setspl(int);
1125084Sjohnlev 
11310175SStuart.Maybee@Sun.COM int
11410175SStuart.Maybee@Sun.COM apic_alloc_msi_vectors(dev_info_t *dip, int inum, int count, int pri,
11510175SStuart.Maybee@Sun.COM     int behavior);
11610175SStuart.Maybee@Sun.COM int
11710175SStuart.Maybee@Sun.COM apic_alloc_msix_vectors(dev_info_t *dip, int inum, int count, int pri,
11810175SStuart.Maybee@Sun.COM     int behavior);
1195084Sjohnlev 
1205084Sjohnlev /*
1215084Sjohnlev  * Local support routines
1225084Sjohnlev  */
1235084Sjohnlev 
1245084Sjohnlev /*
1255084Sjohnlev  * Select vcpu to bind xen virtual device interrupt to.
1265084Sjohnlev  */
1275084Sjohnlev /*ARGSUSED*/
1285084Sjohnlev int
xen_psm_bind_intr(int irq)1295084Sjohnlev xen_psm_bind_intr(int irq)
1305084Sjohnlev {
13110175SStuart.Maybee@Sun.COM 	int bind_cpu;
1325084Sjohnlev 	apic_irq_t *irqptr;
1335084Sjohnlev 
13410175SStuart.Maybee@Sun.COM 	bind_cpu = IRQ_UNBOUND;
1355084Sjohnlev 	if (xen_psm_intr_policy == INTR_LOWEST_PRIORITY)
13610175SStuart.Maybee@Sun.COM 		return (bind_cpu);
1375084Sjohnlev 	if (irq <= APIC_MAX_VECTOR)
1385084Sjohnlev 		irqptr = apic_irq_table[irq];
1395084Sjohnlev 	else
1405084Sjohnlev 		irqptr = NULL;
14110175SStuart.Maybee@Sun.COM 	if (irqptr && (irqptr->airq_cpu != IRQ_UNBOUND))
14210175SStuart.Maybee@Sun.COM 		bind_cpu = irqptr->airq_cpu & ~IRQ_USER_BOUND;
14310175SStuart.Maybee@Sun.COM 	if (bind_cpu != IRQ_UNBOUND) {
14410175SStuart.Maybee@Sun.COM 		if (!CPU_IN_SET(xen_psm_cpus_online, bind_cpu))
1455084Sjohnlev 			bind_cpu = 0;
1465084Sjohnlev 		goto done;
1475084Sjohnlev 	}
1485084Sjohnlev 	if (xen_psm_intr_policy == INTR_ROUND_ROBIN_WITH_AFFINITY) {
1495084Sjohnlev 		do {
1505084Sjohnlev 			bind_cpu = xen_psm_next_bind_cpu++;
1515084Sjohnlev 			if (xen_psm_next_bind_cpu >= xen_psm_ncpus)
1525084Sjohnlev 				xen_psm_next_bind_cpu = 0;
1535084Sjohnlev 		} while (!CPU_IN_SET(xen_psm_cpus_online, bind_cpu));
1545084Sjohnlev 	} else {
1555084Sjohnlev 		bind_cpu = 0;
1565084Sjohnlev 	}
1575084Sjohnlev done:
1585084Sjohnlev 	return (bind_cpu);
1595084Sjohnlev }
1605084Sjohnlev 
1615084Sjohnlev /*
1625084Sjohnlev  * Autoconfiguration Routines
1635084Sjohnlev  */
1645084Sjohnlev 
1655084Sjohnlev static int
xen_psm_probe(void)1665084Sjohnlev xen_psm_probe(void)
1675084Sjohnlev {
1685084Sjohnlev 	int ret = PSM_SUCCESS;
1695084Sjohnlev 
1705084Sjohnlev 	if (DOMAIN_IS_INITDOMAIN(xen_info))
1715084Sjohnlev 		ret = apic_probe_common(xen_psm_info.p_mach_idstring);
1725084Sjohnlev 	return (ret);
1735084Sjohnlev }
1745084Sjohnlev 
1755084Sjohnlev static void
xen_psm_softinit(void)1765084Sjohnlev xen_psm_softinit(void)
1775084Sjohnlev {
1785084Sjohnlev 	/* LINTED logical expression always true: op "||" */
1795084Sjohnlev 	ASSERT((1 << EVTCHN_SHIFT) == NBBY * sizeof (ulong_t));
1805529Ssmaybe 	CPUSET_ATOMIC_ADD(xen_psm_cpus_online, 0);
1815084Sjohnlev 	if (DOMAIN_IS_INITDOMAIN(xen_info)) {
1825084Sjohnlev 		apic_init_common();
1835084Sjohnlev 	}
1845084Sjohnlev }
1855084Sjohnlev 
1865084Sjohnlev #define	XEN_NSEC_PER_TICK	10 /* XXX - assume we have a 100 Mhz clock */
1875084Sjohnlev 
1885084Sjohnlev /*ARGSUSED*/
1895084Sjohnlev static int
xen_psm_clkinit(int hertz)1905084Sjohnlev xen_psm_clkinit(int hertz)
1915084Sjohnlev {
1925084Sjohnlev 	extern enum tod_fault_type tod_fault(enum tod_fault_type, int);
1935084Sjohnlev 	extern int dosynctodr;
1945084Sjohnlev 
1955084Sjohnlev 	/*
1965084Sjohnlev 	 * domU cannot set the TOD hardware, fault the TOD clock now to
1975084Sjohnlev 	 * indicate that and turn off attempts to sync TOD hardware
1985084Sjohnlev 	 * with the hires timer.
1995084Sjohnlev 	 */
2005084Sjohnlev 	if (!DOMAIN_IS_INITDOMAIN(xen_info)) {
2015084Sjohnlev 		mutex_enter(&tod_lock);
2025084Sjohnlev 		(void) tod_fault(TOD_RDONLY, 0);
2035084Sjohnlev 		dosynctodr = 0;
2045084Sjohnlev 		mutex_exit(&tod_lock);
2055084Sjohnlev 	}
2065084Sjohnlev 	/*
2075084Sjohnlev 	 * The hypervisor provides a timer based on the local APIC timer.
2085084Sjohnlev 	 * The interface supports requests of nanosecond resolution.
2095084Sjohnlev 	 * A common frequency of the apic clock is 100 Mhz which
2105084Sjohnlev 	 * gives a resolution of 10 nsec per tick.  What we would really like
2115084Sjohnlev 	 * is a way to get the ns per tick value from xen.
2125084Sjohnlev 	 * XXPV - This is an assumption that needs checking and may change
2135084Sjohnlev 	 */
2145084Sjohnlev 	return (XEN_NSEC_PER_TICK);
2155084Sjohnlev }
2165084Sjohnlev 
/*
 * High-resolution time initialization entry point: sets the global
 * gethrtime_hires flag.
 */
static void
xen_psm_hrtimeinit(void)
{
	extern int gethrtime_hires;

	gethrtime_hires = 1;
}
2235084Sjohnlev 
2245084Sjohnlev /* xen_psm NMI handler */
2255084Sjohnlev /*ARGSUSED*/
2265084Sjohnlev static void
xen_psm_nmi_intr(caddr_t arg,struct regs * rp)2275084Sjohnlev xen_psm_nmi_intr(caddr_t arg, struct regs *rp)
2285084Sjohnlev {
2295084Sjohnlev 	xen_psm_num_nmis++;
2305084Sjohnlev 
2315084Sjohnlev 	if (!lock_try(&xen_psm_nmi_lock))
2325084Sjohnlev 		return;
2335084Sjohnlev 
2345084Sjohnlev 	if (xen_psm_kmdb_on_nmi && psm_debugger()) {
2355084Sjohnlev 		debug_enter("NMI received: entering kmdb\n");
2365084Sjohnlev 	} else if (xen_psm_panic_on_nmi) {
2375084Sjohnlev 		/* Keep panic from entering kmdb. */
2385084Sjohnlev 		nopanicdebug = 1;
2395084Sjohnlev 		panic("NMI received\n");
2405084Sjohnlev 	} else {
2415084Sjohnlev 		/*
2425084Sjohnlev 		 * prom_printf is the best shot we have of something which is
2435084Sjohnlev 		 * problem free from high level/NMI type of interrupts
2445084Sjohnlev 		 */
2455084Sjohnlev 		prom_printf("NMI received\n");
2465084Sjohnlev 	}
2475084Sjohnlev 
2485084Sjohnlev 	lock_clear(&xen_psm_nmi_lock);
2495084Sjohnlev }
2505084Sjohnlev 
2515084Sjohnlev static void
xen_psm_picinit()2525084Sjohnlev xen_psm_picinit()
2535084Sjohnlev {
2545084Sjohnlev 	int cpu, irqno;
2555084Sjohnlev 	cpuset_t cpus;
2565084Sjohnlev 
2575084Sjohnlev 	if (DOMAIN_IS_INITDOMAIN(xen_info)) {
2585084Sjohnlev 		/* set a flag so we know we have run xen_psm_picinit() */
2595084Sjohnlev 		apic_picinit_called = 1;
2605084Sjohnlev 		LOCK_INIT_CLEAR(&apic_ioapic_lock);
2615084Sjohnlev 
2625084Sjohnlev 		/* XXPV - do we need to do this? */
2635084Sjohnlev 		picsetup();	 /* initialise the 8259 */
2645084Sjohnlev 
2655084Sjohnlev 		/* enable apic mode if imcr present */
2665084Sjohnlev 		/* XXPV - do we need to do this either? */
2675084Sjohnlev 		if (apic_imcrp) {
2685084Sjohnlev 			outb(APIC_IMCR_P1, (uchar_t)APIC_IMCR_SELECT);
2695084Sjohnlev 			outb(APIC_IMCR_P2, (uchar_t)APIC_IMCR_APIC);
2705084Sjohnlev 		}
2715084Sjohnlev 
2725084Sjohnlev 		ioapic_init_intr(IOAPIC_NOMASK);
2735084Sjohnlev 		/*
2745084Sjohnlev 		 * We never called xen_psm_addspl() when the SCI
2755084Sjohnlev 		 * interrupt was added because that happened before the
2765084Sjohnlev 		 * PSM module was loaded.  Fix that up here by doing
2775084Sjohnlev 		 * any missed operations (e.g. bind to CPU)
2785084Sjohnlev 		 */
2795084Sjohnlev 		if ((irqno = apic_sci_vect) > 0) {
2805084Sjohnlev 			if ((cpu = xen_psm_bind_intr(irqno)) == IRQ_UNBOUND) {
2815084Sjohnlev 				CPUSET_ZERO(cpus);
2825084Sjohnlev 				CPUSET_OR(cpus, xen_psm_cpus_online);
2835084Sjohnlev 			} else {
2845084Sjohnlev 				CPUSET_ONLY(cpus, cpu & ~IRQ_USER_BOUND);
2855084Sjohnlev 			}
2865084Sjohnlev 			ec_set_irq_affinity(irqno, cpus);
2875529Ssmaybe 			apic_irq_table[irqno]->airq_temp_cpu =
2885529Ssmaybe 			    (uchar_t)(cpu & ~IRQ_USER_BOUND);
2895084Sjohnlev 			ec_enable_irq(irqno);
2905084Sjohnlev 		}
2915084Sjohnlev 	}
2925084Sjohnlev 
2935084Sjohnlev 	/* add nmi handler - least priority nmi handler */
2945084Sjohnlev 	LOCK_INIT_CLEAR(&xen_psm_nmi_lock);
2955084Sjohnlev 
2965084Sjohnlev 	if (!psm_add_nmintr(0, (avfunc) xen_psm_nmi_intr,
2975529Ssmaybe 	    "xVM_psm NMI handler", (caddr_t)NULL))
2985529Ssmaybe 		cmn_err(CE_WARN, "xVM_psm: Unable to add nmi handler");
2995084Sjohnlev }
3005084Sjohnlev 
3015084Sjohnlev 
3025084Sjohnlev /*
3035084Sjohnlev  * generates an interprocessor interrupt to another CPU
3045084Sjohnlev  */
3055084Sjohnlev static void
xen_psm_send_ipi(int cpun,int ipl)3065084Sjohnlev xen_psm_send_ipi(int cpun, int ipl)
3075084Sjohnlev {
3085084Sjohnlev 	ulong_t flag = intr_clear();
3095084Sjohnlev 
3105084Sjohnlev 	ec_send_ipi(ipl, cpun);
3115084Sjohnlev 	intr_restore(flag);
3125084Sjohnlev }
3135084Sjohnlev 
3145084Sjohnlev /*ARGSUSED*/
3155084Sjohnlev static int
xen_psm_addspl(int irqno,int ipl,int min_ipl,int max_ipl)3165084Sjohnlev xen_psm_addspl(int irqno, int ipl, int min_ipl, int max_ipl)
3175084Sjohnlev {
3185084Sjohnlev 	int cpu, ret;
3195084Sjohnlev 	cpuset_t cpus;
3205084Sjohnlev 
3215084Sjohnlev 	/*
3225084Sjohnlev 	 * We are called at splhi() so we can't call anything that might end
3235084Sjohnlev 	 * up trying to context switch.
3245084Sjohnlev 	 */
3255084Sjohnlev 	if (irqno >= PIRQ_BASE && irqno < NR_PIRQS &&
3265084Sjohnlev 	    DOMAIN_IS_INITDOMAIN(xen_info)) {
3275084Sjohnlev 		/*
3285084Sjohnlev 		 * Priority/affinity/enable for PIRQ's is set in ec_setup_pirq()
3295084Sjohnlev 		 */
3305084Sjohnlev 		ret = apic_addspl_common(irqno, ipl, min_ipl, max_ipl);
3315084Sjohnlev 	} else {
3325084Sjohnlev 		/*
3335084Sjohnlev 		 * Set priority/affinity/enable for non PIRQs
3345084Sjohnlev 		 */
3355084Sjohnlev 		ret = ec_set_irq_priority(irqno, ipl);
3365084Sjohnlev 		ASSERT(ret == 0);
3375084Sjohnlev 		if ((cpu = xen_psm_bind_intr(irqno)) == IRQ_UNBOUND) {
3385084Sjohnlev 			CPUSET_ZERO(cpus);
3395084Sjohnlev 			CPUSET_OR(cpus, xen_psm_cpus_online);
3405084Sjohnlev 		} else {
3415084Sjohnlev 			CPUSET_ONLY(cpus, cpu & ~IRQ_USER_BOUND);
3425084Sjohnlev 		}
3435084Sjohnlev 		ec_set_irq_affinity(irqno, cpus);
3445084Sjohnlev 		ec_enable_irq(irqno);
3455084Sjohnlev 	}
3465084Sjohnlev 	return (ret);
3475084Sjohnlev }
3485084Sjohnlev 
3495084Sjohnlev /*
3505084Sjohnlev  * Acquire ownership of this irq on this cpu
3515084Sjohnlev  */
3525084Sjohnlev void
xen_psm_acquire_irq(int irq)3535084Sjohnlev xen_psm_acquire_irq(int irq)
3545084Sjohnlev {
3555084Sjohnlev 	ulong_t flags;
3565084Sjohnlev 	int cpuid;
3575084Sjohnlev 
3585084Sjohnlev 	/*
3595084Sjohnlev 	 * If the irq is currently being serviced by another cpu
3605084Sjohnlev 	 * we busy-wait for the other cpu to finish.  Take any
3615084Sjohnlev 	 * pending interrupts before retrying.
3625084Sjohnlev 	 */
3635084Sjohnlev 	do {
3645084Sjohnlev 		flags = intr_clear();
3655084Sjohnlev 		cpuid = ec_block_irq(irq);
3665084Sjohnlev 		intr_restore(flags);
3675084Sjohnlev 	} while (cpuid != CPU->cpu_id);
3685084Sjohnlev }
3695084Sjohnlev 
3705084Sjohnlev /*ARGSUSED*/
3715084Sjohnlev static int
xen_psm_delspl(int irqno,int ipl,int min_ipl,int max_ipl)3725084Sjohnlev xen_psm_delspl(int irqno, int ipl, int min_ipl, int max_ipl)
3735084Sjohnlev {
3745084Sjohnlev 	apic_irq_t *irqptr;
3755084Sjohnlev 	int err = PSM_SUCCESS;
3765084Sjohnlev 
3775084Sjohnlev 	if (irqno >= PIRQ_BASE && irqno < NR_PIRQS &&
3785084Sjohnlev 	    DOMAIN_IS_INITDOMAIN(xen_info)) {
3795084Sjohnlev 		irqptr = apic_irq_table[irqno];
3805084Sjohnlev 		/*
3815084Sjohnlev 		 * unbind if no more sharers of this irq/evtchn
3825084Sjohnlev 		 */
3835084Sjohnlev 		if (irqptr->airq_share == 1) {
3845084Sjohnlev 			xen_psm_acquire_irq(irqno);
3855084Sjohnlev 			ec_unbind_irq(irqno);
3865084Sjohnlev 		}
3875084Sjohnlev 		err = apic_delspl_common(irqno, ipl, min_ipl, max_ipl);
3885084Sjohnlev 		/*
3895084Sjohnlev 		 * If still in use reset priority
3905084Sjohnlev 		 */
3915084Sjohnlev 		if (!err && irqptr->airq_share != 0) {
3925084Sjohnlev 			err = ec_set_irq_priority(irqno, max_ipl);
3935084Sjohnlev 			return (err);
3945084Sjohnlev 		}
3955084Sjohnlev 	} else {
3965084Sjohnlev 		xen_psm_acquire_irq(irqno);
3975084Sjohnlev 		ec_unbind_irq(irqno);
3985084Sjohnlev 	}
3995084Sjohnlev 	return (err);
4005084Sjohnlev }
4015084Sjohnlev 
4025084Sjohnlev static processorid_t
xen_psm_get_next_processorid(processorid_t id)4035084Sjohnlev xen_psm_get_next_processorid(processorid_t id)
4045084Sjohnlev {
4055084Sjohnlev 	if (id == -1)
4065084Sjohnlev 		return (0);
4075084Sjohnlev 
4085084Sjohnlev 	for (id++; id < NCPU; id++) {
4095084Sjohnlev 		switch (-HYPERVISOR_vcpu_op(VCPUOP_is_up, id, NULL)) {
4105084Sjohnlev 		case 0:		/* yeah, that one's there */
4115084Sjohnlev 			return (id);
4125084Sjohnlev 		default:
4135084Sjohnlev 		case X_EINVAL:	/* out of range */
4145084Sjohnlev 			return (-1);
4155084Sjohnlev 		case X_ENOENT:	/* not present in the domain */
4165084Sjohnlev 			/*
4175084Sjohnlev 			 * It's not clear that we -need- to keep looking
4185084Sjohnlev 			 * at this point, if, e.g., we can guarantee
4195084Sjohnlev 			 * the hypervisor always keeps a contiguous range
4205084Sjohnlev 			 * of vcpus around this is equivalent to "out of range".
4215084Sjohnlev 			 *
4225084Sjohnlev 			 * But it would be sad to miss a vcpu we're
4235084Sjohnlev 			 * supposed to be using ..
4245084Sjohnlev 			 */
4255084Sjohnlev 			break;
4265084Sjohnlev 		}
4275084Sjohnlev 	}
4285084Sjohnlev 
4295084Sjohnlev 	return (-1);
4305084Sjohnlev }
4315084Sjohnlev 
4325084Sjohnlev /*
4335084Sjohnlev  * XXPV - undo the start cpu op change; return to ignoring this value
4345084Sjohnlev  *	- also tweak error handling in main startup loop
4355084Sjohnlev  */
4365084Sjohnlev /*ARGSUSED*/
4375084Sjohnlev static int
xen_psm_cpu_start(processorid_t id,caddr_t arg)4385084Sjohnlev xen_psm_cpu_start(processorid_t id, caddr_t arg)
4395084Sjohnlev {
4405084Sjohnlev 	int ret;
4415084Sjohnlev 
4425084Sjohnlev 	ASSERT(id > 0);
4435529Ssmaybe 	CPUSET_ATOMIC_ADD(xen_psm_cpus_online, id);
4445084Sjohnlev 	ec_bind_cpu_ipis(id);
4455084Sjohnlev 	(void) ec_bind_virq_to_irq(VIRQ_TIMER, id);
4465084Sjohnlev 	if ((ret = xen_vcpu_up(id)) == 0)
4475084Sjohnlev 		xen_psm_ncpus++;
4485084Sjohnlev 	else
4495084Sjohnlev 		ret = EINVAL;
4505084Sjohnlev 	return (ret);
4515084Sjohnlev }
4525084Sjohnlev 
/*
 * Allocate an irq for inter cpu signaling at priority "ipl".  The "type"
 * argument is not used; the IPI is bound on cpu 0.
 */
/*ARGSUSED*/
static int
xen_psm_get_ipivect(int ipl, int type)
{
	int irq;

	irq = ec_bind_ipi_to_irq(ipl, 0);
	return (irq);
}
4625084Sjohnlev 
4635084Sjohnlev /*ARGSUSED*/
4645084Sjohnlev static int
xen_psm_get_clockirq(int ipl)4655084Sjohnlev xen_psm_get_clockirq(int ipl)
4665084Sjohnlev {
4675084Sjohnlev 	if (xen_clock_irq != INVALID_IRQ)
4685084Sjohnlev 		return (xen_clock_irq);
4695084Sjohnlev 
4705084Sjohnlev 	xen_clock_irq = ec_bind_virq_to_irq(VIRQ_TIMER, 0);
4715084Sjohnlev 	return (xen_clock_irq);
4725084Sjohnlev }
4735084Sjohnlev 
4745084Sjohnlev /*ARGSUSED*/
4755084Sjohnlev static void
xen_psm_shutdown(int cmd,int fcn)4765084Sjohnlev xen_psm_shutdown(int cmd, int fcn)
4775084Sjohnlev {
4785084Sjohnlev 	XEN_PSM_VERBOSE_POWEROFF(("xen_psm_shutdown(%d,%d);\n", cmd, fcn));
4795084Sjohnlev 
4805084Sjohnlev 	switch (cmd) {
4815084Sjohnlev 	case A_SHUTDOWN:
4825084Sjohnlev 		switch (fcn) {
4835084Sjohnlev 		case AD_BOOT:
4845084Sjohnlev 		case AD_IBOOT:
4855084Sjohnlev 			(void) HYPERVISOR_shutdown(SHUTDOWN_reboot);
4865084Sjohnlev 			break;
4875084Sjohnlev 		case AD_POWEROFF:
4885084Sjohnlev 			/* fall through if domU or if poweroff fails */
4895084Sjohnlev 			if (DOMAIN_IS_INITDOMAIN(xen_info))
4905084Sjohnlev 				if (apic_enable_acpi)
4915084Sjohnlev 					(void) acpi_poweroff();
4925084Sjohnlev 			/* FALLTHRU */
4935084Sjohnlev 		case AD_HALT:
4945084Sjohnlev 		default:
4955084Sjohnlev 			(void) HYPERVISOR_shutdown(SHUTDOWN_poweroff);
4965084Sjohnlev 			break;
4975084Sjohnlev 		}
4985084Sjohnlev 		break;
4995084Sjohnlev 	case A_REBOOT:
5005084Sjohnlev 		(void) HYPERVISOR_shutdown(SHUTDOWN_reboot);
5015084Sjohnlev 		break;
5025084Sjohnlev 	default:
5035084Sjohnlev 		return;
5045084Sjohnlev 	}
5055084Sjohnlev }
5065084Sjohnlev 
5075084Sjohnlev 
5085084Sjohnlev static int
xen_psm_translate_irq(dev_info_t * dip,int irqno)5095084Sjohnlev xen_psm_translate_irq(dev_info_t *dip, int irqno)
5105084Sjohnlev {
5115084Sjohnlev 	if (dip == NULL) {
5125084Sjohnlev 		XEN_PSM_VERBOSE_IRQ((CE_CONT, "!xen_psm: irqno = %d"
5135084Sjohnlev 		    " dip = NULL\n", irqno));
5145084Sjohnlev 		return (irqno);
5155084Sjohnlev 	}
5165084Sjohnlev 	return (irqno);
5175084Sjohnlev }
5185084Sjohnlev 
5195084Sjohnlev /*
5205084Sjohnlev  * xen_psm_intr_enter() acks the event that triggered the interrupt and
5215084Sjohnlev  * returns the new priority level,
5225084Sjohnlev  */
5235084Sjohnlev /*ARGSUSED*/
5245084Sjohnlev static int
xen_psm_intr_enter(int ipl,int * vector)5255084Sjohnlev xen_psm_intr_enter(int ipl, int *vector)
5265084Sjohnlev {
5275084Sjohnlev 	int newipl;
5285084Sjohnlev 	uint_t intno;
5295084Sjohnlev 	cpu_t *cpu = CPU;
5305084Sjohnlev 
5315084Sjohnlev 	intno = (*vector);
5325084Sjohnlev 
5335084Sjohnlev 	ASSERT(intno < NR_IRQS);
5345084Sjohnlev 	ASSERT(cpu->cpu_m.mcpu_vcpu_info->evtchn_upcall_mask != 0);
5355084Sjohnlev 
53610175SStuart.Maybee@Sun.COM 	if (!ec_is_edge_pirq(intno))
53710175SStuart.Maybee@Sun.COM 		ec_clear_irq(intno);
5385084Sjohnlev 
5395084Sjohnlev 	newipl = autovect[intno].avh_hi_pri;
5405084Sjohnlev 	if (newipl == 0) {
5415084Sjohnlev 		/*
5425084Sjohnlev 		 * (newipl == 0) means we have no service routines for this
5435084Sjohnlev 		 * vector.  We will treat this as a spurious interrupt.
5445084Sjohnlev 		 * We have cleared the pending bit already, clear the event
5455084Sjohnlev 		 * mask and return a spurious interrupt.  This case can happen
5465084Sjohnlev 		 * when an interrupt delivery is racing with the removal of
5475084Sjohnlev 		 * of the service routine for that interrupt.
5485084Sjohnlev 		 */
5495084Sjohnlev 		ec_unmask_irq(intno);
5505084Sjohnlev 		newipl = -1;	/* flag spurious interrupt */
5515084Sjohnlev 	} else if (newipl <= cpu->cpu_pri) {
5525084Sjohnlev 		/*
5535084Sjohnlev 		 * (newipl <= cpu->cpu_pri) means that we must be trying to
5545084Sjohnlev 		 * service a vector that was shared with a higher priority
5555084Sjohnlev 		 * isr.  The higher priority handler has been removed and
5565084Sjohnlev 		 * we need to service this int.  We can't return a lower
5575084Sjohnlev 		 * priority than current cpu priority.  Just synthesize a
5585084Sjohnlev 		 * priority to return that should be acceptable.
55910453SStuart.Maybee@Sun.COM 		 * It should never happen that we synthesize a priority that
56010453SStuart.Maybee@Sun.COM 		 * moves us from low-priority to high-priority that would make
56110453SStuart.Maybee@Sun.COM 		 * a us incorrectly run on the high priority stack.
5625084Sjohnlev 		 */
5635084Sjohnlev 		newipl = cpu->cpu_pri + 1;	/* synthetic priority */
56410453SStuart.Maybee@Sun.COM 		ASSERT(newipl != LOCK_LEVEL + 1);
5655084Sjohnlev 	}
5665084Sjohnlev 	return (newipl);
5675084Sjohnlev }
5685084Sjohnlev 
5695084Sjohnlev 
/*
 * xen_psm_intr_exit() is called after an interrupt has been processed.
 * It tries to unmask the irq named by "vector" and then restores the old
 * interrupt priority level "ipl".
 * It is called with interrupts disabled, and does not enable interrupts.
 */
/* ARGSUSED */
static void
xen_psm_intr_exit(int ipl, int vector)
{
	ec_try_unmask_irq(vector);

	/* may flag an upcall for events the lower ipl now allows */
	xen_psm_setspl(ipl);
}
5825084Sjohnlev 
5835084Sjohnlev intr_exit_fn_t
psm_intr_exit_fn(void)5845084Sjohnlev psm_intr_exit_fn(void)
5855084Sjohnlev {
5865084Sjohnlev 	return (xen_psm_intr_exit);
5875084Sjohnlev }
5885084Sjohnlev 
5895084Sjohnlev /*
5905084Sjohnlev  * Check if new ipl level allows delivery of previously unserviced events
5915084Sjohnlev  */
5925084Sjohnlev static void
xen_psm_setspl(int ipl)5935084Sjohnlev xen_psm_setspl(int ipl)
5945084Sjohnlev {
5955084Sjohnlev 	struct cpu *cpu = CPU;
5965084Sjohnlev 	volatile vcpu_info_t *vci = cpu->cpu_m.mcpu_vcpu_info;
5975084Sjohnlev 	uint16_t pending;
5985084Sjohnlev 
5995084Sjohnlev 	ASSERT(vci->evtchn_upcall_mask != 0);
6005084Sjohnlev 
6015084Sjohnlev 	/*
6025084Sjohnlev 	 * If new ipl level will enable any pending interrupts, setup so the
6035084Sjohnlev 	 * upcoming sti will cause us to get an upcall.
6045084Sjohnlev 	 */
6055084Sjohnlev 	pending = cpu->cpu_m.mcpu_intr_pending & ~((1 << (ipl + 1)) - 1);
6065084Sjohnlev 	if (pending) {
6075084Sjohnlev 		int i;
6085084Sjohnlev 		ulong_t pending_sels = 0;
6095084Sjohnlev 		volatile ulong_t *selp;
6105084Sjohnlev 		struct xen_evt_data *cpe = cpu->cpu_m.mcpu_evt_pend;
6115084Sjohnlev 
6125084Sjohnlev 		for (i = bsrw_insn(pending); i > ipl; i--)
6135084Sjohnlev 			pending_sels |= cpe->pending_sel[i];
6145084Sjohnlev 		ASSERT(pending_sels);
6155084Sjohnlev 		selp = (volatile ulong_t *)&vci->evtchn_pending_sel;
6165084Sjohnlev 		atomic_or_ulong(selp, pending_sels);
6175084Sjohnlev 		vci->evtchn_upcall_pending = 1;
6185084Sjohnlev 	}
6195084Sjohnlev }
6205084Sjohnlev 
6215084Sjohnlev /*
6225084Sjohnlev  * This function provides external interface to the nexus for all
6235084Sjohnlev  * functionality related to the new DDI interrupt framework.
6245084Sjohnlev  *
6255084Sjohnlev  * Input:
6265084Sjohnlev  * dip     - pointer to the dev_info structure of the requested device
6275084Sjohnlev  * hdlp    - pointer to the internal interrupt handle structure for the
6285084Sjohnlev  *	     requested interrupt
6295084Sjohnlev  * intr_op - opcode for this call
6305084Sjohnlev  * result  - pointer to the integer that will hold the result to be
6315084Sjohnlev  *	     passed back if return value is PSM_SUCCESS
6325084Sjohnlev  *
6335084Sjohnlev  * Output:
6345084Sjohnlev  * return value is either PSM_SUCCESS or PSM_FAILURE
6355084Sjohnlev  */
6365084Sjohnlev int
xen_intr_ops(dev_info_t * dip,ddi_intr_handle_impl_t * hdlp,psm_intr_op_t intr_op,int * result)6375084Sjohnlev xen_intr_ops(dev_info_t *dip, ddi_intr_handle_impl_t *hdlp,
6385084Sjohnlev     psm_intr_op_t intr_op, int *result)
6395084Sjohnlev {
6405084Sjohnlev 	int		cap;
6415084Sjohnlev 	int		err;
6425084Sjohnlev 	int		new_priority;
6435084Sjohnlev 	apic_irq_t	*irqp;
6445084Sjohnlev 	struct intrspec *ispec;
6455084Sjohnlev 
6465084Sjohnlev 	DDI_INTR_IMPLDBG((CE_CONT, "xen_intr_ops: dip: %p hdlp: %p "
6475084Sjohnlev 	    "intr_op: %x\n", (void *)dip, (void *)hdlp, intr_op));
6485084Sjohnlev 
6495084Sjohnlev 	switch (intr_op) {
6505084Sjohnlev 	case PSM_INTR_OP_CHECK_MSI:
65110175SStuart.Maybee@Sun.COM 		/*
65210175SStuart.Maybee@Sun.COM 		 * Till PCI passthru is supported, only dom0 has MSI/MSIX
65310175SStuart.Maybee@Sun.COM 		 */
6545084Sjohnlev 		if (!DOMAIN_IS_INITDOMAIN(xen_info)) {
6555084Sjohnlev 			*result = hdlp->ih_type & ~(DDI_INTR_TYPE_MSI |
6565084Sjohnlev 			    DDI_INTR_TYPE_MSIX);
6575084Sjohnlev 			break;
6585084Sjohnlev 		}
6595084Sjohnlev 		/*
6605084Sjohnlev 		 * Check MSI/X is supported or not at APIC level and
6615084Sjohnlev 		 * masked off the MSI/X bits in hdlp->ih_type if not
6625084Sjohnlev 		 * supported before return.  If MSI/X is supported,
6635084Sjohnlev 		 * leave the ih_type unchanged and return.
6645084Sjohnlev 		 *
6655084Sjohnlev 		 * hdlp->ih_type passed in from the nexus has all the
6665084Sjohnlev 		 * interrupt types supported by the device.
6675084Sjohnlev 		 */
6685084Sjohnlev 		if (xen_support_msi == 0) {
6695084Sjohnlev 			/*
6705084Sjohnlev 			 * if xen_support_msi is not set, call
6715084Sjohnlev 			 * apic_check_msi_support() to check whether msi
6725084Sjohnlev 			 * is supported first
6735084Sjohnlev 			 */
6745084Sjohnlev 			if (apic_check_msi_support() == PSM_SUCCESS)
6755084Sjohnlev 				xen_support_msi = 1;
6765084Sjohnlev 			else
6775084Sjohnlev 				xen_support_msi = -1;
6785084Sjohnlev 		}
6795084Sjohnlev 		if (xen_support_msi == 1)
6805084Sjohnlev 			*result = hdlp->ih_type;
6815084Sjohnlev 		else
6825084Sjohnlev 			*result = hdlp->ih_type & ~(DDI_INTR_TYPE_MSI |
6835084Sjohnlev 			    DDI_INTR_TYPE_MSIX);
6845084Sjohnlev 		break;
6855084Sjohnlev 	case PSM_INTR_OP_ALLOC_VECTORS:
68610175SStuart.Maybee@Sun.COM 		if (hdlp->ih_type == DDI_INTR_TYPE_MSI)
68710175SStuart.Maybee@Sun.COM 			*result = apic_alloc_msi_vectors(dip, hdlp->ih_inum,
68810175SStuart.Maybee@Sun.COM 			    hdlp->ih_scratch1, hdlp->ih_pri,
68910175SStuart.Maybee@Sun.COM 			    (int)(uintptr_t)hdlp->ih_scratch2);
69010175SStuart.Maybee@Sun.COM 		else
69110175SStuart.Maybee@Sun.COM 			*result = apic_alloc_msix_vectors(dip, hdlp->ih_inum,
69210175SStuart.Maybee@Sun.COM 			    hdlp->ih_scratch1, hdlp->ih_pri,
69310175SStuart.Maybee@Sun.COM 			    (int)(uintptr_t)hdlp->ih_scratch2);
6945084Sjohnlev 		break;
6955084Sjohnlev 	case PSM_INTR_OP_FREE_VECTORS:
6965084Sjohnlev 		apic_free_vectors(dip, hdlp->ih_inum, hdlp->ih_scratch1,
6975084Sjohnlev 		    hdlp->ih_pri, hdlp->ih_type);
6985084Sjohnlev 		break;
6995084Sjohnlev 	case PSM_INTR_OP_NAVAIL_VECTORS:
7005084Sjohnlev 		/*
7015084Sjohnlev 		 * XXPV - maybe we should make this be:
7025084Sjohnlev 		 * min(APIC_VECTOR_PER_IPL, count of all avail vectors);
7035084Sjohnlev 		 */
7045084Sjohnlev 		if (DOMAIN_IS_INITDOMAIN(xen_info))
7055084Sjohnlev 			*result = APIC_VECTOR_PER_IPL;
7065084Sjohnlev 		else
7075084Sjohnlev 			*result = 1;
7085084Sjohnlev 		break;
7095084Sjohnlev 	case PSM_INTR_OP_XLATE_VECTOR:
7105084Sjohnlev 		ispec = ((ihdl_plat_t *)hdlp->ih_private)->ip_ispecp;
7115084Sjohnlev 		if (ispec->intrspec_vec >= PIRQ_BASE &&
7125084Sjohnlev 		    ispec->intrspec_vec < NR_PIRQS &&
7135084Sjohnlev 		    DOMAIN_IS_INITDOMAIN(xen_info)) {
7145084Sjohnlev 			*result = apic_introp_xlate(dip, ispec, hdlp->ih_type);
7155084Sjohnlev 		} else {
7165084Sjohnlev 			*result = ispec->intrspec_vec;
7175084Sjohnlev 		}
7185084Sjohnlev 		break;
7195084Sjohnlev 	case PSM_INTR_OP_GET_PENDING:
7205084Sjohnlev 		/* XXPV - is this enough for dom0 or do we need to ref ioapic */
7215084Sjohnlev 		*result = ec_pending_irq(hdlp->ih_vector);
7225084Sjohnlev 		break;
7235084Sjohnlev 	case PSM_INTR_OP_CLEAR_MASK:
7245084Sjohnlev 		/* XXPV - is this enough for dom0 or do we need to set ioapic */
7255084Sjohnlev 		if (hdlp->ih_type != DDI_INTR_TYPE_FIXED)
7265084Sjohnlev 			return (PSM_FAILURE);
7275084Sjohnlev 		ec_enable_irq(hdlp->ih_vector);
7285084Sjohnlev 		break;
7295084Sjohnlev 	case PSM_INTR_OP_SET_MASK:
7305084Sjohnlev 		/* XXPV - is this enough for dom0 or do we need to set ioapic */
7315084Sjohnlev 		if (hdlp->ih_type != DDI_INTR_TYPE_FIXED)
7325084Sjohnlev 			return (PSM_FAILURE);
7335084Sjohnlev 		ec_disable_irq(hdlp->ih_vector);
7345084Sjohnlev 		break;
7355084Sjohnlev 	case PSM_INTR_OP_GET_CAP:
7365084Sjohnlev 		cap = DDI_INTR_FLAG_PENDING | DDI_INTR_FLAG_EDGE;
7375084Sjohnlev 		if (hdlp->ih_type == DDI_INTR_TYPE_FIXED)
7385084Sjohnlev 			cap |= DDI_INTR_FLAG_MASKABLE;
7395084Sjohnlev 		*result = cap;
7405084Sjohnlev 		break;
7415084Sjohnlev 	case PSM_INTR_OP_GET_SHARED:
7425084Sjohnlev 		if (DOMAIN_IS_INITDOMAIN(xen_info)) {
7435084Sjohnlev 			if (hdlp->ih_type != DDI_INTR_TYPE_FIXED)
7445084Sjohnlev 				return (PSM_FAILURE);
74510190SSophia.Li@Sun.COM 			ispec = ((ihdl_plat_t *)hdlp->ih_private)->ip_ispecp;
7465084Sjohnlev 			if ((irqp = apic_find_irq(dip, ispec, hdlp->ih_type))
7475084Sjohnlev 			    == NULL)
7485084Sjohnlev 				return (PSM_FAILURE);
74910190SSophia.Li@Sun.COM 			*result = (irqp->airq_share > 1) ? 1: 0;
7505084Sjohnlev 		} else {
7515084Sjohnlev 			return (PSM_FAILURE);
7525084Sjohnlev 		}
7535084Sjohnlev 		break;
7545084Sjohnlev 	case PSM_INTR_OP_SET_PRI:
7555084Sjohnlev 		new_priority = *(int *)result;
7565084Sjohnlev 		err = ec_set_irq_priority(hdlp->ih_vector, new_priority);
7575084Sjohnlev 		if (err != 0)
7585084Sjohnlev 			return (PSM_FAILURE);
7595084Sjohnlev 		break;
7605084Sjohnlev 	case PSM_INTR_OP_GET_INTR:
7615084Sjohnlev 		if (!DOMAIN_IS_INITDOMAIN(xen_info))
7625084Sjohnlev 			return (PSM_FAILURE);
7635084Sjohnlev 		/*
7645084Sjohnlev 		 * The interrupt handle given here has been allocated
7655084Sjohnlev 		 * specifically for this command, and ih_private carries
7665084Sjohnlev 		 * a pointer to a apic_get_intr_t.
7675084Sjohnlev 		 */
7685084Sjohnlev 		if (apic_get_vector_intr_info(
7695084Sjohnlev 		    hdlp->ih_vector, hdlp->ih_private) != PSM_SUCCESS)
7705084Sjohnlev 			return (PSM_FAILURE);
7715084Sjohnlev 		break;
7725084Sjohnlev 	case PSM_INTR_OP_SET_CAP:
7735084Sjohnlev 		/* FALLTHRU */
7745084Sjohnlev 	default:
7755084Sjohnlev 		return (PSM_FAILURE);
7765084Sjohnlev 	}
7775084Sjohnlev 	return (PSM_SUCCESS);
7785084Sjohnlev }
7795084Sjohnlev 
7805084Sjohnlev static void
xen_psm_rebind_irq(int irq)7815084Sjohnlev xen_psm_rebind_irq(int irq)
7825084Sjohnlev {
7835084Sjohnlev 	cpuset_t ncpu;
7845084Sjohnlev 	processorid_t newcpu;
7855529Ssmaybe 	apic_irq_t *irqptr;
7865084Sjohnlev 
7875084Sjohnlev 	newcpu = xen_psm_bind_intr(irq);
7885084Sjohnlev 	if (newcpu == IRQ_UNBOUND) {
7895084Sjohnlev 		CPUSET_ZERO(ncpu);
7905084Sjohnlev 		CPUSET_OR(ncpu, xen_psm_cpus_online);
7915084Sjohnlev 	} else {
7925084Sjohnlev 		CPUSET_ONLY(ncpu, newcpu & ~IRQ_USER_BOUND);
7935084Sjohnlev 	}
7945084Sjohnlev 	ec_set_irq_affinity(irq, ncpu);
7955573Ssmaybe 	if (irq <= APIC_MAX_VECTOR) {
7965573Ssmaybe 		irqptr = apic_irq_table[irq];
7975573Ssmaybe 		ASSERT(irqptr != NULL);
7985573Ssmaybe 		irqptr->airq_temp_cpu = (uchar_t)newcpu;
7995573Ssmaybe 	}
8005084Sjohnlev }
8015084Sjohnlev 
8025084Sjohnlev /*
8035084Sjohnlev  * Disable all device interrupts for the given cpu.
8045084Sjohnlev  * High priority interrupts are not disabled and will still be serviced.
8055084Sjohnlev  */
8065084Sjohnlev static int
xen_psm_disable_intr(processorid_t cpun)8075084Sjohnlev xen_psm_disable_intr(processorid_t cpun)
8085084Sjohnlev {
8095084Sjohnlev 	int irq;
8105084Sjohnlev 
8115084Sjohnlev 	/*
8125084Sjohnlev 	 * Can't offline VCPU 0 on this hypervisor.  There's no reason
8135084Sjohnlev 	 * anyone would want to given that the CPUs are virtual. Also note
8145084Sjohnlev 	 * that the hypervisor requires suspend/resume to be on VCPU 0.
8155084Sjohnlev 	 */
8165084Sjohnlev 	if (cpun == 0)
8175084Sjohnlev 		return (PSM_FAILURE);
8185084Sjohnlev 
8195529Ssmaybe 	CPUSET_ATOMIC_DEL(xen_psm_cpus_online, cpun);
8205084Sjohnlev 	for (irq = 0; irq < NR_IRQS; irq++) {
8215084Sjohnlev 		if (!ec_irq_needs_rebind(irq, cpun))
8225084Sjohnlev 			continue;
8235084Sjohnlev 		xen_psm_rebind_irq(irq);
8245084Sjohnlev 	}
8255084Sjohnlev 	return (PSM_SUCCESS);
8265084Sjohnlev }
8275084Sjohnlev 
8285084Sjohnlev static void
xen_psm_enable_intr(processorid_t cpun)8295084Sjohnlev xen_psm_enable_intr(processorid_t cpun)
8305084Sjohnlev {
8315084Sjohnlev 	int irq;
8325084Sjohnlev 
8335084Sjohnlev 	if (cpun == 0)
8345084Sjohnlev 		return;
8355084Sjohnlev 
8365529Ssmaybe 	CPUSET_ATOMIC_ADD(xen_psm_cpus_online, cpun);
8375084Sjohnlev 
8385084Sjohnlev 	/*
8395084Sjohnlev 	 * Rebalance device interrupts among online processors
8405084Sjohnlev 	 */
8415084Sjohnlev 	for (irq = 0; irq < NR_IRQS; irq++) {
8425084Sjohnlev 		if (!ec_irq_rebindable(irq))
8435084Sjohnlev 			continue;
8445084Sjohnlev 		xen_psm_rebind_irq(irq);
8455084Sjohnlev 	}
8466749Ssherrym 
8476749Ssherrym 	if (DOMAIN_IS_INITDOMAIN(xen_info)) {
8486749Ssherrym 		apic_cpus[cpun].aci_status |= APIC_CPU_INTR_ENABLE;
8496749Ssherrym 	}
8505084Sjohnlev }
8515084Sjohnlev 
8525529Ssmaybe static int
xen_psm_post_cpu_start()8535529Ssmaybe xen_psm_post_cpu_start()
8545529Ssmaybe {
8555529Ssmaybe 	processorid_t cpun;
8565529Ssmaybe 
8576467Ssmaybe 	cpun = psm_get_cpu_id();
8585529Ssmaybe 	if (DOMAIN_IS_INITDOMAIN(xen_info)) {
8597113Sbholler 		/*
8607113Sbholler 		 * Non-virtualized environments can call psm_post_cpu_start
8617113Sbholler 		 * from Suspend/Resume with the APIC_CPU_INTR_ENABLE bit set.
8627113Sbholler 		 * xen_psm_post_cpu_start() is only called from boot.
8637113Sbholler 		 */
8647113Sbholler 		apic_cpus[cpun].aci_status |= APIC_CPU_ONLINE;
8655529Ssmaybe 	}
8665529Ssmaybe 	return (PSM_SUCCESS);
8675529Ssmaybe }
8685529Ssmaybe 
8695084Sjohnlev /*
8705084Sjohnlev  * This function will reprogram the timer.
8715084Sjohnlev  *
8725084Sjohnlev  * When in oneshot mode the argument is the absolute time in future at which to
8735084Sjohnlev  * generate the interrupt.
8745084Sjohnlev  *
8755084Sjohnlev  * When in periodic mode, the argument is the interval at which the
8765084Sjohnlev  * interrupts should be generated. There is no need to support the periodic
8775084Sjohnlev  * mode timer change at this time.
8785084Sjohnlev  *
8795084Sjohnlev  * Note that we must be careful to convert from hrtime to Xen system time (see
8805084Sjohnlev  * xpv_timestamp.c).
8815084Sjohnlev  */
8825084Sjohnlev static void
xen_psm_timer_reprogram(hrtime_t timer_req)8835084Sjohnlev xen_psm_timer_reprogram(hrtime_t timer_req)
8845084Sjohnlev {
8855084Sjohnlev 	hrtime_t now, timer_new, time_delta, xen_time;
8865084Sjohnlev 	ulong_t flags;
8875084Sjohnlev 
8885084Sjohnlev 	flags = intr_clear();
8895084Sjohnlev 	/*
8905084Sjohnlev 	 * We should be called from high PIL context (CBE_HIGH_PIL),
8915084Sjohnlev 	 * so kpreempt is disabled.
8925084Sjohnlev 	 */
8935084Sjohnlev 
8945084Sjohnlev 	now = xpv_gethrtime();
8955084Sjohnlev 	xen_time = xpv_getsystime();
8965084Sjohnlev 	if (timer_req <= now) {
8975084Sjohnlev 		/*
8985084Sjohnlev 		 * requested to generate an interrupt in the past
8995084Sjohnlev 		 * generate an interrupt as soon as possible
9005084Sjohnlev 		 */
9015084Sjohnlev 		time_delta = XEN_NSEC_PER_TICK;
9025084Sjohnlev 	} else
9035084Sjohnlev 		time_delta = timer_req - now;
9045084Sjohnlev 
9055084Sjohnlev 	timer_new = xen_time + time_delta;
9065084Sjohnlev 	if (HYPERVISOR_set_timer_op(timer_new) != 0)
9075084Sjohnlev 		panic("can't set hypervisor timer?");
9085084Sjohnlev 	intr_restore(flags);
9095084Sjohnlev }
9105084Sjohnlev 
9115084Sjohnlev /*
9125084Sjohnlev  * This function will enable timer interrupts.
9135084Sjohnlev  */
9145084Sjohnlev static void
xen_psm_timer_enable(void)9155084Sjohnlev xen_psm_timer_enable(void)
9165084Sjohnlev {
9175084Sjohnlev 	ec_unmask_irq(xen_clock_irq);
9185084Sjohnlev }
9195084Sjohnlev 
9205084Sjohnlev /*
9215084Sjohnlev  * This function will disable timer interrupts on the current cpu.
9225084Sjohnlev  */
9235084Sjohnlev static void
xen_psm_timer_disable(void)9245084Sjohnlev xen_psm_timer_disable(void)
9255084Sjohnlev {
9265084Sjohnlev 	(void) ec_block_irq(xen_clock_irq);
9275084Sjohnlev 	/*
9285084Sjohnlev 	 * If the clock irq is pending on this cpu then we need to
9295084Sjohnlev 	 * clear the pending interrupt.
9305084Sjohnlev 	 */
9315084Sjohnlev 	ec_unpend_irq(xen_clock_irq);
9325084Sjohnlev }
9335084Sjohnlev 
9345084Sjohnlev /*
9355084Sjohnlev  *
9365084Sjohnlev  * The following functions are in the platform specific file so that they
9375084Sjohnlev  * can be different functions depending on whether we are running on
9385084Sjohnlev  * bare metal or a hypervisor.
9395084Sjohnlev  */
9405084Sjohnlev 
9415084Sjohnlev /*
9425084Sjohnlev  * Allocate a free vector for irq at ipl.
9435084Sjohnlev  */
9445084Sjohnlev /* ARGSUSED */
9455084Sjohnlev uchar_t
apic_allocate_vector(int ipl,int irq,int pri)9465084Sjohnlev apic_allocate_vector(int ipl, int irq, int pri)
9475084Sjohnlev {
9485084Sjohnlev 	physdev_irq_t irq_op;
9495084Sjohnlev 	uchar_t vector;
95010175SStuart.Maybee@Sun.COM 	int rc;
9515084Sjohnlev 
9525084Sjohnlev 	irq_op.irq = irq;
9535084Sjohnlev 
95410175SStuart.Maybee@Sun.COM 	if ((rc = HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op))
95510175SStuart.Maybee@Sun.COM 	    != 0)
95610175SStuart.Maybee@Sun.COM 		panic("Hypervisor alloc vector failed err: %d", -rc);
9575084Sjohnlev 	vector = irq_op.vector;
9585084Sjohnlev 	/*
9595084Sjohnlev 	 * No need to worry about vector colliding with our reserved vectors
9605084Sjohnlev 	 * e.g. T_FASTTRAP, xen can differentiate between hardware and software
9615084Sjohnlev 	 * generated traps and handle them properly.
9625084Sjohnlev 	 */
9635084Sjohnlev 	apic_vector_to_irq[vector] = (uchar_t)irq;
9645084Sjohnlev 	return (vector);
9655084Sjohnlev }
9665084Sjohnlev 
9675084Sjohnlev /* Mark vector as not being used by any irq */
9685084Sjohnlev void
apic_free_vector(uchar_t vector)9695084Sjohnlev apic_free_vector(uchar_t vector)
9705084Sjohnlev {
9715084Sjohnlev 	apic_vector_to_irq[vector] = APIC_RESV_IRQ;
9725084Sjohnlev }
9735084Sjohnlev 
9745084Sjohnlev /*
97510175SStuart.Maybee@Sun.COM  * This function returns the no. of vectors available for the pri.
97610175SStuart.Maybee@Sun.COM  * dip is not used at this moment.  If we really don't need that,
97710175SStuart.Maybee@Sun.COM  * it will be removed.  Since priority is not limited by hardware
97810175SStuart.Maybee@Sun.COM  * when running on the hypervisor we simply return the maximum no.
97910175SStuart.Maybee@Sun.COM  * of available contiguous vectors.
98010175SStuart.Maybee@Sun.COM  */
98110175SStuart.Maybee@Sun.COM /*ARGSUSED*/
98210175SStuart.Maybee@Sun.COM int
apic_navail_vector(dev_info_t * dip,int pri)98310175SStuart.Maybee@Sun.COM apic_navail_vector(dev_info_t *dip, int pri)
98410175SStuart.Maybee@Sun.COM {
98510175SStuart.Maybee@Sun.COM 	int	lowest, highest, i, navail, count;
98610175SStuart.Maybee@Sun.COM 
98710175SStuart.Maybee@Sun.COM 	DDI_INTR_IMPLDBG((CE_CONT, "apic_navail_vector: dip: %p, pri: %x\n",
98810175SStuart.Maybee@Sun.COM 	    (void *)dip, pri));
98910175SStuart.Maybee@Sun.COM 
99010175SStuart.Maybee@Sun.COM 	highest = APIC_MAX_VECTOR;
99110175SStuart.Maybee@Sun.COM 	lowest = APIC_BASE_VECT;
99210175SStuart.Maybee@Sun.COM 	navail = count = 0;
99310175SStuart.Maybee@Sun.COM 
99410175SStuart.Maybee@Sun.COM 	/* It has to be contiguous */
99510175SStuart.Maybee@Sun.COM 	for (i = lowest; i < highest; i++) {
99610175SStuart.Maybee@Sun.COM 		count = 0;
99710175SStuart.Maybee@Sun.COM 		while ((apic_vector_to_irq[i] == APIC_RESV_IRQ) &&
99810175SStuart.Maybee@Sun.COM 		    (i < highest)) {
99910175SStuart.Maybee@Sun.COM 			count++;
100010175SStuart.Maybee@Sun.COM 			i++;
100110175SStuart.Maybee@Sun.COM 		}
100210175SStuart.Maybee@Sun.COM 		if (count > navail)
100310175SStuart.Maybee@Sun.COM 			navail = count;
100410175SStuart.Maybee@Sun.COM 	}
100510175SStuart.Maybee@Sun.COM 	return (navail);
100610175SStuart.Maybee@Sun.COM }
100710175SStuart.Maybee@Sun.COM 
100810175SStuart.Maybee@Sun.COM static physdev_manage_pci_t *managed_devlist;
100910175SStuart.Maybee@Sun.COM static int mdev_cnt;
101010175SStuart.Maybee@Sun.COM static int mdev_size = 128;
101110175SStuart.Maybee@Sun.COM static uchar_t	msi_vector_to_pirq[APIC_MAX_VECTOR+1];
101210175SStuart.Maybee@Sun.COM 
101310175SStuart.Maybee@Sun.COM /*
101410175SStuart.Maybee@Sun.COM  * Add devfn on given bus to devices managed by hypervisor
10155084Sjohnlev  */
10165084Sjohnlev static int
xen_manage_device(uint8_t bus,uint8_t devfn)101710175SStuart.Maybee@Sun.COM xen_manage_device(uint8_t bus, uint8_t devfn)
101810175SStuart.Maybee@Sun.COM {
101910175SStuart.Maybee@Sun.COM 	physdev_manage_pci_t manage_pci, *newlist;
102010175SStuart.Maybee@Sun.COM 	int rc, i, oldsize;
102110175SStuart.Maybee@Sun.COM 
102210175SStuart.Maybee@Sun.COM 	/*
102310175SStuart.Maybee@Sun.COM 	 * Check if bus/devfn already managed.  If so just return success.
102410175SStuart.Maybee@Sun.COM 	 */
102510175SStuart.Maybee@Sun.COM 	if (managed_devlist == NULL) {
102610175SStuart.Maybee@Sun.COM 		managed_devlist = kmem_alloc(sizeof (physdev_manage_pci_t) *
102710175SStuart.Maybee@Sun.COM 		    mdev_size, KM_NOSLEEP);
102810175SStuart.Maybee@Sun.COM 		if (managed_devlist == NULL) {
102910175SStuart.Maybee@Sun.COM 			cmn_err(CE_WARN,
103010175SStuart.Maybee@Sun.COM 			    "Can't alloc space for managed device list");
103110175SStuart.Maybee@Sun.COM 			return (0);
103210175SStuart.Maybee@Sun.COM 		}
103310175SStuart.Maybee@Sun.COM 	};
103410175SStuart.Maybee@Sun.COM 	for (i = 0; i < mdev_cnt; i++) {
103510175SStuart.Maybee@Sun.COM 		if (managed_devlist[i].bus == bus &&
103610175SStuart.Maybee@Sun.COM 		    managed_devlist[i].devfn == devfn)
103710175SStuart.Maybee@Sun.COM 			return (1); /* device already managed */
103810175SStuart.Maybee@Sun.COM 	}
103910175SStuart.Maybee@Sun.COM 	manage_pci.bus = bus;
104010175SStuart.Maybee@Sun.COM 	manage_pci.devfn = devfn;
104110175SStuart.Maybee@Sun.COM 	rc = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_add, &manage_pci);
104210175SStuart.Maybee@Sun.COM 	if (rc < 0) {
104310175SStuart.Maybee@Sun.COM 		cmn_err(CE_WARN,
104410175SStuart.Maybee@Sun.COM 		    "hypervisor add pci device call failed bus:0x%x"
104510175SStuart.Maybee@Sun.COM 		    " devfn:0x%x", bus, devfn);
104610175SStuart.Maybee@Sun.COM 		return (0);
104710175SStuart.Maybee@Sun.COM 	}
104810175SStuart.Maybee@Sun.COM 	/*
104910175SStuart.Maybee@Sun.COM 	 * Add device to the managed device list
105010175SStuart.Maybee@Sun.COM 	 */
105110175SStuart.Maybee@Sun.COM 	if (i == mdev_size) {
105210175SStuart.Maybee@Sun.COM 		/*
105310175SStuart.Maybee@Sun.COM 		 * grow the managed device list
105410175SStuart.Maybee@Sun.COM 		 */
105510175SStuart.Maybee@Sun.COM 		oldsize = mdev_size * sizeof (physdev_manage_pci_t);
105610175SStuart.Maybee@Sun.COM 		mdev_size *= 2;
105710175SStuart.Maybee@Sun.COM 		newlist = kmem_alloc(sizeof (physdev_manage_pci_t) * mdev_size,
105810175SStuart.Maybee@Sun.COM 		    KM_NOSLEEP);
105910175SStuart.Maybee@Sun.COM 		if (newlist == NULL) {
106010175SStuart.Maybee@Sun.COM 			cmn_err(CE_WARN, "Can't grow managed device list");
106110175SStuart.Maybee@Sun.COM 			return (0);
106210175SStuart.Maybee@Sun.COM 		}
106310175SStuart.Maybee@Sun.COM 		bcopy(managed_devlist, newlist, oldsize);
106410175SStuart.Maybee@Sun.COM 		kmem_free(managed_devlist, oldsize);
106510175SStuart.Maybee@Sun.COM 		managed_devlist = newlist;
106610175SStuart.Maybee@Sun.COM 	}
106710175SStuart.Maybee@Sun.COM 	managed_devlist[i].bus = bus;
106810175SStuart.Maybee@Sun.COM 	managed_devlist[i].devfn = devfn;
106910175SStuart.Maybee@Sun.COM 	mdev_cnt++;
107010175SStuart.Maybee@Sun.COM 	return (1);
107110175SStuart.Maybee@Sun.COM }
107210175SStuart.Maybee@Sun.COM 
107310175SStuart.Maybee@Sun.COM /*
107410175SStuart.Maybee@Sun.COM  * allocate an apic irq struct for an MSI interrupt
107510175SStuart.Maybee@Sun.COM  */
107610175SStuart.Maybee@Sun.COM static int
msi_allocate_irq(int irq)107710175SStuart.Maybee@Sun.COM msi_allocate_irq(int irq)
107810175SStuart.Maybee@Sun.COM {
107910175SStuart.Maybee@Sun.COM 	apic_irq_t *irqptr = apic_irq_table[irq];
108010175SStuart.Maybee@Sun.COM 
108110175SStuart.Maybee@Sun.COM 	if (irqptr == NULL) {
108210175SStuart.Maybee@Sun.COM 		irqptr = kmem_zalloc(sizeof (apic_irq_t), KM_NOSLEEP);
108310175SStuart.Maybee@Sun.COM 		if (irqptr == NULL) {
108410175SStuart.Maybee@Sun.COM 			cmn_err(CE_WARN, "xpv_psm: NO memory to allocate IRQ");
108510175SStuart.Maybee@Sun.COM 			return (-1);
108610175SStuart.Maybee@Sun.COM 		}
108710175SStuart.Maybee@Sun.COM 		apic_irq_table[irq] = irqptr;
108810175SStuart.Maybee@Sun.COM 	} else {
108910175SStuart.Maybee@Sun.COM 		if (irq == APIC_RESV_IRQ && irqptr->airq_mps_intr_index == 0)
109010175SStuart.Maybee@Sun.COM 			irqptr->airq_mps_intr_index = FREE_INDEX;
109110175SStuart.Maybee@Sun.COM 		if (irqptr->airq_mps_intr_index != FREE_INDEX) {
109210175SStuart.Maybee@Sun.COM 			cmn_err(CE_WARN, "xpv_psm: MSI IRQ already in use");
109310175SStuart.Maybee@Sun.COM 			return (-1);
109410175SStuart.Maybee@Sun.COM 		}
109510175SStuart.Maybee@Sun.COM 	}
109610175SStuart.Maybee@Sun.COM 	irqptr->airq_mps_intr_index = FREE_INDEX;
109710175SStuart.Maybee@Sun.COM 	return (irq);
109810175SStuart.Maybee@Sun.COM }
109910175SStuart.Maybee@Sun.COM 
110010175SStuart.Maybee@Sun.COM /*
110110175SStuart.Maybee@Sun.COM  * read MSI/MSIX vector out of config space
110210175SStuart.Maybee@Sun.COM  */
110310175SStuart.Maybee@Sun.COM static uchar_t
xpv_psm_get_msi_vector(dev_info_t * dip,int type,int entry)110410175SStuart.Maybee@Sun.COM xpv_psm_get_msi_vector(dev_info_t *dip, int type, int entry)
110510175SStuart.Maybee@Sun.COM {
110610175SStuart.Maybee@Sun.COM 	uint64_t		msi_data = 0;
110710175SStuart.Maybee@Sun.COM 	int			cap_ptr = i_ddi_get_msi_msix_cap_ptr(dip);
110810175SStuart.Maybee@Sun.COM 	ddi_acc_handle_t	handle = i_ddi_get_pci_config_handle(dip);
110910175SStuart.Maybee@Sun.COM 	ushort_t		msi_ctrl;
111010175SStuart.Maybee@Sun.COM 	uchar_t			vector;
111110175SStuart.Maybee@Sun.COM 
111210175SStuart.Maybee@Sun.COM 	ASSERT((handle != NULL) && (cap_ptr != 0));
111310175SStuart.Maybee@Sun.COM 	if (type == DDI_INTR_TYPE_MSI) {
111410175SStuart.Maybee@Sun.COM 		msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
111510175SStuart.Maybee@Sun.COM 		/*
111610175SStuart.Maybee@Sun.COM 		 * Get vector
111710175SStuart.Maybee@Sun.COM 		 */
111810175SStuart.Maybee@Sun.COM 		if (msi_ctrl &  PCI_MSI_64BIT_MASK) {
111910175SStuart.Maybee@Sun.COM 			msi_data = pci_config_get16(handle,
112010175SStuart.Maybee@Sun.COM 			    cap_ptr + PCI_MSI_64BIT_DATA);
112110175SStuart.Maybee@Sun.COM 		} else {
112210175SStuart.Maybee@Sun.COM 			msi_data = pci_config_get16(handle,
112310175SStuart.Maybee@Sun.COM 			    cap_ptr + PCI_MSI_32BIT_DATA);
112410175SStuart.Maybee@Sun.COM 		}
112511188SFrank.Vanderlinden@Sun.COM 		vector = (msi_data & 0xff) + entry;
112610175SStuart.Maybee@Sun.COM 	} else if (type == DDI_INTR_TYPE_MSIX) {
112710175SStuart.Maybee@Sun.COM 		uintptr_t	off;
112810175SStuart.Maybee@Sun.COM 		ddi_intr_msix_t	*msix_p = i_ddi_get_msix(dip);
112910175SStuart.Maybee@Sun.COM 
113010175SStuart.Maybee@Sun.COM 		/* Offset into the given entry in the MSI-X table */
113110175SStuart.Maybee@Sun.COM 		off = (uintptr_t)msix_p->msix_tbl_addr +
113210175SStuart.Maybee@Sun.COM 		    (entry  * PCI_MSIX_VECTOR_SIZE);
113310175SStuart.Maybee@Sun.COM 
113410175SStuart.Maybee@Sun.COM 		msi_data = ddi_get32(msix_p->msix_tbl_hdl,
113510175SStuart.Maybee@Sun.COM 		    (uint32_t *)(off + PCI_MSIX_DATA_OFFSET));
113611188SFrank.Vanderlinden@Sun.COM 		vector = msi_data & 0xff;
113710175SStuart.Maybee@Sun.COM 	}
113810175SStuart.Maybee@Sun.COM 	return (vector);
113910175SStuart.Maybee@Sun.COM }
114010175SStuart.Maybee@Sun.COM 
114110175SStuart.Maybee@Sun.COM 
114210175SStuart.Maybee@Sun.COM static void
get_busdevfn(dev_info_t * dip,int * busp,int * devfnp)114310175SStuart.Maybee@Sun.COM get_busdevfn(dev_info_t *dip, int *busp, int *devfnp)
114410175SStuart.Maybee@Sun.COM {
114510175SStuart.Maybee@Sun.COM 	pci_regspec_t *regspec;
114610175SStuart.Maybee@Sun.COM 	int reglen;
114710175SStuart.Maybee@Sun.COM 
114810175SStuart.Maybee@Sun.COM 	/*
114910175SStuart.Maybee@Sun.COM 	 * Get device reg spec, first word has PCI bus and
115010175SStuart.Maybee@Sun.COM 	 * device/function info we need.
115110175SStuart.Maybee@Sun.COM 	 */
115210175SStuart.Maybee@Sun.COM 	if (ddi_getlongprop(DDI_DEV_T_NONE, dip, DDI_PROP_DONTPASS, "reg",
115310175SStuart.Maybee@Sun.COM 	    (caddr_t)&regspec, &reglen) != DDI_SUCCESS) {
115410175SStuart.Maybee@Sun.COM 		cmn_err(CE_WARN,
115510175SStuart.Maybee@Sun.COM 		    "get_busdevfn() failed to get regspec.");
115610175SStuart.Maybee@Sun.COM 		return;
115710175SStuart.Maybee@Sun.COM 	}
115810175SStuart.Maybee@Sun.COM 	/*
115910175SStuart.Maybee@Sun.COM 	 * get PCI bus # from reg spec for device
116010175SStuart.Maybee@Sun.COM 	 */
116110175SStuart.Maybee@Sun.COM 	*busp = PCI_REG_BUS_G(regspec[0].pci_phys_hi);
116210175SStuart.Maybee@Sun.COM 	/*
116310175SStuart.Maybee@Sun.COM 	 * get combined device/function from reg spec for device.
116410175SStuart.Maybee@Sun.COM 	 */
116510175SStuart.Maybee@Sun.COM 	*devfnp = (regspec[0].pci_phys_hi & (PCI_REG_FUNC_M | PCI_REG_DEV_M)) >>
116610175SStuart.Maybee@Sun.COM 	    PCI_REG_FUNC_SHIFT;
116710175SStuart.Maybee@Sun.COM 
116810175SStuart.Maybee@Sun.COM 	kmem_free(regspec, reglen);
116910175SStuart.Maybee@Sun.COM }
117010175SStuart.Maybee@Sun.COM 
117110175SStuart.Maybee@Sun.COM /*
117210175SStuart.Maybee@Sun.COM  * This function allocates "count" MSI vector(s) for the given "dip/pri/type"
117310175SStuart.Maybee@Sun.COM  */
117410175SStuart.Maybee@Sun.COM int
apic_alloc_msi_vectors(dev_info_t * dip,int inum,int count,int pri,int behavior)117510175SStuart.Maybee@Sun.COM apic_alloc_msi_vectors(dev_info_t *dip, int inum, int count, int pri,
11765084Sjohnlev     int behavior)
11775084Sjohnlev {
117810175SStuart.Maybee@Sun.COM 	int	rcount, i, rc, irqno;
11795084Sjohnlev 	uchar_t	vector, cpu;
11805084Sjohnlev 	major_t	major;
11815084Sjohnlev 	apic_irq_t	*irqptr;
118210175SStuart.Maybee@Sun.COM 	physdev_map_pirq_t map_irq;
118310175SStuart.Maybee@Sun.COM 	int busnum, devfn;
11845084Sjohnlev 
118510175SStuart.Maybee@Sun.COM 	DDI_INTR_IMPLDBG((CE_CONT, "apic_alloc_msi_vectors: dip=0x%p "
11865084Sjohnlev 	    "inum=0x%x  pri=0x%x count=0x%x behavior=%d\n",
118710175SStuart.Maybee@Sun.COM 	    (void *)dip, inum, pri, count, behavior));
11885084Sjohnlev 
11895084Sjohnlev 	if (count > 1) {
11905084Sjohnlev 		if (behavior == DDI_INTR_ALLOC_STRICT &&
11918925SEvan.Yan@Sun.COM 		    apic_multi_msi_enable == 0)
11925084Sjohnlev 			return (0);
11935084Sjohnlev 		if (apic_multi_msi_enable == 0)
11945084Sjohnlev 			count = 1;
11955084Sjohnlev 	}
11965084Sjohnlev 
119710175SStuart.Maybee@Sun.COM 	if ((rcount = apic_navail_vector(dip, pri)) > count)
119810175SStuart.Maybee@Sun.COM 		rcount = count;
119910175SStuart.Maybee@Sun.COM 	else if (rcount == 0 || (rcount < count &&
120010175SStuart.Maybee@Sun.COM 	    behavior == DDI_INTR_ALLOC_STRICT))
120110175SStuart.Maybee@Sun.COM 		return (0);
120210175SStuart.Maybee@Sun.COM 
120310175SStuart.Maybee@Sun.COM 	/* if not ISP2, then round it down */
120410175SStuart.Maybee@Sun.COM 	if (!ISP2(rcount))
120510175SStuart.Maybee@Sun.COM 		rcount = 1 << (highbit(rcount) - 1);
120610175SStuart.Maybee@Sun.COM 
12075084Sjohnlev 	/*
120810175SStuart.Maybee@Sun.COM 	 * get PCI bus #  and devfn from reg spec for device
12095084Sjohnlev 	 */
121010175SStuart.Maybee@Sun.COM 	get_busdevfn(dip, &busnum, &devfn);
121110175SStuart.Maybee@Sun.COM 
121210175SStuart.Maybee@Sun.COM 	/*
121310175SStuart.Maybee@Sun.COM 	 * Tell xen about this pci device
121410175SStuart.Maybee@Sun.COM 	 */
121510175SStuart.Maybee@Sun.COM 	if (!xen_manage_device(busnum, devfn))
121610175SStuart.Maybee@Sun.COM 		return (0);
12175084Sjohnlev 
12185084Sjohnlev 	mutex_enter(&airq_mutex);
12195084Sjohnlev 
122010175SStuart.Maybee@Sun.COM 	major = (dip != NULL) ? ddi_name_to_major(ddi_get_name(dip)) : 0;
12215084Sjohnlev 	for (i = 0; i < rcount; i++) {
122210175SStuart.Maybee@Sun.COM 		/*
122310175SStuart.Maybee@Sun.COM 		 * use PHYSDEVOP_map_pirq to have xen map MSI to a pirq
122410175SStuart.Maybee@Sun.COM 		 */
122510175SStuart.Maybee@Sun.COM 		map_irq.domid = DOMID_SELF;
122610175SStuart.Maybee@Sun.COM 		map_irq.type = MAP_PIRQ_TYPE_MSI;
122711188SFrank.Vanderlinden@Sun.COM 		map_irq.index = -rcount; /* hypervisor auto allocates vectors */
122810175SStuart.Maybee@Sun.COM 		map_irq.pirq = -1;
122910175SStuart.Maybee@Sun.COM 		map_irq.bus = busnum;
123010175SStuart.Maybee@Sun.COM 		map_irq.devfn = devfn;
123111188SFrank.Vanderlinden@Sun.COM 		map_irq.entry_nr = i;
123210175SStuart.Maybee@Sun.COM 		map_irq.table_base = 0;
123310175SStuart.Maybee@Sun.COM 		rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq);
123410175SStuart.Maybee@Sun.COM 		irqno = map_irq.pirq;
123510175SStuart.Maybee@Sun.COM 		if (rc < 0) {
12365084Sjohnlev 			mutex_exit(&airq_mutex);
123710175SStuart.Maybee@Sun.COM 			cmn_err(CE_WARN, "map MSI irq failed err: %d", -rc);
123811188SFrank.Vanderlinden@Sun.COM 			return (i);
12395084Sjohnlev 		}
124010175SStuart.Maybee@Sun.COM 		if (irqno < 0) {
124110175SStuart.Maybee@Sun.COM 			mutex_exit(&airq_mutex);
124210175SStuart.Maybee@Sun.COM 			cmn_err(CE_NOTE,
124310175SStuart.Maybee@Sun.COM 			    "!hypervisor not configured for MSI support");
124410175SStuart.Maybee@Sun.COM 			xen_support_msi = -1;
124510175SStuart.Maybee@Sun.COM 			return (0);
124610175SStuart.Maybee@Sun.COM 		}
124711188SFrank.Vanderlinden@Sun.COM 
124810175SStuart.Maybee@Sun.COM 		/*
124910175SStuart.Maybee@Sun.COM 		 * Find out what vector the hypervisor assigned
125010175SStuart.Maybee@Sun.COM 		 */
125111188SFrank.Vanderlinden@Sun.COM 		vector = xpv_psm_get_msi_vector(dip, DDI_INTR_TYPE_MSI, i);
125211188SFrank.Vanderlinden@Sun.COM 
125311188SFrank.Vanderlinden@Sun.COM 		if (msi_allocate_irq(irqno) < 0) {
125411188SFrank.Vanderlinden@Sun.COM 			mutex_exit(&airq_mutex);
125511188SFrank.Vanderlinden@Sun.COM 			return (i);
125611188SFrank.Vanderlinden@Sun.COM 		}
12575084Sjohnlev 		apic_max_device_irq = max(irqno, apic_max_device_irq);
12585084Sjohnlev 		apic_min_device_irq = min(irqno, apic_min_device_irq);
12595084Sjohnlev 		irqptr = apic_irq_table[irqno];
126010175SStuart.Maybee@Sun.COM 		ASSERT(irqptr != NULL);
12615084Sjohnlev #ifdef	DEBUG
12625084Sjohnlev 		if (apic_vector_to_irq[vector] != APIC_RESV_IRQ)
126310175SStuart.Maybee@Sun.COM 			DDI_INTR_IMPLDBG((CE_CONT, "apic_alloc_msi_vectors: "
12645084Sjohnlev 			    "apic_vector_to_irq is not APIC_RESV_IRQ\n"));
12655084Sjohnlev #endif
126610175SStuart.Maybee@Sun.COM 		apic_vector_to_irq[vector] = (uchar_t)irqno;
126710175SStuart.Maybee@Sun.COM 		msi_vector_to_pirq[vector] = (uchar_t)irqno;
12685084Sjohnlev 
12695084Sjohnlev 		irqptr->airq_vector = vector;
12705084Sjohnlev 		irqptr->airq_ioapicindex = (uchar_t)inum;	/* start */
12715084Sjohnlev 		irqptr->airq_intin_no = (uchar_t)rcount;
12725084Sjohnlev 		irqptr->airq_ipl = pri;
12735084Sjohnlev 		irqptr->airq_origirq = (uchar_t)(inum + i);
12745084Sjohnlev 		irqptr->airq_share_id = 0;
12755084Sjohnlev 		irqptr->airq_mps_intr_index = MSI_INDEX;
12765084Sjohnlev 		irqptr->airq_dip = dip;
12775084Sjohnlev 		irqptr->airq_major = major;
127810175SStuart.Maybee@Sun.COM 		if (i == 0) /* they all bind to the same cpu */
127910175SStuart.Maybee@Sun.COM 			cpu = irqptr->airq_cpu = xen_psm_bind_intr(irqno);
12805084Sjohnlev 		else
12815084Sjohnlev 			irqptr->airq_cpu = cpu;
128210175SStuart.Maybee@Sun.COM 		DDI_INTR_IMPLDBG((CE_CONT, "apic_alloc_msi_vectors: irq=0x%x "
12835084Sjohnlev 		    "dip=0x%p vector=0x%x origirq=0x%x pri=0x%x\n", irqno,
12845084Sjohnlev 		    (void *)irqptr->airq_dip, irqptr->airq_vector,
12855084Sjohnlev 		    irqptr->airq_origirq, pri));
12865084Sjohnlev 	}
12875084Sjohnlev 	mutex_exit(&airq_mutex);
12885084Sjohnlev 	return (rcount);
12895084Sjohnlev }
12905084Sjohnlev 
12915084Sjohnlev /*
129210175SStuart.Maybee@Sun.COM  * This function allocates "count" MSI-X vector(s) for the given "dip/pri/type"
129310175SStuart.Maybee@Sun.COM  */
129410175SStuart.Maybee@Sun.COM int
apic_alloc_msix_vectors(dev_info_t * dip,int inum,int count,int pri,int behavior)129510175SStuart.Maybee@Sun.COM apic_alloc_msix_vectors(dev_info_t *dip, int inum, int count, int pri,
129610175SStuart.Maybee@Sun.COM     int behavior)
129710175SStuart.Maybee@Sun.COM {
129810175SStuart.Maybee@Sun.COM 	int	rcount, i, rc;
129910175SStuart.Maybee@Sun.COM 	major_t	major;
130010175SStuart.Maybee@Sun.COM 	physdev_map_pirq_t map_irq;
130110175SStuart.Maybee@Sun.COM 	int busnum, devfn;
130210175SStuart.Maybee@Sun.COM 	ddi_intr_msix_t *msix_p = i_ddi_get_msix(dip);
130310175SStuart.Maybee@Sun.COM 	uint64_t table_base;
130410175SStuart.Maybee@Sun.COM 	pfn_t pfnum;
130510175SStuart.Maybee@Sun.COM 
130610175SStuart.Maybee@Sun.COM 	if (msix_p == NULL) {
130710175SStuart.Maybee@Sun.COM 		msix_p = pci_msix_init(dip);
130810175SStuart.Maybee@Sun.COM 		if (msix_p != NULL) {
130910175SStuart.Maybee@Sun.COM 			i_ddi_set_msix(dip, msix_p);
131010175SStuart.Maybee@Sun.COM 		} else {
131110175SStuart.Maybee@Sun.COM 			cmn_err(CE_WARN, "apic_alloc_msix_vectors()"
131210175SStuart.Maybee@Sun.COM 			    " msix_init failed");
131310175SStuart.Maybee@Sun.COM 			return (0);
131410175SStuart.Maybee@Sun.COM 		}
131510175SStuart.Maybee@Sun.COM 	}
131610175SStuart.Maybee@Sun.COM 	/*
131710323SStuart.Maybee@Sun.COM 	 * Hypervisor wants PCI config space address of msix table base
131810175SStuart.Maybee@Sun.COM 	 */
131910175SStuart.Maybee@Sun.COM 	pfnum = hat_getpfnum(kas.a_hat, (caddr_t)msix_p->msix_tbl_addr) &
132010175SStuart.Maybee@Sun.COM 	    ~PFN_IS_FOREIGN_MFN;
132110323SStuart.Maybee@Sun.COM 	table_base = (uint64_t)((pfnum << PAGESHIFT) - msix_p->msix_tbl_offset |
132210175SStuart.Maybee@Sun.COM 	    ((uintptr_t)msix_p->msix_tbl_addr & PAGEOFFSET));
132310175SStuart.Maybee@Sun.COM 	/*
132410175SStuart.Maybee@Sun.COM 	 * get PCI bus #  and devfn from reg spec for device
132510175SStuart.Maybee@Sun.COM 	 */
132610175SStuart.Maybee@Sun.COM 	get_busdevfn(dip, &busnum, &devfn);
132710175SStuart.Maybee@Sun.COM 
132810175SStuart.Maybee@Sun.COM 	/*
132910175SStuart.Maybee@Sun.COM 	 * Tell xen about this pci device
133010175SStuart.Maybee@Sun.COM 	 */
133110175SStuart.Maybee@Sun.COM 	if (!xen_manage_device(busnum, devfn))
133210175SStuart.Maybee@Sun.COM 		return (0);
133310175SStuart.Maybee@Sun.COM 	mutex_enter(&airq_mutex);
133410175SStuart.Maybee@Sun.COM 
133510175SStuart.Maybee@Sun.COM 	if ((rcount = apic_navail_vector(dip, pri)) > count)
133610175SStuart.Maybee@Sun.COM 		rcount = count;
133710175SStuart.Maybee@Sun.COM 	else if (rcount == 0 || (rcount < count &&
133810175SStuart.Maybee@Sun.COM 	    behavior == DDI_INTR_ALLOC_STRICT)) {
133910175SStuart.Maybee@Sun.COM 		rcount = 0;
134010175SStuart.Maybee@Sun.COM 		goto out;
134110175SStuart.Maybee@Sun.COM 	}
134210175SStuart.Maybee@Sun.COM 
134310175SStuart.Maybee@Sun.COM 	major = (dip != NULL) ? ddi_name_to_major(ddi_get_name(dip)) : 0;
134410175SStuart.Maybee@Sun.COM 	for (i = 0; i < rcount; i++) {
134510175SStuart.Maybee@Sun.COM 		int irqno;
134610175SStuart.Maybee@Sun.COM 		uchar_t	vector;
134710175SStuart.Maybee@Sun.COM 		apic_irq_t	*irqptr;
134810175SStuart.Maybee@Sun.COM 
134910175SStuart.Maybee@Sun.COM 		/*
135010175SStuart.Maybee@Sun.COM 		 * use PHYSDEVOP_map_pirq to have xen map MSI-X to a pirq
135110175SStuart.Maybee@Sun.COM 		 */
135210175SStuart.Maybee@Sun.COM 		map_irq.domid = DOMID_SELF;
135310175SStuart.Maybee@Sun.COM 		map_irq.type = MAP_PIRQ_TYPE_MSI;
135410175SStuart.Maybee@Sun.COM 		map_irq.index = -1; /* hypervisor auto allocates vector */
135510175SStuart.Maybee@Sun.COM 		map_irq.pirq = -1;
135610175SStuart.Maybee@Sun.COM 		map_irq.bus = busnum;
135710175SStuart.Maybee@Sun.COM 		map_irq.devfn = devfn;
135810175SStuart.Maybee@Sun.COM 		map_irq.entry_nr = i;
135910175SStuart.Maybee@Sun.COM 		map_irq.table_base = table_base;
136010175SStuart.Maybee@Sun.COM 		rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq);
136110175SStuart.Maybee@Sun.COM 		irqno = map_irq.pirq;
136210175SStuart.Maybee@Sun.COM 		if (rc < 0) {
136310175SStuart.Maybee@Sun.COM 			mutex_exit(&airq_mutex);
136410175SStuart.Maybee@Sun.COM 			cmn_err(CE_WARN, "map MSI irq failed err: %d", -rc);
136511188SFrank.Vanderlinden@Sun.COM 			return (i);
136610175SStuart.Maybee@Sun.COM 		}
136710175SStuart.Maybee@Sun.COM 		if (irqno < 0) {
136810175SStuart.Maybee@Sun.COM 			mutex_exit(&airq_mutex);
136910175SStuart.Maybee@Sun.COM 			cmn_err(CE_NOTE,
137010175SStuart.Maybee@Sun.COM 			    "!hypervisor not configured for MSI support");
137110175SStuart.Maybee@Sun.COM 			xen_support_msi = -1;
137210175SStuart.Maybee@Sun.COM 			return (0);
137310175SStuart.Maybee@Sun.COM 		}
137410175SStuart.Maybee@Sun.COM 		/*
137510175SStuart.Maybee@Sun.COM 		 * Find out what vector the hypervisor assigned
137610175SStuart.Maybee@Sun.COM 		 */
137710175SStuart.Maybee@Sun.COM 		vector = xpv_psm_get_msi_vector(dip, DDI_INTR_TYPE_MSIX, i);
137811188SFrank.Vanderlinden@Sun.COM 
137910175SStuart.Maybee@Sun.COM 		if (msi_allocate_irq(irqno) < 0) {
138010175SStuart.Maybee@Sun.COM 			mutex_exit(&airq_mutex);
138111188SFrank.Vanderlinden@Sun.COM 			return (i);
138210175SStuart.Maybee@Sun.COM 		}
138310175SStuart.Maybee@Sun.COM 		apic_vector_to_irq[vector] = (uchar_t)irqno;
138410175SStuart.Maybee@Sun.COM 		msi_vector_to_pirq[vector] = (uchar_t)irqno;
138510175SStuart.Maybee@Sun.COM 		apic_max_device_irq = max(irqno, apic_max_device_irq);
138610175SStuart.Maybee@Sun.COM 		apic_min_device_irq = min(irqno, apic_min_device_irq);
138710175SStuart.Maybee@Sun.COM 		irqptr = apic_irq_table[irqno];
138810175SStuart.Maybee@Sun.COM 		ASSERT(irqptr != NULL);
138910175SStuart.Maybee@Sun.COM 		irqptr->airq_vector = (uchar_t)vector;
139010175SStuart.Maybee@Sun.COM 		irqptr->airq_ipl = pri;
139110175SStuart.Maybee@Sun.COM 		irqptr->airq_origirq = (uchar_t)(inum + i);
139210175SStuart.Maybee@Sun.COM 		irqptr->airq_share_id = 0;
139310175SStuart.Maybee@Sun.COM 		irqptr->airq_mps_intr_index = MSIX_INDEX;
139410175SStuart.Maybee@Sun.COM 		irqptr->airq_dip = dip;
139510175SStuart.Maybee@Sun.COM 		irqptr->airq_major = major;
139610175SStuart.Maybee@Sun.COM 		irqptr->airq_cpu = IRQ_UNBOUND; /* will be bound when addspl */
139710175SStuart.Maybee@Sun.COM 	}
139810175SStuart.Maybee@Sun.COM out:
139910175SStuart.Maybee@Sun.COM 	mutex_exit(&airq_mutex);
140010175SStuart.Maybee@Sun.COM 	return (rcount);
140110175SStuart.Maybee@Sun.COM }
140210175SStuart.Maybee@Sun.COM 
140310175SStuart.Maybee@Sun.COM 
140410175SStuart.Maybee@Sun.COM /*
140510175SStuart.Maybee@Sun.COM  * This finds the apic_irq_t associated with the dip, ispec and type.
140610175SStuart.Maybee@Sun.COM  * The entry should have already been freed, but it can not have been
140710175SStuart.Maybee@Sun.COM  * reused yet since the hypervisor can not have reassigned the pirq since
140810175SStuart.Maybee@Sun.COM  * we have not freed that yet.
140910175SStuart.Maybee@Sun.COM  */
141010175SStuart.Maybee@Sun.COM static apic_irq_t *
msi_find_irq(dev_info_t * dip,struct intrspec * ispec)141110175SStuart.Maybee@Sun.COM msi_find_irq(dev_info_t *dip, struct intrspec *ispec)
141210175SStuart.Maybee@Sun.COM {
141310175SStuart.Maybee@Sun.COM 	apic_irq_t	*irqp;
141410175SStuart.Maybee@Sun.COM 	int i;
141510175SStuart.Maybee@Sun.COM 
141610175SStuart.Maybee@Sun.COM 	for (i = apic_min_device_irq; i <= apic_max_device_irq; i++) {
141710175SStuart.Maybee@Sun.COM 		if ((irqp = apic_irq_table[i]) == NULL)
141810175SStuart.Maybee@Sun.COM 			continue;
141910175SStuart.Maybee@Sun.COM 		if ((irqp->airq_dip == dip) &&
142010175SStuart.Maybee@Sun.COM 		    (irqp->airq_origirq == ispec->intrspec_vec) &&
142110175SStuart.Maybee@Sun.COM 		    (irqp->airq_ipl == ispec->intrspec_pri)) {
142210175SStuart.Maybee@Sun.COM 			return (irqp);
142310175SStuart.Maybee@Sun.COM 		}
142410175SStuart.Maybee@Sun.COM 	}
142510175SStuart.Maybee@Sun.COM 	return (NULL);
142610175SStuart.Maybee@Sun.COM }
142710175SStuart.Maybee@Sun.COM 
142810175SStuart.Maybee@Sun.COM void
apic_free_vectors(dev_info_t * dip,int inum,int count,int pri,int type)142910175SStuart.Maybee@Sun.COM apic_free_vectors(dev_info_t *dip, int inum, int count, int pri, int type)
143010175SStuart.Maybee@Sun.COM {
143110175SStuart.Maybee@Sun.COM 	int i, rc;
143210175SStuart.Maybee@Sun.COM 	physdev_unmap_pirq_t unmap_pirq;
143310175SStuart.Maybee@Sun.COM 	apic_irq_t *irqptr;
143410175SStuart.Maybee@Sun.COM 	struct intrspec ispec;
143510175SStuart.Maybee@Sun.COM 
143610175SStuart.Maybee@Sun.COM 	DDI_INTR_IMPLDBG((CE_CONT, "apic_free_vectors: dip: %p inum: %x "
143710175SStuart.Maybee@Sun.COM 	    "count: %x pri: %x type: %x\n",
143810175SStuart.Maybee@Sun.COM 	    (void *)dip, inum, count, pri, type));
143910175SStuart.Maybee@Sun.COM 
144010175SStuart.Maybee@Sun.COM 	/* for MSI/X only */
144110175SStuart.Maybee@Sun.COM 	if (!DDI_INTR_IS_MSI_OR_MSIX(type))
144210175SStuart.Maybee@Sun.COM 		return;
144310175SStuart.Maybee@Sun.COM 
144410175SStuart.Maybee@Sun.COM 	for (i = 0; i < count; i++) {
144510175SStuart.Maybee@Sun.COM 		DDI_INTR_IMPLDBG((CE_CONT, "apic_free_vectors: inum=0x%x "
144610175SStuart.Maybee@Sun.COM 		    "pri=0x%x count=0x%x\n", inum, pri, count));
144710175SStuart.Maybee@Sun.COM 		ispec.intrspec_vec = inum + i;
144810175SStuart.Maybee@Sun.COM 		ispec.intrspec_pri = pri;
144910175SStuart.Maybee@Sun.COM 		if ((irqptr = msi_find_irq(dip, &ispec)) == NULL) {
145010175SStuart.Maybee@Sun.COM 			cmn_err(CE_WARN,
145110175SStuart.Maybee@Sun.COM 			    "couldn't find irq %s,%s dip: 0x%p vec: %x pri: %x",
145210175SStuart.Maybee@Sun.COM 			    ddi_get_name(dip), ddi_get_name_addr(dip),
145310175SStuart.Maybee@Sun.COM 			    (void *)dip, inum + i, pri);
145410175SStuart.Maybee@Sun.COM 			continue;
145510175SStuart.Maybee@Sun.COM 		}
145610175SStuart.Maybee@Sun.COM 		/*
145710175SStuart.Maybee@Sun.COM 		 * use PHYSDEVOP_unmap_pirq to have xen unmap MSI from a pirq
145810175SStuart.Maybee@Sun.COM 		 */
145910175SStuart.Maybee@Sun.COM 		unmap_pirq.domid = DOMID_SELF;
146010175SStuart.Maybee@Sun.COM 		unmap_pirq.pirq = msi_vector_to_pirq[irqptr->airq_vector];
146110175SStuart.Maybee@Sun.COM 		rc = HYPERVISOR_physdev_op(PHYSDEVOP_unmap_pirq, &unmap_pirq);
146210175SStuart.Maybee@Sun.COM 		if (rc < 0) {
146310175SStuart.Maybee@Sun.COM 			cmn_err(CE_WARN, "unmap pirq failed");
146410175SStuart.Maybee@Sun.COM 			return;
146510175SStuart.Maybee@Sun.COM 		}
146610175SStuart.Maybee@Sun.COM 		irqptr->airq_mps_intr_index = FREE_INDEX;
146710175SStuart.Maybee@Sun.COM 		apic_vector_to_irq[irqptr->airq_vector] = APIC_RESV_IRQ;
146810175SStuart.Maybee@Sun.COM 	}
146910175SStuart.Maybee@Sun.COM }
147010175SStuart.Maybee@Sun.COM 
147110175SStuart.Maybee@Sun.COM /*
14725084Sjohnlev  * The hypervisor doesn't permit access to local apics directly
14735084Sjohnlev  */
14745084Sjohnlev /* ARGSUSED */
14755084Sjohnlev uint32_t *
mapin_apic(uint32_t addr,size_t len,int flags)14765084Sjohnlev mapin_apic(uint32_t addr, size_t len, int flags)
14775084Sjohnlev {
14785084Sjohnlev 	/*
14795084Sjohnlev 	 * Return a pointer to a memory area to fake out the
14805084Sjohnlev 	 * probe code that wants to read apic registers.
14815084Sjohnlev 	 * The dummy values will end up being ignored by xen
14825084Sjohnlev 	 * later on when they are used anyway.
14835084Sjohnlev 	 */
14845084Sjohnlev 	xen_psm_dummy_apic[APIC_VERS_REG] = APIC_INTEGRATED_VERS;
14855084Sjohnlev 	return (xen_psm_dummy_apic);
14865084Sjohnlev }
14875084Sjohnlev 
/*
 * Stand-in for mapping an I/O apic.  All three arguments are ignored and
 * a fixed, non-NULL placeholder pointer is returned.
 */
/* ARGSUSED */
uint32_t *
mapin_ioapic(uint32_t addr, size_t len, int flags)
{
	/*
	 * The configure code that calls this only needs a non-null result.
	 * The i86xpv platform never references through the returned value.
	 */
	uint32_t *placeholder = (uint32_t *)0x1;

	return (placeholder);
}
14985084Sjohnlev 
14995084Sjohnlev /* ARGSUSED */
15005084Sjohnlev void
mapout_apic(caddr_t addr,size_t len)15015084Sjohnlev mapout_apic(caddr_t addr, size_t len)
15025084Sjohnlev {
15035084Sjohnlev }
15045084Sjohnlev 
15055084Sjohnlev /* ARGSUSED */
15065084Sjohnlev void
mapout_ioapic(caddr_t addr,size_t len)15075084Sjohnlev mapout_ioapic(caddr_t addr, size_t len)
15085084Sjohnlev {
15095084Sjohnlev }
15105084Sjohnlev 
15115084Sjohnlev uint32_t
ioapic_read(int apic_ix,uint32_t reg)15125084Sjohnlev ioapic_read(int apic_ix, uint32_t reg)
15135084Sjohnlev {
15145084Sjohnlev 	physdev_apic_t apic;
15155084Sjohnlev 
15165084Sjohnlev 	apic.apic_physbase = (unsigned long)apic_physaddr[apic_ix];
15175084Sjohnlev 	apic.reg = reg;
15185084Sjohnlev 	if (HYPERVISOR_physdev_op(PHYSDEVOP_apic_read, &apic))
15195084Sjohnlev 		panic("read ioapic %d reg %d failed", apic_ix, reg);
15205084Sjohnlev 	return (apic.value);
15215084Sjohnlev }
15225084Sjohnlev 
15235084Sjohnlev void
ioapic_write(int apic_ix,uint32_t reg,uint32_t value)15245084Sjohnlev ioapic_write(int apic_ix, uint32_t reg, uint32_t value)
15255084Sjohnlev {
15265084Sjohnlev 	physdev_apic_t apic;
15275084Sjohnlev 
15285084Sjohnlev 	apic.apic_physbase = (unsigned long)apic_physaddr[apic_ix];
15295084Sjohnlev 	apic.reg = reg;
15305084Sjohnlev 	apic.value = value;
15315084Sjohnlev 	if (HYPERVISOR_physdev_op(PHYSDEVOP_apic_write, &apic))
15325084Sjohnlev 		panic("write ioapic %d reg %d failed", apic_ix, reg);
15335084Sjohnlev }
15345084Sjohnlev 
15355084Sjohnlev /*
15367282Smishra  * This function was added as part of x2APIC support in pcplusmp.
15377282Smishra  */
15387282Smishra void
ioapic_write_eoi(int apic_ix,uint32_t value)15397282Smishra ioapic_write_eoi(int apic_ix, uint32_t value)
15407282Smishra {
15417282Smishra 	physdev_apic_t apic;
15427282Smishra 
15437282Smishra 	apic.apic_physbase = (unsigned long)apic_physaddr[apic_ix];
15447282Smishra 	apic.reg = APIC_IO_EOI;
15457282Smishra 	apic.value = value;
15467282Smishra 	if (HYPERVISOR_physdev_op(PHYSDEVOP_apic_write, &apic))
15477282Smishra 		panic("write ioapic reg : APIC_IO_EOI %d failed", apic_ix);
15487282Smishra }
15497282Smishra 
/*
 * Empty stub.  It was added along with x2APIC support in pcplusmp so that
 * the symbol is defined for xpv_psm; it performs no work here.
 */
void
x2apic_update_psm()
{
	/* intentionally empty */
}
15587282Smishra 
/*
 * Empty stub.  It was added along with x2APIC support in pcplusmp so that
 * the symbol is defined for xpv_psm; it performs no work here.
 */
void
apic_ret()
{
	/* intentionally empty */
}
15677282Smishra 
15687282Smishra /*
15695084Sjohnlev  * Call rebind to do the actual programming.
15705084Sjohnlev  */
15715084Sjohnlev int
apic_setup_io_intr(void * p,int irq,boolean_t deferred)15725084Sjohnlev apic_setup_io_intr(void *p, int irq, boolean_t deferred)
15735084Sjohnlev {
15745084Sjohnlev 	apic_irq_t *irqptr;
15755084Sjohnlev 	struct ioapic_reprogram_data *drep = NULL;
15765084Sjohnlev 	int rv, cpu;
15775084Sjohnlev 	cpuset_t cpus;
15785084Sjohnlev 
15795084Sjohnlev 	if (deferred) {
15805084Sjohnlev 		drep = (struct ioapic_reprogram_data *)p;
15815084Sjohnlev 		ASSERT(drep != NULL);
15825084Sjohnlev 		irqptr = drep->irqp;
15835084Sjohnlev 	} else {
15845084Sjohnlev 		irqptr = (apic_irq_t *)p;
15855084Sjohnlev 	}
15865084Sjohnlev 	ASSERT(irqptr != NULL);
158710175SStuart.Maybee@Sun.COM 	/*
158810175SStuart.Maybee@Sun.COM 	 * Set cpu based on xen idea of online cpu's not apic tables.
158910175SStuart.Maybee@Sun.COM 	 * Note that xen ignores/sets to it's own preferred value the
159010175SStuart.Maybee@Sun.COM 	 * target cpu field when programming ioapic anyway.
159110175SStuart.Maybee@Sun.COM 	 */
159210175SStuart.Maybee@Sun.COM 	if (irqptr->airq_mps_intr_index == MSI_INDEX)
159310175SStuart.Maybee@Sun.COM 		cpu = irqptr->airq_cpu; /* MSI cpus are already set */
159410175SStuart.Maybee@Sun.COM 	else {
159510175SStuart.Maybee@Sun.COM 		cpu = xen_psm_bind_intr(irq);
159610175SStuart.Maybee@Sun.COM 		irqptr->airq_cpu = cpu;
159710175SStuart.Maybee@Sun.COM 	}
159810175SStuart.Maybee@Sun.COM 	if (cpu == IRQ_UNBOUND) {
159910175SStuart.Maybee@Sun.COM 		CPUSET_ZERO(cpus);
160010175SStuart.Maybee@Sun.COM 		CPUSET_OR(cpus, xen_psm_cpus_online);
160110175SStuart.Maybee@Sun.COM 	} else {
160210175SStuart.Maybee@Sun.COM 		CPUSET_ONLY(cpus, cpu & ~IRQ_USER_BOUND);
160310175SStuart.Maybee@Sun.COM 	}
16045084Sjohnlev 	rv = apic_rebind(irqptr, cpu, drep);
16055084Sjohnlev 	if (rv) {
16065084Sjohnlev 		/* CPU is not up or interrupt is disabled. Fall back to 0 */
16075084Sjohnlev 		cpu = 0;
160810175SStuart.Maybee@Sun.COM 		irqptr->airq_cpu = cpu;
16095084Sjohnlev 		rv = apic_rebind(irqptr, cpu, drep);
16105084Sjohnlev 	}
16115084Sjohnlev 	/*
16125084Sjohnlev 	 * If rebind successful bind the irq to an event channel
16135084Sjohnlev 	 */
16145529Ssmaybe 	if (rv == 0) {
16155529Ssmaybe 		ec_setup_pirq(irq, irqptr->airq_ipl, &cpus);
16165529Ssmaybe 		CPUSET_FIND(cpus, cpu);
16175529Ssmaybe 		apic_irq_table[irq]->airq_temp_cpu = cpu & ~IRQ_USER_BOUND;
16185529Ssmaybe 	}
16195084Sjohnlev 	return (rv);
16205084Sjohnlev }
16215084Sjohnlev 
16225084Sjohnlev /*
16235084Sjohnlev  * Allocate a new vector for the given irq
16245084Sjohnlev  */
16255084Sjohnlev /* ARGSUSED */
16265084Sjohnlev uchar_t
apic_modify_vector(uchar_t vector,int irq)16275084Sjohnlev apic_modify_vector(uchar_t vector, int irq)
16285084Sjohnlev {
16295084Sjohnlev 	return (apic_allocate_vector(0, irq, 0));
16305084Sjohnlev }
16315084Sjohnlev 
/*
 * The rest of the file is just generic psm module boilerplate
 */

/*
 * Operations vector for this module; xen_psm_info below carries a pointer
 * to it.  The initializer is positional, so the order of the entries has
 * to follow the declaration of struct psm_ops.  The trailing comment on
 * each line names the slot being filled.  Slots initialized with a cast
 * NULL are hooks that this module does not supply.
 */
static struct psm_ops xen_psm_ops = {
	xen_psm_probe,				/* psm_probe		*/

	xen_psm_softinit,			/* psm_init		*/
	xen_psm_picinit,			/* psm_picinit		*/
	xen_psm_intr_enter,			/* psm_intr_enter	*/
	xen_psm_intr_exit,			/* psm_intr_exit	*/
	xen_psm_setspl,				/* psm_setspl		*/
	xen_psm_addspl,				/* psm_addspl		*/
	xen_psm_delspl,				/* psm_delspl		*/
	xen_psm_disable_intr,			/* psm_disable_intr	*/
	xen_psm_enable_intr,			/* psm_enable_intr	*/
	(int (*)(int))NULL,			/* psm_softlvl_to_irq	*/
	(void (*)(int))NULL,			/* psm_set_softintr	*/
	(void (*)(processorid_t))NULL,		/* psm_set_idlecpu	*/
	(void (*)(processorid_t))NULL,		/* psm_unset_idlecpu	*/

	xen_psm_clkinit,			/* psm_clkinit		*/
	xen_psm_get_clockirq,			/* psm_get_clockirq	*/
	xen_psm_hrtimeinit,			/* psm_hrtimeinit	*/
	xpv_gethrtime,				/* psm_gethrtime	*/

	xen_psm_get_next_processorid,		/* psm_get_next_processorid */
	xen_psm_cpu_start,			/* psm_cpu_start	*/
	xen_psm_post_cpu_start,			/* psm_post_cpu_start	*/
	xen_psm_shutdown,			/* psm_shutdown		*/
	xen_psm_get_ipivect,			/* psm_get_ipivect	*/
	xen_psm_send_ipi,			/* psm_send_ipi		*/

	xen_psm_translate_irq,			/* psm_translate_irq	*/

	(void (*)(int, char *))NULL,		/* psm_notify_error	*/
	(void (*)(int msg))NULL,		/* psm_notify_func	*/
	xen_psm_timer_reprogram,		/* psm_timer_reprogram	*/
	xen_psm_timer_enable,			/* psm_timer_enable	*/
	xen_psm_timer_disable,			/* psm_timer_disable	*/
	(void (*)(void *arg))NULL,		/* psm_post_cyclic_setup */
	(void (*)(int, int))NULL,		/* psm_preshutdown	*/
	xen_intr_ops,			/* Advanced DDI Interrupt framework */
	(int (*)(psm_state_request_t *))NULL,	/* psm_state		*/
	(int (*)(psm_cpu_request_t *))NULL	/* psm_cpu_ops		*/
};
16785084Sjohnlev 
/*
 * Module description handed to psm_mod_init(), psm_mod_fini() and
 * psm_mod_info() by the _init(), _fini() and _info() entry points.
 *
 * NOTE(review): this file defines PSMI_1_7 before including the psm
 * headers, yet the version advertised here is PSM_INFO_VER01_5 -- confirm
 * that the mismatch is intentional.
 */
static struct psm_info xen_psm_info = {
	PSM_INFO_VER01_5,	/* version				*/
	PSM_OWN_EXCLUSIVE,	/* ownership				*/
	&xen_psm_ops,		/* operation				*/
	"xVM_psm",		/* machine name				*/
	"platform module"	/* machine descriptions			*/
};

/*
 * Opaque handle; its address is passed to every psm_mod_*() call made by
 * the module entry points.
 */
static void *xen_psm_hdlp;
16885084Sjohnlev 
16895084Sjohnlev int
_init(void)16905084Sjohnlev _init(void)
16915084Sjohnlev {
16925084Sjohnlev 	return (psm_mod_init(&xen_psm_hdlp, &xen_psm_info));
16935084Sjohnlev }
16945084Sjohnlev 
16955084Sjohnlev int
_fini(void)16965084Sjohnlev _fini(void)
16975084Sjohnlev {
16985084Sjohnlev 	return (psm_mod_fini(&xen_psm_hdlp, &xen_psm_info));
16995084Sjohnlev }
17005084Sjohnlev 
17015084Sjohnlev int
_info(struct modinfo * modinfop)17025084Sjohnlev _info(struct modinfo *modinfop)
17035084Sjohnlev {
17045084Sjohnlev 	return (psm_mod_info(&xen_psm_hdlp, &xen_psm_info, modinfop));
17055084Sjohnlev }
1706