xref: /onnv-gate/usr/src/uts/sun4/os/intr.c (revision 9039:94951b21f634)
10Sstevel@tonic-gate /*
20Sstevel@tonic-gate  * CDDL HEADER START
30Sstevel@tonic-gate  *
40Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
51455Sandrei  * Common Development and Distribution License (the "License").
61455Sandrei  * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate  *
80Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate  * See the License for the specific language governing permissions
110Sstevel@tonic-gate  * and limitations under the License.
120Sstevel@tonic-gate  *
130Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate  *
190Sstevel@tonic-gate  * CDDL HEADER END
200Sstevel@tonic-gate  */
210Sstevel@tonic-gate /*
22*9039SMadhavan.Venkataraman@Sun.COM  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
230Sstevel@tonic-gate  * Use is subject to license terms.
240Sstevel@tonic-gate  */
250Sstevel@tonic-gate 
260Sstevel@tonic-gate #include <sys/sysmacros.h>
270Sstevel@tonic-gate #include <sys/stack.h>
280Sstevel@tonic-gate #include <sys/cpuvar.h>
290Sstevel@tonic-gate #include <sys/ivintr.h>
300Sstevel@tonic-gate #include <sys/intreg.h>
310Sstevel@tonic-gate #include <sys/membar.h>
320Sstevel@tonic-gate #include <sys/kmem.h>
330Sstevel@tonic-gate #include <sys/intr.h>
345107Seota #include <sys/sunddi.h>
350Sstevel@tonic-gate #include <sys/sunndi.h>
360Sstevel@tonic-gate #include <sys/cmn_err.h>
370Sstevel@tonic-gate #include <sys/privregs.h>
380Sstevel@tonic-gate #include <sys/systm.h>
390Sstevel@tonic-gate #include <sys/archsystm.h>
400Sstevel@tonic-gate #include <sys/machsystm.h>
410Sstevel@tonic-gate #include <sys/x_call.h>
420Sstevel@tonic-gate #include <vm/seg_kp.h>
430Sstevel@tonic-gate #include <sys/debug.h>
440Sstevel@tonic-gate #include <sys/cyclic.h>
454652Scwb #include <sys/kdi_impl.h>
465107Seota #include <sys/ddi_timer.h>
470Sstevel@tonic-gate 
480Sstevel@tonic-gate #include <sys/cpu_sgnblk_defs.h>
490Sstevel@tonic-gate 
500Sstevel@tonic-gate /* Global locks which protect the interrupt distribution lists */
510Sstevel@tonic-gate static kmutex_t intr_dist_lock;
520Sstevel@tonic-gate static kmutex_t intr_dist_cpu_lock;
530Sstevel@tonic-gate 
540Sstevel@tonic-gate /* Head of the interrupt distribution lists */
550Sstevel@tonic-gate static struct intr_dist *intr_dist_head = NULL;
560Sstevel@tonic-gate static struct intr_dist *intr_dist_whead = NULL;
570Sstevel@tonic-gate 
585107Seota static uint64_t siron_inum[DDI_IPL_10]; /* software interrupt numbers */
595076Smishra uint64_t *siron_cpu_inum = NULL;
605076Smishra uint64_t siron_poke_cpu_inum;
615076Smishra static int siron_cpu_setup(cpu_setup_t, int, void *);
625076Smishra extern uint_t softlevel1();
635076Smishra 
645107Seota static uint64_t siron1_inum; /* backward compatibility */
652973Sgovinda uint64_t poke_cpu_inum;
662973Sgovinda uint_t poke_cpu_intr(caddr_t arg1, caddr_t arg2);
675076Smishra uint_t siron_poke_cpu_intr(caddr_t arg1, caddr_t arg2);
682973Sgovinda 
69522Ssudheer /*
707459SChristopher.Baumbauer@Sun.COM  * Variable to enable/disable printing a message when an invalid vecintr
717459SChristopher.Baumbauer@Sun.COM  * is received.
727459SChristopher.Baumbauer@Sun.COM  */
737459SChristopher.Baumbauer@Sun.COM uint_t ignore_invalid_vecintr = 0;
747459SChristopher.Baumbauer@Sun.COM 
757459SChristopher.Baumbauer@Sun.COM /*
76522Ssudheer  * Note:-
77522Ssudheer  * siron_pending was originally created to prevent a resource over consumption
78522Ssudheer  * bug in setsoftint(exhaustion of interrupt pool free list).
79522Ssudheer  * It's original intention is obsolete with the use of iv_pending in
80522Ssudheer  * setsoftint. However, siron_pending stayed around, acting as a second
81522Ssudheer  * gatekeeper preventing soft interrupts from being queued. In this capacity,
82522Ssudheer  * it can lead to hangs on MP systems, where due to global visibility issues
83522Ssudheer  * it can end up set while iv_pending is reset, preventing soft interrupts from
84522Ssudheer  * ever being processed. In addition to its gatekeeper role, init_intr also
85522Ssudheer  * uses it to flag the situation where siron() was called before siron_inum has
86522Ssudheer  * been defined.
87522Ssudheer  *
88522Ssudheer  * siron() does not need an extra gatekeeper; any cpu that wishes should be
89522Ssudheer  * allowed to queue a soft interrupt. It is softint()'s job to ensure
90522Ssudheer  * correct handling of the queues. Therefore, siron_pending has been
91522Ssudheer  * stripped of its gatekeeper task, retaining only its intr_init job, where
92522Ssudheer  * it indicates that there is a pending need to call siron().
93522Ssudheer  */
945107Seota static int siron_pending[DDI_IPL_10]; /* software interrupt pending flags */
955107Seota static int siron1_pending; /* backward compatibility */
960Sstevel@tonic-gate 
970Sstevel@tonic-gate int intr_policy = INTR_WEIGHTED_DIST;	/* interrupt distribution policy */
980Sstevel@tonic-gate int intr_dist_debug = 0;
990Sstevel@tonic-gate int32_t intr_dist_weight_max = 1;
1000Sstevel@tonic-gate int32_t intr_dist_weight_maxmax = 1000;
1010Sstevel@tonic-gate int intr_dist_weight_maxfactor = 2;
1020Sstevel@tonic-gate #define	INTR_DEBUG(args) if (intr_dist_debug) cmn_err args
1030Sstevel@tonic-gate 
1040Sstevel@tonic-gate /*
1052973Sgovinda  * intr_init() - Interrupt initialization
1062973Sgovinda  *	Initialize the system's interrupt vector table.
1070Sstevel@tonic-gate  */
1080Sstevel@tonic-gate void
intr_init(cpu_t * cp)1090Sstevel@tonic-gate intr_init(cpu_t *cp)
1100Sstevel@tonic-gate {
1115107Seota 	int i;
1122973Sgovinda 	extern uint_t softlevel1();
1132973Sgovinda 
1140Sstevel@tonic-gate 	init_ivintr();
1152973Sgovinda 	REGISTER_BBUS_INTR();
1162973Sgovinda 
1175076Smishra 	/*
1185107Seota 	 * Register these software interrupts for ddi timer.
1195107Seota 	 * Software interrupts up to the level 10 are supported.
1205107Seota 	 */
1215107Seota 	for (i = DDI_IPL_1; i <= DDI_IPL_10; i++) {
1225107Seota 		siron_inum[i-1] = add_softintr(i, (softintrfunc)timer_softintr,
1235107Seota 		    (caddr_t)(uintptr_t)(i), SOFTINT_ST);
1245107Seota 	}
1255107Seota 
1265107Seota 	siron1_inum = add_softintr(PIL_1, softlevel1, 0, SOFTINT_ST);
1272973Sgovinda 	poke_cpu_inum = add_softintr(PIL_13, poke_cpu_intr, 0, SOFTINT_MT);
1285076Smishra 	siron_poke_cpu_inum = add_softintr(PIL_13,
1295076Smishra 	    siron_poke_cpu_intr, 0, SOFTINT_MT);
1302973Sgovinda 	cp->cpu_m.poke_cpu_outstanding = B_FALSE;
1310Sstevel@tonic-gate 
1320Sstevel@tonic-gate 	mutex_init(&intr_dist_lock, NULL, MUTEX_DEFAULT, NULL);
1330Sstevel@tonic-gate 	mutex_init(&intr_dist_cpu_lock, NULL, MUTEX_DEFAULT, NULL);
1340Sstevel@tonic-gate 
1350Sstevel@tonic-gate 	/*
1360Sstevel@tonic-gate 	 * A soft interrupt may have been requested prior to the initialization
1370Sstevel@tonic-gate 	 * of soft interrupts.  Soft interrupts can't be dispatched until after
1382973Sgovinda 	 * init_intr(), so we have to wait until now before we can dispatch the
1392973Sgovinda 	 * pending soft interrupt (if any).
1400Sstevel@tonic-gate 	 */
1415107Seota 	for (i = DDI_IPL_1; i <= DDI_IPL_10; i++) {
1425107Seota 		if (siron_pending[i-1]) {
1435107Seota 			siron_pending[i-1] = 0;
1445107Seota 			sir_on(i);
1455107Seota 		}
1465107Seota 	}
1475107Seota 	if (siron1_pending) {
1485107Seota 		siron1_pending = 0;
149522Ssudheer 		siron();
150522Ssudheer 	}
1510Sstevel@tonic-gate }
1520Sstevel@tonic-gate 
1530Sstevel@tonic-gate /*
1540Sstevel@tonic-gate  * poke_cpu_intr - fall through when poke_cpu calls
1550Sstevel@tonic-gate  */
1560Sstevel@tonic-gate /* ARGSUSED */
1570Sstevel@tonic-gate uint_t
poke_cpu_intr(caddr_t arg1,caddr_t arg2)1580Sstevel@tonic-gate poke_cpu_intr(caddr_t arg1, caddr_t arg2)
1590Sstevel@tonic-gate {
1600Sstevel@tonic-gate 	CPU->cpu_m.poke_cpu_outstanding = B_FALSE;
1610Sstevel@tonic-gate 	membar_stld_stst();
1620Sstevel@tonic-gate 	return (1);
1630Sstevel@tonic-gate }
1640Sstevel@tonic-gate 
1650Sstevel@tonic-gate /*
1665107Seota  * Trigger software interrupts dedicated to ddi timer.
1675107Seota  */
1685107Seota void
sir_on(int level)1695107Seota sir_on(int level)
1705107Seota {
1715107Seota 	ASSERT(level >= DDI_IPL_1 && level <= DDI_IPL_10);
1725107Seota 	if (siron_inum[level-1])
1735107Seota 		setsoftint(siron_inum[level-1]);
1745107Seota 	else
1755107Seota 		siron_pending[level-1] = 1;
1765107Seota }
1775107Seota 
1785107Seota /*
1794652Scwb  * kmdb uses siron (and thus setsoftint) while the world is stopped in order to
1804652Scwb  * inform its driver component that there's work to be done.  We need to keep
1814652Scwb  * DTrace from instrumenting kmdb's siron and setsoftint.  We duplicate siron,
1824652Scwb  * giving kmdb's version a kdi_ prefix to keep DTrace at bay.  The
1834652Scwb  * implementation of setsoftint is complicated enough that we don't want to
1844652Scwb  * duplicate it, but at the same time we don't want to preclude tracing either.
1854652Scwb  * The meat of setsoftint() therefore goes into kdi_setsoftint, with
1864652Scwb  * setsoftint() implemented as a wrapper.  This allows tracing, while still
1874652Scwb  * providing a way for kmdb to sneak in unmolested.
1880Sstevel@tonic-gate  */
1890Sstevel@tonic-gate void
kdi_siron(void)1904652Scwb kdi_siron(void)
1914652Scwb {
1925107Seota 	if (siron1_inum != 0)
1935107Seota 		kdi_setsoftint(siron1_inum);
1944652Scwb 	else
1955107Seota 		siron1_pending = 1;
1964652Scwb }
1974652Scwb 
1984652Scwb void
setsoftint(uint64_t inum)1994652Scwb setsoftint(uint64_t inum)
2004652Scwb {
2014652Scwb 	kdi_setsoftint(inum);
2024652Scwb }
2034652Scwb 
2045076Smishra /*
2055076Smishra  * Generates softlevel1 interrupt on current CPU if it
2065076Smishra  * is not pending already.
2075076Smishra  */
2084652Scwb void
siron(void)2090Sstevel@tonic-gate siron(void)
2100Sstevel@tonic-gate {
2115076Smishra 	uint64_t inum;
2125076Smishra 
2135107Seota 	if (siron1_inum != 0) {
2146595Smishra 		/*
2156595Smishra 		 * Once siron_cpu_inum has been allocated, we can
2166595Smishra 		 * use per-CPU siron inum.
2176595Smishra 		 */
2186595Smishra 		if (siron_cpu_inum && siron_cpu_inum[CPU->cpu_id] != 0)
2195076Smishra 			inum = siron_cpu_inum[CPU->cpu_id];
2205076Smishra 		else
2215107Seota 			inum = siron1_inum;
2225076Smishra 
2235076Smishra 		setsoftint(inum);
2245076Smishra 	} else
2255107Seota 		siron1_pending = 1;
2260Sstevel@tonic-gate }
2270Sstevel@tonic-gate 
2286595Smishra 
2296595Smishra static void
siron_init(void)2306595Smishra siron_init(void)
2316595Smishra {
2326595Smishra 	/*
2336595Smishra 	 * We just allocate memory for per-cpu siron right now. Rest of
2346595Smishra 	 * the work is done when CPU is configured.
2356595Smishra 	 */
2366595Smishra 	siron_cpu_inum = kmem_zalloc(sizeof (uint64_t) * NCPU, KM_SLEEP);
2376595Smishra }
2386595Smishra 
2390Sstevel@tonic-gate /*
2405076Smishra  * This routine creates per-CPU siron inum for CPUs which are
2415076Smishra  * configured during boot.
2425076Smishra  */
2435076Smishra void
siron_mp_init()2445076Smishra siron_mp_init()
2455076Smishra {
2465076Smishra 	cpu_t *c;
2475076Smishra 
2486595Smishra 	/*
2496595Smishra 	 * Get the memory for per-CPU siron inums
2506595Smishra 	 */
2516595Smishra 	siron_init();
2526595Smishra 
2535076Smishra 	mutex_enter(&cpu_lock);
2545076Smishra 	c = cpu_list;
2555076Smishra 	do {
2565076Smishra 		(void) siron_cpu_setup(CPU_CONFIG, c->cpu_id, NULL);
2575076Smishra 	} while ((c = c->cpu_next) != cpu_list);
2585076Smishra 
2595076Smishra 	register_cpu_setup_func(siron_cpu_setup, NULL);
2605076Smishra 	mutex_exit(&cpu_lock);
2615076Smishra }
2625076Smishra 
2635076Smishra /*
2645076Smishra  * siron_poke_cpu_intr - cross-call handler.
2655076Smishra  */
2665076Smishra /* ARGSUSED */
2675076Smishra uint_t
siron_poke_cpu_intr(caddr_t arg1,caddr_t arg2)2685076Smishra siron_poke_cpu_intr(caddr_t arg1, caddr_t arg2)
2695076Smishra {
2705076Smishra 	/* generate level1 softint */
2715076Smishra 	siron();
2725076Smishra 	return (1);
2735076Smishra }
2745076Smishra 
2755076Smishra /*
2765076Smishra  * This routine generates a cross-call on target CPU(s).
2775076Smishra  */
2785076Smishra void
siron_poke_cpu(cpuset_t poke)2795076Smishra siron_poke_cpu(cpuset_t poke)
2805076Smishra {
2815076Smishra 	int cpuid = CPU->cpu_id;
2825076Smishra 
2835076Smishra 	if (CPU_IN_SET(poke, cpuid)) {
2845076Smishra 		siron();
2855076Smishra 		CPUSET_DEL(poke, cpuid);
2865076Smishra 		if (CPUSET_ISNULL(poke))
2875076Smishra 			return;
2885076Smishra 	}
2895076Smishra 
2905076Smishra 	xt_some(poke, setsoftint_tl1, siron_poke_cpu_inum, 0);
2915076Smishra }
2925076Smishra 
2935076Smishra /*
2945076Smishra  * This callback function allows us to create per-CPU siron inum.
2955076Smishra  */
2965076Smishra /* ARGSUSED */
2975076Smishra static int
siron_cpu_setup(cpu_setup_t what,int id,void * arg)2985076Smishra siron_cpu_setup(cpu_setup_t what, int id, void *arg)
2995076Smishra {
3005076Smishra 	cpu_t *cp = cpu[id];
3015076Smishra 
3025076Smishra 	ASSERT(MUTEX_HELD(&cpu_lock));
3035076Smishra 	ASSERT(cp != NULL);
3045076Smishra 
3055076Smishra 	switch (what) {
3065076Smishra 	case CPU_CONFIG:
3075076Smishra 		siron_cpu_inum[cp->cpu_id] = add_softintr(PIL_1,
3085076Smishra 		    (softintrfunc)softlevel1, 0, SOFTINT_ST);
3095076Smishra 		break;
3105076Smishra 	case CPU_UNCONFIG:
3115076Smishra 		(void) rem_softintr(siron_cpu_inum[cp->cpu_id]);
3125076Smishra 		siron_cpu_inum[cp->cpu_id] = 0;
3135076Smishra 		break;
3145076Smishra 	default:
3155076Smishra 		break;
3165076Smishra 	}
3175076Smishra 
3185076Smishra 	return (0);
3195076Smishra }
3205076Smishra 
3215076Smishra /*
3220Sstevel@tonic-gate  * no_ivintr()
3232973Sgovinda  * 	called by setvecint_tl1() through sys_trap()
3240Sstevel@tonic-gate  *	vector interrupt received but not valid or not
3252973Sgovinda  *	registered in intr_vec_table
3260Sstevel@tonic-gate  *	considered as a spurious mondo interrupt
3270Sstevel@tonic-gate  */
3280Sstevel@tonic-gate /* ARGSUSED */
3290Sstevel@tonic-gate void
no_ivintr(struct regs * rp,int inum,int pil)3300Sstevel@tonic-gate no_ivintr(struct regs *rp, int inum, int pil)
3310Sstevel@tonic-gate {
3327459SChristopher.Baumbauer@Sun.COM 	if (!ignore_invalid_vecintr)
3337459SChristopher.Baumbauer@Sun.COM 		cmn_err(CE_WARN, "invalid vector intr: number 0x%x, pil 0x%x",
3347459SChristopher.Baumbauer@Sun.COM 		    inum, pil);
3350Sstevel@tonic-gate 
3360Sstevel@tonic-gate #ifdef DEBUG_VEC_INTR
3370Sstevel@tonic-gate 	prom_enter_mon();
3380Sstevel@tonic-gate #endif /* DEBUG_VEC_INTR */
3390Sstevel@tonic-gate }
3400Sstevel@tonic-gate 
3410Sstevel@tonic-gate void
intr_dequeue_req(uint_t pil,uint64_t inum)3422973Sgovinda intr_dequeue_req(uint_t pil, uint64_t inum)
3430Sstevel@tonic-gate {
3442973Sgovinda 	intr_vec_t	*iv, *next, *prev;
3452973Sgovinda 	struct machcpu	*mcpu;
3462973Sgovinda 	uint32_t	clr;
3472973Sgovinda 	processorid_t	cpu_id;
3482973Sgovinda 	extern uint_t	getpstate(void);
3490Sstevel@tonic-gate 
3500Sstevel@tonic-gate 	ASSERT((getpstate() & PSTATE_IE) == 0);
3510Sstevel@tonic-gate 
3520Sstevel@tonic-gate 	mcpu = &CPU->cpu_m;
3532973Sgovinda 	cpu_id = CPU->cpu_id;
3542973Sgovinda 
3552973Sgovinda 	iv = (intr_vec_t *)inum;
3562973Sgovinda 	prev = NULL;
3572973Sgovinda 	next = mcpu->intr_head[pil];
3580Sstevel@tonic-gate 
3590Sstevel@tonic-gate 	/* Find a matching entry in the list */
3602973Sgovinda 	while (next != NULL) {
3612973Sgovinda 		if (next == iv)
3620Sstevel@tonic-gate 			break;
3632973Sgovinda 		prev = next;
3642973Sgovinda 		next = IV_GET_PIL_NEXT(next, cpu_id);
3650Sstevel@tonic-gate 	}
3660Sstevel@tonic-gate 
3672973Sgovinda 	if (next != NULL) {
3682973Sgovinda 		intr_vec_t	*next_iv = IV_GET_PIL_NEXT(next, cpu_id);
3692973Sgovinda 
3702973Sgovinda 		/* Remove entry from list */
3712973Sgovinda 		if (prev != NULL)
3722973Sgovinda 			IV_SET_PIL_NEXT(prev, cpu_id, next_iv); /* non-head */
3732973Sgovinda 		else
3742973Sgovinda 			mcpu->intr_head[pil] = next_iv; /* head */
3752973Sgovinda 
3762973Sgovinda 		if (next_iv == NULL)
3772973Sgovinda 			mcpu->intr_tail[pil] = prev; /* tail */
3782973Sgovinda 	}
3792973Sgovinda 
3802973Sgovinda 	/* Clear pending interrupts at this level if the list is empty */
3810Sstevel@tonic-gate 	if (mcpu->intr_head[pil] == NULL) {
3820Sstevel@tonic-gate 		clr = 1 << pil;
3830Sstevel@tonic-gate 		if (pil == PIL_14)
3840Sstevel@tonic-gate 			clr |= (TICK_INT_MASK | STICK_INT_MASK);
3850Sstevel@tonic-gate 		wr_clr_softint(clr);
3860Sstevel@tonic-gate 	}
3870Sstevel@tonic-gate }
3880Sstevel@tonic-gate 
3890Sstevel@tonic-gate 
3900Sstevel@tonic-gate /*
3910Sstevel@tonic-gate  * Send a directed interrupt of specified interrupt number id to a cpu.
3920Sstevel@tonic-gate  */
3930Sstevel@tonic-gate void
send_dirint(int cpuix,int intr_id)3940Sstevel@tonic-gate send_dirint(
3950Sstevel@tonic-gate 	int cpuix,		/* cpu to be interrupted */
3960Sstevel@tonic-gate 	int intr_id)		/* interrupt number id */
3970Sstevel@tonic-gate {
3980Sstevel@tonic-gate 	xt_one(cpuix, setsoftint_tl1, intr_id, 0);
3990Sstevel@tonic-gate }
4000Sstevel@tonic-gate 
4010Sstevel@tonic-gate /*
4020Sstevel@tonic-gate  * Take the specified CPU out of participation in interrupts.
4030Sstevel@tonic-gate  *	Called by p_online(2) when a processor is being taken off-line.
4040Sstevel@tonic-gate  *	This allows interrupt threads being handled on the processor to
4050Sstevel@tonic-gate  *	complete before the processor is idled.
4060Sstevel@tonic-gate  */
4070Sstevel@tonic-gate int
cpu_disable_intr(struct cpu * cp)4080Sstevel@tonic-gate cpu_disable_intr(struct cpu *cp)
4090Sstevel@tonic-gate {
4100Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&cpu_lock));
4110Sstevel@tonic-gate 
4120Sstevel@tonic-gate 	/*
4130Sstevel@tonic-gate 	 * Turn off the CPU_ENABLE flag before calling the redistribution
4140Sstevel@tonic-gate 	 * function, since it checks for this in the cpu flags.
4150Sstevel@tonic-gate 	 */
4160Sstevel@tonic-gate 	cp->cpu_flags &= ~CPU_ENABLE;
4170Sstevel@tonic-gate 
4180Sstevel@tonic-gate 	intr_redist_all_cpus();
4190Sstevel@tonic-gate 
4200Sstevel@tonic-gate 	return (0);
4210Sstevel@tonic-gate }
4220Sstevel@tonic-gate 
4230Sstevel@tonic-gate /*
4240Sstevel@tonic-gate  * Allow the specified CPU to participate in interrupts.
4250Sstevel@tonic-gate  *	Called by p_online(2) if a processor could not be taken off-line
4260Sstevel@tonic-gate  *	because of bound threads, in order to resume processing interrupts.
4270Sstevel@tonic-gate  *	Also called after starting a processor.
4280Sstevel@tonic-gate  */
4290Sstevel@tonic-gate void
cpu_enable_intr(struct cpu * cp)4300Sstevel@tonic-gate cpu_enable_intr(struct cpu *cp)
4310Sstevel@tonic-gate {
4320Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&cpu_lock));
4330Sstevel@tonic-gate 
4340Sstevel@tonic-gate 	cp->cpu_flags |= CPU_ENABLE;
4350Sstevel@tonic-gate 
4360Sstevel@tonic-gate 	intr_redist_all_cpus();
4370Sstevel@tonic-gate }
4380Sstevel@tonic-gate 
4390Sstevel@tonic-gate /*
4400Sstevel@tonic-gate  * Add function to callback list for intr_redist_all_cpus.  We keep two lists,
4410Sstevel@tonic-gate  * one for weighted callbacks and one for normal callbacks. Weighted callbacks
4420Sstevel@tonic-gate  * are issued to redirect interrupts of a specified weight, from heavy to
4430Sstevel@tonic-gate  * light.  This allows all the interrupts of a given weight to be redistributed
4440Sstevel@tonic-gate  * for all weighted nexus drivers prior to those of less weight.
4450Sstevel@tonic-gate  */
4460Sstevel@tonic-gate static void
intr_dist_add_list(struct intr_dist ** phead,void (* func)(void *),void * arg)4470Sstevel@tonic-gate intr_dist_add_list(struct intr_dist **phead, void (*func)(void *), void *arg)
4480Sstevel@tonic-gate {
4490Sstevel@tonic-gate 	struct intr_dist *new = kmem_alloc(sizeof (*new), KM_SLEEP);
4500Sstevel@tonic-gate 	struct intr_dist *iptr;
4510Sstevel@tonic-gate 	struct intr_dist **pptr;
4520Sstevel@tonic-gate 
4530Sstevel@tonic-gate 	ASSERT(func);
4540Sstevel@tonic-gate 	new->func = func;
4550Sstevel@tonic-gate 	new->arg = arg;
4560Sstevel@tonic-gate 	new->next = NULL;
4570Sstevel@tonic-gate 
4580Sstevel@tonic-gate 	/* Add to tail so that redistribution occurs in original order. */
4590Sstevel@tonic-gate 	mutex_enter(&intr_dist_lock);
4600Sstevel@tonic-gate 	for (iptr = *phead, pptr = phead; iptr != NULL;
4610Sstevel@tonic-gate 	    pptr = &iptr->next, iptr = iptr->next) {
4620Sstevel@tonic-gate 		/* check for problems as we locate the tail */
4630Sstevel@tonic-gate 		if ((iptr->func == func) && (iptr->arg == arg)) {
4640Sstevel@tonic-gate 			cmn_err(CE_PANIC, "intr_dist_add_list(): duplicate");
4650Sstevel@tonic-gate 			/*NOTREACHED*/
4660Sstevel@tonic-gate 		}
4670Sstevel@tonic-gate 	}
4680Sstevel@tonic-gate 	*pptr = new;
4690Sstevel@tonic-gate 
4700Sstevel@tonic-gate 	mutex_exit(&intr_dist_lock);
4710Sstevel@tonic-gate }
4720Sstevel@tonic-gate 
4730Sstevel@tonic-gate void
intr_dist_add(void (* func)(void *),void * arg)4740Sstevel@tonic-gate intr_dist_add(void (*func)(void *), void *arg)
4750Sstevel@tonic-gate {
4760Sstevel@tonic-gate 	intr_dist_add_list(&intr_dist_head, (void (*)(void *))func, arg);
4770Sstevel@tonic-gate }
4780Sstevel@tonic-gate 
4790Sstevel@tonic-gate void
intr_dist_add_weighted(void (* func)(void *,int32_t,int32_t),void * arg)4800Sstevel@tonic-gate intr_dist_add_weighted(void (*func)(void *, int32_t, int32_t), void *arg)
4810Sstevel@tonic-gate {
4820Sstevel@tonic-gate 	intr_dist_add_list(&intr_dist_whead, (void (*)(void *))func, arg);
4830Sstevel@tonic-gate }
4840Sstevel@tonic-gate 
4850Sstevel@tonic-gate /*
4860Sstevel@tonic-gate  * Search for the interrupt distribution structure with the specified
4870Sstevel@tonic-gate  * mondo vec reg in the interrupt distribution list. If a match is found,
4880Sstevel@tonic-gate  * then delete the entry from the list. The caller is responsible for
4890Sstevel@tonic-gate  * modifying the mondo vector registers.
4900Sstevel@tonic-gate  */
4910Sstevel@tonic-gate static void
intr_dist_rem_list(struct intr_dist ** headp,void (* func)(void *),void * arg)4920Sstevel@tonic-gate intr_dist_rem_list(struct intr_dist **headp, void (*func)(void *), void *arg)
4930Sstevel@tonic-gate {
4940Sstevel@tonic-gate 	struct intr_dist *iptr;
4950Sstevel@tonic-gate 	struct intr_dist **vect;
4960Sstevel@tonic-gate 
4970Sstevel@tonic-gate 	mutex_enter(&intr_dist_lock);
4980Sstevel@tonic-gate 	for (iptr = *headp, vect = headp;
4990Sstevel@tonic-gate 	    iptr != NULL; vect = &iptr->next, iptr = iptr->next) {
5000Sstevel@tonic-gate 		if ((iptr->func == func) && (iptr->arg == arg)) {
5010Sstevel@tonic-gate 			*vect = iptr->next;
5020Sstevel@tonic-gate 			kmem_free(iptr, sizeof (struct intr_dist));
5030Sstevel@tonic-gate 			mutex_exit(&intr_dist_lock);
5040Sstevel@tonic-gate 			return;
5050Sstevel@tonic-gate 		}
5060Sstevel@tonic-gate 	}
5070Sstevel@tonic-gate 
5080Sstevel@tonic-gate 	if (!panicstr)
5090Sstevel@tonic-gate 		cmn_err(CE_PANIC, "intr_dist_rem_list: not found");
5100Sstevel@tonic-gate 	mutex_exit(&intr_dist_lock);
5110Sstevel@tonic-gate }
5120Sstevel@tonic-gate 
5130Sstevel@tonic-gate void
intr_dist_rem(void (* func)(void *),void * arg)5140Sstevel@tonic-gate intr_dist_rem(void (*func)(void *), void *arg)
5150Sstevel@tonic-gate {
5160Sstevel@tonic-gate 	intr_dist_rem_list(&intr_dist_head, (void (*)(void *))func, arg);
5170Sstevel@tonic-gate }
5180Sstevel@tonic-gate 
5190Sstevel@tonic-gate void
intr_dist_rem_weighted(void (* func)(void *,int32_t,int32_t),void * arg)5200Sstevel@tonic-gate intr_dist_rem_weighted(void (*func)(void *, int32_t, int32_t), void *arg)
5210Sstevel@tonic-gate {
5220Sstevel@tonic-gate 	intr_dist_rem_list(&intr_dist_whead, (void (*)(void *))func, arg);
5230Sstevel@tonic-gate }
5240Sstevel@tonic-gate 
5250Sstevel@tonic-gate /*
5260Sstevel@tonic-gate  * Initiate interrupt redistribution.  Redistribution improves the isolation
5270Sstevel@tonic-gate  * associated with interrupt weights by ordering operations from heavy weight
5280Sstevel@tonic-gate  * to light weight.  When a CPUs orientation changes relative to interrupts,
5290Sstevel@tonic-gate  * there is *always* a redistribution to accommodate this change (call to
5300Sstevel@tonic-gate  * intr_redist_all_cpus()).  As devices (not CPUs) attach/detach it is possible
5310Sstevel@tonic-gate  * that a redistribution could improve the quality of an initialization. For
5320Sstevel@tonic-gate  * example, if you are not using a NIC it may not be attached with s10 (devfs).
5330Sstevel@tonic-gate  * If you then configure the NIC (ifconfig), this may cause the NIC to attach
5340Sstevel@tonic-gate  * and plumb interrupts.  The CPU assignment for the NIC's interrupts is
5350Sstevel@tonic-gate  * occurring late, so optimal "isolation" relative to weight is not occurring.
5360Sstevel@tonic-gate  * The same applies to detach, although in this case doing the redistribution
5370Sstevel@tonic-gate  * might improve "spread" for medium weight devices since the "isolation" of
5380Sstevel@tonic-gate  * a higher weight device may no longer be present.
5390Sstevel@tonic-gate  *
5400Sstevel@tonic-gate  * NB: We should provide a utility to trigger redistribution (ala "intradm -r").
5410Sstevel@tonic-gate  *
5420Sstevel@tonic-gate  * NB: There is risk associated with automatically triggering execution of the
5430Sstevel@tonic-gate  * redistribution code at arbitrary times. The risk comes from the fact that
5440Sstevel@tonic-gate  * there is a lot of low-level hardware interaction associated with a
5450Sstevel@tonic-gate  * redistribution.  At some point we may want this code to perform automatic
5460Sstevel@tonic-gate  * redistribution (redistribution thread; trigger timeout when add/remove
5470Sstevel@tonic-gate  * weight delta is large enough, and call cv_signal from timeout - causing
5480Sstevel@tonic-gate  * thead to call i_ddi_intr_redist_all_cpus()) but this is considered too
5490Sstevel@tonic-gate  * risky at this time.
5500Sstevel@tonic-gate  */
5510Sstevel@tonic-gate void
i_ddi_intr_redist_all_cpus()5520Sstevel@tonic-gate i_ddi_intr_redist_all_cpus()
5530Sstevel@tonic-gate {
5540Sstevel@tonic-gate 	mutex_enter(&cpu_lock);
5550Sstevel@tonic-gate 	INTR_DEBUG((CE_CONT, "intr_dist: i_ddi_intr_redist_all_cpus\n"));
5560Sstevel@tonic-gate 	intr_redist_all_cpus();
5570Sstevel@tonic-gate 	mutex_exit(&cpu_lock);
5580Sstevel@tonic-gate }
5590Sstevel@tonic-gate 
5600Sstevel@tonic-gate /*
5610Sstevel@tonic-gate  * Redistribute all interrupts
5620Sstevel@tonic-gate  *
5630Sstevel@tonic-gate  * This function redistributes all interrupting devices, running the
5640Sstevel@tonic-gate  * parent callback functions for each node.
5650Sstevel@tonic-gate  */
5660Sstevel@tonic-gate void
intr_redist_all_cpus(void)5670Sstevel@tonic-gate intr_redist_all_cpus(void)
5680Sstevel@tonic-gate {
5690Sstevel@tonic-gate 	struct cpu *cp;
5700Sstevel@tonic-gate 	struct intr_dist *iptr;
5710Sstevel@tonic-gate 	int32_t weight, max_weight;
5720Sstevel@tonic-gate 
5730Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&cpu_lock));
5740Sstevel@tonic-gate 	mutex_enter(&intr_dist_lock);
5750Sstevel@tonic-gate 
5760Sstevel@tonic-gate 	/*
5770Sstevel@tonic-gate 	 * zero cpu_intr_weight on all cpus - it is safe to traverse
5780Sstevel@tonic-gate 	 * cpu_list since we hold cpu_lock.
5790Sstevel@tonic-gate 	 */
5800Sstevel@tonic-gate 	cp = cpu_list;
5810Sstevel@tonic-gate 	do {
5820Sstevel@tonic-gate 		cp->cpu_intr_weight = 0;
5830Sstevel@tonic-gate 	} while ((cp = cp->cpu_next) != cpu_list);
5840Sstevel@tonic-gate 
5850Sstevel@tonic-gate 	/*
5860Sstevel@tonic-gate 	 * Assume that this redistribution may encounter a device weight
5870Sstevel@tonic-gate 	 * via driver.conf tuning of "ddi-intr-weight" that is at most
5880Sstevel@tonic-gate 	 * intr_dist_weight_maxfactor times larger.
5890Sstevel@tonic-gate 	 */
5900Sstevel@tonic-gate 	max_weight = intr_dist_weight_max * intr_dist_weight_maxfactor;
5910Sstevel@tonic-gate 	if (max_weight > intr_dist_weight_maxmax)
5920Sstevel@tonic-gate 		max_weight = intr_dist_weight_maxmax;
5930Sstevel@tonic-gate 	intr_dist_weight_max = 1;
5940Sstevel@tonic-gate 
5950Sstevel@tonic-gate 	INTR_DEBUG((CE_CONT, "intr_dist: "
5960Sstevel@tonic-gate 	    "intr_redist_all_cpus: %d-0\n", max_weight));
5970Sstevel@tonic-gate 
5980Sstevel@tonic-gate 	/*
5990Sstevel@tonic-gate 	 * Redistribute weighted, from heavy to light.  The callback that
6000Sstevel@tonic-gate 	 * specifies a weight equal to weight_max should redirect all
6010Sstevel@tonic-gate 	 * interrupts of weight weight_max or greater [weight_max, inf.).
6020Sstevel@tonic-gate 	 * Interrupts of lesser weight should be processed on the call with
6030Sstevel@tonic-gate 	 * the matching weight. This allows all the heaver weight interrupts
6040Sstevel@tonic-gate 	 * on all weighted busses (multiple pci busses) to be redirected prior
6050Sstevel@tonic-gate 	 * to any lesser weight interrupts.
6060Sstevel@tonic-gate 	 */
6070Sstevel@tonic-gate 	for (weight = max_weight; weight >= 0; weight--)
6080Sstevel@tonic-gate 		for (iptr = intr_dist_whead; iptr != NULL; iptr = iptr->next)
6090Sstevel@tonic-gate 			((void (*)(void *, int32_t, int32_t))iptr->func)
6100Sstevel@tonic-gate 			    (iptr->arg, max_weight, weight);
6110Sstevel@tonic-gate 
6120Sstevel@tonic-gate 	/* redistribute normal (non-weighted) interrupts */
6130Sstevel@tonic-gate 	for (iptr = intr_dist_head; iptr != NULL; iptr = iptr->next)
6140Sstevel@tonic-gate 		((void (*)(void *))iptr->func)(iptr->arg);
6150Sstevel@tonic-gate 	mutex_exit(&intr_dist_lock);
6160Sstevel@tonic-gate }
6170Sstevel@tonic-gate 
6180Sstevel@tonic-gate void
intr_redist_all_cpus_shutdown(void)6190Sstevel@tonic-gate intr_redist_all_cpus_shutdown(void)
6200Sstevel@tonic-gate {
6210Sstevel@tonic-gate 	intr_policy = INTR_CURRENT_CPU;
6220Sstevel@tonic-gate 	intr_redist_all_cpus();
6230Sstevel@tonic-gate }
6240Sstevel@tonic-gate 
/*
 * Determine what CPU to target, based on interrupt policy.
 *
 * INTR_FLAT_DIST: hold a current CPU pointer in a static variable and
 *	advance through interrupt enabled cpus (round-robin).
 *
 * INTR_WEIGHTED_DIST: search for an enabled CPU with the lowest
 *	cpu_intr_weight, round robin when all equal.
 *
 *	Weighted interrupt distribution provides two things: "spread" of weight
 *	(associated with algorithm itself) and "isolation" (associated with a
 *	particular device weight). A redistribution is what provides optimal
 *	"isolation" of heavy weight interrupts, optimal "spread" of weight
 *	(relative to what came before) is always occurring.
 *
 *	An interrupt weight is a subjective number that represents the
 *	percentage of a CPU required to service a device's interrupts: the
 *	default weight is 0% (however the algorithm still maintains
 *	round-robin), a network interface controller (NIC) may have a large
 *	weight (35%). Interrupt weight only has meaning relative to the
 *	interrupt weight of other devices: a CPU can be weighted more than
 *	100%, and a single device might consume more than 100% of a CPU.
 *
 *	A coarse interrupt weight can be defined by the parent nexus driver
 *	based on bus specific information, like pci class codes. A nexus
 *	driver that supports device interrupt weighting for its children
 *	should call intr_dist_cpuid_add/rem_device_weight(), which adds
 *	and removes the weight of a device from the CPU that an interrupt
 *	is directed at.  The quality of initialization improves when the
 *	device interrupt weights more accurately reflect actual run-time
 *	weights, and as the assignments are ordered from heavy to light.
 *
 *	The implementation also supports interrupt weight being specified in
 *	driver.conf files via the property "ddi-intr-weight", which takes
 *	precedence over the nexus supplied weight.  This support is added to
 *	permit possible tweaking in the product in response to customer
 *	problems. This is not a formal or committed interface.
 *
 *	While a weighted approach chooses the CPU providing the best spread
 *	given past weights, less than optimal isolation can result in cases
 *	where heavy weight devices show up last. The nexus driver's interrupt
 *	redistribution logic should use intr_dist_add/rem_weighted so that
 *	interrupts can be redistributed heavy first for optimal isolation.
 */
6690Sstevel@tonic-gate uint32_t
intr_dist_cpuid(void)6700Sstevel@tonic-gate intr_dist_cpuid(void)
6710Sstevel@tonic-gate {
6720Sstevel@tonic-gate 	static struct cpu	*curr_cpu;
6730Sstevel@tonic-gate 	struct cpu		*start_cpu;
6740Sstevel@tonic-gate 	struct cpu		*new_cpu;
6750Sstevel@tonic-gate 	struct cpu		*cp;
6760Sstevel@tonic-gate 	int			cpuid = -1;
6770Sstevel@tonic-gate 
6780Sstevel@tonic-gate 	/* Establish exclusion for curr_cpu and cpu_intr_weight manipulation */
6790Sstevel@tonic-gate 	mutex_enter(&intr_dist_cpu_lock);
6800Sstevel@tonic-gate 
6810Sstevel@tonic-gate 	switch (intr_policy) {
6820Sstevel@tonic-gate 	case INTR_CURRENT_CPU:
6830Sstevel@tonic-gate 		cpuid = CPU->cpu_id;
6840Sstevel@tonic-gate 		break;
6850Sstevel@tonic-gate 
6860Sstevel@tonic-gate 	case INTR_BOOT_CPU:
6870Sstevel@tonic-gate 		panic("INTR_BOOT_CPU no longer supported.");
6880Sstevel@tonic-gate 		/*NOTREACHED*/
6890Sstevel@tonic-gate 
6900Sstevel@tonic-gate 	case INTR_FLAT_DIST:
6910Sstevel@tonic-gate 	case INTR_WEIGHTED_DIST:
6920Sstevel@tonic-gate 	default:
6930Sstevel@tonic-gate 		/*
6940Sstevel@tonic-gate 		 * Ensure that curr_cpu is valid - cpu_next will be NULL if
6950Sstevel@tonic-gate 		 * the cpu has been deleted (cpu structs are never freed).
6960Sstevel@tonic-gate 		 */
6970Sstevel@tonic-gate 		if (curr_cpu == NULL || curr_cpu->cpu_next == NULL)
6980Sstevel@tonic-gate 			curr_cpu = CPU;
6990Sstevel@tonic-gate 
7000Sstevel@tonic-gate 		/*
7010Sstevel@tonic-gate 		 * Advance to online CPU after curr_cpu (round-robin). For
7020Sstevel@tonic-gate 		 * INTR_WEIGHTED_DIST we choose the cpu with the lightest
7030Sstevel@tonic-gate 		 * weight.  For a nexus that does not support weight the
7040Sstevel@tonic-gate 		 * default weight of zero is used. We degrade to round-robin
7050Sstevel@tonic-gate 		 * behavior among equal weightes.  The default weight is zero
7060Sstevel@tonic-gate 		 * and round-robin behavior continues.
7070Sstevel@tonic-gate 		 *
7080Sstevel@tonic-gate 		 * Disable preemption while traversing cpu_next_onln to
7090Sstevel@tonic-gate 		 * ensure the list does not change.  This works because
7100Sstevel@tonic-gate 		 * modifiers of this list and other lists in a struct cpu
7110Sstevel@tonic-gate 		 * call pause_cpus() before making changes.
7120Sstevel@tonic-gate 		 */
7130Sstevel@tonic-gate 		kpreempt_disable();
7140Sstevel@tonic-gate 		cp = start_cpu = curr_cpu->cpu_next_onln;
7150Sstevel@tonic-gate 		new_cpu = NULL;
7160Sstevel@tonic-gate 		do {
7170Sstevel@tonic-gate 			/* Skip CPUs with interrupts disabled */
7180Sstevel@tonic-gate 			if ((cp->cpu_flags & CPU_ENABLE) == 0)
7190Sstevel@tonic-gate 				continue;
7200Sstevel@tonic-gate 
7210Sstevel@tonic-gate 			if (intr_policy == INTR_FLAT_DIST) {
7220Sstevel@tonic-gate 				/* select CPU */
7230Sstevel@tonic-gate 				new_cpu = cp;
7240Sstevel@tonic-gate 				break;
7250Sstevel@tonic-gate 			} else if ((new_cpu == NULL) ||
7260Sstevel@tonic-gate 			    (cp->cpu_intr_weight < new_cpu->cpu_intr_weight)) {
7270Sstevel@tonic-gate 				/* Choose if lighter weight */
7280Sstevel@tonic-gate 				new_cpu = cp;
7290Sstevel@tonic-gate 			}
7300Sstevel@tonic-gate 		} while ((cp = cp->cpu_next_onln) != start_cpu);
7310Sstevel@tonic-gate 		ASSERT(new_cpu);
7320Sstevel@tonic-gate 		cpuid = new_cpu->cpu_id;
7330Sstevel@tonic-gate 
7340Sstevel@tonic-gate 		INTR_DEBUG((CE_CONT, "intr_dist: cpu %2d weight %3d: "
7350Sstevel@tonic-gate 		    "targeted\n", cpuid, new_cpu->cpu_intr_weight));
7360Sstevel@tonic-gate 
7370Sstevel@tonic-gate 		/* update static pointer for next round-robin */
7380Sstevel@tonic-gate 		curr_cpu = new_cpu;
7390Sstevel@tonic-gate 		kpreempt_enable();
7400Sstevel@tonic-gate 		break;
7410Sstevel@tonic-gate 	}
7420Sstevel@tonic-gate 	mutex_exit(&intr_dist_cpu_lock);
7430Sstevel@tonic-gate 	return (cpuid);
7440Sstevel@tonic-gate }
7450Sstevel@tonic-gate 
/*
 * Add or remove the weight of a device from a CPU's interrupt weight.
 *
 * We expect nexus drivers to call intr_dist_cpuid_add/rem_device_weight for
 * their children to improve the overall quality of interrupt initialization.
 *
 * If a nexus shares the CPU returned by a single intr_dist_cpuid() call
 * among multiple devices (sharing ino) then the nexus should call
 * intr_dist_cpuid_add/rem_device_weight for each device separately. Devices
 * that share must specify the same cpuid.
 *
 * If a nexus driver is unable to determine the cpu at remove_intr time
 * for some of its interrupts, then it should not call add_device_weight -
 * intr_dist_cpuid will still provide round-robin.
 *
 * An established device weight (from dev_info node) takes precedence over
 * the weight passed in.  If a device weight is not already established
 * then the passed in nexus weight is established.
 */
7650Sstevel@tonic-gate void
intr_dist_cpuid_add_device_weight(uint32_t cpuid,dev_info_t * dip,int32_t nweight)7660Sstevel@tonic-gate intr_dist_cpuid_add_device_weight(uint32_t cpuid,
7670Sstevel@tonic-gate     dev_info_t *dip, int32_t nweight)
7680Sstevel@tonic-gate {
7690Sstevel@tonic-gate 	int32_t		eweight;
7700Sstevel@tonic-gate 
7710Sstevel@tonic-gate 	/*
7720Sstevel@tonic-gate 	 * For non-weighted policy everything has weight of zero (and we get
7730Sstevel@tonic-gate 	 * round-robin distribution from intr_dist_cpuid).
7740Sstevel@tonic-gate 	 * NB: intr_policy is limited to this file. A weighted nexus driver is
7750Sstevel@tonic-gate 	 * calls this rouitne even if intr_policy has been patched to
7760Sstevel@tonic-gate 	 * INTR_FLAG_DIST.
7770Sstevel@tonic-gate 	 */
7780Sstevel@tonic-gate 	ASSERT(dip);
7790Sstevel@tonic-gate 	if (intr_policy != INTR_WEIGHTED_DIST)
7800Sstevel@tonic-gate 		return;
7810Sstevel@tonic-gate 
7820Sstevel@tonic-gate 	eweight = i_ddi_get_intr_weight(dip);
7830Sstevel@tonic-gate 	INTR_DEBUG((CE_CONT, "intr_dist: cpu %2d weight %3d: +%2d/%2d for "
7840Sstevel@tonic-gate 	    "%s#%d/%s#%d\n", cpuid, cpu[cpuid]->cpu_intr_weight,
7850Sstevel@tonic-gate 	    nweight, eweight, ddi_driver_name(ddi_get_parent(dip)),
7860Sstevel@tonic-gate 	    ddi_get_instance(ddi_get_parent(dip)),
7870Sstevel@tonic-gate 	    ddi_driver_name(dip), ddi_get_instance(dip)));
7880Sstevel@tonic-gate 
7890Sstevel@tonic-gate 	/* if no establish weight, establish nexus weight */
7900Sstevel@tonic-gate 	if (eweight < 0) {
7910Sstevel@tonic-gate 		if (nweight > 0)
7920Sstevel@tonic-gate 			(void) i_ddi_set_intr_weight(dip, nweight);
7930Sstevel@tonic-gate 		else
7940Sstevel@tonic-gate 			nweight = 0;
7950Sstevel@tonic-gate 	} else
7960Sstevel@tonic-gate 		nweight = eweight;	/* use established weight */
7970Sstevel@tonic-gate 
7980Sstevel@tonic-gate 	/* Establish exclusion for cpu_intr_weight manipulation */
7990Sstevel@tonic-gate 	mutex_enter(&intr_dist_cpu_lock);
8000Sstevel@tonic-gate 	cpu[cpuid]->cpu_intr_weight += nweight;
8010Sstevel@tonic-gate 
8020Sstevel@tonic-gate 	/* update intr_dist_weight_max */
8030Sstevel@tonic-gate 	if (nweight > intr_dist_weight_max)
8040Sstevel@tonic-gate 		intr_dist_weight_max = nweight;
8050Sstevel@tonic-gate 	mutex_exit(&intr_dist_cpu_lock);
8060Sstevel@tonic-gate }
8070Sstevel@tonic-gate 
8080Sstevel@tonic-gate void
intr_dist_cpuid_rem_device_weight(uint32_t cpuid,dev_info_t * dip)8090Sstevel@tonic-gate intr_dist_cpuid_rem_device_weight(uint32_t cpuid, dev_info_t *dip)
8100Sstevel@tonic-gate {
8110Sstevel@tonic-gate 	struct cpu	*cp;
8120Sstevel@tonic-gate 	int32_t		weight;
8130Sstevel@tonic-gate 
8140Sstevel@tonic-gate 	ASSERT(dip);
8150Sstevel@tonic-gate 	if (intr_policy != INTR_WEIGHTED_DIST)
8160Sstevel@tonic-gate 		return;
8170Sstevel@tonic-gate 
8180Sstevel@tonic-gate 	/* remove weight of device from cpu */
8190Sstevel@tonic-gate 	weight = i_ddi_get_intr_weight(dip);
8200Sstevel@tonic-gate 	if (weight < 0)
8210Sstevel@tonic-gate 		weight = 0;
8220Sstevel@tonic-gate 	INTR_DEBUG((CE_CONT, "intr_dist: cpu %2d weight %3d: -%2d    for "
8230Sstevel@tonic-gate 	    "%s#%d/%s#%d\n", cpuid, cpu[cpuid]->cpu_intr_weight, weight,
8240Sstevel@tonic-gate 	    ddi_driver_name(ddi_get_parent(dip)),
8250Sstevel@tonic-gate 	    ddi_get_instance(ddi_get_parent(dip)),
8260Sstevel@tonic-gate 	    ddi_driver_name(dip), ddi_get_instance(dip)));
8270Sstevel@tonic-gate 
8280Sstevel@tonic-gate 	/* Establish exclusion for cpu_intr_weight manipulation */
8290Sstevel@tonic-gate 	mutex_enter(&intr_dist_cpu_lock);
8300Sstevel@tonic-gate 	cp = cpu[cpuid];
8310Sstevel@tonic-gate 	cp->cpu_intr_weight -= weight;
8320Sstevel@tonic-gate 	if (cp->cpu_intr_weight < 0)
8330Sstevel@tonic-gate 		cp->cpu_intr_weight = 0;	/* sanity */
8340Sstevel@tonic-gate 	mutex_exit(&intr_dist_cpu_lock);
8350Sstevel@tonic-gate }
8365788Smv143129 
8375788Smv143129 ulong_t
create_softint(uint_t pil,uint_t (* func)(caddr_t,caddr_t),caddr_t arg1)8385788Smv143129 create_softint(uint_t pil, uint_t (*func)(caddr_t, caddr_t), caddr_t arg1)
8395788Smv143129 {
8405788Smv143129 	uint64_t inum;
8415788Smv143129 
842*9039SMadhavan.Venkataraman@Sun.COM 	inum = add_softintr(pil, func, arg1, SOFTINT_MT);
8435788Smv143129 	return ((ulong_t)inum);
8445788Smv143129 }
8455788Smv143129 
8465788Smv143129 void
invoke_softint(processorid_t cpuid,ulong_t hdl)8475788Smv143129 invoke_softint(processorid_t cpuid, ulong_t hdl)
8485788Smv143129 {
8495788Smv143129 	uint64_t inum = hdl;
8505788Smv143129 
8515788Smv143129 	if (cpuid == CPU->cpu_id)
8525788Smv143129 		setsoftint(inum);
8535788Smv143129 	else
8545788Smv143129 		xt_one(cpuid, setsoftint_tl1, inum, 0);
8555788Smv143129 }
8565788Smv143129 
8575788Smv143129 void
remove_softint(ulong_t hdl)8585788Smv143129 remove_softint(ulong_t hdl)
8595788Smv143129 {
8605788Smv143129 	uint64_t inum = hdl;
8615788Smv143129 
8625788Smv143129 	(void) rem_softintr(inum);
8635788Smv143129 }
8645788Smv143129 
8655788Smv143129 void
sync_softint(cpuset_t set)8665788Smv143129 sync_softint(cpuset_t set)
8675788Smv143129 {
8685788Smv143129 	xt_sync(set);
8695788Smv143129 }
870