xref: /onnv-gate/usr/src/uts/sun4/os/intr.c (revision 0)
1*0Sstevel@tonic-gate /*
2*0Sstevel@tonic-gate  * CDDL HEADER START
3*0Sstevel@tonic-gate  *
4*0Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5*0Sstevel@tonic-gate  * Common Development and Distribution License, Version 1.0 only
6*0Sstevel@tonic-gate  * (the "License").  You may not use this file except in compliance
7*0Sstevel@tonic-gate  * with the License.
8*0Sstevel@tonic-gate  *
9*0Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10*0Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
11*0Sstevel@tonic-gate  * See the License for the specific language governing permissions
12*0Sstevel@tonic-gate  * and limitations under the License.
13*0Sstevel@tonic-gate  *
14*0Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
15*0Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16*0Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
17*0Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
18*0Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
19*0Sstevel@tonic-gate  *
20*0Sstevel@tonic-gate  * CDDL HEADER END
21*0Sstevel@tonic-gate  */
22*0Sstevel@tonic-gate /*
23*0Sstevel@tonic-gate  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24*0Sstevel@tonic-gate  * Use is subject to license terms.
25*0Sstevel@tonic-gate  */
26*0Sstevel@tonic-gate 
27*0Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
28*0Sstevel@tonic-gate 
29*0Sstevel@tonic-gate #include <sys/sysmacros.h>
30*0Sstevel@tonic-gate #include <sys/stack.h>
31*0Sstevel@tonic-gate #include <sys/cpuvar.h>
32*0Sstevel@tonic-gate #include <sys/ivintr.h>
33*0Sstevel@tonic-gate #include <sys/intreg.h>
34*0Sstevel@tonic-gate #include <sys/membar.h>
35*0Sstevel@tonic-gate #include <sys/kmem.h>
36*0Sstevel@tonic-gate #include <sys/intr.h>
37*0Sstevel@tonic-gate #include <sys/sunndi.h>
38*0Sstevel@tonic-gate #include <sys/cmn_err.h>
39*0Sstevel@tonic-gate #include <sys/privregs.h>
40*0Sstevel@tonic-gate #include <sys/systm.h>
41*0Sstevel@tonic-gate #include <sys/archsystm.h>
42*0Sstevel@tonic-gate #include <sys/machsystm.h>
43*0Sstevel@tonic-gate #include <sys/x_call.h>
44*0Sstevel@tonic-gate #include <vm/seg_kp.h>
45*0Sstevel@tonic-gate #include <sys/debug.h>
46*0Sstevel@tonic-gate #include <sys/cyclic.h>
47*0Sstevel@tonic-gate 
48*0Sstevel@tonic-gate #include <sys/cpu_sgnblk_defs.h>
49*0Sstevel@tonic-gate 
50*0Sstevel@tonic-gate kmutex_t soft_iv_lock;	/* protect software interrupt vector table */
51*0Sstevel@tonic-gate /* Global locks which protect the interrupt distribution lists */
52*0Sstevel@tonic-gate static kmutex_t intr_dist_lock;
53*0Sstevel@tonic-gate static kmutex_t intr_dist_cpu_lock;
54*0Sstevel@tonic-gate 
55*0Sstevel@tonic-gate /* Head of the interrupt distribution lists */
56*0Sstevel@tonic-gate static struct intr_dist *intr_dist_head = NULL;
57*0Sstevel@tonic-gate static struct intr_dist *intr_dist_whead = NULL;
58*0Sstevel@tonic-gate 
59*0Sstevel@tonic-gate uint_t swinum_base;
60*0Sstevel@tonic-gate uint_t maxswinum;
61*0Sstevel@tonic-gate uint_t siron_inum;
62*0Sstevel@tonic-gate uint_t poke_cpu_inum;
63*0Sstevel@tonic-gate int siron_pending;
64*0Sstevel@tonic-gate 
65*0Sstevel@tonic-gate int intr_policy = INTR_WEIGHTED_DIST;	/* interrupt distribution policy */
66*0Sstevel@tonic-gate int intr_dist_debug = 0;
67*0Sstevel@tonic-gate int32_t intr_dist_weight_max = 1;
68*0Sstevel@tonic-gate int32_t intr_dist_weight_maxmax = 1000;
69*0Sstevel@tonic-gate int intr_dist_weight_maxfactor = 2;
70*0Sstevel@tonic-gate #define	INTR_DEBUG(args) if (intr_dist_debug) cmn_err args
71*0Sstevel@tonic-gate 
72*0Sstevel@tonic-gate static void sw_ivintr_init(cpu_t *);
73*0Sstevel@tonic-gate 
74*0Sstevel@tonic-gate /*
75*0Sstevel@tonic-gate  * intr_init() - interrupt initialization
76*0Sstevel@tonic-gate  *	Initialize the system's software interrupt vector table and
77*0Sstevel@tonic-gate  *	CPU's interrupt free list
78*0Sstevel@tonic-gate  */
79*0Sstevel@tonic-gate void
80*0Sstevel@tonic-gate intr_init(cpu_t *cp)
81*0Sstevel@tonic-gate {
82*0Sstevel@tonic-gate 	init_ivintr();
83*0Sstevel@tonic-gate 	sw_ivintr_init(cp);
84*0Sstevel@tonic-gate 	init_intr_pool(cp);
85*0Sstevel@tonic-gate 
86*0Sstevel@tonic-gate 	mutex_init(&intr_dist_lock, NULL, MUTEX_DEFAULT, NULL);
87*0Sstevel@tonic-gate 	mutex_init(&intr_dist_cpu_lock, NULL, MUTEX_DEFAULT, NULL);
88*0Sstevel@tonic-gate 
89*0Sstevel@tonic-gate 	/*
90*0Sstevel@tonic-gate 	 * A soft interrupt may have been requested prior to the initialization
91*0Sstevel@tonic-gate 	 * of soft interrupts.  Soft interrupts can't be dispatched until after
92*0Sstevel@tonic-gate 	 * init_intr_pool, so we have to wait until now before we can dispatch
93*0Sstevel@tonic-gate 	 * the pending soft interrupt (if any).
94*0Sstevel@tonic-gate 	 */
95*0Sstevel@tonic-gate 	if (siron_pending)
96*0Sstevel@tonic-gate 		setsoftint(siron_inum);
97*0Sstevel@tonic-gate }
98*0Sstevel@tonic-gate 
99*0Sstevel@tonic-gate /*
100*0Sstevel@tonic-gate  * poke_cpu_intr - fall through when poke_cpu calls
101*0Sstevel@tonic-gate  */
102*0Sstevel@tonic-gate 
103*0Sstevel@tonic-gate /* ARGSUSED */
104*0Sstevel@tonic-gate uint_t
105*0Sstevel@tonic-gate poke_cpu_intr(caddr_t arg1, caddr_t arg2)
106*0Sstevel@tonic-gate {
107*0Sstevel@tonic-gate 	CPU->cpu_m.poke_cpu_outstanding = B_FALSE;
108*0Sstevel@tonic-gate 	membar_stld_stst();
109*0Sstevel@tonic-gate 	return (1);
110*0Sstevel@tonic-gate }
111*0Sstevel@tonic-gate 
112*0Sstevel@tonic-gate /*
113*0Sstevel@tonic-gate  * sw_ivintr_init() - software interrupt vector initialization
114*0Sstevel@tonic-gate  *	called after CPU is active
115*0Sstevel@tonic-gate  *	the software interrupt vector table is part of the intr_vector[]
116*0Sstevel@tonic-gate  */
117*0Sstevel@tonic-gate static void
118*0Sstevel@tonic-gate sw_ivintr_init(cpu_t *cp)
119*0Sstevel@tonic-gate {
120*0Sstevel@tonic-gate 	extern uint_t softlevel1();
121*0Sstevel@tonic-gate 
122*0Sstevel@tonic-gate 	mutex_init(&soft_iv_lock, NULL, MUTEX_DEFAULT, NULL);
123*0Sstevel@tonic-gate 
124*0Sstevel@tonic-gate 	swinum_base = SOFTIVNUM;
125*0Sstevel@tonic-gate 
126*0Sstevel@tonic-gate 	/*
127*0Sstevel@tonic-gate 	 * the maximum software interrupt == MAX_SOFT_INO
128*0Sstevel@tonic-gate 	 */
129*0Sstevel@tonic-gate 	maxswinum = swinum_base + MAX_SOFT_INO;
130*0Sstevel@tonic-gate 
131*0Sstevel@tonic-gate 	REGISTER_BBUS_INTR();
132*0Sstevel@tonic-gate 
133*0Sstevel@tonic-gate 	siron_inum = add_softintr(PIL_1, softlevel1, 0);
134*0Sstevel@tonic-gate 	poke_cpu_inum = add_softintr(PIL_13, poke_cpu_intr, 0);
135*0Sstevel@tonic-gate 	cp->cpu_m.poke_cpu_outstanding = B_FALSE;
136*0Sstevel@tonic-gate }
137*0Sstevel@tonic-gate 
138*0Sstevel@tonic-gate cpuset_t intr_add_pools_inuse;
139*0Sstevel@tonic-gate 
140*0Sstevel@tonic-gate /*
141*0Sstevel@tonic-gate  * cleanup_intr_pool()
142*0Sstevel@tonic-gate  *	Free up the extra intr request pool for this cpu.
143*0Sstevel@tonic-gate  */
144*0Sstevel@tonic-gate void
145*0Sstevel@tonic-gate cleanup_intr_pool(cpu_t *cp)
146*0Sstevel@tonic-gate {
147*0Sstevel@tonic-gate 	extern struct intr_req *intr_add_head;
148*0Sstevel@tonic-gate 	int poolno;
149*0Sstevel@tonic-gate 	struct intr_req *pool;
150*0Sstevel@tonic-gate 
151*0Sstevel@tonic-gate 	poolno = cp->cpu_m.intr_pool_added;
152*0Sstevel@tonic-gate 	if (poolno >= 0) {
153*0Sstevel@tonic-gate 		cp->cpu_m.intr_pool_added = -1;
154*0Sstevel@tonic-gate 		pool = (poolno * INTR_PENDING_MAX * intr_add_pools) +
155*0Sstevel@tonic-gate 
156*0Sstevel@tonic-gate 			intr_add_head;	/* not byte arithmetic */
157*0Sstevel@tonic-gate 		bzero(pool, INTR_PENDING_MAX * intr_add_pools *
158*0Sstevel@tonic-gate 		    sizeof (struct intr_req));
159*0Sstevel@tonic-gate 
160*0Sstevel@tonic-gate 		CPUSET_DEL(intr_add_pools_inuse, poolno);
161*0Sstevel@tonic-gate 	}
162*0Sstevel@tonic-gate }
163*0Sstevel@tonic-gate 
164*0Sstevel@tonic-gate /*
165*0Sstevel@tonic-gate  * init_intr_pool()
166*0Sstevel@tonic-gate  *	initialize the intr request pool for the cpu
167*0Sstevel@tonic-gate  * 	should be called for each cpu
168*0Sstevel@tonic-gate  */
169*0Sstevel@tonic-gate void
170*0Sstevel@tonic-gate init_intr_pool(cpu_t *cp)
171*0Sstevel@tonic-gate {
172*0Sstevel@tonic-gate 	extern struct intr_req *intr_add_head;
173*0Sstevel@tonic-gate #ifdef	DEBUG
174*0Sstevel@tonic-gate 	extern struct intr_req *intr_add_tail;
175*0Sstevel@tonic-gate #endif	/* DEBUG */
176*0Sstevel@tonic-gate 	int i, pool;
177*0Sstevel@tonic-gate 
178*0Sstevel@tonic-gate 	cp->cpu_m.intr_pool_added = -1;
179*0Sstevel@tonic-gate 
180*0Sstevel@tonic-gate 	for (i = 0; i < INTR_PENDING_MAX-1; i++) {
181*0Sstevel@tonic-gate 		cp->cpu_m.intr_pool[i].intr_next =
182*0Sstevel@tonic-gate 		    &cp->cpu_m.intr_pool[i+1];
183*0Sstevel@tonic-gate 	}
184*0Sstevel@tonic-gate 	cp->cpu_m.intr_pool[INTR_PENDING_MAX-1].intr_next = NULL;
185*0Sstevel@tonic-gate 
186*0Sstevel@tonic-gate 	cp->cpu_m.intr_head[0] = &cp->cpu_m.intr_pool[0];
187*0Sstevel@tonic-gate 	cp->cpu_m.intr_tail[0] = &cp->cpu_m.intr_pool[INTR_PENDING_MAX-1];
188*0Sstevel@tonic-gate 
189*0Sstevel@tonic-gate 	if (intr_add_pools != 0) {
190*0Sstevel@tonic-gate 
191*0Sstevel@tonic-gate 		/*
192*0Sstevel@tonic-gate 		 * If additional interrupt pools have been allocated,
193*0Sstevel@tonic-gate 		 * initialize those too and add them to the free list.
194*0Sstevel@tonic-gate 		 */
195*0Sstevel@tonic-gate 
196*0Sstevel@tonic-gate 		struct intr_req *trace;
197*0Sstevel@tonic-gate 
198*0Sstevel@tonic-gate 		for (pool = 0; pool < max_ncpus; pool++) {
199*0Sstevel@tonic-gate 			if (!(CPU_IN_SET(intr_add_pools_inuse, pool)))
200*0Sstevel@tonic-gate 			    break;
201*0Sstevel@tonic-gate 		}
202*0Sstevel@tonic-gate 		if (pool >= max_ncpus) {
203*0Sstevel@tonic-gate 			/*
204*0Sstevel@tonic-gate 			 * XXX - intr pools are alloc'd, just not as
205*0Sstevel@tonic-gate 			 * much as we would like.
206*0Sstevel@tonic-gate 			 */
207*0Sstevel@tonic-gate 			cmn_err(CE_WARN, "Failed to alloc all requested intr "
208*0Sstevel@tonic-gate 			    "pools for cpu%d", cp->cpu_id);
209*0Sstevel@tonic-gate 			return;
210*0Sstevel@tonic-gate 		}
211*0Sstevel@tonic-gate 		CPUSET_ADD(intr_add_pools_inuse, pool);
212*0Sstevel@tonic-gate 		cp->cpu_m.intr_pool_added = pool;
213*0Sstevel@tonic-gate 
214*0Sstevel@tonic-gate 		trace = (pool * INTR_PENDING_MAX * intr_add_pools) +
215*0Sstevel@tonic-gate 			intr_add_head;	/* not byte arithmetic */
216*0Sstevel@tonic-gate 
217*0Sstevel@tonic-gate 		cp->cpu_m.intr_pool[INTR_PENDING_MAX-1].intr_next = trace;
218*0Sstevel@tonic-gate 
219*0Sstevel@tonic-gate 		for (i = 1; i < intr_add_pools * INTR_PENDING_MAX; i++, trace++)
220*0Sstevel@tonic-gate 			trace->intr_next = trace + 1;
221*0Sstevel@tonic-gate 		trace->intr_next = NULL;
222*0Sstevel@tonic-gate 
223*0Sstevel@tonic-gate 		ASSERT(trace >= intr_add_head && trace <= intr_add_tail);
224*0Sstevel@tonic-gate 
225*0Sstevel@tonic-gate 		cp->cpu_m.intr_tail[0] = trace;
226*0Sstevel@tonic-gate 	}
227*0Sstevel@tonic-gate }
228*0Sstevel@tonic-gate 
229*0Sstevel@tonic-gate 
230*0Sstevel@tonic-gate /*
231*0Sstevel@tonic-gate  * siron - primitive for sun/os/softint.c
232*0Sstevel@tonic-gate  */
233*0Sstevel@tonic-gate void
234*0Sstevel@tonic-gate siron(void)
235*0Sstevel@tonic-gate {
236*0Sstevel@tonic-gate 	if (!siron_pending) {
237*0Sstevel@tonic-gate 		siron_pending = 1;
238*0Sstevel@tonic-gate 		if (siron_inum != 0)
239*0Sstevel@tonic-gate 			setsoftint(siron_inum);
240*0Sstevel@tonic-gate 	}
241*0Sstevel@tonic-gate }
242*0Sstevel@tonic-gate 
243*0Sstevel@tonic-gate /*
244*0Sstevel@tonic-gate  * no_ivintr()
245*0Sstevel@tonic-gate  * 	called by vec_interrupt() through sys_trap()
246*0Sstevel@tonic-gate  *	vector interrupt received but not valid or not
247*0Sstevel@tonic-gate  *	registered in intr_vector[]
248*0Sstevel@tonic-gate  *	considered as a spurious mondo interrupt
249*0Sstevel@tonic-gate  */
250*0Sstevel@tonic-gate /* ARGSUSED */
251*0Sstevel@tonic-gate void
252*0Sstevel@tonic-gate no_ivintr(struct regs *rp, int inum, int pil)
253*0Sstevel@tonic-gate {
254*0Sstevel@tonic-gate 	cmn_err(CE_WARN, "invalid vector intr: number 0x%x, pil 0x%x",
255*0Sstevel@tonic-gate 	    inum, pil);
256*0Sstevel@tonic-gate 
257*0Sstevel@tonic-gate 
258*0Sstevel@tonic-gate #ifdef DEBUG_VEC_INTR
259*0Sstevel@tonic-gate 	prom_enter_mon();
260*0Sstevel@tonic-gate #endif /* DEBUG_VEC_INTR */
261*0Sstevel@tonic-gate }
262*0Sstevel@tonic-gate 
263*0Sstevel@tonic-gate /*
264*0Sstevel@tonic-gate  * no_intr_pool()
265*0Sstevel@tonic-gate  * 	called by vec_interrupt() through sys_trap()
266*0Sstevel@tonic-gate  *	vector interrupt received but no intr_req entries
267*0Sstevel@tonic-gate  */
268*0Sstevel@tonic-gate /* ARGSUSED */
269*0Sstevel@tonic-gate void
270*0Sstevel@tonic-gate no_intr_pool(struct regs *rp, int inum, int pil)
271*0Sstevel@tonic-gate {
272*0Sstevel@tonic-gate #ifdef DEBUG_VEC_INTR
273*0Sstevel@tonic-gate 	cmn_err(CE_WARN, "intr_req pool empty: num 0x%x, pil 0x%x",
274*0Sstevel@tonic-gate 		inum, pil);
275*0Sstevel@tonic-gate 	prom_enter_mon();
276*0Sstevel@tonic-gate #else
277*0Sstevel@tonic-gate 	cmn_err(CE_PANIC, "intr_req pool empty: num 0x%x, pil 0x%x",
278*0Sstevel@tonic-gate 		inum, pil);
279*0Sstevel@tonic-gate #endif /* DEBUG_VEC_INTR */
280*0Sstevel@tonic-gate }
281*0Sstevel@tonic-gate 
282*0Sstevel@tonic-gate void
283*0Sstevel@tonic-gate intr_dequeue_req(uint_t pil, uint32_t inum)
284*0Sstevel@tonic-gate {
285*0Sstevel@tonic-gate 	struct intr_req *ir, *prev;
286*0Sstevel@tonic-gate 	struct machcpu *mcpu;
287*0Sstevel@tonic-gate 	uint32_t clr;
288*0Sstevel@tonic-gate 	extern uint_t getpstate(void);
289*0Sstevel@tonic-gate 
290*0Sstevel@tonic-gate 	ASSERT((getpstate() & PSTATE_IE) == 0);
291*0Sstevel@tonic-gate 
292*0Sstevel@tonic-gate 	mcpu = &CPU->cpu_m;
293*0Sstevel@tonic-gate 
294*0Sstevel@tonic-gate 	/* Find a matching entry in the list */
295*0Sstevel@tonic-gate 	prev = NULL;
296*0Sstevel@tonic-gate 	ir = mcpu->intr_head[pil];
297*0Sstevel@tonic-gate 	while (ir != NULL) {
298*0Sstevel@tonic-gate 		if (ir->intr_number == inum)
299*0Sstevel@tonic-gate 			break;
300*0Sstevel@tonic-gate 		prev = ir;
301*0Sstevel@tonic-gate 		ir = ir->intr_next;
302*0Sstevel@tonic-gate 	}
303*0Sstevel@tonic-gate 	if (ir != NULL) {
304*0Sstevel@tonic-gate 		/*
305*0Sstevel@tonic-gate 		 * Remove entry from list
306*0Sstevel@tonic-gate 		 */
307*0Sstevel@tonic-gate 		if (prev != NULL)
308*0Sstevel@tonic-gate 			prev->intr_next = ir->intr_next;	/* non-head */
309*0Sstevel@tonic-gate 		else
310*0Sstevel@tonic-gate 			mcpu->intr_head[pil] = ir->intr_next;	/* head */
311*0Sstevel@tonic-gate 
312*0Sstevel@tonic-gate 		if (ir->intr_next == NULL)
313*0Sstevel@tonic-gate 			mcpu->intr_tail[pil] = prev;		/* tail */
314*0Sstevel@tonic-gate 
315*0Sstevel@tonic-gate 		/*
316*0Sstevel@tonic-gate 		 * Place on free list
317*0Sstevel@tonic-gate 		 */
318*0Sstevel@tonic-gate 		ir->intr_next = mcpu->intr_head[0];
319*0Sstevel@tonic-gate 		mcpu->intr_head[0] = ir;
320*0Sstevel@tonic-gate 	}
321*0Sstevel@tonic-gate 
322*0Sstevel@tonic-gate 	/*
323*0Sstevel@tonic-gate 	 * clear pending interrupts at this level if the list is empty
324*0Sstevel@tonic-gate 	 */
325*0Sstevel@tonic-gate 	if (mcpu->intr_head[pil] == NULL) {
326*0Sstevel@tonic-gate 		clr = 1 << pil;
327*0Sstevel@tonic-gate 		if (pil == PIL_14)
328*0Sstevel@tonic-gate 			clr |= (TICK_INT_MASK | STICK_INT_MASK);
329*0Sstevel@tonic-gate 		wr_clr_softint(clr);
330*0Sstevel@tonic-gate 	}
331*0Sstevel@tonic-gate }
332*0Sstevel@tonic-gate 
333*0Sstevel@tonic-gate 
334*0Sstevel@tonic-gate /*
335*0Sstevel@tonic-gate  * Send a directed interrupt of specified interrupt number id to a cpu.
336*0Sstevel@tonic-gate  */
337*0Sstevel@tonic-gate void
338*0Sstevel@tonic-gate send_dirint(
339*0Sstevel@tonic-gate 	int cpuix,		/* cpu to be interrupted */
340*0Sstevel@tonic-gate 	int intr_id)		/* interrupt number id */
341*0Sstevel@tonic-gate {
342*0Sstevel@tonic-gate 	xt_one(cpuix, setsoftint_tl1, intr_id, 0);
343*0Sstevel@tonic-gate }
344*0Sstevel@tonic-gate 
345*0Sstevel@tonic-gate void
346*0Sstevel@tonic-gate init_intr_threads(struct cpu *cp)
347*0Sstevel@tonic-gate {
348*0Sstevel@tonic-gate 	int i;
349*0Sstevel@tonic-gate 
350*0Sstevel@tonic-gate 	for (i = 0; i < NINTR_THREADS; i++)
351*0Sstevel@tonic-gate 		thread_create_intr(cp);
352*0Sstevel@tonic-gate 
353*0Sstevel@tonic-gate 	cp->cpu_intr_stack = (caddr_t)segkp_get(segkp, INTR_STACK_SIZE,
354*0Sstevel@tonic-gate 		KPD_HASREDZONE | KPD_NO_ANON | KPD_LOCKED) +
355*0Sstevel@tonic-gate 		INTR_STACK_SIZE - SA(MINFRAME);
356*0Sstevel@tonic-gate }
357*0Sstevel@tonic-gate 
358*0Sstevel@tonic-gate /*
359*0Sstevel@tonic-gate  * Take the specified CPU out of participation in interrupts.
360*0Sstevel@tonic-gate  *	Called by p_online(2) when a processor is being taken off-line.
361*0Sstevel@tonic-gate  *	This allows interrupt threads being handled on the processor to
362*0Sstevel@tonic-gate  *	complete before the processor is idled.
363*0Sstevel@tonic-gate  */
364*0Sstevel@tonic-gate int
365*0Sstevel@tonic-gate cpu_disable_intr(struct cpu *cp)
366*0Sstevel@tonic-gate {
367*0Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&cpu_lock));
368*0Sstevel@tonic-gate 
369*0Sstevel@tonic-gate 	/*
370*0Sstevel@tonic-gate 	 * Turn off the CPU_ENABLE flag before calling the redistribution
371*0Sstevel@tonic-gate 	 * function, since it checks for this in the cpu flags.
372*0Sstevel@tonic-gate 	 */
373*0Sstevel@tonic-gate 	cp->cpu_flags &= ~CPU_ENABLE;
374*0Sstevel@tonic-gate 
375*0Sstevel@tonic-gate 	intr_redist_all_cpus();
376*0Sstevel@tonic-gate 
377*0Sstevel@tonic-gate 	return (0);
378*0Sstevel@tonic-gate }
379*0Sstevel@tonic-gate 
380*0Sstevel@tonic-gate /*
381*0Sstevel@tonic-gate  * Allow the specified CPU to participate in interrupts.
382*0Sstevel@tonic-gate  *	Called by p_online(2) if a processor could not be taken off-line
383*0Sstevel@tonic-gate  *	because of bound threads, in order to resume processing interrupts.
384*0Sstevel@tonic-gate  *	Also called after starting a processor.
385*0Sstevel@tonic-gate  */
386*0Sstevel@tonic-gate void
387*0Sstevel@tonic-gate cpu_enable_intr(struct cpu *cp)
388*0Sstevel@tonic-gate {
389*0Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&cpu_lock));
390*0Sstevel@tonic-gate 
391*0Sstevel@tonic-gate 	cp->cpu_flags |= CPU_ENABLE;
392*0Sstevel@tonic-gate 
393*0Sstevel@tonic-gate 	intr_redist_all_cpus();
394*0Sstevel@tonic-gate }
395*0Sstevel@tonic-gate 
396*0Sstevel@tonic-gate /*
397*0Sstevel@tonic-gate  * Add function to callback list for intr_redist_all_cpus.  We keep two lists,
398*0Sstevel@tonic-gate  * one for weighted callbacks and one for normal callbacks. Weighted callbacks
399*0Sstevel@tonic-gate  * are issued to redirect interrupts of a specified weight, from heavy to
400*0Sstevel@tonic-gate  * light.  This allows all the interrupts of a given weight to be redistributed
401*0Sstevel@tonic-gate  * for all weighted nexus drivers prior to those of less weight.
402*0Sstevel@tonic-gate  */
403*0Sstevel@tonic-gate static void
404*0Sstevel@tonic-gate intr_dist_add_list(struct intr_dist **phead, void (*func)(void *), void *arg)
405*0Sstevel@tonic-gate {
406*0Sstevel@tonic-gate 	struct intr_dist *new = kmem_alloc(sizeof (*new), KM_SLEEP);
407*0Sstevel@tonic-gate 	struct intr_dist *iptr;
408*0Sstevel@tonic-gate 	struct intr_dist **pptr;
409*0Sstevel@tonic-gate 
410*0Sstevel@tonic-gate 	ASSERT(func);
411*0Sstevel@tonic-gate 	new->func = func;
412*0Sstevel@tonic-gate 	new->arg = arg;
413*0Sstevel@tonic-gate 	new->next = NULL;
414*0Sstevel@tonic-gate 
415*0Sstevel@tonic-gate 	/* Add to tail so that redistribution occurs in original order. */
416*0Sstevel@tonic-gate 	mutex_enter(&intr_dist_lock);
417*0Sstevel@tonic-gate 	for (iptr = *phead, pptr = phead; iptr != NULL;
418*0Sstevel@tonic-gate 	    pptr = &iptr->next, iptr = iptr->next) {
419*0Sstevel@tonic-gate 		/* check for problems as we locate the tail */
420*0Sstevel@tonic-gate 		if ((iptr->func == func) && (iptr->arg == arg)) {
421*0Sstevel@tonic-gate 			cmn_err(CE_PANIC, "intr_dist_add_list(): duplicate");
422*0Sstevel@tonic-gate 			/*NOTREACHED*/
423*0Sstevel@tonic-gate 		}
424*0Sstevel@tonic-gate 	}
425*0Sstevel@tonic-gate 	*pptr = new;
426*0Sstevel@tonic-gate 
427*0Sstevel@tonic-gate 	mutex_exit(&intr_dist_lock);
428*0Sstevel@tonic-gate }
429*0Sstevel@tonic-gate 
430*0Sstevel@tonic-gate void
431*0Sstevel@tonic-gate intr_dist_add(void (*func)(void *), void *arg)
432*0Sstevel@tonic-gate {
433*0Sstevel@tonic-gate 	intr_dist_add_list(&intr_dist_head, (void (*)(void *))func, arg);
434*0Sstevel@tonic-gate }
435*0Sstevel@tonic-gate 
436*0Sstevel@tonic-gate void
437*0Sstevel@tonic-gate intr_dist_add_weighted(void (*func)(void *, int32_t, int32_t), void *arg)
438*0Sstevel@tonic-gate {
439*0Sstevel@tonic-gate 	intr_dist_add_list(&intr_dist_whead, (void (*)(void *))func, arg);
440*0Sstevel@tonic-gate }
441*0Sstevel@tonic-gate 
442*0Sstevel@tonic-gate /*
443*0Sstevel@tonic-gate  * Search for the interrupt distribution structure with the specified
444*0Sstevel@tonic-gate  * mondo vec reg in the interrupt distribution list. If a match is found,
445*0Sstevel@tonic-gate  * then delete the entry from the list. The caller is responsible for
446*0Sstevel@tonic-gate  * modifying the mondo vector registers.
447*0Sstevel@tonic-gate  */
448*0Sstevel@tonic-gate static void
449*0Sstevel@tonic-gate intr_dist_rem_list(struct intr_dist **headp, void (*func)(void *), void *arg)
450*0Sstevel@tonic-gate {
451*0Sstevel@tonic-gate 	struct intr_dist *iptr;
452*0Sstevel@tonic-gate 	struct intr_dist **vect;
453*0Sstevel@tonic-gate 
454*0Sstevel@tonic-gate 	mutex_enter(&intr_dist_lock);
455*0Sstevel@tonic-gate 	for (iptr = *headp, vect = headp;
456*0Sstevel@tonic-gate 	    iptr != NULL; vect = &iptr->next, iptr = iptr->next) {
457*0Sstevel@tonic-gate 		if ((iptr->func == func) && (iptr->arg == arg)) {
458*0Sstevel@tonic-gate 			*vect = iptr->next;
459*0Sstevel@tonic-gate 			kmem_free(iptr, sizeof (struct intr_dist));
460*0Sstevel@tonic-gate 			mutex_exit(&intr_dist_lock);
461*0Sstevel@tonic-gate 			return;
462*0Sstevel@tonic-gate 		}
463*0Sstevel@tonic-gate 	}
464*0Sstevel@tonic-gate 
465*0Sstevel@tonic-gate 	if (!panicstr)
466*0Sstevel@tonic-gate 		cmn_err(CE_PANIC, "intr_dist_rem_list: not found");
467*0Sstevel@tonic-gate 	mutex_exit(&intr_dist_lock);
468*0Sstevel@tonic-gate }
469*0Sstevel@tonic-gate 
470*0Sstevel@tonic-gate void
471*0Sstevel@tonic-gate intr_dist_rem(void (*func)(void *), void *arg)
472*0Sstevel@tonic-gate {
473*0Sstevel@tonic-gate 	intr_dist_rem_list(&intr_dist_head, (void (*)(void *))func, arg);
474*0Sstevel@tonic-gate }
475*0Sstevel@tonic-gate 
476*0Sstevel@tonic-gate void
477*0Sstevel@tonic-gate intr_dist_rem_weighted(void (*func)(void *, int32_t, int32_t), void *arg)
478*0Sstevel@tonic-gate {
479*0Sstevel@tonic-gate 	intr_dist_rem_list(&intr_dist_whead, (void (*)(void *))func, arg);
480*0Sstevel@tonic-gate }
481*0Sstevel@tonic-gate 
482*0Sstevel@tonic-gate /*
483*0Sstevel@tonic-gate  * Initiate interrupt redistribution.  Redistribution improves the isolation
484*0Sstevel@tonic-gate  * associated with interrupt weights by ordering operations from heavy weight
485*0Sstevel@tonic-gate  * to light weight.  When a CPUs orientation changes relative to interrupts,
486*0Sstevel@tonic-gate  * there is *always* a redistribution to accommodate this change (call to
487*0Sstevel@tonic-gate  * intr_redist_all_cpus()).  As devices (not CPUs) attach/detach it is possible
488*0Sstevel@tonic-gate  * that a redistribution could improve the quality of an initialization. For
489*0Sstevel@tonic-gate  * example, if you are not using a NIC it may not be attached with s10 (devfs).
490*0Sstevel@tonic-gate  * If you then configure the NIC (ifconfig), this may cause the NIC to attach
491*0Sstevel@tonic-gate  * and plumb interrupts.  The CPU assignment for the NIC's interrupts is
492*0Sstevel@tonic-gate  * occurring late, so optimal "isolation" relative to weight is not occurring.
493*0Sstevel@tonic-gate  * The same applies to detach, although in this case doing the redistribution
494*0Sstevel@tonic-gate  * might improve "spread" for medium weight devices since the "isolation" of
495*0Sstevel@tonic-gate  * a higher weight device may no longer be present.
496*0Sstevel@tonic-gate  *
497*0Sstevel@tonic-gate  * NB: We should provide a utility to trigger redistribution (ala "intradm -r").
498*0Sstevel@tonic-gate  *
499*0Sstevel@tonic-gate  * NB: There is risk associated with automatically triggering execution of the
500*0Sstevel@tonic-gate  * redistribution code at arbitrary times. The risk comes from the fact that
501*0Sstevel@tonic-gate  * there is a lot of low-level hardware interaction associated with a
502*0Sstevel@tonic-gate  * redistribution.  At some point we may want this code to perform automatic
503*0Sstevel@tonic-gate  * redistribution (redistribution thread; trigger timeout when add/remove
504*0Sstevel@tonic-gate  * weight delta is large enough, and call cv_signal from timeout - causing
505*0Sstevel@tonic-gate  * thead to call i_ddi_intr_redist_all_cpus()) but this is considered too
506*0Sstevel@tonic-gate  * risky at this time.
507*0Sstevel@tonic-gate  */
508*0Sstevel@tonic-gate void
509*0Sstevel@tonic-gate i_ddi_intr_redist_all_cpus()
510*0Sstevel@tonic-gate {
511*0Sstevel@tonic-gate 	mutex_enter(&cpu_lock);
512*0Sstevel@tonic-gate 	INTR_DEBUG((CE_CONT, "intr_dist: i_ddi_intr_redist_all_cpus\n"));
513*0Sstevel@tonic-gate 	intr_redist_all_cpus();
514*0Sstevel@tonic-gate 	mutex_exit(&cpu_lock);
515*0Sstevel@tonic-gate }
516*0Sstevel@tonic-gate 
517*0Sstevel@tonic-gate /*
518*0Sstevel@tonic-gate  * Redistribute all interrupts
519*0Sstevel@tonic-gate  *
520*0Sstevel@tonic-gate  * This function redistributes all interrupting devices, running the
521*0Sstevel@tonic-gate  * parent callback functions for each node.
522*0Sstevel@tonic-gate  */
523*0Sstevel@tonic-gate void
524*0Sstevel@tonic-gate intr_redist_all_cpus(void)
525*0Sstevel@tonic-gate {
526*0Sstevel@tonic-gate 	struct cpu *cp;
527*0Sstevel@tonic-gate 	struct intr_dist *iptr;
528*0Sstevel@tonic-gate 	int32_t weight, max_weight;
529*0Sstevel@tonic-gate 
530*0Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&cpu_lock));
531*0Sstevel@tonic-gate 	mutex_enter(&intr_dist_lock);
532*0Sstevel@tonic-gate 
533*0Sstevel@tonic-gate 	/*
534*0Sstevel@tonic-gate 	 * zero cpu_intr_weight on all cpus - it is safe to traverse
535*0Sstevel@tonic-gate 	 * cpu_list since we hold cpu_lock.
536*0Sstevel@tonic-gate 	 */
537*0Sstevel@tonic-gate 	cp = cpu_list;
538*0Sstevel@tonic-gate 	do {
539*0Sstevel@tonic-gate 		cp->cpu_intr_weight = 0;
540*0Sstevel@tonic-gate 	} while ((cp = cp->cpu_next) != cpu_list);
541*0Sstevel@tonic-gate 
542*0Sstevel@tonic-gate 	/*
543*0Sstevel@tonic-gate 	 * Assume that this redistribution may encounter a device weight
544*0Sstevel@tonic-gate 	 * via driver.conf tuning of "ddi-intr-weight" that is at most
545*0Sstevel@tonic-gate 	 * intr_dist_weight_maxfactor times larger.
546*0Sstevel@tonic-gate 	 */
547*0Sstevel@tonic-gate 	max_weight = intr_dist_weight_max * intr_dist_weight_maxfactor;
548*0Sstevel@tonic-gate 	if (max_weight > intr_dist_weight_maxmax)
549*0Sstevel@tonic-gate 		max_weight = intr_dist_weight_maxmax;
550*0Sstevel@tonic-gate 	intr_dist_weight_max = 1;
551*0Sstevel@tonic-gate 
552*0Sstevel@tonic-gate 	INTR_DEBUG((CE_CONT, "intr_dist: "
553*0Sstevel@tonic-gate 	    "intr_redist_all_cpus: %d-0\n", max_weight));
554*0Sstevel@tonic-gate 
555*0Sstevel@tonic-gate 	/*
556*0Sstevel@tonic-gate 	 * Redistribute weighted, from heavy to light.  The callback that
557*0Sstevel@tonic-gate 	 * specifies a weight equal to weight_max should redirect all
558*0Sstevel@tonic-gate 	 * interrupts of weight weight_max or greater [weight_max, inf.).
559*0Sstevel@tonic-gate 	 * Interrupts of lesser weight should be processed on the call with
560*0Sstevel@tonic-gate 	 * the matching weight. This allows all the heaver weight interrupts
561*0Sstevel@tonic-gate 	 * on all weighted busses (multiple pci busses) to be redirected prior
562*0Sstevel@tonic-gate 	 * to any lesser weight interrupts.
563*0Sstevel@tonic-gate 	 */
564*0Sstevel@tonic-gate 	for (weight = max_weight; weight >= 0; weight--)
565*0Sstevel@tonic-gate 		for (iptr = intr_dist_whead; iptr != NULL; iptr = iptr->next)
566*0Sstevel@tonic-gate 			((void (*)(void *, int32_t, int32_t))iptr->func)
567*0Sstevel@tonic-gate 			    (iptr->arg, max_weight, weight);
568*0Sstevel@tonic-gate 
569*0Sstevel@tonic-gate 	/* redistribute normal (non-weighted) interrupts */
570*0Sstevel@tonic-gate 	for (iptr = intr_dist_head; iptr != NULL; iptr = iptr->next)
571*0Sstevel@tonic-gate 		((void (*)(void *))iptr->func)(iptr->arg);
572*0Sstevel@tonic-gate 	mutex_exit(&intr_dist_lock);
573*0Sstevel@tonic-gate }
574*0Sstevel@tonic-gate 
575*0Sstevel@tonic-gate void
576*0Sstevel@tonic-gate intr_redist_all_cpus_shutdown(void)
577*0Sstevel@tonic-gate {
578*0Sstevel@tonic-gate 	intr_policy = INTR_CURRENT_CPU;
579*0Sstevel@tonic-gate 	intr_redist_all_cpus();
580*0Sstevel@tonic-gate }
581*0Sstevel@tonic-gate 
582*0Sstevel@tonic-gate /*
583*0Sstevel@tonic-gate  * Determine what CPU to target, based on interrupt policy.
584*0Sstevel@tonic-gate  *
585*0Sstevel@tonic-gate  * INTR_FLAT_DIST: hold a current CPU pointer in a static variable and
586*0Sstevel@tonic-gate  *	advance through interrupt enabled cpus (round-robin).
587*0Sstevel@tonic-gate  *
588*0Sstevel@tonic-gate  * INTR_WEIGHTED_DIST: search for an enabled CPU with the lowest
589*0Sstevel@tonic-gate  *	cpu_intr_weight, round robin when all equal.
590*0Sstevel@tonic-gate  *
591*0Sstevel@tonic-gate  *	Weighted interrupt distribution provides two things: "spread" of weight
592*0Sstevel@tonic-gate  *	(associated with algorithm itself) and "isolation" (associated with a
593*0Sstevel@tonic-gate  *	particular device weight). A redistribution is what provides optimal
594*0Sstevel@tonic-gate  *	"isolation" of heavy weight interrupts, optimal "spread" of weight
595*0Sstevel@tonic-gate  *	(relative to what came before) is always occurring.
596*0Sstevel@tonic-gate  *
597*0Sstevel@tonic-gate  *	An interrupt weight is a subjective number that represents the
598*0Sstevel@tonic-gate  *	percentage of a CPU required to service a device's interrupts: the
599*0Sstevel@tonic-gate  *	default weight is 0% (however the algorithm still maintains
600*0Sstevel@tonic-gate  *	round-robin), a network interface controller (NIC) may have a large
601*0Sstevel@tonic-gate  *	weight (35%). Interrupt weight only has meaning relative to the
602*0Sstevel@tonic-gate  *	interrupt weight of other devices: a CPU can be weighted more than
603*0Sstevel@tonic-gate  *	100%, and a single device might consume more than 100% of a CPU.
604*0Sstevel@tonic-gate  *
605*0Sstevel@tonic-gate  *	A coarse interrupt weight can be defined by the parent nexus driver
606*0Sstevel@tonic-gate  *	based on bus specific information, like pci class codes. A nexus
607*0Sstevel@tonic-gate  *	driver that supports device interrupt weighting for its children
608*0Sstevel@tonic-gate  *	should call intr_dist_cpuid_add/rem_device_weight(), which adds
609*0Sstevel@tonic-gate  *	and removes the weight of a device from the CPU that an interrupt
610*0Sstevel@tonic-gate  *	is directed at.  The quality of initialization improves when the
611*0Sstevel@tonic-gate  *	device interrupt weights more accurately reflect actual run-time
612*0Sstevel@tonic-gate  *	weights, and as the assignments are ordered from heavy to light.
613*0Sstevel@tonic-gate  *
614*0Sstevel@tonic-gate  *	The implementation also supports interrupt weight being specified in
615*0Sstevel@tonic-gate  *	driver.conf files via the property "ddi-intr-weight", which takes
616*0Sstevel@tonic-gate  *	precedence over the nexus supplied weight.  This support is added to
617*0Sstevel@tonic-gate  *	permit possible tweaking in the product in response to customer
618*0Sstevel@tonic-gate  *	problems. This is not a formal or committed interface.
619*0Sstevel@tonic-gate  *
620*0Sstevel@tonic-gate  *	While a weighted approach chooses the CPU providing the best spread
621*0Sstevel@tonic-gate  *	given past weights, less than optimal isolation can result in cases
622*0Sstevel@tonic-gate  *	where heavy weight devices show up last. The nexus driver's interrupt
623*0Sstevel@tonic-gate  *	redistribution logic should use intr_dist_add/rem_weighted so that
624*0Sstevel@tonic-gate  *	interrupts can be redistributed heavy first for optimal isolation.
625*0Sstevel@tonic-gate  */
626*0Sstevel@tonic-gate uint32_t
627*0Sstevel@tonic-gate intr_dist_cpuid(void)
628*0Sstevel@tonic-gate {
629*0Sstevel@tonic-gate 	static struct cpu	*curr_cpu;
630*0Sstevel@tonic-gate 	struct cpu		*start_cpu;
631*0Sstevel@tonic-gate 	struct cpu		*new_cpu;
632*0Sstevel@tonic-gate 	struct cpu		*cp;
633*0Sstevel@tonic-gate 	int			cpuid = -1;
634*0Sstevel@tonic-gate 
635*0Sstevel@tonic-gate 	/* Establish exclusion for curr_cpu and cpu_intr_weight manipulation */
636*0Sstevel@tonic-gate 	mutex_enter(&intr_dist_cpu_lock);
637*0Sstevel@tonic-gate 
638*0Sstevel@tonic-gate 	switch (intr_policy) {
639*0Sstevel@tonic-gate 	case INTR_CURRENT_CPU:
640*0Sstevel@tonic-gate 		cpuid = CPU->cpu_id;
641*0Sstevel@tonic-gate 		break;
642*0Sstevel@tonic-gate 
643*0Sstevel@tonic-gate 	case INTR_BOOT_CPU:
644*0Sstevel@tonic-gate 		panic("INTR_BOOT_CPU no longer supported.");
645*0Sstevel@tonic-gate 		/*NOTREACHED*/
646*0Sstevel@tonic-gate 
647*0Sstevel@tonic-gate 	case INTR_FLAT_DIST:
648*0Sstevel@tonic-gate 	case INTR_WEIGHTED_DIST:
649*0Sstevel@tonic-gate 	default:
650*0Sstevel@tonic-gate 		/*
651*0Sstevel@tonic-gate 		 * Ensure that curr_cpu is valid - cpu_next will be NULL if
652*0Sstevel@tonic-gate 		 * the cpu has been deleted (cpu structs are never freed).
653*0Sstevel@tonic-gate 		 */
654*0Sstevel@tonic-gate 		if (curr_cpu == NULL || curr_cpu->cpu_next == NULL)
655*0Sstevel@tonic-gate 			curr_cpu = CPU;
656*0Sstevel@tonic-gate 
657*0Sstevel@tonic-gate 		/*
658*0Sstevel@tonic-gate 		 * Advance to online CPU after curr_cpu (round-robin). For
659*0Sstevel@tonic-gate 		 * INTR_WEIGHTED_DIST we choose the cpu with the lightest
660*0Sstevel@tonic-gate 		 * weight.  For a nexus that does not support weight the
661*0Sstevel@tonic-gate 		 * default weight of zero is used. We degrade to round-robin
662*0Sstevel@tonic-gate 		 * behavior among equal weightes.  The default weight is zero
663*0Sstevel@tonic-gate 		 * and round-robin behavior continues.
664*0Sstevel@tonic-gate 		 *
665*0Sstevel@tonic-gate 		 * Disable preemption while traversing cpu_next_onln to
666*0Sstevel@tonic-gate 		 * ensure the list does not change.  This works because
667*0Sstevel@tonic-gate 		 * modifiers of this list and other lists in a struct cpu
668*0Sstevel@tonic-gate 		 * call pause_cpus() before making changes.
669*0Sstevel@tonic-gate 		 */
670*0Sstevel@tonic-gate 		kpreempt_disable();
671*0Sstevel@tonic-gate 		cp = start_cpu = curr_cpu->cpu_next_onln;
672*0Sstevel@tonic-gate 		new_cpu = NULL;
673*0Sstevel@tonic-gate 		do {
674*0Sstevel@tonic-gate 			/* Skip CPUs with interrupts disabled */
675*0Sstevel@tonic-gate 			if ((cp->cpu_flags & CPU_ENABLE) == 0)
676*0Sstevel@tonic-gate 				continue;
677*0Sstevel@tonic-gate 
678*0Sstevel@tonic-gate 			if (intr_policy == INTR_FLAT_DIST) {
679*0Sstevel@tonic-gate 				/* select CPU */
680*0Sstevel@tonic-gate 				new_cpu = cp;
681*0Sstevel@tonic-gate 				break;
682*0Sstevel@tonic-gate 			} else if ((new_cpu == NULL) ||
683*0Sstevel@tonic-gate 			    (cp->cpu_intr_weight < new_cpu->cpu_intr_weight)) {
684*0Sstevel@tonic-gate 				/* Choose if lighter weight */
685*0Sstevel@tonic-gate 				new_cpu = cp;
686*0Sstevel@tonic-gate 			}
687*0Sstevel@tonic-gate 		} while ((cp = cp->cpu_next_onln) != start_cpu);
688*0Sstevel@tonic-gate 		ASSERT(new_cpu);
689*0Sstevel@tonic-gate 		cpuid = new_cpu->cpu_id;
690*0Sstevel@tonic-gate 
691*0Sstevel@tonic-gate 		INTR_DEBUG((CE_CONT, "intr_dist: cpu %2d weight %3d: "
692*0Sstevel@tonic-gate 		    "targeted\n", cpuid, new_cpu->cpu_intr_weight));
693*0Sstevel@tonic-gate 
694*0Sstevel@tonic-gate 		/* update static pointer for next round-robin */
695*0Sstevel@tonic-gate 		curr_cpu = new_cpu;
696*0Sstevel@tonic-gate 		kpreempt_enable();
697*0Sstevel@tonic-gate 		break;
698*0Sstevel@tonic-gate 	}
699*0Sstevel@tonic-gate 	mutex_exit(&intr_dist_cpu_lock);
700*0Sstevel@tonic-gate 	return (cpuid);
701*0Sstevel@tonic-gate }
702*0Sstevel@tonic-gate 
703*0Sstevel@tonic-gate /*
704*0Sstevel@tonic-gate  * Add or remove the the weight of a device from a CPUs interrupt weight.
705*0Sstevel@tonic-gate  *
706*0Sstevel@tonic-gate  * We expect nexus drivers to call intr_dist_cpuid_add/rem_device_weight for
707*0Sstevel@tonic-gate  * their children to improve the overall quality of interrupt initialization.
708*0Sstevel@tonic-gate  *
709*0Sstevel@tonic-gate  * If a nexues shares the CPU returned by a single intr_dist_cpuid() call
710*0Sstevel@tonic-gate  * among multiple devices (sharing ino) then the nexus should call
711*0Sstevel@tonic-gate  * intr_dist_cpuid_add/rem_device_weight for each device separately. Devices
712*0Sstevel@tonic-gate  * that share must specify the same cpuid.
713*0Sstevel@tonic-gate  *
714*0Sstevel@tonic-gate  * If a nexus driver is unable to determine the cpu at remove_intr time
715*0Sstevel@tonic-gate  * for some of its interrupts, then it should not call add_device_weight -
716*0Sstevel@tonic-gate  * intr_dist_cpuid will still provide round-robin.
717*0Sstevel@tonic-gate  *
718*0Sstevel@tonic-gate  * An established device weight (from dev_info node) takes precedence over
719*0Sstevel@tonic-gate  * the weight passed in.  If a device weight is not already established
720*0Sstevel@tonic-gate  * then the passed in nexus weight is established.
721*0Sstevel@tonic-gate  */
722*0Sstevel@tonic-gate void
723*0Sstevel@tonic-gate intr_dist_cpuid_add_device_weight(uint32_t cpuid,
724*0Sstevel@tonic-gate     dev_info_t *dip, int32_t nweight)
725*0Sstevel@tonic-gate {
726*0Sstevel@tonic-gate 	int32_t		eweight;
727*0Sstevel@tonic-gate 
728*0Sstevel@tonic-gate 	/*
729*0Sstevel@tonic-gate 	 * For non-weighted policy everything has weight of zero (and we get
730*0Sstevel@tonic-gate 	 * round-robin distribution from intr_dist_cpuid).
731*0Sstevel@tonic-gate 	 * NB: intr_policy is limited to this file. A weighted nexus driver is
732*0Sstevel@tonic-gate 	 * calls this rouitne even if intr_policy has been patched to
733*0Sstevel@tonic-gate 	 * INTR_FLAG_DIST.
734*0Sstevel@tonic-gate 	 */
735*0Sstevel@tonic-gate 	ASSERT(dip);
736*0Sstevel@tonic-gate 	if (intr_policy != INTR_WEIGHTED_DIST)
737*0Sstevel@tonic-gate 		return;
738*0Sstevel@tonic-gate 
739*0Sstevel@tonic-gate 	eweight = i_ddi_get_intr_weight(dip);
740*0Sstevel@tonic-gate 	INTR_DEBUG((CE_CONT, "intr_dist: cpu %2d weight %3d: +%2d/%2d for "
741*0Sstevel@tonic-gate 	    "%s#%d/%s#%d\n", cpuid, cpu[cpuid]->cpu_intr_weight,
742*0Sstevel@tonic-gate 	    nweight, eweight, ddi_driver_name(ddi_get_parent(dip)),
743*0Sstevel@tonic-gate 	    ddi_get_instance(ddi_get_parent(dip)),
744*0Sstevel@tonic-gate 	    ddi_driver_name(dip), ddi_get_instance(dip)));
745*0Sstevel@tonic-gate 
746*0Sstevel@tonic-gate 	/* if no establish weight, establish nexus weight */
747*0Sstevel@tonic-gate 	if (eweight < 0) {
748*0Sstevel@tonic-gate 		if (nweight > 0)
749*0Sstevel@tonic-gate 			(void) i_ddi_set_intr_weight(dip, nweight);
750*0Sstevel@tonic-gate 		else
751*0Sstevel@tonic-gate 			nweight = 0;
752*0Sstevel@tonic-gate 	} else
753*0Sstevel@tonic-gate 		nweight = eweight;	/* use established weight */
754*0Sstevel@tonic-gate 
755*0Sstevel@tonic-gate 	/* Establish exclusion for cpu_intr_weight manipulation */
756*0Sstevel@tonic-gate 	mutex_enter(&intr_dist_cpu_lock);
757*0Sstevel@tonic-gate 	cpu[cpuid]->cpu_intr_weight += nweight;
758*0Sstevel@tonic-gate 
759*0Sstevel@tonic-gate 	/* update intr_dist_weight_max */
760*0Sstevel@tonic-gate 	if (nweight > intr_dist_weight_max)
761*0Sstevel@tonic-gate 		intr_dist_weight_max = nweight;
762*0Sstevel@tonic-gate 	mutex_exit(&intr_dist_cpu_lock);
763*0Sstevel@tonic-gate }
764*0Sstevel@tonic-gate 
765*0Sstevel@tonic-gate void
766*0Sstevel@tonic-gate intr_dist_cpuid_rem_device_weight(uint32_t cpuid, dev_info_t *dip)
767*0Sstevel@tonic-gate {
768*0Sstevel@tonic-gate 	struct cpu	*cp;
769*0Sstevel@tonic-gate 	int32_t		weight;
770*0Sstevel@tonic-gate 
771*0Sstevel@tonic-gate 	ASSERT(dip);
772*0Sstevel@tonic-gate 	if (intr_policy != INTR_WEIGHTED_DIST)
773*0Sstevel@tonic-gate 		return;
774*0Sstevel@tonic-gate 
775*0Sstevel@tonic-gate 	/* remove weight of device from cpu */
776*0Sstevel@tonic-gate 	weight = i_ddi_get_intr_weight(dip);
777*0Sstevel@tonic-gate 	if (weight < 0)
778*0Sstevel@tonic-gate 		weight = 0;
779*0Sstevel@tonic-gate 	INTR_DEBUG((CE_CONT, "intr_dist: cpu %2d weight %3d: -%2d    for "
780*0Sstevel@tonic-gate 	    "%s#%d/%s#%d\n", cpuid, cpu[cpuid]->cpu_intr_weight, weight,
781*0Sstevel@tonic-gate 	    ddi_driver_name(ddi_get_parent(dip)),
782*0Sstevel@tonic-gate 	    ddi_get_instance(ddi_get_parent(dip)),
783*0Sstevel@tonic-gate 	    ddi_driver_name(dip), ddi_get_instance(dip)));
784*0Sstevel@tonic-gate 
785*0Sstevel@tonic-gate 	/* Establish exclusion for cpu_intr_weight manipulation */
786*0Sstevel@tonic-gate 	mutex_enter(&intr_dist_cpu_lock);
787*0Sstevel@tonic-gate 	cp = cpu[cpuid];
788*0Sstevel@tonic-gate 	cp->cpu_intr_weight -= weight;
789*0Sstevel@tonic-gate 	if (cp->cpu_intr_weight < 0)
790*0Sstevel@tonic-gate 		cp->cpu_intr_weight = 0;	/* sanity */
791*0Sstevel@tonic-gate 	mutex_exit(&intr_dist_cpu_lock);
792*0Sstevel@tonic-gate }
793