xref: /onnv-gate/usr/src/uts/i86pc/os/intr.c (revision 11389:dd00b884e84f)
10Sstevel@tonic-gate /*
20Sstevel@tonic-gate  * CDDL HEADER START
30Sstevel@tonic-gate  *
40Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
51455Sandrei  * Common Development and Distribution License (the "License").
61455Sandrei  * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate  *
80Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate  * See the License for the specific language governing permissions
110Sstevel@tonic-gate  * and limitations under the License.
120Sstevel@tonic-gate  *
130Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate  *
190Sstevel@tonic-gate  * CDDL HEADER END
200Sstevel@tonic-gate  */
215084Sjohnlev 
220Sstevel@tonic-gate /*
238803SJonathan.Haslam@Sun.COM  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
240Sstevel@tonic-gate  * Use is subject to license terms.
250Sstevel@tonic-gate  */
260Sstevel@tonic-gate 
270Sstevel@tonic-gate #include <sys/cpuvar.h>
289637SRandy.Fishel@Sun.COM #include <sys/cpu_event.h>
290Sstevel@tonic-gate #include <sys/regset.h>
300Sstevel@tonic-gate #include <sys/psw.h>
310Sstevel@tonic-gate #include <sys/types.h>
320Sstevel@tonic-gate #include <sys/thread.h>
330Sstevel@tonic-gate #include <sys/systm.h>
340Sstevel@tonic-gate #include <sys/segments.h>
350Sstevel@tonic-gate #include <sys/pcb.h>
360Sstevel@tonic-gate #include <sys/trap.h>
370Sstevel@tonic-gate #include <sys/ftrace.h>
380Sstevel@tonic-gate #include <sys/traptrace.h>
390Sstevel@tonic-gate #include <sys/clock.h>
400Sstevel@tonic-gate #include <sys/panic.h>
410Sstevel@tonic-gate #include <sys/disp.h>
420Sstevel@tonic-gate #include <vm/seg_kp.h>
430Sstevel@tonic-gate #include <sys/stack.h>
440Sstevel@tonic-gate #include <sys/sysmacros.h>
450Sstevel@tonic-gate #include <sys/cmn_err.h>
460Sstevel@tonic-gate #include <sys/kstat.h>
470Sstevel@tonic-gate #include <sys/smp_impldefs.h>
480Sstevel@tonic-gate #include <sys/pool_pset.h>
490Sstevel@tonic-gate #include <sys/zone.h>
500Sstevel@tonic-gate #include <sys/bitmap.h>
513446Smrj #include <sys/archsystm.h>
523446Smrj #include <sys/machsystm.h>
533446Smrj #include <sys/ontrap.h>
543446Smrj #include <sys/x86_archext.h>
553446Smrj #include <sys/promif.h>
564191Sjosephb #include <vm/hat_i86.h>
575084Sjohnlev #if defined(__xpv)
585084Sjohnlev #include <sys/hypervisor.h>
595084Sjohnlev #endif
600Sstevel@tonic-gate 
610Sstevel@tonic-gate 
#if defined(__xpv) && defined(DEBUG)

/*
 * Debug-only aids for tracking down event (interrupt) enable/disable bugs
 * when running under the hypervisor.
 *
 * The panic message below is intended as an aid to interrupt debugging.
 * The associated assertion tests the condition of enabling events when
 * events are already enabled.  The implication is that whatever code the
 * programmer thought was protected by having events disabled until the
 * second enable happened really wasn't protected at all.
 */

/* controls the debug panic check */
int stistipanic = 1;

/* text of the panic message described above */
const char *stistimsg = "stisti";

/*
 * NOTE(review): presumably the counterpart of lastcli[] (the last place
 * events were enabled on each cpu); the users are not visible here - confirm.
 */
ulong_t laststi[NCPU];

/*
 * This variable tracks the last place events were disabled on each cpu;
 * it assists in debugging when asserts that interrupts are enabled trip.
 */
ulong_t lastcli[NCPU];

#endif
855084Sjohnlev 
860Sstevel@tonic-gate /*
873446Smrj  * Set cpu's base SPL level to the highest active interrupt level
880Sstevel@tonic-gate  */
893446Smrj void
903446Smrj set_base_spl(void)
910Sstevel@tonic-gate {
923446Smrj 	struct cpu *cpu = CPU;
933446Smrj 	uint16_t active = (uint16_t)cpu->cpu_intr_actv;
940Sstevel@tonic-gate 
953446Smrj 	cpu->cpu_base_spl = active == 0 ? 0 : bsrw_insn(active);
960Sstevel@tonic-gate }
970Sstevel@tonic-gate 
980Sstevel@tonic-gate /*
990Sstevel@tonic-gate  * Do all the work necessary to set up the cpu and thread structures
1000Sstevel@tonic-gate  * to dispatch a high-level interrupt.
1010Sstevel@tonic-gate  *
1020Sstevel@tonic-gate  * Returns 0 if we're -not- already on the high-level interrupt stack,
1030Sstevel@tonic-gate  * (and *must* switch to it), non-zero if we are already on that stack.
1040Sstevel@tonic-gate  *
1050Sstevel@tonic-gate  * Called with interrupts masked.
1060Sstevel@tonic-gate  * The 'pil' is already set to the appropriate level for rp->r_trapno.
1070Sstevel@tonic-gate  */
1083446Smrj static int
1090Sstevel@tonic-gate hilevel_intr_prolog(struct cpu *cpu, uint_t pil, uint_t oldpil, struct regs *rp)
1100Sstevel@tonic-gate {
1110Sstevel@tonic-gate 	struct machcpu *mcpu = &cpu->cpu_m;
1120Sstevel@tonic-gate 	uint_t mask;
113590Sesolom 	hrtime_t intrtime;
1143446Smrj 	hrtime_t now = tsc_read();
1150Sstevel@tonic-gate 
1160Sstevel@tonic-gate 	ASSERT(pil > LOCK_LEVEL);
1170Sstevel@tonic-gate 
1180Sstevel@tonic-gate 	if (pil == CBE_HIGH_PIL) {
1190Sstevel@tonic-gate 		cpu->cpu_profile_pil = oldpil;
1200Sstevel@tonic-gate 		if (USERMODE(rp->r_cs)) {
1210Sstevel@tonic-gate 			cpu->cpu_profile_pc = 0;
1220Sstevel@tonic-gate 			cpu->cpu_profile_upc = rp->r_pc;
1238803SJonathan.Haslam@Sun.COM 			cpu->cpu_cpcprofile_pc = 0;
1248803SJonathan.Haslam@Sun.COM 			cpu->cpu_cpcprofile_upc = rp->r_pc;
1250Sstevel@tonic-gate 		} else {
1260Sstevel@tonic-gate 			cpu->cpu_profile_pc = rp->r_pc;
1270Sstevel@tonic-gate 			cpu->cpu_profile_upc = 0;
1288803SJonathan.Haslam@Sun.COM 			cpu->cpu_cpcprofile_pc = rp->r_pc;
1298803SJonathan.Haslam@Sun.COM 			cpu->cpu_cpcprofile_upc = 0;
1300Sstevel@tonic-gate 		}
1310Sstevel@tonic-gate 	}
1320Sstevel@tonic-gate 
1330Sstevel@tonic-gate 	mask = cpu->cpu_intr_actv & CPU_INTR_ACTV_HIGH_LEVEL_MASK;
1340Sstevel@tonic-gate 	if (mask != 0) {
1350Sstevel@tonic-gate 		int nestpil;
1360Sstevel@tonic-gate 
1370Sstevel@tonic-gate 		/*
1380Sstevel@tonic-gate 		 * We have interrupted another high-level interrupt.
1390Sstevel@tonic-gate 		 * Load starting timestamp, compute interval, update
1400Sstevel@tonic-gate 		 * cumulative counter.
1410Sstevel@tonic-gate 		 */
1420Sstevel@tonic-gate 		nestpil = bsrw_insn((uint16_t)mask);
1430Sstevel@tonic-gate 		ASSERT(nestpil < pil);
1443446Smrj 		intrtime = now -
1450Sstevel@tonic-gate 		    mcpu->pil_high_start[nestpil - (LOCK_LEVEL + 1)];
146916Sschwartz 		mcpu->intrstat[nestpil][0] += intrtime;
147590Sesolom 		cpu->cpu_intracct[cpu->cpu_mstate] += intrtime;
1480Sstevel@tonic-gate 		/*
1490Sstevel@tonic-gate 		 * Another high-level interrupt is active below this one, so
1500Sstevel@tonic-gate 		 * there is no need to check for an interrupt thread.  That
1510Sstevel@tonic-gate 		 * will be done by the lowest priority high-level interrupt
1520Sstevel@tonic-gate 		 * active.
1530Sstevel@tonic-gate 		 */
1540Sstevel@tonic-gate 	} else {
1550Sstevel@tonic-gate 		kthread_t *t = cpu->cpu_thread;
1560Sstevel@tonic-gate 
1570Sstevel@tonic-gate 		/*
1580Sstevel@tonic-gate 		 * See if we are interrupting a low-level interrupt thread.
1590Sstevel@tonic-gate 		 * If so, account for its time slice only if its time stamp
1600Sstevel@tonic-gate 		 * is non-zero.
1610Sstevel@tonic-gate 		 */
1620Sstevel@tonic-gate 		if ((t->t_flag & T_INTR_THREAD) != 0 && t->t_intr_start != 0) {
1633446Smrj 			intrtime = now - t->t_intr_start;
164916Sschwartz 			mcpu->intrstat[t->t_pil][0] += intrtime;
165590Sesolom 			cpu->cpu_intracct[cpu->cpu_mstate] += intrtime;
1660Sstevel@tonic-gate 			t->t_intr_start = 0;
1670Sstevel@tonic-gate 		}
1680Sstevel@tonic-gate 	}
1690Sstevel@tonic-gate 
1700Sstevel@tonic-gate 	/*
1710Sstevel@tonic-gate 	 * Store starting timestamp in CPU structure for this PIL.
1720Sstevel@tonic-gate 	 */
1733446Smrj 	mcpu->pil_high_start[pil - (LOCK_LEVEL + 1)] = now;
1740Sstevel@tonic-gate 
1750Sstevel@tonic-gate 	ASSERT((cpu->cpu_intr_actv & (1 << pil)) == 0);
1760Sstevel@tonic-gate 
1770Sstevel@tonic-gate 	if (pil == 15) {
1780Sstevel@tonic-gate 		/*
1790Sstevel@tonic-gate 		 * To support reentrant level 15 interrupts, we maintain a
1800Sstevel@tonic-gate 		 * recursion count in the top half of cpu_intr_actv.  Only
1810Sstevel@tonic-gate 		 * when this count hits zero do we clear the PIL 15 bit from
1820Sstevel@tonic-gate 		 * the lower half of cpu_intr_actv.
1830Sstevel@tonic-gate 		 */
1840Sstevel@tonic-gate 		uint16_t *refcntp = (uint16_t *)&cpu->cpu_intr_actv + 1;
1850Sstevel@tonic-gate 		(*refcntp)++;
1860Sstevel@tonic-gate 	}
1870Sstevel@tonic-gate 
1880Sstevel@tonic-gate 	mask = cpu->cpu_intr_actv;
1890Sstevel@tonic-gate 
1900Sstevel@tonic-gate 	cpu->cpu_intr_actv |= (1 << pil);
1910Sstevel@tonic-gate 
1920Sstevel@tonic-gate 	return (mask & CPU_INTR_ACTV_HIGH_LEVEL_MASK);
1930Sstevel@tonic-gate }
1940Sstevel@tonic-gate 
1950Sstevel@tonic-gate /*
1960Sstevel@tonic-gate  * Does most of the work of returning from a high level interrupt.
1970Sstevel@tonic-gate  *
1980Sstevel@tonic-gate  * Returns 0 if there are no more high level interrupts (in which
1990Sstevel@tonic-gate  * case we must switch back to the interrupted thread stack) or
2000Sstevel@tonic-gate  * non-zero if there are more (in which case we should stay on it).
2010Sstevel@tonic-gate  *
2020Sstevel@tonic-gate  * Called with interrupts masked
2030Sstevel@tonic-gate  */
2043446Smrj static int
2050Sstevel@tonic-gate hilevel_intr_epilog(struct cpu *cpu, uint_t pil, uint_t oldpil, uint_t vecnum)
2060Sstevel@tonic-gate {
2070Sstevel@tonic-gate 	struct machcpu *mcpu = &cpu->cpu_m;
2080Sstevel@tonic-gate 	uint_t mask;
209590Sesolom 	hrtime_t intrtime;
2103446Smrj 	hrtime_t now = tsc_read();
2110Sstevel@tonic-gate 
2120Sstevel@tonic-gate 	ASSERT(mcpu->mcpu_pri == pil);
2130Sstevel@tonic-gate 
2140Sstevel@tonic-gate 	cpu->cpu_stats.sys.intr[pil - 1]++;
2150Sstevel@tonic-gate 
2160Sstevel@tonic-gate 	ASSERT(cpu->cpu_intr_actv & (1 << pil));
2170Sstevel@tonic-gate 
2180Sstevel@tonic-gate 	if (pil == 15) {
2190Sstevel@tonic-gate 		/*
2200Sstevel@tonic-gate 		 * To support reentrant level 15 interrupts, we maintain a
2210Sstevel@tonic-gate 		 * recursion count in the top half of cpu_intr_actv.  Only
2220Sstevel@tonic-gate 		 * when this count hits zero do we clear the PIL 15 bit from
2230Sstevel@tonic-gate 		 * the lower half of cpu_intr_actv.
2240Sstevel@tonic-gate 		 */
2250Sstevel@tonic-gate 		uint16_t *refcntp = (uint16_t *)&cpu->cpu_intr_actv + 1;
2260Sstevel@tonic-gate 
2270Sstevel@tonic-gate 		ASSERT(*refcntp > 0);
2280Sstevel@tonic-gate 
2290Sstevel@tonic-gate 		if (--(*refcntp) == 0)
2300Sstevel@tonic-gate 			cpu->cpu_intr_actv &= ~(1 << pil);
2310Sstevel@tonic-gate 	} else {
2320Sstevel@tonic-gate 		cpu->cpu_intr_actv &= ~(1 << pil);
2330Sstevel@tonic-gate 	}
2340Sstevel@tonic-gate 
2350Sstevel@tonic-gate 	ASSERT(mcpu->pil_high_start[pil - (LOCK_LEVEL + 1)] != 0);
2360Sstevel@tonic-gate 
2373446Smrj 	intrtime = now - mcpu->pil_high_start[pil - (LOCK_LEVEL + 1)];
238916Sschwartz 	mcpu->intrstat[pil][0] += intrtime;
239590Sesolom 	cpu->cpu_intracct[cpu->cpu_mstate] += intrtime;
2400Sstevel@tonic-gate 
2410Sstevel@tonic-gate 	/*
2420Sstevel@tonic-gate 	 * Check for lower-pil nested high-level interrupt beneath
2430Sstevel@tonic-gate 	 * current one.  If so, place a starting timestamp in its
2440Sstevel@tonic-gate 	 * pil_high_start entry.
2450Sstevel@tonic-gate 	 */
2460Sstevel@tonic-gate 	mask = cpu->cpu_intr_actv & CPU_INTR_ACTV_HIGH_LEVEL_MASK;
2470Sstevel@tonic-gate 	if (mask != 0) {
2480Sstevel@tonic-gate 		int nestpil;
2490Sstevel@tonic-gate 
2500Sstevel@tonic-gate 		/*
2510Sstevel@tonic-gate 		 * find PIL of nested interrupt
2520Sstevel@tonic-gate 		 */
2530Sstevel@tonic-gate 		nestpil = bsrw_insn((uint16_t)mask);
2540Sstevel@tonic-gate 		ASSERT(nestpil < pil);
2553446Smrj 		mcpu->pil_high_start[nestpil - (LOCK_LEVEL + 1)] = now;
2560Sstevel@tonic-gate 		/*
2570Sstevel@tonic-gate 		 * (Another high-level interrupt is active below this one,
2580Sstevel@tonic-gate 		 * so there is no need to check for an interrupt
2590Sstevel@tonic-gate 		 * thread.  That will be done by the lowest priority
2600Sstevel@tonic-gate 		 * high-level interrupt active.)
2610Sstevel@tonic-gate 		 */
2620Sstevel@tonic-gate 	} else {
2630Sstevel@tonic-gate 		/*
2640Sstevel@tonic-gate 		 * Check to see if there is a low-level interrupt active.
2650Sstevel@tonic-gate 		 * If so, place a starting timestamp in the thread
2660Sstevel@tonic-gate 		 * structure.
2670Sstevel@tonic-gate 		 */
2680Sstevel@tonic-gate 		kthread_t *t = cpu->cpu_thread;
2690Sstevel@tonic-gate 
2700Sstevel@tonic-gate 		if (t->t_flag & T_INTR_THREAD)
2713446Smrj 			t->t_intr_start = now;
2720Sstevel@tonic-gate 	}
2730Sstevel@tonic-gate 
2740Sstevel@tonic-gate 	mcpu->mcpu_pri = oldpil;
2750Sstevel@tonic-gate 	(void) (*setlvlx)(oldpil, vecnum);
2760Sstevel@tonic-gate 
2770Sstevel@tonic-gate 	return (cpu->cpu_intr_actv & CPU_INTR_ACTV_HIGH_LEVEL_MASK);
2780Sstevel@tonic-gate }
2790Sstevel@tonic-gate 
2800Sstevel@tonic-gate /*
2810Sstevel@tonic-gate  * Set up the cpu, thread and interrupt thread structures for
2820Sstevel@tonic-gate  * executing an interrupt thread.  The new stack pointer of the
2830Sstevel@tonic-gate  * interrupt thread (which *must* be switched to) is returned.
2840Sstevel@tonic-gate  */
2853446Smrj static caddr_t
2860Sstevel@tonic-gate intr_thread_prolog(struct cpu *cpu, caddr_t stackptr, uint_t pil)
2870Sstevel@tonic-gate {
2880Sstevel@tonic-gate 	struct machcpu *mcpu = &cpu->cpu_m;
2890Sstevel@tonic-gate 	kthread_t *t, *volatile it;
2903446Smrj 	hrtime_t now = tsc_read();
2910Sstevel@tonic-gate 
2920Sstevel@tonic-gate 	ASSERT(pil > 0);
2930Sstevel@tonic-gate 	ASSERT((cpu->cpu_intr_actv & (1 << pil)) == 0);
2940Sstevel@tonic-gate 	cpu->cpu_intr_actv |= (1 << pil);
2950Sstevel@tonic-gate 
2960Sstevel@tonic-gate 	/*
2970Sstevel@tonic-gate 	 * Get set to run an interrupt thread.
2980Sstevel@tonic-gate 	 * There should always be an interrupt thread, since we
2990Sstevel@tonic-gate 	 * allocate one for each level on each CPU.
3000Sstevel@tonic-gate 	 *
301989Sesolom 	 * t_intr_start could be zero due to cpu_intr_swtch_enter.
3020Sstevel@tonic-gate 	 */
3030Sstevel@tonic-gate 	t = cpu->cpu_thread;
304989Sesolom 	if ((t->t_flag & T_INTR_THREAD) && t->t_intr_start != 0) {
3053446Smrj 		hrtime_t intrtime = now - t->t_intr_start;
306916Sschwartz 		mcpu->intrstat[t->t_pil][0] += intrtime;
307590Sesolom 		cpu->cpu_intracct[cpu->cpu_mstate] += intrtime;
3080Sstevel@tonic-gate 		t->t_intr_start = 0;
3090Sstevel@tonic-gate 	}
3100Sstevel@tonic-gate 
3110Sstevel@tonic-gate 	ASSERT(SA((uintptr_t)stackptr) == (uintptr_t)stackptr);
3120Sstevel@tonic-gate 
3130Sstevel@tonic-gate 	t->t_sp = (uintptr_t)stackptr;	/* mark stack in curthread for resume */
3140Sstevel@tonic-gate 
3150Sstevel@tonic-gate 	/*
3160Sstevel@tonic-gate 	 * unlink the interrupt thread off the cpu
317989Sesolom 	 *
318989Sesolom 	 * Note that the code in kcpc_overflow_intr -relies- on the
319989Sesolom 	 * ordering of events here - in particular that t->t_lwp of
320989Sesolom 	 * the interrupt thread is set to the pinned thread *before*
321989Sesolom 	 * curthread is changed.
3220Sstevel@tonic-gate 	 */
3230Sstevel@tonic-gate 	it = cpu->cpu_intr_thread;
3240Sstevel@tonic-gate 	cpu->cpu_intr_thread = it->t_link;
3250Sstevel@tonic-gate 	it->t_intr = t;
3260Sstevel@tonic-gate 	it->t_lwp = t->t_lwp;
3270Sstevel@tonic-gate 
3280Sstevel@tonic-gate 	/*
3290Sstevel@tonic-gate 	 * (threads on the interrupt thread free list could have state
3300Sstevel@tonic-gate 	 * preset to TS_ONPROC, but it helps in debugging if
3310Sstevel@tonic-gate 	 * they're TS_FREE.)
3320Sstevel@tonic-gate 	 */
3330Sstevel@tonic-gate 	it->t_state = TS_ONPROC;
3340Sstevel@tonic-gate 
3350Sstevel@tonic-gate 	cpu->cpu_thread = it;		/* new curthread on this cpu */
3360Sstevel@tonic-gate 	it->t_pil = (uchar_t)pil;
3370Sstevel@tonic-gate 	it->t_pri = intr_pri + (pri_t)pil;
3383446Smrj 	it->t_intr_start = now;
3390Sstevel@tonic-gate 
3400Sstevel@tonic-gate 	return (it->t_stk);
3410Sstevel@tonic-gate }
3420Sstevel@tonic-gate 
3430Sstevel@tonic-gate 
3440Sstevel@tonic-gate #ifdef DEBUG
3450Sstevel@tonic-gate int intr_thread_cnt;
3460Sstevel@tonic-gate #endif
3470Sstevel@tonic-gate 
3480Sstevel@tonic-gate /*
3490Sstevel@tonic-gate  * Called with interrupts disabled
3500Sstevel@tonic-gate  */
3513446Smrj static void
3520Sstevel@tonic-gate intr_thread_epilog(struct cpu *cpu, uint_t vec, uint_t oldpil)
3530Sstevel@tonic-gate {
3540Sstevel@tonic-gate 	struct machcpu *mcpu = &cpu->cpu_m;
3550Sstevel@tonic-gate 	kthread_t *t;
3560Sstevel@tonic-gate 	kthread_t *it = cpu->cpu_thread;	/* curthread */
3570Sstevel@tonic-gate 	uint_t pil, basespl;
358590Sesolom 	hrtime_t intrtime;
3593446Smrj 	hrtime_t now = tsc_read();
3600Sstevel@tonic-gate 
3610Sstevel@tonic-gate 	pil = it->t_pil;
3620Sstevel@tonic-gate 	cpu->cpu_stats.sys.intr[pil - 1]++;
3630Sstevel@tonic-gate 
3640Sstevel@tonic-gate 	ASSERT(it->t_intr_start != 0);
3653446Smrj 	intrtime = now - it->t_intr_start;
366916Sschwartz 	mcpu->intrstat[pil][0] += intrtime;
367590Sesolom 	cpu->cpu_intracct[cpu->cpu_mstate] += intrtime;
3680Sstevel@tonic-gate 
3690Sstevel@tonic-gate 	ASSERT(cpu->cpu_intr_actv & (1 << pil));
3700Sstevel@tonic-gate 	cpu->cpu_intr_actv &= ~(1 << pil);
3710Sstevel@tonic-gate 
3720Sstevel@tonic-gate 	/*
3730Sstevel@tonic-gate 	 * If there is still an interrupted thread underneath this one
3740Sstevel@tonic-gate 	 * then the interrupt was never blocked and the return is
3750Sstevel@tonic-gate 	 * fairly simple.  Otherwise it isn't.
3760Sstevel@tonic-gate 	 */
3770Sstevel@tonic-gate 	if ((t = it->t_intr) == NULL) {
3780Sstevel@tonic-gate 		/*
3790Sstevel@tonic-gate 		 * The interrupted thread is no longer pinned underneath
3800Sstevel@tonic-gate 		 * the interrupt thread.  This means the interrupt must
3810Sstevel@tonic-gate 		 * have blocked, and the interrupted thread has been
3820Sstevel@tonic-gate 		 * unpinned, and has probably been running around the
3830Sstevel@tonic-gate 		 * system for a while.
3840Sstevel@tonic-gate 		 *
3850Sstevel@tonic-gate 		 * Since there is no longer a thread under this one, put
3860Sstevel@tonic-gate 		 * this interrupt thread back on the CPU's free list and
3870Sstevel@tonic-gate 		 * resume the idle thread which will dispatch the next
3880Sstevel@tonic-gate 		 * thread to run.
3890Sstevel@tonic-gate 		 */
3900Sstevel@tonic-gate #ifdef DEBUG
3910Sstevel@tonic-gate 		intr_thread_cnt++;
3920Sstevel@tonic-gate #endif
3930Sstevel@tonic-gate 		cpu->cpu_stats.sys.intrblk++;
3940Sstevel@tonic-gate 		/*
3950Sstevel@tonic-gate 		 * Set CPU's base SPL based on active interrupts bitmask
3960Sstevel@tonic-gate 		 */
3970Sstevel@tonic-gate 		set_base_spl();
3980Sstevel@tonic-gate 		basespl = cpu->cpu_base_spl;
3990Sstevel@tonic-gate 		mcpu->mcpu_pri = basespl;
4000Sstevel@tonic-gate 		(*setlvlx)(basespl, vec);
4010Sstevel@tonic-gate 		(void) splhigh();
4023446Smrj 		sti();
4030Sstevel@tonic-gate 		it->t_state = TS_FREE;
4040Sstevel@tonic-gate 		/*
4050Sstevel@tonic-gate 		 * Return interrupt thread to pool
4060Sstevel@tonic-gate 		 */
4070Sstevel@tonic-gate 		it->t_link = cpu->cpu_intr_thread;
4080Sstevel@tonic-gate 		cpu->cpu_intr_thread = it;
4090Sstevel@tonic-gate 		swtch();
4103446Smrj 		panic("intr_thread_epilog: swtch returned");
4110Sstevel@tonic-gate 		/*NOTREACHED*/
4120Sstevel@tonic-gate 	}
4130Sstevel@tonic-gate 
4140Sstevel@tonic-gate 	/*
4150Sstevel@tonic-gate 	 * Return interrupt thread to the pool
4160Sstevel@tonic-gate 	 */
4170Sstevel@tonic-gate 	it->t_link = cpu->cpu_intr_thread;
4180Sstevel@tonic-gate 	cpu->cpu_intr_thread = it;
4190Sstevel@tonic-gate 	it->t_state = TS_FREE;
4200Sstevel@tonic-gate 
4210Sstevel@tonic-gate 	basespl = cpu->cpu_base_spl;
4220Sstevel@tonic-gate 	pil = MAX(oldpil, basespl);
4230Sstevel@tonic-gate 	mcpu->mcpu_pri = pil;
4240Sstevel@tonic-gate 	(*setlvlx)(pil, vec);
4253446Smrj 	t->t_intr_start = now;
4260Sstevel@tonic-gate 	cpu->cpu_thread = t;
4270Sstevel@tonic-gate }
4280Sstevel@tonic-gate 
429916Sschwartz /*
4303446Smrj  * intr_get_time() is a resource for interrupt handlers to determine how
4313446Smrj  * much time has been spent handling the current interrupt. Such a function
4323446Smrj  * is needed because higher level interrupts can arrive during the
4333446Smrj  * processing of an interrupt.  intr_get_time() only returns time spent in the
4343446Smrj  * current interrupt handler.
4353446Smrj  *
4363446Smrj  * The caller must be calling from an interrupt handler running at a pil
4373446Smrj  * below or at lock level. Timings are not provided for high-level
4383446Smrj  * interrupts.
4393446Smrj  *
4403446Smrj  * The first time intr_get_time() is called while handling an interrupt,
4413446Smrj  * it returns the time since the interrupt handler was invoked. Subsequent
4423446Smrj  * calls will return the time since the prior call to intr_get_time(). Time
4435084Sjohnlev  * is returned as ticks. Use scalehrtimef() to convert ticks to nsec.
4443446Smrj  *
4453446Smrj  * Theory Of Intrstat[][]:
4463446Smrj  *
4473446Smrj  * uint64_t intrstat[pil][0..1] is an array indexed by pil level, with two
4483446Smrj  * uint64_ts per pil.
4493446Smrj  *
4503446Smrj  * intrstat[pil][0] is a cumulative count of the number of ticks spent
4513446Smrj  * handling all interrupts at the specified pil on this CPU. It is
4523446Smrj  * exported via kstats to the user.
4533446Smrj  *
4543446Smrj  * intrstat[pil][1] is always a count of ticks less than or equal to the
4553446Smrj  * value in [0]. The difference between [1] and [0] is the value returned
4563446Smrj  * by a call to intr_get_time(). At the start of interrupt processing,
4573446Smrj  * [0] and [1] will be equal (or nearly so). As the interrupt consumes
4583446Smrj  * time, [0] will increase, but [1] will remain the same. A call to
4593446Smrj  * intr_get_time() will return the difference, then update [1] to be the
4603446Smrj  * same as [0]. Future calls will return the time since the last call.
4613446Smrj  * Finally, when the interrupt completes, [1] is updated to the same as [0].
4623446Smrj  *
4633446Smrj  * Implementation:
4643446Smrj  *
4653446Smrj  * intr_get_time() works much like a higher level interrupt arriving. It
4663446Smrj  * "checkpoints" the timing information by incrementing intrstat[pil][0]
4673446Smrj  * to include elapsed running time, and by setting t_intr_start to rdtsc.
4683446Smrj  * It then sets the return value to intrstat[pil][0] - intrstat[pil][1],
4693446Smrj  * and updates intrstat[pil][1] to be the same as the new value of
4703446Smrj  * intrstat[pil][0].
4713446Smrj  *
4723446Smrj  * In the normal handling of interrupts, after an interrupt handler returns
4733446Smrj  * and the code in intr_thread() updates intrstat[pil][0], it then sets
4743446Smrj  * intrstat[pil][1] to the new value of intrstat[pil][0]. When [0] == [1],
4753446Smrj  * the timings are reset, i.e. intr_get_time() will return [0] - [1] which
4763446Smrj  * is 0.
4773446Smrj  *
4783446Smrj  * Whenever interrupts arrive on a CPU which is handling a lower pil
4793446Smrj  * interrupt, they update the lower pil's [0] to show time spent in the
4803446Smrj  * handler that they've interrupted. This results in a growing discrepancy
4813446Smrj  * between [0] and [1], which is returned the next time intr_get_time() is
4823446Smrj  * called. Time spent in the higher-pil interrupt will not be returned in
4833446Smrj  * the next intr_get_time() call from the original interrupt, because
4843446Smrj  * the higher-pil interrupt's time is accumulated in intrstat[higherpil][].
485916Sschwartz  */
486916Sschwartz uint64_t
4873446Smrj intr_get_time(void)
488916Sschwartz {
4893446Smrj 	struct cpu *cpu;
4903446Smrj 	struct machcpu *mcpu;
4913446Smrj 	kthread_t *t;
492916Sschwartz 	uint64_t time, delta, ret;
4933446Smrj 	uint_t pil;
494916Sschwartz 
4953446Smrj 	cli();
4963446Smrj 	cpu = CPU;
4973446Smrj 	mcpu = &cpu->cpu_m;
4983446Smrj 	t = cpu->cpu_thread;
4993446Smrj 	pil = t->t_pil;
500916Sschwartz 	ASSERT((cpu->cpu_intr_actv & CPU_INTR_ACTV_HIGH_LEVEL_MASK) == 0);
501916Sschwartz 	ASSERT(t->t_flag & T_INTR_THREAD);
502916Sschwartz 	ASSERT(pil != 0);
503916Sschwartz 	ASSERT(t->t_intr_start != 0);
504916Sschwartz 
505916Sschwartz 	time = tsc_read();
506916Sschwartz 	delta = time - t->t_intr_start;
507916Sschwartz 	t->t_intr_start = time;
508916Sschwartz 
509916Sschwartz 	time = mcpu->intrstat[pil][0] + delta;
510916Sschwartz 	ret = time - mcpu->intrstat[pil][1];
511916Sschwartz 	mcpu->intrstat[pil][0] = time;
512916Sschwartz 	mcpu->intrstat[pil][1] = time;
5131887Sjhaslam 	cpu->cpu_intracct[cpu->cpu_mstate] += delta;
514916Sschwartz 
5153446Smrj 	sti();
516916Sschwartz 	return (ret);
517916Sschwartz }
518916Sschwartz 
5193446Smrj static caddr_t
5200Sstevel@tonic-gate dosoftint_prolog(
5210Sstevel@tonic-gate 	struct cpu *cpu,
5220Sstevel@tonic-gate 	caddr_t stackptr,
5230Sstevel@tonic-gate 	uint32_t st_pending,
5240Sstevel@tonic-gate 	uint_t oldpil)
5250Sstevel@tonic-gate {
5260Sstevel@tonic-gate 	kthread_t *t, *volatile it;
5270Sstevel@tonic-gate 	struct machcpu *mcpu = &cpu->cpu_m;
5280Sstevel@tonic-gate 	uint_t pil;
5293446Smrj 	hrtime_t now;
5300Sstevel@tonic-gate 
5310Sstevel@tonic-gate top:
5320Sstevel@tonic-gate 	ASSERT(st_pending == mcpu->mcpu_softinfo.st_pending);
5330Sstevel@tonic-gate 
5340Sstevel@tonic-gate 	pil = bsrw_insn((uint16_t)st_pending);
5350Sstevel@tonic-gate 	if (pil <= oldpil || pil <= cpu->cpu_base_spl)
5360Sstevel@tonic-gate 		return (0);
5370Sstevel@tonic-gate 
5380Sstevel@tonic-gate 	/*
5390Sstevel@tonic-gate 	 * XX64	Sigh.
5400Sstevel@tonic-gate 	 *
5410Sstevel@tonic-gate 	 * This is a transliteration of the i386 assembler code for
5420Sstevel@tonic-gate 	 * soft interrupts.  One question is "why does this need
5430Sstevel@tonic-gate 	 * to be atomic?"  One possible race is -other- processors
5440Sstevel@tonic-gate 	 * posting soft interrupts to us in set_pending() i.e. the
5450Sstevel@tonic-gate 	 * CPU might get preempted just after the address computation,
5460Sstevel@tonic-gate 	 * but just before the atomic transaction, so another CPU would
5470Sstevel@tonic-gate 	 * actually set the original CPU's st_pending bit.  However,
5480Sstevel@tonic-gate 	 * it looks like it would be simpler to disable preemption there.
5490Sstevel@tonic-gate 	 * Are there other races for which preemption control doesn't work?
5500Sstevel@tonic-gate 	 *
5510Sstevel@tonic-gate 	 * The i386 assembler version -also- checks to see if the bit
5520Sstevel@tonic-gate 	 * being cleared was actually set; if it wasn't, it rechecks
5530Sstevel@tonic-gate 	 * for more.  This seems a bit strange, as the only code that
5540Sstevel@tonic-gate 	 * ever clears the bit is -this- code running with interrupts
5550Sstevel@tonic-gate 	 * disabled on -this- CPU.  This code would probably be cheaper:
5560Sstevel@tonic-gate 	 *
5570Sstevel@tonic-gate 	 * atomic_and_32((uint32_t *)&mcpu->mcpu_softinfo.st_pending,
5580Sstevel@tonic-gate 	 *   ~(1 << pil));
5590Sstevel@tonic-gate 	 *
5600Sstevel@tonic-gate 	 * and t->t_preempt--/++ around set_pending() even cheaper,
5610Sstevel@tonic-gate 	 * but at this point, correctness is critical, so we slavishly
5620Sstevel@tonic-gate 	 * emulate the i386 port.
5630Sstevel@tonic-gate 	 */
5643446Smrj 	if (atomic_btr32((uint32_t *)
5653446Smrj 	    &mcpu->mcpu_softinfo.st_pending, pil) == 0) {
5660Sstevel@tonic-gate 		st_pending = mcpu->mcpu_softinfo.st_pending;
5670Sstevel@tonic-gate 		goto top;
5680Sstevel@tonic-gate 	}
5690Sstevel@tonic-gate 
5700Sstevel@tonic-gate 	mcpu->mcpu_pri = pil;
5710Sstevel@tonic-gate 	(*setspl)(pil);
5720Sstevel@tonic-gate 
5733446Smrj 	now = tsc_read();
5743446Smrj 
5750Sstevel@tonic-gate 	/*
5760Sstevel@tonic-gate 	 * Get set to run interrupt thread.
5770Sstevel@tonic-gate 	 * There should always be an interrupt thread since we
5780Sstevel@tonic-gate 	 * allocate one for each level on the CPU.
5790Sstevel@tonic-gate 	 */
5800Sstevel@tonic-gate 	it = cpu->cpu_intr_thread;
5810Sstevel@tonic-gate 	cpu->cpu_intr_thread = it->t_link;
5820Sstevel@tonic-gate 
583989Sesolom 	/* t_intr_start could be zero due to cpu_intr_swtch_enter. */
584989Sesolom 	t = cpu->cpu_thread;
585989Sesolom 	if ((t->t_flag & T_INTR_THREAD) && t->t_intr_start != 0) {
5863446Smrj 		hrtime_t intrtime = now - t->t_intr_start;
587989Sesolom 		mcpu->intrstat[pil][0] += intrtime;
588989Sesolom 		cpu->cpu_intracct[cpu->cpu_mstate] += intrtime;
589989Sesolom 		t->t_intr_start = 0;
590989Sesolom 	}
591989Sesolom 
5920Sstevel@tonic-gate 	/*
5930Sstevel@tonic-gate 	 * Note that the code in kcpc_overflow_intr -relies- on the
5940Sstevel@tonic-gate 	 * ordering of events here - in particular that t->t_lwp of
5950Sstevel@tonic-gate 	 * the interrupt thread is set to the pinned thread *before*
596989Sesolom 	 * curthread is changed.
5970Sstevel@tonic-gate 	 */
5980Sstevel@tonic-gate 	it->t_lwp = t->t_lwp;
5990Sstevel@tonic-gate 	it->t_state = TS_ONPROC;
6000Sstevel@tonic-gate 
6010Sstevel@tonic-gate 	/*
6020Sstevel@tonic-gate 	 * Push interrupted thread onto list from new thread.
6030Sstevel@tonic-gate 	 * Set the new thread as the current one.
6040Sstevel@tonic-gate 	 * Set interrupted thread's T_SP because if it is the idle thread,
6050Sstevel@tonic-gate 	 * resume() may use that stack between threads.
6060Sstevel@tonic-gate 	 */
6070Sstevel@tonic-gate 
6080Sstevel@tonic-gate 	ASSERT(SA((uintptr_t)stackptr) == (uintptr_t)stackptr);
6090Sstevel@tonic-gate 	t->t_sp = (uintptr_t)stackptr;
6100Sstevel@tonic-gate 
6110Sstevel@tonic-gate 	it->t_intr = t;
6120Sstevel@tonic-gate 	cpu->cpu_thread = it;
6130Sstevel@tonic-gate 
6140Sstevel@tonic-gate 	/*
6150Sstevel@tonic-gate 	 * Set bit for this pil in CPU's interrupt active bitmask.
6160Sstevel@tonic-gate 	 */
6170Sstevel@tonic-gate 	ASSERT((cpu->cpu_intr_actv & (1 << pil)) == 0);
6180Sstevel@tonic-gate 	cpu->cpu_intr_actv |= (1 << pil);
6190Sstevel@tonic-gate 
6200Sstevel@tonic-gate 	/*
6210Sstevel@tonic-gate 	 * Initialize thread priority level from intr_pri
6220Sstevel@tonic-gate 	 */
6230Sstevel@tonic-gate 	it->t_pil = (uchar_t)pil;
6240Sstevel@tonic-gate 	it->t_pri = (pri_t)pil + intr_pri;
6253446Smrj 	it->t_intr_start = now;
6260Sstevel@tonic-gate 
6270Sstevel@tonic-gate 	return (it->t_stk);
6280Sstevel@tonic-gate }
6290Sstevel@tonic-gate 
6303446Smrj static void
6310Sstevel@tonic-gate dosoftint_epilog(struct cpu *cpu, uint_t oldpil)
6320Sstevel@tonic-gate {
6330Sstevel@tonic-gate 	struct machcpu *mcpu = &cpu->cpu_m;
6340Sstevel@tonic-gate 	kthread_t *t, *it;
6350Sstevel@tonic-gate 	uint_t pil, basespl;
636590Sesolom 	hrtime_t intrtime;
6373446Smrj 	hrtime_t now = tsc_read();
6380Sstevel@tonic-gate 
6390Sstevel@tonic-gate 	it = cpu->cpu_thread;
6400Sstevel@tonic-gate 	pil = it->t_pil;
6410Sstevel@tonic-gate 
6420Sstevel@tonic-gate 	cpu->cpu_stats.sys.intr[pil - 1]++;
6430Sstevel@tonic-gate 
6440Sstevel@tonic-gate 	ASSERT(cpu->cpu_intr_actv & (1 << pil));
6450Sstevel@tonic-gate 	cpu->cpu_intr_actv &= ~(1 << pil);
6463446Smrj 	intrtime = now - it->t_intr_start;
647916Sschwartz 	mcpu->intrstat[pil][0] += intrtime;
648590Sesolom 	cpu->cpu_intracct[cpu->cpu_mstate] += intrtime;
6490Sstevel@tonic-gate 
6500Sstevel@tonic-gate 	/*
6510Sstevel@tonic-gate 	 * If there is still an interrupted thread underneath this one
6520Sstevel@tonic-gate 	 * then the interrupt was never blocked and the return is
6530Sstevel@tonic-gate 	 * fairly simple.  Otherwise it isn't.
6540Sstevel@tonic-gate 	 */
6550Sstevel@tonic-gate 	if ((t = it->t_intr) == NULL) {
6560Sstevel@tonic-gate 		/*
6570Sstevel@tonic-gate 		 * Put thread back on the interrupt thread list.
6580Sstevel@tonic-gate 		 * This was an interrupt thread, so set CPU's base SPL.
6590Sstevel@tonic-gate 		 */
6600Sstevel@tonic-gate 		set_base_spl();
6610Sstevel@tonic-gate 		it->t_state = TS_FREE;
6620Sstevel@tonic-gate 		it->t_link = cpu->cpu_intr_thread;
6630Sstevel@tonic-gate 		cpu->cpu_intr_thread = it;
6640Sstevel@tonic-gate 		(void) splhigh();
6653446Smrj 		sti();
6660Sstevel@tonic-gate 		swtch();
6670Sstevel@tonic-gate 		/*NOTREACHED*/
6683446Smrj 		panic("dosoftint_epilog: swtch returned");
6690Sstevel@tonic-gate 	}
6700Sstevel@tonic-gate 	it->t_link = cpu->cpu_intr_thread;
6710Sstevel@tonic-gate 	cpu->cpu_intr_thread = it;
6720Sstevel@tonic-gate 	it->t_state = TS_FREE;
6730Sstevel@tonic-gate 	cpu->cpu_thread = t;
6740Sstevel@tonic-gate 	if (t->t_flag & T_INTR_THREAD)
6753446Smrj 		t->t_intr_start = now;
6760Sstevel@tonic-gate 	basespl = cpu->cpu_base_spl;
6770Sstevel@tonic-gate 	pil = MAX(oldpil, basespl);
6780Sstevel@tonic-gate 	mcpu->mcpu_pri = pil;
6790Sstevel@tonic-gate 	(*setspl)(pil);
6800Sstevel@tonic-gate }
6810Sstevel@tonic-gate 
6823446Smrj 
6830Sstevel@tonic-gate /*
6840Sstevel@tonic-gate  * Make the interrupted thread 'to' be runnable.
6850Sstevel@tonic-gate  *
6860Sstevel@tonic-gate  * Since t->t_sp has already been saved, t->t_pc is all
6870Sstevel@tonic-gate  * that needs to be set in this function.
6880Sstevel@tonic-gate  *
6890Sstevel@tonic-gate  * Returns the interrupt level of the interrupt thread.
6900Sstevel@tonic-gate  */
6910Sstevel@tonic-gate int
6920Sstevel@tonic-gate intr_passivate(
6930Sstevel@tonic-gate 	kthread_t *it,		/* interrupt thread */
6940Sstevel@tonic-gate 	kthread_t *t)		/* interrupted thread */
6950Sstevel@tonic-gate {
6960Sstevel@tonic-gate 	extern void _sys_rtt();
6970Sstevel@tonic-gate 
6980Sstevel@tonic-gate 	ASSERT(it->t_flag & T_INTR_THREAD);
6990Sstevel@tonic-gate 	ASSERT(SA(t->t_sp) == t->t_sp);
7000Sstevel@tonic-gate 
7010Sstevel@tonic-gate 	t->t_pc = (uintptr_t)_sys_rtt;
7020Sstevel@tonic-gate 	return (it->t_pil);
7030Sstevel@tonic-gate }
7040Sstevel@tonic-gate 
7050Sstevel@tonic-gate /*
7060Sstevel@tonic-gate  * Create interrupt kstats for this CPU.
7070Sstevel@tonic-gate  */
7080Sstevel@tonic-gate void
7090Sstevel@tonic-gate cpu_create_intrstat(cpu_t *cp)
7100Sstevel@tonic-gate {
7110Sstevel@tonic-gate 	int		i;
7120Sstevel@tonic-gate 	kstat_t		*intr_ksp;
7130Sstevel@tonic-gate 	kstat_named_t	*knp;
7140Sstevel@tonic-gate 	char		name[KSTAT_STRLEN];
7150Sstevel@tonic-gate 	zoneid_t	zoneid;
7160Sstevel@tonic-gate 
7170Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&cpu_lock));
7180Sstevel@tonic-gate 
7190Sstevel@tonic-gate 	if (pool_pset_enabled())
7200Sstevel@tonic-gate 		zoneid = GLOBAL_ZONEID;
7210Sstevel@tonic-gate 	else
7220Sstevel@tonic-gate 		zoneid = ALL_ZONES;
7230Sstevel@tonic-gate 
7240Sstevel@tonic-gate 	intr_ksp = kstat_create_zone("cpu", cp->cpu_id, "intrstat", "misc",
7250Sstevel@tonic-gate 	    KSTAT_TYPE_NAMED, PIL_MAX * 2, NULL, zoneid);
7260Sstevel@tonic-gate 
7270Sstevel@tonic-gate 	/*
7280Sstevel@tonic-gate 	 * Initialize each PIL's named kstat
7290Sstevel@tonic-gate 	 */
7300Sstevel@tonic-gate 	if (intr_ksp != NULL) {
7310Sstevel@tonic-gate 		intr_ksp->ks_update = cpu_kstat_intrstat_update;
7320Sstevel@tonic-gate 		knp = (kstat_named_t *)intr_ksp->ks_data;
7330Sstevel@tonic-gate 		intr_ksp->ks_private = cp;
7340Sstevel@tonic-gate 		for (i = 0; i < PIL_MAX; i++) {
7350Sstevel@tonic-gate 			(void) snprintf(name, KSTAT_STRLEN, "level-%d-time",
7360Sstevel@tonic-gate 			    i + 1);
7370Sstevel@tonic-gate 			kstat_named_init(&knp[i * 2], name, KSTAT_DATA_UINT64);
7380Sstevel@tonic-gate 			(void) snprintf(name, KSTAT_STRLEN, "level-%d-count",
7390Sstevel@tonic-gate 			    i + 1);
7400Sstevel@tonic-gate 			kstat_named_init(&knp[(i * 2) + 1], name,
7410Sstevel@tonic-gate 			    KSTAT_DATA_UINT64);
7420Sstevel@tonic-gate 		}
7430Sstevel@tonic-gate 		kstat_install(intr_ksp);
7440Sstevel@tonic-gate 	}
7450Sstevel@tonic-gate }
7460Sstevel@tonic-gate 
7470Sstevel@tonic-gate /*
7480Sstevel@tonic-gate  * Delete interrupt kstats for this CPU.
7490Sstevel@tonic-gate  */
7500Sstevel@tonic-gate void
7510Sstevel@tonic-gate cpu_delete_intrstat(cpu_t *cp)
7520Sstevel@tonic-gate {
7530Sstevel@tonic-gate 	kstat_delete_byname_zone("cpu", cp->cpu_id, "intrstat", ALL_ZONES);
7540Sstevel@tonic-gate }
7550Sstevel@tonic-gate 
7560Sstevel@tonic-gate /*
7570Sstevel@tonic-gate  * Convert interrupt statistics from CPU ticks to nanoseconds and
7580Sstevel@tonic-gate  * update kstat.
7590Sstevel@tonic-gate  */
7600Sstevel@tonic-gate int
7610Sstevel@tonic-gate cpu_kstat_intrstat_update(kstat_t *ksp, int rw)
7620Sstevel@tonic-gate {
7630Sstevel@tonic-gate 	kstat_named_t	*knp = ksp->ks_data;
7640Sstevel@tonic-gate 	cpu_t		*cpup = (cpu_t *)ksp->ks_private;
7650Sstevel@tonic-gate 	int		i;
7660Sstevel@tonic-gate 	hrtime_t	hrt;
7670Sstevel@tonic-gate 
7680Sstevel@tonic-gate 	if (rw == KSTAT_WRITE)
7690Sstevel@tonic-gate 		return (EACCES);
7700Sstevel@tonic-gate 
7710Sstevel@tonic-gate 	for (i = 0; i < PIL_MAX; i++) {
772916Sschwartz 		hrt = (hrtime_t)cpup->cpu_m.intrstat[i + 1][0];
7735084Sjohnlev 		scalehrtimef(&hrt);
7740Sstevel@tonic-gate 		knp[i * 2].value.ui64 = (uint64_t)hrt;
7750Sstevel@tonic-gate 		knp[(i * 2) + 1].value.ui64 = cpup->cpu_stats.sys.intr[i];
7760Sstevel@tonic-gate 	}
7770Sstevel@tonic-gate 
7780Sstevel@tonic-gate 	return (0);
7790Sstevel@tonic-gate }
7800Sstevel@tonic-gate 
7810Sstevel@tonic-gate /*
7820Sstevel@tonic-gate  * An interrupt thread is ending a time slice, so compute the interval it
7830Sstevel@tonic-gate  * ran for and update the statistic for its PIL.
7840Sstevel@tonic-gate  */
7850Sstevel@tonic-gate void
7860Sstevel@tonic-gate cpu_intr_swtch_enter(kthread_id_t t)
7870Sstevel@tonic-gate {
7880Sstevel@tonic-gate 	uint64_t	interval;
7890Sstevel@tonic-gate 	uint64_t	start;
790590Sesolom 	cpu_t		*cpu;
7910Sstevel@tonic-gate 
7920Sstevel@tonic-gate 	ASSERT((t->t_flag & T_INTR_THREAD) != 0);
7930Sstevel@tonic-gate 	ASSERT(t->t_pil > 0 && t->t_pil <= LOCK_LEVEL);
7940Sstevel@tonic-gate 
7950Sstevel@tonic-gate 	/*
7960Sstevel@tonic-gate 	 * We could be here with a zero timestamp. This could happen if:
7970Sstevel@tonic-gate 	 * an interrupt thread which no longer has a pinned thread underneath
7980Sstevel@tonic-gate 	 * it (i.e. it blocked at some point in its past) has finished running
7990Sstevel@tonic-gate 	 * its handler. intr_thread() updated the interrupt statistic for its
8000Sstevel@tonic-gate 	 * PIL and zeroed its timestamp. Since there was no pinned thread to
8010Sstevel@tonic-gate 	 * return to, swtch() gets called and we end up here.
802590Sesolom 	 *
803590Sesolom 	 * Note that we use atomic ops below (cas64 and atomic_add_64), which
804590Sesolom 	 * we don't use in the functions above, because we're not called
805590Sesolom 	 * with interrupts blocked, but the epilog/prolog functions are.
8060Sstevel@tonic-gate 	 */
8070Sstevel@tonic-gate 	if (t->t_intr_start) {
8080Sstevel@tonic-gate 		do {
8090Sstevel@tonic-gate 			start = t->t_intr_start;
8100Sstevel@tonic-gate 			interval = tsc_read() - start;
8110Sstevel@tonic-gate 		} while (cas64(&t->t_intr_start, start, 0) != start);
812590Sesolom 		cpu = CPU;
813916Sschwartz 		cpu->cpu_m.intrstat[t->t_pil][0] += interval;
814590Sesolom 
815590Sesolom 		atomic_add_64((uint64_t *)&cpu->cpu_intracct[cpu->cpu_mstate],
816590Sesolom 		    interval);
8170Sstevel@tonic-gate 	} else
8180Sstevel@tonic-gate 		ASSERT(t->t_intr == NULL);
8190Sstevel@tonic-gate }
8200Sstevel@tonic-gate 
8210Sstevel@tonic-gate /*
8220Sstevel@tonic-gate  * An interrupt thread is returning from swtch(). Place a starting timestamp
8230Sstevel@tonic-gate  * in its thread structure.
8240Sstevel@tonic-gate  */
8250Sstevel@tonic-gate void
8260Sstevel@tonic-gate cpu_intr_swtch_exit(kthread_id_t t)
8270Sstevel@tonic-gate {
8280Sstevel@tonic-gate 	uint64_t ts;
8290Sstevel@tonic-gate 
8300Sstevel@tonic-gate 	ASSERT((t->t_flag & T_INTR_THREAD) != 0);
8310Sstevel@tonic-gate 	ASSERT(t->t_pil > 0 && t->t_pil <= LOCK_LEVEL);
8320Sstevel@tonic-gate 
8330Sstevel@tonic-gate 	do {
8340Sstevel@tonic-gate 		ts = t->t_intr_start;
8350Sstevel@tonic-gate 	} while (cas64(&t->t_intr_start, ts, tsc_read()) != ts);
8360Sstevel@tonic-gate }
8373446Smrj 
8383446Smrj /*
8393446Smrj  * Dispatch a hilevel interrupt (one above LOCK_LEVEL)
8403446Smrj  */
8413446Smrj /*ARGSUSED*/
8423446Smrj static void
8433446Smrj dispatch_hilevel(uint_t vector, uint_t arg2)
8443446Smrj {
8453446Smrj 	sti();
8463446Smrj 	av_dispatch_autovect(vector);
8473446Smrj 	cli();
8483446Smrj }
8493446Smrj 
8503446Smrj /*
8513446Smrj  * Dispatch a soft interrupt
8523446Smrj  */
8533446Smrj /*ARGSUSED*/
8543446Smrj static void
8553446Smrj dispatch_softint(uint_t oldpil, uint_t arg2)
8563446Smrj {
8573446Smrj 	struct cpu *cpu = CPU;
8583446Smrj 
8593446Smrj 	sti();
8603446Smrj 	av_dispatch_softvect((int)cpu->cpu_thread->t_pil);
8613446Smrj 	cli();
8623446Smrj 
8633446Smrj 	/*
8643446Smrj 	 * Must run softint_epilog() on the interrupt thread stack, since
8653446Smrj 	 * there may not be a return from it if the interrupt thread blocked.
8663446Smrj 	 */
8673446Smrj 	dosoftint_epilog(cpu, oldpil);
8683446Smrj }
8693446Smrj 
8703446Smrj /*
8713446Smrj  * Dispatch a normal interrupt
8723446Smrj  */
8733446Smrj static void
8743446Smrj dispatch_hardint(uint_t vector, uint_t oldipl)
8753446Smrj {
8763446Smrj 	struct cpu *cpu = CPU;
8773446Smrj 
8783446Smrj 	sti();
8793446Smrj 	av_dispatch_autovect(vector);
8803446Smrj 	cli();
8813446Smrj 
8823446Smrj 	/*
8833446Smrj 	 * Must run intr_thread_epilog() on the interrupt thread stack, since
8843446Smrj 	 * there may not be a return from it if the interrupt thread blocked.
8853446Smrj 	 */
8863446Smrj 	intr_thread_epilog(cpu, vector, oldipl);
8873446Smrj }
8883446Smrj 
8893446Smrj /*
8903446Smrj  * Deliver any softints the current interrupt priority allows.
8913446Smrj  * Called with interrupts disabled.
8923446Smrj  */
8933446Smrj void
8943446Smrj dosoftint(struct regs *regs)
8953446Smrj {
8963446Smrj 	struct cpu *cpu = CPU;
8973446Smrj 	int oldipl;
8983446Smrj 	caddr_t newsp;
8993446Smrj 
9003446Smrj 	while (cpu->cpu_softinfo.st_pending) {
9013446Smrj 		oldipl = cpu->cpu_pri;
9023446Smrj 		newsp = dosoftint_prolog(cpu, (caddr_t)regs,
9035084Sjohnlev 		    cpu->cpu_softinfo.st_pending, oldipl);
9043446Smrj 		/*
9053446Smrj 		 * If returned stack pointer is NULL, priority is too high
9063446Smrj 		 * to run any of the pending softints now.
9073446Smrj 		 * Break out and they will be run later.
9083446Smrj 		 */
9093446Smrj 		if (newsp == NULL)
9103446Smrj 			break;
9113446Smrj 		switch_sp_and_call(newsp, dispatch_softint, oldipl, 0);
9123446Smrj 	}
9133446Smrj }
9143446Smrj 
9153446Smrj /*
9163446Smrj  * Interrupt service routine, called with interrupts disabled.
9173446Smrj  */
9183446Smrj /*ARGSUSED*/
9193446Smrj void
9203446Smrj do_interrupt(struct regs *rp, trap_trace_rec_t *ttp)
9213446Smrj {
9223446Smrj 	struct cpu *cpu = CPU;
9233446Smrj 	int newipl, oldipl = cpu->cpu_pri;
9243446Smrj 	uint_t vector;
9253446Smrj 	caddr_t newsp;
9263446Smrj 
9273446Smrj #ifdef TRAPTRACE
9283446Smrj 	ttp->ttr_marker = TT_INTERRUPT;
9293446Smrj 	ttp->ttr_ipl = 0xff;
9303446Smrj 	ttp->ttr_pri = oldipl;
9313446Smrj 	ttp->ttr_spl = cpu->cpu_base_spl;
9323446Smrj 	ttp->ttr_vector = 0xff;
9333446Smrj #endif	/* TRAPTRACE */
9343446Smrj 
9359637SRandy.Fishel@Sun.COM 	cpu_idle_exit(CPU_IDLE_CB_FLAG_INTR);
9364191Sjosephb 
93711330SFrank.Vanderlinden@Sun.COM 	++*(uint16_t *)&cpu->cpu_m.mcpu_istamp;
93811330SFrank.Vanderlinden@Sun.COM 
9394191Sjosephb 	/*
9403446Smrj 	 * If it's a softint go do it now.
9413446Smrj 	 */
9423446Smrj 	if (rp->r_trapno == T_SOFTINT) {
9433446Smrj 		dosoftint(rp);
9443446Smrj 		ASSERT(!interrupts_enabled());
9453446Smrj 		return;
9463446Smrj 	}
9473446Smrj 
9483446Smrj 	/*
9493446Smrj 	 * Raise the interrupt priority.
9503446Smrj 	 */
9513446Smrj 	newipl = (*setlvl)(oldipl, (int *)&rp->r_trapno);
9523446Smrj #ifdef TRAPTRACE
9533446Smrj 	ttp->ttr_ipl = newipl;
9543446Smrj #endif	/* TRAPTRACE */
9553446Smrj 
9563446Smrj 	/*
9573446Smrj 	 * Bail if it is a spurious interrupt
9583446Smrj 	 */
9593446Smrj 	if (newipl == -1)
9603446Smrj 		return;
9613446Smrj 	cpu->cpu_pri = newipl;
9623446Smrj 	vector = rp->r_trapno;
9633446Smrj #ifdef TRAPTRACE
9643446Smrj 	ttp->ttr_vector = vector;
9653446Smrj #endif	/* TRAPTRACE */
9663446Smrj 	if (newipl > LOCK_LEVEL) {
9673446Smrj 		/*
9683446Smrj 		 * High priority interrupts run on this cpu's interrupt stack.
9693446Smrj 		 */
9703446Smrj 		if (hilevel_intr_prolog(cpu, newipl, oldipl, rp) == 0) {
9713446Smrj 			newsp = cpu->cpu_intr_stack;
9723446Smrj 			switch_sp_and_call(newsp, dispatch_hilevel, vector, 0);
9733446Smrj 		} else { /* already on the interrupt stack */
9743446Smrj 			dispatch_hilevel(vector, 0);
9753446Smrj 		}
9763446Smrj 		(void) hilevel_intr_epilog(cpu, newipl, oldipl, vector);
9773446Smrj 	} else {
9783446Smrj 		/*
9793446Smrj 		 * Run this interrupt in a separate thread.
9803446Smrj 		 */
9813446Smrj 		newsp = intr_thread_prolog(cpu, (caddr_t)rp, newipl);
9823446Smrj 		switch_sp_and_call(newsp, dispatch_hardint, vector, oldipl);
9833446Smrj 	}
9843446Smrj 
98510175SStuart.Maybee@Sun.COM #if !defined(__xpv)
9863446Smrj 	/*
9873446Smrj 	 * Deliver any pending soft interrupts.
9883446Smrj 	 */
9893446Smrj 	if (cpu->cpu_softinfo.st_pending)
9903446Smrj 		dosoftint(rp);
99110175SStuart.Maybee@Sun.COM #endif	/* !__xpv */
9923446Smrj }
9933446Smrj 
99410175SStuart.Maybee@Sun.COM 
9953446Smrj /*
9963446Smrj  * Common tasks always done by _sys_rtt, called with interrupts disabled.
9973446Smrj  * Returns 1 if returning to userland, 0 if returning to system mode.
9983446Smrj  */
9993446Smrj int
10003446Smrj sys_rtt_common(struct regs *rp)
10013446Smrj {
10023446Smrj 	kthread_t *tp;
10033446Smrj 	extern void mutex_exit_critical_start();
10043446Smrj 	extern long mutex_exit_critical_size;
10055834Spt157919 	extern void mutex_owner_running_critical_start();
10065834Spt157919 	extern long mutex_owner_running_critical_size;
10073446Smrj 
10083446Smrj loop:
10093446Smrj 
10103446Smrj 	/*
10113446Smrj 	 * Check if returning to user
10123446Smrj 	 */
10133446Smrj 	tp = CPU->cpu_thread;
10143446Smrj 	if (USERMODE(rp->r_cs)) {
10153446Smrj 		/*
10163446Smrj 		 * Check if AST pending.
10173446Smrj 		 */
10183446Smrj 		if (tp->t_astflag) {
10193446Smrj 			/*
10203446Smrj 			 * Let trap() handle the AST
10213446Smrj 			 */
10223446Smrj 			sti();
10233446Smrj 			rp->r_trapno = T_AST;
10243446Smrj 			trap(rp, (caddr_t)0, CPU->cpu_id);
10253446Smrj 			cli();
10263446Smrj 			goto loop;
10273446Smrj 		}
10283446Smrj 
10293446Smrj #if defined(__amd64)
10303446Smrj 		/*
10313446Smrj 		 * We are done if segment registers do not need updating.
10323446Smrj 		 */
10334503Ssudheer 		if (tp->t_lwp->lwp_pcb.pcb_rupdate == 0)
10343446Smrj 			return (1);
10353446Smrj 
10363446Smrj 		if (update_sregs(rp, tp->t_lwp)) {
10373446Smrj 			/*
10383446Smrj 			 * 1 or more of the selectors is bad.
10393446Smrj 			 * Deliver a SIGSEGV.
10403446Smrj 			 */
10413446Smrj 			proc_t *p = ttoproc(tp);
10423446Smrj 
10433446Smrj 			sti();
10443446Smrj 			mutex_enter(&p->p_lock);
10453446Smrj 			tp->t_lwp->lwp_cursig = SIGSEGV;
10463446Smrj 			mutex_exit(&p->p_lock);
10473446Smrj 			psig();
10483446Smrj 			tp->t_sig_check = 1;
10493446Smrj 			cli();
10503446Smrj 		}
10514503Ssudheer 		tp->t_lwp->lwp_pcb.pcb_rupdate = 0;
10523446Smrj 
10533446Smrj #endif	/* __amd64 */
10543446Smrj 		return (1);
10553446Smrj 	}
10563446Smrj 
10573446Smrj 	/*
10583446Smrj 	 * Here if we are returning to supervisor mode.
10593446Smrj 	 * Check for a kernel preemption request.
10603446Smrj 	 */
10613446Smrj 	if (CPU->cpu_kprunrun && (rp->r_ps & PS_IE)) {
10623446Smrj 
10633446Smrj 		/*
10643446Smrj 		 * Do nothing if already in kpreempt
10653446Smrj 		 */
10663446Smrj 		if (!tp->t_preempt_lk) {
10673446Smrj 			tp->t_preempt_lk = 1;
10683446Smrj 			sti();
10693446Smrj 			kpreempt(1); /* asynchronous kpreempt call */
10703446Smrj 			cli();
10713446Smrj 			tp->t_preempt_lk = 0;
10723446Smrj 		}
10733446Smrj 	}
10743446Smrj 
10753446Smrj 	/*
10763446Smrj 	 * If we interrupted the mutex_exit() critical region we must
10773446Smrj 	 * reset the PC back to the beginning to prevent missed wakeups
10783446Smrj 	 * See the comments in mutex_exit() for details.
10793446Smrj 	 */
10803446Smrj 	if ((uintptr_t)rp->r_pc - (uintptr_t)mutex_exit_critical_start <
10813446Smrj 	    mutex_exit_critical_size) {
10823446Smrj 		rp->r_pc = (greg_t)mutex_exit_critical_start;
10833446Smrj 	}
10845834Spt157919 
10855834Spt157919 	/*
10865834Spt157919 	 * If we interrupted the mutex_owner_running() critical region we
10875834Spt157919 	 * must reset the PC back to the beginning to prevent dereferencing
10885834Spt157919 	 * of a freed thread pointer. See the comments in mutex_owner_running
10895834Spt157919 	 * for details.
10905834Spt157919 	 */
10915834Spt157919 	if ((uintptr_t)rp->r_pc -
10925834Spt157919 	    (uintptr_t)mutex_owner_running_critical_start <
10935834Spt157919 	    mutex_owner_running_critical_size) {
10945834Spt157919 		rp->r_pc = (greg_t)mutex_owner_running_critical_start;
10955834Spt157919 	}
10965834Spt157919 
10973446Smrj 	return (0);
10983446Smrj }
10993446Smrj 
/*
 * Forward (cpuid, int_level) to the routine currently installed in the
 * send_dirintf function pointer.
 */
void
send_dirint(int cpuid, int int_level)
{
	send_dirintf(cpuid, int_level);
}
11053446Smrj 
11063446Smrj /*
11073446Smrj  * do_splx routine, takes new ipl to set
11083446Smrj  * returns the old ipl.
11093446Smrj  * We are careful not to set priority lower than CPU->cpu_base_pri,
11103446Smrj  * even though it seems we're raising the priority, it could be set
11113446Smrj  * higher at any time by an interrupt routine, so we must block interrupts
11123446Smrj  * and look at CPU->cpu_base_pri
11133446Smrj  */
11143446Smrj int
11153446Smrj do_splx(int newpri)
11163446Smrj {
11173446Smrj 	ulong_t	flag;
11183446Smrj 	cpu_t	*cpu;
11193446Smrj 	int	curpri, basepri;
11203446Smrj 
11213446Smrj 	flag = intr_clear();
11223446Smrj 	cpu = CPU; /* ints are disabled, now safe to cache cpu ptr */
11233446Smrj 	curpri = cpu->cpu_m.mcpu_pri;
11243446Smrj 	basepri = cpu->cpu_base_spl;
11253446Smrj 	if (newpri < basepri)
11263446Smrj 		newpri = basepri;
11273446Smrj 	cpu->cpu_m.mcpu_pri = newpri;
11283446Smrj 	(*setspl)(newpri);
11293446Smrj 	/*
11303446Smrj 	 * If we are going to reenable interrupts see if new priority level
11313446Smrj 	 * allows pending softint delivery.
11323446Smrj 	 */
11333446Smrj 	if ((flag & PS_IE) &&
11343446Smrj 	    bsrw_insn((uint16_t)cpu->cpu_softinfo.st_pending) > newpri)
11353446Smrj 		fakesoftint();
11363446Smrj 	ASSERT(!interrupts_enabled());
11373446Smrj 	intr_restore(flag);
11383446Smrj 	return (curpri);
11393446Smrj }
11403446Smrj 
11413446Smrj /*
11423446Smrj  * Common spl raise routine, takes new ipl to set
11433446Smrj  * returns the old ipl, will not lower ipl.
11443446Smrj  */
11453446Smrj int
11463446Smrj splr(int newpri)
11473446Smrj {
11483446Smrj 	ulong_t	flag;
11493446Smrj 	cpu_t	*cpu;
11503446Smrj 	int	curpri, basepri;
11513446Smrj 
11523446Smrj 	flag = intr_clear();
11533446Smrj 	cpu = CPU; /* ints are disabled, now safe to cache cpu ptr */
11543446Smrj 	curpri = cpu->cpu_m.mcpu_pri;
11553446Smrj 	/*
11563446Smrj 	 * Only do something if new priority is larger
11573446Smrj 	 */
11583446Smrj 	if (newpri > curpri) {
11593446Smrj 		basepri = cpu->cpu_base_spl;
11603446Smrj 		if (newpri < basepri)
11613446Smrj 			newpri = basepri;
11623446Smrj 		cpu->cpu_m.mcpu_pri = newpri;
11633446Smrj 		(*setspl)(newpri);
11643446Smrj 		/*
11653446Smrj 		 * See if new priority level allows pending softint delivery
11663446Smrj 		 */
11673446Smrj 		if ((flag & PS_IE) &&
11683446Smrj 		    bsrw_insn((uint16_t)cpu->cpu_softinfo.st_pending) > newpri)
11693446Smrj 			fakesoftint();
11703446Smrj 	}
11713446Smrj 	intr_restore(flag);
11723446Smrj 	return (curpri);
11733446Smrj }
11743446Smrj 
11753446Smrj int
11763446Smrj getpil(void)
11773446Smrj {
11783446Smrj 	return (CPU->cpu_m.mcpu_pri);
11793446Smrj }
11803446Smrj 
11813446Smrj int
1182*11389SAlexander.Kolbasov@Sun.COM spl_xcall(void)
1183*11389SAlexander.Kolbasov@Sun.COM {
1184*11389SAlexander.Kolbasov@Sun.COM 	return (splr(ipltospl(XCALL_PIL)));
1185*11389SAlexander.Kolbasov@Sun.COM }
1186*11389SAlexander.Kolbasov@Sun.COM 
1187*11389SAlexander.Kolbasov@Sun.COM int
11883446Smrj interrupts_enabled(void)
11893446Smrj {
11903446Smrj 	ulong_t	flag;
11913446Smrj 
11923446Smrj 	flag = getflags();
11933446Smrj 	return ((flag & PS_IE) == PS_IE);
11943446Smrj }
11953446Smrj 
#ifdef DEBUG
/*
 * DEBUG-only check: once interrupts_unleashed is set, interrupts must be
 * enabled whenever this is called.
 */
void
assert_ints_enabled(void)
{
	ASSERT(!interrupts_unleashed || interrupts_enabled());
}
#endif	/* DEBUG */
1203