/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/x86_archext.h>
#include <sys/machsystm.h>
#include <sys/x_call.h>
#include <sys/stat.h>
#include <sys/acpi/acpi.h>
#include <sys/acpica.h>
#include <sys/cpu_acpi.h>
#include <sys/cpu_idle.h>
#include <sys/cpupm.h>
#include <sys/hpet.h>
#include <sys/archsystm.h>
#include <vm/hat_i86.h>
#include <sys/dtrace.h>
#include <sys/sdt.h>
#include <sys/callb.h>

extern void cpu_idle_adaptive(void);
extern uint32_t cpupm_next_cstate(cma_c_state_t *cs_data,
    cpu_acpi_cstate_t *cstates, uint32_t cs_count, hrtime_t start);

static int cpu_idle_init(cpu_t *);
static void cpu_idle_fini(cpu_t *);
static boolean_t cpu_deep_idle_callb(void *arg, int code);
static boolean_t cpu_idle_cpr_callb(void *arg, int code);
static void acpi_cpu_cstate(cpu_acpi_cstate_t *cstate);

/*
 * Interfaces for modules implementing Intel's deep c-state.
 */
cpupm_state_ops_t cpu_idle_ops = {
	"Generic ACPI C-state Support",
	cpu_idle_init,
	cpu_idle_fini,
	NULL
};

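/*
 * Callback registration state for the deep-idle and CPR callbacks below.
 * cpu_idle_cfg_state is only modified while holding cpu_idle_callb_mutex.
 */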
static kmutex_t		cpu_idle_callb_mutex;
static callb_id_t	cpu_deep_idle_callb_id;
static callb_id_t	cpu_idle_cpr_callb_id;
static uint_t		cpu_idle_cfg_state;

static kmutex_t cpu_idle_mutex;

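/*
 * Shared data template for the per-c-state "cstate" kstats created in
 * cpu_idle_init().  The kstats are virtual and reuse this single buffer,
 * serialized by cpu_idle_mutex (installed as ks_lock).
 */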
cpu_idle_kstat_t cpu_idle_kstat = {
	{ "address_space_id",	KSTAT_DATA_STRING },
	{ "latency",		KSTAT_DATA_UINT32 },
	{ "power",		KSTAT_DATA_UINT32 },
};

/*
 * kstat update function for the c-state info
 */
static int
cpu_idle_kstat_update(kstat_t *ksp, int flag)
{
	cpu_acpi_cstate_t *cstate = ksp->ks_private;

	if (flag == KSTAT_WRITE) {
		return (EACCES);
	}

	if (cstate->cs_addrspace_id == ACPI_ADR_SPACE_FIXED_HARDWARE) {
		kstat_named_setstr(&cpu_idle_kstat.addr_space_id,
		"FFixedHW");
	} else if (cstate->cs_addrspace_id == ACPI_ADR_SPACE_SYSTEM_IO) {
		kstat_named_setstr(&cpu_idle_kstat.addr_space_id,
		"SystemIO");
	} else {
		kstat_named_setstr(&cpu_idle_kstat.addr_space_id,
		"Unsupported");
	}

	cpu_idle_kstat.cs_latency.value.ui32 = cstate->cs_latency;
	cpu_idle_kstat.cs_power.value.ui32 = cstate->cs_power;

	return (0);
}

/*
 * c-state wakeup function.
 * Similar to cpu_wakeup and cpu_wakeup_mwait except this function deals
 * with CPUs asleep in MWAIT, HLT, or ACPI Deep C-State.
 */
void
cstate_wakeup(cpu_t *cp, int bound)
{
	struct machcpu	*mcpu = &(cp->cpu_m);
	volatile uint32_t *mcpu_mwait = mcpu->mcpu_mwait;
	cpupart_t	*cpu_part;
	uint_t		cpu_found;
	processorid_t	cpu_sid;

	cpu_part = cp->cpu_part;
	cpu_sid = cp->cpu_seqid;
	if (bitset_in_set(&cpu_part->cp_haltset, cpu_sid)) {
		/*
		 * Clear the halted bit for that CPU since it will be
		 * poked in a moment.
		 */
		bitset_atomic_del(&cpu_part->cp_haltset, cpu_sid);

		/*
		 * We may find the current CPU present in the halted cpuset
		 * if we're in the context of an interrupt that occurred
		 * before we had a chance to clear our bit in cpu_idle().
		 * Waking ourself is obviously unnecessary, since if
		 * we're here, we're not halted.
		 */
		if (cp != CPU) {
			/*
			 * Use correct wakeup mechanism
			 */
			if ((mcpu_mwait != NULL) &&
			    (*mcpu_mwait == MWAIT_HALTED))
				MWAIT_WAKEUP(cp);
			else
				poke_cpu(cp->cpu_id);
		}
		return;
	} else {
		/*
		 * This cpu isn't halted, but it's idle or undergoing a
		 * context switch. No need to awaken anyone else.
		 */
		if (cp->cpu_thread == cp->cpu_idle_thread ||
		    cp->cpu_disp_flags & CPU_DISP_DONTSTEAL)
			return;
	}

	/*
	 * No need to wake up other CPUs if the thread we just enqueued
	 * is bound.
	 */
	if (bound)
		return;


	/*
	 * See if there are any other halted CPUs. If there are, then
	 * select one, and awaken it.
	 * It's possible that after we find a CPU, somebody else
	 * will awaken it before we get the chance.
	 * In that case, look again.
	 */
	do {
		cpu_found = bitset_find(&cpu_part->cp_haltset);
		if (cpu_found == (uint_t)-1)
			return;

	} while (bitset_atomic_test_and_del(&cpu_part->cp_haltset,
	    cpu_found) < 0);

	/*
	 * Must use correct wakeup mechanism to avoid lost wakeup of
	 * alternate cpu.
	 */
	if (cpu_found != CPU->cpu_seqid) {
		mcpu_mwait = cpu[cpu_found]->cpu_m.mcpu_mwait;
		if ((mcpu_mwait != NULL) && (*mcpu_mwait == MWAIT_HALTED))
			MWAIT_WAKEUP(cpu_seq[cpu_found]);
		else
			poke_cpu(cpu_seq[cpu_found]->cpu_id);
	}
}

/*
 * enter deep c-state handler
 */
static void
acpi_cpu_cstate(cpu_acpi_cstate_t *cstate)
{
	volatile uint32_t	*mcpu_mwait = CPU->cpu_m.mcpu_mwait;
	cpu_t			*cpup = CPU;
	processorid_t		cpu_sid = cpup->cpu_seqid;
	cpupart_t		*cp = cpup->cpu_part;
	hrtime_t		lapic_expire;
	uint8_t			type = cstate->cs_addrspace_id;
	uint32_t		cs_type = cstate->cs_type;
	int			hset_update = 1;
	boolean_t		using_hpet_timer;

	/*
	 * Set our mcpu_mwait here, so we can tell if anyone tries to
	 * wake us between now and when we call mwait.  No other cpu will
	 * attempt to set our mcpu_mwait until we add ourself to the haltset.
	 */
	if (mcpu_mwait) {
		if (type == ACPI_ADR_SPACE_SYSTEM_IO)
			*mcpu_mwait = MWAIT_WAKEUP_IPI;
		else
			*mcpu_mwait = MWAIT_HALTED;
	}

	/*
	 * If this CPU is online, and there are multiple CPUs
	 * in the system, then we should note our halting
	 * by adding ourselves to the partition's halted CPU
	 * bitmap. This allows other CPUs to find/awaken us when
	 * work becomes available.
	 */
	if (cpup->cpu_flags & CPU_OFFLINE || ncpus == 1)
		hset_update = 0;

	/*
	 * Add ourselves to the partition's halted CPUs bitmask
	 * and set our HALTED flag, if necessary.
	 *
	 * When a thread becomes runnable, it is placed on the queue
	 * and then the halted cpuset is checked to determine who
	 * (if anyone) should be awakened. We therefore need to first
	 * add ourselves to the halted cpuset, and then check if there
	 * is any work available.
	 *
	 * Note that memory barriers after updating the HALTED flag
	 * are not necessary since an atomic operation (updating the bitmap)
	 * immediately follows. On x86 the atomic operation acts as a
	 * memory barrier for the update of cpu_disp_flags.
	 */
	if (hset_update) {
		cpup->cpu_disp_flags |= CPU_DISP_HALTED;
		bitset_atomic_add(&cp->cp_haltset, cpu_sid);
	}

	/*
	 * Check to make sure there's really nothing to do.
	 * Work destined for this CPU may become available after
	 * this check. We'll be notified through the clearing of our
	 * bit in the halted CPU bitmask, and a write to our mcpu_mwait.
	 *
	 * disp_anywork() checks disp_nrunnable, so we do not have to later.
	 */
	if (disp_anywork()) {
		if (hset_update) {
			cpup->cpu_disp_flags &= ~CPU_DISP_HALTED;
			bitset_atomic_del(&cp->cp_haltset, cpu_sid);
		}
		return;
	}

	/*
	 * We're on our way to being halted.
	 *
	 * The local APIC timer can stop in ACPI C2 and deeper c-states.
	 * Program the HPET hardware to substitute for this CPU's lAPIC timer.
	 * hpet.use_hpet_timer() disables the LAPIC Timer.  Make sure to
	 * start the LAPIC Timer again before leaving this function.
	 *
	 * hpet.use_hpet_timer disables interrupts, so we will awaken
	 * immediately after halting if someone tries to poke us between now
	 * and the time we actually halt.
	 */
	using_hpet_timer = hpet.use_hpet_timer(&lapic_expire);

	/*
	 * We check for the presence of our bit after disabling interrupts.
	 * If it's cleared, we'll return. If the bit is cleared after
	 * we check then the cstate_wakeup() will pop us out of the halted
	 * state.
	 *
	 * This means that the ordering within cstate_wakeup() and the
	 * clearing of the bit is important.
	 * cstate_wakeup() must clear our cp_haltset bit, and then wake us
	 * (via an mcpu_mwait write or a poke).
	 * acpi_cpu_cstate() must disable interrupts, then check for the bit.
	 */
	if (hset_update && bitset_in_set(&cp->cp_haltset, cpu_sid) == 0) {
		hpet.use_lapic_timer(lapic_expire);
		cpup->cpu_disp_flags &= ~CPU_DISP_HALTED;
		return;
	}

	/*
	 * The check for anything locally runnable is here for performance
	 * and isn't needed for correctness. disp_nrunnable ought to be
	 * in our cache still, so it's inexpensive to check, and if there
	 * is anything runnable we won't have to wait for the poke.
	 */
	if (cpup->cpu_disp->disp_nrunnable != 0) {
		hpet.use_lapic_timer(lapic_expire);
		if (hset_update) {
			cpup->cpu_disp_flags &= ~CPU_DISP_HALTED;
			bitset_atomic_del(&cp->cp_haltset, cpu_sid);
		}
		return;
	}

	if (using_hpet_timer == B_FALSE) {

		hpet.use_lapic_timer(lapic_expire);

		/*
		 * We are currently unable to program the HPET to act as this
		 * CPU's proxy lAPIC timer.  This CPU cannot enter C2 or deeper
		 * because no timer is set to wake it up while its lAPIC timer
		 * stalls in deep C-States.
		 * Enter C1 instead.
		 *
		 * cstate_wake_cpu() will wake this CPU with an IPI which
		 * works with MWAIT.
		 */
		i86_monitor(mcpu_mwait, 0, 0);
		if ((*mcpu_mwait & ~MWAIT_WAKEUP_IPI) == MWAIT_HALTED) {
			cpu_dtrace_idle_probe(CPU_ACPI_C1);

			tlb_going_idle();
			i86_mwait(0, 0);
			tlb_service();

			cpu_dtrace_idle_probe(CPU_ACPI_C0);
		}

		/*
		 * We're no longer halted
		 */
		if (hset_update) {
			cpup->cpu_disp_flags &= ~CPU_DISP_HALTED;
			bitset_atomic_del(&cp->cp_haltset, cpu_sid);
		}
		return;
	}

	cpu_dtrace_idle_probe((uint_t)cs_type);

	if (type == ACPI_ADR_SPACE_FIXED_HARDWARE) {
		/*
		 * We're on our way to being halted.
		 * To avoid a lost wakeup, arm the monitor before checking
		 * if another cpu wrote to mcpu_mwait to wake us up.
		 */
		i86_monitor(mcpu_mwait, 0, 0);
		if (*mcpu_mwait == MWAIT_HALTED) {
			uint32_t eax = cstate->cs_address;
			uint32_t ecx = 1;

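			/*
			 * Enter the target C-state via MWAIT.  eax carries
			 * the MWAIT hint taken from the FFixedHW _CST
			 * entry (cs_address); ecx bit 0 requests that
			 * interrupts break the MWAIT even while masked.
			 */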
			tlb_going_idle();
			i86_mwait(eax, ecx);
			tlb_service();
		}
	} else if (type == ACPI_ADR_SPACE_SYSTEM_IO) {
		uint32_t value;
		ACPI_TABLE_FADT *gbl_FADT;

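		/*
		 * For SystemIO _CST entries the C-state is entered by
		 * reading the c-state's I/O port; the follow-up dummy read
		 * of the ACPI PM timer gives the chipset time to complete
		 * the transition before execution continues.
		 */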
		if (*mcpu_mwait == MWAIT_WAKEUP_IPI) {
			tlb_going_idle();
			(void) cpu_acpi_read_port(cstate->cs_address,
			    &value, 8);
			acpica_get_global_FADT(&gbl_FADT);
			(void) cpu_acpi_read_port(
			    gbl_FADT->XPmTimerBlock.Address, &value, 32);
			tlb_service();
		}
	}

	/*
	 * The lAPIC timer may have stopped in deep c-state.
	 * Reprogram this CPU's lAPIC here before enabling interrupts.
	 */
	hpet.use_lapic_timer(lapic_expire);

	cpu_dtrace_idle_probe(CPU_ACPI_C0);

	/*
	 * We're no longer halted
	 */
	if (hset_update) {
		cpup->cpu_disp_flags &= ~CPU_DISP_HALTED;
		bitset_atomic_del(&cp->cp_haltset, cpu_sid);
	}
}

/*
 * Return whether bus masters have been active (BM_STS set), clearing
 * the status bit if it was set.
 */
static uint32_t
cpu_acpi_bm_sts(void)
{
	uint32_t bm_sts = 0;

	cpu_acpi_get_register(ACPI_BITREG_BUS_MASTER_STATUS, &bm_sts);

	if (bm_sts)
		cpu_acpi_set_register(ACPI_BITREG_BUS_MASTER_STATUS, 1);

	return (bm_sts);
}

/*
 * Idle the present CPU, using deep c-states when possible.
 */
void
cpu_acpi_idle(void)
{
	cpu_t *cp = CPU;
	cpu_acpi_handle_t handle;
	cma_c_state_t *cs_data;
	cpu_acpi_cstate_t *cstates;
	hrtime_t start, end;
	int cpu_max_cstates;
	uint32_t cs_indx;
	uint16_t cs_type;

	cpupm_mach_state_t *mach_state =
	    (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state;
	handle = mach_state->ms_acpi_handle;
	ASSERT(CPU_ACPI_CSTATES(handle) != NULL);

	cs_data = mach_state->ms_cstate.cma_state.cstate;
	cstates = (cpu_acpi_cstate_t *)CPU_ACPI_CSTATES(handle);
	ASSERT(cstates != NULL);
	cpu_max_cstates = cpu_acpi_get_max_cstates(handle);
	if (cpu_max_cstates > CPU_MAX_CSTATES)
		cpu_max_cstates = CPU_MAX_CSTATES;
	if (cpu_max_cstates == 1) {	/* no ACPI c-state data */
		(*non_deep_idle_cpu)();
		return;
	}

	start = gethrtime_unscaled();

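	/*
	 * Ask the c-state governor (cpupm_next_cstate()) which c-state to
	 * enter, based on the idle-entry time and accumulated idle history.
	 */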
	cs_indx = cpupm_next_cstate(cs_data, cstates, cpu_max_cstates, start);

	/*
	 * OSPM uses the BM_STS bit to determine the power state to enter
	 * when considering a transition to or from the C2/C3 power state.
	 * If C3 is chosen, bus master activity demotes the power state
	 * to C2.
	 */
	if ((cstates[cs_indx].cs_type >= CPU_ACPI_C3) && cpu_acpi_bm_sts())
		--cs_indx;
	cs_type = cstates[cs_indx].cs_type;

	/*
	 * BM_RLD determines if the Cx power state was exited as a result of
	 * bus master requests. Set this bit when using a C3 power state, and
	 * clear it when using a C1 or C2 power state.
	 */
	if ((CPU_ACPI_BM_INFO(handle) & BM_RLD) && (cs_type < CPU_ACPI_C3)) {
		cpu_acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0);
		CPU_ACPI_BM_INFO(handle) &= ~BM_RLD;
	}

	if ((!(CPU_ACPI_BM_INFO(handle) & BM_RLD)) &&
	    (cs_type >= CPU_ACPI_C3)) {
		cpu_acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 1);
		CPU_ACPI_BM_INFO(handle) |= BM_RLD;
	}

	switch (cs_type) {
	default:
		/* FALLTHROUGH */
	case CPU_ACPI_C1:
		(*non_deep_idle_cpu)();
		break;

	case CPU_ACPI_C2:
		acpi_cpu_cstate(&cstates[cs_indx]);
		break;

	case CPU_ACPI_C3:
		/*
		 * As recommended by the ACPI spec, use the hardware
		 * mechanism to prevent bus masters from writing to memory
		 * while in C3 (UP-only).
		 */
		if ((ncpus_online == 1) &&
		    (CPU_ACPI_BM_INFO(handle) & BM_CTL)) {
			cpu_acpi_set_register(ACPI_BITREG_ARB_DISABLE, 1);
			CPU_ACPI_BM_INFO(handle) |= BM_ARB_DIS;
		/*
		 * Today, all Intel processors that support C3 share cache,
		 * so no explicit cache flush is needed.
		 */
		} else if (x86_vendor != X86_VENDOR_Intel) {
			__acpi_wbinvd();
		}
		acpi_cpu_cstate(&cstates[cs_indx]);
		if (CPU_ACPI_BM_INFO(handle) & BM_ARB_DIS) {
			cpu_acpi_set_register(ACPI_BITREG_ARB_DISABLE, 0);
			CPU_ACPI_BM_INFO(handle) &= ~BM_ARB_DIS;
		}
		break;
	}

	end = gethrtime_unscaled();

	/*
	 * Update statistics
	 */
	cpupm_wakeup_cstate_data(cs_data, end);
}

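/*
 * Determine whether deep c-states may be used: deep idle must not be
 * disabled via the idle_cpu_no_deep_c tunable, the processor must
 * advertise deep c-state support, and the HPET must be usable as a
 * proxy for the local APIC timer (which may stop in C2 and deeper).
 */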
boolean_t
cpu_deep_cstates_supported(void)
{
	extern int	idle_cpu_no_deep_c;

	if (idle_cpu_no_deep_c)
		return (B_FALSE);

	if (!cpuid_deep_cstates_supported())
		return (B_FALSE);

	if ((hpet.supported != HPET_FULL_SUPPORT) || !hpet.install_proxy())
		return (B_FALSE);

	return (B_TRUE);
}

/*
 * Validate that this processor supports deep c-states and if so,
 * get the c-state data from ACPI and cache it.
 */
static int
cpu_idle_init(cpu_t *cp)
{
	cpupm_mach_state_t *mach_state =
	    (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state;
	cpu_acpi_handle_t handle = mach_state->ms_acpi_handle;
	cpu_acpi_cstate_t *cstate;
	char name[KSTAT_STRLEN];
	int cpu_max_cstates, i;
	ACPI_TABLE_FADT *gbl_FADT;

	/*
	 * Cache the C-state specific ACPI data.
	 */
	if (cpu_acpi_cache_cstate_data(handle) != 0) {
		cmn_err(CE_NOTE,
		    "!cpu_idle_init: Failed to cache ACPI C-state data\n");
		cpu_idle_fini(cp);
		return (-1);
	}

	/*
	 * Check the bus master arbitration control ability.
	 */
	acpica_get_global_FADT(&gbl_FADT);
	if (gbl_FADT->Pm2ControlBlock && gbl_FADT->Pm2ControlLength)
		CPU_ACPI_BM_INFO(handle) |= BM_CTL;

	cstate = (cpu_acpi_cstate_t *)CPU_ACPI_CSTATES(handle);

	cpu_max_cstates = cpu_acpi_get_max_cstates(handle);

	for (i = CPU_ACPI_C1; i <= cpu_max_cstates; i++) {
		(void) snprintf(name, KSTAT_STRLEN - 1, "c%d", cstate->cs_type);
		/*
		 * Allocate, initialize and install cstate kstat
		 */
		cstate->cs_ksp = kstat_create("cstate", CPU->cpu_id,
		    name, "misc",
		    KSTAT_TYPE_NAMED,
		    sizeof (cpu_idle_kstat) / sizeof (kstat_named_t),
		    KSTAT_FLAG_VIRTUAL);

		if (cstate->cs_ksp == NULL) {
			cmn_err(CE_NOTE, "kstat_create(c_state) fail");
		} else {
			cstate->cs_ksp->ks_data = &cpu_idle_kstat;
			cstate->cs_ksp->ks_lock = &cpu_idle_mutex;
			cstate->cs_ksp->ks_update = cpu_idle_kstat_update;
			cstate->cs_ksp->ks_data_size += MAXNAMELEN;
			cstate->cs_ksp->ks_private = cstate;
			kstat_install(cstate->cs_ksp);
			cstate++;
		}
	}

	cpupm_alloc_domains(cp, CPUPM_C_STATES);
	cpupm_alloc_ms_cstate(cp);

	if (cpu_deep_cstates_supported()) {
		mutex_enter(&cpu_idle_callb_mutex);
		if (cpu_deep_idle_callb_id == (callb_id_t)0)
			cpu_deep_idle_callb_id = callb_add(&cpu_deep_idle_callb,
			    (void *)NULL, CB_CL_CPU_DEEP_IDLE, "cpu_deep_idle");
		if (cpu_idle_cpr_callb_id == (callb_id_t)0)
			cpu_idle_cpr_callb_id = callb_add(&cpu_idle_cpr_callb,
			    (void *)NULL, CB_CL_CPR_PM, "cpu_idle_cpr");
		mutex_exit(&cpu_idle_callb_mutex);
	}

	return (0);
}

/*
 * Free resources allocated by cpu_idle_init().
 */
static void
cpu_idle_fini(cpu_t *cp)
{
	cpupm_mach_state_t *mach_state =
	    (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state);
	cpu_acpi_handle_t handle = mach_state->ms_acpi_handle;
	cpu_acpi_cstate_t *cstate;
	uint_t	cpu_max_cstates, i;

	/*
	 * Point the idle entry back at the generic (non-deep) idle routine.
	 */
	idle_cpu = CPU->cpu_m.mcpu_idle_cpu = non_deep_idle_cpu;
	disp_enq_thread = non_deep_idle_disp_enq_thread;

	cstate = (cpu_acpi_cstate_t *)CPU_ACPI_CSTATES(handle);
	if (cstate) {
		cpu_max_cstates = cpu_acpi_get_max_cstates(handle);

		for (i = CPU_ACPI_C1; i <= cpu_max_cstates; i++) {
			if (cstate->cs_ksp != NULL)
				kstat_delete(cstate->cs_ksp);
			cstate++;
		}
	}

	cpupm_free_ms_cstate(cp);
	cpupm_free_domains(&cpupm_cstate_domains);
	cpu_acpi_free_cstate_data(handle);

	mutex_enter(&cpu_idle_callb_mutex);
	if (cpu_deep_idle_callb_id != (callb_id_t)0) {
		(void) callb_delete(cpu_deep_idle_callb_id);
		cpu_deep_idle_callb_id = (callb_id_t)0;
	}
	if (cpu_idle_cpr_callb_id != (callb_id_t)0) {
		(void) callb_delete(cpu_idle_cpr_callb_id);
		cpu_idle_cpr_callb_id = (callb_id_t)0;
	}
	mutex_exit(&cpu_idle_callb_mutex);
}

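/*
 * Power-management callback: enables or disables use of deep c-states
 * in the idle loop in response to cpu-deep-idle requests, coordinating
 * with the HPET proxy code via hpet.callback().
 */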
/*ARGSUSED*/
static boolean_t
cpu_deep_idle_callb(void *arg, int code)
{
	boolean_t rslt = B_TRUE;

	mutex_enter(&cpu_idle_callb_mutex);
	switch (code) {
	case PM_DEFAULT_CPU_DEEP_IDLE:
		/*
		 * Default policy is same as enable
		 */
		/*FALLTHROUGH*/
	case PM_ENABLE_CPU_DEEP_IDLE:
		if ((cpu_idle_cfg_state & CPU_IDLE_DEEP_CFG) == 0)
			break;

		if (hpet.callback(PM_ENABLE_CPU_DEEP_IDLE)) {
			disp_enq_thread = cstate_wakeup;
			idle_cpu = cpu_idle_adaptive;
			cpu_idle_cfg_state &= ~CPU_IDLE_DEEP_CFG;
		} else {
			rslt = B_FALSE;
		}
		break;

	case PM_DISABLE_CPU_DEEP_IDLE:
		if (cpu_idle_cfg_state & CPU_IDLE_DEEP_CFG)
			break;

		idle_cpu = non_deep_idle_cpu;
		if (hpet.callback(PM_DISABLE_CPU_DEEP_IDLE)) {
			disp_enq_thread = non_deep_idle_disp_enq_thread;
			cpu_idle_cfg_state |= CPU_IDLE_DEEP_CFG;
		}
		break;

	default:
		cmn_err(CE_NOTE, "!cpu deep_idle_callb: invalid code %d\n",
		    code);
		break;
	}
	mutex_exit(&cpu_idle_callb_mutex);
	return (rslt);
}

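/*
 * CPR (suspend/resume) callback: falls back to the non-deep idle entry
 * points around a checkpoint and restores the deep c-state entry points
 * on resume, unless deep idle was disabled by the user.
 */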
/*ARGSUSED*/
static boolean_t
cpu_idle_cpr_callb(void *arg, int code)
{
	boolean_t rslt = B_TRUE;

	mutex_enter(&cpu_idle_callb_mutex);
	switch (code) {
	case CB_CODE_CPR_RESUME:
		if (hpet.callback(CB_CODE_CPR_RESUME)) {
			/*
			 * Do not enable dispatcher hooks if disabled by user.
			 */
			if (cpu_idle_cfg_state & CPU_IDLE_DEEP_CFG)
				break;

			disp_enq_thread = cstate_wakeup;
			idle_cpu = cpu_idle_adaptive;
		} else {
			rslt = B_FALSE;
		}
		break;

	case CB_CODE_CPR_CHKPT:
		idle_cpu = non_deep_idle_cpu;
		disp_enq_thread = non_deep_idle_disp_enq_thread;
		hpet.callback(CB_CODE_CPR_CHKPT);
		break;

	default:
		cmn_err(CE_NOTE, "!cpudvr cpr_callb: invalid code %d\n", code);
		break;
	}
	mutex_exit(&cpu_idle_callb_mutex);
	return (rslt);
}

/*
 * Handle a _CST change notification.
 */
void
cpuidle_cstate_instance(cpu_t *cp)
{
#ifndef	__xpv
	cpupm_mach_state_t	*mach_state =
	    (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state;
	cpu_acpi_handle_t	handle;
	struct machcpu		*mcpu;
	cpuset_t 		dom_cpu_set;
	kmutex_t		*pm_lock;
	int			result = 0;
	processorid_t		cpu_id;

	if (mach_state == NULL) {
		return;
	}

	ASSERT(mach_state->ms_cstate.cma_domain != NULL);
	dom_cpu_set = mach_state->ms_cstate.cma_domain->pm_cpus;
	pm_lock = &mach_state->ms_cstate.cma_domain->pm_lock;

	/*
	 * Do this for all the CPUs in the domain.
	 */
	mutex_enter(pm_lock);
	do {
		CPUSET_FIND(dom_cpu_set, cpu_id);
		if (cpu_id == CPUSET_NOTINSET)
			break;

		ASSERT(cpu_id >= 0 && cpu_id < NCPU);
		cp = cpu[cpu_id];
		mach_state = (cpupm_mach_state_t *)
		    cp->cpu_m.mcpu_pm_mach_state;
		if (!(mach_state->ms_caps & CPUPM_C_STATES)) {
			mutex_exit(pm_lock);
			return;
		}
		handle = mach_state->ms_acpi_handle;
		ASSERT(handle != NULL);

		/*
		 * re-evaluate cstate object
		 */
		if (cpu_acpi_cache_cstate_data(handle) != 0) {
			cmn_err(CE_WARN, "Cannot re-evaluate the cpu c-state"
			    " object Instance: %d", cpu_id);
		}
		mutex_enter(&cpu_lock);
		mcpu = &(cp->cpu_m);
		mcpu->max_cstates = cpu_acpi_get_max_cstates(handle);
		if (mcpu->max_cstates > CPU_ACPI_C1) {
			hpet.callback(CST_EVENT_MULTIPLE_CSTATES);
			disp_enq_thread = cstate_wakeup;
			cp->cpu_m.mcpu_idle_cpu = cpu_acpi_idle;
		} else if (mcpu->max_cstates == CPU_ACPI_C1) {
			disp_enq_thread = non_deep_idle_disp_enq_thread;
			cp->cpu_m.mcpu_idle_cpu = non_deep_idle_cpu;
			hpet.callback(CST_EVENT_ONE_CSTATE);
		}
		mutex_exit(&cpu_lock);

		CPUSET_ATOMIC_XDEL(dom_cpu_set, cpu_id, result);
		mutex_exit(pm_lock);
	} while (result < 0);
#endif
}

/*
 * Handle a change in the number or the type of available processor
 * power states.
 */
void
cpuidle_manage_cstates(void *ctx)
{
	cpu_t			*cp = ctx;
	processorid_t		cpu_id = cp->cpu_id;
	cpupm_mach_state_t	*mach_state =
	    (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state;
	boolean_t		is_ready;

	if (mach_state == NULL) {
		return;
	}

	/*
	 * We currently refuse to power manage if the CPU is not ready to
	 * take cross calls (cross calls fail silently if CPU is not ready
	 * for it).
	 *
	 * Additionally, for x86 platforms we cannot power manage
	 * any one instance, until all instances have been initialized.
	 * That's because we don't know what the CPU domains look like
	 * until all instances have been initialized.
	 */
	is_ready = CPUPM_XCALL_IS_READY(cpu_id) && cpupm_cstate_ready();
	if (!is_ready)
		return;

	cpuidle_cstate_instance(cp);
}