xref: /onnv-gate/usr/src/uts/i86pc/os/cpupm/cpu_idle.c (revision 8906:e559381f1e2b)
1*8906SEric.Saxe@Sun.COM /*
2*8906SEric.Saxe@Sun.COM  * CDDL HEADER START
3*8906SEric.Saxe@Sun.COM  *
4*8906SEric.Saxe@Sun.COM  * The contents of this file are subject to the terms of the
5*8906SEric.Saxe@Sun.COM  * Common Development and Distribution License (the "License").
6*8906SEric.Saxe@Sun.COM  * You may not use this file except in compliance with the License.
7*8906SEric.Saxe@Sun.COM  *
8*8906SEric.Saxe@Sun.COM  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9*8906SEric.Saxe@Sun.COM  * or http://www.opensolaris.org/os/licensing.
10*8906SEric.Saxe@Sun.COM  * See the License for the specific language governing permissions
11*8906SEric.Saxe@Sun.COM  * and limitations under the License.
12*8906SEric.Saxe@Sun.COM  *
13*8906SEric.Saxe@Sun.COM  * When distributing Covered Code, include this CDDL HEADER in each
14*8906SEric.Saxe@Sun.COM  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15*8906SEric.Saxe@Sun.COM  * If applicable, add the following below this CDDL HEADER, with the
16*8906SEric.Saxe@Sun.COM  * fields enclosed by brackets "[]" replaced with your own identifying
17*8906SEric.Saxe@Sun.COM  * information: Portions Copyright [yyyy] [name of copyright owner]
18*8906SEric.Saxe@Sun.COM  *
19*8906SEric.Saxe@Sun.COM  * CDDL HEADER END
20*8906SEric.Saxe@Sun.COM  */
21*8906SEric.Saxe@Sun.COM /*
22*8906SEric.Saxe@Sun.COM  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23*8906SEric.Saxe@Sun.COM  * Use is subject to license terms.
24*8906SEric.Saxe@Sun.COM  */
25*8906SEric.Saxe@Sun.COM 
26*8906SEric.Saxe@Sun.COM #include <sys/x86_archext.h>
27*8906SEric.Saxe@Sun.COM #include <sys/machsystm.h>
28*8906SEric.Saxe@Sun.COM #include <sys/x_call.h>
29*8906SEric.Saxe@Sun.COM #include <sys/stat.h>
30*8906SEric.Saxe@Sun.COM #include <sys/acpi/acpi.h>
31*8906SEric.Saxe@Sun.COM #include <sys/acpica.h>
32*8906SEric.Saxe@Sun.COM #include <sys/cpu_acpi.h>
33*8906SEric.Saxe@Sun.COM #include <sys/cpu_idle.h>
34*8906SEric.Saxe@Sun.COM #include <sys/cpupm.h>
35*8906SEric.Saxe@Sun.COM #include <sys/hpet.h>
36*8906SEric.Saxe@Sun.COM #include <sys/archsystm.h>
37*8906SEric.Saxe@Sun.COM #include <vm/hat_i86.h>
38*8906SEric.Saxe@Sun.COM #include <sys/dtrace.h>
39*8906SEric.Saxe@Sun.COM #include <sys/sdt.h>
40*8906SEric.Saxe@Sun.COM #include <sys/callb.h>
41*8906SEric.Saxe@Sun.COM 
/*
 * Adaptive idle routine; only declared here, defined outside this file.
 */
42*8906SEric.Saxe@Sun.COM extern void cpu_idle_adaptive(void);
43*8906SEric.Saxe@Sun.COM 
/*
 * Forward declarations of the file-local routines that are referenced
 * (by cpu_idle_ops and by each other) before they are defined.
 */
44*8906SEric.Saxe@Sun.COM static int cpu_idle_init(cpu_t *);
45*8906SEric.Saxe@Sun.COM static void cpu_idle_fini(cpu_t *);
46*8906SEric.Saxe@Sun.COM static boolean_t cpu_deep_idle_callb(void *arg, int code);
47*8906SEric.Saxe@Sun.COM static boolean_t cpu_idle_cpr_callb(void *arg, int code);
48*8906SEric.Saxe@Sun.COM static void acpi_cpu_cstate(cpu_acpi_cstate_t *cstate);
49*8906SEric.Saxe@Sun.COM static void cpuidle_set_cstate_latency(cpu_t *cp);
50*8906SEric.Saxe@Sun.COM 
51*8906SEric.Saxe@Sun.COM /*
52*8906SEric.Saxe@Sun.COM  * Interfaces for modules implementing Intel's deep c-state.
 *
 * The table carries a descriptive name, the per-CPU init routine and the
 * per-CPU fini routine.  The fourth slot is left NULL here; its meaning is
 * defined by cpupm_state_ops_t, which is declared outside this file.
53*8906SEric.Saxe@Sun.COM  */
54*8906SEric.Saxe@Sun.COM cpupm_state_ops_t cpu_idle_ops = {
55*8906SEric.Saxe@Sun.COM 	"Generic ACPI C-state Support",
56*8906SEric.Saxe@Sun.COM 	cpu_idle_init,
57*8906SEric.Saxe@Sun.COM 	cpu_idle_fini,
58*8906SEric.Saxe@Sun.COM 	NULL
59*8906SEric.Saxe@Sun.COM };
60*8906SEric.Saxe@Sun.COM 
/*
 * cpu_idle_callb_mutex is held by cpu_idle_init() while it registers the
 * two callbacks below.  Each id stays 0 until its callback has been added
 * with callb_add(), which is how cpu_idle_init() avoids registering twice.
 * cpu_idle_cfg_state is not referenced in the portion of the file
 * reviewed here.
 */
61*8906SEric.Saxe@Sun.COM static kmutex_t		cpu_idle_callb_mutex;
62*8906SEric.Saxe@Sun.COM static callb_id_t	cpu_deep_idle_callb_id;
63*8906SEric.Saxe@Sun.COM static callb_id_t	cpu_idle_cpr_callb_id;
64*8906SEric.Saxe@Sun.COM static uint_t		cpu_idle_cfg_state;
65*8906SEric.Saxe@Sun.COM 
/*
 * Installed as ks_lock on every C-state kstat created by cpu_idle_init().
 */
66*8906SEric.Saxe@Sun.COM static kmutex_t cpu_idle_mutex;
67*8906SEric.Saxe@Sun.COM 
/*
 * Named-kstat data shared by all C-state kstats: cpu_idle_init() points
 * every kstat's ks_data at this one structure, and cpu_idle_kstat_update()
 * refills it from the C-state stored in ks_private on each update.
 */
68*8906SEric.Saxe@Sun.COM cpu_idle_kstat_t cpu_idle_kstat = {
69*8906SEric.Saxe@Sun.COM 	{ "address_space_id",	KSTAT_DATA_STRING },
70*8906SEric.Saxe@Sun.COM 	{ "latency",		KSTAT_DATA_UINT32 },
71*8906SEric.Saxe@Sun.COM 	{ "power",		KSTAT_DATA_UINT32 },
72*8906SEric.Saxe@Sun.COM };
73*8906SEric.Saxe@Sun.COM 
74*8906SEric.Saxe@Sun.COM /*
75*8906SEric.Saxe@Sun.COM  * kstat update function of the c-state info
76*8906SEric.Saxe@Sun.COM  */
77*8906SEric.Saxe@Sun.COM static int
78*8906SEric.Saxe@Sun.COM cpu_idle_kstat_update(kstat_t *ksp, int flag)
79*8906SEric.Saxe@Sun.COM {
80*8906SEric.Saxe@Sun.COM 	cpu_acpi_cstate_t *cstate = ksp->ks_private;
81*8906SEric.Saxe@Sun.COM 
82*8906SEric.Saxe@Sun.COM 	if (flag == KSTAT_WRITE) {
83*8906SEric.Saxe@Sun.COM 		return (EACCES);
84*8906SEric.Saxe@Sun.COM 	}
85*8906SEric.Saxe@Sun.COM 
86*8906SEric.Saxe@Sun.COM 	if (cstate->cs_addrspace_id == ACPI_ADR_SPACE_FIXED_HARDWARE) {
87*8906SEric.Saxe@Sun.COM 		kstat_named_setstr(&cpu_idle_kstat.addr_space_id,
88*8906SEric.Saxe@Sun.COM 		"FFixedHW");
89*8906SEric.Saxe@Sun.COM 	} else if (cstate->cs_addrspace_id == ACPI_ADR_SPACE_SYSTEM_IO) {
90*8906SEric.Saxe@Sun.COM 		kstat_named_setstr(&cpu_idle_kstat.addr_space_id,
91*8906SEric.Saxe@Sun.COM 		"SystemIO");
92*8906SEric.Saxe@Sun.COM 	} else {
93*8906SEric.Saxe@Sun.COM 		kstat_named_setstr(&cpu_idle_kstat.addr_space_id,
94*8906SEric.Saxe@Sun.COM 		"Unsupported");
95*8906SEric.Saxe@Sun.COM 	}
96*8906SEric.Saxe@Sun.COM 
97*8906SEric.Saxe@Sun.COM 	cpu_idle_kstat.cs_latency.value.ui32 = cstate->cs_latency;
98*8906SEric.Saxe@Sun.COM 	cpu_idle_kstat.cs_power.value.ui32 = cstate->cs_power;
99*8906SEric.Saxe@Sun.COM 
100*8906SEric.Saxe@Sun.COM 	return (0);
101*8906SEric.Saxe@Sun.COM }
102*8906SEric.Saxe@Sun.COM 
103*8906SEric.Saxe@Sun.COM /*
104*8906SEric.Saxe@Sun.COM  * c-state wakeup function.
105*8906SEric.Saxe@Sun.COM  * Similar to cpu_wakeup and cpu_wakeup_mwait except this function deals
106*8906SEric.Saxe@Sun.COM  * with CPUs asleep in MWAIT, HLT, or ACPI Deep C-State.
107*8906SEric.Saxe@Sun.COM  */
108*8906SEric.Saxe@Sun.COM void
109*8906SEric.Saxe@Sun.COM cstate_wakeup(cpu_t *cp, int bound)
110*8906SEric.Saxe@Sun.COM {
111*8906SEric.Saxe@Sun.COM 	struct machcpu	*mcpu = &(cp->cpu_m);
112*8906SEric.Saxe@Sun.COM 	volatile uint32_t *mcpu_mwait = mcpu->mcpu_mwait;
113*8906SEric.Saxe@Sun.COM 	cpupart_t	*cpu_part;
114*8906SEric.Saxe@Sun.COM 	uint_t		cpu_found;
115*8906SEric.Saxe@Sun.COM 	processorid_t	cpu_sid;
116*8906SEric.Saxe@Sun.COM 
117*8906SEric.Saxe@Sun.COM 	cpu_part = cp->cpu_part;
118*8906SEric.Saxe@Sun.COM 	cpu_sid = cp->cpu_seqid;
119*8906SEric.Saxe@Sun.COM 	/*
120*8906SEric.Saxe@Sun.COM 	 * Clear the halted bit for that CPU since it will be woken up
121*8906SEric.Saxe@Sun.COM 	 * in a moment.
122*8906SEric.Saxe@Sun.COM 	 */
123*8906SEric.Saxe@Sun.COM 	if (bitset_in_set(&cpu_part->cp_haltset, cpu_sid)) {
124*8906SEric.Saxe@Sun.COM 		/*
125*8906SEric.Saxe@Sun.COM 		 * Clear the halted bit for that CPU since it will be
126*8906SEric.Saxe@Sun.COM 		 * poked in a moment.
127*8906SEric.Saxe@Sun.COM 		 */
128*8906SEric.Saxe@Sun.COM 		bitset_atomic_del(&cpu_part->cp_haltset, cpu_sid);
129*8906SEric.Saxe@Sun.COM 
130*8906SEric.Saxe@Sun.COM 		/*
131*8906SEric.Saxe@Sun.COM 		 * We may find the current CPU present in the halted cpuset
132*8906SEric.Saxe@Sun.COM 		 * if we're in the context of an interrupt that occurred
133*8906SEric.Saxe@Sun.COM 		 * before we had a chance to clear our bit in cpu_idle().
134*8906SEric.Saxe@Sun.COM 		 * Waking ourself is obviously unnecessary, since if
135*8906SEric.Saxe@Sun.COM 		 * we're here, we're not halted.
136*8906SEric.Saxe@Sun.COM 		 */
137*8906SEric.Saxe@Sun.COM 		if (cp != CPU) {
138*8906SEric.Saxe@Sun.COM 			/*
139*8906SEric.Saxe@Sun.COM 			 * Use correct wakeup mechanism
140*8906SEric.Saxe@Sun.COM 			 */
141*8906SEric.Saxe@Sun.COM 			if ((mcpu_mwait != NULL) &&
142*8906SEric.Saxe@Sun.COM 			    (*mcpu_mwait == MWAIT_HALTED))
143*8906SEric.Saxe@Sun.COM 				MWAIT_WAKEUP(cp);
144*8906SEric.Saxe@Sun.COM 			else
145*8906SEric.Saxe@Sun.COM 				poke_cpu(cp->cpu_id);
146*8906SEric.Saxe@Sun.COM 		}
147*8906SEric.Saxe@Sun.COM 		return;
148*8906SEric.Saxe@Sun.COM 	} else {
149*8906SEric.Saxe@Sun.COM 		/*
150*8906SEric.Saxe@Sun.COM 		 * This cpu isn't halted, but it's idle or undergoing a
151*8906SEric.Saxe@Sun.COM 		 * context switch. No need to awaken anyone else.
152*8906SEric.Saxe@Sun.COM 		 */
153*8906SEric.Saxe@Sun.COM 		if (cp->cpu_thread == cp->cpu_idle_thread ||
154*8906SEric.Saxe@Sun.COM 		    cp->cpu_disp_flags & CPU_DISP_DONTSTEAL)
155*8906SEric.Saxe@Sun.COM 			return;
156*8906SEric.Saxe@Sun.COM 	}
157*8906SEric.Saxe@Sun.COM 
158*8906SEric.Saxe@Sun.COM 	/*
159*8906SEric.Saxe@Sun.COM 	 * No need to wake up other CPUs if the thread we just enqueued
160*8906SEric.Saxe@Sun.COM 	 * is bound.
161*8906SEric.Saxe@Sun.COM 	 */
162*8906SEric.Saxe@Sun.COM 	if (bound)
163*8906SEric.Saxe@Sun.COM 		return;
164*8906SEric.Saxe@Sun.COM 
165*8906SEric.Saxe@Sun.COM 
166*8906SEric.Saxe@Sun.COM 	/*
167*8906SEric.Saxe@Sun.COM 	 * See if there's any other halted CPUs. If there are, then
168*8906SEric.Saxe@Sun.COM 	 * select one, and awaken it.
169*8906SEric.Saxe@Sun.COM 	 * It's possible that after we find a CPU, somebody else
170*8906SEric.Saxe@Sun.COM 	 * will awaken it before we get the chance.
171*8906SEric.Saxe@Sun.COM 	 * In that case, look again.
172*8906SEric.Saxe@Sun.COM 	 */
173*8906SEric.Saxe@Sun.COM 	do {
174*8906SEric.Saxe@Sun.COM 		cpu_found = bitset_find(&cpu_part->cp_haltset);
175*8906SEric.Saxe@Sun.COM 		if (cpu_found == (uint_t)-1)
176*8906SEric.Saxe@Sun.COM 			return;
177*8906SEric.Saxe@Sun.COM 
178*8906SEric.Saxe@Sun.COM 	} while (bitset_atomic_test_and_del(&cpu_part->cp_haltset,
179*8906SEric.Saxe@Sun.COM 	    cpu_found) < 0);
180*8906SEric.Saxe@Sun.COM 
181*8906SEric.Saxe@Sun.COM 	/*
182*8906SEric.Saxe@Sun.COM 	 * Must use correct wakeup mechanism to avoid lost wakeup of
183*8906SEric.Saxe@Sun.COM 	 * alternate cpu.
184*8906SEric.Saxe@Sun.COM 	 */
185*8906SEric.Saxe@Sun.COM 	if (cpu_found != CPU->cpu_seqid) {
186*8906SEric.Saxe@Sun.COM 		mcpu_mwait = cpu[cpu_found]->cpu_m.mcpu_mwait;
187*8906SEric.Saxe@Sun.COM 		if ((mcpu_mwait != NULL) && (*mcpu_mwait == MWAIT_HALTED))
188*8906SEric.Saxe@Sun.COM 			MWAIT_WAKEUP(cpu_seq[cpu_found]);
189*8906SEric.Saxe@Sun.COM 		else
190*8906SEric.Saxe@Sun.COM 			poke_cpu(cpu_seq[cpu_found]->cpu_id);
191*8906SEric.Saxe@Sun.COM 	}
192*8906SEric.Saxe@Sun.COM }
193*8906SEric.Saxe@Sun.COM 
194*8906SEric.Saxe@Sun.COM /*
195*8906SEric.Saxe@Sun.COM  * enter deep c-state handler
 *
 * Operates on the current CPU (CPU).  cstate describes the ACPI C-state
 * to enter: cs_addrspace_id selects the entry method (MWAIT for FixedHW,
 * an I/O port read for SystemIO), cs_address supplies the MWAIT hint or
 * the port, and cs_type is what gets reported to the DTrace idle probe.
 *
 * Outline:
 *   1. publish our wakeup method in mcpu_mwait and join cp_haltset;
 *   2. bail out if work is already available;
 *   3. switch to the HPET proxy timer and re-check for a wakeup;
 *   4. if the HPET proxy is unavailable, fall back to an MWAIT based C1;
 *   5. otherwise enter the requested C-state;
 *   6. restore the lAPIC timer and leave cp_haltset.
 *
 * NOTE(review): the initial store to *mcpu_mwait is guarded by a NULL
 * check, but the later i86_monitor()/dereference sites are not.
 * Presumably this routine is only reached when mcpu_mwait has been set
 * up -- confirm against the code that selects this idle routine.
196*8906SEric.Saxe@Sun.COM  */
197*8906SEric.Saxe@Sun.COM static void
198*8906SEric.Saxe@Sun.COM acpi_cpu_cstate(cpu_acpi_cstate_t *cstate)
199*8906SEric.Saxe@Sun.COM {
200*8906SEric.Saxe@Sun.COM 	volatile uint32_t	*mcpu_mwait = CPU->cpu_m.mcpu_mwait;
201*8906SEric.Saxe@Sun.COM 	cpu_t			*cpup = CPU;
202*8906SEric.Saxe@Sun.COM 	processorid_t		cpu_sid = cpup->cpu_seqid;
203*8906SEric.Saxe@Sun.COM 	cpupart_t		*cp = cpup->cpu_part;
204*8906SEric.Saxe@Sun.COM 	hrtime_t		lapic_expire;
205*8906SEric.Saxe@Sun.COM 	uint8_t			type = cstate->cs_addrspace_id;
206*8906SEric.Saxe@Sun.COM 	uint32_t		cs_type = cstate->cs_type;
207*8906SEric.Saxe@Sun.COM 	int			hset_update = 1;
208*8906SEric.Saxe@Sun.COM 	boolean_t		using_hpet_timer;
209*8906SEric.Saxe@Sun.COM 
210*8906SEric.Saxe@Sun.COM 	/*
211*8906SEric.Saxe@Sun.COM 	 * Set our mcpu_mwait here, so we can tell if anyone tries to
212*8906SEric.Saxe@Sun.COM 	 * wake us between now and when we call mwait.  No other cpu will
213*8906SEric.Saxe@Sun.COM 	 * attempt to set our mcpu_mwait until we add ourself to the haltset.
	 *
	 * A SystemIO C-state is not entered through MWAIT, so it is marked
	 * MWAIT_WAKEUP_IPI; cstate_wakeup() then sees a value other than
	 * MWAIT_HALTED and uses poke_cpu() instead of a monitor write.
214*8906SEric.Saxe@Sun.COM 	 */
215*8906SEric.Saxe@Sun.COM 	if (mcpu_mwait) {
216*8906SEric.Saxe@Sun.COM 		if (type == ACPI_ADR_SPACE_SYSTEM_IO)
217*8906SEric.Saxe@Sun.COM 			*mcpu_mwait = MWAIT_WAKEUP_IPI;
218*8906SEric.Saxe@Sun.COM 		else
219*8906SEric.Saxe@Sun.COM 			*mcpu_mwait = MWAIT_HALTED;
220*8906SEric.Saxe@Sun.COM 	}
221*8906SEric.Saxe@Sun.COM 
222*8906SEric.Saxe@Sun.COM 	/*
223*8906SEric.Saxe@Sun.COM 	 * If this CPU is online, and there are multiple CPUs
224*8906SEric.Saxe@Sun.COM 	 * in the system, then we should note our halting
225*8906SEric.Saxe@Sun.COM 	 * by adding ourselves to the partition's halted CPU
226*8906SEric.Saxe@Sun.COM 	 * bitmap. This allows other CPUs to find/awaken us when
227*8906SEric.Saxe@Sun.COM 	 * work becomes available.
228*8906SEric.Saxe@Sun.COM 	 */
229*8906SEric.Saxe@Sun.COM 	if (cpup->cpu_flags & CPU_OFFLINE || ncpus == 1)
230*8906SEric.Saxe@Sun.COM 		hset_update = 0;
231*8906SEric.Saxe@Sun.COM 
232*8906SEric.Saxe@Sun.COM 	/*
233*8906SEric.Saxe@Sun.COM 	 * Add ourselves to the partition's halted CPUs bitmask
234*8906SEric.Saxe@Sun.COM 	 * and set our HALTED flag, if necessary.
235*8906SEric.Saxe@Sun.COM 	 *
236*8906SEric.Saxe@Sun.COM 	 * When a thread becomes runnable, it is placed on the queue
237*8906SEric.Saxe@Sun.COM 	 * and then the halted cpuset is checked to determine who
238*8906SEric.Saxe@Sun.COM 	 * (if anyone) should be awakened. We therefore need to first
239*8906SEric.Saxe@Sun.COM 	 * add ourselves to the halted cpuset, and then check if there
240*8906SEric.Saxe@Sun.COM 	 * is any work available.
241*8906SEric.Saxe@Sun.COM 	 *
242*8906SEric.Saxe@Sun.COM 	 * Note that memory barriers after updating the HALTED flag
243*8906SEric.Saxe@Sun.COM 	 * are not necessary since an atomic operation (updating the bitmap)
244*8906SEric.Saxe@Sun.COM 	 * immediately follows. On x86 the atomic operation acts as a
245*8906SEric.Saxe@Sun.COM 	 * memory barrier for the update of cpu_disp_flags.
246*8906SEric.Saxe@Sun.COM 	 */
247*8906SEric.Saxe@Sun.COM 	if (hset_update) {
248*8906SEric.Saxe@Sun.COM 		cpup->cpu_disp_flags |= CPU_DISP_HALTED;
249*8906SEric.Saxe@Sun.COM 		bitset_atomic_add(&cp->cp_haltset, cpu_sid);
250*8906SEric.Saxe@Sun.COM 	}
251*8906SEric.Saxe@Sun.COM 
252*8906SEric.Saxe@Sun.COM 	/*
253*8906SEric.Saxe@Sun.COM 	 * Check to make sure there's really nothing to do.
254*8906SEric.Saxe@Sun.COM 	 * Work destined for this CPU may become available after
255*8906SEric.Saxe@Sun.COM 	 * this check. We'll be notified through the clearing of our
256*8906SEric.Saxe@Sun.COM 	 * bit in the halted CPU bitmask, and a write to our mcpu_mwait.
257*8906SEric.Saxe@Sun.COM 	 *
258*8906SEric.Saxe@Sun.COM 	 * disp_anywork() checks disp_nrunnable, so we do not have to later.
259*8906SEric.Saxe@Sun.COM 	 */
260*8906SEric.Saxe@Sun.COM 	if (disp_anywork()) {
261*8906SEric.Saxe@Sun.COM 		if (hset_update) {
262*8906SEric.Saxe@Sun.COM 			cpup->cpu_disp_flags &= ~CPU_DISP_HALTED;
263*8906SEric.Saxe@Sun.COM 			bitset_atomic_del(&cp->cp_haltset, cpu_sid);
264*8906SEric.Saxe@Sun.COM 		}
265*8906SEric.Saxe@Sun.COM 		return;
266*8906SEric.Saxe@Sun.COM 	}
267*8906SEric.Saxe@Sun.COM 
268*8906SEric.Saxe@Sun.COM 	/*
269*8906SEric.Saxe@Sun.COM 	 * We're on our way to being halted.
270*8906SEric.Saxe@Sun.COM 	 *
271*8906SEric.Saxe@Sun.COM 	 * The local APIC timer can stop in ACPI C2 and deeper c-states.
272*8906SEric.Saxe@Sun.COM 	 * Program the HPET hardware to substitute for this CPU's lAPIC timer.
273*8906SEric.Saxe@Sun.COM 	 * hpet.use_hpet_timer() disables the LAPIC Timer.  Make sure to
274*8906SEric.Saxe@Sun.COM 	 * start the LAPIC Timer again before leaving this function.
275*8906SEric.Saxe@Sun.COM 	 *
276*8906SEric.Saxe@Sun.COM 	 * hpet.use_hpet_timer disables interrupts, so we will awaken
277*8906SEric.Saxe@Sun.COM 	 * immediately after halting if someone tries to poke us between now
278*8906SEric.Saxe@Sun.COM 	 * and the time we actually halt.
	 *
	 * Every return path below this point therefore calls
	 * hpet.use_lapic_timer(lapic_expire) before returning.
279*8906SEric.Saxe@Sun.COM 	 */
280*8906SEric.Saxe@Sun.COM 	using_hpet_timer = hpet.use_hpet_timer(&lapic_expire);
281*8906SEric.Saxe@Sun.COM 
282*8906SEric.Saxe@Sun.COM 	/*
283*8906SEric.Saxe@Sun.COM 	 * We check for the presence of our bit after disabling interrupts.
284*8906SEric.Saxe@Sun.COM 	 * If it's cleared, we'll return. If the bit is cleared after
285*8906SEric.Saxe@Sun.COM 	 * we check then the cstate_wakeup() will pop us out of the halted
286*8906SEric.Saxe@Sun.COM 	 * state.
287*8906SEric.Saxe@Sun.COM 	 *
288*8906SEric.Saxe@Sun.COM 	 * This means that the ordering of the bit clearing and the wakeup
289*8906SEric.Saxe@Sun.COM 	 * performed by cstate_wakeup() is important.
290*8906SEric.Saxe@Sun.COM 	 * cstate_wakeup() must clear our cp_haltset bit, and then wake us
291*8906SEric.Saxe@Sun.COM 	 * (MWAIT_WAKEUP() or poke_cpu()).
292*8906SEric.Saxe@Sun.COM 	 * acpi_cpu_cstate() must disable interrupts, then check for the bit.
	 *
	 * The bit was already removed by whoever woke us, so only the
	 * HALTED flag needs to be cleared on this path.
293*8906SEric.Saxe@Sun.COM 	 */
294*8906SEric.Saxe@Sun.COM 	if (hset_update && bitset_in_set(&cp->cp_haltset, cpu_sid) == 0) {
295*8906SEric.Saxe@Sun.COM 		hpet.use_lapic_timer(lapic_expire);
296*8906SEric.Saxe@Sun.COM 		cpup->cpu_disp_flags &= ~CPU_DISP_HALTED;
297*8906SEric.Saxe@Sun.COM 		return;
298*8906SEric.Saxe@Sun.COM 	}
299*8906SEric.Saxe@Sun.COM 
300*8906SEric.Saxe@Sun.COM 	/*
301*8906SEric.Saxe@Sun.COM 	 * The check for anything locally runnable is here for performance
302*8906SEric.Saxe@Sun.COM 	 * and isn't needed for correctness. disp_nrunnable ought to be
303*8906SEric.Saxe@Sun.COM 	 * in our cache still, so it's inexpensive to check, and if there
304*8906SEric.Saxe@Sun.COM 	 * is anything runnable we won't have to wait for the poke.
305*8906SEric.Saxe@Sun.COM 	 */
306*8906SEric.Saxe@Sun.COM 	if (cpup->cpu_disp->disp_nrunnable != 0) {
307*8906SEric.Saxe@Sun.COM 		hpet.use_lapic_timer(lapic_expire);
308*8906SEric.Saxe@Sun.COM 		if (hset_update) {
309*8906SEric.Saxe@Sun.COM 			cpup->cpu_disp_flags &= ~CPU_DISP_HALTED;
310*8906SEric.Saxe@Sun.COM 			bitset_atomic_del(&cp->cp_haltset, cpu_sid);
311*8906SEric.Saxe@Sun.COM 		}
312*8906SEric.Saxe@Sun.COM 		return;
313*8906SEric.Saxe@Sun.COM 	}
314*8906SEric.Saxe@Sun.COM 
315*8906SEric.Saxe@Sun.COM 	if (using_hpet_timer == B_FALSE) {
316*8906SEric.Saxe@Sun.COM 
317*8906SEric.Saxe@Sun.COM 		hpet.use_lapic_timer(lapic_expire);
318*8906SEric.Saxe@Sun.COM 
319*8906SEric.Saxe@Sun.COM 		/*
320*8906SEric.Saxe@Sun.COM 		 * We are currently unable to program the HPET to act as this
321*8906SEric.Saxe@Sun.COM 		 * CPU's proxy lAPIC timer.  This CPU cannot enter C2 or deeper
322*8906SEric.Saxe@Sun.COM 		 * because no timer is set to wake it up while its lAPIC timer
323*8906SEric.Saxe@Sun.COM 		 * stalls in deep C-States.
324*8906SEric.Saxe@Sun.COM 		 * Enter C1 instead.
325*8906SEric.Saxe@Sun.COM 		 *
326*8906SEric.Saxe@Sun.COM 		 * cstate_wakeup() will wake this CPU with an IPI which
327*8906SEric.Saxe@Sun.COM 		 * works with MWAIT.
		 *
		 * The MWAIT_WAKEUP_IPI bit is masked off before the
		 * comparison with MWAIT_HALTED.
328*8906SEric.Saxe@Sun.COM 		 */
329*8906SEric.Saxe@Sun.COM 		i86_monitor(mcpu_mwait, 0, 0);
330*8906SEric.Saxe@Sun.COM 		if ((*mcpu_mwait & ~MWAIT_WAKEUP_IPI) == MWAIT_HALTED) {
331*8906SEric.Saxe@Sun.COM 			cpu_dtrace_idle_probe(CPU_ACPI_C1);
332*8906SEric.Saxe@Sun.COM 
333*8906SEric.Saxe@Sun.COM 			tlb_going_idle();
334*8906SEric.Saxe@Sun.COM 			i86_mwait(0, 0);
335*8906SEric.Saxe@Sun.COM 			tlb_service();
336*8906SEric.Saxe@Sun.COM 
337*8906SEric.Saxe@Sun.COM 			cpu_dtrace_idle_probe(CPU_ACPI_C0);
338*8906SEric.Saxe@Sun.COM 		}
339*8906SEric.Saxe@Sun.COM 
340*8906SEric.Saxe@Sun.COM 		/*
341*8906SEric.Saxe@Sun.COM 		 * We're no longer halted
342*8906SEric.Saxe@Sun.COM 		 */
343*8906SEric.Saxe@Sun.COM 		if (hset_update) {
344*8906SEric.Saxe@Sun.COM 			cpup->cpu_disp_flags &= ~CPU_DISP_HALTED;
345*8906SEric.Saxe@Sun.COM 			bitset_atomic_del(&cp->cp_haltset, cpu_sid);
346*8906SEric.Saxe@Sun.COM 		}
347*8906SEric.Saxe@Sun.COM 		return;
348*8906SEric.Saxe@Sun.COM 	}
349*8906SEric.Saxe@Sun.COM 
	/* Report the C-state we are about to enter to DTrace. */
350*8906SEric.Saxe@Sun.COM 	cpu_dtrace_idle_probe((uint_t)cs_type);
351*8906SEric.Saxe@Sun.COM 
352*8906SEric.Saxe@Sun.COM 	if (type == ACPI_ADR_SPACE_FIXED_HARDWARE) {
353*8906SEric.Saxe@Sun.COM 		/*
354*8906SEric.Saxe@Sun.COM 		 * We're on our way to being halted.
355*8906SEric.Saxe@Sun.COM 		 * To avoid a lost wakeup, arm the monitor before checking
356*8906SEric.Saxe@Sun.COM 		 * if another cpu wrote to mcpu_mwait to wake us up.
		 *
		 * cs_address is passed to MWAIT in eax and ecx is set to 1.
		 * NOTE(review): presumably eax is the C-state hint from _CST
		 * and ecx = 1 lets masked interrupts break out of MWAIT --
		 * confirm against the processor manual.
357*8906SEric.Saxe@Sun.COM 		 */
358*8906SEric.Saxe@Sun.COM 		i86_monitor(mcpu_mwait, 0, 0);
359*8906SEric.Saxe@Sun.COM 		if (*mcpu_mwait == MWAIT_HALTED) {
360*8906SEric.Saxe@Sun.COM 			uint32_t eax = cstate->cs_address;
361*8906SEric.Saxe@Sun.COM 			uint32_t ecx = 1;
362*8906SEric.Saxe@Sun.COM 
363*8906SEric.Saxe@Sun.COM 			tlb_going_idle();
364*8906SEric.Saxe@Sun.COM 			i86_mwait(eax, ecx);
365*8906SEric.Saxe@Sun.COM 			tlb_service();
366*8906SEric.Saxe@Sun.COM 		}
367*8906SEric.Saxe@Sun.COM 	} else if (type == ACPI_ADR_SPACE_SYSTEM_IO) {
368*8906SEric.Saxe@Sun.COM 		uint32_t value;
369*8906SEric.Saxe@Sun.COM 		ACPI_TABLE_FADT *gbl_FADT;
370*8906SEric.Saxe@Sun.COM 
		/*
		 * SystemIO entry: an 8-bit read of the C-state port, followed
		 * by a 32-bit read of the PM timer block taken from the FADT.
		 * Both values read are discarded.
		 * NOTE(review): the second read looks like the customary
		 * dummy read that follows a C-state port access -- confirm.
		 */
371*8906SEric.Saxe@Sun.COM 		if (*mcpu_mwait == MWAIT_WAKEUP_IPI) {
372*8906SEric.Saxe@Sun.COM 			tlb_going_idle();
373*8906SEric.Saxe@Sun.COM 			(void) cpu_acpi_read_port(cstate->cs_address,
374*8906SEric.Saxe@Sun.COM 			    &value, 8);
375*8906SEric.Saxe@Sun.COM 			acpica_get_global_FADT(&gbl_FADT);
376*8906SEric.Saxe@Sun.COM 			(void) cpu_acpi_read_port(
377*8906SEric.Saxe@Sun.COM 			    gbl_FADT->XPmTimerBlock.Address, &value, 32);
378*8906SEric.Saxe@Sun.COM 			tlb_service();
379*8906SEric.Saxe@Sun.COM 		}
380*8906SEric.Saxe@Sun.COM 	} else {
		/* Unknown address space: log it and do not idle at all. */
381*8906SEric.Saxe@Sun.COM 		cmn_err(CE_WARN, "!_CST: cs_type %lx bad asid type %lx\n",
382*8906SEric.Saxe@Sun.COM 		    (long)cs_type, (long)type);
383*8906SEric.Saxe@Sun.COM 	}
384*8906SEric.Saxe@Sun.COM 
385*8906SEric.Saxe@Sun.COM 	/*
386*8906SEric.Saxe@Sun.COM 	 * The lAPIC timer may have stopped in deep c-state.
387*8906SEric.Saxe@Sun.COM 	 * Reprogram this CPU's lAPIC here before enabling interrupts.
388*8906SEric.Saxe@Sun.COM 	 */
389*8906SEric.Saxe@Sun.COM 	hpet.use_lapic_timer(lapic_expire);
390*8906SEric.Saxe@Sun.COM 
391*8906SEric.Saxe@Sun.COM 	cpu_dtrace_idle_probe(CPU_ACPI_C0);
392*8906SEric.Saxe@Sun.COM 
393*8906SEric.Saxe@Sun.COM 	/*
394*8906SEric.Saxe@Sun.COM 	 * We're no longer halted
395*8906SEric.Saxe@Sun.COM 	 */
396*8906SEric.Saxe@Sun.COM 	if (hset_update) {
397*8906SEric.Saxe@Sun.COM 		cpup->cpu_disp_flags &= ~CPU_DISP_HALTED;
398*8906SEric.Saxe@Sun.COM 		bitset_atomic_del(&cp->cp_haltset, cpu_sid);
399*8906SEric.Saxe@Sun.COM 	}
400*8906SEric.Saxe@Sun.COM }
401*8906SEric.Saxe@Sun.COM 
402*8906SEric.Saxe@Sun.COM /*
403*8906SEric.Saxe@Sun.COM  * indicate when bus masters are active
404*8906SEric.Saxe@Sun.COM  */
405*8906SEric.Saxe@Sun.COM static uint32_t
406*8906SEric.Saxe@Sun.COM cpu_acpi_bm_sts(void)
407*8906SEric.Saxe@Sun.COM {
408*8906SEric.Saxe@Sun.COM 	uint32_t bm_sts = 0;
409*8906SEric.Saxe@Sun.COM 
410*8906SEric.Saxe@Sun.COM 	cpu_acpi_get_register(ACPI_BITREG_BUS_MASTER_STATUS, &bm_sts);
411*8906SEric.Saxe@Sun.COM 
412*8906SEric.Saxe@Sun.COM 	if (bm_sts)
413*8906SEric.Saxe@Sun.COM 		cpu_acpi_set_register(ACPI_BITREG_BUS_MASTER_STATUS, 1);
414*8906SEric.Saxe@Sun.COM 
415*8906SEric.Saxe@Sun.COM 	return (bm_sts);
416*8906SEric.Saxe@Sun.COM }
417*8906SEric.Saxe@Sun.COM 
418*8906SEric.Saxe@Sun.COM /*
419*8906SEric.Saxe@Sun.COM  * Idle the present CPU, deep c-state is supported
420*8906SEric.Saxe@Sun.COM  */
421*8906SEric.Saxe@Sun.COM void
422*8906SEric.Saxe@Sun.COM cpu_acpi_idle(void)
423*8906SEric.Saxe@Sun.COM {
424*8906SEric.Saxe@Sun.COM 	cpu_t *cp = CPU;
425*8906SEric.Saxe@Sun.COM 	uint16_t cs_type;
426*8906SEric.Saxe@Sun.COM 	cpu_acpi_handle_t handle;
427*8906SEric.Saxe@Sun.COM 	cma_c_state_t *cs_data;
428*8906SEric.Saxe@Sun.COM 	cpu_acpi_cstate_t *cstate;
429*8906SEric.Saxe@Sun.COM 	hrtime_t start, end;
430*8906SEric.Saxe@Sun.COM 	int cpu_max_cstates;
431*8906SEric.Saxe@Sun.COM 
432*8906SEric.Saxe@Sun.COM 	cpupm_mach_state_t *mach_state =
433*8906SEric.Saxe@Sun.COM 	    (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state;
434*8906SEric.Saxe@Sun.COM 	handle = mach_state->ms_acpi_handle;
435*8906SEric.Saxe@Sun.COM 	ASSERT(CPU_ACPI_CSTATES(handle) != NULL);
436*8906SEric.Saxe@Sun.COM 
437*8906SEric.Saxe@Sun.COM 	cs_data = mach_state->ms_cstate.cma_state.cstate;
438*8906SEric.Saxe@Sun.COM 	cstate = (cpu_acpi_cstate_t *)CPU_ACPI_CSTATES(handle);
439*8906SEric.Saxe@Sun.COM 	ASSERT(cstate != NULL);
440*8906SEric.Saxe@Sun.COM 	cpu_max_cstates = cpu_acpi_get_max_cstates(handle);
441*8906SEric.Saxe@Sun.COM 	if (cpu_max_cstates > CPU_MAX_CSTATES)
442*8906SEric.Saxe@Sun.COM 		cpu_max_cstates = CPU_MAX_CSTATES;
443*8906SEric.Saxe@Sun.COM 
444*8906SEric.Saxe@Sun.COM 	start = gethrtime_unscaled();
445*8906SEric.Saxe@Sun.COM 
446*8906SEric.Saxe@Sun.COM 	cs_type = cpupm_next_cstate(cs_data, start);
447*8906SEric.Saxe@Sun.COM 
448*8906SEric.Saxe@Sun.COM 	/*
449*8906SEric.Saxe@Sun.COM 	 * OSPM uses the BM_STS bit to determine the power state to enter
450*8906SEric.Saxe@Sun.COM 	 * when considering a transition to or from the C2/C3 power state.
451*8906SEric.Saxe@Sun.COM 	 * if C3 is determined, bus master activity demotes the power state
452*8906SEric.Saxe@Sun.COM 	 * to C2.
453*8906SEric.Saxe@Sun.COM 	 */
454*8906SEric.Saxe@Sun.COM 	if ((cs_type >= CPU_ACPI_C3) && cpu_acpi_bm_sts())
455*8906SEric.Saxe@Sun.COM 		cs_type = CPU_ACPI_C2;
456*8906SEric.Saxe@Sun.COM 
457*8906SEric.Saxe@Sun.COM 	/*
458*8906SEric.Saxe@Sun.COM 	 * BM_RLD determines if the Cx power state was exited as a result of
459*8906SEric.Saxe@Sun.COM 	 * bus master requests. Set this bit when using a C3 power state, and
460*8906SEric.Saxe@Sun.COM 	 * clear it when using a C1 or C2 power state.
461*8906SEric.Saxe@Sun.COM 	 */
462*8906SEric.Saxe@Sun.COM 	if ((CPU_ACPI_BM_INFO(handle) & BM_RLD) && (cs_type < CPU_ACPI_C3)) {
463*8906SEric.Saxe@Sun.COM 		cpu_acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0);
464*8906SEric.Saxe@Sun.COM 		CPU_ACPI_BM_INFO(handle) &= ~BM_RLD;
465*8906SEric.Saxe@Sun.COM 	}
466*8906SEric.Saxe@Sun.COM 
467*8906SEric.Saxe@Sun.COM 	if ((!(CPU_ACPI_BM_INFO(handle) & BM_RLD)) &&
468*8906SEric.Saxe@Sun.COM 	    (cs_type >= CPU_ACPI_C3)) {
469*8906SEric.Saxe@Sun.COM 		cpu_acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 1);
470*8906SEric.Saxe@Sun.COM 		CPU_ACPI_BM_INFO(handle) |= BM_RLD;
471*8906SEric.Saxe@Sun.COM 	}
472*8906SEric.Saxe@Sun.COM 
473*8906SEric.Saxe@Sun.COM 	cstate += cs_type - 1;
474*8906SEric.Saxe@Sun.COM 
475*8906SEric.Saxe@Sun.COM 	switch (cs_type) {
476*8906SEric.Saxe@Sun.COM 	default:
477*8906SEric.Saxe@Sun.COM 		/* FALLTHROUGH */
478*8906SEric.Saxe@Sun.COM 	case CPU_ACPI_C1:
479*8906SEric.Saxe@Sun.COM 		(*non_deep_idle_cpu)();
480*8906SEric.Saxe@Sun.COM 		break;
481*8906SEric.Saxe@Sun.COM 
482*8906SEric.Saxe@Sun.COM 	case CPU_ACPI_C2:
483*8906SEric.Saxe@Sun.COM 		acpi_cpu_cstate(cstate);
484*8906SEric.Saxe@Sun.COM 		break;
485*8906SEric.Saxe@Sun.COM 
486*8906SEric.Saxe@Sun.COM 	case CPU_ACPI_C3:
487*8906SEric.Saxe@Sun.COM 		/*
488*8906SEric.Saxe@Sun.COM 		 * recommended in ACPI spec, providing hardware mechanisms
489*8906SEric.Saxe@Sun.COM 		 * to prevent master from writing to memory (UP-only)
490*8906SEric.Saxe@Sun.COM 		 */
491*8906SEric.Saxe@Sun.COM 		if ((ncpus_online == 1) &&
492*8906SEric.Saxe@Sun.COM 		    (CPU_ACPI_BM_INFO(handle) & BM_CTL)) {
493*8906SEric.Saxe@Sun.COM 			cpu_acpi_set_register(ACPI_BITREG_ARB_DISABLE, 1);
494*8906SEric.Saxe@Sun.COM 			CPU_ACPI_BM_INFO(handle) |= BM_ARB_DIS;
495*8906SEric.Saxe@Sun.COM 		/*
496*8906SEric.Saxe@Sun.COM 		 * Today all Intel's processor support C3 share cache.
497*8906SEric.Saxe@Sun.COM 		 */
498*8906SEric.Saxe@Sun.COM 		} else if (x86_vendor != X86_VENDOR_Intel) {
499*8906SEric.Saxe@Sun.COM 			__acpi_wbinvd();
500*8906SEric.Saxe@Sun.COM 		}
501*8906SEric.Saxe@Sun.COM 		acpi_cpu_cstate(cstate);
502*8906SEric.Saxe@Sun.COM 		if (CPU_ACPI_BM_INFO(handle) & BM_ARB_DIS) {
503*8906SEric.Saxe@Sun.COM 			cpu_acpi_set_register(ACPI_BITREG_ARB_DISABLE, 0);
504*8906SEric.Saxe@Sun.COM 			CPU_ACPI_BM_INFO(handle) &= ~BM_ARB_DIS;
505*8906SEric.Saxe@Sun.COM 		}
506*8906SEric.Saxe@Sun.COM 		break;
507*8906SEric.Saxe@Sun.COM 	}
508*8906SEric.Saxe@Sun.COM 
509*8906SEric.Saxe@Sun.COM 	end = gethrtime_unscaled();
510*8906SEric.Saxe@Sun.COM 
511*8906SEric.Saxe@Sun.COM 	/*
512*8906SEric.Saxe@Sun.COM 	 * Update statistics
513*8906SEric.Saxe@Sun.COM 	 */
514*8906SEric.Saxe@Sun.COM 	cpupm_wakeup_cstate_data(cs_data, end);
515*8906SEric.Saxe@Sun.COM }
516*8906SEric.Saxe@Sun.COM 
517*8906SEric.Saxe@Sun.COM boolean_t
518*8906SEric.Saxe@Sun.COM cpu_deep_cstates_supported(void)
519*8906SEric.Saxe@Sun.COM {
520*8906SEric.Saxe@Sun.COM 	extern int	idle_cpu_no_deep_c;
521*8906SEric.Saxe@Sun.COM 
522*8906SEric.Saxe@Sun.COM 	if (idle_cpu_no_deep_c)
523*8906SEric.Saxe@Sun.COM 		return (B_FALSE);
524*8906SEric.Saxe@Sun.COM 
525*8906SEric.Saxe@Sun.COM 	if (!cpuid_deep_cstates_supported())
526*8906SEric.Saxe@Sun.COM 		return (B_FALSE);
527*8906SEric.Saxe@Sun.COM 
528*8906SEric.Saxe@Sun.COM 	if ((hpet.supported != HPET_FULL_SUPPORT) || !hpet.install_proxy())
529*8906SEric.Saxe@Sun.COM 		return (B_FALSE);
530*8906SEric.Saxe@Sun.COM 
531*8906SEric.Saxe@Sun.COM 	return (B_TRUE);
532*8906SEric.Saxe@Sun.COM }
533*8906SEric.Saxe@Sun.COM 
534*8906SEric.Saxe@Sun.COM /*
535*8906SEric.Saxe@Sun.COM  * Validate that this processor supports deep cstate and if so,
536*8906SEric.Saxe@Sun.COM  * get the c-state data from ACPI and cache it.
537*8906SEric.Saxe@Sun.COM  */
538*8906SEric.Saxe@Sun.COM static int
539*8906SEric.Saxe@Sun.COM cpu_idle_init(cpu_t *cp)
540*8906SEric.Saxe@Sun.COM {
541*8906SEric.Saxe@Sun.COM 	cpupm_mach_state_t *mach_state =
542*8906SEric.Saxe@Sun.COM 	    (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state;
543*8906SEric.Saxe@Sun.COM 	cpu_acpi_handle_t handle = mach_state->ms_acpi_handle;
544*8906SEric.Saxe@Sun.COM 	cpu_acpi_cstate_t *cstate;
545*8906SEric.Saxe@Sun.COM 	char name[KSTAT_STRLEN];
546*8906SEric.Saxe@Sun.COM 	int cpu_max_cstates, i;
547*8906SEric.Saxe@Sun.COM 	ACPI_TABLE_FADT *gbl_FADT;
548*8906SEric.Saxe@Sun.COM 
549*8906SEric.Saxe@Sun.COM 	/*
550*8906SEric.Saxe@Sun.COM 	 * Cache the C-state specific ACPI data.
551*8906SEric.Saxe@Sun.COM 	 */
552*8906SEric.Saxe@Sun.COM 	if (cpu_acpi_cache_cstate_data(handle) != 0) {
553*8906SEric.Saxe@Sun.COM 		cmn_err(CE_NOTE,
554*8906SEric.Saxe@Sun.COM 		    "!cpu_idle_init: Failed to cache ACPI C-state data\n");
555*8906SEric.Saxe@Sun.COM 		cpu_idle_fini(cp);
556*8906SEric.Saxe@Sun.COM 		return (-1);
557*8906SEric.Saxe@Sun.COM 	}
558*8906SEric.Saxe@Sun.COM 
559*8906SEric.Saxe@Sun.COM 	/*
560*8906SEric.Saxe@Sun.COM 	 * Check the bus master arbitration control ability.
561*8906SEric.Saxe@Sun.COM 	 */
562*8906SEric.Saxe@Sun.COM 	acpica_get_global_FADT(&gbl_FADT);
563*8906SEric.Saxe@Sun.COM 	if (gbl_FADT->Pm2ControlBlock && gbl_FADT->Pm2ControlLength)
564*8906SEric.Saxe@Sun.COM 		CPU_ACPI_BM_INFO(handle) |= BM_CTL;
565*8906SEric.Saxe@Sun.COM 
566*8906SEric.Saxe@Sun.COM 	cstate = (cpu_acpi_cstate_t *)CPU_ACPI_CSTATES(handle);
567*8906SEric.Saxe@Sun.COM 
568*8906SEric.Saxe@Sun.COM 	cpu_max_cstates = cpu_acpi_get_max_cstates(handle);
569*8906SEric.Saxe@Sun.COM 
570*8906SEric.Saxe@Sun.COM 	for (i = CPU_ACPI_C1; i <= cpu_max_cstates; i++) {
571*8906SEric.Saxe@Sun.COM 		(void) snprintf(name, KSTAT_STRLEN - 1, "c%d", cstate->cs_type);
572*8906SEric.Saxe@Sun.COM 		/*
573*8906SEric.Saxe@Sun.COM 		 * Allocate, initialize and install cstate kstat
574*8906SEric.Saxe@Sun.COM 		 */
575*8906SEric.Saxe@Sun.COM 		cstate->cs_ksp = kstat_create("cstate", CPU->cpu_id,
576*8906SEric.Saxe@Sun.COM 		    name, "misc",
577*8906SEric.Saxe@Sun.COM 		    KSTAT_TYPE_NAMED,
578*8906SEric.Saxe@Sun.COM 		    sizeof (cpu_idle_kstat) / sizeof (kstat_named_t),
579*8906SEric.Saxe@Sun.COM 		    KSTAT_FLAG_VIRTUAL);
580*8906SEric.Saxe@Sun.COM 
581*8906SEric.Saxe@Sun.COM 		if (cstate->cs_ksp == NULL) {
582*8906SEric.Saxe@Sun.COM 			cmn_err(CE_NOTE, "kstat_create(c_state) fail");
583*8906SEric.Saxe@Sun.COM 		} else {
584*8906SEric.Saxe@Sun.COM 			cstate->cs_ksp->ks_data = &cpu_idle_kstat;
585*8906SEric.Saxe@Sun.COM 			cstate->cs_ksp->ks_lock = &cpu_idle_mutex;
586*8906SEric.Saxe@Sun.COM 			cstate->cs_ksp->ks_update = cpu_idle_kstat_update;
587*8906SEric.Saxe@Sun.COM 			cstate->cs_ksp->ks_data_size += MAXNAMELEN;
588*8906SEric.Saxe@Sun.COM 			cstate->cs_ksp->ks_private = cstate;
589*8906SEric.Saxe@Sun.COM 			kstat_install(cstate->cs_ksp);
590*8906SEric.Saxe@Sun.COM 			cstate++;
591*8906SEric.Saxe@Sun.COM 		}
592*8906SEric.Saxe@Sun.COM 	}
593*8906SEric.Saxe@Sun.COM 
594*8906SEric.Saxe@Sun.COM 	cpupm_alloc_domains(cp, CPUPM_C_STATES);
595*8906SEric.Saxe@Sun.COM 	cpupm_alloc_ms_cstate(cp);
596*8906SEric.Saxe@Sun.COM 	cpuidle_set_cstate_latency(cp);
597*8906SEric.Saxe@Sun.COM 
598*8906SEric.Saxe@Sun.COM 	if (cpu_deep_cstates_supported()) {
599*8906SEric.Saxe@Sun.COM 		mutex_enter(&cpu_idle_callb_mutex);
600*8906SEric.Saxe@Sun.COM 		if (cpu_deep_idle_callb_id == (callb_id_t)0)
601*8906SEric.Saxe@Sun.COM 			cpu_deep_idle_callb_id = callb_add(&cpu_deep_idle_callb,
602*8906SEric.Saxe@Sun.COM 			    (void *)NULL, CB_CL_CPU_DEEP_IDLE, "cpu_deep_idle");
603*8906SEric.Saxe@Sun.COM 		if (cpu_idle_cpr_callb_id == (callb_id_t)0)
604*8906SEric.Saxe@Sun.COM 			cpu_idle_cpr_callb_id = callb_add(&cpu_idle_cpr_callb,
605*8906SEric.Saxe@Sun.COM 			    (void *)NULL, CB_CL_CPR_PM, "cpu_idle_cpr");
606*8906SEric.Saxe@Sun.COM 		mutex_exit(&cpu_idle_callb_mutex);
607*8906SEric.Saxe@Sun.COM 	}
608*8906SEric.Saxe@Sun.COM 
609*8906SEric.Saxe@Sun.COM 	return (0);
610*8906SEric.Saxe@Sun.COM }
611*8906SEric.Saxe@Sun.COM 
612*8906SEric.Saxe@Sun.COM /*
613*8906SEric.Saxe@Sun.COM  * Free resources allocated by cpu_idle_init().
614*8906SEric.Saxe@Sun.COM  */
615*8906SEric.Saxe@Sun.COM static void
616*8906SEric.Saxe@Sun.COM cpu_idle_fini(cpu_t *cp)
617*8906SEric.Saxe@Sun.COM {
618*8906SEric.Saxe@Sun.COM 	cpupm_mach_state_t *mach_state =
619*8906SEric.Saxe@Sun.COM 	    (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state);
620*8906SEric.Saxe@Sun.COM 	cpu_acpi_handle_t handle = mach_state->ms_acpi_handle;
621*8906SEric.Saxe@Sun.COM 	cpu_acpi_cstate_t *cstate;
622*8906SEric.Saxe@Sun.COM 	uint_t	cpu_max_cstates, i;
623*8906SEric.Saxe@Sun.COM 
624*8906SEric.Saxe@Sun.COM 	/*
625*8906SEric.Saxe@Sun.COM 	 * idle cpu points back to the generic one
626*8906SEric.Saxe@Sun.COM 	 */
627*8906SEric.Saxe@Sun.COM 	idle_cpu = CPU->cpu_m.mcpu_idle_cpu = non_deep_idle_cpu;
628*8906SEric.Saxe@Sun.COM 	disp_enq_thread = non_deep_idle_disp_enq_thread;
629*8906SEric.Saxe@Sun.COM 
630*8906SEric.Saxe@Sun.COM 	cstate = (cpu_acpi_cstate_t *)CPU_ACPI_CSTATES(handle);
631*8906SEric.Saxe@Sun.COM 	if (cstate) {
632*8906SEric.Saxe@Sun.COM 		cpu_max_cstates = cpu_acpi_get_max_cstates(handle);
633*8906SEric.Saxe@Sun.COM 
634*8906SEric.Saxe@Sun.COM 		for (i = CPU_ACPI_C1; i <= cpu_max_cstates; i++) {
635*8906SEric.Saxe@Sun.COM 			if (cstate->cs_ksp != NULL)
636*8906SEric.Saxe@Sun.COM 				kstat_delete(cstate->cs_ksp);
637*8906SEric.Saxe@Sun.COM 			cstate++;
638*8906SEric.Saxe@Sun.COM 		}
639*8906SEric.Saxe@Sun.COM 	}
640*8906SEric.Saxe@Sun.COM 
641*8906SEric.Saxe@Sun.COM 	cpupm_free_ms_cstate(cp);
642*8906SEric.Saxe@Sun.COM 	cpupm_free_domains(&cpupm_cstate_domains);
643*8906SEric.Saxe@Sun.COM 	cpu_acpi_free_cstate_data(handle);
644*8906SEric.Saxe@Sun.COM 
645*8906SEric.Saxe@Sun.COM 	mutex_enter(&cpu_idle_callb_mutex);
646*8906SEric.Saxe@Sun.COM 	if (cpu_deep_idle_callb_id != (callb_id_t)0) {
647*8906SEric.Saxe@Sun.COM 		(void) callb_delete(cpu_deep_idle_callb_id);
648*8906SEric.Saxe@Sun.COM 		cpu_deep_idle_callb_id = (callb_id_t)0;
649*8906SEric.Saxe@Sun.COM 	}
650*8906SEric.Saxe@Sun.COM 	if (cpu_idle_cpr_callb_id != (callb_id_t)0) {
651*8906SEric.Saxe@Sun.COM 		(void) callb_delete(cpu_idle_cpr_callb_id);
652*8906SEric.Saxe@Sun.COM 		cpu_idle_cpr_callb_id = (callb_id_t)0;
653*8906SEric.Saxe@Sun.COM 	}
654*8906SEric.Saxe@Sun.COM 	mutex_exit(&cpu_idle_callb_mutex);
655*8906SEric.Saxe@Sun.COM }
656*8906SEric.Saxe@Sun.COM 
657*8906SEric.Saxe@Sun.COM /*ARGSUSED*/
658*8906SEric.Saxe@Sun.COM static boolean_t
659*8906SEric.Saxe@Sun.COM cpu_deep_idle_callb(void *arg, int code)
660*8906SEric.Saxe@Sun.COM {
661*8906SEric.Saxe@Sun.COM 	boolean_t rslt = B_TRUE;
662*8906SEric.Saxe@Sun.COM 
663*8906SEric.Saxe@Sun.COM 	mutex_enter(&cpu_idle_callb_mutex);
664*8906SEric.Saxe@Sun.COM 	switch (code) {
665*8906SEric.Saxe@Sun.COM 	case PM_DEFAULT_CPU_DEEP_IDLE:
666*8906SEric.Saxe@Sun.COM 		/*
667*8906SEric.Saxe@Sun.COM 		 * Default policy is same as enable
668*8906SEric.Saxe@Sun.COM 		 */
669*8906SEric.Saxe@Sun.COM 		/*FALLTHROUGH*/
670*8906SEric.Saxe@Sun.COM 	case PM_ENABLE_CPU_DEEP_IDLE:
671*8906SEric.Saxe@Sun.COM 		if ((cpu_idle_cfg_state & CPU_IDLE_DEEP_CFG) == 0)
672*8906SEric.Saxe@Sun.COM 			break;
673*8906SEric.Saxe@Sun.COM 
674*8906SEric.Saxe@Sun.COM 		if (hpet.callback(PM_ENABLE_CPU_DEEP_IDLE)) {
675*8906SEric.Saxe@Sun.COM 			disp_enq_thread = cstate_wakeup;
676*8906SEric.Saxe@Sun.COM 			idle_cpu = cpu_idle_adaptive;
677*8906SEric.Saxe@Sun.COM 			cpu_idle_cfg_state &= ~CPU_IDLE_DEEP_CFG;
678*8906SEric.Saxe@Sun.COM 		} else {
679*8906SEric.Saxe@Sun.COM 			rslt = B_FALSE;
680*8906SEric.Saxe@Sun.COM 		}
681*8906SEric.Saxe@Sun.COM 		break;
682*8906SEric.Saxe@Sun.COM 
683*8906SEric.Saxe@Sun.COM 	case PM_DISABLE_CPU_DEEP_IDLE:
684*8906SEric.Saxe@Sun.COM 		if (cpu_idle_cfg_state & CPU_IDLE_DEEP_CFG)
685*8906SEric.Saxe@Sun.COM 			break;
686*8906SEric.Saxe@Sun.COM 
687*8906SEric.Saxe@Sun.COM 		idle_cpu = non_deep_idle_cpu;
688*8906SEric.Saxe@Sun.COM 		if (hpet.callback(PM_DISABLE_CPU_DEEP_IDLE)) {
689*8906SEric.Saxe@Sun.COM 			disp_enq_thread = non_deep_idle_disp_enq_thread;
690*8906SEric.Saxe@Sun.COM 			cpu_idle_cfg_state |= CPU_IDLE_DEEP_CFG;
691*8906SEric.Saxe@Sun.COM 		}
692*8906SEric.Saxe@Sun.COM 		break;
693*8906SEric.Saxe@Sun.COM 
694*8906SEric.Saxe@Sun.COM 	default:
695*8906SEric.Saxe@Sun.COM 		cmn_err(CE_NOTE, "!cpu deep_idle_callb: invalid code %d\n",
696*8906SEric.Saxe@Sun.COM 		    code);
697*8906SEric.Saxe@Sun.COM 		break;
698*8906SEric.Saxe@Sun.COM 	}
699*8906SEric.Saxe@Sun.COM 	mutex_exit(&cpu_idle_callb_mutex);
700*8906SEric.Saxe@Sun.COM 	return (rslt);
701*8906SEric.Saxe@Sun.COM }
702*8906SEric.Saxe@Sun.COM 
703*8906SEric.Saxe@Sun.COM /*ARGSUSED*/
704*8906SEric.Saxe@Sun.COM static boolean_t
705*8906SEric.Saxe@Sun.COM cpu_idle_cpr_callb(void *arg, int code)
706*8906SEric.Saxe@Sun.COM {
707*8906SEric.Saxe@Sun.COM 	boolean_t rslt = B_TRUE;
708*8906SEric.Saxe@Sun.COM 
709*8906SEric.Saxe@Sun.COM 	mutex_enter(&cpu_idle_callb_mutex);
710*8906SEric.Saxe@Sun.COM 	switch (code) {
711*8906SEric.Saxe@Sun.COM 	case CB_CODE_CPR_RESUME:
712*8906SEric.Saxe@Sun.COM 		if (hpet.callback(CB_CODE_CPR_RESUME)) {
713*8906SEric.Saxe@Sun.COM 			/*
714*8906SEric.Saxe@Sun.COM 			 * Do not enable dispatcher hooks if disabled by user.
715*8906SEric.Saxe@Sun.COM 			 */
716*8906SEric.Saxe@Sun.COM 			if (cpu_idle_cfg_state & CPU_IDLE_DEEP_CFG)
717*8906SEric.Saxe@Sun.COM 				break;
718*8906SEric.Saxe@Sun.COM 
719*8906SEric.Saxe@Sun.COM 			disp_enq_thread = cstate_wakeup;
720*8906SEric.Saxe@Sun.COM 			idle_cpu = cpu_idle_adaptive;
721*8906SEric.Saxe@Sun.COM 		} else {
722*8906SEric.Saxe@Sun.COM 			rslt = B_FALSE;
723*8906SEric.Saxe@Sun.COM 		}
724*8906SEric.Saxe@Sun.COM 		break;
725*8906SEric.Saxe@Sun.COM 
726*8906SEric.Saxe@Sun.COM 	case CB_CODE_CPR_CHKPT:
727*8906SEric.Saxe@Sun.COM 		idle_cpu = non_deep_idle_cpu;
728*8906SEric.Saxe@Sun.COM 		disp_enq_thread = non_deep_idle_disp_enq_thread;
729*8906SEric.Saxe@Sun.COM 		hpet.callback(CB_CODE_CPR_CHKPT);
730*8906SEric.Saxe@Sun.COM 		break;
731*8906SEric.Saxe@Sun.COM 
732*8906SEric.Saxe@Sun.COM 	default:
733*8906SEric.Saxe@Sun.COM 		cmn_err(CE_NOTE, "!cpudvr cpr_callb: invalid code %d\n", code);
734*8906SEric.Saxe@Sun.COM 		break;
735*8906SEric.Saxe@Sun.COM 	}
736*8906SEric.Saxe@Sun.COM 	mutex_exit(&cpu_idle_callb_mutex);
737*8906SEric.Saxe@Sun.COM 	return (rslt);
738*8906SEric.Saxe@Sun.COM }
739*8906SEric.Saxe@Sun.COM 
740*8906SEric.Saxe@Sun.COM /*
741*8906SEric.Saxe@Sun.COM  * handle _CST notification
742*8906SEric.Saxe@Sun.COM  */
743*8906SEric.Saxe@Sun.COM void
744*8906SEric.Saxe@Sun.COM cpuidle_cstate_instance(cpu_t *cp)
745*8906SEric.Saxe@Sun.COM {
746*8906SEric.Saxe@Sun.COM #ifndef	__xpv
747*8906SEric.Saxe@Sun.COM 	cpupm_mach_state_t	*mach_state =
748*8906SEric.Saxe@Sun.COM 	    (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state;
749*8906SEric.Saxe@Sun.COM 	cpu_acpi_handle_t	handle;
750*8906SEric.Saxe@Sun.COM 	struct machcpu		*mcpu;
751*8906SEric.Saxe@Sun.COM 	cpuset_t 		dom_cpu_set;
752*8906SEric.Saxe@Sun.COM 	kmutex_t		*pm_lock;
753*8906SEric.Saxe@Sun.COM 	int			result = 0;
754*8906SEric.Saxe@Sun.COM 	processorid_t		cpu_id;
755*8906SEric.Saxe@Sun.COM 
756*8906SEric.Saxe@Sun.COM 	if (mach_state == NULL) {
757*8906SEric.Saxe@Sun.COM 		return;
758*8906SEric.Saxe@Sun.COM 	}
759*8906SEric.Saxe@Sun.COM 
760*8906SEric.Saxe@Sun.COM 	ASSERT(mach_state->ms_cstate.cma_domain != NULL);
761*8906SEric.Saxe@Sun.COM 	dom_cpu_set = mach_state->ms_cstate.cma_domain->pm_cpus;
762*8906SEric.Saxe@Sun.COM 	pm_lock = &mach_state->ms_cstate.cma_domain->pm_lock;
763*8906SEric.Saxe@Sun.COM 
764*8906SEric.Saxe@Sun.COM 	/*
765*8906SEric.Saxe@Sun.COM 	 * Do for all the CPU's in the domain
766*8906SEric.Saxe@Sun.COM 	 */
767*8906SEric.Saxe@Sun.COM 	mutex_enter(pm_lock);
768*8906SEric.Saxe@Sun.COM 	do {
769*8906SEric.Saxe@Sun.COM 		CPUSET_FIND(dom_cpu_set, cpu_id);
770*8906SEric.Saxe@Sun.COM 		if (cpu_id == CPUSET_NOTINSET)
771*8906SEric.Saxe@Sun.COM 			break;
772*8906SEric.Saxe@Sun.COM 
773*8906SEric.Saxe@Sun.COM 		ASSERT(cpu_id >= 0 && cpu_id < NCPU);
774*8906SEric.Saxe@Sun.COM 		cp = cpu[cpu_id];
775*8906SEric.Saxe@Sun.COM 		mach_state = (cpupm_mach_state_t *)
776*8906SEric.Saxe@Sun.COM 		    cp->cpu_m.mcpu_pm_mach_state;
777*8906SEric.Saxe@Sun.COM 		if (!(mach_state->ms_caps & CPUPM_C_STATES)) {
778*8906SEric.Saxe@Sun.COM 			mutex_exit(pm_lock);
779*8906SEric.Saxe@Sun.COM 			return;
780*8906SEric.Saxe@Sun.COM 		}
781*8906SEric.Saxe@Sun.COM 		handle = mach_state->ms_acpi_handle;
782*8906SEric.Saxe@Sun.COM 		ASSERT(handle != NULL);
783*8906SEric.Saxe@Sun.COM 
784*8906SEric.Saxe@Sun.COM 		/*
785*8906SEric.Saxe@Sun.COM 		 * re-evaluate cstate object
786*8906SEric.Saxe@Sun.COM 		 */
787*8906SEric.Saxe@Sun.COM 		if (cpu_acpi_cache_cstate_data(handle) != 0) {
788*8906SEric.Saxe@Sun.COM 			cmn_err(CE_WARN, "Cannot re-evaluate the cpu c-state"
789*8906SEric.Saxe@Sun.COM 			    " object Instance: %d", cpu_id);
790*8906SEric.Saxe@Sun.COM 		}
791*8906SEric.Saxe@Sun.COM 		mutex_enter(&cpu_lock);
792*8906SEric.Saxe@Sun.COM 		mcpu = &(cp->cpu_m);
793*8906SEric.Saxe@Sun.COM 		mcpu->max_cstates = cpu_acpi_get_max_cstates(handle);
794*8906SEric.Saxe@Sun.COM 		if (mcpu->max_cstates > CPU_ACPI_C1) {
795*8906SEric.Saxe@Sun.COM 			hpet.callback(CST_EVENT_MULTIPLE_CSTATES);
796*8906SEric.Saxe@Sun.COM 			disp_enq_thread = cstate_wakeup;
797*8906SEric.Saxe@Sun.COM 			cp->cpu_m.mcpu_idle_cpu = cpu_acpi_idle;
798*8906SEric.Saxe@Sun.COM 			cpuidle_set_cstate_latency(cp);
799*8906SEric.Saxe@Sun.COM 		} else if (mcpu->max_cstates == CPU_ACPI_C1) {
800*8906SEric.Saxe@Sun.COM 			disp_enq_thread = non_deep_idle_disp_enq_thread;
801*8906SEric.Saxe@Sun.COM 			cp->cpu_m.mcpu_idle_cpu = non_deep_idle_cpu;
802*8906SEric.Saxe@Sun.COM 			hpet.callback(CST_EVENT_ONE_CSTATE);
803*8906SEric.Saxe@Sun.COM 		}
804*8906SEric.Saxe@Sun.COM 		mutex_exit(&cpu_lock);
805*8906SEric.Saxe@Sun.COM 
806*8906SEric.Saxe@Sun.COM 		CPUSET_ATOMIC_XDEL(dom_cpu_set, cpu_id, result);
807*8906SEric.Saxe@Sun.COM 		mutex_exit(pm_lock);
808*8906SEric.Saxe@Sun.COM 	} while (result < 0);
809*8906SEric.Saxe@Sun.COM #endif
810*8906SEric.Saxe@Sun.COM }
811*8906SEric.Saxe@Sun.COM 
812*8906SEric.Saxe@Sun.COM /*
813*8906SEric.Saxe@Sun.COM  * handle the number or the type of available processor power states change
814*8906SEric.Saxe@Sun.COM  */
815*8906SEric.Saxe@Sun.COM void
816*8906SEric.Saxe@Sun.COM cpuidle_manage_cstates(void *ctx)
817*8906SEric.Saxe@Sun.COM {
818*8906SEric.Saxe@Sun.COM 	cpu_t			*cp = ctx;
819*8906SEric.Saxe@Sun.COM 	processorid_t		cpu_id = cp->cpu_id;
820*8906SEric.Saxe@Sun.COM 	cpupm_mach_state_t	*mach_state =
821*8906SEric.Saxe@Sun.COM 	    (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state;
822*8906SEric.Saxe@Sun.COM 	boolean_t		is_ready;
823*8906SEric.Saxe@Sun.COM 
824*8906SEric.Saxe@Sun.COM 	if (mach_state == NULL) {
825*8906SEric.Saxe@Sun.COM 		return;
826*8906SEric.Saxe@Sun.COM 	}
827*8906SEric.Saxe@Sun.COM 
828*8906SEric.Saxe@Sun.COM 	/*
829*8906SEric.Saxe@Sun.COM 	 * We currently refuse to power manage if the CPU is not ready to
830*8906SEric.Saxe@Sun.COM 	 * take cross calls (cross calls fail silently if CPU is not ready
831*8906SEric.Saxe@Sun.COM 	 * for it).
832*8906SEric.Saxe@Sun.COM 	 *
833*8906SEric.Saxe@Sun.COM 	 * Additionally, for x86 platforms we cannot power manage
834*8906SEric.Saxe@Sun.COM 	 * any one instance, until all instances have been initialized.
835*8906SEric.Saxe@Sun.COM 	 * That's because we don't know what the CPU domains look like
836*8906SEric.Saxe@Sun.COM 	 * until all instances have been initialized.
837*8906SEric.Saxe@Sun.COM 	 */
838*8906SEric.Saxe@Sun.COM 	is_ready = CPUPM_XCALL_IS_READY(cpu_id) && cpupm_cstate_ready();
839*8906SEric.Saxe@Sun.COM 	if (!is_ready)
840*8906SEric.Saxe@Sun.COM 		return;
841*8906SEric.Saxe@Sun.COM 
842*8906SEric.Saxe@Sun.COM 	cpuidle_cstate_instance(cp);
843*8906SEric.Saxe@Sun.COM }
844*8906SEric.Saxe@Sun.COM 
845*8906SEric.Saxe@Sun.COM static void
846*8906SEric.Saxe@Sun.COM cpuidle_set_cstate_latency(cpu_t *cp)
847*8906SEric.Saxe@Sun.COM {
848*8906SEric.Saxe@Sun.COM 	cpupm_mach_state_t	*mach_state =
849*8906SEric.Saxe@Sun.COM 	    (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state;
850*8906SEric.Saxe@Sun.COM 	cpu_acpi_handle_t	handle;
851*8906SEric.Saxe@Sun.COM 	cpu_acpi_cstate_t	*acpi_cstates;
852*8906SEric.Saxe@Sun.COM 	cma_c_state_t		*cpupm_cdata;
853*8906SEric.Saxe@Sun.COM 	uint32_t		i, cnt;
854*8906SEric.Saxe@Sun.COM 
855*8906SEric.Saxe@Sun.COM 	cpupm_cdata = mach_state->ms_cstate.cma_state.cstate;
856*8906SEric.Saxe@Sun.COM 
857*8906SEric.Saxe@Sun.COM 	ASSERT(cpupm_cdata != 0);
858*8906SEric.Saxe@Sun.COM 	ASSERT(mach_state != NULL);
859*8906SEric.Saxe@Sun.COM 	handle = mach_state->ms_acpi_handle;
860*8906SEric.Saxe@Sun.COM 	ASSERT(handle != NULL);
861*8906SEric.Saxe@Sun.COM 
862*8906SEric.Saxe@Sun.COM 	cnt = CPU_ACPI_CSTATES_COUNT(handle);
863*8906SEric.Saxe@Sun.COM 	acpi_cstates = (cpu_acpi_cstate_t *)CPU_ACPI_CSTATES(handle);
864*8906SEric.Saxe@Sun.COM 
865*8906SEric.Saxe@Sun.COM 	cpupm_cdata->cs_C2_latency = CPU_CSTATE_LATENCY_UNDEF;
866*8906SEric.Saxe@Sun.COM 	cpupm_cdata->cs_C3_latency = CPU_CSTATE_LATENCY_UNDEF;
867*8906SEric.Saxe@Sun.COM 
868*8906SEric.Saxe@Sun.COM 	for (i = 1; i <= cnt; ++i, ++acpi_cstates) {
869*8906SEric.Saxe@Sun.COM 		if ((cpupm_cdata->cs_C2_latency == CPU_CSTATE_LATENCY_UNDEF) &&
870*8906SEric.Saxe@Sun.COM 		    (acpi_cstates->cs_type == CPU_ACPI_C2))
871*8906SEric.Saxe@Sun.COM 			cpupm_cdata->cs_C2_latency =  acpi_cstates->cs_latency;
872*8906SEric.Saxe@Sun.COM 
873*8906SEric.Saxe@Sun.COM 		if ((cpupm_cdata->cs_C3_latency == CPU_CSTATE_LATENCY_UNDEF) &&
874*8906SEric.Saxe@Sun.COM 		    (acpi_cstates->cs_type == CPU_ACPI_C3))
875*8906SEric.Saxe@Sun.COM 			cpupm_cdata->cs_C3_latency =  acpi_cstates->cs_latency;
876*8906SEric.Saxe@Sun.COM 	}
877*8906SEric.Saxe@Sun.COM }
878