xref: /onnv-gate/usr/src/uts/common/os/cpu_event.c (revision 9637:60f2a2d63713)
1*9637SRandy.Fishel@Sun.COM /*
2*9637SRandy.Fishel@Sun.COM  * CDDL HEADER START
3*9637SRandy.Fishel@Sun.COM  *
4*9637SRandy.Fishel@Sun.COM  * The contents of this file are subject to the terms of the
5*9637SRandy.Fishel@Sun.COM  * Common Development and Distribution License (the "License").
6*9637SRandy.Fishel@Sun.COM  * You may not use this file except in compliance with the License.
7*9637SRandy.Fishel@Sun.COM  *
8*9637SRandy.Fishel@Sun.COM  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9*9637SRandy.Fishel@Sun.COM  * or http://www.opensolaris.org/os/licensing.
10*9637SRandy.Fishel@Sun.COM  * See the License for the specific language governing permissions
11*9637SRandy.Fishel@Sun.COM  * and limitations under the License.
12*9637SRandy.Fishel@Sun.COM  *
13*9637SRandy.Fishel@Sun.COM  * When distributing Covered Code, include this CDDL HEADER in each
14*9637SRandy.Fishel@Sun.COM  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15*9637SRandy.Fishel@Sun.COM  * If applicable, add the following below this CDDL HEADER, with the
16*9637SRandy.Fishel@Sun.COM  * fields enclosed by brackets "[]" replaced with your own identifying
17*9637SRandy.Fishel@Sun.COM  * information: Portions Copyright [yyyy] [name of copyright owner]
18*9637SRandy.Fishel@Sun.COM  *
19*9637SRandy.Fishel@Sun.COM  * CDDL HEADER END
20*9637SRandy.Fishel@Sun.COM  */
21*9637SRandy.Fishel@Sun.COM /*
22*9637SRandy.Fishel@Sun.COM  * Copyright (c) 2009, Intel Corporation.
23*9637SRandy.Fishel@Sun.COM  * All rights reserved.
24*9637SRandy.Fishel@Sun.COM  */
25*9637SRandy.Fishel@Sun.COM 
/*
 * Introduction
 * This file implements a CPU event notification mechanism to signal clients
 * which are interested in CPU related events.
 * Currently it only supports CPU idle state change events, which are
 * triggered just before a CPU enters a hardware idle state and just after a
 * CPU wakes up from a hardware idle state.
 * Please refer to PSARC/2009/115 for detailed information.
 *
 * Lock Strategy
 * 1) cpu_idle_prop_busy/free are protected by cpu_idle_prop_lock.
 * 2) No protection for cpu_idle_cb_state because it's per-CPU data.
 * 3) cpu_idle_cb_busy is protected by cpu_idle_cb_lock.
 * 4) cpu_idle_cb_array is protected by pause_cpus/start_cpus logic.
 * 5) cpu_idle_cb_max/curr are protected by both cpu_idle_cb_lock and
 *    pause_cpus/start_cpus logic.
 * The algorithm is optimized for the hot path, which is read-side access;
 * in the current algorithm read-side access is lock free.
 * On the write side, we use pause_cpus() to keep other CPUs in the pause
 * thread, which guarantees that no other thread will access the
 * cpu_idle_cb_max/curr/array data structures.
 */
48*9637SRandy.Fishel@Sun.COM 
49*9637SRandy.Fishel@Sun.COM #include <sys/types.h>
50*9637SRandy.Fishel@Sun.COM #include <sys/cmn_err.h>
51*9637SRandy.Fishel@Sun.COM #include <sys/cpuvar.h>
52*9637SRandy.Fishel@Sun.COM #include <sys/cpu.h>
53*9637SRandy.Fishel@Sun.COM #include <sys/kmem.h>
54*9637SRandy.Fishel@Sun.COM #include <sys/machcpuvar.h>
55*9637SRandy.Fishel@Sun.COM #include <sys/sdt.h>
56*9637SRandy.Fishel@Sun.COM #include <sys/sysmacros.h>
57*9637SRandy.Fishel@Sun.COM #include <sys/synch.h>
58*9637SRandy.Fishel@Sun.COM #include <sys/systm.h>
59*9637SRandy.Fishel@Sun.COM #include <sys/sunddi.h>
60*9637SRandy.Fishel@Sun.COM #if defined(__sparc)
61*9637SRandy.Fishel@Sun.COM #include <sys/machsystm.h>
62*9637SRandy.Fishel@Sun.COM #elif defined(__x86)
63*9637SRandy.Fishel@Sun.COM #include <sys/archsystm.h>
64*9637SRandy.Fishel@Sun.COM #endif
65*9637SRandy.Fishel@Sun.COM #include <sys/cpu_event.h>
66*9637SRandy.Fishel@Sun.COM 
/*
 * CPU_IDLE_STATE_NORMAL is the platform's idle-state value for a CPU that
 * is running normally (i.e. not in a hardware idle state).
 */
#if defined(__sparc)
/*
 * On sparc, IDLE_STATE_NORMAL is (at the time of writing) defined in
 * mach_startup.c rather than in a header file.  If no header has provided
 * it, fall back to the value used by mach_startup.c; should a header start
 * defining it, that definition is picked up instead.
 */
#if !defined(IDLE_STATE_NORMAL)
#define	IDLE_STATE_NORMAL	0
#endif
#define	CPU_IDLE_STATE_NORMAL	IDLE_STATE_NORMAL
#elif defined(__x86)
#define	CPU_IDLE_STATE_NORMAL		IDLE_STATE_C0
#endif
82*9637SRandy.Fishel@Sun.COM 
/*
 * To improve cache efficiency and avoid cache false sharing, CPU idle
 * properties are grouped into cache lines as below:
 * |     CPU0      |     CPU1      |.........|     CPUn      |
 * | cache line 0  | cache line 1  |.........| cache line n  |
 * | v0 | ... | vm | v0 | ... | vm |.........| v0 | ... | vm |
 * To access value of property m for CPU n, using following value as index:
 *    index = seq_id_of_CPUn * CPU_IDLE_VALUE_GROUP_SIZE + m.
 *
 * CPU_IDLE_VALUE_GROUP_SIZE is the number of cpu_idle_prop_value_t slots
 * that fit into CPU_CACHE_COHERENCE_SIZE bytes, i.e. the stride (in slots)
 * between the groups of two consecutive CPUs.
 */
#define	CPU_IDLE_VALUE_GROUP_SIZE	\
	(CPU_CACHE_COHERENCE_SIZE / sizeof (cpu_idle_prop_value_t))
94*9637SRandy.Fishel@Sun.COM 
/*
 * A callback context handle is the CPU's sequential id (cpu_seqid) cast to
 * cpu_idle_callback_context_t.  The macros below convert between the handle,
 * the CPU id and the position of the CPU's group in a property value array.
 */

/* Build the callback context handle for CPU cp. */
#define	CPU_IDLE_GET_CTX(cp)		\
	((cpu_idle_callback_context_t)(intptr_t)((cp)->cpu_seqid))

/* Recover the CPU sequential id that was encoded into ctx. */
#define	CPU_IDLE_CTX2CPUID(ctx)		((processorid_t)(intptr_t)(ctx))

/* Index of the first value slot of the group owned by the CPU behind ctx. */
#define	CPU_IDLE_CTX2IDX(ctx)		\
	((int)(intptr_t)(ctx) * CPU_IDLE_VALUE_GROUP_SIZE)

/* Address of slot idx in the value array of property handle hdl. */
#define	CPU_IDLE_HDL2VALP(hdl, idx)	\
	(((cpu_idle_prop_impl_t *)(hdl))->value + (idx))
108*9637SRandy.Fishel@Sun.COM 
/*
 * Growth step for cpu_idle_cb_array.
 * When cpu_idle_cb_array is NULL or full, its capacity is increased by
 * CPU_IDLE_ARRAY_CAPACITY_INC entries each time.  Linear growth is
 * preferred here over exponential growth.
 */
#define	CPU_IDLE_ARRAY_CAPACITY_INC	0x10
114*9637SRandy.Fishel@Sun.COM 
115*9637SRandy.Fishel@Sun.COM typedef struct cpu_idle_prop_impl {
116*9637SRandy.Fishel@Sun.COM 	cpu_idle_prop_value_t		*value;
117*9637SRandy.Fishel@Sun.COM 	struct cpu_idle_prop_impl	*next;
118*9637SRandy.Fishel@Sun.COM 	char				*name;
119*9637SRandy.Fishel@Sun.COM 	cpu_idle_prop_update_t		update;
120*9637SRandy.Fishel@Sun.COM 	void				*private;
121*9637SRandy.Fishel@Sun.COM 	cpu_idle_prop_type_t		type;
122*9637SRandy.Fishel@Sun.COM 	uint32_t			refcnt;
123*9637SRandy.Fishel@Sun.COM } cpu_idle_prop_impl_t;
124*9637SRandy.Fishel@Sun.COM 
125*9637SRandy.Fishel@Sun.COM typedef struct cpu_idle_prop_item {
126*9637SRandy.Fishel@Sun.COM 	cpu_idle_prop_type_t		type;
127*9637SRandy.Fishel@Sun.COM 	char				*name;
128*9637SRandy.Fishel@Sun.COM 	cpu_idle_prop_update_t		update;
129*9637SRandy.Fishel@Sun.COM 	void				*arg;
130*9637SRandy.Fishel@Sun.COM 	cpu_idle_prop_handle_t		handle;
131*9637SRandy.Fishel@Sun.COM } cpu_idle_prop_item_t;
132*9637SRandy.Fishel@Sun.COM 
133*9637SRandy.Fishel@Sun.COM /* Structure to maintain registered callbacks in list. */
134*9637SRandy.Fishel@Sun.COM typedef struct cpu_idle_cb_impl {
135*9637SRandy.Fishel@Sun.COM 	struct cpu_idle_cb_impl		*next;
136*9637SRandy.Fishel@Sun.COM 	cpu_idle_callback_t		*callback;
137*9637SRandy.Fishel@Sun.COM 	void				*argument;
138*9637SRandy.Fishel@Sun.COM 	int				priority;
139*9637SRandy.Fishel@Sun.COM } cpu_idle_cb_impl_t;
140*9637SRandy.Fishel@Sun.COM 
141*9637SRandy.Fishel@Sun.COM /*
142*9637SRandy.Fishel@Sun.COM  * Structure to maintain registered callbacks in priority order and also
143*9637SRandy.Fishel@Sun.COM  * optimized for cache efficiency for reading access.
144*9637SRandy.Fishel@Sun.COM  */
145*9637SRandy.Fishel@Sun.COM typedef struct cpu_idle_cb_item {
146*9637SRandy.Fishel@Sun.COM 	cpu_idle_enter_cbfn_t		enter;
147*9637SRandy.Fishel@Sun.COM 	cpu_idle_exit_cbfn_t		exit;
148*9637SRandy.Fishel@Sun.COM 	void				*arg;
149*9637SRandy.Fishel@Sun.COM 	cpu_idle_cb_impl_t		*impl;
150*9637SRandy.Fishel@Sun.COM } cpu_idle_cb_item_t;
151*9637SRandy.Fishel@Sun.COM 
152*9637SRandy.Fishel@Sun.COM /* Per-CPU state aligned to CPU_CACHE_COHERENCE_SIZE to avoid false sharing. */
153*9637SRandy.Fishel@Sun.COM typedef union cpu_idle_cb_state {
154*9637SRandy.Fishel@Sun.COM 	struct {
155*9637SRandy.Fishel@Sun.COM 		int			index;
156*9637SRandy.Fishel@Sun.COM 		boolean_t		ready;
157*9637SRandy.Fishel@Sun.COM 		cpu_idle_prop_value_t	*idle_state;
158*9637SRandy.Fishel@Sun.COM 		cpu_idle_prop_value_t	*enter_ts;
159*9637SRandy.Fishel@Sun.COM 		cpu_idle_prop_value_t	*exit_ts;
160*9637SRandy.Fishel@Sun.COM 		cpu_idle_prop_value_t	*last_idle;
161*9637SRandy.Fishel@Sun.COM 		cpu_idle_prop_value_t	*last_busy;
162*9637SRandy.Fishel@Sun.COM 		cpu_idle_prop_value_t	*total_idle;
163*9637SRandy.Fishel@Sun.COM 		cpu_idle_prop_value_t	*total_busy;
164*9637SRandy.Fishel@Sun.COM 		cpu_idle_prop_value_t	*intr_cnt;
165*9637SRandy.Fishel@Sun.COM 	} v;
166*9637SRandy.Fishel@Sun.COM #ifdef _LP64
167*9637SRandy.Fishel@Sun.COM 	char				align[2 * CPU_CACHE_COHERENCE_SIZE];
168*9637SRandy.Fishel@Sun.COM #else
169*9637SRandy.Fishel@Sun.COM 	char				align[CPU_CACHE_COHERENCE_SIZE];
170*9637SRandy.Fishel@Sun.COM #endif
171*9637SRandy.Fishel@Sun.COM } cpu_idle_cb_state_t;
172*9637SRandy.Fishel@Sun.COM 
/* Property lists; both are protected by cpu_idle_prop_lock. */
static kmutex_t				cpu_idle_prop_lock;
static cpu_idle_prop_impl_t		*cpu_idle_prop_busy = NULL;
static cpu_idle_prop_impl_t		*cpu_idle_prop_free = NULL;

/*
 * Callback bookkeeping.
 * cpu_idle_cb_busy is the list of registration records (protected by
 * cpu_idle_cb_lock).  cpu_idle_cb_array holds the same callbacks in priority
 * order; cpu_idle_cb_curr is the number of entries in use and
 * cpu_idle_cb_max the allocated capacity.  The array and both counters are
 * only modified while the other CPUs are paused.
 */
static kmutex_t				cpu_idle_cb_lock;
static cpu_idle_cb_impl_t		*cpu_idle_cb_busy = NULL;
static cpu_idle_cb_item_t		*cpu_idle_cb_array = NULL;
static int				cpu_idle_cb_curr = 0;
static int				cpu_idle_cb_max = 0;

/* Per-CPU state array, indexed by cpu_seqid; allocated in cpu_event_init(). */
static cpu_idle_cb_state_t		*cpu_idle_cb_state;

/* Update callback of the built-in interrupt count property. */
static int cpu_idle_prop_update_intr_cnt(void *arg, uint64_t seqnum,
    cpu_idle_prop_value_t *valp);
187*9637SRandy.Fishel@Sun.COM 
188*9637SRandy.Fishel@Sun.COM static cpu_idle_prop_item_t cpu_idle_prop_array[] = {
189*9637SRandy.Fishel@Sun.COM 	{
190*9637SRandy.Fishel@Sun.COM 	    CPU_IDLE_PROP_TYPE_INTPTR, CPU_IDLE_PROP_IDLE_STATE,
191*9637SRandy.Fishel@Sun.COM 	    NULL, NULL, NULL
192*9637SRandy.Fishel@Sun.COM 	},
193*9637SRandy.Fishel@Sun.COM 	{
194*9637SRandy.Fishel@Sun.COM 	    CPU_IDLE_PROP_TYPE_HRTIME, CPU_IDLE_PROP_ENTER_TIMESTAMP,
195*9637SRandy.Fishel@Sun.COM 	    NULL, NULL, NULL
196*9637SRandy.Fishel@Sun.COM 	},
197*9637SRandy.Fishel@Sun.COM 	{
198*9637SRandy.Fishel@Sun.COM 	    CPU_IDLE_PROP_TYPE_HRTIME, CPU_IDLE_PROP_EXIT_TIMESTAMP,
199*9637SRandy.Fishel@Sun.COM 	    NULL, NULL, NULL
200*9637SRandy.Fishel@Sun.COM 	},
201*9637SRandy.Fishel@Sun.COM 	{
202*9637SRandy.Fishel@Sun.COM 	    CPU_IDLE_PROP_TYPE_HRTIME, CPU_IDLE_PROP_LAST_IDLE_TIME,
203*9637SRandy.Fishel@Sun.COM 	    NULL, NULL, NULL
204*9637SRandy.Fishel@Sun.COM 	},
205*9637SRandy.Fishel@Sun.COM 	{
206*9637SRandy.Fishel@Sun.COM 	    CPU_IDLE_PROP_TYPE_HRTIME, CPU_IDLE_PROP_LAST_BUSY_TIME,
207*9637SRandy.Fishel@Sun.COM 	    NULL, NULL, NULL
208*9637SRandy.Fishel@Sun.COM 	},
209*9637SRandy.Fishel@Sun.COM 	{
210*9637SRandy.Fishel@Sun.COM 	    CPU_IDLE_PROP_TYPE_HRTIME, CPU_IDLE_PROP_TOTAL_IDLE_TIME,
211*9637SRandy.Fishel@Sun.COM 	    NULL, NULL, NULL
212*9637SRandy.Fishel@Sun.COM 	},
213*9637SRandy.Fishel@Sun.COM 	{
214*9637SRandy.Fishel@Sun.COM 	    CPU_IDLE_PROP_TYPE_HRTIME, CPU_IDLE_PROP_TOTAL_BUSY_TIME,
215*9637SRandy.Fishel@Sun.COM 	    NULL, NULL, NULL
216*9637SRandy.Fishel@Sun.COM 	},
217*9637SRandy.Fishel@Sun.COM 	{
218*9637SRandy.Fishel@Sun.COM 	    CPU_IDLE_PROP_TYPE_UINT64, CPU_IDLE_PROP_INTERRUPT_COUNT,
219*9637SRandy.Fishel@Sun.COM 	    cpu_idle_prop_update_intr_cnt, NULL, NULL
220*9637SRandy.Fishel@Sun.COM 	},
221*9637SRandy.Fishel@Sun.COM };
222*9637SRandy.Fishel@Sun.COM 
223*9637SRandy.Fishel@Sun.COM #define	CPU_IDLE_PROP_IDX_IDLE_STATE	0
224*9637SRandy.Fishel@Sun.COM #define	CPU_IDLE_PROP_IDX_ENTER_TS	1
225*9637SRandy.Fishel@Sun.COM #define	CPU_IDLE_PROP_IDX_EXIT_TS	2
226*9637SRandy.Fishel@Sun.COM #define	CPU_IDLE_PROP_IDX_LAST_IDLE	3
227*9637SRandy.Fishel@Sun.COM #define	CPU_IDLE_PROP_IDX_LAST_BUSY	4
228*9637SRandy.Fishel@Sun.COM #define	CPU_IDLE_PROP_IDX_TOTAL_IDLE	5
229*9637SRandy.Fishel@Sun.COM #define	CPU_IDLE_PROP_IDX_TOTAL_BUSY	6
230*9637SRandy.Fishel@Sun.COM #define	CPU_IDLE_PROP_IDX_INTR_CNT	7
231*9637SRandy.Fishel@Sun.COM 
232*9637SRandy.Fishel@Sun.COM /*ARGSUSED*/
233*9637SRandy.Fishel@Sun.COM static void
234*9637SRandy.Fishel@Sun.COM cpu_idle_dtrace_enter(void *arg, cpu_idle_callback_context_t ctx,
235*9637SRandy.Fishel@Sun.COM     cpu_idle_check_wakeup_t check_func, void *check_arg)
236*9637SRandy.Fishel@Sun.COM {
237*9637SRandy.Fishel@Sun.COM 	int state;
238*9637SRandy.Fishel@Sun.COM 
239*9637SRandy.Fishel@Sun.COM 	state = cpu_idle_prop_get_intptr(
240*9637SRandy.Fishel@Sun.COM 	    cpu_idle_prop_array[CPU_IDLE_PROP_IDX_IDLE_STATE].handle, ctx);
241*9637SRandy.Fishel@Sun.COM 	DTRACE_PROBE1(idle__state__transition, uint_t, state);
242*9637SRandy.Fishel@Sun.COM }
243*9637SRandy.Fishel@Sun.COM 
/*
 * Idle-exit callback of the built-in dtrace client: fires the
 * idle-state-transition probe with the platform's normal (non-idle) state.
 * All arguments are unused.
 */
/*ARGSUSED*/
static void
cpu_idle_dtrace_exit(void *arg, cpu_idle_callback_context_t ctx, int flag)
{
	DTRACE_PROBE1(idle__state__transition, uint_t, CPU_IDLE_STATE_NORMAL);
}
250*9637SRandy.Fishel@Sun.COM 
/*
 * Built-in dtrace client: callback descriptor and the handle returned when
 * it is registered by cpu_event_init().
 */
static cpu_idle_callback_handle_t cpu_idle_cb_handle_dtrace;
static cpu_idle_callback_t cpu_idle_callback_dtrace = {
	CPU_IDLE_CALLBACK_VERS,
	cpu_idle_dtrace_enter,
	cpu_idle_dtrace_exit,
};
257*9637SRandy.Fishel@Sun.COM 
/*
 * Built-in TLB client, only present on x86 when not building for xpv.
 * tlb_going_idle() and tlb_service() take no arguments, so they are cast to
 * the enter/exit callback function types; the callback arguments are
 * therefore ignored by them.
 */
#if defined(__x86) && !defined(__xpv)
extern void tlb_going_idle(void);
extern void tlb_service(void);

/* Handle returned when the TLB client is registered by cpu_event_init(). */
static cpu_idle_callback_handle_t cpu_idle_cb_handle_tlb;
static cpu_idle_callback_t cpu_idle_callback_tlb = {
	CPU_IDLE_CALLBACK_VERS,
	(cpu_idle_enter_cbfn_t)tlb_going_idle,
	(cpu_idle_exit_cbfn_t)tlb_service,
};
#endif
269*9637SRandy.Fishel@Sun.COM 
270*9637SRandy.Fishel@Sun.COM void
271*9637SRandy.Fishel@Sun.COM cpu_event_init(void)
272*9637SRandy.Fishel@Sun.COM {
273*9637SRandy.Fishel@Sun.COM 	int i, idx;
274*9637SRandy.Fishel@Sun.COM 	size_t sz;
275*9637SRandy.Fishel@Sun.COM 	intptr_t buf;
276*9637SRandy.Fishel@Sun.COM 	cpu_idle_cb_state_t *sp;
277*9637SRandy.Fishel@Sun.COM 	cpu_idle_prop_item_t *ip;
278*9637SRandy.Fishel@Sun.COM 
279*9637SRandy.Fishel@Sun.COM 	mutex_init(&cpu_idle_cb_lock, NULL, MUTEX_DRIVER, NULL);
280*9637SRandy.Fishel@Sun.COM 	mutex_init(&cpu_idle_prop_lock, NULL, MUTEX_DRIVER, NULL);
281*9637SRandy.Fishel@Sun.COM 
282*9637SRandy.Fishel@Sun.COM 	/* Create internal properties. */
283*9637SRandy.Fishel@Sun.COM 	for (i = 0, ip = cpu_idle_prop_array;
284*9637SRandy.Fishel@Sun.COM 	    i < sizeof (cpu_idle_prop_array) / sizeof (cpu_idle_prop_array[0]);
285*9637SRandy.Fishel@Sun.COM 	    i++, ip++) {
286*9637SRandy.Fishel@Sun.COM 		(void) cpu_idle_prop_create_property(ip->name, ip->type,
287*9637SRandy.Fishel@Sun.COM 		    ip->update, ip->arg, &ip->handle);
288*9637SRandy.Fishel@Sun.COM 		ASSERT(ip->handle != NULL);
289*9637SRandy.Fishel@Sun.COM 	}
290*9637SRandy.Fishel@Sun.COM 
291*9637SRandy.Fishel@Sun.COM 	/* Allocate buffer and align to CPU_CACHE_COHERENCE_SIZE. */
292*9637SRandy.Fishel@Sun.COM 	sz = sizeof (cpu_idle_cb_state_t) * max_ncpus;
293*9637SRandy.Fishel@Sun.COM 	sz += CPU_CACHE_COHERENCE_SIZE;
294*9637SRandy.Fishel@Sun.COM 	buf = (intptr_t)kmem_zalloc(sz, KM_SLEEP);
295*9637SRandy.Fishel@Sun.COM 	cpu_idle_cb_state = (cpu_idle_cb_state_t *)P2ROUNDUP(buf,
296*9637SRandy.Fishel@Sun.COM 	    CPU_CACHE_COHERENCE_SIZE);
297*9637SRandy.Fishel@Sun.COM 
298*9637SRandy.Fishel@Sun.COM 	/* Cache frequently used property value pointers. */
299*9637SRandy.Fishel@Sun.COM 	for (sp = cpu_idle_cb_state, i = 0; i < max_ncpus; i++, sp++) {
300*9637SRandy.Fishel@Sun.COM 		idx = CPU_IDLE_CTX2IDX(i);
301*9637SRandy.Fishel@Sun.COM #define	___INIT_P(f, i)	\
302*9637SRandy.Fishel@Sun.COM 	sp->v.f = CPU_IDLE_HDL2VALP(cpu_idle_prop_array[(i)].handle, idx)
303*9637SRandy.Fishel@Sun.COM 		___INIT_P(idle_state, CPU_IDLE_PROP_IDX_IDLE_STATE);
304*9637SRandy.Fishel@Sun.COM 		___INIT_P(enter_ts, CPU_IDLE_PROP_IDX_ENTER_TS);
305*9637SRandy.Fishel@Sun.COM 		___INIT_P(exit_ts, CPU_IDLE_PROP_IDX_EXIT_TS);
306*9637SRandy.Fishel@Sun.COM 		___INIT_P(last_idle, CPU_IDLE_PROP_IDX_LAST_IDLE);
307*9637SRandy.Fishel@Sun.COM 		___INIT_P(last_busy, CPU_IDLE_PROP_IDX_LAST_BUSY);
308*9637SRandy.Fishel@Sun.COM 		___INIT_P(total_idle, CPU_IDLE_PROP_IDX_TOTAL_IDLE);
309*9637SRandy.Fishel@Sun.COM 		___INIT_P(total_busy, CPU_IDLE_PROP_IDX_TOTAL_BUSY);
310*9637SRandy.Fishel@Sun.COM 		___INIT_P(last_idle, CPU_IDLE_PROP_IDX_INTR_CNT);
311*9637SRandy.Fishel@Sun.COM #undef	___INIT_P
312*9637SRandy.Fishel@Sun.COM 	}
313*9637SRandy.Fishel@Sun.COM 
314*9637SRandy.Fishel@Sun.COM 	/* Register built-in callbacks. */
315*9637SRandy.Fishel@Sun.COM 	if (cpu_idle_register_callback(CPU_IDLE_CB_PRIO_DTRACE,
316*9637SRandy.Fishel@Sun.COM 	    &cpu_idle_callback_dtrace, NULL, &cpu_idle_cb_handle_dtrace) != 0) {
317*9637SRandy.Fishel@Sun.COM 		cmn_err(CE_PANIC,
318*9637SRandy.Fishel@Sun.COM 		    "cpu_idle: failed to register callback for dtrace.");
319*9637SRandy.Fishel@Sun.COM 	}
320*9637SRandy.Fishel@Sun.COM #if defined(__x86) && !defined(__xpv)
321*9637SRandy.Fishel@Sun.COM 	if (cpu_idle_register_callback(CPU_IDLE_CB_PRIO_TLB,
322*9637SRandy.Fishel@Sun.COM 	    &cpu_idle_callback_tlb, NULL, &cpu_idle_cb_handle_tlb) != 0) {
323*9637SRandy.Fishel@Sun.COM 		cmn_err(CE_PANIC,
324*9637SRandy.Fishel@Sun.COM 		    "cpu_idle: failed to register callback for tlb_flush.");
325*9637SRandy.Fishel@Sun.COM 	}
326*9637SRandy.Fishel@Sun.COM #endif
327*9637SRandy.Fishel@Sun.COM }
328*9637SRandy.Fishel@Sun.COM 
329*9637SRandy.Fishel@Sun.COM void
330*9637SRandy.Fishel@Sun.COM cpu_event_init_cpu(cpu_t *cp)
331*9637SRandy.Fishel@Sun.COM {
332*9637SRandy.Fishel@Sun.COM 	ASSERT(cp->cpu_seqid < max_ncpus);
333*9637SRandy.Fishel@Sun.COM 	cpu_idle_cb_state[cp->cpu_seqid].v.ready = B_FALSE;
334*9637SRandy.Fishel@Sun.COM }
335*9637SRandy.Fishel@Sun.COM 
336*9637SRandy.Fishel@Sun.COM void
337*9637SRandy.Fishel@Sun.COM cpu_event_fini_cpu(cpu_t *cp)
338*9637SRandy.Fishel@Sun.COM {
339*9637SRandy.Fishel@Sun.COM 	ASSERT(cp->cpu_seqid < max_ncpus);
340*9637SRandy.Fishel@Sun.COM 	cpu_idle_cb_state[cp->cpu_seqid].v.ready = B_FALSE;
341*9637SRandy.Fishel@Sun.COM }
342*9637SRandy.Fishel@Sun.COM 
343*9637SRandy.Fishel@Sun.COM static void
344*9637SRandy.Fishel@Sun.COM cpu_idle_insert_callback(cpu_idle_cb_impl_t *cip)
345*9637SRandy.Fishel@Sun.COM {
346*9637SRandy.Fishel@Sun.COM 	int unlock = 0, unpause = 0;
347*9637SRandy.Fishel@Sun.COM 	int i, cnt_new = 0, cnt_old = 0;
348*9637SRandy.Fishel@Sun.COM 	char *buf_new = NULL, *buf_old = NULL;
349*9637SRandy.Fishel@Sun.COM 
350*9637SRandy.Fishel@Sun.COM 	ASSERT(MUTEX_HELD(&cpu_idle_cb_lock));
351*9637SRandy.Fishel@Sun.COM 
352*9637SRandy.Fishel@Sun.COM 	/*
353*9637SRandy.Fishel@Sun.COM 	 * Expand array if it's full.
354*9637SRandy.Fishel@Sun.COM 	 * Memory must be allocated out of pause/start_cpus() scope because
355*9637SRandy.Fishel@Sun.COM 	 * kmem_zalloc() can't be called with KM_SLEEP flag within that scope.
356*9637SRandy.Fishel@Sun.COM 	 */
357*9637SRandy.Fishel@Sun.COM 	if (cpu_idle_cb_curr == cpu_idle_cb_max) {
358*9637SRandy.Fishel@Sun.COM 		cnt_new = cpu_idle_cb_max + CPU_IDLE_ARRAY_CAPACITY_INC;
359*9637SRandy.Fishel@Sun.COM 		buf_new = (char *)kmem_zalloc(cnt_new *
360*9637SRandy.Fishel@Sun.COM 		    sizeof (cpu_idle_cb_item_t), KM_SLEEP);
361*9637SRandy.Fishel@Sun.COM 	}
362*9637SRandy.Fishel@Sun.COM 
363*9637SRandy.Fishel@Sun.COM 	/* Try to acquire cpu_lock if not held yet. */
364*9637SRandy.Fishel@Sun.COM 	if (!MUTEX_HELD(&cpu_lock)) {
365*9637SRandy.Fishel@Sun.COM 		mutex_enter(&cpu_lock);
366*9637SRandy.Fishel@Sun.COM 		unlock = 1;
367*9637SRandy.Fishel@Sun.COM 	}
368*9637SRandy.Fishel@Sun.COM 	/*
369*9637SRandy.Fishel@Sun.COM 	 * Pause all other CPUs (and let them run pause thread).
370*9637SRandy.Fishel@Sun.COM 	 * It's guaranteed that no other threads will access cpu_idle_cb_array
371*9637SRandy.Fishel@Sun.COM 	 * after pause_cpus().
372*9637SRandy.Fishel@Sun.COM 	 */
373*9637SRandy.Fishel@Sun.COM 	if (!cpus_paused()) {
374*9637SRandy.Fishel@Sun.COM 		pause_cpus(NULL);
375*9637SRandy.Fishel@Sun.COM 		unpause = 1;
376*9637SRandy.Fishel@Sun.COM 	}
377*9637SRandy.Fishel@Sun.COM 
378*9637SRandy.Fishel@Sun.COM 	/* Copy content to new buffer if needed. */
379*9637SRandy.Fishel@Sun.COM 	if (buf_new != NULL) {
380*9637SRandy.Fishel@Sun.COM 		buf_old = (char *)cpu_idle_cb_array;
381*9637SRandy.Fishel@Sun.COM 		cnt_old = cpu_idle_cb_max;
382*9637SRandy.Fishel@Sun.COM 		if (buf_old != NULL) {
383*9637SRandy.Fishel@Sun.COM 			ASSERT(cnt_old != 0);
384*9637SRandy.Fishel@Sun.COM 			bcopy(cpu_idle_cb_array, buf_new,
385*9637SRandy.Fishel@Sun.COM 			    sizeof (cpu_idle_cb_item_t) * cnt_old);
386*9637SRandy.Fishel@Sun.COM 		}
387*9637SRandy.Fishel@Sun.COM 		cpu_idle_cb_array = (cpu_idle_cb_item_t *)buf_new;
388*9637SRandy.Fishel@Sun.COM 		cpu_idle_cb_max = cnt_new;
389*9637SRandy.Fishel@Sun.COM 	}
390*9637SRandy.Fishel@Sun.COM 
391*9637SRandy.Fishel@Sun.COM 	/* Insert into array according to priority. */
392*9637SRandy.Fishel@Sun.COM 	ASSERT(cpu_idle_cb_curr < cpu_idle_cb_max);
393*9637SRandy.Fishel@Sun.COM 	for (i = cpu_idle_cb_curr; i > 0; i--) {
394*9637SRandy.Fishel@Sun.COM 		if (cpu_idle_cb_array[i - 1].impl->priority >= cip->priority) {
395*9637SRandy.Fishel@Sun.COM 			break;
396*9637SRandy.Fishel@Sun.COM 		}
397*9637SRandy.Fishel@Sun.COM 		cpu_idle_cb_array[i] = cpu_idle_cb_array[i - 1];
398*9637SRandy.Fishel@Sun.COM 	}
399*9637SRandy.Fishel@Sun.COM 	cpu_idle_cb_array[i].arg = cip->argument;
400*9637SRandy.Fishel@Sun.COM 	cpu_idle_cb_array[i].enter = cip->callback->idle_enter;
401*9637SRandy.Fishel@Sun.COM 	cpu_idle_cb_array[i].exit = cip->callback->idle_exit;
402*9637SRandy.Fishel@Sun.COM 	cpu_idle_cb_array[i].impl = cip;
403*9637SRandy.Fishel@Sun.COM 	cpu_idle_cb_curr++;
404*9637SRandy.Fishel@Sun.COM 
405*9637SRandy.Fishel@Sun.COM 	/* Resume other CPUs from paused state if needed. */
406*9637SRandy.Fishel@Sun.COM 	if (unpause) {
407*9637SRandy.Fishel@Sun.COM 		start_cpus();
408*9637SRandy.Fishel@Sun.COM 	}
409*9637SRandy.Fishel@Sun.COM 	if (unlock) {
410*9637SRandy.Fishel@Sun.COM 		mutex_exit(&cpu_lock);
411*9637SRandy.Fishel@Sun.COM 	}
412*9637SRandy.Fishel@Sun.COM 
413*9637SRandy.Fishel@Sun.COM 	/* Free old resource if needed. */
414*9637SRandy.Fishel@Sun.COM 	if (buf_old != NULL) {
415*9637SRandy.Fishel@Sun.COM 		ASSERT(cnt_old != 0);
416*9637SRandy.Fishel@Sun.COM 		kmem_free(buf_old, cnt_old * sizeof (cpu_idle_cb_item_t));
417*9637SRandy.Fishel@Sun.COM 	}
418*9637SRandy.Fishel@Sun.COM }
419*9637SRandy.Fishel@Sun.COM 
420*9637SRandy.Fishel@Sun.COM static void
421*9637SRandy.Fishel@Sun.COM cpu_idle_remove_callback(cpu_idle_cb_impl_t *cip)
422*9637SRandy.Fishel@Sun.COM {
423*9637SRandy.Fishel@Sun.COM 	int i, found = 0;
424*9637SRandy.Fishel@Sun.COM 	int unlock = 0, unpause = 0;
425*9637SRandy.Fishel@Sun.COM 	cpu_idle_cb_state_t *sp;
426*9637SRandy.Fishel@Sun.COM 
427*9637SRandy.Fishel@Sun.COM 	ASSERT(MUTEX_HELD(&cpu_idle_cb_lock));
428*9637SRandy.Fishel@Sun.COM 
429*9637SRandy.Fishel@Sun.COM 	/* Try to acquire cpu_lock if not held yet. */
430*9637SRandy.Fishel@Sun.COM 	if (!MUTEX_HELD(&cpu_lock)) {
431*9637SRandy.Fishel@Sun.COM 		mutex_enter(&cpu_lock);
432*9637SRandy.Fishel@Sun.COM 		unlock = 1;
433*9637SRandy.Fishel@Sun.COM 	}
434*9637SRandy.Fishel@Sun.COM 	/*
435*9637SRandy.Fishel@Sun.COM 	 * Pause all other CPUs.
436*9637SRandy.Fishel@Sun.COM 	 * It's guaranteed that no other threads will access cpu_idle_cb_array
437*9637SRandy.Fishel@Sun.COM 	 * after pause_cpus().
438*9637SRandy.Fishel@Sun.COM 	 */
439*9637SRandy.Fishel@Sun.COM 	if (!cpus_paused()) {
440*9637SRandy.Fishel@Sun.COM 		pause_cpus(NULL);
441*9637SRandy.Fishel@Sun.COM 		unpause = 1;
442*9637SRandy.Fishel@Sun.COM 	}
443*9637SRandy.Fishel@Sun.COM 
444*9637SRandy.Fishel@Sun.COM 	/* Remove cip from array. */
445*9637SRandy.Fishel@Sun.COM 	for (i = 0; i < cpu_idle_cb_curr; i++) {
446*9637SRandy.Fishel@Sun.COM 		if (found == 0) {
447*9637SRandy.Fishel@Sun.COM 			if (cpu_idle_cb_array[i].impl == cip) {
448*9637SRandy.Fishel@Sun.COM 				found = 1;
449*9637SRandy.Fishel@Sun.COM 			}
450*9637SRandy.Fishel@Sun.COM 		} else {
451*9637SRandy.Fishel@Sun.COM 			cpu_idle_cb_array[i - 1] = cpu_idle_cb_array[i];
452*9637SRandy.Fishel@Sun.COM 		}
453*9637SRandy.Fishel@Sun.COM 	}
454*9637SRandy.Fishel@Sun.COM 	ASSERT(found != 0);
455*9637SRandy.Fishel@Sun.COM 	cpu_idle_cb_curr--;
456*9637SRandy.Fishel@Sun.COM 
457*9637SRandy.Fishel@Sun.COM 	/*
458*9637SRandy.Fishel@Sun.COM 	 * Reset property ready flag for all CPUs if no registered callback
459*9637SRandy.Fishel@Sun.COM 	 * left because cpu_idle_enter/exit will stop updating property if
460*9637SRandy.Fishel@Sun.COM 	 * there's no callback registered.
461*9637SRandy.Fishel@Sun.COM 	 */
462*9637SRandy.Fishel@Sun.COM 	if (cpu_idle_cb_curr == 0) {
463*9637SRandy.Fishel@Sun.COM 		for (sp = cpu_idle_cb_state, i = 0; i < max_ncpus; i++, sp++) {
464*9637SRandy.Fishel@Sun.COM 			sp->v.ready = B_FALSE;
465*9637SRandy.Fishel@Sun.COM 		}
466*9637SRandy.Fishel@Sun.COM 	}
467*9637SRandy.Fishel@Sun.COM 
468*9637SRandy.Fishel@Sun.COM 	/* Resume other CPUs from paused state if needed. */
469*9637SRandy.Fishel@Sun.COM 	if (unpause) {
470*9637SRandy.Fishel@Sun.COM 		start_cpus();
471*9637SRandy.Fishel@Sun.COM 	}
472*9637SRandy.Fishel@Sun.COM 	if (unlock) {
473*9637SRandy.Fishel@Sun.COM 		mutex_exit(&cpu_lock);
474*9637SRandy.Fishel@Sun.COM 	}
475*9637SRandy.Fishel@Sun.COM }
476*9637SRandy.Fishel@Sun.COM 
477*9637SRandy.Fishel@Sun.COM int
478*9637SRandy.Fishel@Sun.COM cpu_idle_register_callback(uint_t prio, cpu_idle_callback_t *cbp,
479*9637SRandy.Fishel@Sun.COM     void *arg, cpu_idle_callback_handle_t *hdlp)
480*9637SRandy.Fishel@Sun.COM {
481*9637SRandy.Fishel@Sun.COM 	cpu_idle_cb_state_t *sp;
482*9637SRandy.Fishel@Sun.COM 	cpu_idle_cb_impl_t *cip = NULL;
483*9637SRandy.Fishel@Sun.COM 
484*9637SRandy.Fishel@Sun.COM 	/* First validate parameters. */
485*9637SRandy.Fishel@Sun.COM 	ASSERT(!CPU_ON_INTR(CPU));
486*9637SRandy.Fishel@Sun.COM 	ASSERT(CPU->cpu_seqid < max_ncpus);
487*9637SRandy.Fishel@Sun.COM 	sp = &cpu_idle_cb_state[CPU->cpu_seqid];
488*9637SRandy.Fishel@Sun.COM 	if (sp->v.index != 0) {
489*9637SRandy.Fishel@Sun.COM 		cmn_err(CE_NOTE,
490*9637SRandy.Fishel@Sun.COM 		    "!cpu_event: register_callback called from callback.");
491*9637SRandy.Fishel@Sun.COM 		return (EBUSY);
492*9637SRandy.Fishel@Sun.COM 	} else if (cbp == NULL || hdlp == NULL) {
493*9637SRandy.Fishel@Sun.COM 		cmn_err(CE_NOTE,
494*9637SRandy.Fishel@Sun.COM 		    "!cpu_event: NULL parameters in register_callback.");
495*9637SRandy.Fishel@Sun.COM 		return (EINVAL);
496*9637SRandy.Fishel@Sun.COM 	} else if (prio < CPU_IDLE_CB_PRIO_LOW_BASE ||
497*9637SRandy.Fishel@Sun.COM 	    prio >= CPU_IDLE_CB_PRIO_RESV_BASE) {
498*9637SRandy.Fishel@Sun.COM 		cmn_err(CE_NOTE,
499*9637SRandy.Fishel@Sun.COM 		    "!cpu_event: priority 0x%x out of range.", prio);
500*9637SRandy.Fishel@Sun.COM 		return (EINVAL);
501*9637SRandy.Fishel@Sun.COM 	} else if (cbp->version != CPU_IDLE_CALLBACK_VERS) {
502*9637SRandy.Fishel@Sun.COM 		cmn_err(CE_NOTE,
503*9637SRandy.Fishel@Sun.COM 		    "!cpu_event: callback version %d is not supported.",
504*9637SRandy.Fishel@Sun.COM 		    cbp->version);
505*9637SRandy.Fishel@Sun.COM 		return (EINVAL);
506*9637SRandy.Fishel@Sun.COM 	}
507*9637SRandy.Fishel@Sun.COM 
508*9637SRandy.Fishel@Sun.COM 	mutex_enter(&cpu_idle_cb_lock);
509*9637SRandy.Fishel@Sun.COM 	/* Check whether callback with priority exists if not dynamic. */
510*9637SRandy.Fishel@Sun.COM 	if (prio != CPU_IDLE_CB_PRIO_DYNAMIC) {
511*9637SRandy.Fishel@Sun.COM 		for (cip = cpu_idle_cb_busy; cip != NULL;
512*9637SRandy.Fishel@Sun.COM 		    cip = cip->next) {
513*9637SRandy.Fishel@Sun.COM 			if (cip->priority == prio) {
514*9637SRandy.Fishel@Sun.COM 				mutex_exit(&cpu_idle_cb_lock);
515*9637SRandy.Fishel@Sun.COM 				cmn_err(CE_NOTE, "!cpu_event: callback with "
516*9637SRandy.Fishel@Sun.COM 				    "priority 0x%x already exists.", prio);
517*9637SRandy.Fishel@Sun.COM 				return (EEXIST);
518*9637SRandy.Fishel@Sun.COM 			}
519*9637SRandy.Fishel@Sun.COM 		}
520*9637SRandy.Fishel@Sun.COM 	}
521*9637SRandy.Fishel@Sun.COM 
522*9637SRandy.Fishel@Sun.COM 	cip = kmem_zalloc(sizeof (*cip), KM_SLEEP);
523*9637SRandy.Fishel@Sun.COM 	cip->callback = cbp;
524*9637SRandy.Fishel@Sun.COM 	cip->argument = arg;
525*9637SRandy.Fishel@Sun.COM 	cip->priority = prio;
526*9637SRandy.Fishel@Sun.COM 	cip->next = cpu_idle_cb_busy;
527*9637SRandy.Fishel@Sun.COM 	cpu_idle_cb_busy = cip;
528*9637SRandy.Fishel@Sun.COM 	cpu_idle_insert_callback(cip);
529*9637SRandy.Fishel@Sun.COM 	mutex_exit(&cpu_idle_cb_lock);
530*9637SRandy.Fishel@Sun.COM 
531*9637SRandy.Fishel@Sun.COM 	*hdlp = (cpu_idle_callback_handle_t)cip;
532*9637SRandy.Fishel@Sun.COM 
533*9637SRandy.Fishel@Sun.COM 	return (0);
534*9637SRandy.Fishel@Sun.COM }
535*9637SRandy.Fishel@Sun.COM 
536*9637SRandy.Fishel@Sun.COM int
537*9637SRandy.Fishel@Sun.COM cpu_idle_unregister_callback(cpu_idle_callback_handle_t hdl)
538*9637SRandy.Fishel@Sun.COM {
539*9637SRandy.Fishel@Sun.COM 	int rc = ENODEV;
540*9637SRandy.Fishel@Sun.COM 	cpu_idle_cb_state_t *sp;
541*9637SRandy.Fishel@Sun.COM 	cpu_idle_cb_impl_t *ip, **ipp;
542*9637SRandy.Fishel@Sun.COM 
543*9637SRandy.Fishel@Sun.COM 	ASSERT(!CPU_ON_INTR(CPU));
544*9637SRandy.Fishel@Sun.COM 	ASSERT(CPU->cpu_seqid < max_ncpus);
545*9637SRandy.Fishel@Sun.COM 	sp = &cpu_idle_cb_state[CPU->cpu_seqid];
546*9637SRandy.Fishel@Sun.COM 	if (sp->v.index != 0) {
547*9637SRandy.Fishel@Sun.COM 		cmn_err(CE_NOTE,
548*9637SRandy.Fishel@Sun.COM 		    "!cpu_event: unregister_callback called from callback.");
549*9637SRandy.Fishel@Sun.COM 		return (EBUSY);
550*9637SRandy.Fishel@Sun.COM 	} else if (hdl == NULL) {
551*9637SRandy.Fishel@Sun.COM 		cmn_err(CE_NOTE,
552*9637SRandy.Fishel@Sun.COM 		    "!cpu_event: hdl is NULL in unregister_callback.");
553*9637SRandy.Fishel@Sun.COM 		return (EINVAL);
554*9637SRandy.Fishel@Sun.COM 	}
555*9637SRandy.Fishel@Sun.COM 
556*9637SRandy.Fishel@Sun.COM 	ip = (cpu_idle_cb_impl_t *)hdl;
557*9637SRandy.Fishel@Sun.COM 	mutex_enter(&cpu_idle_cb_lock);
558*9637SRandy.Fishel@Sun.COM 	for (ipp = &cpu_idle_cb_busy; *ipp != NULL; ipp = &(*ipp)->next) {
559*9637SRandy.Fishel@Sun.COM 		if (*ipp == ip) {
560*9637SRandy.Fishel@Sun.COM 			*ipp = ip->next;
561*9637SRandy.Fishel@Sun.COM 			cpu_idle_remove_callback(ip);
562*9637SRandy.Fishel@Sun.COM 			rc = 0;
563*9637SRandy.Fishel@Sun.COM 			break;
564*9637SRandy.Fishel@Sun.COM 		}
565*9637SRandy.Fishel@Sun.COM 	}
566*9637SRandy.Fishel@Sun.COM 	mutex_exit(&cpu_idle_cb_lock);
567*9637SRandy.Fishel@Sun.COM 
568*9637SRandy.Fishel@Sun.COM 	if (rc == 0) {
569*9637SRandy.Fishel@Sun.COM 		kmem_free(ip, sizeof (*ip));
570*9637SRandy.Fishel@Sun.COM 	} else {
571*9637SRandy.Fishel@Sun.COM 		cmn_err(CE_NOTE,
572*9637SRandy.Fishel@Sun.COM 		    "!cpu_event: callback handle %p not found.", (void *)hdl);
573*9637SRandy.Fishel@Sun.COM 	}
574*9637SRandy.Fishel@Sun.COM 
575*9637SRandy.Fishel@Sun.COM 	return (rc);
576*9637SRandy.Fishel@Sun.COM }
577*9637SRandy.Fishel@Sun.COM 
578*9637SRandy.Fishel@Sun.COM static int
579*9637SRandy.Fishel@Sun.COM cpu_idle_enter_state(cpu_idle_cb_state_t *sp, intptr_t state)
580*9637SRandy.Fishel@Sun.COM {
581*9637SRandy.Fishel@Sun.COM 	sp->v.idle_state->cipv_intptr = state;
582*9637SRandy.Fishel@Sun.COM 	sp->v.enter_ts->cipv_hrtime = gethrtime_unscaled();
583*9637SRandy.Fishel@Sun.COM 	sp->v.last_busy->cipv_hrtime = sp->v.enter_ts->cipv_hrtime -
584*9637SRandy.Fishel@Sun.COM 	    sp->v.exit_ts->cipv_hrtime;
585*9637SRandy.Fishel@Sun.COM 	sp->v.total_busy->cipv_hrtime += sp->v.last_busy->cipv_hrtime;
586*9637SRandy.Fishel@Sun.COM 	if (sp->v.ready == B_FALSE) {
587*9637SRandy.Fishel@Sun.COM 		sp->v.ready = B_TRUE;
588*9637SRandy.Fishel@Sun.COM 		return (0);
589*9637SRandy.Fishel@Sun.COM 	}
590*9637SRandy.Fishel@Sun.COM 
591*9637SRandy.Fishel@Sun.COM 	return (1);
592*9637SRandy.Fishel@Sun.COM }
593*9637SRandy.Fishel@Sun.COM 
594*9637SRandy.Fishel@Sun.COM static void
595*9637SRandy.Fishel@Sun.COM cpu_idle_exit_state(cpu_idle_cb_state_t *sp)
596*9637SRandy.Fishel@Sun.COM {
597*9637SRandy.Fishel@Sun.COM 	sp->v.idle_state->cipv_intptr = CPU_IDLE_STATE_NORMAL;
598*9637SRandy.Fishel@Sun.COM 	sp->v.exit_ts->cipv_hrtime = gethrtime_unscaled();
599*9637SRandy.Fishel@Sun.COM 	sp->v.last_idle->cipv_hrtime = sp->v.exit_ts->cipv_hrtime -
600*9637SRandy.Fishel@Sun.COM 	    sp->v.enter_ts->cipv_hrtime;
601*9637SRandy.Fishel@Sun.COM 	sp->v.total_idle->cipv_hrtime += sp->v.last_idle->cipv_hrtime;
602*9637SRandy.Fishel@Sun.COM }
603*9637SRandy.Fishel@Sun.COM 
/*
 * Notify registered callbacks that the current CPU is about to enter idle
 * state `state'.
 *
 * The per-CPU bookkeeping is updated through cpu_idle_enter_state() and the
 * idle_enter callbacks in cpu_idle_cb_array are then invoked in array order.
 * sp->v.index counts how many array entries have been visited so far.
 *
 * `flag' is not used by this function.  `check_func' and `check_arg' are
 * passed through unchanged to every idle_enter callback.
 *
 * Returns 0 on success, including the case where the callbacks are skipped
 * because cpu_idle_enter_state() returned 0 for this CPU.  Returns EBUSY if
 * sp->v.index no longer has the expected value after an idle_enter callback
 * returned (see the comment inside the loop).
 */
/*ARGSUSED*/
int
cpu_idle_enter(int state, int flag,
    cpu_idle_check_wakeup_t check_func, void *check_arg)
{
	int i;
	cpu_idle_cb_item_t *cip;
	cpu_idle_cb_state_t *sp;
	cpu_idle_callback_context_t ctx;
#if defined(__x86)
	ulong_t iflags;
#endif

	ctx = CPU_IDLE_GET_CTX(CPU);
	ASSERT(CPU->cpu_seqid < max_ncpus);
	sp = &cpu_idle_cb_state[CPU->cpu_seqid];
	/* A non-zero index would mean a previous enter was never unwound. */
	ASSERT(sp->v.index == 0);

	/*
	 * On x86, cpu_idle_enter can be called from idle thread with either
	 * interrupts enabled or disabled, so we need to make sure interrupts
	 * are disabled here.
	 * On SPARC, cpu_idle_enter will be called from idle thread with
	 * interrupt disabled, so no special handling necessary.
	 */
#if defined(__x86)
	iflags = intr_clear();
#endif

	/* Skip calling callback if state is not ready for current CPU. */
	if (cpu_idle_enter_state(sp, state) == 0) {
#if defined(__x86)
		intr_restore(iflags);
#endif
		return (0);
	}

	for (i = 0, cip = cpu_idle_cb_array; i < cpu_idle_cb_curr; i++, cip++) {
		/*
		 * Increase index so corresponding idle_exit callback
		 * will be invoked should interrupt happen during
		 * idle_enter callback.
		 */
		sp->v.index++;

		/* Call idle_enter callback function if it's not NULL. */
		if (cip->enter != NULL) {
			cip->enter(cip->arg, ctx, check_func, check_arg);

			/*
			 * cpu_idle_enter runs with interrupts
			 * disabled, so the idle_enter callbacks will
			 * also be called with interrupts disabled.
			 * It is permissible for the callbacks to
			 * enable the interrupts, if they can also
			 * handle the condition if the interrupt
			 * occurs.
			 *
			 * However, if an interrupt occurs and we
			 * return here without dealing with it, we
			 * return to the cpu_idle_enter() caller
			 * with an EBUSY, and the caller will not
			 * enter the idle state.
			 *
			 * We detect the interrupt, by checking the
			 * index value of the state pointer.  If it
			 * is not the index we incremented above,
			 * then it was cleared while processing
			 * the interrupt.
			 *
			 * Also note, that at this point of the code
			 * the normal index value will be one greater
			 * than the variable 'i' in the loop, as it
			 * hasn't yet been incremented.
			 */
			if (sp->v.index != i + 1) {
#if defined(__x86)
				intr_restore(iflags);
#endif
				return (EBUSY);
			}
		}
	}
#if defined(__x86)
	/* All callbacks ran undisturbed; put the interrupt state back. */
	intr_restore(iflags);
#endif

	return (0);
}
693*9637SRandy.Fishel@Sun.COM 
694*9637SRandy.Fishel@Sun.COM void
695*9637SRandy.Fishel@Sun.COM cpu_idle_exit(int flag)
696*9637SRandy.Fishel@Sun.COM {
697*9637SRandy.Fishel@Sun.COM 	int i;
698*9637SRandy.Fishel@Sun.COM 	cpu_idle_cb_item_t *cip;
699*9637SRandy.Fishel@Sun.COM 	cpu_idle_cb_state_t *sp;
700*9637SRandy.Fishel@Sun.COM 	cpu_idle_callback_context_t ctx;
701*9637SRandy.Fishel@Sun.COM #if defined(__x86)
702*9637SRandy.Fishel@Sun.COM 	ulong_t iflags;
703*9637SRandy.Fishel@Sun.COM #endif
704*9637SRandy.Fishel@Sun.COM 
705*9637SRandy.Fishel@Sun.COM 	ASSERT(CPU->cpu_seqid < max_ncpus);
706*9637SRandy.Fishel@Sun.COM 	sp = &cpu_idle_cb_state[CPU->cpu_seqid];
707*9637SRandy.Fishel@Sun.COM 
708*9637SRandy.Fishel@Sun.COM #if defined(__sparc)
709*9637SRandy.Fishel@Sun.COM 	/*
710*9637SRandy.Fishel@Sun.COM 	 * On SPARC, cpu_idle_exit will only be called from idle thread
711*9637SRandy.Fishel@Sun.COM 	 * with interrupt disabled.
712*9637SRandy.Fishel@Sun.COM 	 */
713*9637SRandy.Fishel@Sun.COM 
714*9637SRandy.Fishel@Sun.COM 	if (sp->v.index != 0) {
715*9637SRandy.Fishel@Sun.COM 		ctx = CPU_IDLE_GET_CTX(CPU);
716*9637SRandy.Fishel@Sun.COM 		cpu_idle_exit_state(sp);
717*9637SRandy.Fishel@Sun.COM 		for (i = sp->v.index - 1; i >= 0; i--) {
718*9637SRandy.Fishel@Sun.COM 			cip = &cpu_idle_cb_array[i];
719*9637SRandy.Fishel@Sun.COM 			if (cip->exit != NULL) {
720*9637SRandy.Fishel@Sun.COM 				cip->exit(cip->arg, ctx, flag);
721*9637SRandy.Fishel@Sun.COM 			}
722*9637SRandy.Fishel@Sun.COM 		}
723*9637SRandy.Fishel@Sun.COM 		sp->v.index = 0;
724*9637SRandy.Fishel@Sun.COM 	}
725*9637SRandy.Fishel@Sun.COM #elif defined(__x86)
726*9637SRandy.Fishel@Sun.COM 	/*
727*9637SRandy.Fishel@Sun.COM 	 * On x86, cpu_idle_exit will be called from idle thread or interrupt
728*9637SRandy.Fishel@Sun.COM 	 * handler. When called from interrupt handler, interrupts will be
729*9637SRandy.Fishel@Sun.COM 	 * disabled. When called from idle thread, interrupts may be disabled
730*9637SRandy.Fishel@Sun.COM 	 * or enabled.
731*9637SRandy.Fishel@Sun.COM 	 */
732*9637SRandy.Fishel@Sun.COM 
733*9637SRandy.Fishel@Sun.COM 	/* Called from interrupt, interrupts are already disabled. */
734*9637SRandy.Fishel@Sun.COM 	if (flag & CPU_IDLE_CB_FLAG_INTR) {
735*9637SRandy.Fishel@Sun.COM 		/*
736*9637SRandy.Fishel@Sun.COM 		 * return if cpu_idle_exit already called or
737*9637SRandy.Fishel@Sun.COM 		 * there is no registered callback.
738*9637SRandy.Fishel@Sun.COM 		 */
739*9637SRandy.Fishel@Sun.COM 		if (sp->v.index == 0) {
740*9637SRandy.Fishel@Sun.COM 			return;
741*9637SRandy.Fishel@Sun.COM 		}
742*9637SRandy.Fishel@Sun.COM 		ctx = CPU_IDLE_GET_CTX(CPU);
743*9637SRandy.Fishel@Sun.COM 		cpu_idle_exit_state(sp);
744*9637SRandy.Fishel@Sun.COM 		for (i = sp->v.index - 1; i >= 0; i--) {
745*9637SRandy.Fishel@Sun.COM 			cip = &cpu_idle_cb_array[i];
746*9637SRandy.Fishel@Sun.COM 			if (cip->exit != NULL) {
747*9637SRandy.Fishel@Sun.COM 				cip->exit(cip->arg, ctx, flag);
748*9637SRandy.Fishel@Sun.COM 			}
749*9637SRandy.Fishel@Sun.COM 		}
750*9637SRandy.Fishel@Sun.COM 		sp->v.index = 0;
751*9637SRandy.Fishel@Sun.COM 
752*9637SRandy.Fishel@Sun.COM 	/* Called from idle thread, need to disable interrupt. */
753*9637SRandy.Fishel@Sun.COM 	} else {
754*9637SRandy.Fishel@Sun.COM 		iflags = intr_clear();
755*9637SRandy.Fishel@Sun.COM 		if (sp->v.index != 0) {
756*9637SRandy.Fishel@Sun.COM 			ctx = CPU_IDLE_GET_CTX(CPU);
757*9637SRandy.Fishel@Sun.COM 			cpu_idle_exit_state(sp);
758*9637SRandy.Fishel@Sun.COM 			for (i = sp->v.index - 1; i >= 0; i--) {
759*9637SRandy.Fishel@Sun.COM 				cip = &cpu_idle_cb_array[i];
760*9637SRandy.Fishel@Sun.COM 				if (cip->exit != NULL) {
761*9637SRandy.Fishel@Sun.COM 					cip->exit(cip->arg, ctx, flag);
762*9637SRandy.Fishel@Sun.COM 				}
763*9637SRandy.Fishel@Sun.COM 			}
764*9637SRandy.Fishel@Sun.COM 			sp->v.index = 0;
765*9637SRandy.Fishel@Sun.COM 		}
766*9637SRandy.Fishel@Sun.COM 		intr_restore(iflags);
767*9637SRandy.Fishel@Sun.COM 	}
768*9637SRandy.Fishel@Sun.COM #endif
769*9637SRandy.Fishel@Sun.COM }
770*9637SRandy.Fishel@Sun.COM 
771*9637SRandy.Fishel@Sun.COM cpu_idle_callback_context_t
772*9637SRandy.Fishel@Sun.COM cpu_idle_get_context(void)
773*9637SRandy.Fishel@Sun.COM {
774*9637SRandy.Fishel@Sun.COM 	return (CPU_IDLE_GET_CTX(CPU));
775*9637SRandy.Fishel@Sun.COM }
776*9637SRandy.Fishel@Sun.COM 
777*9637SRandy.Fishel@Sun.COM /*
778*9637SRandy.Fishel@Sun.COM  * Allocate property structure in group of CPU_IDLE_VALUE_GROUP_SIZE to improve
779*9637SRandy.Fishel@Sun.COM  * cache efficiency. To simplify implementation, allocated memory for property
780*9637SRandy.Fishel@Sun.COM  * structure won't be freed.
781*9637SRandy.Fishel@Sun.COM  */
782*9637SRandy.Fishel@Sun.COM static void
783*9637SRandy.Fishel@Sun.COM cpu_idle_prop_allocate_impl(void)
784*9637SRandy.Fishel@Sun.COM {
785*9637SRandy.Fishel@Sun.COM 	int i;
786*9637SRandy.Fishel@Sun.COM 	size_t sz;
787*9637SRandy.Fishel@Sun.COM 	intptr_t buf;
788*9637SRandy.Fishel@Sun.COM 	cpu_idle_prop_impl_t *prop;
789*9637SRandy.Fishel@Sun.COM 	cpu_idle_prop_value_t *valp;
790*9637SRandy.Fishel@Sun.COM 
791*9637SRandy.Fishel@Sun.COM 	ASSERT(!CPU_ON_INTR(CPU));
792*9637SRandy.Fishel@Sun.COM 	prop = kmem_zalloc(sizeof (*prop) * CPU_IDLE_VALUE_GROUP_SIZE,
793*9637SRandy.Fishel@Sun.COM 	    KM_SLEEP);
794*9637SRandy.Fishel@Sun.COM 	sz = sizeof (*valp) * CPU_IDLE_VALUE_GROUP_SIZE * max_ncpus;
795*9637SRandy.Fishel@Sun.COM 	sz += CPU_CACHE_COHERENCE_SIZE;
796*9637SRandy.Fishel@Sun.COM 	buf = (intptr_t)kmem_zalloc(sz, KM_SLEEP);
797*9637SRandy.Fishel@Sun.COM 	valp = (cpu_idle_prop_value_t *)P2ROUNDUP(buf,
798*9637SRandy.Fishel@Sun.COM 	    CPU_CACHE_COHERENCE_SIZE);
799*9637SRandy.Fishel@Sun.COM 
800*9637SRandy.Fishel@Sun.COM 	for (i = 0; i < CPU_IDLE_VALUE_GROUP_SIZE; i++, prop++, valp++) {
801*9637SRandy.Fishel@Sun.COM 		prop->value = valp;
802*9637SRandy.Fishel@Sun.COM 		prop->next = cpu_idle_prop_free;
803*9637SRandy.Fishel@Sun.COM 		cpu_idle_prop_free = prop;
804*9637SRandy.Fishel@Sun.COM 	}
805*9637SRandy.Fishel@Sun.COM }
806*9637SRandy.Fishel@Sun.COM 
807*9637SRandy.Fishel@Sun.COM int
808*9637SRandy.Fishel@Sun.COM cpu_idle_prop_create_property(const char *name, cpu_idle_prop_type_t type,
809*9637SRandy.Fishel@Sun.COM     cpu_idle_prop_update_t update, void *arg, cpu_idle_prop_handle_t *hdlp)
810*9637SRandy.Fishel@Sun.COM {
811*9637SRandy.Fishel@Sun.COM 	int rc = EEXIST;
812*9637SRandy.Fishel@Sun.COM 	cpu_idle_prop_impl_t *prop;
813*9637SRandy.Fishel@Sun.COM 
814*9637SRandy.Fishel@Sun.COM 	ASSERT(!CPU_ON_INTR(CPU));
815*9637SRandy.Fishel@Sun.COM 	if (name == NULL || hdlp == NULL) {
816*9637SRandy.Fishel@Sun.COM 		cmn_err(CE_WARN,
817*9637SRandy.Fishel@Sun.COM 		    "!cpu_event: NULL parameters in create_property.");
818*9637SRandy.Fishel@Sun.COM 		return (EINVAL);
819*9637SRandy.Fishel@Sun.COM 	}
820*9637SRandy.Fishel@Sun.COM 
821*9637SRandy.Fishel@Sun.COM 	mutex_enter(&cpu_idle_prop_lock);
822*9637SRandy.Fishel@Sun.COM 	for (prop = cpu_idle_prop_busy; prop != NULL; prop = prop->next) {
823*9637SRandy.Fishel@Sun.COM 		if (strcmp(prop->name, name) == 0) {
824*9637SRandy.Fishel@Sun.COM 			cmn_err(CE_NOTE,
825*9637SRandy.Fishel@Sun.COM 			    "!cpu_event: property %s already exists.", name);
826*9637SRandy.Fishel@Sun.COM 			break;
827*9637SRandy.Fishel@Sun.COM 		}
828*9637SRandy.Fishel@Sun.COM 	}
829*9637SRandy.Fishel@Sun.COM 	if (prop == NULL) {
830*9637SRandy.Fishel@Sun.COM 		if (cpu_idle_prop_free == NULL) {
831*9637SRandy.Fishel@Sun.COM 			cpu_idle_prop_allocate_impl();
832*9637SRandy.Fishel@Sun.COM 		}
833*9637SRandy.Fishel@Sun.COM 		ASSERT(cpu_idle_prop_free != NULL);
834*9637SRandy.Fishel@Sun.COM 		prop = cpu_idle_prop_free;
835*9637SRandy.Fishel@Sun.COM 		cpu_idle_prop_free = prop->next;
836*9637SRandy.Fishel@Sun.COM 		prop->next = cpu_idle_prop_busy;
837*9637SRandy.Fishel@Sun.COM 		cpu_idle_prop_busy = prop;
838*9637SRandy.Fishel@Sun.COM 
839*9637SRandy.Fishel@Sun.COM 		ASSERT(prop->value != NULL);
840*9637SRandy.Fishel@Sun.COM 		prop->name = strdup(name);
841*9637SRandy.Fishel@Sun.COM 		prop->type = type;
842*9637SRandy.Fishel@Sun.COM 		prop->update = update;
843*9637SRandy.Fishel@Sun.COM 		prop->private = arg;
844*9637SRandy.Fishel@Sun.COM 		prop->refcnt = 1;
845*9637SRandy.Fishel@Sun.COM 		*hdlp = prop;
846*9637SRandy.Fishel@Sun.COM 		rc = 0;
847*9637SRandy.Fishel@Sun.COM 	}
848*9637SRandy.Fishel@Sun.COM 	mutex_exit(&cpu_idle_prop_lock);
849*9637SRandy.Fishel@Sun.COM 
850*9637SRandy.Fishel@Sun.COM 	return (rc);
851*9637SRandy.Fishel@Sun.COM }
852*9637SRandy.Fishel@Sun.COM 
853*9637SRandy.Fishel@Sun.COM int
854*9637SRandy.Fishel@Sun.COM cpu_idle_prop_destroy_property(cpu_idle_prop_handle_t hdl)
855*9637SRandy.Fishel@Sun.COM {
856*9637SRandy.Fishel@Sun.COM 	int rc = ENODEV;
857*9637SRandy.Fishel@Sun.COM 	cpu_idle_prop_impl_t *prop, **propp;
858*9637SRandy.Fishel@Sun.COM 	cpu_idle_prop_value_t *valp;
859*9637SRandy.Fishel@Sun.COM 
860*9637SRandy.Fishel@Sun.COM 	ASSERT(!CPU_ON_INTR(CPU));
861*9637SRandy.Fishel@Sun.COM 	if (hdl == NULL) {
862*9637SRandy.Fishel@Sun.COM 		cmn_err(CE_WARN,
863*9637SRandy.Fishel@Sun.COM 		    "!cpu_event: hdl is NULL in destroy_property.");
864*9637SRandy.Fishel@Sun.COM 		return (EINVAL);
865*9637SRandy.Fishel@Sun.COM 	}
866*9637SRandy.Fishel@Sun.COM 
867*9637SRandy.Fishel@Sun.COM 	prop = (cpu_idle_prop_impl_t *)hdl;
868*9637SRandy.Fishel@Sun.COM 	mutex_enter(&cpu_idle_prop_lock);
869*9637SRandy.Fishel@Sun.COM 	for (propp = &cpu_idle_prop_busy; *propp != NULL;
870*9637SRandy.Fishel@Sun.COM 	    propp = &(*propp)->next) {
871*9637SRandy.Fishel@Sun.COM 		if (*propp == prop) {
872*9637SRandy.Fishel@Sun.COM 			ASSERT(prop->refcnt > 0);
873*9637SRandy.Fishel@Sun.COM 			if (atomic_cas_32(&prop->refcnt, 1, 0) == 1) {
874*9637SRandy.Fishel@Sun.COM 				*propp = prop->next;
875*9637SRandy.Fishel@Sun.COM 				strfree(prop->name);
876*9637SRandy.Fishel@Sun.COM 				valp = prop->value;
877*9637SRandy.Fishel@Sun.COM 				bzero(prop, sizeof (*prop));
878*9637SRandy.Fishel@Sun.COM 				prop->value = valp;
879*9637SRandy.Fishel@Sun.COM 				prop->next = cpu_idle_prop_free;
880*9637SRandy.Fishel@Sun.COM 				cpu_idle_prop_free = prop;
881*9637SRandy.Fishel@Sun.COM 				rc = 0;
882*9637SRandy.Fishel@Sun.COM 			} else {
883*9637SRandy.Fishel@Sun.COM 				rc = EBUSY;
884*9637SRandy.Fishel@Sun.COM 			}
885*9637SRandy.Fishel@Sun.COM 			break;
886*9637SRandy.Fishel@Sun.COM 		}
887*9637SRandy.Fishel@Sun.COM 	}
888*9637SRandy.Fishel@Sun.COM 	mutex_exit(&cpu_idle_prop_lock);
889*9637SRandy.Fishel@Sun.COM 
890*9637SRandy.Fishel@Sun.COM 	return (rc);
891*9637SRandy.Fishel@Sun.COM }
892*9637SRandy.Fishel@Sun.COM 
893*9637SRandy.Fishel@Sun.COM int
894*9637SRandy.Fishel@Sun.COM cpu_idle_prop_create_handle(const char *name, cpu_idle_prop_handle_t *hdlp)
895*9637SRandy.Fishel@Sun.COM {
896*9637SRandy.Fishel@Sun.COM 	int rc = ENODEV;
897*9637SRandy.Fishel@Sun.COM 	cpu_idle_prop_impl_t *prop;
898*9637SRandy.Fishel@Sun.COM 
899*9637SRandy.Fishel@Sun.COM 	ASSERT(!CPU_ON_INTR(CPU));
900*9637SRandy.Fishel@Sun.COM 	if (name == NULL || hdlp == NULL) {
901*9637SRandy.Fishel@Sun.COM 		cmn_err(CE_WARN,
902*9637SRandy.Fishel@Sun.COM 		    "!cpu_event: NULL parameters in create_handle.");
903*9637SRandy.Fishel@Sun.COM 		return (EINVAL);
904*9637SRandy.Fishel@Sun.COM 	}
905*9637SRandy.Fishel@Sun.COM 
906*9637SRandy.Fishel@Sun.COM 	mutex_enter(&cpu_idle_prop_lock);
907*9637SRandy.Fishel@Sun.COM 	for (prop = cpu_idle_prop_busy; prop != NULL; prop = prop->next) {
908*9637SRandy.Fishel@Sun.COM 		if (strcmp(prop->name, name) == 0) {
909*9637SRandy.Fishel@Sun.COM 			/* Hold one refcount on object. */
910*9637SRandy.Fishel@Sun.COM 			ASSERT(prop->refcnt > 0);
911*9637SRandy.Fishel@Sun.COM 			atomic_inc_32(&prop->refcnt);
912*9637SRandy.Fishel@Sun.COM 			*hdlp = (cpu_idle_prop_handle_t)prop;
913*9637SRandy.Fishel@Sun.COM 			rc = 0;
914*9637SRandy.Fishel@Sun.COM 			break;
915*9637SRandy.Fishel@Sun.COM 		}
916*9637SRandy.Fishel@Sun.COM 	}
917*9637SRandy.Fishel@Sun.COM 	mutex_exit(&cpu_idle_prop_lock);
918*9637SRandy.Fishel@Sun.COM 
919*9637SRandy.Fishel@Sun.COM 	return (rc);
920*9637SRandy.Fishel@Sun.COM }
921*9637SRandy.Fishel@Sun.COM 
922*9637SRandy.Fishel@Sun.COM int
923*9637SRandy.Fishel@Sun.COM cpu_idle_prop_destroy_handle(cpu_idle_prop_handle_t hdl)
924*9637SRandy.Fishel@Sun.COM {
925*9637SRandy.Fishel@Sun.COM 	int rc = ENODEV;
926*9637SRandy.Fishel@Sun.COM 	cpu_idle_prop_impl_t *prop;
927*9637SRandy.Fishel@Sun.COM 
928*9637SRandy.Fishel@Sun.COM 	ASSERT(!CPU_ON_INTR(CPU));
929*9637SRandy.Fishel@Sun.COM 	if (hdl == NULL) {
930*9637SRandy.Fishel@Sun.COM 		cmn_err(CE_WARN,
931*9637SRandy.Fishel@Sun.COM 		    "!cpu_event: hdl is NULL in destroy_handle.");
932*9637SRandy.Fishel@Sun.COM 		return (EINVAL);
933*9637SRandy.Fishel@Sun.COM 	}
934*9637SRandy.Fishel@Sun.COM 
935*9637SRandy.Fishel@Sun.COM 	mutex_enter(&cpu_idle_prop_lock);
936*9637SRandy.Fishel@Sun.COM 	for (prop = cpu_idle_prop_busy; prop != NULL; prop = prop->next) {
937*9637SRandy.Fishel@Sun.COM 		if (prop == hdl) {
938*9637SRandy.Fishel@Sun.COM 			/* Release refcnt held in create_handle. */
939*9637SRandy.Fishel@Sun.COM 			ASSERT(prop->refcnt > 1);
940*9637SRandy.Fishel@Sun.COM 			atomic_dec_32(&prop->refcnt);
941*9637SRandy.Fishel@Sun.COM 			rc = 0;
942*9637SRandy.Fishel@Sun.COM 			break;
943*9637SRandy.Fishel@Sun.COM 		}
944*9637SRandy.Fishel@Sun.COM 	}
945*9637SRandy.Fishel@Sun.COM 	mutex_exit(&cpu_idle_prop_lock);
946*9637SRandy.Fishel@Sun.COM 
947*9637SRandy.Fishel@Sun.COM 	return (rc);
948*9637SRandy.Fishel@Sun.COM }
949*9637SRandy.Fishel@Sun.COM 
950*9637SRandy.Fishel@Sun.COM cpu_idle_prop_type_t
951*9637SRandy.Fishel@Sun.COM cpu_idle_prop_get_type(cpu_idle_prop_handle_t hdl)
952*9637SRandy.Fishel@Sun.COM {
953*9637SRandy.Fishel@Sun.COM 	ASSERT(hdl != NULL);
954*9637SRandy.Fishel@Sun.COM 	return (((cpu_idle_prop_impl_t *)hdl)->type);
955*9637SRandy.Fishel@Sun.COM }
956*9637SRandy.Fishel@Sun.COM 
957*9637SRandy.Fishel@Sun.COM const char *
958*9637SRandy.Fishel@Sun.COM cpu_idle_prop_get_name(cpu_idle_prop_handle_t hdl)
959*9637SRandy.Fishel@Sun.COM {
960*9637SRandy.Fishel@Sun.COM 	ASSERT(hdl != NULL);
961*9637SRandy.Fishel@Sun.COM 	return (((cpu_idle_prop_impl_t *)hdl)->name);
962*9637SRandy.Fishel@Sun.COM }
963*9637SRandy.Fishel@Sun.COM 
964*9637SRandy.Fishel@Sun.COM int
965*9637SRandy.Fishel@Sun.COM cpu_idle_prop_get_value(cpu_idle_prop_handle_t hdl,
966*9637SRandy.Fishel@Sun.COM     cpu_idle_callback_context_t ctx, cpu_idle_prop_value_t *valp)
967*9637SRandy.Fishel@Sun.COM {
968*9637SRandy.Fishel@Sun.COM 	int idx, rc = 0;
969*9637SRandy.Fishel@Sun.COM 	cpu_idle_prop_impl_t *prop = (cpu_idle_prop_impl_t *)hdl;
970*9637SRandy.Fishel@Sun.COM 
971*9637SRandy.Fishel@Sun.COM 	ASSERT(CPU_IDLE_CTX2CPUID(ctx) < max_ncpus);
972*9637SRandy.Fishel@Sun.COM 	if (hdl == NULL || valp == NULL) {
973*9637SRandy.Fishel@Sun.COM 		cmn_err(CE_NOTE, "!cpu_event: NULL parameters in prop_get.");
974*9637SRandy.Fishel@Sun.COM 		return (EINVAL);
975*9637SRandy.Fishel@Sun.COM 	}
976*9637SRandy.Fishel@Sun.COM 	idx = CPU_IDLE_CTX2IDX(ctx);
977*9637SRandy.Fishel@Sun.COM 	if (prop->update != NULL) {
978*9637SRandy.Fishel@Sun.COM 		cpu_idle_cb_state_t *sp;
979*9637SRandy.Fishel@Sun.COM 
980*9637SRandy.Fishel@Sun.COM 		ASSERT(CPU->cpu_seqid < max_ncpus);
981*9637SRandy.Fishel@Sun.COM 		sp = &cpu_idle_cb_state[CPU->cpu_seqid];
982*9637SRandy.Fishel@Sun.COM 		/* CPU's idle enter timestamp as sequence number. */
983*9637SRandy.Fishel@Sun.COM 		rc = prop->update(prop->private,
984*9637SRandy.Fishel@Sun.COM 		    (uint64_t)sp->v.enter_ts->cipv_hrtime, &prop->value[idx]);
985*9637SRandy.Fishel@Sun.COM 	}
986*9637SRandy.Fishel@Sun.COM 	if (rc == 0) {
987*9637SRandy.Fishel@Sun.COM 		*valp = prop->value[idx];
988*9637SRandy.Fishel@Sun.COM 	}
989*9637SRandy.Fishel@Sun.COM 
990*9637SRandy.Fishel@Sun.COM 	return (rc);
991*9637SRandy.Fishel@Sun.COM }
992*9637SRandy.Fishel@Sun.COM 
993*9637SRandy.Fishel@Sun.COM uint32_t
994*9637SRandy.Fishel@Sun.COM cpu_idle_prop_get_uint32(cpu_idle_prop_handle_t hdl,
995*9637SRandy.Fishel@Sun.COM     cpu_idle_callback_context_t ctx)
996*9637SRandy.Fishel@Sun.COM {
997*9637SRandy.Fishel@Sun.COM 	int idx;
998*9637SRandy.Fishel@Sun.COM 	cpu_idle_prop_impl_t *prop = (cpu_idle_prop_impl_t *)hdl;
999*9637SRandy.Fishel@Sun.COM 
1000*9637SRandy.Fishel@Sun.COM 	ASSERT(hdl != NULL);
1001*9637SRandy.Fishel@Sun.COM 	ASSERT(CPU_IDLE_CTX2CPUID(ctx) < max_ncpus);
1002*9637SRandy.Fishel@Sun.COM 	idx = CPU_IDLE_CTX2IDX(ctx);
1003*9637SRandy.Fishel@Sun.COM 	return (prop->value[idx].cipv_uint32);
1004*9637SRandy.Fishel@Sun.COM }
1005*9637SRandy.Fishel@Sun.COM 
1006*9637SRandy.Fishel@Sun.COM uint64_t
1007*9637SRandy.Fishel@Sun.COM cpu_idle_prop_get_uint64(cpu_idle_prop_handle_t hdl,
1008*9637SRandy.Fishel@Sun.COM     cpu_idle_callback_context_t ctx)
1009*9637SRandy.Fishel@Sun.COM {
1010*9637SRandy.Fishel@Sun.COM 	int idx;
1011*9637SRandy.Fishel@Sun.COM 	cpu_idle_prop_impl_t *prop = (cpu_idle_prop_impl_t *)hdl;
1012*9637SRandy.Fishel@Sun.COM 
1013*9637SRandy.Fishel@Sun.COM 	ASSERT(hdl != NULL);
1014*9637SRandy.Fishel@Sun.COM 	ASSERT(CPU_IDLE_CTX2CPUID(ctx) < max_ncpus);
1015*9637SRandy.Fishel@Sun.COM 	idx = CPU_IDLE_CTX2IDX(ctx);
1016*9637SRandy.Fishel@Sun.COM 	return (prop->value[idx].cipv_uint64);
1017*9637SRandy.Fishel@Sun.COM }
1018*9637SRandy.Fishel@Sun.COM 
1019*9637SRandy.Fishel@Sun.COM intptr_t
1020*9637SRandy.Fishel@Sun.COM cpu_idle_prop_get_intptr(cpu_idle_prop_handle_t hdl,
1021*9637SRandy.Fishel@Sun.COM     cpu_idle_callback_context_t ctx)
1022*9637SRandy.Fishel@Sun.COM {
1023*9637SRandy.Fishel@Sun.COM 	int idx;
1024*9637SRandy.Fishel@Sun.COM 	cpu_idle_prop_impl_t *prop = (cpu_idle_prop_impl_t *)hdl;
1025*9637SRandy.Fishel@Sun.COM 
1026*9637SRandy.Fishel@Sun.COM 	ASSERT(hdl != NULL);
1027*9637SRandy.Fishel@Sun.COM 	ASSERT(CPU_IDLE_CTX2CPUID(ctx) < max_ncpus);
1028*9637SRandy.Fishel@Sun.COM 	idx = CPU_IDLE_CTX2IDX(ctx);
1029*9637SRandy.Fishel@Sun.COM 	return (prop->value[idx].cipv_intptr);
1030*9637SRandy.Fishel@Sun.COM }
1031*9637SRandy.Fishel@Sun.COM 
1032*9637SRandy.Fishel@Sun.COM hrtime_t
1033*9637SRandy.Fishel@Sun.COM cpu_idle_prop_get_hrtime(cpu_idle_prop_handle_t hdl,
1034*9637SRandy.Fishel@Sun.COM     cpu_idle_callback_context_t ctx)
1035*9637SRandy.Fishel@Sun.COM {
1036*9637SRandy.Fishel@Sun.COM 	int idx;
1037*9637SRandy.Fishel@Sun.COM 	cpu_idle_prop_impl_t *prop = (cpu_idle_prop_impl_t *)hdl;
1038*9637SRandy.Fishel@Sun.COM 
1039*9637SRandy.Fishel@Sun.COM 	ASSERT(hdl != NULL);
1040*9637SRandy.Fishel@Sun.COM 	ASSERT(CPU_IDLE_CTX2CPUID(ctx) < max_ncpus);
1041*9637SRandy.Fishel@Sun.COM 	idx = CPU_IDLE_CTX2IDX(ctx);
1042*9637SRandy.Fishel@Sun.COM 	return (prop->value[idx].cipv_hrtime);
1043*9637SRandy.Fishel@Sun.COM }
1044*9637SRandy.Fishel@Sun.COM 
1045*9637SRandy.Fishel@Sun.COM void
1046*9637SRandy.Fishel@Sun.COM cpu_idle_prop_set_value(cpu_idle_prop_handle_t hdl,
1047*9637SRandy.Fishel@Sun.COM     cpu_idle_callback_context_t ctx, cpu_idle_prop_value_t val)
1048*9637SRandy.Fishel@Sun.COM {
1049*9637SRandy.Fishel@Sun.COM 	int idx;
1050*9637SRandy.Fishel@Sun.COM 	cpu_idle_prop_impl_t *prop = (cpu_idle_prop_impl_t *)hdl;
1051*9637SRandy.Fishel@Sun.COM 
1052*9637SRandy.Fishel@Sun.COM 	ASSERT(hdl != NULL);
1053*9637SRandy.Fishel@Sun.COM 	ASSERT(CPU_IDLE_CTX2CPUID(ctx) < max_ncpus);
1054*9637SRandy.Fishel@Sun.COM 	idx = CPU_IDLE_CTX2IDX(ctx);
1055*9637SRandy.Fishel@Sun.COM 	prop->value[idx] = val;
1056*9637SRandy.Fishel@Sun.COM }
1057*9637SRandy.Fishel@Sun.COM 
1058*9637SRandy.Fishel@Sun.COM void
1059*9637SRandy.Fishel@Sun.COM cpu_idle_prop_set_all(cpu_idle_prop_handle_t hdl, cpu_idle_prop_value_t val)
1060*9637SRandy.Fishel@Sun.COM {
1061*9637SRandy.Fishel@Sun.COM 	int i, idx;
1062*9637SRandy.Fishel@Sun.COM 	cpu_idle_prop_impl_t *prop = (cpu_idle_prop_impl_t *)hdl;
1063*9637SRandy.Fishel@Sun.COM 
1064*9637SRandy.Fishel@Sun.COM 	ASSERT(hdl != NULL);
1065*9637SRandy.Fishel@Sun.COM 	for (i = 0; i < max_ncpus; i++) {
1066*9637SRandy.Fishel@Sun.COM 		idx = CPU_IDLE_CTX2IDX(i);
1067*9637SRandy.Fishel@Sun.COM 		prop->value[idx] = val;
1068*9637SRandy.Fishel@Sun.COM 	}
1069*9637SRandy.Fishel@Sun.COM }
1070*9637SRandy.Fishel@Sun.COM 
1071*9637SRandy.Fishel@Sun.COM /*ARGSUSED*/
1072*9637SRandy.Fishel@Sun.COM static int cpu_idle_prop_update_intr_cnt(void *arg, uint64_t seqnum,
1073*9637SRandy.Fishel@Sun.COM     cpu_idle_prop_value_t *valp)
1074*9637SRandy.Fishel@Sun.COM {
1075*9637SRandy.Fishel@Sun.COM 	int i;
1076*9637SRandy.Fishel@Sun.COM 	uint64_t val;
1077*9637SRandy.Fishel@Sun.COM 
1078*9637SRandy.Fishel@Sun.COM 	for (val = 0, i = 0; i < PIL_MAX; i++) {
1079*9637SRandy.Fishel@Sun.COM 		val += CPU->cpu_stats.sys.intr[i];
1080*9637SRandy.Fishel@Sun.COM 	}
1081*9637SRandy.Fishel@Sun.COM 	valp->cipv_uint64 = val;
1082*9637SRandy.Fishel@Sun.COM 
1083*9637SRandy.Fishel@Sun.COM 	return (0);
1084*9637SRandy.Fishel@Sun.COM }
1085*9637SRandy.Fishel@Sun.COM 
1086*9637SRandy.Fishel@Sun.COM uint_t
1087*9637SRandy.Fishel@Sun.COM cpu_idle_get_cpu_state(cpu_t *cp)
1088*9637SRandy.Fishel@Sun.COM {
1089*9637SRandy.Fishel@Sun.COM 	ASSERT(cp != NULL && cp->cpu_seqid < max_ncpus);
1090*9637SRandy.Fishel@Sun.COM 	return ((uint_t)cpu_idle_prop_get_uint32(
1091*9637SRandy.Fishel@Sun.COM 	    cpu_idle_prop_array[CPU_IDLE_PROP_IDX_IDLE_STATE].handle,
1092*9637SRandy.Fishel@Sun.COM 	    CPU_IDLE_GET_CTX(cp)));
1093*9637SRandy.Fishel@Sun.COM }
1094