18906SEric.Saxe@Sun.COM /*
28906SEric.Saxe@Sun.COM * CDDL HEADER START
38906SEric.Saxe@Sun.COM *
48906SEric.Saxe@Sun.COM * The contents of this file are subject to the terms of the
58906SEric.Saxe@Sun.COM * Common Development and Distribution License (the "License").
68906SEric.Saxe@Sun.COM * You may not use this file except in compliance with the License.
78906SEric.Saxe@Sun.COM *
88906SEric.Saxe@Sun.COM * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
98906SEric.Saxe@Sun.COM * or http://www.opensolaris.org/os/licensing.
108906SEric.Saxe@Sun.COM * See the License for the specific language governing permissions
118906SEric.Saxe@Sun.COM * and limitations under the License.
128906SEric.Saxe@Sun.COM *
138906SEric.Saxe@Sun.COM * When distributing Covered Code, include this CDDL HEADER in each
148906SEric.Saxe@Sun.COM * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
158906SEric.Saxe@Sun.COM * If applicable, add the following below this CDDL HEADER, with the
168906SEric.Saxe@Sun.COM * fields enclosed by brackets "[]" replaced with your own identifying
178906SEric.Saxe@Sun.COM * information: Portions Copyright [yyyy] [name of copyright owner]
188906SEric.Saxe@Sun.COM *
198906SEric.Saxe@Sun.COM * CDDL HEADER END
208906SEric.Saxe@Sun.COM */
218906SEric.Saxe@Sun.COM /*
228906SEric.Saxe@Sun.COM * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
238906SEric.Saxe@Sun.COM * Use is subject to license terms.
248906SEric.Saxe@Sun.COM */
258906SEric.Saxe@Sun.COM
268906SEric.Saxe@Sun.COM #include <sys/cpu_pm.h>
278906SEric.Saxe@Sun.COM #include <sys/cmn_err.h>
2810797SEric.Saxe@Sun.COM #include <sys/time.h>
298906SEric.Saxe@Sun.COM #include <sys/sdt.h>
308906SEric.Saxe@Sun.COM
318906SEric.Saxe@Sun.COM /*
328906SEric.Saxe@Sun.COM * Solaris Event Based CPU Power Manager
338906SEric.Saxe@Sun.COM *
348906SEric.Saxe@Sun.COM * This file implements platform independent event based CPU power management.
358906SEric.Saxe@Sun.COM * When CPUs are configured into the system, the CMT scheduling subsystem will
368906SEric.Saxe@Sun.COM * query the platform to determine if the CPU belongs to any power management
378906SEric.Saxe@Sun.COM * domains. That is, sets of CPUs that share power management states.
388906SEric.Saxe@Sun.COM *
398906SEric.Saxe@Sun.COM * Active Power Management domains represent a group of CPUs across which the
408906SEric.Saxe@Sun.COM * Operating System can request speed changes (which may in turn result
418906SEric.Saxe@Sun.COM * in voltage changes). This allows the operating system to trade off
428906SEric.Saxe@Sun.COM * performance for power savings.
438906SEric.Saxe@Sun.COM *
448906SEric.Saxe@Sun.COM * Idle Power Management domains can enter power savings states when they are
458906SEric.Saxe@Sun.COM * unutilized. These states allow the Operating System to trade off power
468906SEric.Saxe@Sun.COM * for performance (in the form of latency to transition from the idle state
478906SEric.Saxe@Sun.COM * to an active one).
488906SEric.Saxe@Sun.COM *
498906SEric.Saxe@Sun.COM * For each active and idle power domain the CMT subsystem instantiates, a
508906SEric.Saxe@Sun.COM * cpupm_domain_t structure is created. As the dispatcher schedules threads
518906SEric.Saxe@Sun.COM * to run on the system's CPUs, it will also track the utilization of the
528906SEric.Saxe@Sun.COM * enumerated power domains. Significant changes in utilization will result
538906SEric.Saxe@Sun.COM * in the dispatcher sending the power manager events that relate to the
 * utilization of the power domain. The power manager receives the events,
558906SEric.Saxe@Sun.COM * and in the context of the policy objectives in force, may decide to request
568906SEric.Saxe@Sun.COM * the domain's power/performance state be changed.
578906SEric.Saxe@Sun.COM *
588906SEric.Saxe@Sun.COM * Under the "elastic" CPUPM policy, when the utilization rises, the CPU power
598906SEric.Saxe@Sun.COM * manager will request the CPUs in the domain run at their fastest (and most
608906SEric.Saxe@Sun.COM * power consuming) state. When the domain becomes idle (utilization at zero),
618906SEric.Saxe@Sun.COM * the power manager will request that the CPUs run at a speed that saves the
628906SEric.Saxe@Sun.COM * most power.
638906SEric.Saxe@Sun.COM *
648906SEric.Saxe@Sun.COM * The advantage of this scheme, is that the CPU power manager working with the
658906SEric.Saxe@Sun.COM * dispatcher can be extremely responsive to changes in utilization. Optimizing
668906SEric.Saxe@Sun.COM * for performance in the presence of utilization, and power savings in the
678906SEric.Saxe@Sun.COM * presence of idleness. Such close collaboration with the dispatcher has other
688906SEric.Saxe@Sun.COM * benefits that will play out in the form of more sophisticated power /
698906SEric.Saxe@Sun.COM * performance policy in the near future.
708906SEric.Saxe@Sun.COM *
718906SEric.Saxe@Sun.COM * Avoiding state thrashing in the presence of transient periods of utilization
728906SEric.Saxe@Sun.COM * and idleness while still being responsive to non-transient periods is key.
7310797SEric.Saxe@Sun.COM * The power manager implements a "governor" that is used to throttle
748906SEric.Saxe@Sun.COM * state transitions when a significant amount of transient idle or transient
758906SEric.Saxe@Sun.COM * work is detected.
768906SEric.Saxe@Sun.COM *
 * Kernel background activity (e.g. taskq threads) is by far the most common
 * form of transient utilization. Ungoverned in the face of this utilization,
 * hundreds of state transitions per second would result on an idle system.
808906SEric.Saxe@Sun.COM *
818906SEric.Saxe@Sun.COM * Transient idleness is common when a thread briefly yields the CPU to
828906SEric.Saxe@Sun.COM * wait for an event elsewhere in the system. Where the idle period is short
838906SEric.Saxe@Sun.COM * enough, the overhead associated with making the state transition doesn't
848906SEric.Saxe@Sun.COM * justify the power savings.
8510797SEric.Saxe@Sun.COM *
8610797SEric.Saxe@Sun.COM * The following is the state machine for the governor implemented by
8710797SEric.Saxe@Sun.COM * cpupm_utilization_event():
8810797SEric.Saxe@Sun.COM *
 *              ----->---tw---->-----
 *             /                     \
 *          (I)-<-ti-<-     -<-ntw-<(W)
 *           |         \   /         |
 *           \          \ /          /
 *            >-nti/rm->(D)--->-tw->-
9510797SEric.Saxe@Sun.COM * Key:
9610797SEric.Saxe@Sun.COM *
9710797SEric.Saxe@Sun.COM * States
9810797SEric.Saxe@Sun.COM * - (D): Default (ungoverned)
9910797SEric.Saxe@Sun.COM * - (W): Transient work governed
10010797SEric.Saxe@Sun.COM * - (I): Transient idle governed
10110797SEric.Saxe@Sun.COM * State Transitions
10210797SEric.Saxe@Sun.COM * - tw: transient work
10310797SEric.Saxe@Sun.COM * - ti: transient idleness
10410797SEric.Saxe@Sun.COM * - ntw: non-transient work
10510797SEric.Saxe@Sun.COM * - nti: non-transient idleness
10610797SEric.Saxe@Sun.COM * - rm: thread remain event
1078906SEric.Saxe@Sun.COM */
1088906SEric.Saxe@Sun.COM
1098906SEric.Saxe@Sun.COM static cpupm_domain_t *cpupm_domains = NULL;
1108906SEric.Saxe@Sun.COM
1118906SEric.Saxe@Sun.COM /*
1128906SEric.Saxe@Sun.COM * Uninitialized state of CPU power management is disabled
1138906SEric.Saxe@Sun.COM */
1148906SEric.Saxe@Sun.COM cpupm_policy_t cpupm_policy = CPUPM_POLICY_DISABLED;
1158906SEric.Saxe@Sun.COM
1168906SEric.Saxe@Sun.COM /*
1178906SEric.Saxe@Sun.COM * Periods of utilization lasting less than this time interval are characterized
1188906SEric.Saxe@Sun.COM * as transient. State changes associated with transient work are considered
1198906SEric.Saxe@Sun.COM * to be mispredicted. That is, it's not worth raising and lower power states
1208906SEric.Saxe@Sun.COM * where the utilization lasts for less than this interval.
1218906SEric.Saxe@Sun.COM */
1228906SEric.Saxe@Sun.COM hrtime_t cpupm_tw_predict_interval;
1238906SEric.Saxe@Sun.COM
1248906SEric.Saxe@Sun.COM /*
1258906SEric.Saxe@Sun.COM * Periods of idleness lasting less than this time interval are characterized
1268906SEric.Saxe@Sun.COM * as transient. State changes associated with transient idle are considered
1278906SEric.Saxe@Sun.COM * to be mispredicted. That is, it's not worth lowering and raising power
1288906SEric.Saxe@Sun.COM * states where the idleness lasts for less than this interval.
1298906SEric.Saxe@Sun.COM */
1308906SEric.Saxe@Sun.COM hrtime_t cpupm_ti_predict_interval;
1318906SEric.Saxe@Sun.COM
1328906SEric.Saxe@Sun.COM /*
1338906SEric.Saxe@Sun.COM * Number of mispredictions after which future transitions will be governed.
1348906SEric.Saxe@Sun.COM */
13510797SEric.Saxe@Sun.COM int cpupm_mispredict_thresh = 4;
1368906SEric.Saxe@Sun.COM
1378906SEric.Saxe@Sun.COM /*
1388906SEric.Saxe@Sun.COM * Likewise, the number of mispredicted governed transitions after which the
1398906SEric.Saxe@Sun.COM * governor will be removed.
1408906SEric.Saxe@Sun.COM */
14110797SEric.Saxe@Sun.COM int cpupm_mispredict_gov_thresh = 4;
1428906SEric.Saxe@Sun.COM
1438906SEric.Saxe@Sun.COM /*
14410797SEric.Saxe@Sun.COM * The transient work and transient idle prediction intervals are specified
14510797SEric.Saxe@Sun.COM * here. Tuning them higher will result in the transient work, and transient
14610797SEric.Saxe@Sun.COM * idle governors being used more aggresively, which limits the frequency of
14710797SEric.Saxe@Sun.COM * state transitions at the expense of performance and power savings,
14810797SEric.Saxe@Sun.COM * respectively. The intervals are specified in nanoseconds.
14910797SEric.Saxe@Sun.COM */
15010797SEric.Saxe@Sun.COM /*
15110797SEric.Saxe@Sun.COM * 400 usec
1528906SEric.Saxe@Sun.COM */
15310797SEric.Saxe@Sun.COM #define CPUPM_DEFAULT_TI_INTERVAL 400000
1548906SEric.Saxe@Sun.COM /*
15510797SEric.Saxe@Sun.COM * 400 usec
1568906SEric.Saxe@Sun.COM */
15710797SEric.Saxe@Sun.COM #define CPUPM_DEFAULT_TW_INTERVAL 400000
1588906SEric.Saxe@Sun.COM
15910797SEric.Saxe@Sun.COM hrtime_t cpupm_ti_gov_interval = CPUPM_DEFAULT_TI_INTERVAL;
16010797SEric.Saxe@Sun.COM hrtime_t cpupm_tw_gov_interval = CPUPM_DEFAULT_TW_INTERVAL;
1618906SEric.Saxe@Sun.COM
1628906SEric.Saxe@Sun.COM
16310797SEric.Saxe@Sun.COM static void cpupm_governor_initialize(void);
1648906SEric.Saxe@Sun.COM static void cpupm_state_change_global(cpupm_dtype_t, cpupm_state_name_t);
1658906SEric.Saxe@Sun.COM
1668906SEric.Saxe@Sun.COM cpupm_policy_t
cpupm_get_policy(void)1678906SEric.Saxe@Sun.COM cpupm_get_policy(void)
1688906SEric.Saxe@Sun.COM {
1698906SEric.Saxe@Sun.COM return (cpupm_policy);
1708906SEric.Saxe@Sun.COM }
1718906SEric.Saxe@Sun.COM
1728906SEric.Saxe@Sun.COM int
cpupm_set_policy(cpupm_policy_t new_policy)1738906SEric.Saxe@Sun.COM cpupm_set_policy(cpupm_policy_t new_policy)
1748906SEric.Saxe@Sun.COM {
1758906SEric.Saxe@Sun.COM static int gov_init = 0;
1768906SEric.Saxe@Sun.COM int result = 0;
1778906SEric.Saxe@Sun.COM
1788906SEric.Saxe@Sun.COM mutex_enter(&cpu_lock);
1798906SEric.Saxe@Sun.COM if (new_policy == cpupm_policy) {
1808906SEric.Saxe@Sun.COM mutex_exit(&cpu_lock);
1818906SEric.Saxe@Sun.COM return (result);
1828906SEric.Saxe@Sun.COM }
1838906SEric.Saxe@Sun.COM
1848906SEric.Saxe@Sun.COM /*
1858906SEric.Saxe@Sun.COM * Pausing CPUs causes a high priority thread to be scheduled
1868906SEric.Saxe@Sun.COM * on all other CPUs (besides the current one). This locks out
1878906SEric.Saxe@Sun.COM * other CPUs from making CPUPM state transitions.
1888906SEric.Saxe@Sun.COM */
1898906SEric.Saxe@Sun.COM switch (new_policy) {
1908906SEric.Saxe@Sun.COM case CPUPM_POLICY_DISABLED:
1918906SEric.Saxe@Sun.COM pause_cpus(NULL);
1928906SEric.Saxe@Sun.COM cpupm_policy = CPUPM_POLICY_DISABLED;
1938906SEric.Saxe@Sun.COM start_cpus();
1948906SEric.Saxe@Sun.COM
1958906SEric.Saxe@Sun.COM result = cmt_pad_disable(PGHW_POW_ACTIVE);
1968906SEric.Saxe@Sun.COM
1978906SEric.Saxe@Sun.COM /*
1988906SEric.Saxe@Sun.COM * Once PAD has been enabled, it should always be possible
1998906SEric.Saxe@Sun.COM * to disable it.
2008906SEric.Saxe@Sun.COM */
2018906SEric.Saxe@Sun.COM ASSERT(result == 0);
2028906SEric.Saxe@Sun.COM
2038906SEric.Saxe@Sun.COM /*
2048906SEric.Saxe@Sun.COM * Bring all the active power domains to the maximum
2058906SEric.Saxe@Sun.COM * performance state.
2068906SEric.Saxe@Sun.COM */
2078906SEric.Saxe@Sun.COM cpupm_state_change_global(CPUPM_DTYPE_ACTIVE,
2088906SEric.Saxe@Sun.COM CPUPM_STATE_MAX_PERF);
2098906SEric.Saxe@Sun.COM
2108906SEric.Saxe@Sun.COM break;
2118906SEric.Saxe@Sun.COM case CPUPM_POLICY_ELASTIC:
2128906SEric.Saxe@Sun.COM
2138906SEric.Saxe@Sun.COM result = cmt_pad_enable(PGHW_POW_ACTIVE);
2148906SEric.Saxe@Sun.COM if (result < 0) {
2158906SEric.Saxe@Sun.COM /*
2168906SEric.Saxe@Sun.COM * Failed to enable PAD across the active power
2178906SEric.Saxe@Sun.COM * domains, which may well be because none were
2188906SEric.Saxe@Sun.COM * enumerated.
2198906SEric.Saxe@Sun.COM */
2208906SEric.Saxe@Sun.COM break;
2218906SEric.Saxe@Sun.COM }
2228906SEric.Saxe@Sun.COM
2238906SEric.Saxe@Sun.COM /*
22410797SEric.Saxe@Sun.COM * Initialize the governor parameters the first time through.
2258906SEric.Saxe@Sun.COM */
2268906SEric.Saxe@Sun.COM if (gov_init == 0) {
22710797SEric.Saxe@Sun.COM cpupm_governor_initialize();
22810797SEric.Saxe@Sun.COM gov_init = 1;
2298906SEric.Saxe@Sun.COM }
23010797SEric.Saxe@Sun.COM
23110797SEric.Saxe@Sun.COM pause_cpus(NULL);
2328906SEric.Saxe@Sun.COM cpupm_policy = CPUPM_POLICY_ELASTIC;
2338906SEric.Saxe@Sun.COM start_cpus();
2348906SEric.Saxe@Sun.COM
2358906SEric.Saxe@Sun.COM break;
2368906SEric.Saxe@Sun.COM default:
2378906SEric.Saxe@Sun.COM cmn_err(CE_WARN, "Attempt to set unknown CPUPM policy %d\n",
2388906SEric.Saxe@Sun.COM new_policy);
2398906SEric.Saxe@Sun.COM ASSERT(0);
2408906SEric.Saxe@Sun.COM break;
2418906SEric.Saxe@Sun.COM }
2428906SEric.Saxe@Sun.COM mutex_exit(&cpu_lock);
2438906SEric.Saxe@Sun.COM
2448906SEric.Saxe@Sun.COM return (result);
2458906SEric.Saxe@Sun.COM }
2468906SEric.Saxe@Sun.COM
2478906SEric.Saxe@Sun.COM /*
2488906SEric.Saxe@Sun.COM * Look for an existing power domain
2498906SEric.Saxe@Sun.COM */
2508906SEric.Saxe@Sun.COM static cpupm_domain_t *
cpupm_domain_find(id_t id,cpupm_dtype_t type)2518906SEric.Saxe@Sun.COM cpupm_domain_find(id_t id, cpupm_dtype_t type)
2528906SEric.Saxe@Sun.COM {
2538906SEric.Saxe@Sun.COM ASSERT(MUTEX_HELD(&cpu_lock));
2548906SEric.Saxe@Sun.COM
2558906SEric.Saxe@Sun.COM cpupm_domain_t *dom;
2568906SEric.Saxe@Sun.COM
2578906SEric.Saxe@Sun.COM dom = cpupm_domains;
2588906SEric.Saxe@Sun.COM while (dom != NULL) {
2598906SEric.Saxe@Sun.COM if (id == dom->cpd_id && type == dom->cpd_type)
2608906SEric.Saxe@Sun.COM return (dom);
2618906SEric.Saxe@Sun.COM dom = dom->cpd_next;
2628906SEric.Saxe@Sun.COM }
2638906SEric.Saxe@Sun.COM return (NULL);
2648906SEric.Saxe@Sun.COM }
2658906SEric.Saxe@Sun.COM
2668906SEric.Saxe@Sun.COM /*
2678906SEric.Saxe@Sun.COM * Create a new domain
2688906SEric.Saxe@Sun.COM */
2698906SEric.Saxe@Sun.COM static cpupm_domain_t *
cpupm_domain_create(id_t id,cpupm_dtype_t type)2708906SEric.Saxe@Sun.COM cpupm_domain_create(id_t id, cpupm_dtype_t type)
2718906SEric.Saxe@Sun.COM {
2728906SEric.Saxe@Sun.COM cpupm_domain_t *dom;
2738906SEric.Saxe@Sun.COM
2748906SEric.Saxe@Sun.COM ASSERT(MUTEX_HELD(&cpu_lock));
2758906SEric.Saxe@Sun.COM
2768906SEric.Saxe@Sun.COM dom = kmem_zalloc(sizeof (cpupm_domain_t), KM_SLEEP);
2778906SEric.Saxe@Sun.COM dom->cpd_id = id;
2788906SEric.Saxe@Sun.COM dom->cpd_type = type;
2798906SEric.Saxe@Sun.COM
2808906SEric.Saxe@Sun.COM /* Link into the known domain list */
2818906SEric.Saxe@Sun.COM dom->cpd_next = cpupm_domains;
2828906SEric.Saxe@Sun.COM cpupm_domains = dom;
2838906SEric.Saxe@Sun.COM
2848906SEric.Saxe@Sun.COM return (dom);
2858906SEric.Saxe@Sun.COM }
2868906SEric.Saxe@Sun.COM
2878906SEric.Saxe@Sun.COM static void
cpupm_domain_state_enum(struct cpu * cp,cpupm_domain_t * dom)2888906SEric.Saxe@Sun.COM cpupm_domain_state_enum(struct cpu *cp, cpupm_domain_t *dom)
2898906SEric.Saxe@Sun.COM {
2908906SEric.Saxe@Sun.COM /*
2918906SEric.Saxe@Sun.COM * In the envent we're enumerating because the domain's state
2928906SEric.Saxe@Sun.COM * configuration has changed, toss any existing states.
2938906SEric.Saxe@Sun.COM */
2948906SEric.Saxe@Sun.COM if (dom->cpd_nstates > 0) {
2958906SEric.Saxe@Sun.COM kmem_free(dom->cpd_states,
2968906SEric.Saxe@Sun.COM sizeof (cpupm_state_t) * dom->cpd_nstates);
2978906SEric.Saxe@Sun.COM dom->cpd_nstates = 0;
2988906SEric.Saxe@Sun.COM }
2998906SEric.Saxe@Sun.COM
3008906SEric.Saxe@Sun.COM /*
3018906SEric.Saxe@Sun.COM * Query to determine the number of states, allocate storage
3028906SEric.Saxe@Sun.COM * large enough to hold the state information, and pass it back
3038906SEric.Saxe@Sun.COM * to the platform driver to complete the enumeration.
3048906SEric.Saxe@Sun.COM */
3058906SEric.Saxe@Sun.COM dom->cpd_nstates = cpupm_plat_state_enumerate(cp, dom->cpd_type, NULL);
3068906SEric.Saxe@Sun.COM
3078906SEric.Saxe@Sun.COM if (dom->cpd_nstates == 0)
3088906SEric.Saxe@Sun.COM return;
3098906SEric.Saxe@Sun.COM
3108906SEric.Saxe@Sun.COM dom->cpd_states =
3118906SEric.Saxe@Sun.COM kmem_zalloc(dom->cpd_nstates * sizeof (cpupm_state_t), KM_SLEEP);
3128906SEric.Saxe@Sun.COM (void) cpupm_plat_state_enumerate(cp, dom->cpd_type, dom->cpd_states);
3138906SEric.Saxe@Sun.COM }
3148906SEric.Saxe@Sun.COM
3158906SEric.Saxe@Sun.COM /*
3168906SEric.Saxe@Sun.COM * Initialize the specified type of power domain on behalf of the CPU
3178906SEric.Saxe@Sun.COM */
3188906SEric.Saxe@Sun.COM cpupm_domain_t *
cpupm_domain_init(struct cpu * cp,cpupm_dtype_t type)3198906SEric.Saxe@Sun.COM cpupm_domain_init(struct cpu *cp, cpupm_dtype_t type)
3208906SEric.Saxe@Sun.COM {
3218906SEric.Saxe@Sun.COM cpupm_domain_t *dom;
3228906SEric.Saxe@Sun.COM id_t did;
3238906SEric.Saxe@Sun.COM
3248906SEric.Saxe@Sun.COM ASSERT(MUTEX_HELD(&cpu_lock));
3258906SEric.Saxe@Sun.COM
3268906SEric.Saxe@Sun.COM /*
3278906SEric.Saxe@Sun.COM * Instantiate the domain if it doesn't already exist
3288906SEric.Saxe@Sun.COM * and enumerate its power states.
3298906SEric.Saxe@Sun.COM */
3308906SEric.Saxe@Sun.COM did = cpupm_domain_id(cp, type);
3318906SEric.Saxe@Sun.COM dom = cpupm_domain_find(did, type);
3328906SEric.Saxe@Sun.COM if (dom == NULL) {
3338906SEric.Saxe@Sun.COM dom = cpupm_domain_create(did, type);
3348906SEric.Saxe@Sun.COM cpupm_domain_state_enum(cp, dom);
3358906SEric.Saxe@Sun.COM }
3368906SEric.Saxe@Sun.COM
3378906SEric.Saxe@Sun.COM /*
3388906SEric.Saxe@Sun.COM * Named state initialization
3398906SEric.Saxe@Sun.COM */
3408906SEric.Saxe@Sun.COM if (type == CPUPM_DTYPE_ACTIVE) {
3418906SEric.Saxe@Sun.COM /*
3428906SEric.Saxe@Sun.COM * For active power domains, the highest performance
3438906SEric.Saxe@Sun.COM * state is defined as first state returned from
3448906SEric.Saxe@Sun.COM * the domain enumeration.
3458906SEric.Saxe@Sun.COM */
3468906SEric.Saxe@Sun.COM dom->cpd_named_states[CPUPM_STATE_MAX_PERF] =
3478906SEric.Saxe@Sun.COM &dom->cpd_states[0];
3488906SEric.Saxe@Sun.COM dom->cpd_named_states[CPUPM_STATE_LOW_POWER] =
3498906SEric.Saxe@Sun.COM &dom->cpd_states[dom->cpd_nstates - 1];
3508906SEric.Saxe@Sun.COM
3518906SEric.Saxe@Sun.COM /*
3528906SEric.Saxe@Sun.COM * Begin by assuming CPU is running at the max perf state.
3538906SEric.Saxe@Sun.COM */
3548906SEric.Saxe@Sun.COM dom->cpd_state = dom->cpd_named_states[CPUPM_STATE_MAX_PERF];
3558906SEric.Saxe@Sun.COM }
3568906SEric.Saxe@Sun.COM
3578906SEric.Saxe@Sun.COM return (dom);
3588906SEric.Saxe@Sun.COM }
3598906SEric.Saxe@Sun.COM
3608906SEric.Saxe@Sun.COM /*
3618906SEric.Saxe@Sun.COM * Return the id associated with the given type of domain
3628906SEric.Saxe@Sun.COM * to which cp belongs
3638906SEric.Saxe@Sun.COM */
3648906SEric.Saxe@Sun.COM id_t
cpupm_domain_id(struct cpu * cp,cpupm_dtype_t type)3658906SEric.Saxe@Sun.COM cpupm_domain_id(struct cpu *cp, cpupm_dtype_t type)
3668906SEric.Saxe@Sun.COM {
3678906SEric.Saxe@Sun.COM return (cpupm_plat_domain_id(cp, type));
3688906SEric.Saxe@Sun.COM }
3698906SEric.Saxe@Sun.COM
3708906SEric.Saxe@Sun.COM /*
3718906SEric.Saxe@Sun.COM * Initiate a state change for the specified domain on behalf of cp
3728906SEric.Saxe@Sun.COM */
3738906SEric.Saxe@Sun.COM int
cpupm_change_state(struct cpu * cp,cpupm_domain_t * dom,cpupm_state_t * state)3748906SEric.Saxe@Sun.COM cpupm_change_state(struct cpu *cp, cpupm_domain_t *dom, cpupm_state_t *state)
3758906SEric.Saxe@Sun.COM {
3768906SEric.Saxe@Sun.COM if (cpupm_plat_change_state(cp, state) < 0)
3778906SEric.Saxe@Sun.COM return (-1);
3788906SEric.Saxe@Sun.COM
3798906SEric.Saxe@Sun.COM DTRACE_PROBE2(cpupm__change__state,
3808906SEric.Saxe@Sun.COM cpupm_domain_t *, dom,
3818906SEric.Saxe@Sun.COM cpupm_state_t *, state);
3828906SEric.Saxe@Sun.COM
3838906SEric.Saxe@Sun.COM dom->cpd_state = state;
3848906SEric.Saxe@Sun.COM return (0);
3858906SEric.Saxe@Sun.COM }
3868906SEric.Saxe@Sun.COM
3878906SEric.Saxe@Sun.COM /*
3888906SEric.Saxe@Sun.COM * Interface into the CPU power manager to indicate a significant change
3898906SEric.Saxe@Sun.COM * in utilization of the specified active power domain
3908906SEric.Saxe@Sun.COM */
3918906SEric.Saxe@Sun.COM void
cpupm_utilization_event(struct cpu * cp,hrtime_t now,cpupm_domain_t * dom,cpupm_util_event_t event)3928906SEric.Saxe@Sun.COM cpupm_utilization_event(struct cpu *cp, hrtime_t now, cpupm_domain_t *dom,
3938906SEric.Saxe@Sun.COM cpupm_util_event_t event)
3948906SEric.Saxe@Sun.COM {
3958906SEric.Saxe@Sun.COM cpupm_state_t *new_state = NULL;
3968906SEric.Saxe@Sun.COM hrtime_t last;
3978906SEric.Saxe@Sun.COM
3988906SEric.Saxe@Sun.COM if (cpupm_policy == CPUPM_POLICY_DISABLED) {
3998906SEric.Saxe@Sun.COM return;
4008906SEric.Saxe@Sun.COM }
4018906SEric.Saxe@Sun.COM
4028906SEric.Saxe@Sun.COM /*
4038906SEric.Saxe@Sun.COM * What follows is a simple elastic power state management policy.
4048906SEric.Saxe@Sun.COM *
4058906SEric.Saxe@Sun.COM * If the utilization has become non-zero, and the domain was
4068906SEric.Saxe@Sun.COM * previously at it's lowest power state, then transition it
4078906SEric.Saxe@Sun.COM * to the highest state in the spirit of "race to idle".
4088906SEric.Saxe@Sun.COM *
4098906SEric.Saxe@Sun.COM * If the utilization has dropped to zero, then transition the
4108906SEric.Saxe@Sun.COM * domain to its lowest power state.
4118906SEric.Saxe@Sun.COM *
41210797SEric.Saxe@Sun.COM * Statistics are maintained to implement a governor to reduce state
4138906SEric.Saxe@Sun.COM * transitions resulting from either transient work, or periods of
4148906SEric.Saxe@Sun.COM * transient idleness on the domain.
4158906SEric.Saxe@Sun.COM */
4168906SEric.Saxe@Sun.COM switch (event) {
4178906SEric.Saxe@Sun.COM case CPUPM_DOM_REMAIN_BUSY:
4188906SEric.Saxe@Sun.COM
4198906SEric.Saxe@Sun.COM /*
4208906SEric.Saxe@Sun.COM * We've received an event that the domain is running a thread
4218906SEric.Saxe@Sun.COM * that's made it to the end of it's time slice. If we are at
4228906SEric.Saxe@Sun.COM * low power, then raise it. If the transient work governor
4238906SEric.Saxe@Sun.COM * is engaged, then remove it.
4248906SEric.Saxe@Sun.COM */
4258906SEric.Saxe@Sun.COM if (dom->cpd_state ==
4268906SEric.Saxe@Sun.COM dom->cpd_named_states[CPUPM_STATE_LOW_POWER]) {
4278906SEric.Saxe@Sun.COM new_state =
4288906SEric.Saxe@Sun.COM dom->cpd_named_states[CPUPM_STATE_MAX_PERF];
42910797SEric.Saxe@Sun.COM if (dom->cpd_governor == CPUPM_GOV_TRANS_WORK) {
43010797SEric.Saxe@Sun.COM dom->cpd_governor = CPUPM_GOV_DISENGAGED;
4318906SEric.Saxe@Sun.COM dom->cpd_tw = 0;
4328906SEric.Saxe@Sun.COM }
4338906SEric.Saxe@Sun.COM }
4348906SEric.Saxe@Sun.COM break;
4358906SEric.Saxe@Sun.COM
4368906SEric.Saxe@Sun.COM case CPUPM_DOM_BUSY_FROM_IDLE:
4378906SEric.Saxe@Sun.COM last = dom->cpd_last_lower;
4388906SEric.Saxe@Sun.COM dom->cpd_last_raise = now;
4398906SEric.Saxe@Sun.COM
4408906SEric.Saxe@Sun.COM DTRACE_PROBE3(cpupm__raise__req,
4418906SEric.Saxe@Sun.COM cpupm_domain_t *, dom,
4428906SEric.Saxe@Sun.COM hrtime_t, last,
4438906SEric.Saxe@Sun.COM hrtime_t, now);
4448906SEric.Saxe@Sun.COM
4458906SEric.Saxe@Sun.COM if (dom->cpd_state ==
4468906SEric.Saxe@Sun.COM dom->cpd_named_states[CPUPM_STATE_LOW_POWER]) {
4478906SEric.Saxe@Sun.COM
4488906SEric.Saxe@Sun.COM /*
4498906SEric.Saxe@Sun.COM * There's non-zero utilization, and the domain is
4508906SEric.Saxe@Sun.COM * running in the lower power state. Before we
45110797SEric.Saxe@Sun.COM * consider raising power, check if the preceeding
45210797SEric.Saxe@Sun.COM * idle period was transient in duration.
45310797SEric.Saxe@Sun.COM *
45410797SEric.Saxe@Sun.COM * If the domain is already transient work governed,
45510797SEric.Saxe@Sun.COM * then we don't bother maintaining transient idle
45610797SEric.Saxe@Sun.COM * statistics, as the presence of enough transient work
45710797SEric.Saxe@Sun.COM * can also make the domain frequently transiently idle.
45810797SEric.Saxe@Sun.COM * In this case, we still want to remain transient work
45910797SEric.Saxe@Sun.COM * governed.
4608906SEric.Saxe@Sun.COM */
46110797SEric.Saxe@Sun.COM if (dom->cpd_governor == CPUPM_GOV_DISENGAGED) {
4628906SEric.Saxe@Sun.COM if ((now - last) < cpupm_ti_predict_interval) {
4638906SEric.Saxe@Sun.COM /*
4648906SEric.Saxe@Sun.COM * We're raising the domain power and
4658906SEric.Saxe@Sun.COM * we *just* lowered it. Consider
4668906SEric.Saxe@Sun.COM * this a mispredicted power state
4678906SEric.Saxe@Sun.COM * transition due to a transient
4688906SEric.Saxe@Sun.COM * idle period.
4698906SEric.Saxe@Sun.COM */
47010797SEric.Saxe@Sun.COM if (++dom->cpd_ti >=
4718906SEric.Saxe@Sun.COM cpupm_mispredict_thresh) {
4728906SEric.Saxe@Sun.COM /*
4738906SEric.Saxe@Sun.COM * There's enough transient
4748906SEric.Saxe@Sun.COM * idle transitions to
4758906SEric.Saxe@Sun.COM * justify governing future
4768906SEric.Saxe@Sun.COM * lowering requests.
4778906SEric.Saxe@Sun.COM */
47810797SEric.Saxe@Sun.COM dom->cpd_governor =
47910797SEric.Saxe@Sun.COM CPUPM_GOV_TRANS_IDLE;
4808906SEric.Saxe@Sun.COM dom->cpd_ti = 0;
4818906SEric.Saxe@Sun.COM DTRACE_PROBE1(
4828906SEric.Saxe@Sun.COM cpupm__ti__governed,
4838906SEric.Saxe@Sun.COM cpupm_domain_t *, dom);
4848906SEric.Saxe@Sun.COM }
4858906SEric.Saxe@Sun.COM } else {
4868906SEric.Saxe@Sun.COM /*
4878906SEric.Saxe@Sun.COM * We correctly predicted the last
4888906SEric.Saxe@Sun.COM * lowering.
4898906SEric.Saxe@Sun.COM */
4908906SEric.Saxe@Sun.COM dom->cpd_ti = 0;
4918906SEric.Saxe@Sun.COM }
4928906SEric.Saxe@Sun.COM }
49310797SEric.Saxe@Sun.COM if (dom->cpd_governor == CPUPM_GOV_TRANS_WORK) {
4948906SEric.Saxe@Sun.COM /*
4958906SEric.Saxe@Sun.COM * Raise requests are governed due to
4968906SEric.Saxe@Sun.COM * transient work.
4978906SEric.Saxe@Sun.COM */
4988906SEric.Saxe@Sun.COM DTRACE_PROBE1(cpupm__raise__governed,
4998906SEric.Saxe@Sun.COM cpupm_domain_t *, dom);
5008906SEric.Saxe@Sun.COM
5018906SEric.Saxe@Sun.COM return;
5028906SEric.Saxe@Sun.COM }
5038906SEric.Saxe@Sun.COM /*
5048906SEric.Saxe@Sun.COM * Prepare to transition to the higher power state
5058906SEric.Saxe@Sun.COM */
5068906SEric.Saxe@Sun.COM new_state = dom->cpd_named_states[CPUPM_STATE_MAX_PERF];
5078906SEric.Saxe@Sun.COM
5088906SEric.Saxe@Sun.COM } else if (dom->cpd_state ==
5098906SEric.Saxe@Sun.COM dom->cpd_named_states[CPUPM_STATE_MAX_PERF]) {
5108906SEric.Saxe@Sun.COM
5118906SEric.Saxe@Sun.COM /*
5128906SEric.Saxe@Sun.COM * Utilization is non-zero, and we're already running
5138906SEric.Saxe@Sun.COM * in the higher power state. Take this opportunity to
5148906SEric.Saxe@Sun.COM * perform some book keeping if the last lowering
5158906SEric.Saxe@Sun.COM * request was governed.
5168906SEric.Saxe@Sun.COM */
51710797SEric.Saxe@Sun.COM if (dom->cpd_governor == CPUPM_GOV_TRANS_IDLE) {
51810797SEric.Saxe@Sun.COM
5198906SEric.Saxe@Sun.COM if ((now - last) >= cpupm_ti_predict_interval) {
5208906SEric.Saxe@Sun.COM /*
5218906SEric.Saxe@Sun.COM * The domain is transient idle
5228906SEric.Saxe@Sun.COM * governed, and we mispredicted
5238906SEric.Saxe@Sun.COM * governing the last lowering request.
5248906SEric.Saxe@Sun.COM */
5258906SEric.Saxe@Sun.COM if (++dom->cpd_ti >=
5268906SEric.Saxe@Sun.COM cpupm_mispredict_gov_thresh) {
5278906SEric.Saxe@Sun.COM /*
5288906SEric.Saxe@Sun.COM * There's enough non-transient
5298906SEric.Saxe@Sun.COM * idle periods to justify
5308906SEric.Saxe@Sun.COM * removing the governor.
5318906SEric.Saxe@Sun.COM */
53210797SEric.Saxe@Sun.COM dom->cpd_governor =
53310797SEric.Saxe@Sun.COM CPUPM_GOV_DISENGAGED;
5348906SEric.Saxe@Sun.COM dom->cpd_ti = 0;
5358906SEric.Saxe@Sun.COM DTRACE_PROBE1(
5368906SEric.Saxe@Sun.COM cpupm__ti__ungoverned,
5378906SEric.Saxe@Sun.COM cpupm_domain_t *, dom);
5388906SEric.Saxe@Sun.COM }
5398906SEric.Saxe@Sun.COM } else {
5408906SEric.Saxe@Sun.COM /*
5418906SEric.Saxe@Sun.COM * Correctly predicted governing the
5428906SEric.Saxe@Sun.COM * last lowering request.
5438906SEric.Saxe@Sun.COM */
5448906SEric.Saxe@Sun.COM dom->cpd_ti = 0;
5458906SEric.Saxe@Sun.COM }
5468906SEric.Saxe@Sun.COM }
5478906SEric.Saxe@Sun.COM }
5488906SEric.Saxe@Sun.COM break;
5498906SEric.Saxe@Sun.COM
5508906SEric.Saxe@Sun.COM case CPUPM_DOM_IDLE_FROM_BUSY:
5518906SEric.Saxe@Sun.COM last = dom->cpd_last_raise;
5528906SEric.Saxe@Sun.COM dom->cpd_last_lower = now;
5538906SEric.Saxe@Sun.COM
5548906SEric.Saxe@Sun.COM DTRACE_PROBE3(cpupm__lower__req,
5558906SEric.Saxe@Sun.COM cpupm_domain_t *, dom,
5568906SEric.Saxe@Sun.COM hrtime_t, last,
5578906SEric.Saxe@Sun.COM hrtime_t, now);
5588906SEric.Saxe@Sun.COM
5598906SEric.Saxe@Sun.COM if (dom->cpd_state ==
5608906SEric.Saxe@Sun.COM dom->cpd_named_states[CPUPM_STATE_MAX_PERF]) {
5618906SEric.Saxe@Sun.COM
5628906SEric.Saxe@Sun.COM /*
5638906SEric.Saxe@Sun.COM * The domain is idle, and is running in the highest
5648906SEric.Saxe@Sun.COM * performance state. Before we consider lowering power,
5658906SEric.Saxe@Sun.COM * perform some book keeping for the transient work
5668906SEric.Saxe@Sun.COM * governor.
5678906SEric.Saxe@Sun.COM */
56810797SEric.Saxe@Sun.COM if (dom->cpd_governor == CPUPM_GOV_DISENGAGED) {
5698906SEric.Saxe@Sun.COM if ((now - last) < cpupm_tw_predict_interval) {
5708906SEric.Saxe@Sun.COM /*
5718906SEric.Saxe@Sun.COM * We're lowering the domain power and
5728906SEric.Saxe@Sun.COM * we *just* raised it. Consider the
5738906SEric.Saxe@Sun.COM * last raise mispredicted due to
5748906SEric.Saxe@Sun.COM * transient work.
5758906SEric.Saxe@Sun.COM */
5768906SEric.Saxe@Sun.COM if (++dom->cpd_tw >=
5778906SEric.Saxe@Sun.COM cpupm_mispredict_thresh) {
5788906SEric.Saxe@Sun.COM /*
57910797SEric.Saxe@Sun.COM * There's enough transient work
5808906SEric.Saxe@Sun.COM * transitions to justify
58110797SEric.Saxe@Sun.COM * governing future raise
5828906SEric.Saxe@Sun.COM * requests.
5838906SEric.Saxe@Sun.COM */
58410797SEric.Saxe@Sun.COM dom->cpd_governor =
58510797SEric.Saxe@Sun.COM CPUPM_GOV_TRANS_WORK;
5868906SEric.Saxe@Sun.COM dom->cpd_tw = 0;
5878906SEric.Saxe@Sun.COM DTRACE_PROBE1(
5888906SEric.Saxe@Sun.COM cpupm__tw__governed,
5898906SEric.Saxe@Sun.COM cpupm_domain_t *, dom);
5908906SEric.Saxe@Sun.COM }
5918906SEric.Saxe@Sun.COM } else {
5928906SEric.Saxe@Sun.COM /*
5938906SEric.Saxe@Sun.COM * We correctly predicted during the
5948906SEric.Saxe@Sun.COM * last raise.
5958906SEric.Saxe@Sun.COM */
5968906SEric.Saxe@Sun.COM dom->cpd_tw = 0;
5978906SEric.Saxe@Sun.COM }
5988906SEric.Saxe@Sun.COM }
59910797SEric.Saxe@Sun.COM if (dom->cpd_governor == CPUPM_GOV_TRANS_IDLE) {
6008906SEric.Saxe@Sun.COM /*
6018906SEric.Saxe@Sun.COM * Lowering requests are governed due to
6028906SEric.Saxe@Sun.COM * transient idleness.
6038906SEric.Saxe@Sun.COM */
6048906SEric.Saxe@Sun.COM DTRACE_PROBE1(cpupm__lowering__governed,
6058906SEric.Saxe@Sun.COM cpupm_domain_t *, dom);
6068906SEric.Saxe@Sun.COM
6078906SEric.Saxe@Sun.COM return;
6088906SEric.Saxe@Sun.COM }
6098906SEric.Saxe@Sun.COM
6108906SEric.Saxe@Sun.COM /*
6118906SEric.Saxe@Sun.COM * Prepare to transition to a lower power state.
6128906SEric.Saxe@Sun.COM */
6138906SEric.Saxe@Sun.COM new_state =
6148906SEric.Saxe@Sun.COM dom->cpd_named_states[CPUPM_STATE_LOW_POWER];
6158906SEric.Saxe@Sun.COM
6168906SEric.Saxe@Sun.COM } else if (dom->cpd_state ==
6178906SEric.Saxe@Sun.COM dom->cpd_named_states[CPUPM_STATE_LOW_POWER]) {
6188906SEric.Saxe@Sun.COM
6198906SEric.Saxe@Sun.COM /*
6208906SEric.Saxe@Sun.COM * The domain is idle, and we're already running in
6218906SEric.Saxe@Sun.COM * the lower power state. Take this opportunity to
6228906SEric.Saxe@Sun.COM * perform some book keeping if the last raising
6238906SEric.Saxe@Sun.COM * request was governed.
6248906SEric.Saxe@Sun.COM */
62510797SEric.Saxe@Sun.COM if (dom->cpd_governor == CPUPM_GOV_TRANS_WORK) {
6268906SEric.Saxe@Sun.COM if ((now - last) >= cpupm_tw_predict_interval) {
6278906SEric.Saxe@Sun.COM /*
6288906SEric.Saxe@Sun.COM * The domain is transient work
6298906SEric.Saxe@Sun.COM * governed, and we mispredicted
6308906SEric.Saxe@Sun.COM * governing the last raising request.
6318906SEric.Saxe@Sun.COM */
6328906SEric.Saxe@Sun.COM if (++dom->cpd_tw >=
6338906SEric.Saxe@Sun.COM cpupm_mispredict_gov_thresh) {
6348906SEric.Saxe@Sun.COM /*
6358906SEric.Saxe@Sun.COM * There's enough non-transient
6368906SEric.Saxe@Sun.COM * work to justify removing
6378906SEric.Saxe@Sun.COM * the governor.
6388906SEric.Saxe@Sun.COM */
63910797SEric.Saxe@Sun.COM dom->cpd_governor =
64010797SEric.Saxe@Sun.COM CPUPM_GOV_DISENGAGED;
6418906SEric.Saxe@Sun.COM dom->cpd_tw = 0;
6428906SEric.Saxe@Sun.COM DTRACE_PROBE1(
6438906SEric.Saxe@Sun.COM cpupm__tw__ungoverned,
6448906SEric.Saxe@Sun.COM cpupm_domain_t *, dom);
6458906SEric.Saxe@Sun.COM }
6468906SEric.Saxe@Sun.COM } else {
6478906SEric.Saxe@Sun.COM /*
6488906SEric.Saxe@Sun.COM * We correctly predicted governing
6498906SEric.Saxe@Sun.COM * the last raise.
6508906SEric.Saxe@Sun.COM */
6518906SEric.Saxe@Sun.COM dom->cpd_tw = 0;
6528906SEric.Saxe@Sun.COM }
6538906SEric.Saxe@Sun.COM }
6548906SEric.Saxe@Sun.COM }
6558906SEric.Saxe@Sun.COM break;
6568906SEric.Saxe@Sun.COM }
6578906SEric.Saxe@Sun.COM /*
6588906SEric.Saxe@Sun.COM * Change the power state
6598906SEric.Saxe@Sun.COM * Not much currently done if this doesn't succeed
6608906SEric.Saxe@Sun.COM */
6618906SEric.Saxe@Sun.COM if (new_state)
6628906SEric.Saxe@Sun.COM (void) cpupm_change_state(cp, dom, new_state);
6638906SEric.Saxe@Sun.COM }
6648906SEric.Saxe@Sun.COM
6658906SEric.Saxe@Sun.COM
6668906SEric.Saxe@Sun.COM /*
6678906SEric.Saxe@Sun.COM * Interface called by platforms to dynamically change the
6688906SEric.Saxe@Sun.COM * MAX performance cpupm state
6698906SEric.Saxe@Sun.COM */
6708906SEric.Saxe@Sun.COM void
cpupm_redefine_max_activepwr_state(struct cpu * cp,int max_perf_level)6718906SEric.Saxe@Sun.COM cpupm_redefine_max_activepwr_state(struct cpu *cp, int max_perf_level)
6728906SEric.Saxe@Sun.COM {
6738906SEric.Saxe@Sun.COM cpupm_domain_t *dom;
6748906SEric.Saxe@Sun.COM id_t did;
6758906SEric.Saxe@Sun.COM cpupm_dtype_t type = CPUPM_DTYPE_ACTIVE;
6768906SEric.Saxe@Sun.COM boolean_t change_state = B_FALSE;
6778906SEric.Saxe@Sun.COM cpupm_state_t *new_state = NULL;
6788906SEric.Saxe@Sun.COM
6798906SEric.Saxe@Sun.COM did = cpupm_domain_id(cp, type);
680*12004Sjiang.liu@intel.com if (MUTEX_HELD(&cpu_lock)) {
681*12004Sjiang.liu@intel.com dom = cpupm_domain_find(did, type);
682*12004Sjiang.liu@intel.com } else {
683*12004Sjiang.liu@intel.com mutex_enter(&cpu_lock);
684*12004Sjiang.liu@intel.com dom = cpupm_domain_find(did, type);
685*12004Sjiang.liu@intel.com mutex_exit(&cpu_lock);
686*12004Sjiang.liu@intel.com }
6878906SEric.Saxe@Sun.COM
6888906SEric.Saxe@Sun.COM /*
6898906SEric.Saxe@Sun.COM * Can use a lock to avoid changing the power state of the cpu when
6908906SEric.Saxe@Sun.COM * CPUPM_STATE_MAX_PERF is getting changed.
6918906SEric.Saxe@Sun.COM * Since the occurance of events to change MAX_PERF is not frequent,
6928906SEric.Saxe@Sun.COM * it may not be a good idea to overburden with locks. In the worst
6938906SEric.Saxe@Sun.COM * case, for one cycle the power may not get changed to the required
6948906SEric.Saxe@Sun.COM * level
6958906SEric.Saxe@Sun.COM */
6968906SEric.Saxe@Sun.COM if (dom != NULL) {
6978906SEric.Saxe@Sun.COM if (dom->cpd_state ==
6988906SEric.Saxe@Sun.COM dom->cpd_named_states[CPUPM_STATE_MAX_PERF]) {
6998906SEric.Saxe@Sun.COM change_state = B_TRUE;
7008906SEric.Saxe@Sun.COM }
7018906SEric.Saxe@Sun.COM
7028906SEric.Saxe@Sun.COM /*
7038906SEric.Saxe@Sun.COM * If an out of range level is passed, use the lowest supported
7048906SEric.Saxe@Sun.COM * speed.
7058906SEric.Saxe@Sun.COM */
7068906SEric.Saxe@Sun.COM if (max_perf_level >= dom->cpd_nstates &&
7078906SEric.Saxe@Sun.COM dom->cpd_nstates > 1) {
7088906SEric.Saxe@Sun.COM max_perf_level = dom->cpd_nstates - 1;
7098906SEric.Saxe@Sun.COM }
7108906SEric.Saxe@Sun.COM
7118906SEric.Saxe@Sun.COM dom->cpd_named_states[CPUPM_STATE_MAX_PERF] =
7128906SEric.Saxe@Sun.COM &dom->cpd_states[max_perf_level];
7138906SEric.Saxe@Sun.COM
7148906SEric.Saxe@Sun.COM /*
7158906SEric.Saxe@Sun.COM * If the current state is MAX_PERF, change the current state
7168906SEric.Saxe@Sun.COM * to the new MAX_PERF
7178906SEric.Saxe@Sun.COM */
7188906SEric.Saxe@Sun.COM if (change_state) {
7198906SEric.Saxe@Sun.COM new_state =
7208906SEric.Saxe@Sun.COM dom->cpd_named_states[CPUPM_STATE_MAX_PERF];
7218906SEric.Saxe@Sun.COM if (new_state) {
7228906SEric.Saxe@Sun.COM (void) cpupm_change_state(cp, dom, new_state);
7238906SEric.Saxe@Sun.COM }
7248906SEric.Saxe@Sun.COM }
7258906SEric.Saxe@Sun.COM }
7268906SEric.Saxe@Sun.COM }
7278906SEric.Saxe@Sun.COM
7288906SEric.Saxe@Sun.COM /*
72910797SEric.Saxe@Sun.COM * Initialize the parameters for the transience governor state machine
7308906SEric.Saxe@Sun.COM */
73110797SEric.Saxe@Sun.COM static void
cpupm_governor_initialize(void)7328906SEric.Saxe@Sun.COM cpupm_governor_initialize(void)
7338906SEric.Saxe@Sun.COM {
7348906SEric.Saxe@Sun.COM /*
73510797SEric.Saxe@Sun.COM * The default prediction intervals are specified in nanoseconds.
73610797SEric.Saxe@Sun.COM * Convert these to the equivalent in unscaled hrtime, which is the
73710797SEric.Saxe@Sun.COM * format of the timestamps passed to cpupm_utilization_event()
7388906SEric.Saxe@Sun.COM */
73910797SEric.Saxe@Sun.COM cpupm_ti_predict_interval = unscalehrtime(cpupm_ti_gov_interval);
74010797SEric.Saxe@Sun.COM cpupm_tw_predict_interval = unscalehrtime(cpupm_tw_gov_interval);
7418906SEric.Saxe@Sun.COM }
7428906SEric.Saxe@Sun.COM
7438906SEric.Saxe@Sun.COM /*
7448906SEric.Saxe@Sun.COM * Initiate a state change in all CPUPM domain instances of the specified type
7458906SEric.Saxe@Sun.COM */
7468906SEric.Saxe@Sun.COM static void
cpupm_state_change_global(cpupm_dtype_t type,cpupm_state_name_t state)7478906SEric.Saxe@Sun.COM cpupm_state_change_global(cpupm_dtype_t type, cpupm_state_name_t state)
7488906SEric.Saxe@Sun.COM {
7498906SEric.Saxe@Sun.COM cpu_t *cp;
7508906SEric.Saxe@Sun.COM pg_cmt_t *pwr_pg;
7518906SEric.Saxe@Sun.COM cpupm_domain_t *dom;
7528906SEric.Saxe@Sun.COM group_t *hwset;
7538906SEric.Saxe@Sun.COM group_iter_t giter;
7548906SEric.Saxe@Sun.COM pg_cpu_itr_t cpu_iter;
7558906SEric.Saxe@Sun.COM pghw_type_t hw;
7568906SEric.Saxe@Sun.COM
7578906SEric.Saxe@Sun.COM ASSERT(MUTEX_HELD(&cpu_lock));
7588906SEric.Saxe@Sun.COM
7598906SEric.Saxe@Sun.COM switch (type) {
7608906SEric.Saxe@Sun.COM case CPUPM_DTYPE_ACTIVE:
7618906SEric.Saxe@Sun.COM hw = PGHW_POW_ACTIVE;
7628906SEric.Saxe@Sun.COM break;
7638906SEric.Saxe@Sun.COM default:
7648906SEric.Saxe@Sun.COM /*
7658906SEric.Saxe@Sun.COM * Power domain types other than "active" unsupported.
7668906SEric.Saxe@Sun.COM */
7678906SEric.Saxe@Sun.COM ASSERT(type == CPUPM_DTYPE_ACTIVE);
7688906SEric.Saxe@Sun.COM return;
7698906SEric.Saxe@Sun.COM }
7708906SEric.Saxe@Sun.COM
7718906SEric.Saxe@Sun.COM if ((hwset = pghw_set_lookup(hw)) == NULL)
7728906SEric.Saxe@Sun.COM return;
7738906SEric.Saxe@Sun.COM
7748906SEric.Saxe@Sun.COM /*
7758906SEric.Saxe@Sun.COM * Iterate over the power domains
7768906SEric.Saxe@Sun.COM */
7778906SEric.Saxe@Sun.COM group_iter_init(&giter);
7788906SEric.Saxe@Sun.COM while ((pwr_pg = group_iterate(hwset, &giter)) != NULL) {
7798906SEric.Saxe@Sun.COM
7808906SEric.Saxe@Sun.COM dom = (cpupm_domain_t *)pwr_pg->cmt_pg.pghw_handle;
7818906SEric.Saxe@Sun.COM
7828906SEric.Saxe@Sun.COM /*
7838906SEric.Saxe@Sun.COM * Iterate over the CPUs in each domain
7848906SEric.Saxe@Sun.COM */
7858906SEric.Saxe@Sun.COM PG_CPU_ITR_INIT(pwr_pg, cpu_iter);
7868906SEric.Saxe@Sun.COM while ((cp = pg_cpu_next(&cpu_iter)) != NULL) {
7878906SEric.Saxe@Sun.COM (void) cpupm_change_state(cp, dom,
7888906SEric.Saxe@Sun.COM dom->cpd_named_states[state]);
7898906SEric.Saxe@Sun.COM }
7908906SEric.Saxe@Sun.COM }
7918906SEric.Saxe@Sun.COM }
792