18906SEric.Saxe@Sun.COM /* 28906SEric.Saxe@Sun.COM * CDDL HEADER START 38906SEric.Saxe@Sun.COM * 48906SEric.Saxe@Sun.COM * The contents of this file are subject to the terms of the 58906SEric.Saxe@Sun.COM * Common Development and Distribution License (the "License"). 68906SEric.Saxe@Sun.COM * You may not use this file except in compliance with the License. 78906SEric.Saxe@Sun.COM * 88906SEric.Saxe@Sun.COM * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 98906SEric.Saxe@Sun.COM * or http://www.opensolaris.org/os/licensing. 108906SEric.Saxe@Sun.COM * See the License for the specific language governing permissions 118906SEric.Saxe@Sun.COM * and limitations under the License. 128906SEric.Saxe@Sun.COM * 138906SEric.Saxe@Sun.COM * When distributing Covered Code, include this CDDL HEADER in each 148906SEric.Saxe@Sun.COM * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 158906SEric.Saxe@Sun.COM * If applicable, add the following below this CDDL HEADER, with the 168906SEric.Saxe@Sun.COM * fields enclosed by brackets "[]" replaced with your own identifying 178906SEric.Saxe@Sun.COM * information: Portions Copyright [yyyy] [name of copyright owner] 188906SEric.Saxe@Sun.COM * 198906SEric.Saxe@Sun.COM * CDDL HEADER END 208906SEric.Saxe@Sun.COM */ 218906SEric.Saxe@Sun.COM /* 228906SEric.Saxe@Sun.COM * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 238906SEric.Saxe@Sun.COM * Use is subject to license terms. 248906SEric.Saxe@Sun.COM */ 258906SEric.Saxe@Sun.COM 268906SEric.Saxe@Sun.COM #include <sys/cpu_pm.h> 278906SEric.Saxe@Sun.COM #include <sys/cmn_err.h> 28*10797SEric.Saxe@Sun.COM #include <sys/time.h> 298906SEric.Saxe@Sun.COM #include <sys/sdt.h> 308906SEric.Saxe@Sun.COM 318906SEric.Saxe@Sun.COM /* 328906SEric.Saxe@Sun.COM * Solaris Event Based CPU Power Manager 338906SEric.Saxe@Sun.COM * 348906SEric.Saxe@Sun.COM * This file implements platform independent event based CPU power management. 
 * When CPUs are configured into the system, the CMT scheduling subsystem will
 * query the platform to determine if the CPU belongs to any power management
 * domains. That is, sets of CPUs that share power management states.
 *
 * Active Power Management domains represent a group of CPUs across which the
 * Operating System can request speed changes (which may in turn result
 * in voltage changes). This allows the operating system to trade off
 * performance for power savings.
 *
 * Idle Power Management domains can enter power savings states when they are
 * unutilized. These states allow the Operating System to trade off power
 * for performance (in the form of latency to transition from the idle state
 * to an active one).
 *
 * For each active and idle power domain the CMT subsystem instantiates, a
 * cpupm_domain_t structure is created. As the dispatcher schedules threads
 * to run on the system's CPUs, it will also track the utilization of the
 * enumerated power domains. Significant changes in utilization will result
 * in the dispatcher sending the power manager events that relate to the
 * utilization of the power domain. The power manager receives the events,
 * and in the context of the policy objectives in force, may decide to request
 * the domain's power/performance state be changed.
 *
 * Under the "elastic" CPUPM policy, when the utilization rises, the CPU power
 * manager will request the CPUs in the domain run at their fastest (and most
 * power consuming) state. When the domain becomes idle (utilization at zero),
 * the power manager will request that the CPUs run at a speed that saves the
 * most power.
 *
 * The advantage of this scheme is that the CPU power manager, working with the
 * dispatcher, can be extremely responsive to changes in utilization: optimizing
 * for performance in the presence of utilization, and power savings in the
 * presence of idleness. Such close collaboration with the dispatcher has other
 * benefits that will play out in the form of more sophisticated power /
 * performance policy in the near future.
 *
 * Avoiding state thrashing in the presence of transient periods of utilization
 * and idleness while still being responsive to non-transient periods is key.
 * The power manager implements a "governor" that is used to throttle
 * state transitions when a significant amount of transient idle or transient
 * work is detected.
 *
 * Kernel background activity (e.g. taskq threads) is by far the most common
 * form of transient utilization. Ungoverned in the face of this utilization,
 * hundreds of state transitions per second would result on an idle system.
808906SEric.Saxe@Sun.COM * 818906SEric.Saxe@Sun.COM * Transient idleness is common when a thread briefly yields the CPU to 828906SEric.Saxe@Sun.COM * wait for an event elsewhere in the system. Where the idle period is short 838906SEric.Saxe@Sun.COM * enough, the overhead associated with making the state transition doesn't 848906SEric.Saxe@Sun.COM * justify the power savings. 85*10797SEric.Saxe@Sun.COM * 86*10797SEric.Saxe@Sun.COM * The following is the state machine for the governor implemented by 87*10797SEric.Saxe@Sun.COM * cpupm_utilization_event(): 88*10797SEric.Saxe@Sun.COM * 89*10797SEric.Saxe@Sun.COM * ----->---tw---->----- 90*10797SEric.Saxe@Sun.COM * / \ 91*10797SEric.Saxe@Sun.COM * (I)-<-ti-<- -<-ntw-<(W) 92*10797SEric.Saxe@Sun.COM * | \ / | 93*10797SEric.Saxe@Sun.COM * \ \ / / 94*10797SEric.Saxe@Sun.COM * >-nti/rm->(D)--->-tw->- 95*10797SEric.Saxe@Sun.COM * Key: 96*10797SEric.Saxe@Sun.COM * 97*10797SEric.Saxe@Sun.COM * States 98*10797SEric.Saxe@Sun.COM * - (D): Default (ungoverned) 99*10797SEric.Saxe@Sun.COM * - (W): Transient work governed 100*10797SEric.Saxe@Sun.COM * - (I): Transient idle governed 101*10797SEric.Saxe@Sun.COM * State Transitions 102*10797SEric.Saxe@Sun.COM * - tw: transient work 103*10797SEric.Saxe@Sun.COM * - ti: transient idleness 104*10797SEric.Saxe@Sun.COM * - ntw: non-transient work 105*10797SEric.Saxe@Sun.COM * - nti: non-transient idleness 106*10797SEric.Saxe@Sun.COM * - rm: thread remain event 1078906SEric.Saxe@Sun.COM */ 1088906SEric.Saxe@Sun.COM 1098906SEric.Saxe@Sun.COM static cpupm_domain_t *cpupm_domains = NULL; 1108906SEric.Saxe@Sun.COM 1118906SEric.Saxe@Sun.COM /* 1128906SEric.Saxe@Sun.COM * Uninitialized state of CPU power management is disabled 1138906SEric.Saxe@Sun.COM */ 1148906SEric.Saxe@Sun.COM cpupm_policy_t cpupm_policy = CPUPM_POLICY_DISABLED; 1158906SEric.Saxe@Sun.COM 1168906SEric.Saxe@Sun.COM /* 1178906SEric.Saxe@Sun.COM * Periods of utilization lasting less than this time interval are characterized 
1188906SEric.Saxe@Sun.COM * as transient. State changes associated with transient work are considered 1198906SEric.Saxe@Sun.COM * to be mispredicted. That is, it's not worth raising and lower power states 1208906SEric.Saxe@Sun.COM * where the utilization lasts for less than this interval. 1218906SEric.Saxe@Sun.COM */ 1228906SEric.Saxe@Sun.COM hrtime_t cpupm_tw_predict_interval; 1238906SEric.Saxe@Sun.COM 1248906SEric.Saxe@Sun.COM /* 1258906SEric.Saxe@Sun.COM * Periods of idleness lasting less than this time interval are characterized 1268906SEric.Saxe@Sun.COM * as transient. State changes associated with transient idle are considered 1278906SEric.Saxe@Sun.COM * to be mispredicted. That is, it's not worth lowering and raising power 1288906SEric.Saxe@Sun.COM * states where the idleness lasts for less than this interval. 1298906SEric.Saxe@Sun.COM */ 1308906SEric.Saxe@Sun.COM hrtime_t cpupm_ti_predict_interval; 1318906SEric.Saxe@Sun.COM 1328906SEric.Saxe@Sun.COM /* 1338906SEric.Saxe@Sun.COM * Number of mispredictions after which future transitions will be governed. 1348906SEric.Saxe@Sun.COM */ 135*10797SEric.Saxe@Sun.COM int cpupm_mispredict_thresh = 4; 1368906SEric.Saxe@Sun.COM 1378906SEric.Saxe@Sun.COM /* 1388906SEric.Saxe@Sun.COM * Likewise, the number of mispredicted governed transitions after which the 1398906SEric.Saxe@Sun.COM * governor will be removed. 1408906SEric.Saxe@Sun.COM */ 141*10797SEric.Saxe@Sun.COM int cpupm_mispredict_gov_thresh = 4; 1428906SEric.Saxe@Sun.COM 1438906SEric.Saxe@Sun.COM /* 144*10797SEric.Saxe@Sun.COM * The transient work and transient idle prediction intervals are specified 145*10797SEric.Saxe@Sun.COM * here. Tuning them higher will result in the transient work, and transient 146*10797SEric.Saxe@Sun.COM * idle governors being used more aggresively, which limits the frequency of 147*10797SEric.Saxe@Sun.COM * state transitions at the expense of performance and power savings, 148*10797SEric.Saxe@Sun.COM * respectively. 
The intervals are specified in nanoseconds. 149*10797SEric.Saxe@Sun.COM */ 150*10797SEric.Saxe@Sun.COM /* 151*10797SEric.Saxe@Sun.COM * 400 usec 1528906SEric.Saxe@Sun.COM */ 153*10797SEric.Saxe@Sun.COM #define CPUPM_DEFAULT_TI_INTERVAL 400000 1548906SEric.Saxe@Sun.COM /* 155*10797SEric.Saxe@Sun.COM * 400 usec 1568906SEric.Saxe@Sun.COM */ 157*10797SEric.Saxe@Sun.COM #define CPUPM_DEFAULT_TW_INTERVAL 400000 1588906SEric.Saxe@Sun.COM 159*10797SEric.Saxe@Sun.COM hrtime_t cpupm_ti_gov_interval = CPUPM_DEFAULT_TI_INTERVAL; 160*10797SEric.Saxe@Sun.COM hrtime_t cpupm_tw_gov_interval = CPUPM_DEFAULT_TW_INTERVAL; 1618906SEric.Saxe@Sun.COM 1628906SEric.Saxe@Sun.COM 163*10797SEric.Saxe@Sun.COM static void cpupm_governor_initialize(void); 1648906SEric.Saxe@Sun.COM static void cpupm_state_change_global(cpupm_dtype_t, cpupm_state_name_t); 1658906SEric.Saxe@Sun.COM 1668906SEric.Saxe@Sun.COM cpupm_policy_t 1678906SEric.Saxe@Sun.COM cpupm_get_policy(void) 1688906SEric.Saxe@Sun.COM { 1698906SEric.Saxe@Sun.COM return (cpupm_policy); 1708906SEric.Saxe@Sun.COM } 1718906SEric.Saxe@Sun.COM 1728906SEric.Saxe@Sun.COM int 1738906SEric.Saxe@Sun.COM cpupm_set_policy(cpupm_policy_t new_policy) 1748906SEric.Saxe@Sun.COM { 1758906SEric.Saxe@Sun.COM static int gov_init = 0; 1768906SEric.Saxe@Sun.COM int result = 0; 1778906SEric.Saxe@Sun.COM 1788906SEric.Saxe@Sun.COM mutex_enter(&cpu_lock); 1798906SEric.Saxe@Sun.COM if (new_policy == cpupm_policy) { 1808906SEric.Saxe@Sun.COM mutex_exit(&cpu_lock); 1818906SEric.Saxe@Sun.COM return (result); 1828906SEric.Saxe@Sun.COM } 1838906SEric.Saxe@Sun.COM 1848906SEric.Saxe@Sun.COM /* 1858906SEric.Saxe@Sun.COM * Pausing CPUs causes a high priority thread to be scheduled 1868906SEric.Saxe@Sun.COM * on all other CPUs (besides the current one). This locks out 1878906SEric.Saxe@Sun.COM * other CPUs from making CPUPM state transitions. 
1888906SEric.Saxe@Sun.COM */ 1898906SEric.Saxe@Sun.COM switch (new_policy) { 1908906SEric.Saxe@Sun.COM case CPUPM_POLICY_DISABLED: 1918906SEric.Saxe@Sun.COM pause_cpus(NULL); 1928906SEric.Saxe@Sun.COM cpupm_policy = CPUPM_POLICY_DISABLED; 1938906SEric.Saxe@Sun.COM start_cpus(); 1948906SEric.Saxe@Sun.COM 1958906SEric.Saxe@Sun.COM result = cmt_pad_disable(PGHW_POW_ACTIVE); 1968906SEric.Saxe@Sun.COM 1978906SEric.Saxe@Sun.COM /* 1988906SEric.Saxe@Sun.COM * Once PAD has been enabled, it should always be possible 1998906SEric.Saxe@Sun.COM * to disable it. 2008906SEric.Saxe@Sun.COM */ 2018906SEric.Saxe@Sun.COM ASSERT(result == 0); 2028906SEric.Saxe@Sun.COM 2038906SEric.Saxe@Sun.COM /* 2048906SEric.Saxe@Sun.COM * Bring all the active power domains to the maximum 2058906SEric.Saxe@Sun.COM * performance state. 2068906SEric.Saxe@Sun.COM */ 2078906SEric.Saxe@Sun.COM cpupm_state_change_global(CPUPM_DTYPE_ACTIVE, 2088906SEric.Saxe@Sun.COM CPUPM_STATE_MAX_PERF); 2098906SEric.Saxe@Sun.COM 2108906SEric.Saxe@Sun.COM break; 2118906SEric.Saxe@Sun.COM case CPUPM_POLICY_ELASTIC: 2128906SEric.Saxe@Sun.COM 2138906SEric.Saxe@Sun.COM result = cmt_pad_enable(PGHW_POW_ACTIVE); 2148906SEric.Saxe@Sun.COM if (result < 0) { 2158906SEric.Saxe@Sun.COM /* 2168906SEric.Saxe@Sun.COM * Failed to enable PAD across the active power 2178906SEric.Saxe@Sun.COM * domains, which may well be because none were 2188906SEric.Saxe@Sun.COM * enumerated. 2198906SEric.Saxe@Sun.COM */ 2208906SEric.Saxe@Sun.COM break; 2218906SEric.Saxe@Sun.COM } 2228906SEric.Saxe@Sun.COM 2238906SEric.Saxe@Sun.COM /* 224*10797SEric.Saxe@Sun.COM * Initialize the governor parameters the first time through. 
2258906SEric.Saxe@Sun.COM */ 2268906SEric.Saxe@Sun.COM if (gov_init == 0) { 227*10797SEric.Saxe@Sun.COM cpupm_governor_initialize(); 228*10797SEric.Saxe@Sun.COM gov_init = 1; 2298906SEric.Saxe@Sun.COM } 230*10797SEric.Saxe@Sun.COM 231*10797SEric.Saxe@Sun.COM pause_cpus(NULL); 2328906SEric.Saxe@Sun.COM cpupm_policy = CPUPM_POLICY_ELASTIC; 2338906SEric.Saxe@Sun.COM start_cpus(); 2348906SEric.Saxe@Sun.COM 2358906SEric.Saxe@Sun.COM break; 2368906SEric.Saxe@Sun.COM default: 2378906SEric.Saxe@Sun.COM cmn_err(CE_WARN, "Attempt to set unknown CPUPM policy %d\n", 2388906SEric.Saxe@Sun.COM new_policy); 2398906SEric.Saxe@Sun.COM ASSERT(0); 2408906SEric.Saxe@Sun.COM break; 2418906SEric.Saxe@Sun.COM } 2428906SEric.Saxe@Sun.COM mutex_exit(&cpu_lock); 2438906SEric.Saxe@Sun.COM 2448906SEric.Saxe@Sun.COM return (result); 2458906SEric.Saxe@Sun.COM } 2468906SEric.Saxe@Sun.COM 2478906SEric.Saxe@Sun.COM /* 2488906SEric.Saxe@Sun.COM * Look for an existing power domain 2498906SEric.Saxe@Sun.COM */ 2508906SEric.Saxe@Sun.COM static cpupm_domain_t * 2518906SEric.Saxe@Sun.COM cpupm_domain_find(id_t id, cpupm_dtype_t type) 2528906SEric.Saxe@Sun.COM { 2538906SEric.Saxe@Sun.COM ASSERT(MUTEX_HELD(&cpu_lock)); 2548906SEric.Saxe@Sun.COM 2558906SEric.Saxe@Sun.COM cpupm_domain_t *dom; 2568906SEric.Saxe@Sun.COM 2578906SEric.Saxe@Sun.COM dom = cpupm_domains; 2588906SEric.Saxe@Sun.COM while (dom != NULL) { 2598906SEric.Saxe@Sun.COM if (id == dom->cpd_id && type == dom->cpd_type) 2608906SEric.Saxe@Sun.COM return (dom); 2618906SEric.Saxe@Sun.COM dom = dom->cpd_next; 2628906SEric.Saxe@Sun.COM } 2638906SEric.Saxe@Sun.COM return (NULL); 2648906SEric.Saxe@Sun.COM } 2658906SEric.Saxe@Sun.COM 2668906SEric.Saxe@Sun.COM /* 2678906SEric.Saxe@Sun.COM * Create a new domain 2688906SEric.Saxe@Sun.COM */ 2698906SEric.Saxe@Sun.COM static cpupm_domain_t * 2708906SEric.Saxe@Sun.COM cpupm_domain_create(id_t id, cpupm_dtype_t type) 2718906SEric.Saxe@Sun.COM { 2728906SEric.Saxe@Sun.COM cpupm_domain_t *dom; 
2738906SEric.Saxe@Sun.COM 2748906SEric.Saxe@Sun.COM ASSERT(MUTEX_HELD(&cpu_lock)); 2758906SEric.Saxe@Sun.COM 2768906SEric.Saxe@Sun.COM dom = kmem_zalloc(sizeof (cpupm_domain_t), KM_SLEEP); 2778906SEric.Saxe@Sun.COM dom->cpd_id = id; 2788906SEric.Saxe@Sun.COM dom->cpd_type = type; 2798906SEric.Saxe@Sun.COM 2808906SEric.Saxe@Sun.COM /* Link into the known domain list */ 2818906SEric.Saxe@Sun.COM dom->cpd_next = cpupm_domains; 2828906SEric.Saxe@Sun.COM cpupm_domains = dom; 2838906SEric.Saxe@Sun.COM 2848906SEric.Saxe@Sun.COM return (dom); 2858906SEric.Saxe@Sun.COM } 2868906SEric.Saxe@Sun.COM 2878906SEric.Saxe@Sun.COM static void 2888906SEric.Saxe@Sun.COM cpupm_domain_state_enum(struct cpu *cp, cpupm_domain_t *dom) 2898906SEric.Saxe@Sun.COM { 2908906SEric.Saxe@Sun.COM /* 2918906SEric.Saxe@Sun.COM * In the envent we're enumerating because the domain's state 2928906SEric.Saxe@Sun.COM * configuration has changed, toss any existing states. 2938906SEric.Saxe@Sun.COM */ 2948906SEric.Saxe@Sun.COM if (dom->cpd_nstates > 0) { 2958906SEric.Saxe@Sun.COM kmem_free(dom->cpd_states, 2968906SEric.Saxe@Sun.COM sizeof (cpupm_state_t) * dom->cpd_nstates); 2978906SEric.Saxe@Sun.COM dom->cpd_nstates = 0; 2988906SEric.Saxe@Sun.COM } 2998906SEric.Saxe@Sun.COM 3008906SEric.Saxe@Sun.COM /* 3018906SEric.Saxe@Sun.COM * Query to determine the number of states, allocate storage 3028906SEric.Saxe@Sun.COM * large enough to hold the state information, and pass it back 3038906SEric.Saxe@Sun.COM * to the platform driver to complete the enumeration. 
3048906SEric.Saxe@Sun.COM */ 3058906SEric.Saxe@Sun.COM dom->cpd_nstates = cpupm_plat_state_enumerate(cp, dom->cpd_type, NULL); 3068906SEric.Saxe@Sun.COM 3078906SEric.Saxe@Sun.COM if (dom->cpd_nstates == 0) 3088906SEric.Saxe@Sun.COM return; 3098906SEric.Saxe@Sun.COM 3108906SEric.Saxe@Sun.COM dom->cpd_states = 3118906SEric.Saxe@Sun.COM kmem_zalloc(dom->cpd_nstates * sizeof (cpupm_state_t), KM_SLEEP); 3128906SEric.Saxe@Sun.COM (void) cpupm_plat_state_enumerate(cp, dom->cpd_type, dom->cpd_states); 3138906SEric.Saxe@Sun.COM } 3148906SEric.Saxe@Sun.COM 3158906SEric.Saxe@Sun.COM /* 3168906SEric.Saxe@Sun.COM * Initialize the specified type of power domain on behalf of the CPU 3178906SEric.Saxe@Sun.COM */ 3188906SEric.Saxe@Sun.COM cpupm_domain_t * 3198906SEric.Saxe@Sun.COM cpupm_domain_init(struct cpu *cp, cpupm_dtype_t type) 3208906SEric.Saxe@Sun.COM { 3218906SEric.Saxe@Sun.COM cpupm_domain_t *dom; 3228906SEric.Saxe@Sun.COM id_t did; 3238906SEric.Saxe@Sun.COM 3248906SEric.Saxe@Sun.COM ASSERT(MUTEX_HELD(&cpu_lock)); 3258906SEric.Saxe@Sun.COM 3268906SEric.Saxe@Sun.COM /* 3278906SEric.Saxe@Sun.COM * Instantiate the domain if it doesn't already exist 3288906SEric.Saxe@Sun.COM * and enumerate its power states. 3298906SEric.Saxe@Sun.COM */ 3308906SEric.Saxe@Sun.COM did = cpupm_domain_id(cp, type); 3318906SEric.Saxe@Sun.COM dom = cpupm_domain_find(did, type); 3328906SEric.Saxe@Sun.COM if (dom == NULL) { 3338906SEric.Saxe@Sun.COM dom = cpupm_domain_create(did, type); 3348906SEric.Saxe@Sun.COM cpupm_domain_state_enum(cp, dom); 3358906SEric.Saxe@Sun.COM } 3368906SEric.Saxe@Sun.COM 3378906SEric.Saxe@Sun.COM /* 3388906SEric.Saxe@Sun.COM * Named state initialization 3398906SEric.Saxe@Sun.COM */ 3408906SEric.Saxe@Sun.COM if (type == CPUPM_DTYPE_ACTIVE) { 3418906SEric.Saxe@Sun.COM /* 3428906SEric.Saxe@Sun.COM * For active power domains, the highest performance 3438906SEric.Saxe@Sun.COM * state is defined as first state returned from 3448906SEric.Saxe@Sun.COM * the domain enumeration. 
3458906SEric.Saxe@Sun.COM */ 3468906SEric.Saxe@Sun.COM dom->cpd_named_states[CPUPM_STATE_MAX_PERF] = 3478906SEric.Saxe@Sun.COM &dom->cpd_states[0]; 3488906SEric.Saxe@Sun.COM dom->cpd_named_states[CPUPM_STATE_LOW_POWER] = 3498906SEric.Saxe@Sun.COM &dom->cpd_states[dom->cpd_nstates - 1]; 3508906SEric.Saxe@Sun.COM 3518906SEric.Saxe@Sun.COM /* 3528906SEric.Saxe@Sun.COM * Begin by assuming CPU is running at the max perf state. 3538906SEric.Saxe@Sun.COM */ 3548906SEric.Saxe@Sun.COM dom->cpd_state = dom->cpd_named_states[CPUPM_STATE_MAX_PERF]; 3558906SEric.Saxe@Sun.COM } 3568906SEric.Saxe@Sun.COM 3578906SEric.Saxe@Sun.COM return (dom); 3588906SEric.Saxe@Sun.COM } 3598906SEric.Saxe@Sun.COM 3608906SEric.Saxe@Sun.COM /* 3618906SEric.Saxe@Sun.COM * Return the id associated with the given type of domain 3628906SEric.Saxe@Sun.COM * to which cp belongs 3638906SEric.Saxe@Sun.COM */ 3648906SEric.Saxe@Sun.COM id_t 3658906SEric.Saxe@Sun.COM cpupm_domain_id(struct cpu *cp, cpupm_dtype_t type) 3668906SEric.Saxe@Sun.COM { 3678906SEric.Saxe@Sun.COM return (cpupm_plat_domain_id(cp, type)); 3688906SEric.Saxe@Sun.COM } 3698906SEric.Saxe@Sun.COM 3708906SEric.Saxe@Sun.COM /* 3718906SEric.Saxe@Sun.COM * Initiate a state change for the specified domain on behalf of cp 3728906SEric.Saxe@Sun.COM */ 3738906SEric.Saxe@Sun.COM int 3748906SEric.Saxe@Sun.COM cpupm_change_state(struct cpu *cp, cpupm_domain_t *dom, cpupm_state_t *state) 3758906SEric.Saxe@Sun.COM { 3768906SEric.Saxe@Sun.COM if (cpupm_plat_change_state(cp, state) < 0) 3778906SEric.Saxe@Sun.COM return (-1); 3788906SEric.Saxe@Sun.COM 3798906SEric.Saxe@Sun.COM DTRACE_PROBE2(cpupm__change__state, 3808906SEric.Saxe@Sun.COM cpupm_domain_t *, dom, 3818906SEric.Saxe@Sun.COM cpupm_state_t *, state); 3828906SEric.Saxe@Sun.COM 3838906SEric.Saxe@Sun.COM dom->cpd_state = state; 3848906SEric.Saxe@Sun.COM return (0); 3858906SEric.Saxe@Sun.COM } 3868906SEric.Saxe@Sun.COM 3878906SEric.Saxe@Sun.COM /* 3888906SEric.Saxe@Sun.COM * Interface into the CPU 
power manager to indicate a significant change 3898906SEric.Saxe@Sun.COM * in utilization of the specified active power domain 3908906SEric.Saxe@Sun.COM */ 3918906SEric.Saxe@Sun.COM void 3928906SEric.Saxe@Sun.COM cpupm_utilization_event(struct cpu *cp, hrtime_t now, cpupm_domain_t *dom, 3938906SEric.Saxe@Sun.COM cpupm_util_event_t event) 3948906SEric.Saxe@Sun.COM { 3958906SEric.Saxe@Sun.COM cpupm_state_t *new_state = NULL; 3968906SEric.Saxe@Sun.COM hrtime_t last; 3978906SEric.Saxe@Sun.COM 3988906SEric.Saxe@Sun.COM if (cpupm_policy == CPUPM_POLICY_DISABLED) { 3998906SEric.Saxe@Sun.COM return; 4008906SEric.Saxe@Sun.COM } 4018906SEric.Saxe@Sun.COM 4028906SEric.Saxe@Sun.COM /* 4038906SEric.Saxe@Sun.COM * What follows is a simple elastic power state management policy. 4048906SEric.Saxe@Sun.COM * 4058906SEric.Saxe@Sun.COM * If the utilization has become non-zero, and the domain was 4068906SEric.Saxe@Sun.COM * previously at it's lowest power state, then transition it 4078906SEric.Saxe@Sun.COM * to the highest state in the spirit of "race to idle". 4088906SEric.Saxe@Sun.COM * 4098906SEric.Saxe@Sun.COM * If the utilization has dropped to zero, then transition the 4108906SEric.Saxe@Sun.COM * domain to its lowest power state. 4118906SEric.Saxe@Sun.COM * 412*10797SEric.Saxe@Sun.COM * Statistics are maintained to implement a governor to reduce state 4138906SEric.Saxe@Sun.COM * transitions resulting from either transient work, or periods of 4148906SEric.Saxe@Sun.COM * transient idleness on the domain. 4158906SEric.Saxe@Sun.COM */ 4168906SEric.Saxe@Sun.COM switch (event) { 4178906SEric.Saxe@Sun.COM case CPUPM_DOM_REMAIN_BUSY: 4188906SEric.Saxe@Sun.COM 4198906SEric.Saxe@Sun.COM /* 4208906SEric.Saxe@Sun.COM * We've received an event that the domain is running a thread 4218906SEric.Saxe@Sun.COM * that's made it to the end of it's time slice. If we are at 4228906SEric.Saxe@Sun.COM * low power, then raise it. 
If the transient work governor 4238906SEric.Saxe@Sun.COM * is engaged, then remove it. 4248906SEric.Saxe@Sun.COM */ 4258906SEric.Saxe@Sun.COM if (dom->cpd_state == 4268906SEric.Saxe@Sun.COM dom->cpd_named_states[CPUPM_STATE_LOW_POWER]) { 4278906SEric.Saxe@Sun.COM new_state = 4288906SEric.Saxe@Sun.COM dom->cpd_named_states[CPUPM_STATE_MAX_PERF]; 429*10797SEric.Saxe@Sun.COM if (dom->cpd_governor == CPUPM_GOV_TRANS_WORK) { 430*10797SEric.Saxe@Sun.COM dom->cpd_governor = CPUPM_GOV_DISENGAGED; 4318906SEric.Saxe@Sun.COM dom->cpd_tw = 0; 4328906SEric.Saxe@Sun.COM } 4338906SEric.Saxe@Sun.COM } 4348906SEric.Saxe@Sun.COM break; 4358906SEric.Saxe@Sun.COM 4368906SEric.Saxe@Sun.COM case CPUPM_DOM_BUSY_FROM_IDLE: 4378906SEric.Saxe@Sun.COM last = dom->cpd_last_lower; 4388906SEric.Saxe@Sun.COM dom->cpd_last_raise = now; 4398906SEric.Saxe@Sun.COM 4408906SEric.Saxe@Sun.COM DTRACE_PROBE3(cpupm__raise__req, 4418906SEric.Saxe@Sun.COM cpupm_domain_t *, dom, 4428906SEric.Saxe@Sun.COM hrtime_t, last, 4438906SEric.Saxe@Sun.COM hrtime_t, now); 4448906SEric.Saxe@Sun.COM 4458906SEric.Saxe@Sun.COM if (dom->cpd_state == 4468906SEric.Saxe@Sun.COM dom->cpd_named_states[CPUPM_STATE_LOW_POWER]) { 4478906SEric.Saxe@Sun.COM 4488906SEric.Saxe@Sun.COM /* 4498906SEric.Saxe@Sun.COM * There's non-zero utilization, and the domain is 4508906SEric.Saxe@Sun.COM * running in the lower power state. Before we 451*10797SEric.Saxe@Sun.COM * consider raising power, check if the preceeding 452*10797SEric.Saxe@Sun.COM * idle period was transient in duration. 453*10797SEric.Saxe@Sun.COM * 454*10797SEric.Saxe@Sun.COM * If the domain is already transient work governed, 455*10797SEric.Saxe@Sun.COM * then we don't bother maintaining transient idle 456*10797SEric.Saxe@Sun.COM * statistics, as the presence of enough transient work 457*10797SEric.Saxe@Sun.COM * can also make the domain frequently transiently idle. 
458*10797SEric.Saxe@Sun.COM * In this case, we still want to remain transient work 459*10797SEric.Saxe@Sun.COM * governed. 4608906SEric.Saxe@Sun.COM */ 461*10797SEric.Saxe@Sun.COM if (dom->cpd_governor == CPUPM_GOV_DISENGAGED) { 4628906SEric.Saxe@Sun.COM if ((now - last) < cpupm_ti_predict_interval) { 4638906SEric.Saxe@Sun.COM /* 4648906SEric.Saxe@Sun.COM * We're raising the domain power and 4658906SEric.Saxe@Sun.COM * we *just* lowered it. Consider 4668906SEric.Saxe@Sun.COM * this a mispredicted power state 4678906SEric.Saxe@Sun.COM * transition due to a transient 4688906SEric.Saxe@Sun.COM * idle period. 4698906SEric.Saxe@Sun.COM */ 470*10797SEric.Saxe@Sun.COM if (++dom->cpd_ti >= 4718906SEric.Saxe@Sun.COM cpupm_mispredict_thresh) { 4728906SEric.Saxe@Sun.COM /* 4738906SEric.Saxe@Sun.COM * There's enough transient 4748906SEric.Saxe@Sun.COM * idle transitions to 4758906SEric.Saxe@Sun.COM * justify governing future 4768906SEric.Saxe@Sun.COM * lowering requests. 4778906SEric.Saxe@Sun.COM */ 478*10797SEric.Saxe@Sun.COM dom->cpd_governor = 479*10797SEric.Saxe@Sun.COM CPUPM_GOV_TRANS_IDLE; 4808906SEric.Saxe@Sun.COM dom->cpd_ti = 0; 4818906SEric.Saxe@Sun.COM DTRACE_PROBE1( 4828906SEric.Saxe@Sun.COM cpupm__ti__governed, 4838906SEric.Saxe@Sun.COM cpupm_domain_t *, dom); 4848906SEric.Saxe@Sun.COM } 4858906SEric.Saxe@Sun.COM } else { 4868906SEric.Saxe@Sun.COM /* 4878906SEric.Saxe@Sun.COM * We correctly predicted the last 4888906SEric.Saxe@Sun.COM * lowering. 4898906SEric.Saxe@Sun.COM */ 4908906SEric.Saxe@Sun.COM dom->cpd_ti = 0; 4918906SEric.Saxe@Sun.COM } 4928906SEric.Saxe@Sun.COM } 493*10797SEric.Saxe@Sun.COM if (dom->cpd_governor == CPUPM_GOV_TRANS_WORK) { 4948906SEric.Saxe@Sun.COM /* 4958906SEric.Saxe@Sun.COM * Raise requests are governed due to 4968906SEric.Saxe@Sun.COM * transient work. 
4978906SEric.Saxe@Sun.COM */ 4988906SEric.Saxe@Sun.COM DTRACE_PROBE1(cpupm__raise__governed, 4998906SEric.Saxe@Sun.COM cpupm_domain_t *, dom); 5008906SEric.Saxe@Sun.COM 5018906SEric.Saxe@Sun.COM return; 5028906SEric.Saxe@Sun.COM } 5038906SEric.Saxe@Sun.COM /* 5048906SEric.Saxe@Sun.COM * Prepare to transition to the higher power state 5058906SEric.Saxe@Sun.COM */ 5068906SEric.Saxe@Sun.COM new_state = dom->cpd_named_states[CPUPM_STATE_MAX_PERF]; 5078906SEric.Saxe@Sun.COM 5088906SEric.Saxe@Sun.COM } else if (dom->cpd_state == 5098906SEric.Saxe@Sun.COM dom->cpd_named_states[CPUPM_STATE_MAX_PERF]) { 5108906SEric.Saxe@Sun.COM 5118906SEric.Saxe@Sun.COM /* 5128906SEric.Saxe@Sun.COM * Utilization is non-zero, and we're already running 5138906SEric.Saxe@Sun.COM * in the higher power state. Take this opportunity to 5148906SEric.Saxe@Sun.COM * perform some book keeping if the last lowering 5158906SEric.Saxe@Sun.COM * request was governed. 5168906SEric.Saxe@Sun.COM */ 517*10797SEric.Saxe@Sun.COM if (dom->cpd_governor == CPUPM_GOV_TRANS_IDLE) { 518*10797SEric.Saxe@Sun.COM 5198906SEric.Saxe@Sun.COM if ((now - last) >= cpupm_ti_predict_interval) { 5208906SEric.Saxe@Sun.COM /* 5218906SEric.Saxe@Sun.COM * The domain is transient idle 5228906SEric.Saxe@Sun.COM * governed, and we mispredicted 5238906SEric.Saxe@Sun.COM * governing the last lowering request. 5248906SEric.Saxe@Sun.COM */ 5258906SEric.Saxe@Sun.COM if (++dom->cpd_ti >= 5268906SEric.Saxe@Sun.COM cpupm_mispredict_gov_thresh) { 5278906SEric.Saxe@Sun.COM /* 5288906SEric.Saxe@Sun.COM * There's enough non-transient 5298906SEric.Saxe@Sun.COM * idle periods to justify 5308906SEric.Saxe@Sun.COM * removing the governor. 
5318906SEric.Saxe@Sun.COM */ 532*10797SEric.Saxe@Sun.COM dom->cpd_governor = 533*10797SEric.Saxe@Sun.COM CPUPM_GOV_DISENGAGED; 5348906SEric.Saxe@Sun.COM dom->cpd_ti = 0; 5358906SEric.Saxe@Sun.COM DTRACE_PROBE1( 5368906SEric.Saxe@Sun.COM cpupm__ti__ungoverned, 5378906SEric.Saxe@Sun.COM cpupm_domain_t *, dom); 5388906SEric.Saxe@Sun.COM } 5398906SEric.Saxe@Sun.COM } else { 5408906SEric.Saxe@Sun.COM /* 5418906SEric.Saxe@Sun.COM * Correctly predicted governing the 5428906SEric.Saxe@Sun.COM * last lowering request. 5438906SEric.Saxe@Sun.COM */ 5448906SEric.Saxe@Sun.COM dom->cpd_ti = 0; 5458906SEric.Saxe@Sun.COM } 5468906SEric.Saxe@Sun.COM } 5478906SEric.Saxe@Sun.COM } 5488906SEric.Saxe@Sun.COM break; 5498906SEric.Saxe@Sun.COM 5508906SEric.Saxe@Sun.COM case CPUPM_DOM_IDLE_FROM_BUSY: 5518906SEric.Saxe@Sun.COM last = dom->cpd_last_raise; 5528906SEric.Saxe@Sun.COM dom->cpd_last_lower = now; 5538906SEric.Saxe@Sun.COM 5548906SEric.Saxe@Sun.COM DTRACE_PROBE3(cpupm__lower__req, 5558906SEric.Saxe@Sun.COM cpupm_domain_t *, dom, 5568906SEric.Saxe@Sun.COM hrtime_t, last, 5578906SEric.Saxe@Sun.COM hrtime_t, now); 5588906SEric.Saxe@Sun.COM 5598906SEric.Saxe@Sun.COM if (dom->cpd_state == 5608906SEric.Saxe@Sun.COM dom->cpd_named_states[CPUPM_STATE_MAX_PERF]) { 5618906SEric.Saxe@Sun.COM 5628906SEric.Saxe@Sun.COM /* 5638906SEric.Saxe@Sun.COM * The domain is idle, and is running in the highest 5648906SEric.Saxe@Sun.COM * performance state. Before we consider lowering power, 5658906SEric.Saxe@Sun.COM * perform some book keeping for the transient work 5668906SEric.Saxe@Sun.COM * governor. 5678906SEric.Saxe@Sun.COM */ 568*10797SEric.Saxe@Sun.COM if (dom->cpd_governor == CPUPM_GOV_DISENGAGED) { 5698906SEric.Saxe@Sun.COM if ((now - last) < cpupm_tw_predict_interval) { 5708906SEric.Saxe@Sun.COM /* 5718906SEric.Saxe@Sun.COM * We're lowering the domain power and 5728906SEric.Saxe@Sun.COM * we *just* raised it. 
Consider the 5738906SEric.Saxe@Sun.COM * last raise mispredicted due to 5748906SEric.Saxe@Sun.COM * transient work. 5758906SEric.Saxe@Sun.COM */ 5768906SEric.Saxe@Sun.COM if (++dom->cpd_tw >= 5778906SEric.Saxe@Sun.COM cpupm_mispredict_thresh) { 5788906SEric.Saxe@Sun.COM /* 579*10797SEric.Saxe@Sun.COM * There's enough transient work 5808906SEric.Saxe@Sun.COM * transitions to justify 581*10797SEric.Saxe@Sun.COM * governing future raise 5828906SEric.Saxe@Sun.COM * requests. 5838906SEric.Saxe@Sun.COM */ 584*10797SEric.Saxe@Sun.COM dom->cpd_governor = 585*10797SEric.Saxe@Sun.COM CPUPM_GOV_TRANS_WORK; 5868906SEric.Saxe@Sun.COM dom->cpd_tw = 0; 5878906SEric.Saxe@Sun.COM DTRACE_PROBE1( 5888906SEric.Saxe@Sun.COM cpupm__tw__governed, 5898906SEric.Saxe@Sun.COM cpupm_domain_t *, dom); 5908906SEric.Saxe@Sun.COM } 5918906SEric.Saxe@Sun.COM } else { 5928906SEric.Saxe@Sun.COM /* 5938906SEric.Saxe@Sun.COM * We correctly predicted during the 5948906SEric.Saxe@Sun.COM * last raise. 5958906SEric.Saxe@Sun.COM */ 5968906SEric.Saxe@Sun.COM dom->cpd_tw = 0; 5978906SEric.Saxe@Sun.COM } 5988906SEric.Saxe@Sun.COM } 599*10797SEric.Saxe@Sun.COM if (dom->cpd_governor == CPUPM_GOV_TRANS_IDLE) { 6008906SEric.Saxe@Sun.COM /* 6018906SEric.Saxe@Sun.COM * Lowering requests are governed due to 6028906SEric.Saxe@Sun.COM * transient idleness. 6038906SEric.Saxe@Sun.COM */ 6048906SEric.Saxe@Sun.COM DTRACE_PROBE1(cpupm__lowering__governed, 6058906SEric.Saxe@Sun.COM cpupm_domain_t *, dom); 6068906SEric.Saxe@Sun.COM 6078906SEric.Saxe@Sun.COM return; 6088906SEric.Saxe@Sun.COM } 6098906SEric.Saxe@Sun.COM 6108906SEric.Saxe@Sun.COM /* 6118906SEric.Saxe@Sun.COM * Prepare to transition to a lower power state. 
6128906SEric.Saxe@Sun.COM */ 6138906SEric.Saxe@Sun.COM new_state = 6148906SEric.Saxe@Sun.COM dom->cpd_named_states[CPUPM_STATE_LOW_POWER]; 6158906SEric.Saxe@Sun.COM 6168906SEric.Saxe@Sun.COM } else if (dom->cpd_state == 6178906SEric.Saxe@Sun.COM dom->cpd_named_states[CPUPM_STATE_LOW_POWER]) { 6188906SEric.Saxe@Sun.COM 6198906SEric.Saxe@Sun.COM /* 6208906SEric.Saxe@Sun.COM * The domain is idle, and we're already running in 6218906SEric.Saxe@Sun.COM * the lower power state. Take this opportunity to 6228906SEric.Saxe@Sun.COM * perform some book keeping if the last raising 6238906SEric.Saxe@Sun.COM * request was governed. 6248906SEric.Saxe@Sun.COM */ 625*10797SEric.Saxe@Sun.COM if (dom->cpd_governor == CPUPM_GOV_TRANS_WORK) { 6268906SEric.Saxe@Sun.COM if ((now - last) >= cpupm_tw_predict_interval) { 6278906SEric.Saxe@Sun.COM /* 6288906SEric.Saxe@Sun.COM * The domain is transient work 6298906SEric.Saxe@Sun.COM * governed, and we mispredicted 6308906SEric.Saxe@Sun.COM * governing the last raising request. 6318906SEric.Saxe@Sun.COM */ 6328906SEric.Saxe@Sun.COM if (++dom->cpd_tw >= 6338906SEric.Saxe@Sun.COM cpupm_mispredict_gov_thresh) { 6348906SEric.Saxe@Sun.COM /* 6358906SEric.Saxe@Sun.COM * There's enough non-transient 6368906SEric.Saxe@Sun.COM * work to justify removing 6378906SEric.Saxe@Sun.COM * the governor. 6388906SEric.Saxe@Sun.COM */ 639*10797SEric.Saxe@Sun.COM dom->cpd_governor = 640*10797SEric.Saxe@Sun.COM CPUPM_GOV_DISENGAGED; 6418906SEric.Saxe@Sun.COM dom->cpd_tw = 0; 6428906SEric.Saxe@Sun.COM DTRACE_PROBE1( 6438906SEric.Saxe@Sun.COM cpupm__tw__ungoverned, 6448906SEric.Saxe@Sun.COM cpupm_domain_t *, dom); 6458906SEric.Saxe@Sun.COM } 6468906SEric.Saxe@Sun.COM } else { 6478906SEric.Saxe@Sun.COM /* 6488906SEric.Saxe@Sun.COM * We correctly predicted governing 6498906SEric.Saxe@Sun.COM * the last raise. 
6508906SEric.Saxe@Sun.COM */ 6518906SEric.Saxe@Sun.COM dom->cpd_tw = 0; 6528906SEric.Saxe@Sun.COM } 6538906SEric.Saxe@Sun.COM } 6548906SEric.Saxe@Sun.COM } 6558906SEric.Saxe@Sun.COM break; 6568906SEric.Saxe@Sun.COM } 6578906SEric.Saxe@Sun.COM /* 6588906SEric.Saxe@Sun.COM * Change the power state 6598906SEric.Saxe@Sun.COM * Not much currently done if this doesn't succeed 6608906SEric.Saxe@Sun.COM */ 6618906SEric.Saxe@Sun.COM if (new_state) 6628906SEric.Saxe@Sun.COM (void) cpupm_change_state(cp, dom, new_state); 6638906SEric.Saxe@Sun.COM }


/*
 * Interface called by platforms to dynamically change the
 * MAX performance cpupm state
 *
 * cp			CPU whose active power domain is looked up
 * max_perf_level	index into the domain's cpd_states[] array that
 *			should become the new CPUPM_STATE_MAX_PERF
 *
 * If no active power domain is found for the CPU, nothing is done.
 * If the domain is currently running in its MAX_PERF state, it is
 * switched to the newly designated MAX_PERF state; the result of that
 * state change request is ignored.
 */
void
cpupm_redefine_max_activepwr_state(struct cpu *cp, int max_perf_level)
{
	cpupm_domain_t	*dom;
	id_t		did;
	cpupm_dtype_t	type = CPUPM_DTYPE_ACTIVE;
	boolean_t	change_state = B_FALSE;
	cpupm_state_t	*new_state = NULL;

	did = cpupm_domain_id(cp, type);
	mutex_enter(&cpu_lock);
	dom = cpupm_domain_find(did, type);
	mutex_exit(&cpu_lock);

	/*
	 * Can use a lock to avoid changing the power state of the cpu when
	 * CPUPM_STATE_MAX_PERF is getting changed.
	 * Since the occurrence of events to change MAX_PERF is not frequent,
	 * it may not be a good idea to overburden with locks. In the worst
	 * case, for one cycle the power may not get changed to the required
	 * level
	 */
	if (dom != NULL) {
		/*
		 * Remember whether the domain is sitting in the current
		 * MAX_PERF state before the named state is re-pointed below.
		 */
		if (dom->cpd_state ==
		    dom->cpd_named_states[CPUPM_STATE_MAX_PERF]) {
			change_state = B_TRUE;
		}

		/*
		 * If an out of range level is passed, use the lowest supported
		 * speed (the last entry of cpd_states[]).
		 *
		 * The clamp applies to any domain with at least one state, so
		 * that a single-state domain cannot be indexed past its only
		 * entry. A domain reporting zero states has no valid index to
		 * clamp to, so the level is left untouched in that case.
		 *
		 * NOTE(review): if cpd_nstates is an unsigned type, the >=
		 * comparison already catches negative levels through integer
		 * conversion; the explicit < 0 test makes the rejection
		 * independent of the field's type.
		 */
		if (dom->cpd_nstates > 0 &&
		    (max_perf_level < 0 ||
		    max_perf_level >= dom->cpd_nstates)) {
			max_perf_level = dom->cpd_nstates - 1;
		}

		dom->cpd_named_states[CPUPM_STATE_MAX_PERF] =
		    &dom->cpd_states[max_perf_level];

		/*
		 * If the current state is MAX_PERF, change the current state
		 * to the new MAX_PERF
		 */
		if (change_state) {
			new_state =
			    dom->cpd_named_states[CPUPM_STATE_MAX_PERF];
			if (new_state) {
				(void) cpupm_change_state(cp, dom, new_state);
			}
		}
	}
}

/*
 * Initialize the parameters for the transience governor state machine
 *
 * Derives cpupm_ti_predict_interval and cpupm_tw_predict_interval from
 * cpupm_ti_gov_interval and cpupm_tw_gov_interval respectively.
 */
static void
cpupm_governor_initialize(void)
{
	/*
	 * The default prediction intervals are specified in nanoseconds.
	 * Convert these to the equivalent in unscaled hrtime, which is the
	 * format of the timestamps passed to cpupm_utilization_event()
	 */
	cpupm_ti_predict_interval = unscalehrtime(cpupm_ti_gov_interval);
	cpupm_tw_predict_interval = unscalehrtime(cpupm_tw_gov_interval);
}

/*
 * Initiate a state change in all CPUPM domain instances of the specified type
 *
 * type		power domain type; only CPUPM_DTYPE_ACTIVE is handled
 * state	named state (index into cpd_named_states[]) to request
 *
 * The caller must hold cpu_lock. For every power domain of the given
 * type, a change to the domain's named state is requested on behalf of
 * each CPU in that domain. The results of the individual requests are
 * ignored.
 */
static void
cpupm_state_change_global(cpupm_dtype_t type, cpupm_state_name_t state)
{
	cpu_t		*cp;
	pg_cmt_t	*pwr_pg;
	cpupm_domain_t	*dom;
	group_t		*hwset;
	group_iter_t	giter;
	pg_cpu_itr_t	cpu_iter;
	pghw_type_t	hw;

	ASSERT(MUTEX_HELD(&cpu_lock));

	/*
	 * Map the power domain type onto the hardware sharing relationship
	 * used to look up the corresponding set of processor groups.
	 */
	switch (type) {
	case CPUPM_DTYPE_ACTIVE:
		hw = PGHW_POW_ACTIVE;
		break;
	default:
		/*
		 * Power domain types other than "active" unsupported.
		 */
		ASSERT(type == CPUPM_DTYPE_ACTIVE);
		return;
	}

	/*
	 * Nothing to do if no groups exist for this relationship.
	 */
	if ((hwset = pghw_set_lookup(hw)) == NULL)
		return;

	/*
	 * Iterate over the power domains
	 */
	group_iter_init(&giter);
	while ((pwr_pg = group_iterate(hwset, &giter)) != NULL) {

		/*
		 * The group's hardware handle holds its power domain.
		 */
		dom = (cpupm_domain_t *)pwr_pg->cmt_pg.pghw_handle;

		/*
		 * Iterate over the CPUs in each domain
		 */
		PG_CPU_ITR_INIT(pwr_pg, cpu_iter);
		while ((cp = pg_cpu_next(&cpu_iter)) != NULL) {
			(void) cpupm_change_state(cp, dom,
			    dom->cpd_named_states[state]);
		}
	}
}