xref: /onnv-gate/usr/src/cmd/powertop/common/cpufreq.c (revision 11122:393b5ac48d9b)
19338Srafael.vanoni@sun.com /*
29338Srafael.vanoni@sun.com  * Copyright 2009, Intel Corporation
39338Srafael.vanoni@sun.com  * Copyright 2009, Sun Microsystems, Inc
49338Srafael.vanoni@sun.com  *
59338Srafael.vanoni@sun.com  * This file is part of PowerTOP
69338Srafael.vanoni@sun.com  *
79338Srafael.vanoni@sun.com  * This program file is free software; you can redistribute it and/or modify it
89338Srafael.vanoni@sun.com  * under the terms of the GNU General Public License as published by the
99338Srafael.vanoni@sun.com  * Free Software Foundation; version 2 of the License.
109338Srafael.vanoni@sun.com  *
119338Srafael.vanoni@sun.com  * This program is distributed in the hope that it will be useful, but WITHOUT
129338Srafael.vanoni@sun.com  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
139338Srafael.vanoni@sun.com  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
149338Srafael.vanoni@sun.com  * for more details.
159338Srafael.vanoni@sun.com  *
169338Srafael.vanoni@sun.com  * You should have received a copy of the GNU General Public License
179338Srafael.vanoni@sun.com  * along with this program in a file named COPYING; if not, write to the
189338Srafael.vanoni@sun.com  * Free Software Foundation, Inc.,
199338Srafael.vanoni@sun.com  * 51 Franklin Street, Fifth Floor,
209338Srafael.vanoni@sun.com  * Boston, MA 02110-1301 USA
219338Srafael.vanoni@sun.com  *
229338Srafael.vanoni@sun.com  * Authors:
239338Srafael.vanoni@sun.com  *	Arjan van de Ven <arjan@linux.intel.com>
249338Srafael.vanoni@sun.com  *	Eric C Saxe <eric.saxe@sun.com>
259338Srafael.vanoni@sun.com  *	Aubrey Li <aubrey.li@intel.com>
269338Srafael.vanoni@sun.com  */
279338Srafael.vanoni@sun.com 
289338Srafael.vanoni@sun.com /*
299338Srafael.vanoni@sun.com  * GPL Disclaimer
309338Srafael.vanoni@sun.com  *
319338Srafael.vanoni@sun.com  * For the avoidance of doubt, except that if any license choice other
329338Srafael.vanoni@sun.com  * than GPL or LGPL is available it will apply instead, Sun elects to
339338Srafael.vanoni@sun.com  * use only the General Public License version 2 (GPLv2) at this time
349338Srafael.vanoni@sun.com  * for any software where a choice of GPL license versions is made
359338Srafael.vanoni@sun.com  * available with the language indicating that GPLv2 or any later
369338Srafael.vanoni@sun.com  * version may be used, or where a choice of which version of the GPL
379338Srafael.vanoni@sun.com  * is applied is otherwise unspecified.
389338Srafael.vanoni@sun.com  */
399338Srafael.vanoni@sun.com 
409338Srafael.vanoni@sun.com #include <stdlib.h>
419338Srafael.vanoni@sun.com #include <string.h>
429338Srafael.vanoni@sun.com #include <dtrace.h>
439338Srafael.vanoni@sun.com #include <kstat.h>
449338Srafael.vanoni@sun.com #include <errno.h>
459338Srafael.vanoni@sun.com #include "powertop.h"
469338Srafael.vanoni@sun.com 
479711Srafael.vanoni@sun.com #define	HZ2MHZ(speed)	((speed) / MICROSEC)
489338Srafael.vanoni@sun.com #define	DTP_ARG_COUNT	2
499338Srafael.vanoni@sun.com #define	DTP_ARG_LENGTH	5
509338Srafael.vanoni@sun.com 
519338Srafael.vanoni@sun.com static uint64_t		max_cpufreq = 0;
529338Srafael.vanoni@sun.com static dtrace_hdl_t	*dtp;
539338Srafael.vanoni@sun.com static char		**dtp_argv;
549338Srafael.vanoni@sun.com 
/*
 * Enabling CPU power management through /etc/power.conf.
 *
 * default_conf is the file inspected by pt_cpufreq_check_pm(); the two
 * "echo" commands and default_pmconf are run, in that order, by
 * pt_cpufreq_enable(). See also pt_cpufreq_suggest().
 */
static char default_conf[] = "/etc/power.conf";
static char default_pmconf[] = "/usr/sbin/pmconfig";
static char cpupm_enable[] = "echo cpupm enable >> /etc/power.conf";
static char cpupm_treshold[] =
    "echo cpu-threshold 1s >> /etc/power.conf";
639338Srafael.vanoni@sun.com 
/*
 * D program that tracks CPU frequency transitions on all CPUs.
 *
 * $0 is the first macro argument supplied at compile time and sizes the
 * last[] array. Every cpu-change-speed firing adds to
 * @times[this->cpu, this->oldspeed] the time since the previous firing
 * recorded for that CPU or, when none has been recorded yet, the time
 * since BEGIN. The literal pieces are joined without any newlines.
 */
static const char *dtp_cpufreq =
    /* globals, and the timestamp at which tracing began */
    "hrtime_t last[$0];"
    "BEGIN"
    "{"
    "\tbegin = timestamp;"
    "}"
    /* this CPU already has a previous transition on record */
    ":::cpu-change-speed"
    "/last[(processorid_t)arg0] != 0/"
    "{"
    "\tthis->cpu = (processorid_t)arg0;"
    "\tthis->oldspeed = (uint64_t)arg1;"
    "\t@times[this->cpu, this->oldspeed] = sum(timestamp - last[this->cpu]);"
    "\tlast[this->cpu] = timestamp;"
    "}"
    /* first transition seen on this CPU: measure from BEGIN */
    ":::cpu-change-speed"
    "/last[(processorid_t)arg0] == 0/"
    "{"
    "\tthis->cpu = (processorid_t)arg0;"
    "\tthis->oldspeed = (uint64_t)arg1;"
    "\t@times[this->cpu, this->oldspeed] = sum(timestamp - begin);"
    "\tlast[this->cpu] = timestamp;"
    "}";
919338Srafael.vanoni@sun.com 
/*
 * D program that tracks CPU frequency transitions for one CPU only.
 *
 * Same accounting as the all-CPU program, except that the predicates
 * only accept firings whose arg0 equals the macro argument $1, so a
 * single scalar "last" is enough. The literal pieces are joined without
 * any newlines.
 */
static const char *dtp_cpufreq_c =
    /* globals, and the timestamp at which tracing began */
    "hrtime_t last;"
    "BEGIN"
    "{"
    "\tbegin = timestamp;"
    "}"
    /* the selected CPU already has a previous transition on record */
    ":::cpu-change-speed"
    "/(processorid_t)arg0 == $1 && last != 0/"
    "{"
    "\tthis->cpu = (processorid_t)arg0;"
    "\tthis->oldspeed = (uint64_t)arg1;"
    "\t@times[this->cpu, this->oldspeed] = sum(timestamp - last);"
    "\tlast = timestamp;"
    "}"
    /* first transition seen on the selected CPU: measure from BEGIN */
    ":::cpu-change-speed"
    "/(processorid_t)arg0 == $1 && last == 0/"
    "{"
    "\tthis->cpu = (processorid_t)arg0;"
    "\tthis->oldspeed = (uint64_t)arg1;"
    "\t@times[this->cpu, this->oldspeed] = sum(timestamp - begin);"
    "\tlast = timestamp;"
    "}";
1219338Srafael.vanoni@sun.com 
1229338Srafael.vanoni@sun.com static int	pt_cpufreq_setup(void);
1239338Srafael.vanoni@sun.com static int	pt_cpufreq_snapshot(void);
1249338Srafael.vanoni@sun.com static int	pt_cpufreq_dtrace_walk(const dtrace_aggdata_t *, void *);
1259338Srafael.vanoni@sun.com static void	pt_cpufreq_stat_account(double, uint_t);
1269908Srafael.vanoni@sun.com static int	pt_cpufreq_snapshot_cpu(kstat_ctl_t *, uint_t);
1279908Srafael.vanoni@sun.com static int	pt_cpufreq_check_pm(void);
1289908Srafael.vanoni@sun.com static void	pt_cpufreq_enable(void);
1299338Srafael.vanoni@sun.com 
1309338Srafael.vanoni@sun.com static int
pt_cpufreq_setup(void)1319338Srafael.vanoni@sun.com pt_cpufreq_setup(void)
1329338Srafael.vanoni@sun.com {
1339338Srafael.vanoni@sun.com 	if ((dtp_argv = malloc(sizeof (char *) * DTP_ARG_COUNT)) == NULL)
134*11122Srafael.vanoni@sun.com 		return (1);
1359338Srafael.vanoni@sun.com 
1369338Srafael.vanoni@sun.com 	if ((dtp_argv[0] = malloc(sizeof (char) * DTP_ARG_LENGTH)) == NULL) {
1379338Srafael.vanoni@sun.com 		free(dtp_argv);
138*11122Srafael.vanoni@sun.com 		return (1);
1399338Srafael.vanoni@sun.com 	}
1409338Srafael.vanoni@sun.com 
1419338Srafael.vanoni@sun.com 	(void) snprintf(dtp_argv[0], 5, "%d\0", g_ncpus_observed);
1429338Srafael.vanoni@sun.com 
1439711Srafael.vanoni@sun.com 	if (PT_ON_CPU) {
1449338Srafael.vanoni@sun.com 		if ((dtp_argv[1] = malloc(sizeof (char) * DTP_ARG_LENGTH))
1459338Srafael.vanoni@sun.com 		    == NULL) {
1469338Srafael.vanoni@sun.com 			free(dtp_argv[0]);
1479338Srafael.vanoni@sun.com 			free(dtp_argv);
148*11122Srafael.vanoni@sun.com 			return (1);
1499338Srafael.vanoni@sun.com 		}
1509338Srafael.vanoni@sun.com 		(void) snprintf(dtp_argv[1], 5, "%d\0", g_observed_cpu);
1519338Srafael.vanoni@sun.com 	}
1529338Srafael.vanoni@sun.com 
1539338Srafael.vanoni@sun.com 	return (0);
1549338Srafael.vanoni@sun.com }
1559338Srafael.vanoni@sun.com 
1569338Srafael.vanoni@sun.com /*
1579338Srafael.vanoni@sun.com  * Perform setup necessary to enumerate and track CPU speed changes
1589338Srafael.vanoni@sun.com  */
1599338Srafael.vanoni@sun.com int
pt_cpufreq_stat_prepare(void)1609338Srafael.vanoni@sun.com pt_cpufreq_stat_prepare(void)
1619338Srafael.vanoni@sun.com {
1629338Srafael.vanoni@sun.com 	dtrace_prog_t 		*prog;
1639338Srafael.vanoni@sun.com 	dtrace_proginfo_t 	info;
1649338Srafael.vanoni@sun.com 	dtrace_optval_t 	statustime;
1659338Srafael.vanoni@sun.com 	kstat_ctl_t 		*kc;
1669338Srafael.vanoni@sun.com 	kstat_t 		*ksp;
1679338Srafael.vanoni@sun.com 	kstat_named_t 		*knp;
1689338Srafael.vanoni@sun.com 	freq_state_info_t 	*state;
1699338Srafael.vanoni@sun.com 	char 			*s, *token, *prog_ptr;
1709338Srafael.vanoni@sun.com 	int 			err;
1719338Srafael.vanoni@sun.com 
1729338Srafael.vanoni@sun.com 	if ((err = pt_cpufreq_setup()) != 0) {
173*11122Srafael.vanoni@sun.com 		pt_error("failed to setup %s report (couldn't allocate "
174*11122Srafael.vanoni@sun.com 		    "memory)\n", g_msg_freq_state);
1759338Srafael.vanoni@sun.com 		return (errno);
1769338Srafael.vanoni@sun.com 	}
1779338Srafael.vanoni@sun.com 
1789338Srafael.vanoni@sun.com 	state = g_pstate_info;
1799338Srafael.vanoni@sun.com 	if ((g_cpu_power_states = calloc((size_t)g_ncpus,
1809338Srafael.vanoni@sun.com 	    sizeof (cpu_power_info_t))) == NULL)
1819338Srafael.vanoni@sun.com 		return (-1);
1829338Srafael.vanoni@sun.com 
1839338Srafael.vanoni@sun.com 	/*
1849338Srafael.vanoni@sun.com 	 * Enumerate the CPU frequencies
1859338Srafael.vanoni@sun.com 	 */
1869338Srafael.vanoni@sun.com 	if ((kc = kstat_open()) == NULL)
1879338Srafael.vanoni@sun.com 		return (errno);
1889338Srafael.vanoni@sun.com 
1899338Srafael.vanoni@sun.com 	ksp = kstat_lookup(kc, "cpu_info", g_cpu_table[g_observed_cpu], NULL);
1909338Srafael.vanoni@sun.com 
1919338Srafael.vanoni@sun.com 	if (ksp == NULL) {
1929338Srafael.vanoni@sun.com 		err = errno;
1939338Srafael.vanoni@sun.com 		(void) kstat_close(kc);
1949338Srafael.vanoni@sun.com 		return (err);
1959338Srafael.vanoni@sun.com 	}
1969338Srafael.vanoni@sun.com 
1979338Srafael.vanoni@sun.com 	(void) kstat_read(kc, ksp, NULL);
1989338Srafael.vanoni@sun.com 
1999338Srafael.vanoni@sun.com 	knp = kstat_data_lookup(ksp, "supported_frequencies_Hz");
2009338Srafael.vanoni@sun.com 	s = knp->value.str.addr.ptr;
2019338Srafael.vanoni@sun.com 
2029338Srafael.vanoni@sun.com 	g_npstates = 0;
2039338Srafael.vanoni@sun.com 
2049338Srafael.vanoni@sun.com 	for (token = strtok(s, ":"), s = NULL;
205*11122Srafael.vanoni@sun.com 	    token != NULL && g_npstates < NSTATES;
2069338Srafael.vanoni@sun.com 	    token = strtok(NULL, ":")) {
2079338Srafael.vanoni@sun.com 
2089338Srafael.vanoni@sun.com 		state->speed = HZ2MHZ(atoll(token));
2099338Srafael.vanoni@sun.com 
2109338Srafael.vanoni@sun.com 		if (state->speed > max_cpufreq)
2119338Srafael.vanoni@sun.com 			max_cpufreq = state->speed;
2129338Srafael.vanoni@sun.com 
2139338Srafael.vanoni@sun.com 		state->total_time = (uint64_t)0;
2149338Srafael.vanoni@sun.com 
2159338Srafael.vanoni@sun.com 		g_npstates++;
2169338Srafael.vanoni@sun.com 		state++;
2179338Srafael.vanoni@sun.com 	}
2189338Srafael.vanoni@sun.com 
2199338Srafael.vanoni@sun.com 	if (token != NULL)
220*11122Srafael.vanoni@sun.com 		pt_error("CPU exceeds the supported number of %s\n",
221*11122Srafael.vanoni@sun.com 		    g_msg_freq_state);
2229338Srafael.vanoni@sun.com 
2239338Srafael.vanoni@sun.com 	(void) kstat_close(kc);
2249338Srafael.vanoni@sun.com 
2259338Srafael.vanoni@sun.com 	/*
2269338Srafael.vanoni@sun.com 	 * Return if speed transition is not supported
2279338Srafael.vanoni@sun.com 	 */
2289338Srafael.vanoni@sun.com 	if (g_npstates < 2)
2299338Srafael.vanoni@sun.com 		return (-1);
2309338Srafael.vanoni@sun.com 
2319338Srafael.vanoni@sun.com 	/*
2329338Srafael.vanoni@sun.com 	 * Setup DTrace to look for CPU frequency changes
2339338Srafael.vanoni@sun.com 	 */
2349338Srafael.vanoni@sun.com 	if ((dtp = dtrace_open(DTRACE_VERSION, 0, &err)) == NULL) {
235*11122Srafael.vanoni@sun.com 		pt_error("cannot open dtrace library for the %s report: %s\n",
236*11122Srafael.vanoni@sun.com 		    g_msg_freq_state, dtrace_errmsg(NULL, err));
2379338Srafael.vanoni@sun.com 		return (-2);
2389338Srafael.vanoni@sun.com 	}
2399338Srafael.vanoni@sun.com 
2409338Srafael.vanoni@sun.com 	/*
2419338Srafael.vanoni@sun.com 	 * Execute different scripts (defined above) depending on
2429338Srafael.vanoni@sun.com 	 * user specified options. Default mode uses dtp_cpufreq.
2439338Srafael.vanoni@sun.com 	 */
2449711Srafael.vanoni@sun.com 	if (PT_ON_CPU)
2459338Srafael.vanoni@sun.com 		prog_ptr = (char *)dtp_cpufreq_c;
2469338Srafael.vanoni@sun.com 	else
2479338Srafael.vanoni@sun.com 		prog_ptr = (char *)dtp_cpufreq;
2489338Srafael.vanoni@sun.com 
2499338Srafael.vanoni@sun.com 	if ((prog = dtrace_program_strcompile(dtp, prog_ptr,
2509338Srafael.vanoni@sun.com 	    DTRACE_PROBESPEC_NAME, 0, (1 + g_argc), dtp_argv)) == NULL) {
251*11122Srafael.vanoni@sun.com 		pt_error("failed to compile %s program\n", g_msg_freq_state);
2529338Srafael.vanoni@sun.com 		return (dtrace_errno(dtp));
2539338Srafael.vanoni@sun.com 	}
2549338Srafael.vanoni@sun.com 
2559338Srafael.vanoni@sun.com 	if (dtrace_program_exec(dtp, prog, &info) == -1) {
256*11122Srafael.vanoni@sun.com 		pt_error("failed to enable %s probes\n", g_msg_freq_state);
2579338Srafael.vanoni@sun.com 		return (dtrace_errno(dtp));
2589338Srafael.vanoni@sun.com 	}
2599338Srafael.vanoni@sun.com 
260*11122Srafael.vanoni@sun.com 	if (dtrace_setopt(dtp, "aggsize", "128k") == -1)
261*11122Srafael.vanoni@sun.com 		pt_error("failed to set %s 'aggsize'\n", g_msg_freq_state);
2629338Srafael.vanoni@sun.com 
263*11122Srafael.vanoni@sun.com 	if (dtrace_setopt(dtp, "aggrate", "0") == -1)
264*11122Srafael.vanoni@sun.com 		pt_error("failed to set %s 'aggrate'\n", g_msg_freq_state);
2659338Srafael.vanoni@sun.com 
266*11122Srafael.vanoni@sun.com 	if (dtrace_setopt(dtp, "aggpercpu", 0) == -1)
267*11122Srafael.vanoni@sun.com 		pt_error("failed to set %s 'aggpercpu'\n", g_msg_freq_state);
2689338Srafael.vanoni@sun.com 
2699338Srafael.vanoni@sun.com 	if (dtrace_go(dtp) != 0) {
270*11122Srafael.vanoni@sun.com 		pt_error("failed to start %s observation\n", g_msg_freq_state);
2719338Srafael.vanoni@sun.com 		return (dtrace_errno(dtp));
2729338Srafael.vanoni@sun.com 	}
2739338Srafael.vanoni@sun.com 
2749338Srafael.vanoni@sun.com 	if (dtrace_getopt(dtp, "statusrate", &statustime) == -1) {
275*11122Srafael.vanoni@sun.com 		pt_error("failed to get %s 'statusrate'\n", g_msg_freq_state);
2769338Srafael.vanoni@sun.com 		return (dtrace_errno(dtp));
2779338Srafael.vanoni@sun.com 	}
2789338Srafael.vanoni@sun.com 
2799338Srafael.vanoni@sun.com 	return (0);
2809338Srafael.vanoni@sun.com }
2819338Srafael.vanoni@sun.com 
2829338Srafael.vanoni@sun.com /*
2839338Srafael.vanoni@sun.com  * The DTrace probes have already been enabled, and are tracking
2849338Srafael.vanoni@sun.com  * CPU speed transitions. Take a snapshot of the aggregations, and
2859338Srafael.vanoni@sun.com  * look for any CPUs that have made a speed transition over the last
2869338Srafael.vanoni@sun.com  * sampling interval. Note that the aggregations may be empty if no
2879338Srafael.vanoni@sun.com  * speed transitions took place over the last interval. In that case,
2889338Srafael.vanoni@sun.com  * notate that we have already accounted for the time, so that when
2899338Srafael.vanoni@sun.com  * we do encounter a speed transition in a future sampling interval
2909338Srafael.vanoni@sun.com  * we can subtract that time back out.
2919338Srafael.vanoni@sun.com  */
2929338Srafael.vanoni@sun.com int
pt_cpufreq_stat_collect(double interval)2939338Srafael.vanoni@sun.com pt_cpufreq_stat_collect(double interval)
2949338Srafael.vanoni@sun.com {
2959711Srafael.vanoni@sun.com 	int i, ret;
2969338Srafael.vanoni@sun.com 
2979338Srafael.vanoni@sun.com 	/*
2989338Srafael.vanoni@sun.com 	 * Zero out the interval time reported by DTrace for
2999338Srafael.vanoni@sun.com 	 * this interval
3009338Srafael.vanoni@sun.com 	 */
3019338Srafael.vanoni@sun.com 	for (i = 0; i < g_npstates; i++)
3029338Srafael.vanoni@sun.com 		g_pstate_info[i].total_time = 0;
3039338Srafael.vanoni@sun.com 
3049338Srafael.vanoni@sun.com 	for (i = 0; i < g_ncpus; i++)
3059338Srafael.vanoni@sun.com 		g_cpu_power_states[i].dtrace_time = 0;
3069338Srafael.vanoni@sun.com 
3079338Srafael.vanoni@sun.com 	if (dtrace_status(dtp) == -1)
3089338Srafael.vanoni@sun.com 		return (-1);
3099338Srafael.vanoni@sun.com 
3109338Srafael.vanoni@sun.com 	if (dtrace_aggregate_snap(dtp) != 0)
311*11122Srafael.vanoni@sun.com 		pt_error("failed to collect data for %s\n", g_msg_freq_state);
3129338Srafael.vanoni@sun.com 
3139338Srafael.vanoni@sun.com 	if (dtrace_aggregate_walk_keyvarsorted(dtp, pt_cpufreq_dtrace_walk,
3149338Srafael.vanoni@sun.com 	    NULL) != 0)
315*11122Srafael.vanoni@sun.com 		pt_error("failed to sort data for %s\n", g_msg_freq_state);
3169338Srafael.vanoni@sun.com 
3179338Srafael.vanoni@sun.com 	dtrace_aggregate_clear(dtp);
3189338Srafael.vanoni@sun.com 
3199338Srafael.vanoni@sun.com 	if ((ret = pt_cpufreq_snapshot()) != 0) {
320*11122Srafael.vanoni@sun.com 		pt_error("failed to snapshot %s state\n", g_msg_freq_state);
3219338Srafael.vanoni@sun.com 		return (ret);
3229338Srafael.vanoni@sun.com 	}
3239338Srafael.vanoni@sun.com 
3249338Srafael.vanoni@sun.com 	switch (g_op_mode) {
3259711Srafael.vanoni@sun.com 	case PT_MODE_CPU:
3269338Srafael.vanoni@sun.com 		pt_cpufreq_stat_account(interval, g_observed_cpu);
3279338Srafael.vanoni@sun.com 		break;
3289711Srafael.vanoni@sun.com 	case PT_MODE_DEFAULT:
3299338Srafael.vanoni@sun.com 	default:
3309338Srafael.vanoni@sun.com 		for (i = 0; i < g_ncpus_observed; i++)
3319338Srafael.vanoni@sun.com 			pt_cpufreq_stat_account(interval, i);
3329338Srafael.vanoni@sun.com 		break;
3339338Srafael.vanoni@sun.com 	}
3349338Srafael.vanoni@sun.com 
3359338Srafael.vanoni@sun.com 	return (0);
3369338Srafael.vanoni@sun.com }
3379338Srafael.vanoni@sun.com 
3389338Srafael.vanoni@sun.com static void
pt_cpufreq_stat_account(double interval,uint_t cpu)3399338Srafael.vanoni@sun.com pt_cpufreq_stat_account(double interval, uint_t cpu)
3409338Srafael.vanoni@sun.com {
3419711Srafael.vanoni@sun.com 	cpu_power_info_t 	*cpu_pow;
3429338Srafael.vanoni@sun.com 	uint64_t 		speed;
3439338Srafael.vanoni@sun.com 	hrtime_t 		duration;
3449338Srafael.vanoni@sun.com 	int			i;
3459338Srafael.vanoni@sun.com 
3469338Srafael.vanoni@sun.com 	cpu_pow = &g_cpu_power_states[cpu];
3479338Srafael.vanoni@sun.com 	speed = cpu_pow->current_pstate;
3489338Srafael.vanoni@sun.com 
3499711Srafael.vanoni@sun.com 	duration = (hrtime_t)(interval * NANOSEC) - cpu_pow->dtrace_time;
3509711Srafael.vanoni@sun.com 
3519711Srafael.vanoni@sun.com 	/*
3529711Srafael.vanoni@sun.com 	 * 'duration' may be a negative value when we're using or forcing a
3539711Srafael.vanoni@sun.com 	 * small interval, and the amount of time already accounted ends up
3549711Srafael.vanoni@sun.com 	 * being larger than the the former.
3559711Srafael.vanoni@sun.com 	 */
3569711Srafael.vanoni@sun.com 	if (duration < 0)
3579711Srafael.vanoni@sun.com 		return;
3589338Srafael.vanoni@sun.com 
3599338Srafael.vanoni@sun.com 	for (i = 0; i < g_npstates; i++) {
3609338Srafael.vanoni@sun.com 		if (g_pstate_info[i].speed == speed) {
3619338Srafael.vanoni@sun.com 			g_pstate_info[i].total_time += duration;
3629338Srafael.vanoni@sun.com 			cpu_pow->time_accounted += duration;
3639711Srafael.vanoni@sun.com 			cpu_pow->speed_accounted = speed;
3649338Srafael.vanoni@sun.com 		}
3659338Srafael.vanoni@sun.com 	}
3669338Srafael.vanoni@sun.com }
3679338Srafael.vanoni@sun.com 
3689338Srafael.vanoni@sun.com /*
3699338Srafael.vanoni@sun.com  * Take a snapshot of each CPU's speed by looking through the cpu_info kstats.
3709338Srafael.vanoni@sun.com  */
3719338Srafael.vanoni@sun.com static int
pt_cpufreq_snapshot(void)3729338Srafael.vanoni@sun.com pt_cpufreq_snapshot(void)
3739338Srafael.vanoni@sun.com {
3749711Srafael.vanoni@sun.com 	kstat_ctl_t 	*kc;
3759711Srafael.vanoni@sun.com 	int 		ret;
3769711Srafael.vanoni@sun.com 	uint_t		i;
3779338Srafael.vanoni@sun.com 
3789338Srafael.vanoni@sun.com 	if ((kc = kstat_open()) == NULL)
3799338Srafael.vanoni@sun.com 		return (errno);
3809338Srafael.vanoni@sun.com 
3819338Srafael.vanoni@sun.com 	switch (g_op_mode) {
3829711Srafael.vanoni@sun.com 	case PT_MODE_CPU:
3839338Srafael.vanoni@sun.com 		ret = pt_cpufreq_snapshot_cpu(kc, g_observed_cpu);
3849338Srafael.vanoni@sun.com 		break;
3859711Srafael.vanoni@sun.com 	case PT_MODE_DEFAULT:
3869338Srafael.vanoni@sun.com 	default:
3879338Srafael.vanoni@sun.com 		for (i = 0; i < g_ncpus_observed; i++)
3889338Srafael.vanoni@sun.com 			if ((ret = pt_cpufreq_snapshot_cpu(kc, i)) != 0)
3899338Srafael.vanoni@sun.com 				break;
3909338Srafael.vanoni@sun.com 		break;
3919338Srafael.vanoni@sun.com 	}
3929338Srafael.vanoni@sun.com 
3939338Srafael.vanoni@sun.com 	if (kstat_close(kc) != 0)
394*11122Srafael.vanoni@sun.com 		pt_error("couldn't close %s kstat\n", g_msg_freq_state);
3959338Srafael.vanoni@sun.com 
3969338Srafael.vanoni@sun.com 	return (ret);
3979338Srafael.vanoni@sun.com }
3989338Srafael.vanoni@sun.com 
3999338Srafael.vanoni@sun.com static int
pt_cpufreq_snapshot_cpu(kstat_ctl_t * kc,uint_t cpu)4009338Srafael.vanoni@sun.com pt_cpufreq_snapshot_cpu(kstat_ctl_t *kc, uint_t cpu)
4019338Srafael.vanoni@sun.com {
4029338Srafael.vanoni@sun.com 	kstat_t 		*ksp;
4039338Srafael.vanoni@sun.com 	kstat_named_t 		*knp;
4049338Srafael.vanoni@sun.com 
4059338Srafael.vanoni@sun.com 	ksp = kstat_lookup(kc, "cpu_info", g_cpu_table[cpu], NULL);
4069338Srafael.vanoni@sun.com 	if (ksp == NULL) {
407*11122Srafael.vanoni@sun.com 		pt_error("couldn't find 'cpu_info' kstat for CPU %d\n while "
408*11122Srafael.vanoni@sun.com 		    "taking a snapshot of %s\n", cpu, g_msg_freq_state);
4099338Srafael.vanoni@sun.com 		return (1);
4109338Srafael.vanoni@sun.com 	}
4119338Srafael.vanoni@sun.com 
4129338Srafael.vanoni@sun.com 	if (kstat_read(kc, ksp, NULL) == -1) {
413*11122Srafael.vanoni@sun.com 		pt_error("couldn't read 'cpu_info' kstat for CPU %d\n while "
414*11122Srafael.vanoni@sun.com 		    "taking a snapshot of %s\n", cpu, g_msg_freq_state);
4159338Srafael.vanoni@sun.com 		return (2);
4169338Srafael.vanoni@sun.com 	}
4179338Srafael.vanoni@sun.com 
4189338Srafael.vanoni@sun.com 	knp = kstat_data_lookup(ksp, "current_clock_Hz");
4199338Srafael.vanoni@sun.com 	if (knp == NULL) {
420*11122Srafael.vanoni@sun.com 		pt_error("couldn't find 'current_clock_Hz' kstat for CPU %d "
421*11122Srafael.vanoni@sun.com 		    "while taking a snapshot of %s\n", cpu, g_msg_freq_state);
4229338Srafael.vanoni@sun.com 		return (3);
4239338Srafael.vanoni@sun.com 	}
4249338Srafael.vanoni@sun.com 
4259338Srafael.vanoni@sun.com 	g_cpu_power_states[cpu].current_pstate = HZ2MHZ(knp->value.ui64);
4269338Srafael.vanoni@sun.com 
4279338Srafael.vanoni@sun.com 	return (0);
4289338Srafael.vanoni@sun.com }
4299338Srafael.vanoni@sun.com 
4309338Srafael.vanoni@sun.com /*
4319338Srafael.vanoni@sun.com  * DTrace aggregation walker that sorts through a snapshot of the
4329338Srafael.vanoni@sun.com  * aggregation data collected during firings of the cpu-change-speed
4339338Srafael.vanoni@sun.com  * probe.
4349338Srafael.vanoni@sun.com  */
4359338Srafael.vanoni@sun.com /*ARGSUSED*/
4369338Srafael.vanoni@sun.com static int
pt_cpufreq_dtrace_walk(const dtrace_aggdata_t * data,void * arg)4379338Srafael.vanoni@sun.com pt_cpufreq_dtrace_walk(const dtrace_aggdata_t *data, void *arg)
4389338Srafael.vanoni@sun.com {
4399338Srafael.vanoni@sun.com 	dtrace_aggdesc_t 	*aggdesc = data->dtada_desc;
4409338Srafael.vanoni@sun.com 	dtrace_recdesc_t 	*cpu_rec, *speed_rec;
4419711Srafael.vanoni@sun.com 	cpu_power_info_t 	*cp;
4429338Srafael.vanoni@sun.com 	int32_t 		cpu;
4439338Srafael.vanoni@sun.com 	uint64_t 		speed;
4449711Srafael.vanoni@sun.com 	hrtime_t 		res;
4459338Srafael.vanoni@sun.com 	int 			i;
4469338Srafael.vanoni@sun.com 
4479338Srafael.vanoni@sun.com 	if (strcmp(aggdesc->dtagd_name, "times") == 0) {
4489338Srafael.vanoni@sun.com 		cpu_rec = &aggdesc->dtagd_rec[1];
4499338Srafael.vanoni@sun.com 		speed_rec = &aggdesc->dtagd_rec[2];
4509338Srafael.vanoni@sun.com 
4519711Srafael.vanoni@sun.com 		/* LINTED - alignment */
4529711Srafael.vanoni@sun.com 		cpu = *(int32_t *)(data->dtada_data + cpu_rec->dtrd_offset);
4539338Srafael.vanoni@sun.com 
4549338Srafael.vanoni@sun.com 		/* LINTED - alignment */
4559711Srafael.vanoni@sun.com 		res = *((hrtime_t *)(data->dtada_percpu[cpu]));
4569711Srafael.vanoni@sun.com 
4579338Srafael.vanoni@sun.com 		/* LINTED - alignment */
4589338Srafael.vanoni@sun.com 		speed = *(uint64_t *)(data->dtada_data +
4599338Srafael.vanoni@sun.com 		    speed_rec->dtrd_offset);
4609338Srafael.vanoni@sun.com 
4619711Srafael.vanoni@sun.com 		if (speed == 0)
4629338Srafael.vanoni@sun.com 			speed = max_cpufreq;
4639711Srafael.vanoni@sun.com 		else
4649711Srafael.vanoni@sun.com 			speed = HZ2MHZ(speed);
4659338Srafael.vanoni@sun.com 
4669338Srafael.vanoni@sun.com 		/*
4679338Srafael.vanoni@sun.com 		 * We have an aggregation record for "cpu" being at "speed"
4689338Srafael.vanoni@sun.com 		 * for an interval of "n" nanoseconds. The reported interval
4699338Srafael.vanoni@sun.com 		 * may exceed the powertop sampling interval, since we only
4709338Srafael.vanoni@sun.com 		 * notice during potentially infrequent firings of the
4719338Srafael.vanoni@sun.com 		 * "speed change" DTrace probe. In this case powertop would
4729338Srafael.vanoni@sun.com 		 * have already accounted for the portions of the interval
4739338Srafael.vanoni@sun.com 		 * that happened during prior powertop samplings, so subtract
4749338Srafael.vanoni@sun.com 		 * out time already accounted.
4759338Srafael.vanoni@sun.com 		 */
4769711Srafael.vanoni@sun.com 		cp = &g_cpu_power_states[cpu];
4779338Srafael.vanoni@sun.com 
4789338Srafael.vanoni@sun.com 		for (i = 0; i < g_npstates; i++) {
4799338Srafael.vanoni@sun.com 			if (g_pstate_info[i].speed == speed) {
4809711Srafael.vanoni@sun.com 
4819711Srafael.vanoni@sun.com 				if (cp->time_accounted > 0 &&
4829711Srafael.vanoni@sun.com 				    cp->speed_accounted == speed) {
4839711Srafael.vanoni@sun.com 					if (res > cp->time_accounted) {
4849711Srafael.vanoni@sun.com 						res -= cp->time_accounted;
4859711Srafael.vanoni@sun.com 						cp->time_accounted = 0;
4869711Srafael.vanoni@sun.com 						cp->speed_accounted = 0;
4879711Srafael.vanoni@sun.com 					} else {
4889711Srafael.vanoni@sun.com 						return (DTRACE_AGGWALK_NEXT);
4899338Srafael.vanoni@sun.com 					}
4909338Srafael.vanoni@sun.com 				}
4919711Srafael.vanoni@sun.com 
4929711Srafael.vanoni@sun.com 				g_pstate_info[i].total_time += res;
4939711Srafael.vanoni@sun.com 				cp->dtrace_time += res;
4949338Srafael.vanoni@sun.com 			}
4959338Srafael.vanoni@sun.com 		}
4969338Srafael.vanoni@sun.com 	}
4979711Srafael.vanoni@sun.com 
4989338Srafael.vanoni@sun.com 	return (DTRACE_AGGWALK_NEXT);
4999338Srafael.vanoni@sun.com }
5009338Srafael.vanoni@sun.com 
5019338Srafael.vanoni@sun.com /*
5029908Srafael.vanoni@sun.com  * Checks if PM is enabled in /etc/power.conf, enabling if not
5039908Srafael.vanoni@sun.com  */
5049908Srafael.vanoni@sun.com void
pt_cpufreq_suggest(void)5059908Srafael.vanoni@sun.com pt_cpufreq_suggest(void)
5069908Srafael.vanoni@sun.com {
5079908Srafael.vanoni@sun.com 	int ret = pt_cpufreq_check_pm();
5089908Srafael.vanoni@sun.com 
5099908Srafael.vanoni@sun.com 	switch (ret) {
5109908Srafael.vanoni@sun.com 	case 0:
5119908Srafael.vanoni@sun.com 		pt_sugg_add("Suggestion: enable CPU power management by "
5129908Srafael.vanoni@sun.com 		    "pressing the P key", 40, 'P', (char *)g_msg_freq_enable,
5139908Srafael.vanoni@sun.com 		    pt_cpufreq_enable);
5149908Srafael.vanoni@sun.com 		break;
5159908Srafael.vanoni@sun.com 	}
5169908Srafael.vanoni@sun.com }
5179908Srafael.vanoni@sun.com 
5189908Srafael.vanoni@sun.com /*
5199908Srafael.vanoni@sun.com  * Checks /etc/power.conf and returns:
5209908Srafael.vanoni@sun.com  *
5219908Srafael.vanoni@sun.com  *     0 if CPUPM is not enabled
5229908Srafael.vanoni@sun.com  *     1 if there's nothing for us to do because:
5239908Srafael.vanoni@sun.com  *         (a) the system does not support frequency scaling
5249908Srafael.vanoni@sun.com  *         (b) there's no power.conf.
5259908Srafael.vanoni@sun.com  *     2 if CPUPM is enabled
5269908Srafael.vanoni@sun.com  *     3 if the system is running in poll-mode, as opposed to event-mode
5279908Srafael.vanoni@sun.com  *
5289908Srafael.vanoni@sun.com  * Notice the ordering of the return values, they will be picked up and
5299908Srafael.vanoni@sun.com  * switched upon ascendingly.
5309908Srafael.vanoni@sun.com  */
5319908Srafael.vanoni@sun.com static int
pt_cpufreq_check_pm(void)5329908Srafael.vanoni@sun.com pt_cpufreq_check_pm(void)
5339908Srafael.vanoni@sun.com {
5349908Srafael.vanoni@sun.com 	char line[1024];
5359908Srafael.vanoni@sun.com 	FILE *file;
5369908Srafael.vanoni@sun.com 	int ret = 0;
5379908Srafael.vanoni@sun.com 
5389908Srafael.vanoni@sun.com 	if (g_npstates < 2 || (file = fopen(default_conf, "r")) == NULL)
5399908Srafael.vanoni@sun.com 		return (1);
5409908Srafael.vanoni@sun.com 
5419908Srafael.vanoni@sun.com 	(void) memset(line, 0, 1024);
5429908Srafael.vanoni@sun.com 
5439908Srafael.vanoni@sun.com 	while (fgets(line, 1024, file)) {
5449908Srafael.vanoni@sun.com 		if (strstr(line, "cpupm")) {
5459908Srafael.vanoni@sun.com 			if (strstr(line, "enable")) {
5469908Srafael.vanoni@sun.com 				(void) fclose(file);
5479908Srafael.vanoni@sun.com 				return (2);
5489908Srafael.vanoni@sun.com 			}
5499908Srafael.vanoni@sun.com 		}
5509908Srafael.vanoni@sun.com 		if (strstr(line, "poll"))
5519908Srafael.vanoni@sun.com 			ret = 3;
5529908Srafael.vanoni@sun.com 	}
5539908Srafael.vanoni@sun.com 
5549908Srafael.vanoni@sun.com 	(void) fclose(file);
5559908Srafael.vanoni@sun.com 
5569908Srafael.vanoni@sun.com 	return (ret);
5579908Srafael.vanoni@sun.com }
5589908Srafael.vanoni@sun.com 
5599908Srafael.vanoni@sun.com /*
5609338Srafael.vanoni@sun.com  * Used as a suggestion, sets PM in /etc/power.conf and
5619338Srafael.vanoni@sun.com  * a 1sec threshold, then calls /usr/sbin/pmconfig
5629338Srafael.vanoni@sun.com  */
5639908Srafael.vanoni@sun.com static void
pt_cpufreq_enable(void)5649908Srafael.vanoni@sun.com pt_cpufreq_enable(void)
5659338Srafael.vanoni@sun.com {
5669338Srafael.vanoni@sun.com 	(void) system(cpupm_enable);
5679338Srafael.vanoni@sun.com 	(void) system(cpupm_treshold);
5689338Srafael.vanoni@sun.com 	(void) system(default_pmconf);
5699908Srafael.vanoni@sun.com 
5709908Srafael.vanoni@sun.com 	if (pt_sugg_remove(pt_cpufreq_enable) == 0)
571*11122Srafael.vanoni@sun.com 		pt_error("failed to remove a %s suggestion\n",
572*11122Srafael.vanoni@sun.com 		    g_msg_freq_state);
5739338Srafael.vanoni@sun.com }
574