xref: /dflybsd-src/usr.sbin/powerd/powerd.c (revision e85b99abf6da4a83a7dc495b0ef37ce19864149f)
1 /*
2  * Copyright (c) 2010 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 
35 /*
36  * The powerd daemon monitors the cpu load and adjusts cpu frequencies
37  * via hw.acpi.cpu.px_dom*.
38  */
39 
40 #define _KERNEL_STRUCTURES
#include <sys/types.h>
#include <sys/sysctl.h>
#include <sys/kinfo.h>
#include <sys/file.h>

#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <syslog.h>
#include <unistd.h>
50 
/*
 * Forward declarations of the file-local routines.
 */
static void usage(void);
static void sigintr(int signo);
static double getcputime(void);
static void setupdominfo(void);
static void acpi_setcpufreq(int nstate);

/*
 * Settings taken from the command line.
 */
int DebugOpt;			/* -d: do not daemonize, print status */
int TurboOpt = 1;		/* cleared by -t */
int Hysteresis = 10;		/* -p: percentage used to derive TriggerDown */
double TriggerUp = 0.25;	/* load per cpu to force max freq */
double TriggerDown;		/* load per cpu to force the min freq */

/*
 * Run-time state.
 */
int PowerFd;			/* descriptor for /var/run/powerd.pid */
int NCpus;			/* cpus counted from the domain member lists */
int CpuLimit;			/* # of cpus at max frequency */
int DomLimit;			/* # of domains at max frequency */
int DomBeg;			/* first px_dom index found */
int DomEnd;			/* one past the last px_dom index found */
int CpuCount[256];		/* # of cpus in any given domain */
int CpuToDom[256];		/* domain a particular cpu belongs to */
71 
72 int
73 main(int ac, char **av)
74 {
75 	double qavg;
76 	double savg;
77 	int ch;
78 	int nstate;
79 	char buf[64];
80 
81 	while ((ch = getopt(ac, av, "dp:tu:")) != -1) {
82 		switch(ch) {
83 		case 'd':
84 			DebugOpt = 1;
85 			break;
86 		case 'p':
87 			Hysteresis = (int)strtol(optarg, NULL, 10);
88 			break;
89 		case 't':
90 			TurboOpt = 0;
91 			break;
92 		case 'u':
93 			TriggerUp = (double)strtol(optarg, NULL, 10) / 100;
94 			break;
95 		default:
96 			usage();
97 			/* NOT REACHED */
98 		}
99 	}
100 	ac -= optind;
101 	av += optind;
102 
103 	if (0 > Hysteresis || Hysteresis > 99) {
104 		fprintf(stderr, "Invalid hysteresis value\n");
105 		exit(1);
106 	}
107 
108 	if (0 > TriggerUp || TriggerUp > 1) {
109 		fprintf(stderr, "Invalid load limit value\n");
110 		exit(1);
111 	}
112 
113 	TriggerDown = TriggerUp - (TriggerUp * (double) Hysteresis / 100);
114 
115 	/*
116 	 * Make sure powerd is not already running.
117 	 */
118 	PowerFd = open("/var/run/powerd.pid", O_CREAT|O_RDWR, 0644);
119 	if (PowerFd < 0) {
120 		fprintf(stderr,
121 			"Cannot create /var/run/powerd.pid, "
122 			"continuing anyway\n");
123 	} else {
124 		if (flock(PowerFd, LOCK_EX|LOCK_NB) < 0) {
125 			fprintf(stderr, "powerd is already running\n");
126 			exit(1);
127 		}
128 	}
129 
130 	/*
131 	 * Demonize and set pid
132 	 */
133 	if (DebugOpt == 0) {
134 		daemon(0, 0);
135 		openlog("powerd", LOG_CONS | LOG_PID, LOG_DAEMON);
136 	}
137 
138 	if (PowerFd >= 0) {
139 		ftruncate(PowerFd, 0);
140 		snprintf(buf, sizeof(buf), "%d\n", (int)getpid());
141 		write(PowerFd, buf, strlen(buf));
142 	}
143 
144 	/*
145 	 * Wait hw.acpi.cpu.px_dom* sysctl to be created by kernel
146 	 *
147 	 * Since hw.acpi.cpu.px_dom* creation is queued into ACPI
148 	 * taskqueue and ACPI taskqueue is shared across various
149 	 * ACPI modules, any delay in other modules may cause
150 	 * hw.acpi.cpu.px_dom* to be created at quite a later time
151 	 * (e.g. cmbat module's task could take quite a lot of time).
152 	 */
153 	for (;;) {
154 		/*
155 		 * Prime delta cputime calculation, make sure at least
156 		 * dom0 exists.
157 		 */
158 		getcputime();
159 		savg = 0.0;
160 
161 		setupdominfo();
162 		if (DomBeg >= DomEnd) {
163 			sleep(1);
164 			continue;
165 		}
166 
167 		DomLimit = DomEnd;
168 		CpuLimit = NCpus;
169 		break;
170 	}
171 
172 	/*
173 	 * Set to maximum performance if killed.
174 	 */
175 	signal(SIGINT, sigintr);
176 	signal(SIGTERM, sigintr);
177 
178 	/*
179 	 * Monitoring loop
180 	 *
181 	 * Calculate nstate, the number of cpus we wish to run at max
182 	 * frequency.  All remaining cpus will be set to their lowest
183 	 * frequency and mapped out of the user process scheduler.
184 	 */
185 	for (;;) {
186 		qavg = getcputime();
187 		savg = (savg * 7.0 + qavg) / 8.0;
188 
189 		nstate = savg / TriggerUp;
190 		if (nstate < CpuLimit)
191 			nstate = savg / TriggerDown;
192 		if (nstate > NCpus)
193 			nstate = NCpus;
194 		if (DebugOpt) {
195 			printf("\rqavg=%5.2f savg=%5.2f %2d/%2d ncpus=%d\r",
196 				qavg, savg, CpuLimit, DomLimit, nstate);
197 			fflush(stdout);
198 		}
199 		if (nstate != CpuLimit)
200 			acpi_setcpufreq(nstate);
201 		sleep(1);
202 	}
203 }
204 
205 static
206 void
207 sigintr(int signo __unused)
208 {
209 	syslog(LOG_INFO, "killed, setting max and exiting");
210 	acpi_setcpufreq(NCpus);
211 	exit(1);
212 }
213 
214 /*
215  * Figure out the domains and calculate the CpuCount[] and CpuToDom[]
216  * arrays.
217  */
218 static
219 void
220 setupdominfo(void)
221 {
222 	char buf[64];
223 	char members[1024];
224 	char *str;
225 	size_t msize;
226 	int i;
227 	int n;
228 
229 	for (i = 0; i < 256; ++i) {
230 		snprintf(buf, sizeof(buf),
231 			 "hw.acpi.cpu.px_dom%d.available", i);
232 		if (sysctlbyname(buf, NULL, NULL, NULL, 0) >= 0)
233 			break;
234 	}
235 	DomBeg = i;
236 
237 	for (i = 255; i >= DomBeg; --i) {
238 		snprintf(buf, sizeof(buf),
239 			 "hw.acpi.cpu.px_dom%d.available", i);
240 		if (sysctlbyname(buf, NULL, NULL, NULL, 0) >= 0) {
241 			++i;
242 			break;
243 		}
244 	}
245 	DomEnd = i;
246 
247 	for (i = DomBeg; i < DomEnd; ++i) {
248 		snprintf(buf, sizeof(buf),
249 			 "hw.acpi.cpu.px_dom%d.members", i);
250 		msize = sizeof(members);
251 		if (sysctlbyname(buf, members, &msize, NULL, 0) == 0) {
252 			members[msize] = 0;
253 			for (str = strtok(members, " "); str;
254 			     str = strtok(NULL, " ")) {
255 				n = -1;
256 				sscanf(str, "cpu%d", &n);
257 				if (n >= 0) {
258 					++NCpus;
259 					++CpuCount[i];
260 					CpuToDom[n]= i;
261 				}
262 			}
263 		}
264 	}
265 }
266 
267 /*
268  * Return the one-second cpu load.  One cpu at 100% will return a value
269  * of 1.0.  On a SMP system N cpus running at 100% will return a value of N.
270  */
271 static
272 double
273 getcputime(void)
274 {
275 	static struct kinfo_cputime ocpu_time[64];
276 	static struct kinfo_cputime ncpu_time[64];
277 	size_t slen;
278 	int ncpu;
279 	int cpu;
280 	uint64_t delta;
281 
282 	bcopy(ncpu_time, ocpu_time, sizeof(ncpu_time));
283 	slen = sizeof(ncpu_time);
284 	if (sysctlbyname("kern.cputime", &ncpu_time, &slen, NULL, 0) < 0) {
285 		fprintf(stderr, "kern.cputime sysctl not available\n");
286 		exit(1);
287 	}
288 	ncpu = slen / sizeof(ncpu_time[0]);
289 	delta = 0;
290 
291 	for (cpu = 0; cpu < ncpu; ++cpu) {
292 		delta += (ncpu_time[cpu].cp_user + ncpu_time[cpu].cp_sys +
293 			  ncpu_time[cpu].cp_nice + ncpu_time[cpu].cp_intr) -
294 			 (ocpu_time[cpu].cp_user + ocpu_time[cpu].cp_sys +
295 			  ocpu_time[cpu].cp_nice + ocpu_time[cpu].cp_intr);
296 	}
297 	return((double)delta / 1000000.0);
298 }
299 
300 /*
301  * nstate is the requested number of cpus that we wish to run at full
302  * frequency.  We calculate how many domains we have to adjust to reach
303  * this goal.
304  *
305  * This function also sets the user scheduler global cpu mask.
306  */
307 static
308 void
309 acpi_setcpufreq(int nstate)
310 {
311 	int ncpus = 0;
312 	int increasing = (nstate > CpuLimit);
313 	int dom;
314 	int domBeg;
315 	int domEnd;
316 	int lowest;
317 	int highest;
318 	int desired;
319 	int v;
320 	char *sysid;
321 	char *ptr;
322 	char buf[256];
323 	size_t buflen;
324 	cpumask_t global_cpumask;
325 
326 	/*
327 	 * Calculate the ending domain if the number of operating cpus
328 	 * has increased.
329 	 *
330 	 * Calculate the starting domain if the number of operating cpus
331 	 * has decreased.
332 	 */
333 	for (dom = DomBeg; dom < DomEnd; ++dom) {
334 		if (ncpus >= nstate)
335 			break;
336 		ncpus += CpuCount[dom];
337 	}
338 
339 	syslog(LOG_INFO, "using %d cpus", nstate);
340 
341 	/*
342 	 * Set the mask of cpus the userland scheduler is allowed to use.
343 	 */
344 	CPUMASK_ASSBMASK(global_cpumask, nstate);
345 	sysctlbyname("kern.usched_global_cpumask", NULL, 0,
346 		     &global_cpumask, sizeof(global_cpumask));
347 
348 	if (increasing) {
349 		domBeg = DomLimit;
350 		domEnd = dom;
351 	} else {
352 		domBeg = dom;
353 		domEnd = DomLimit;
354 	}
355 	DomLimit = dom;
356 	CpuLimit = nstate;
357 
358 	/*
359 	 * Adjust the cpu frequency
360 	 */
361 	if (DebugOpt)
362 		printf("\n");
363 	for (dom = domBeg; dom < domEnd; ++dom) {
364 		/*
365 		 * Retrieve availability list
366 		 */
367 		asprintf(&sysid, "hw.acpi.cpu.px_dom%d.available", dom);
368 		buflen = sizeof(buf) - 1;
369 		v = sysctlbyname(sysid, buf, &buflen, NULL, 0);
370 		free(sysid);
371 		if (v < 0)
372 			continue;
373 		buf[buflen] = 0;
374 
375 		/*
376 		 * Parse out the highest and lowest cpu frequencies
377 		 */
378 		ptr = buf;
379 		highest = lowest = 0;
380 		while (ptr && (v = strtol(ptr, &ptr, 10)) > 0) {
381 			if (lowest == 0 || lowest > v)
382 				lowest = v;
383 			if (highest == 0 || highest < v)
384 				highest = v;
385 			/*
386 			 * Detect turbo mode
387 			 */
388 			if ((highest - v == 1) && ! TurboOpt)
389 				highest = v;
390 
391 		}
392 
393 		/*
394 		 * Calculate the desired cpu frequency, test, and set.
395 		 */
396 		desired = increasing ? highest : lowest;
397 
398 		asprintf(&sysid, "hw.acpi.cpu.px_dom%d.select", dom);
399 		buflen = sizeof(v);
400 		v = 0;
401 		sysctlbyname(sysid, &v, &buflen, NULL, 0);
402 		{
403 			if (DebugOpt) {
404 				printf("dom%d set frequency %d\n",
405 				       dom, desired);
406 			}
407 			sysctlbyname(sysid, NULL, NULL,
408 				     &desired, sizeof(desired));
409 		}
410 		free(sysid);
411 	}
412 }
413 
/*
 * Write the command line synopsis to stderr and exit with status 1.
 */
static
void
usage(void)
{
	fputs("usage: powerd [-dt] [-p hysteresis] [-u trigger_up]\n", stderr);
	exit(1);
}
421