xref: /dflybsd-src/usr.sbin/powerd/powerd.c (revision afd2da4dc9056ea79cdf15e8a9386a3d3998f33e)
1 /*
2  * Copyright (c) 2010 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 
/*
 * The powerd daemon:
 * - Monitors the cpu load and adjusts cpu and cpu power domain
 *   performance accordingly.
 * - Monitors battery life.  Raises alerts and shuts down the machine
 *   if battery life runs low.
 */
42 
43 #define _KERNEL_STRUCTURES
#include <sys/types.h>
#include <sys/sysctl.h>
#include <sys/kinfo.h>
#include <sys/file.h>
#include <sys/queue.h>
#include <sys/soundcard.h>
#include <sys/time.h>
#include <machine/cpufunc.h>
#include <machine/cpumask.h>
#include <err.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <syslog.h>
59 
60 #include "alert1.h"
61 
/* Upper bound on cpu power domains: worst case, 1 cpu per domain. */
#define MAXDOM		MAXCPU

/* Capacity of the frequency table read by acpi_getcpufreq_bin(). */
#define MAXFREQ		64
/* Size of the buffers holding an mwait C-state name. */
#define CST_STRLEN	16
66 
67 struct cpu_pwrdom {
68 	TAILQ_ENTRY(cpu_pwrdom)	dom_link;
69 	int			dom_id;
70 	int			dom_ncpus;
71 	cpumask_t		dom_cpumask;
72 };
73 
/*
 * Load tracking state; one instance per cpu plus one for the whole
 * machine (see get_nstate()).
 */
struct cpu_state {
	double	cpu_qavg;	/* load measured over the last poll */
	double	cpu_uavg;	/* used for speeding up */
	double	cpu_davg;	/* used for slowing down */
	int	cpu_limit;	/* nstate computed by the previous round */
	int	cpu_count;	/* upper bound applied to nstate */
	char	cpu_name[8];	/* label used in debug output */
};
82 
83 static void usage(void);
84 static void get_ncpus(void);
85 
86 /* usched cpumask */
87 static void get_uschedcpus(void);
88 static void set_uschedcpus(void);
89 
90 /* perfbias(4) */
91 static int has_perfbias(void);
92 static void set_perfbias(int, int);
93 
94 /* acpi(4) P-state */
95 static void acpi_getcpufreq_str(int, int *, int *);
96 static int acpi_getcpufreq_bin(int, int *, int *);
97 static void acpi_get_cpufreq(int, int *, int *);
98 static void acpi_set_cpufreq(int, int);
99 static int acpi_get_cpupwrdom(void);
100 
101 /* mwait C-state hint */
102 static int probe_cstate(void);
103 static void set_cstate(int, int);
104 
105 /* Performance monitoring */
106 static void init_perf(void);
107 static void mon_perf(double);
108 static void adj_perf(cpumask_t, cpumask_t);
109 static void adj_cpu_pwrdom(int, int);
110 static void adj_cpu_perf(int, int);
111 static void get_cputime(double);
112 static int get_nstate(struct cpu_state *, double);
113 static void add_spare_cpus(const cpumask_t, int);
114 static void restore_perf(void);
115 
116 /* Battery monitoring */
117 static int has_battery(void);
118 static int mon_battery(void);
119 static void low_battery_alert(int);
120 
121 /* Backlight */
122 static void restore_backlight(void);
123 
124 /* Runtime states for performance monitoring */
125 static int global_pcpu_limit;
126 static struct cpu_state pcpu_state[MAXCPU];
127 static struct cpu_state global_cpu_state;
128 static cpumask_t cpu_used;		/* cpus w/ high perf */
129 static cpumask_t cpu_pwrdom_used;	/* cpu power domains w/ high perf */
130 static cpumask_t usched_cpu_used;	/* cpus for usched */
131 
132 /* Constants */
133 static cpumask_t cpu_pwrdom_mask;	/* usable cpu power domains */
134 static int cpu2pwrdom[MAXCPU];		/* cpu to cpu power domain map */
135 static struct cpu_pwrdom *cpu_pwrdomain[MAXDOM];
136 static int NCpus;			/* # of cpus */
137 static char orig_global_cx[CST_STRLEN];
138 static char cpu_perf_cx[CST_STRLEN];
139 static int cpu_perf_cxlen;
140 static char cpu_idle_cx[CST_STRLEN];
141 static int cpu_idle_cxlen;
142 
143 static int DebugOpt;
144 static int TurboOpt = 1;
145 static int PowerFd;
146 static int Hysteresis = 10;	/* percentage */
147 static double TriggerUp = 0.25;	/* single-cpu load to force max freq */
148 static double TriggerDown;	/* load per cpu to force the min freq */
149 static int HasPerfbias = 0;
150 static int AdjustCpuFreq = 1;
151 static int AdjustCstate = 0;
152 static int HighestCpuFreq;
153 static int LowestCpuFreq;
154 
155 static volatile int stopped;
156 
157 /* Battery life monitoring */
158 static int BatLifeMin = 2;	/* shutdown the box, if low on battery life */
159 static struct timespec BatLifePrevT;
160 static int BatLifePollIntvl = 5; /* unit: sec */
161 static struct timespec BatShutdownStartT;
162 static int BatShutdownLinger = -1;
163 static int BatShutdownLingerSet = 60; /* unit: sec */
164 static int BatShutdownLingerCnt;
165 static int BatShutdownAudioAlert = 1;
166 static int BackLightPct = 100;
167 static int OldBackLightLevel;
168 static int BackLightDown;
169 
170 static void sigintr(int signo);
171 
172 int
173 main(int ac, char **av)
174 {
175 	double srt;
176 	double pollrate;
177 	int ch;
178 	char buf[64];
179 	int monbat;
180 
181 	srt = 8.0;	/* time for samples - 8 seconds */
182 	pollrate = 1.0;	/* polling rate in seconds */
183 
184 	while ((ch = getopt(ac, av, "b:cdefh:l:p:r:tu:B:L:P:QT:")) != -1) {
185 		switch(ch) {
186 		case 'b':
187 			BackLightPct = strtol(optarg, NULL, 10);
188 			break;
189 		case 'c':
190 			AdjustCstate = 1;
191 			break;
192 		case 'd':
193 			DebugOpt = 1;
194 			break;
195 		case 'e':
196 			HasPerfbias = 1;
197 			break;
198 		case 'f':
199 			AdjustCpuFreq = 0;
200 			break;
201 		case 'h':
202 			HighestCpuFreq = strtol(optarg, NULL, 10);
203 			break;
204 		case 'l':
205 			LowestCpuFreq = strtol(optarg, NULL, 10);
206 			break;
207 		case 'p':
208 			Hysteresis = (int)strtol(optarg, NULL, 10);
209 			break;
210 		case 'r':
211 			pollrate = strtod(optarg, NULL);
212 			break;
213 		case 't':
214 			TurboOpt = 0;
215 			break;
216 		case 'u':
217 			TriggerUp = (double)strtol(optarg, NULL, 10) / 100;
218 			break;
219 		case 'B':
220 			BatLifeMin = strtol(optarg, NULL, 10);
221 			break;
222 		case 'L':
223 			BatShutdownLingerSet = strtol(optarg, NULL, 10);
224 			if (BatShutdownLingerSet < 0)
225 				BatShutdownLingerSet = 0;
226 			break;
227 		case 'P':
228 			BatLifePollIntvl = strtol(optarg, NULL, 10);
229 			break;
230 		case 'Q':
231 			BatShutdownAudioAlert = 0;
232 			break;
233 		case 'T':
234 			srt = strtod(optarg, NULL);
235 			break;
236 		default:
237 			usage();
238 			/* NOT REACHED */
239 		}
240 	}
241 	ac -= optind;
242 	av += optind;
243 
244 	setlinebuf(stdout);
245 
246 	/* Get number of cpus */
247 	get_ncpus();
248 
249 	if (0 > Hysteresis || Hysteresis > 99) {
250 		fprintf(stderr, "Invalid hysteresis value\n");
251 		exit(1);
252 	}
253 
254 	if (0 > TriggerUp || TriggerUp > 1) {
255 		fprintf(stderr, "Invalid load limit value\n");
256 		exit(1);
257 	}
258 
259 	if (BackLightPct > 100 || BackLightPct <= 0) {
260 		fprintf(stderr, "Invalid backlight setting, ignore\n");
261 		BackLightPct = 100;
262 	}
263 
264 	TriggerDown = TriggerUp - (TriggerUp * (double) Hysteresis / 100);
265 
266 	/*
267 	 * Make sure powerd is not already running.
268 	 */
269 	PowerFd = open("/var/run/powerd.pid", O_CREAT|O_RDWR, 0644);
270 	if (PowerFd < 0) {
271 		fprintf(stderr,
272 			"Cannot create /var/run/powerd.pid, "
273 			"continuing anyway\n");
274 	} else {
275 		if (flock(PowerFd, LOCK_EX|LOCK_NB) < 0) {
276 			fprintf(stderr, "powerd is already running\n");
277 			exit(1);
278 		}
279 	}
280 
281 	/*
282 	 * Demonize and set pid
283 	 */
284 	if (DebugOpt == 0) {
285 		daemon(0, 0);
286 		openlog("powerd", LOG_CONS | LOG_PID, LOG_DAEMON);
287 	}
288 
289 	if (PowerFd >= 0) {
290 		ftruncate(PowerFd, 0);
291 		snprintf(buf, sizeof(buf), "%d\n", (int)getpid());
292 		write(PowerFd, buf, strlen(buf));
293 	}
294 
295 	/* Do we need to monitor battery life? */
296 	if (BatLifePollIntvl <= 0)
297 		monbat = 0;
298 	else
299 		monbat = has_battery();
300 
301 	/* Do we have perfbias(4)? */
302 	if (HasPerfbias)
303 		HasPerfbias = has_perfbias();
304 
305 	/* Could we adjust C-state? */
306 	if (AdjustCstate)
307 		AdjustCstate = probe_cstate();
308 
309 	/*
310 	 * Wait hw.acpi.cpu.px_dom* sysctl to be created by kernel.
311 	 *
312 	 * Since hw.acpi.cpu.px_dom* creation is queued into ACPI
313 	 * taskqueue and ACPI taskqueue is shared across various
314 	 * ACPI modules, any delay in other modules may cause
315 	 * hw.acpi.cpu.px_dom* to be created at quite a later time
316 	 * (e.g. cmbat module's task could take quite a lot of time).
317 	 */
318 	for (;;) {
319 		/* Prime delta cputime calculation. */
320 		get_cputime(pollrate);
321 
322 		/* Wait for all cpus to appear */
323 		if (acpi_get_cpupwrdom())
324 			break;
325 		usleep((int)(pollrate * 1000000.0));
326 	}
327 
328 	/*
329 	 * Catch some signals so that max performance could be restored.
330 	 */
331 	signal(SIGINT, sigintr);
332 	signal(SIGTERM, sigintr);
333 
334 	/* Initialize performance states */
335 	init_perf();
336 
337 	srt = srt / pollrate;	/* convert to sample count */
338 	if (DebugOpt)
339 		printf("samples for downgrading: %5.2f\n", srt);
340 
341 	/*
342 	 * Monitoring loop
343 	 */
344 	while (!stopped) {
345 		/*
346 		 * Monitor performance
347 		 */
348 		get_cputime(pollrate);
349 		mon_perf(srt);
350 
351 		/*
352 		 * Monitor battery
353 		 */
354 		if (monbat)
355 			monbat = mon_battery();
356 
357 		usleep((int)(pollrate * 1000000.0));
358 	}
359 
360 	/*
361 	 * Set to maximum performance if killed.
362 	 */
363 	syslog(LOG_INFO, "killed, setting max and exiting");
364 	restore_perf();
365 	restore_backlight();
366 
367 	exit(0);
368 }
369 
370 static void
371 sigintr(int signo __unused)
372 {
373 	stopped = 1;
374 }
375 
376 /*
377  * Figure out the cpu power domains.
378  */
379 static int
380 acpi_get_cpupwrdom(void)
381 {
382 	struct cpu_pwrdom *dom;
383 	cpumask_t pwrdom_mask;
384 	char buf[64];
385 	char members[1024];
386 	char *str;
387 	size_t msize;
388 	int n, i, ncpu = 0, dom_id;
389 
390 	memset(cpu2pwrdom, 0, sizeof(cpu2pwrdom));
391 	memset(cpu_pwrdomain, 0, sizeof(cpu_pwrdomain));
392 	CPUMASK_ASSZERO(cpu_pwrdom_mask);
393 
394 	for (i = 0; i < MAXDOM; ++i) {
395 		snprintf(buf, sizeof(buf),
396 			 "hw.acpi.cpu.px_dom%d.available", i);
397 		if (sysctlbyname(buf, NULL, NULL, NULL, 0) < 0)
398 			continue;
399 
400 		dom = calloc(1, sizeof(*dom));
401 		dom->dom_id = i;
402 
403 		if (cpu_pwrdomain[i] != NULL) {
404 			fprintf(stderr, "cpu power domain %d exists\n", i);
405 			exit(1);
406 		}
407 		cpu_pwrdomain[i] = dom;
408 		CPUMASK_ORBIT(cpu_pwrdom_mask, i);
409 	}
410 	pwrdom_mask = cpu_pwrdom_mask;
411 
412 	while (CPUMASK_TESTNZERO(pwrdom_mask)) {
413 		dom_id = BSFCPUMASK(pwrdom_mask);
414 		CPUMASK_NANDBIT(pwrdom_mask, dom_id);
415 		dom = cpu_pwrdomain[dom_id];
416 
417 		CPUMASK_ASSZERO(dom->dom_cpumask);
418 
419 		snprintf(buf, sizeof(buf),
420 			 "hw.acpi.cpu.px_dom%d.members", dom->dom_id);
421 		msize = sizeof(members);
422 		if (sysctlbyname(buf, members, &msize, NULL, 0) < 0) {
423 			cpu_pwrdomain[dom_id] = NULL;
424 			free(dom);
425 			continue;
426 		}
427 
428 		members[msize] = 0;
429 		for (str = strtok(members, " "); str; str = strtok(NULL, " ")) {
430 			n = -1;
431 			sscanf(str, "cpu%d", &n);
432 			if (n >= 0) {
433 				++ncpu;
434 				++dom->dom_ncpus;
435 				CPUMASK_ORBIT(dom->dom_cpumask, n);
436 				cpu2pwrdom[n] = dom->dom_id;
437 			}
438 		}
439 		if (dom->dom_ncpus == 0) {
440 			cpu_pwrdomain[dom_id] = NULL;
441 			free(dom);
442 			continue;
443 		}
444 		if (DebugOpt) {
445 			printf("dom%d cpumask: ", dom->dom_id);
446 			for (i = 0; i < (int)NELEM(dom->dom_cpumask.ary); ++i) {
447 				printf("%jx ",
448 				    (uintmax_t)dom->dom_cpumask.ary[i]);
449 			}
450 			printf("\n");
451 		}
452 	}
453 
454 	if (ncpu != NCpus) {
455 		if (DebugOpt)
456 			printf("Found %d cpus, expecting %d\n", ncpu, NCpus);
457 
458 		pwrdom_mask = cpu_pwrdom_mask;
459 		while (CPUMASK_TESTNZERO(pwrdom_mask)) {
460 			dom_id = BSFCPUMASK(pwrdom_mask);
461 			CPUMASK_NANDBIT(pwrdom_mask, dom_id);
462 			dom = cpu_pwrdomain[dom_id];
463 			if (dom != NULL)
464 				free(dom);
465 		}
466 		return 0;
467 	}
468 	return 1;
469 }
470 
471 /*
472  * Save per-cpu load and sum of per-cpu load.
473  */
474 static void
475 get_cputime(double pollrate)
476 {
477 	static struct kinfo_cputime ocpu_time[MAXCPU];
478 	static struct kinfo_cputime ncpu_time[MAXCPU];
479 	size_t slen;
480 	int ncpu;
481 	int cpu;
482 	uint64_t delta;
483 
484 	bcopy(ncpu_time, ocpu_time, sizeof(struct kinfo_cputime) * NCpus);
485 
486 	slen = sizeof(ncpu_time);
487 	if (sysctlbyname("kern.cputime", &ncpu_time, &slen, NULL, 0) < 0) {
488 		fprintf(stderr, "kern.cputime sysctl not available\n");
489 		exit(1);
490 	}
491 	ncpu = slen / sizeof(ncpu_time[0]);
492 
493 	delta = 0;
494 	for (cpu = 0; cpu < ncpu; ++cpu) {
495 		uint64_t d;
496 
497 		d = (ncpu_time[cpu].cp_user + ncpu_time[cpu].cp_sys +
498 		     ncpu_time[cpu].cp_nice + ncpu_time[cpu].cp_intr) -
499 		    (ocpu_time[cpu].cp_user + ocpu_time[cpu].cp_sys +
500 		     ocpu_time[cpu].cp_nice + ocpu_time[cpu].cp_intr);
501 		pcpu_state[cpu].cpu_qavg = (double)d / (pollrate * 1000000.0);
502 
503 		delta += d;
504 	}
505 	global_cpu_state.cpu_qavg = (double)delta / (pollrate * 1000000.0);
506 }
507 
508 static void
509 acpi_getcpufreq_str(int dom_id, int *highest0, int *lowest0)
510 {
511 	char buf[256], sysid[64];
512 	size_t buflen;
513 	char *ptr;
514 	int v, highest, lowest;
515 
516 	/*
517 	 * Retrieve availability list
518 	 */
519 	snprintf(sysid, sizeof(sysid), "hw.acpi.cpu.px_dom%d.available",
520 	    dom_id);
521 	buflen = sizeof(buf) - 1;
522 	if (sysctlbyname(sysid, buf, &buflen, NULL, 0) < 0)
523 		return;
524 	buf[buflen] = 0;
525 
526 	/*
527 	 * Parse out the highest and lowest cpu frequencies
528 	 */
529 	ptr = buf;
530 	highest = lowest = 0;
531 	while (ptr && (v = strtol(ptr, &ptr, 10)) > 0) {
532 		if ((lowest == 0 || lowest > v) &&
533 		    (LowestCpuFreq <= 0 || v >= LowestCpuFreq))
534 			lowest = v;
535 		if ((highest == 0 || highest < v) &&
536 		    (HighestCpuFreq <= 0 || v <= HighestCpuFreq))
537 			highest = v;
538 		/*
539 		 * Detect turbo mode
540 		 */
541 		if (!TurboOpt && highest - v == 1)
542 			highest = v;
543 	}
544 
545 	*highest0 = highest;
546 	*lowest0 = lowest;
547 }
548 
549 static int
550 acpi_getcpufreq_bin(int dom_id, int *highest0, int *lowest0)
551 {
552 	char sysid[64];
553 	int freq[MAXFREQ];
554 	size_t freqlen;
555 	int freqcnt, i;
556 
557 	/*
558 	 * Retrieve availability list
559 	 */
560 	snprintf(sysid, sizeof(sysid), "hw.acpi.cpu.px_dom%d.avail", dom_id);
561 	freqlen = sizeof(freq);
562 	if (sysctlbyname(sysid, freq, &freqlen, NULL, 0) < 0)
563 		return 0;
564 
565 	freqcnt = freqlen / sizeof(freq[0]);
566 	if (freqcnt == 0)
567 		return 0;
568 
569 	for (i = freqcnt - 1; i >= 0; --i) {
570 		*lowest0 = freq[i];
571 		if (LowestCpuFreq <= 0 || *lowest0 >= LowestCpuFreq)
572 			break;
573 	}
574 
575 	i = 0;
576 	*highest0 = freq[0];
577 	if (!TurboOpt && freqcnt > 1 && freq[0] - freq[1] == 1) {
578 		i = 1;
579 		*highest0 = freq[1];
580 	}
581 	for (; i < freqcnt; ++i) {
582 		if (HighestCpuFreq <= 0 || *highest0 <= HighestCpuFreq)
583 			break;
584 		*highest0 = freq[i];
585 	}
586 	return 1;
587 }
588 
/*
 * Look up the highest/lowest frequency of power domain dom_id.  The
 * binary sysctl is tried first and the string list is the fallback;
 * both outputs stay 0 if neither yields anything.
 */
static void
acpi_get_cpufreq(int dom_id, int *highest, int *lowest)
{
	*lowest = 0;
	*highest = 0;

	if (!acpi_getcpufreq_bin(dom_id, highest, lowest))
		acpi_getcpufreq_str(dom_id, highest, lowest);
}
599 
/*
 * Print the command line synopsis to stderr and exit with status 1.
 */
static void
usage(void)
{
	static const char synopsis[] =
	    "usage: powerd [-cdeftQ] [-p hysteresis] "
	    "[-h highest_freq] [-l lowest_freq] "
	    "[-r poll_interval] [-u trigger_up] "
	    "[-B min_battery_life] [-L low_battery_linger] "
	    "[-P battery_poll_interval] [-T sample_interval] "
	    "[-b backlight]\n";

	fputs(synopsis, stderr);
	exit(1);
}
612 
/*
 * Fallback used when no timespecsub macro is already defined:
 * *vvp -= *uvp, borrowing one second whenever the nanosecond field
 * goes negative.
 */
#ifndef timespecsub
#define timespecsub(vvp, uvp)						\
	do {								\
		(vvp)->tv_sec -= (uvp)->tv_sec;				\
		if (((vvp)->tv_nsec -= (uvp)->tv_nsec) < 0) {		\
			(vvp)->tv_nsec += 1000000000;			\
			(vvp)->tv_sec--;				\
		}							\
	} while (0)
#endif

/* Longest time a battery related sysctl may take; unit: nanosecond */
#define BAT_SYSCTL_TIME_MAX	50000000
626 
627 static int
628 has_battery(void)
629 {
630 	struct timespec s, e;
631 	size_t len;
632 	int val;
633 
634 	clock_gettime(CLOCK_MONOTONIC_FAST, &s);
635 	BatLifePrevT = s;
636 
637 	len = sizeof(val);
638 	if (sysctlbyname("hw.acpi.acline", &val, &len, NULL, 0) < 0) {
639 		/* No AC line information */
640 		return 0;
641 	}
642 	clock_gettime(CLOCK_MONOTONIC_FAST, &e);
643 
644 	timespecsub(&e, &s);
645 	if (e.tv_sec > 0 || e.tv_nsec > BAT_SYSCTL_TIME_MAX) {
646 		/* hw.acpi.acline takes to long to be useful */
647 		syslog(LOG_NOTICE, "hw.acpi.acline takes too long");
648 		return 0;
649 	}
650 
651 	clock_gettime(CLOCK_MONOTONIC_FAST, &s);
652 	len = sizeof(val);
653 	if (sysctlbyname("hw.acpi.battery.life", &val, &len, NULL, 0) < 0) {
654 		/* No battery life */
655 		return 0;
656 	}
657 	clock_gettime(CLOCK_MONOTONIC_FAST, &e);
658 
659 	timespecsub(&e, &s);
660 	if (e.tv_sec > 0 || e.tv_nsec > BAT_SYSCTL_TIME_MAX) {
661 		/* hw.acpi.battery.life takes to long to be useful */
662 		syslog(LOG_NOTICE, "hw.acpi.battery.life takes too long");
663 		return 0;
664 	}
665 	return 1;
666 }
667 
668 static void
669 low_battery_alert(int life)
670 {
671 	int fmt, stereo, freq;
672 	int fd;
673 
674 	syslog(LOG_ALERT, "low battery life %d%%, please plugin AC line, #%d",
675 	    life, BatShutdownLingerCnt);
676 	++BatShutdownLingerCnt;
677 
678 	if (!BatShutdownAudioAlert)
679 		return;
680 
681 	fd = open("/dev/dsp", O_WRONLY);
682 	if (fd < 0)
683 		return;
684 
685 	fmt = AFMT_S16_LE;
686 	if (ioctl(fd, SNDCTL_DSP_SETFMT, &fmt, sizeof(fmt)) < 0)
687 		goto done;
688 
689 	stereo = 0;
690 	if (ioctl(fd, SNDCTL_DSP_STEREO, &stereo, sizeof(stereo)) < 0)
691 		goto done;
692 
693 	freq = 44100;
694 	if (ioctl(fd, SNDCTL_DSP_SPEED, &freq, sizeof(freq)) < 0)
695 		goto done;
696 
697 	write(fd, alert1, sizeof(alert1));
698 	write(fd, alert1, sizeof(alert1));
699 
700 done:
701 	close(fd);
702 }
703 
704 static int
705 mon_battery(void)
706 {
707 	struct timespec cur, ts;
708 	int acline, life;
709 	size_t len;
710 
711 	clock_gettime(CLOCK_MONOTONIC_FAST, &cur);
712 	ts = cur;
713 	timespecsub(&ts, &BatLifePrevT);
714 	if (ts.tv_sec < BatLifePollIntvl)
715 		return 1;
716 	BatLifePrevT = cur;
717 
718 	len = sizeof(acline);
719 	if (sysctlbyname("hw.acpi.acline", &acline, &len, NULL, 0) < 0)
720 		return 1;
721 	if (acline) {
722 		BatShutdownLinger = -1;
723 		BatShutdownLingerCnt = 0;
724 		restore_backlight();
725 		return 1;
726 	}
727 
728 	if (!BackLightDown && BackLightPct != 100) {
729 		int backlight_max, backlight;
730 
731 		len = sizeof(backlight_max);
732 		if (sysctlbyname("hw.backlight_max", &backlight_max, &len,
733 		    NULL, 0) < 0) {
734 			/* No more backlight adjustment */
735 			BackLightPct = 100;
736 			goto after_backlight;
737 		}
738 
739 		len = sizeof(OldBackLightLevel);
740 		if (sysctlbyname("hw.backlight_level", &OldBackLightLevel, &len,
741 		    NULL, 0) < 0) {
742 			/* No more backlight adjustment */
743 			BackLightPct = 100;
744 			goto after_backlight;
745 		}
746 
747 		backlight = (backlight_max * BackLightPct) / 100;
748 		if (backlight >= OldBackLightLevel) {
749 			/* No more backlight adjustment */
750 			BackLightPct = 100;
751 			goto after_backlight;
752 		}
753 
754 		if (sysctlbyname("hw.backlight_level", NULL, NULL,
755 		    &backlight, sizeof(backlight)) < 0) {
756 			/* No more backlight adjustment */
757 			BackLightPct = 100;
758 			goto after_backlight;
759 		}
760 		BackLightDown = 1;
761 	}
762 after_backlight:
763 
764 	len = sizeof(life);
765 	if (sysctlbyname("hw.acpi.battery.life", &life, &len, NULL, 0) < 0)
766 		return 1;
767 
768 	if (BatShutdownLinger > 0) {
769 		ts = cur;
770 		timespecsub(&ts, &BatShutdownStartT);
771 		if (ts.tv_sec > BatShutdownLinger)
772 			BatShutdownLinger = 0;
773 	}
774 
775 	if (life <= BatLifeMin) {
776 		if (BatShutdownLinger == 0 || BatShutdownLingerSet == 0) {
777 			syslog(LOG_ALERT, "low battery life %d%%, "
778 			    "shutting down", life);
779 			if (vfork() == 0)
780 				execlp("poweroff", "poweroff", NULL);
781 			return 0;
782 		} else if (BatShutdownLinger < 0) {
783 			BatShutdownLinger = BatShutdownLingerSet;
784 			BatShutdownStartT = cur;
785 		}
786 		low_battery_alert(life);
787 	}
788 	return 1;
789 }
790 
791 static void
792 get_ncpus(void)
793 {
794 	size_t slen;
795 
796 	slen = sizeof(NCpus);
797 	if (sysctlbyname("hw.ncpu", &NCpus, &slen, NULL, 0) < 0)
798 		err(1, "sysctlbyname hw.ncpu failed");
799 	if (DebugOpt)
800 		printf("hw.ncpu %d\n", NCpus);
801 }
802 
803 static void
804 get_uschedcpus(void)
805 {
806 	size_t slen;
807 
808 	slen = sizeof(usched_cpu_used);
809 	if (sysctlbyname("kern.usched_global_cpumask", &usched_cpu_used, &slen,
810 	    NULL, 0) < 0)
811 		err(1, "sysctlbyname kern.usched_global_cpumask failed");
812 	if (DebugOpt) {
813 		int i;
814 
815 		printf("usched cpumask was: ");
816 		for (i = 0; i < (int)NELEM(usched_cpu_used.ary); ++i)
817 			printf("%jx ", (uintmax_t)usched_cpu_used.ary[i]);
818 		printf("\n");
819 	}
820 }
821 
822 static void
823 set_uschedcpus(void)
824 {
825 	if (DebugOpt) {
826 		int i;
827 
828 		printf("usched cpumask: ");
829 		for (i = 0; i < (int)NELEM(usched_cpu_used.ary); ++i) {
830 			printf("%jx ",
831 			    (uintmax_t)usched_cpu_used.ary[i]);
832 		}
833 		printf("\n");
834 	}
835 	sysctlbyname("kern.usched_global_cpumask", NULL, 0,
836 	    &usched_cpu_used, sizeof(usched_cpu_used));
837 }
838 
839 static int
840 has_perfbias(void)
841 {
842 	size_t len;
843 	int hint;
844 
845 	len = sizeof(hint);
846 	if (sysctlbyname("machdep.perfbias0.hint", &hint, &len, NULL, 0) < 0)
847 		return 0;
848 	return 1;
849 }
850 
851 static void
852 set_perfbias(int cpu, int inc)
853 {
854 	int hint = inc ? 0 : 15;
855 	char sysid[64];
856 
857 	if (DebugOpt)
858 		printf("cpu%d set perfbias hint %d\n", cpu, hint);
859 	snprintf(sysid, sizeof(sysid), "machdep.perfbias%d.hint", cpu);
860 	sysctlbyname(sysid, NULL, NULL, &hint, sizeof(hint));
861 }
862 
863 static void
864 init_perf(void)
865 {
866 	struct cpu_state *state;
867 	int cpu;
868 
869 	/* Get usched cpumask */
870 	get_uschedcpus();
871 
872 	/*
873 	 * Assume everything are used and are maxed out, before we
874 	 * start.
875 	 */
876 
877 	CPUMASK_ASSBMASK(cpu_used, NCpus);
878 	cpu_pwrdom_used = cpu_pwrdom_mask;
879 	global_pcpu_limit = NCpus;
880 
881 	for (cpu = 0; cpu < NCpus; ++cpu) {
882 		state = &pcpu_state[cpu];
883 
884 		state->cpu_uavg = 0.0;
885 		state->cpu_davg = 0.0;
886 		state->cpu_limit = 1;
887 		state->cpu_count = 1;
888 		snprintf(state->cpu_name, sizeof(state->cpu_name), "cpu%d",
889 		    cpu);
890 	}
891 
892 	state = &global_cpu_state;
893 	state->cpu_uavg = 0.0;
894 	state->cpu_davg = 0.0;
895 	state->cpu_limit = NCpus;
896 	state->cpu_count = NCpus;
897 	strlcpy(state->cpu_name, "global", sizeof(state->cpu_name));
898 }
899 
900 static int
901 get_nstate(struct cpu_state *state, double srt)
902 {
903 	int ustate, dstate, nstate;
904 
905 	/* speeding up */
906 	state->cpu_uavg = (state->cpu_uavg * 2.0 + state->cpu_qavg) / 3.0;
907 	/* slowing down */
908 	state->cpu_davg = (state->cpu_davg * srt + state->cpu_qavg) / (srt + 1);
909 	if (state->cpu_davg < state->cpu_uavg)
910 		state->cpu_davg = state->cpu_uavg;
911 
912 	ustate = state->cpu_uavg / TriggerUp;
913 	if (ustate < state->cpu_limit)
914 		ustate = state->cpu_uavg / TriggerDown;
915 	dstate = state->cpu_davg / TriggerUp;
916 	if (dstate < state->cpu_limit)
917 		dstate = state->cpu_davg / TriggerDown;
918 
919 	nstate = (ustate > dstate) ? ustate : dstate;
920 	if (nstate > state->cpu_count)
921 		nstate = state->cpu_count;
922 
923 	if (DebugOpt) {
924 		printf("%s qavg=%5.2f uavg=%5.2f davg=%5.2f "
925 		    "%2d ncpus=%d\n", state->cpu_name,
926 		    state->cpu_qavg, state->cpu_uavg, state->cpu_davg,
927 		    state->cpu_limit, nstate);
928 	}
929 	return nstate;
930 }
931 
932 static void
933 mon_perf(double srt)
934 {
935 	cpumask_t ocpu_used, ocpu_pwrdom_used;
936 	int pnstate = 0, nstate;
937 	int cpu;
938 
939 	/*
940 	 * Find cpus requiring performance and their cooresponding power
941 	 * domains.  Save the number of cpus requiring performance in
942 	 * pnstate.
943 	 */
944 	ocpu_used = cpu_used;
945 	ocpu_pwrdom_used = cpu_pwrdom_used;
946 
947 	CPUMASK_ASSZERO(cpu_used);
948 	CPUMASK_ASSZERO(cpu_pwrdom_used);
949 
950 	for (cpu = 0; cpu < NCpus; ++cpu) {
951 		struct cpu_state *state = &pcpu_state[cpu];
952 		int s;
953 
954 		s = get_nstate(state, srt);
955 		if (s) {
956 			CPUMASK_ORBIT(cpu_used, cpu);
957 			CPUMASK_ORBIT(cpu_pwrdom_used, cpu2pwrdom[cpu]);
958 		}
959 		pnstate += s;
960 
961 		state->cpu_limit = s;
962 	}
963 
964 	/*
965 	 * Calculate nstate, the number of cpus we wish to run at max
966 	 * performance.
967 	 */
968 	nstate = get_nstate(&global_cpu_state, srt);
969 
970 	if (nstate == global_cpu_state.cpu_limit &&
971 	    (pnstate == global_pcpu_limit || nstate > pnstate)) {
972 		/* Nothing changed; keep the sets */
973 		cpu_used = ocpu_used;
974 		cpu_pwrdom_used = ocpu_pwrdom_used;
975 
976 		global_pcpu_limit = pnstate;
977 		return;
978 	}
979 	global_pcpu_limit = pnstate;
980 
981 	if (nstate > pnstate) {
982 		/*
983 		 * Add spare cpus to meet global performance requirement.
984 		 */
985 		add_spare_cpus(ocpu_used, nstate - pnstate);
986 	}
987 
988 	global_cpu_state.cpu_limit = nstate;
989 
990 	/*
991 	 * Adjust cpu and cpu power domain performance
992 	 */
993 	adj_perf(ocpu_used, ocpu_pwrdom_used);
994 }
995 
996 static void
997 add_spare_cpus(const cpumask_t ocpu_used, int ncpu)
998 {
999 	cpumask_t saved_pwrdom, xcpu_used;
1000 	int done = 0, cpu;
1001 
1002 	/*
1003 	 * Find more cpus in the previous cpu set.
1004 	 */
1005 	xcpu_used = cpu_used;
1006 	CPUMASK_XORMASK(xcpu_used, ocpu_used);
1007 	while (CPUMASK_TESTNZERO(xcpu_used)) {
1008 		cpu = BSFCPUMASK(xcpu_used);
1009 		CPUMASK_NANDBIT(xcpu_used, cpu);
1010 
1011 		if (CPUMASK_TESTBIT(ocpu_used, cpu)) {
1012 			CPUMASK_ORBIT(cpu_pwrdom_used, cpu2pwrdom[cpu]);
1013 			CPUMASK_ORBIT(cpu_used, cpu);
1014 			--ncpu;
1015 			if (ncpu == 0)
1016 				return;
1017 		}
1018 	}
1019 
1020 	/*
1021 	 * Find more cpus in the used cpu power domains.
1022 	 */
1023 	saved_pwrdom = cpu_pwrdom_used;
1024 again:
1025 	while (CPUMASK_TESTNZERO(saved_pwrdom)) {
1026 		cpumask_t unused_cpumask;
1027 		int dom;
1028 
1029 		dom = BSFCPUMASK(saved_pwrdom);
1030 		CPUMASK_NANDBIT(saved_pwrdom, dom);
1031 
1032 		unused_cpumask = cpu_pwrdomain[dom]->dom_cpumask;
1033 		CPUMASK_NANDMASK(unused_cpumask, cpu_used);
1034 
1035 		while (CPUMASK_TESTNZERO(unused_cpumask)) {
1036 			cpu = BSFCPUMASK(unused_cpumask);
1037 			CPUMASK_NANDBIT(unused_cpumask, cpu);
1038 
1039 			CPUMASK_ORBIT(cpu_pwrdom_used, dom);
1040 			CPUMASK_ORBIT(cpu_used, cpu);
1041 			--ncpu;
1042 			if (ncpu == 0)
1043 				return;
1044 		}
1045 	}
1046 	if (!done) {
1047 		done = 1;
1048 		/*
1049 		 * Find more cpus in unused cpu power domains
1050 		 */
1051 		saved_pwrdom = cpu_pwrdom_mask;
1052 		CPUMASK_NANDMASK(saved_pwrdom, cpu_pwrdom_used);
1053 		goto again;
1054 	}
1055 	if (DebugOpt)
1056 		printf("%d cpus not found\n", ncpu);
1057 }
1058 
1059 static void
1060 acpi_set_cpufreq(int dom, int inc)
1061 {
1062 	int lowest, highest, desired;
1063 	char sysid[64];
1064 
1065 	acpi_get_cpufreq(dom, &highest, &lowest);
1066 	if (highest == 0 || lowest == 0)
1067 		return;
1068 	desired = inc ? highest : lowest;
1069 
1070 	if (DebugOpt)
1071 		printf("dom%d set frequency %d\n", dom, desired);
1072 	snprintf(sysid, sizeof(sysid), "hw.acpi.cpu.px_dom%d.select", dom);
1073 	sysctlbyname(sysid, NULL, NULL, &desired, sizeof(desired));
1074 }
1075 
1076 static void
1077 adj_cpu_pwrdom(int dom, int inc)
1078 {
1079 	if (AdjustCpuFreq)
1080 		acpi_set_cpufreq(dom, inc);
1081 }
1082 
1083 static void
1084 adj_cpu_perf(int cpu, int inc)
1085 {
1086 	if (DebugOpt) {
1087 		if (inc)
1088 			printf("cpu%d increase perf\n", cpu);
1089 		else
1090 			printf("cpu%d decrease perf\n", cpu);
1091 	}
1092 
1093 	if (HasPerfbias)
1094 		set_perfbias(cpu, inc);
1095 	if (AdjustCstate)
1096 		set_cstate(cpu, inc);
1097 }
1098 
1099 static void
1100 adj_perf(cpumask_t xcpu_used, cpumask_t xcpu_pwrdom_used)
1101 {
1102 	cpumask_t old_usched_used;
1103 	int cpu, inc;
1104 
1105 	/*
1106 	 * Set cpus requiring performance to the userland process
1107 	 * scheduler.  Leave the rest of cpus unmapped.
1108 	 */
1109 	old_usched_used = usched_cpu_used;
1110 	usched_cpu_used = cpu_used;
1111 	if (CPUMASK_TESTZERO(usched_cpu_used))
1112 		CPUMASK_ORBIT(usched_cpu_used, 0);
1113 	if (CPUMASK_CMPMASKNEQ(usched_cpu_used, old_usched_used))
1114 		set_uschedcpus();
1115 
1116 	/*
1117 	 * Adjust per-cpu performance.
1118 	 */
1119 	CPUMASK_XORMASK(xcpu_used, cpu_used);
1120 	while (CPUMASK_TESTNZERO(xcpu_used)) {
1121 		cpu = BSFCPUMASK(xcpu_used);
1122 		CPUMASK_NANDBIT(xcpu_used, cpu);
1123 
1124 		if (CPUMASK_TESTBIT(cpu_used, cpu)) {
1125 			/* Increase cpu performance */
1126 			inc = 1;
1127 		} else {
1128 			/* Decrease cpu performance */
1129 			inc = 0;
1130 		}
1131 		adj_cpu_perf(cpu, inc);
1132 	}
1133 
1134 	/*
1135 	 * Adjust cpu power domain performance.  This could affect
1136 	 * a set of cpus.
1137 	 */
1138 	CPUMASK_XORMASK(xcpu_pwrdom_used, cpu_pwrdom_used);
1139 	while (CPUMASK_TESTNZERO(xcpu_pwrdom_used)) {
1140 		int dom;
1141 
1142 		dom = BSFCPUMASK(xcpu_pwrdom_used);
1143 		CPUMASK_NANDBIT(xcpu_pwrdom_used, dom);
1144 
1145 		if (CPUMASK_TESTBIT(cpu_pwrdom_used, dom)) {
1146 			/* Increase cpu power domain performance */
1147 			inc = 1;
1148 		} else {
1149 			/* Decrease cpu power domain performance */
1150 			inc = 0;
1151 		}
1152 		adj_cpu_pwrdom(dom, inc);
1153 	}
1154 }
1155 
1156 static void
1157 restore_perf(void)
1158 {
1159 	cpumask_t ocpu_used, ocpu_pwrdom_used;
1160 
1161 	/* Remove highest cpu frequency limitation */
1162 	HighestCpuFreq = 0;
1163 
1164 	ocpu_used = cpu_used;
1165 	ocpu_pwrdom_used = cpu_pwrdom_used;
1166 
1167 	/* Max out all cpus and cpu power domains performance */
1168 	CPUMASK_ASSBMASK(cpu_used, NCpus);
1169 	cpu_pwrdom_used = cpu_pwrdom_mask;
1170 
1171 	adj_perf(ocpu_used, ocpu_pwrdom_used);
1172 
1173 	if (AdjustCstate) {
1174 		/*
1175 		 * Restore the original mwait C-state
1176 		 */
1177 		if (DebugOpt)
1178 			printf("global set cstate %s\n", orig_global_cx);
1179 		sysctlbyname("machdep.mwait.CX.idle", NULL, NULL,
1180 		    orig_global_cx, strlen(orig_global_cx) + 1);
1181 	}
1182 }
1183 
1184 static int
1185 probe_cstate(void)
1186 {
1187 	char cx_supported[1024];
1188 	const char *target;
1189 	char *ptr;
1190 	int idle_hlt, deep = 1;
1191 	size_t len;
1192 
1193 	len = sizeof(idle_hlt);
1194 	if (sysctlbyname("machdep.cpu_idle_hlt", &idle_hlt, &len, NULL, 0) < 0)
1195 		return 0;
1196 	if (idle_hlt != 1)
1197 		return 0;
1198 
1199 	len = sizeof(cx_supported);
1200 	if (sysctlbyname("machdep.mwait.CX.supported", cx_supported, &len,
1201 	    NULL, 0) < 0)
1202 		return 0;
1203 
1204 	len = sizeof(orig_global_cx);
1205 	if (sysctlbyname("machdep.mwait.CX.idle", orig_global_cx, &len,
1206 	    NULL, 0) < 0)
1207 		return 0;
1208 
1209 	strlcpy(cpu_perf_cx, "AUTODEEP", sizeof(cpu_perf_cx));
1210 	cpu_perf_cxlen = strlen(cpu_perf_cx) + 1;
1211 	if (sysctlbyname("machdep.mwait.CX.idle", NULL, NULL,
1212 	    cpu_perf_cx, cpu_perf_cxlen) < 0) {
1213 		/* AUTODEEP is not supported; try AUTO */
1214 		deep = 0;
1215 		strlcpy(cpu_perf_cx, "AUTO", sizeof(cpu_perf_cx));
1216 		cpu_perf_cxlen = strlen(cpu_perf_cx) + 1;
1217 		if (sysctlbyname("machdep.mwait.CX.idle", NULL, NULL,
1218 		    cpu_perf_cx, cpu_perf_cxlen) < 0)
1219 			return 0;
1220 	}
1221 
1222 	if (!deep)
1223 		target = "C2/0";
1224 	else
1225 		target = NULL;
1226 	for (ptr = strtok(cx_supported, " "); ptr != NULL;
1227 	     ptr = strtok(NULL, " ")) {
1228 		if (target == NULL ||
1229 		    (target != NULL && strcmp(ptr, target) == 0)) {
1230 			strlcpy(cpu_idle_cx, ptr, sizeof(cpu_idle_cx));
1231 			cpu_idle_cxlen = strlen(cpu_idle_cx) + 1;
1232 			if (target != NULL)
1233 				break;
1234 		}
1235 	}
1236 	if (cpu_idle_cxlen == 0)
1237 		return 0;
1238 
1239 	if (DebugOpt) {
1240 		printf("cstate orig %s, perf %s, idle %s\n",
1241 		    orig_global_cx, cpu_perf_cx, cpu_idle_cx);
1242 	}
1243 	return 1;
1244 }
1245 
1246 static void
1247 set_cstate(int cpu, int inc)
1248 {
1249 	const char *cst;
1250 	char sysid[64];
1251 	size_t len;
1252 
1253 	if (inc) {
1254 		cst = cpu_perf_cx;
1255 		len = cpu_perf_cxlen;
1256 	} else {
1257 		cst = cpu_idle_cx;
1258 		len = cpu_idle_cxlen;
1259 	}
1260 
1261 	if (DebugOpt)
1262 		printf("cpu%d set cstate %s\n", cpu, cst);
1263 	snprintf(sysid, sizeof(sysid), "machdep.mwait.CX.idle%d", cpu);
1264 	sysctlbyname(sysid, NULL, NULL, cst, len);
1265 }
1266 
1267 static void
1268 restore_backlight(void)
1269 {
1270 	if (BackLightDown) {
1271 		BackLightDown = 0;
1272 		sysctlbyname("hw.backlight_level", NULL, NULL,
1273 		    &OldBackLightLevel, sizeof(OldBackLightLevel));
1274 	}
1275 }
1276