xref: /dflybsd-src/usr.sbin/powerd/powerd.c (revision 05172c8dd418493b9dd5ea9bf9cc684f3cf2e705)
1 /*
2  * Copyright (c) 2010 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 
/*
 * The powerd daemon:
 * - Monitors the cpu load and adjusts cpu and cpu power domain
 *   performance accordingly.
 * - Monitors battery life.  Raises alerts and shuts down the machine
 *   if battery life runs low.
 */
42 
#define _KERNEL_STRUCTURES
#include <sys/types.h>
#include <sys/sysctl.h>
#include <sys/kinfo.h>
#include <sys/file.h>
#include <sys/queue.h>
#include <sys/soundcard.h>
#include <sys/time.h>
#include <machine/cpufunc.h>
#include <err.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <syslog.h>
#include <unistd.h>
58 
59 #include "alert1.h"
60 
/* Upper bound on cpu power domains: worst case is one cpu per domain. */
#define MAXDOM		MAXCPU

/* Capacity of the binary P-state frequency table read via sysctl. */
#define MAXFREQ		64
/* Size of the buffers that hold mwait C-state names. */
#define CST_STRLEN	16
65 
66 struct cpu_pwrdom {
67 	TAILQ_ENTRY(cpu_pwrdom)	dom_link;
68 	int			dom_id;
69 	int			dom_ncpus;
70 	cpumask_t		dom_cpumask;
71 };
72 
/*
 * Load tracking state, kept once per cpu and once for the whole system.
 */
struct cpu_state {
	/* Load measured over the most recent poll interval. */
	double			cpu_qavg;
	/* Short moving average of cpu_qavg; used for speeding up. */
	double			cpu_uavg;
	/* Long moving average of cpu_qavg; used for slowing down. */
	double			cpu_davg;
	/* State chosen by the previous poll. */
	int			cpu_limit;
	/* Largest state get_nstate() may return for this entry. */
	int			cpu_count;
	/* Label printed in debug output ("cpuN" or "global"). */
	char			cpu_name[8];
};
81 
82 static void usage(void);
83 static void get_ncpus(void);
84 
85 /* usched cpumask */
86 static void get_uschedcpus(void);
87 static void set_uschedcpus(void);
88 
89 /* perfbias(4) */
90 static int has_perfbias(void);
91 static void set_perfbias(int, int);
92 
93 /* acpi(4) P-state */
94 static void acpi_getcpufreq_str(int, int *, int *);
95 static int acpi_getcpufreq_bin(int, int *, int *);
96 static void acpi_get_cpufreq(int, int *, int *);
97 static void acpi_set_cpufreq(int, int);
98 static int acpi_get_cpupwrdom(void);
99 
100 /* mwait C-state hint */
101 static int probe_cstate(void);
102 static void set_cstate(int, int);
103 
104 /* Performance monitoring */
105 static void init_perf(void);
106 static void mon_perf(double);
107 static void adj_perf(cpumask_t, cpumask_t);
108 static void adj_cpu_pwrdom(int, int);
109 static void adj_cpu_perf(int, int);
110 static void get_cputime(double);
111 static int get_nstate(struct cpu_state *, double);
112 static void add_spare_cpus(const cpumask_t, int);
113 static void restore_perf(void);
114 
115 /* Battery monitoring */
116 static int has_battery(void);
117 static int mon_battery(void);
118 static void low_battery_alert(int);
119 
120 /* Runtime states for performance monitoring */
121 static int global_pcpu_limit;
122 static struct cpu_state pcpu_state[MAXCPU];
123 static struct cpu_state global_cpu_state;
124 static cpumask_t cpu_used;		/* cpus w/ high perf */
125 static cpumask_t cpu_pwrdom_used;	/* cpu power domains w/ high perf */
126 static cpumask_t usched_cpu_used;	/* cpus for usched */
127 
128 /* Constants */
129 static cpumask_t cpu_pwrdom_mask;	/* usable cpu power domains */
130 static int cpu2pwrdom[MAXCPU];		/* cpu to cpu power domain map */
131 static struct cpu_pwrdom *cpu_pwrdomain[MAXDOM];
132 static int NCpus;			/* # of cpus */
133 static char orig_global_cx[CST_STRLEN];
134 static char cpu_perf_cx[CST_STRLEN];
135 static int cpu_perf_cxlen;
136 static char cpu_idle_cx[CST_STRLEN];
137 static int cpu_idle_cxlen;
138 
/*
 * Settings, mostly driven by the command line.
 */
static int DebugOpt;			/* -d: stay in foreground, be verbose */
static int TurboOpt = 1;		/* cleared by -t */
static int PowerFd;			/* descriptor of /var/run/powerd.pid */
static int Hysteresis = 10;		/* -p: percentage */
static double TriggerUp = 0.25;		/* -u: single-cpu load to force max freq */
static double TriggerDown;		/* load per cpu to force the min freq */
static int HasPerfbias;			/* -e: use perfbias(4) if present */
static int AdjustCpuFreq = 1;		/* cleared by -f */
static int AdjustCstate;		/* -c: adjust mwait C-state if possible */
static int HighestCpuFreq;		/* -h: upper frequency bound, 0 if none */
static int LowestCpuFreq;		/* -l: lower frequency bound, 0 if none */
150 
/*
 * Set by the SIGINT/SIGTERM handler and polled by the monitoring loop.
 * sig_atomic_t is the only object type a signal handler may portably
 * store to.
 */
static volatile sig_atomic_t stopped;
152 
/*
 * Battery life monitoring.
 */
/* -B: shut the box down once battery life drops to this value. */
static int BatLifeMin = 2;
/* Time of the previous battery poll. */
static struct timespec BatLifePrevT;
/* -P: seconds between battery polls; <= 0 disables monitoring. */
static int BatLifePollIntvl = 5;
/* Time at which the pre-shutdown linger period started. */
static struct timespec BatShutdownStartT;
/* Linger period in effect: -1 not started, 0 expired, else seconds. */
static int BatShutdownLinger = -1;
/* -L: configured linger period in seconds. */
static int BatShutdownLingerSet = 60;
/* Number of low battery alerts raised so far. */
static int BatShutdownLingerCnt;
/* Cleared by -Q to silence the audible alert. */
static int BatShutdownAudioAlert = 1;

/* Signal handler requesting a clean exit. */
static void sigintr(int signo);
164 
165 int
166 main(int ac, char **av)
167 {
168 	double srt;
169 	double pollrate;
170 	int ch;
171 	char buf[64];
172 	int monbat;
173 
174 	srt = 8.0;	/* time for samples - 8 seconds */
175 	pollrate = 1.0;	/* polling rate in seconds */
176 
177 	while ((ch = getopt(ac, av, "cdefh:l:p:r:tu:B:L:P:QT:")) != -1) {
178 		switch(ch) {
179 		case 'c':
180 			AdjustCstate = 1;
181 			break;
182 		case 'd':
183 			DebugOpt = 1;
184 			break;
185 		case 'e':
186 			HasPerfbias = 1;
187 			break;
188 		case 'f':
189 			AdjustCpuFreq = 0;
190 			break;
191 		case 'h':
192 			HighestCpuFreq = strtol(optarg, NULL, 10);
193 			break;
194 		case 'l':
195 			LowestCpuFreq = strtol(optarg, NULL, 10);
196 			break;
197 		case 'p':
198 			Hysteresis = (int)strtol(optarg, NULL, 10);
199 			break;
200 		case 'r':
201 			pollrate = strtod(optarg, NULL);
202 			break;
203 		case 't':
204 			TurboOpt = 0;
205 			break;
206 		case 'u':
207 			TriggerUp = (double)strtol(optarg, NULL, 10) / 100;
208 			break;
209 		case 'B':
210 			BatLifeMin = strtol(optarg, NULL, 10);
211 			break;
212 		case 'L':
213 			BatShutdownLingerSet = strtol(optarg, NULL, 10);
214 			if (BatShutdownLingerSet < 0)
215 				BatShutdownLingerSet = 0;
216 			break;
217 		case 'P':
218 			BatLifePollIntvl = strtol(optarg, NULL, 10);
219 			break;
220 		case 'Q':
221 			BatShutdownAudioAlert = 0;
222 			break;
223 		case 'T':
224 			srt = strtod(optarg, NULL);
225 			break;
226 		default:
227 			usage();
228 			/* NOT REACHED */
229 		}
230 	}
231 	ac -= optind;
232 	av += optind;
233 
234 	setlinebuf(stdout);
235 
236 	/* Get number of cpus */
237 	get_ncpus();
238 
239 	if (0 > Hysteresis || Hysteresis > 99) {
240 		fprintf(stderr, "Invalid hysteresis value\n");
241 		exit(1);
242 	}
243 
244 	if (0 > TriggerUp || TriggerUp > 1) {
245 		fprintf(stderr, "Invalid load limit value\n");
246 		exit(1);
247 	}
248 
249 	TriggerDown = TriggerUp - (TriggerUp * (double) Hysteresis / 100);
250 
251 	/*
252 	 * Make sure powerd is not already running.
253 	 */
254 	PowerFd = open("/var/run/powerd.pid", O_CREAT|O_RDWR, 0644);
255 	if (PowerFd < 0) {
256 		fprintf(stderr,
257 			"Cannot create /var/run/powerd.pid, "
258 			"continuing anyway\n");
259 	} else {
260 		if (flock(PowerFd, LOCK_EX|LOCK_NB) < 0) {
261 			fprintf(stderr, "powerd is already running\n");
262 			exit(1);
263 		}
264 	}
265 
266 	/*
267 	 * Demonize and set pid
268 	 */
269 	if (DebugOpt == 0) {
270 		daemon(0, 0);
271 		openlog("powerd", LOG_CONS | LOG_PID, LOG_DAEMON);
272 	}
273 
274 	if (PowerFd >= 0) {
275 		ftruncate(PowerFd, 0);
276 		snprintf(buf, sizeof(buf), "%d\n", (int)getpid());
277 		write(PowerFd, buf, strlen(buf));
278 	}
279 
280 	/* Do we need to monitor battery life? */
281 	if (BatLifePollIntvl <= 0)
282 		monbat = 0;
283 	else
284 		monbat = has_battery();
285 
286 	/* Do we have perfbias(4)? */
287 	if (HasPerfbias)
288 		HasPerfbias = has_perfbias();
289 
290 	/* Could we adjust C-state? */
291 	if (AdjustCstate)
292 		AdjustCstate = probe_cstate();
293 
294 	/*
295 	 * Wait hw.acpi.cpu.px_dom* sysctl to be created by kernel.
296 	 *
297 	 * Since hw.acpi.cpu.px_dom* creation is queued into ACPI
298 	 * taskqueue and ACPI taskqueue is shared across various
299 	 * ACPI modules, any delay in other modules may cause
300 	 * hw.acpi.cpu.px_dom* to be created at quite a later time
301 	 * (e.g. cmbat module's task could take quite a lot of time).
302 	 */
303 	for (;;) {
304 		/* Prime delta cputime calculation. */
305 		get_cputime(pollrate);
306 
307 		/* Wait for all cpus to appear */
308 		if (acpi_get_cpupwrdom())
309 			break;
310 		usleep((int)(pollrate * 1000000.0));
311 	}
312 
313 	/*
314 	 * Catch some signals so that max performance could be restored.
315 	 */
316 	signal(SIGINT, sigintr);
317 	signal(SIGTERM, sigintr);
318 
319 	/* Initialize performance states */
320 	init_perf();
321 
322 	srt = srt / pollrate;	/* convert to sample count */
323 	if (DebugOpt)
324 		printf("samples for downgrading: %5.2f\n", srt);
325 
326 	/*
327 	 * Monitoring loop
328 	 */
329 	while (!stopped) {
330 		/*
331 		 * Monitor performance
332 		 */
333 		get_cputime(pollrate);
334 		mon_perf(srt);
335 
336 		/*
337 		 * Monitor battery
338 		 */
339 		if (monbat)
340 			monbat = mon_battery();
341 
342 		usleep((int)(pollrate * 1000000.0));
343 	}
344 
345 	/*
346 	 * Set to maximum performance if killed.
347 	 */
348 	syslog(LOG_INFO, "killed, setting max and exiting");
349 	restore_perf();
350 
351 	exit(0);
352 }
353 
354 static void
355 sigintr(int signo __unused)
356 {
357 	stopped = 1;
358 }
359 
360 /*
361  * Figure out the cpu power domains.
362  */
363 static int
364 acpi_get_cpupwrdom(void)
365 {
366 	struct cpu_pwrdom *dom;
367 	cpumask_t pwrdom_mask;
368 	char buf[64];
369 	char members[1024];
370 	char *str;
371 	size_t msize;
372 	int n, i, ncpu = 0, dom_id;
373 
374 	memset(cpu2pwrdom, 0, sizeof(cpu2pwrdom));
375 	memset(cpu_pwrdomain, 0, sizeof(cpu_pwrdomain));
376 	CPUMASK_ASSZERO(cpu_pwrdom_mask);
377 
378 	for (i = 0; i < MAXDOM; ++i) {
379 		snprintf(buf, sizeof(buf),
380 			 "hw.acpi.cpu.px_dom%d.available", i);
381 		if (sysctlbyname(buf, NULL, NULL, NULL, 0) < 0)
382 			continue;
383 
384 		dom = calloc(1, sizeof(*dom));
385 		dom->dom_id = i;
386 
387 		if (cpu_pwrdomain[i] != NULL) {
388 			fprintf(stderr, "cpu power domain %d exists\n", i);
389 			exit(1);
390 		}
391 		cpu_pwrdomain[i] = dom;
392 		CPUMASK_ORBIT(cpu_pwrdom_mask, i);
393 	}
394 	pwrdom_mask = cpu_pwrdom_mask;
395 
396 	while (CPUMASK_TESTNZERO(pwrdom_mask)) {
397 		dom_id = BSFCPUMASK(pwrdom_mask);
398 		CPUMASK_NANDBIT(pwrdom_mask, dom_id);
399 		dom = cpu_pwrdomain[dom_id];
400 
401 		CPUMASK_ASSZERO(dom->dom_cpumask);
402 
403 		snprintf(buf, sizeof(buf),
404 			 "hw.acpi.cpu.px_dom%d.members", dom->dom_id);
405 		msize = sizeof(members);
406 		if (sysctlbyname(buf, members, &msize, NULL, 0) < 0) {
407 			cpu_pwrdomain[dom_id] = NULL;
408 			free(dom);
409 			continue;
410 		}
411 
412 		members[msize] = 0;
413 		for (str = strtok(members, " "); str; str = strtok(NULL, " ")) {
414 			n = -1;
415 			sscanf(str, "cpu%d", &n);
416 			if (n >= 0) {
417 				++ncpu;
418 				++dom->dom_ncpus;
419 				CPUMASK_ORBIT(dom->dom_cpumask, n);
420 				cpu2pwrdom[n] = dom->dom_id;
421 			}
422 		}
423 		if (dom->dom_ncpus == 0) {
424 			cpu_pwrdomain[dom_id] = NULL;
425 			free(dom);
426 			continue;
427 		}
428 		if (DebugOpt) {
429 			printf("dom%d cpumask: ", dom->dom_id);
430 			for (i = 0; i < (int)NELEM(dom->dom_cpumask.ary); ++i) {
431 				printf("%jx ",
432 				    (uintmax_t)dom->dom_cpumask.ary[i]);
433 			}
434 			printf("\n");
435 		}
436 	}
437 
438 	if (ncpu != NCpus) {
439 		if (DebugOpt)
440 			printf("Found %d cpus, expecting %d\n", ncpu, NCpus);
441 
442 		pwrdom_mask = cpu_pwrdom_mask;
443 		while (CPUMASK_TESTNZERO(pwrdom_mask)) {
444 			dom_id = BSFCPUMASK(pwrdom_mask);
445 			CPUMASK_NANDBIT(pwrdom_mask, dom_id);
446 			dom = cpu_pwrdomain[dom_id];
447 			if (dom != NULL)
448 				free(dom);
449 		}
450 		return 0;
451 	}
452 	return 1;
453 }
454 
455 /*
456  * Save per-cpu load and sum of per-cpu load.
457  */
458 static void
459 get_cputime(double pollrate)
460 {
461 	static struct kinfo_cputime ocpu_time[MAXCPU];
462 	static struct kinfo_cputime ncpu_time[MAXCPU];
463 	size_t slen;
464 	int ncpu;
465 	int cpu;
466 	uint64_t delta;
467 
468 	bcopy(ncpu_time, ocpu_time, sizeof(struct kinfo_cputime) * NCpus);
469 
470 	slen = sizeof(ncpu_time);
471 	if (sysctlbyname("kern.cputime", &ncpu_time, &slen, NULL, 0) < 0) {
472 		fprintf(stderr, "kern.cputime sysctl not available\n");
473 		exit(1);
474 	}
475 	ncpu = slen / sizeof(ncpu_time[0]);
476 
477 	delta = 0;
478 	for (cpu = 0; cpu < ncpu; ++cpu) {
479 		uint64_t d;
480 
481 		d = (ncpu_time[cpu].cp_user + ncpu_time[cpu].cp_sys +
482 		     ncpu_time[cpu].cp_nice + ncpu_time[cpu].cp_intr) -
483 		    (ocpu_time[cpu].cp_user + ocpu_time[cpu].cp_sys +
484 		     ocpu_time[cpu].cp_nice + ocpu_time[cpu].cp_intr);
485 		pcpu_state[cpu].cpu_qavg = (double)d / (pollrate * 1000000.0);
486 
487 		delta += d;
488 	}
489 	global_cpu_state.cpu_qavg = (double)delta / (pollrate * 1000000.0);
490 }
491 
492 static void
493 acpi_getcpufreq_str(int dom_id, int *highest0, int *lowest0)
494 {
495 	char buf[256], sysid[64];
496 	size_t buflen;
497 	char *ptr;
498 	int v, highest, lowest;
499 
500 	/*
501 	 * Retrieve availability list
502 	 */
503 	snprintf(sysid, sizeof(sysid), "hw.acpi.cpu.px_dom%d.available",
504 	    dom_id);
505 	buflen = sizeof(buf) - 1;
506 	if (sysctlbyname(sysid, buf, &buflen, NULL, 0) < 0)
507 		return;
508 	buf[buflen] = 0;
509 
510 	/*
511 	 * Parse out the highest and lowest cpu frequencies
512 	 */
513 	ptr = buf;
514 	highest = lowest = 0;
515 	while (ptr && (v = strtol(ptr, &ptr, 10)) > 0) {
516 		if ((lowest == 0 || lowest > v) &&
517 		    (LowestCpuFreq <= 0 || v >= LowestCpuFreq))
518 			lowest = v;
519 		if ((highest == 0 || highest < v) &&
520 		    (HighestCpuFreq <= 0 || v <= HighestCpuFreq))
521 			highest = v;
522 		/*
523 		 * Detect turbo mode
524 		 */
525 		if (!TurboOpt && highest - v == 1)
526 			highest = v;
527 	}
528 
529 	*highest0 = highest;
530 	*lowest0 = lowest;
531 }
532 
533 static int
534 acpi_getcpufreq_bin(int dom_id, int *highest0, int *lowest0)
535 {
536 	char sysid[64];
537 	int freq[MAXFREQ];
538 	size_t freqlen;
539 	int freqcnt, i;
540 
541 	/*
542 	 * Retrieve availability list
543 	 */
544 	snprintf(sysid, sizeof(sysid), "hw.acpi.cpu.px_dom%d.avail", dom_id);
545 	freqlen = sizeof(freq);
546 	if (sysctlbyname(sysid, freq, &freqlen, NULL, 0) < 0)
547 		return 0;
548 
549 	freqcnt = freqlen / sizeof(freq[0]);
550 	if (freqcnt == 0)
551 		return 0;
552 
553 	for (i = freqcnt - 1; i >= 0; --i) {
554 		*lowest0 = freq[i];
555 		if (LowestCpuFreq <= 0 || *lowest0 >= LowestCpuFreq)
556 			break;
557 	}
558 
559 	i = 0;
560 	*highest0 = freq[0];
561 	if (!TurboOpt && freqcnt > 1 && freq[0] - freq[1] == 1) {
562 		i = 1;
563 		*highest0 = freq[1];
564 	}
565 	for (; i < freqcnt; ++i) {
566 		if (HighestCpuFreq <= 0 || *highest0 <= HighestCpuFreq)
567 			break;
568 		*highest0 = freq[i];
569 	}
570 	return 1;
571 }
572 
/*
 * Look up the frequency range of a power domain.  The binary sysctl is
 * tried first, the textual one second; both outputs are 0 if neither
 * yields a result.
 */
static void
acpi_get_cpufreq(int dom_id, int *highest, int *lowest)
{
	*lowest = 0;
	*highest = 0;

	if (!acpi_getcpufreq_bin(dom_id, highest, lowest))
		acpi_getcpufreq_str(dom_id, highest, lowest);
}
583 
/*
 * Print the command line synopsis to stderr and exit with status 1.
 */
static void
usage(void)
{
	fputs("usage: powerd [-cdeftQ] [-p hysteresis] "
	    "[-h highest_freq] [-l lowest_freq] "
	    "[-r poll_interval] [-u trigger_up] "
	    "[-B min_battery_life] [-L low_battery_linger] "
	    "[-P battery_poll_interval] [-T sample_interval]\n", stderr);
	exit(1);
}
595 
/*
 * vvp -= uvp for struct timespec, borrowing from tv_sec when the
 * nanosecond part goes negative.  Only defined when the system headers
 * do not already provide it.
 */
#ifndef timespecsub
#define timespecsub(vvp, uvp)						\
	do {								\
		(vvp)->tv_nsec -= (uvp)->tv_nsec;			\
		(vvp)->tv_sec -= (uvp)->tv_sec;				\
		if ((vvp)->tv_nsec < 0) {				\
			(vvp)->tv_nsec += 1000000000;			\
			--(vvp)->tv_sec;				\
		}							\
	} while (0)
#endif

/* Longest time a battery related sysctl may take; unit: nanosecond. */
#define BAT_SYSCTL_TIME_MAX	50000000
609 
610 static int
611 has_battery(void)
612 {
613 	struct timespec s, e;
614 	size_t len;
615 	int val;
616 
617 	clock_gettime(CLOCK_MONOTONIC_FAST, &s);
618 	BatLifePrevT = s;
619 
620 	len = sizeof(val);
621 	if (sysctlbyname("hw.acpi.acline", &val, &len, NULL, 0) < 0) {
622 		/* No AC line information */
623 		return 0;
624 	}
625 	clock_gettime(CLOCK_MONOTONIC_FAST, &e);
626 
627 	timespecsub(&e, &s);
628 	if (e.tv_sec > 0 || e.tv_nsec > BAT_SYSCTL_TIME_MAX) {
629 		/* hw.acpi.acline takes to long to be useful */
630 		syslog(LOG_NOTICE, "hw.acpi.acline takes too long");
631 		return 0;
632 	}
633 
634 	clock_gettime(CLOCK_MONOTONIC_FAST, &s);
635 	len = sizeof(val);
636 	if (sysctlbyname("hw.acpi.battery.life", &val, &len, NULL, 0) < 0) {
637 		/* No battery life */
638 		return 0;
639 	}
640 	clock_gettime(CLOCK_MONOTONIC_FAST, &e);
641 
642 	timespecsub(&e, &s);
643 	if (e.tv_sec > 0 || e.tv_nsec > BAT_SYSCTL_TIME_MAX) {
644 		/* hw.acpi.battery.life takes to long to be useful */
645 		syslog(LOG_NOTICE, "hw.acpi.battery.life takes too long");
646 		return 0;
647 	}
648 	return 1;
649 }
650 
651 static void
652 low_battery_alert(int life)
653 {
654 	int fmt, stereo, freq;
655 	int fd;
656 
657 	syslog(LOG_ALERT, "low battery life %d%%, please plugin AC line, #%d",
658 	    life, BatShutdownLingerCnt);
659 	++BatShutdownLingerCnt;
660 
661 	if (!BatShutdownAudioAlert)
662 		return;
663 
664 	fd = open("/dev/dsp", O_WRONLY);
665 	if (fd < 0)
666 		return;
667 
668 	fmt = AFMT_S16_LE;
669 	if (ioctl(fd, SNDCTL_DSP_SETFMT, &fmt, sizeof(fmt)) < 0)
670 		goto done;
671 
672 	stereo = 0;
673 	if (ioctl(fd, SNDCTL_DSP_STEREO, &stereo, sizeof(stereo)) < 0)
674 		goto done;
675 
676 	freq = 44100;
677 	if (ioctl(fd, SNDCTL_DSP_SPEED, &freq, sizeof(freq)) < 0)
678 		goto done;
679 
680 	write(fd, alert1, sizeof(alert1));
681 	write(fd, alert1, sizeof(alert1));
682 
683 done:
684 	close(fd);
685 }
686 
687 static int
688 mon_battery(void)
689 {
690 	struct timespec cur, ts;
691 	int acline, life;
692 	size_t len;
693 
694 	clock_gettime(CLOCK_MONOTONIC_FAST, &cur);
695 	ts = cur;
696 	timespecsub(&ts, &BatLifePrevT);
697 	if (ts.tv_sec < BatLifePollIntvl)
698 		return 1;
699 	BatLifePrevT = cur;
700 
701 	len = sizeof(acline);
702 	if (sysctlbyname("hw.acpi.acline", &acline, &len, NULL, 0) < 0)
703 		return 1;
704 	if (acline) {
705 		BatShutdownLinger = -1;
706 		BatShutdownLingerCnt = 0;
707 		return 1;
708 	}
709 
710 	len = sizeof(life);
711 	if (sysctlbyname("hw.acpi.battery.life", &life, &len, NULL, 0) < 0)
712 		return 1;
713 
714 	if (BatShutdownLinger > 0) {
715 		ts = cur;
716 		timespecsub(&ts, &BatShutdownStartT);
717 		if (ts.tv_sec > BatShutdownLinger)
718 			BatShutdownLinger = 0;
719 	}
720 
721 	if (life <= BatLifeMin) {
722 		if (BatShutdownLinger == 0 || BatShutdownLingerSet == 0) {
723 			syslog(LOG_ALERT, "low battery life %d%%, "
724 			    "shutting down", life);
725 			if (vfork() == 0)
726 				execlp("poweroff", "poweroff", NULL);
727 			return 0;
728 		} else if (BatShutdownLinger < 0) {
729 			BatShutdownLinger = BatShutdownLingerSet;
730 			BatShutdownStartT = cur;
731 		}
732 		low_battery_alert(life);
733 	}
734 	return 1;
735 }
736 
737 static void
738 get_ncpus(void)
739 {
740 	size_t slen;
741 
742 	slen = sizeof(NCpus);
743 	if (sysctlbyname("hw.ncpu", &NCpus, &slen, NULL, 0) < 0)
744 		err(1, "sysctlbyname hw.ncpu failed");
745 	if (DebugOpt)
746 		printf("hw.ncpu %d\n", NCpus);
747 }
748 
749 static void
750 get_uschedcpus(void)
751 {
752 	size_t slen;
753 
754 	slen = sizeof(usched_cpu_used);
755 	if (sysctlbyname("kern.usched_global_cpumask", &usched_cpu_used, &slen,
756 	    NULL, 0) < 0)
757 		err(1, "sysctlbyname kern.usched_global_cpumask failed");
758 	if (DebugOpt) {
759 		int i;
760 
761 		printf("usched cpumask was: ");
762 		for (i = 0; i < (int)NELEM(usched_cpu_used.ary); ++i)
763 			printf("%jx ", (uintmax_t)usched_cpu_used.ary[i]);
764 		printf("\n");
765 	}
766 }
767 
768 static void
769 set_uschedcpus(void)
770 {
771 	if (DebugOpt) {
772 		int i;
773 
774 		printf("usched cpumask: ");
775 		for (i = 0; i < (int)NELEM(usched_cpu_used.ary); ++i) {
776 			printf("%jx ",
777 			    (uintmax_t)usched_cpu_used.ary[i]);
778 		}
779 		printf("\n");
780 	}
781 	sysctlbyname("kern.usched_global_cpumask", NULL, 0,
782 	    &usched_cpu_used, sizeof(usched_cpu_used));
783 }
784 
/*
 * Report whether the machdep.perfbias0.hint sysctl can be read.
 * Returns 1 if so, 0 otherwise.
 */
static int
has_perfbias(void)
{
	int hint;
	size_t hintlen = sizeof(hint);
	int error;

	error = sysctlbyname("machdep.perfbias0.hint", &hint, &hintlen,
	    NULL, 0);
	return (error < 0) ? 0 : 1;
}
796 
797 static void
798 set_perfbias(int cpu, int inc)
799 {
800 	int hint = inc ? 0 : 15;
801 	char sysid[64];
802 
803 	if (DebugOpt)
804 		printf("cpu%d set perfbias hint %d\n", cpu, hint);
805 	snprintf(sysid, sizeof(sysid), "machdep.perfbias%d.hint", cpu);
806 	sysctlbyname(sysid, NULL, NULL, &hint, sizeof(hint));
807 }
808 
809 static void
810 init_perf(void)
811 {
812 	struct cpu_state *state;
813 	int cpu;
814 
815 	/* Get usched cpumask */
816 	get_uschedcpus();
817 
818 	/*
819 	 * Assume everything are used and are maxed out, before we
820 	 * start.
821 	 */
822 
823 	CPUMASK_ASSBMASK(cpu_used, NCpus);
824 	cpu_pwrdom_used = cpu_pwrdom_mask;
825 	global_pcpu_limit = NCpus;
826 
827 	for (cpu = 0; cpu < NCpus; ++cpu) {
828 		state = &pcpu_state[cpu];
829 
830 		state->cpu_uavg = 0.0;
831 		state->cpu_davg = 0.0;
832 		state->cpu_limit = 1;
833 		state->cpu_count = 1;
834 		snprintf(state->cpu_name, sizeof(state->cpu_name), "cpu%d",
835 		    cpu);
836 	}
837 
838 	state = &global_cpu_state;
839 	state->cpu_uavg = 0.0;
840 	state->cpu_davg = 0.0;
841 	state->cpu_limit = NCpus;
842 	state->cpu_count = NCpus;
843 	strlcpy(state->cpu_name, "global", sizeof(state->cpu_name));
844 }
845 
846 static int
847 get_nstate(struct cpu_state *state, double srt)
848 {
849 	int ustate, dstate, nstate;
850 
851 	/* speeding up */
852 	state->cpu_uavg = (state->cpu_uavg * 2.0 + state->cpu_qavg) / 3.0;
853 	/* slowing down */
854 	state->cpu_davg = (state->cpu_davg * srt + state->cpu_qavg) / (srt + 1);
855 	if (state->cpu_davg < state->cpu_uavg)
856 		state->cpu_davg = state->cpu_uavg;
857 
858 	ustate = state->cpu_uavg / TriggerUp;
859 	if (ustate < state->cpu_limit)
860 		ustate = state->cpu_uavg / TriggerDown;
861 	dstate = state->cpu_davg / TriggerUp;
862 	if (dstate < state->cpu_limit)
863 		dstate = state->cpu_davg / TriggerDown;
864 
865 	nstate = (ustate > dstate) ? ustate : dstate;
866 	if (nstate > state->cpu_count)
867 		nstate = state->cpu_count;
868 
869 	if (DebugOpt) {
870 		printf("%s qavg=%5.2f uavg=%5.2f davg=%5.2f "
871 		    "%2d ncpus=%d\n", state->cpu_name,
872 		    state->cpu_qavg, state->cpu_uavg, state->cpu_davg,
873 		    state->cpu_limit, nstate);
874 	}
875 	return nstate;
876 }
877 
878 static void
879 mon_perf(double srt)
880 {
881 	cpumask_t ocpu_used, ocpu_pwrdom_used;
882 	int pnstate = 0, nstate;
883 	int cpu;
884 
885 	/*
886 	 * Find cpus requiring performance and their cooresponding power
887 	 * domains.  Save the number of cpus requiring performance in
888 	 * pnstate.
889 	 */
890 	ocpu_used = cpu_used;
891 	ocpu_pwrdom_used = cpu_pwrdom_used;
892 
893 	CPUMASK_ASSZERO(cpu_used);
894 	CPUMASK_ASSZERO(cpu_pwrdom_used);
895 
896 	for (cpu = 0; cpu < NCpus; ++cpu) {
897 		struct cpu_state *state = &pcpu_state[cpu];
898 		int s;
899 
900 		s = get_nstate(state, srt);
901 		if (s) {
902 			CPUMASK_ORBIT(cpu_used, cpu);
903 			CPUMASK_ORBIT(cpu_pwrdom_used, cpu2pwrdom[cpu]);
904 		}
905 		pnstate += s;
906 
907 		state->cpu_limit = s;
908 	}
909 
910 	/*
911 	 * Calculate nstate, the number of cpus we wish to run at max
912 	 * performance.
913 	 */
914 	nstate = get_nstate(&global_cpu_state, srt);
915 
916 	if (nstate == global_cpu_state.cpu_limit &&
917 	    (pnstate == global_pcpu_limit || nstate > pnstate)) {
918 		/* Nothing changed; keep the sets */
919 		cpu_used = ocpu_used;
920 		cpu_pwrdom_used = ocpu_pwrdom_used;
921 
922 		global_pcpu_limit = pnstate;
923 		return;
924 	}
925 	global_pcpu_limit = pnstate;
926 
927 	if (nstate > pnstate) {
928 		/*
929 		 * Add spare cpus to meet global performance requirement.
930 		 */
931 		add_spare_cpus(ocpu_used, nstate - pnstate);
932 	}
933 
934 	global_cpu_state.cpu_limit = nstate;
935 
936 	/*
937 	 * Adjust cpu and cpu power domain performance
938 	 */
939 	adj_perf(ocpu_used, ocpu_pwrdom_used);
940 }
941 
942 static void
943 add_spare_cpus(const cpumask_t ocpu_used, int ncpu)
944 {
945 	cpumask_t saved_pwrdom, xcpu_used;
946 	int done = 0, cpu;
947 
948 	/*
949 	 * Find more cpus in the previous cpu set.
950 	 */
951 	xcpu_used = cpu_used;
952 	CPUMASK_XORMASK(xcpu_used, ocpu_used);
953 	while (CPUMASK_TESTNZERO(xcpu_used)) {
954 		cpu = BSFCPUMASK(xcpu_used);
955 		CPUMASK_NANDBIT(xcpu_used, cpu);
956 
957 		if (CPUMASK_TESTBIT(ocpu_used, cpu)) {
958 			CPUMASK_ORBIT(cpu_pwrdom_used, cpu2pwrdom[cpu]);
959 			CPUMASK_ORBIT(cpu_used, cpu);
960 			--ncpu;
961 			if (ncpu == 0)
962 				return;
963 		}
964 	}
965 
966 	/*
967 	 * Find more cpus in the used cpu power domains.
968 	 */
969 	saved_pwrdom = cpu_pwrdom_used;
970 again:
971 	while (CPUMASK_TESTNZERO(saved_pwrdom)) {
972 		cpumask_t unused_cpumask;
973 		int dom;
974 
975 		dom = BSFCPUMASK(saved_pwrdom);
976 		CPUMASK_NANDBIT(saved_pwrdom, dom);
977 
978 		unused_cpumask = cpu_pwrdomain[dom]->dom_cpumask;
979 		CPUMASK_NANDMASK(unused_cpumask, cpu_used);
980 
981 		while (CPUMASK_TESTNZERO(unused_cpumask)) {
982 			cpu = BSFCPUMASK(unused_cpumask);
983 			CPUMASK_NANDBIT(unused_cpumask, cpu);
984 
985 			CPUMASK_ORBIT(cpu_pwrdom_used, dom);
986 			CPUMASK_ORBIT(cpu_used, cpu);
987 			--ncpu;
988 			if (ncpu == 0)
989 				return;
990 		}
991 	}
992 	if (!done) {
993 		done = 1;
994 		/*
995 		 * Find more cpus in unused cpu power domains
996 		 */
997 		saved_pwrdom = cpu_pwrdom_mask;
998 		CPUMASK_NANDMASK(saved_pwrdom, cpu_pwrdom_used);
999 		goto again;
1000 	}
1001 	if (DebugOpt)
1002 		printf("%d cpus not found\n", ncpu);
1003 }
1004 
1005 static void
1006 acpi_set_cpufreq(int dom, int inc)
1007 {
1008 	int lowest, highest, desired;
1009 	char sysid[64];
1010 
1011 	acpi_get_cpufreq(dom, &highest, &lowest);
1012 	if (highest == 0 || lowest == 0)
1013 		return;
1014 	desired = inc ? highest : lowest;
1015 
1016 	if (DebugOpt)
1017 		printf("dom%d set frequency %d\n", dom, desired);
1018 	snprintf(sysid, sizeof(sysid), "hw.acpi.cpu.px_dom%d.select", dom);
1019 	sysctlbyname(sysid, NULL, NULL, &desired, sizeof(desired));
1020 }
1021 
1022 static void
1023 adj_cpu_pwrdom(int dom, int inc)
1024 {
1025 	if (AdjustCpuFreq)
1026 		acpi_set_cpufreq(dom, inc);
1027 }
1028 
1029 static void
1030 adj_cpu_perf(int cpu, int inc)
1031 {
1032 	if (DebugOpt) {
1033 		if (inc)
1034 			printf("cpu%d increase perf\n", cpu);
1035 		else
1036 			printf("cpu%d decrease perf\n", cpu);
1037 	}
1038 
1039 	if (HasPerfbias)
1040 		set_perfbias(cpu, inc);
1041 	if (AdjustCstate)
1042 		set_cstate(cpu, inc);
1043 }
1044 
1045 static void
1046 adj_perf(cpumask_t xcpu_used, cpumask_t xcpu_pwrdom_used)
1047 {
1048 	cpumask_t old_usched_used;
1049 	int cpu, inc;
1050 
1051 	/*
1052 	 * Set cpus requiring performance to the userland process
1053 	 * scheduler.  Leave the rest of cpus unmapped.
1054 	 */
1055 	old_usched_used = usched_cpu_used;
1056 	usched_cpu_used = cpu_used;
1057 	if (CPUMASK_TESTZERO(usched_cpu_used))
1058 		CPUMASK_ORBIT(usched_cpu_used, 0);
1059 	if (CPUMASK_CMPMASKNEQ(usched_cpu_used, old_usched_used))
1060 		set_uschedcpus();
1061 
1062 	/*
1063 	 * Adjust per-cpu performance.
1064 	 */
1065 	CPUMASK_XORMASK(xcpu_used, cpu_used);
1066 	while (CPUMASK_TESTNZERO(xcpu_used)) {
1067 		cpu = BSFCPUMASK(xcpu_used);
1068 		CPUMASK_NANDBIT(xcpu_used, cpu);
1069 
1070 		if (CPUMASK_TESTBIT(cpu_used, cpu)) {
1071 			/* Increase cpu performance */
1072 			inc = 1;
1073 		} else {
1074 			/* Decrease cpu performance */
1075 			inc = 0;
1076 		}
1077 		adj_cpu_perf(cpu, inc);
1078 	}
1079 
1080 	/*
1081 	 * Adjust cpu power domain performance.  This could affect
1082 	 * a set of cpus.
1083 	 */
1084 	CPUMASK_XORMASK(xcpu_pwrdom_used, cpu_pwrdom_used);
1085 	while (CPUMASK_TESTNZERO(xcpu_pwrdom_used)) {
1086 		int dom;
1087 
1088 		dom = BSFCPUMASK(xcpu_pwrdom_used);
1089 		CPUMASK_NANDBIT(xcpu_pwrdom_used, dom);
1090 
1091 		if (CPUMASK_TESTBIT(cpu_pwrdom_used, dom)) {
1092 			/* Increase cpu power domain performance */
1093 			inc = 1;
1094 		} else {
1095 			/* Decrease cpu power domain performance */
1096 			inc = 0;
1097 		}
1098 		adj_cpu_pwrdom(dom, inc);
1099 	}
1100 }
1101 
1102 static void
1103 restore_perf(void)
1104 {
1105 	cpumask_t ocpu_used, ocpu_pwrdom_used;
1106 
1107 	/* Remove highest cpu frequency limitation */
1108 	HighestCpuFreq = 0;
1109 
1110 	ocpu_used = cpu_used;
1111 	ocpu_pwrdom_used = cpu_pwrdom_used;
1112 
1113 	/* Max out all cpus and cpu power domains performance */
1114 	CPUMASK_ASSBMASK(cpu_used, NCpus);
1115 	cpu_pwrdom_used = cpu_pwrdom_mask;
1116 
1117 	adj_perf(ocpu_used, ocpu_pwrdom_used);
1118 
1119 	if (AdjustCstate) {
1120 		/*
1121 		 * Restore the original mwait C-state
1122 		 */
1123 		if (DebugOpt)
1124 			printf("global set cstate %s\n", orig_global_cx);
1125 		sysctlbyname("machdep.mwait.CX.idle", NULL, NULL,
1126 		    orig_global_cx, strlen(orig_global_cx) + 1);
1127 	}
1128 }
1129 
1130 static int
1131 probe_cstate(void)
1132 {
1133 	char cx_supported[1024];
1134 	const char *target;
1135 	char *ptr;
1136 	int idle_hlt, deep = 1;
1137 	size_t len;
1138 
1139 	len = sizeof(idle_hlt);
1140 	if (sysctlbyname("machdep.cpu_idle_hlt", &idle_hlt, &len, NULL, 0) < 0)
1141 		return 0;
1142 	if (idle_hlt != 1)
1143 		return 0;
1144 
1145 	len = sizeof(cx_supported);
1146 	if (sysctlbyname("machdep.mwait.CX.supported", cx_supported, &len,
1147 	    NULL, 0) < 0)
1148 		return 0;
1149 
1150 	len = sizeof(orig_global_cx);
1151 	if (sysctlbyname("machdep.mwait.CX.idle", orig_global_cx, &len,
1152 	    NULL, 0) < 0)
1153 		return 0;
1154 
1155 	strlcpy(cpu_perf_cx, "AUTODEEP", sizeof(cpu_perf_cx));
1156 	cpu_perf_cxlen = strlen(cpu_perf_cx) + 1;
1157 	if (sysctlbyname("machdep.mwait.CX.idle", NULL, NULL,
1158 	    cpu_perf_cx, cpu_perf_cxlen) < 0) {
1159 		/* AUTODEEP is not supported; try AUTO */
1160 		deep = 0;
1161 		strlcpy(cpu_perf_cx, "AUTO", sizeof(cpu_perf_cx));
1162 		cpu_perf_cxlen = strlen(cpu_perf_cx) + 1;
1163 		if (sysctlbyname("machdep.mwait.CX.idle", NULL, NULL,
1164 		    cpu_perf_cx, cpu_perf_cxlen) < 0)
1165 			return 0;
1166 	}
1167 
1168 	if (!deep)
1169 		target = "C2/0";
1170 	else
1171 		target = NULL;
1172 	for (ptr = strtok(cx_supported, " "); ptr != NULL;
1173 	     ptr = strtok(NULL, " ")) {
1174 		if (target == NULL ||
1175 		    (target != NULL && strcmp(ptr, target) == 0)) {
1176 			strlcpy(cpu_idle_cx, ptr, sizeof(cpu_idle_cx));
1177 			cpu_idle_cxlen = strlen(cpu_idle_cx) + 1;
1178 			if (target != NULL)
1179 				break;
1180 		}
1181 	}
1182 	if (cpu_idle_cxlen == 0)
1183 		return 0;
1184 
1185 	if (DebugOpt) {
1186 		printf("cstate orig %s, perf %s, idle %s\n",
1187 		    orig_global_cx, cpu_perf_cx, cpu_idle_cx);
1188 	}
1189 	return 1;
1190 }
1191 
1192 static void
1193 set_cstate(int cpu, int inc)
1194 {
1195 	const char *cst;
1196 	char sysid[64];
1197 	size_t len;
1198 
1199 	if (inc) {
1200 		cst = cpu_perf_cx;
1201 		len = cpu_perf_cxlen;
1202 	} else {
1203 		cst = cpu_idle_cx;
1204 		len = cpu_idle_cxlen;
1205 	}
1206 
1207 	if (DebugOpt)
1208 		printf("cpu%d set cstate %s\n", cpu, cst);
1209 	snprintf(sysid, sizeof(sysid), "machdep.mwait.CX.idle%d", cpu);
1210 	sysctlbyname(sysid, NULL, NULL, cst, len);
1211 }
1212