1 /* 2 * Copyright (c) 2010 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@backplane.com> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 3. Neither the name of The DragonFly Project nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific, prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 35 /* 36 * The powerd daemon : 37 * - Monitor the cpu load and adjusts cpu and cpu power domain 38 * performance accordingly. 39 * - Monitor battery life. Alarm alerts and shutdown the machine 40 * if battery life goes low. 41 */ 42 43 #define _KERNEL_STRUCTURES 44 #include <sys/types.h> 45 #include <sys/sysctl.h> 46 #include <sys/kinfo.h> 47 #include <sys/file.h> 48 #include <sys/queue.h> 49 #include <sys/soundcard.h> 50 #include <sys/time.h> 51 #include <machine/cpufunc.h> 52 #include <err.h> 53 #include <stdio.h> 54 #include <stdlib.h> 55 #include <unistd.h> 56 #include <string.h> 57 #include <syslog.h> 58 59 #include "alert1.h" 60 61 #define MAXDOM MAXCPU /* worst case, 1 cpu per domain */ 62 63 #define MAXFREQ 64 64 #define CST_STRLEN 16 65 66 struct cpu_pwrdom { 67 TAILQ_ENTRY(cpu_pwrdom) dom_link; 68 int dom_id; 69 int dom_ncpus; 70 cpumask_t dom_cpumask; 71 }; 72 73 struct cpu_state { 74 double cpu_qavg; 75 double cpu_uavg; /* used for speeding up */ 76 double cpu_davg; /* used for slowing down */ 77 int cpu_limit; 78 int cpu_count; 79 char cpu_name[8]; 80 }; 81 82 static void usage(void); 83 static void get_ncpus(void); 84 85 /* usched cpumask */ 86 static void get_uschedcpus(void); 87 static void set_uschedcpus(void); 88 89 /* perfbias(4) */ 90 static int has_perfbias(void); 91 static void set_perfbias(int, int); 92 93 /* acpi(4) P-state */ 94 static void acpi_getcpufreq_str(int, int *, int *); 95 static int acpi_getcpufreq_bin(int, int *, int *); 96 static void acpi_get_cpufreq(int, int *, int *); 97 static void acpi_set_cpufreq(int, int); 98 static int acpi_get_cpupwrdom(void); 99 100 /* mwait C-state hint */ 101 static int probe_cstate(void); 102 static void set_cstate(int, int); 103 104 /* Performance monitoring */ 105 static void init_perf(void); 106 static void mon_perf(double); 107 static void adj_perf(cpumask_t, cpumask_t); 108 static void adj_cpu_pwrdom(int, int); 109 static void adj_cpu_perf(int, int); 110 static void get_cputime(double); 111 static int get_nstate(struct cpu_state *, double); 112 static void add_spare_cpus(const cpumask_t, int); 113 static void restore_perf(void); 114 115 /* Battery monitoring */ 116 static int has_battery(void); 117 static int mon_battery(void); 118 static void low_battery_alert(int); 119 120 /* Runtime states for performance monitoring */ 121 static int global_pcpu_limit; 122 static struct cpu_state pcpu_state[MAXCPU]; 123 static struct cpu_state global_cpu_state; 124 static cpumask_t cpu_used; /* cpus w/ high perf */ 125 static cpumask_t cpu_pwrdom_used; /* cpu power domains w/ high perf */ 126 static cpumask_t usched_cpu_used; /* cpus for usched */ 127 128 /* Constants */ 129 static cpumask_t cpu_pwrdom_mask; /* usable cpu power domains */ 130 static int cpu2pwrdom[MAXCPU]; /* cpu to cpu power domain map */ 131 static struct cpu_pwrdom *cpu_pwrdomain[MAXDOM]; 132 static int NCpus; /* # of cpus */ 133 static char orig_global_cx[CST_STRLEN]; 134 static char cpu_perf_cx[CST_STRLEN]; 135 static int cpu_perf_cxlen; 136 static char cpu_idle_cx[CST_STRLEN]; 137 static int cpu_idle_cxlen; 138 139 static int DebugOpt; 140 static int TurboOpt = 1; 141 static int PowerFd; 142 static int Hysteresis = 10; /* percentage */ 143 static double TriggerUp = 0.25; /* single-cpu load to force max freq */ 144 static double TriggerDown; /* load per cpu to force the min freq */ 145 static int HasPerfbias = 0; 146 static int AdjustCpuFreq = 1; 147 static int AdjustCstate = 0; 148 149 static volatile int stopped; 150 151 /* Battery life monitoring */ 152 static int BatLifeMin = 2; /* shutdown the box, if low on battery life */ 153 static struct timespec BatLifePrevT; 154 static int BatLifePollIntvl = 5; /* unit: sec */ 155 static struct timespec BatShutdownStartT; 156 static int BatShutdownLinger = -1; 157 static int BatShutdownLingerSet = 60; /* unit: sec */ 158 static int BatShutdownLingerCnt; 159 static int BatShutdownAudioAlert = 1; 160 161 static void sigintr(int signo); 162 163 int 164 main(int ac, char **av) 165 { 166 double srt; 167 double pollrate; 168 int ch; 169 char buf[64]; 170 int monbat; 171 172 srt = 8.0; /* time for samples - 8 seconds */ 173 pollrate = 1.0; /* polling rate in seconds */ 174 175 while ((ch = getopt(ac, av, "cdefp:r:tu:B:L:P:QT:")) != -1) { 176 switch(ch) { 177 case 'c': 178 AdjustCstate = 1; 179 break; 180 case 'd': 181 DebugOpt = 1; 182 break; 183 case 'e': 184 HasPerfbias = 1; 185 break; 186 case 'f': 187 AdjustCpuFreq = 0; 188 break; 189 case 'p': 190 Hysteresis = (int)strtol(optarg, NULL, 10); 191 break; 192 case 'r': 193 pollrate = strtod(optarg, NULL); 194 break; 195 case 't': 196 TurboOpt = 0; 197 break; 198 case 'u': 199 TriggerUp = (double)strtol(optarg, NULL, 10) / 100; 200 break; 201 case 'B': 202 BatLifeMin = strtol(optarg, NULL, 10); 203 break; 204 case 'L': 205 BatShutdownLingerSet = strtol(optarg, NULL, 10); 206 if (BatShutdownLingerSet < 0) 207 BatShutdownLingerSet = 0; 208 break; 209 case 'P': 210 BatLifePollIntvl = strtol(optarg, NULL, 10); 211 break; 212 case 'Q': 213 BatShutdownAudioAlert = 0; 214 break; 215 case 'T': 216 srt = strtod(optarg, NULL); 217 break; 218 default: 219 usage(); 220 /* NOT REACHED */ 221 } 222 } 223 ac -= optind; 224 av += optind; 225 226 setlinebuf(stdout); 227 228 /* Get number of cpus */ 229 get_ncpus(); 230 231 if (0 > Hysteresis || Hysteresis > 99) { 232 fprintf(stderr, "Invalid hysteresis value\n"); 233 exit(1); 234 } 235 236 if (0 > TriggerUp || TriggerUp > 1) { 237 fprintf(stderr, "Invalid load limit value\n"); 238 exit(1); 239 } 240 241 TriggerDown = TriggerUp - (TriggerUp * (double) Hysteresis / 100); 242 243 /* 244 * Make sure powerd is not already running. 245 */ 246 PowerFd = open("/var/run/powerd.pid", O_CREAT|O_RDWR, 0644); 247 if (PowerFd < 0) { 248 fprintf(stderr, 249 "Cannot create /var/run/powerd.pid, " 250 "continuing anyway\n"); 251 } else { 252 if (flock(PowerFd, LOCK_EX|LOCK_NB) < 0) { 253 fprintf(stderr, "powerd is already running\n"); 254 exit(1); 255 } 256 } 257 258 /* 259 * Demonize and set pid 260 */ 261 if (DebugOpt == 0) { 262 daemon(0, 0); 263 openlog("powerd", LOG_CONS | LOG_PID, LOG_DAEMON); 264 } 265 266 if (PowerFd >= 0) { 267 ftruncate(PowerFd, 0); 268 snprintf(buf, sizeof(buf), "%d\n", (int)getpid()); 269 write(PowerFd, buf, strlen(buf)); 270 } 271 272 /* Do we need to monitor battery life? */ 273 if (BatLifePollIntvl <= 0) 274 monbat = 0; 275 else 276 monbat = has_battery(); 277 278 /* Do we have perfbias(4)? */ 279 if (HasPerfbias) 280 HasPerfbias = has_perfbias(); 281 282 /* Could we adjust C-state? */ 283 if (AdjustCstate) 284 AdjustCstate = probe_cstate(); 285 286 /* 287 * Wait hw.acpi.cpu.px_dom* sysctl to be created by kernel. 288 * 289 * Since hw.acpi.cpu.px_dom* creation is queued into ACPI 290 * taskqueue and ACPI taskqueue is shared across various 291 * ACPI modules, any delay in other modules may cause 292 * hw.acpi.cpu.px_dom* to be created at quite a later time 293 * (e.g. cmbat module's task could take quite a lot of time). 294 */ 295 for (;;) { 296 /* Prime delta cputime calculation. */ 297 get_cputime(pollrate); 298 299 /* Wait for all cpus to appear */ 300 if (acpi_get_cpupwrdom()) 301 break; 302 usleep((int)(pollrate * 1000000.0)); 303 } 304 305 /* 306 * Catch some signals so that max performance could be restored. 307 */ 308 signal(SIGINT, sigintr); 309 signal(SIGTERM, sigintr); 310 311 /* Initialize performance states */ 312 init_perf(); 313 314 srt = srt / pollrate; /* convert to sample count */ 315 if (DebugOpt) 316 printf("samples for downgrading: %5.2f\n", srt); 317 318 /* 319 * Monitoring loop 320 */ 321 while (!stopped) { 322 /* 323 * Monitor performance 324 */ 325 get_cputime(pollrate); 326 mon_perf(srt); 327 328 /* 329 * Monitor battery 330 */ 331 if (monbat) 332 monbat = mon_battery(); 333 334 usleep((int)(pollrate * 1000000.0)); 335 } 336 337 /* 338 * Set to maximum performance if killed. 339 */ 340 syslog(LOG_INFO, "killed, setting max and exiting"); 341 restore_perf(); 342 343 exit(0); 344 } 345 346 static void 347 sigintr(int signo __unused) 348 { 349 stopped = 1; 350 } 351 352 /* 353 * Figure out the cpu power domains. 354 */ 355 static int 356 acpi_get_cpupwrdom(void) 357 { 358 struct cpu_pwrdom *dom; 359 cpumask_t pwrdom_mask; 360 char buf[64]; 361 char members[1024]; 362 char *str; 363 size_t msize; 364 int n, i, ncpu = 0, dom_id; 365 366 memset(cpu2pwrdom, 0, sizeof(cpu2pwrdom)); 367 memset(cpu_pwrdomain, 0, sizeof(cpu_pwrdomain)); 368 CPUMASK_ASSZERO(cpu_pwrdom_mask); 369 370 for (i = 0; i < MAXDOM; ++i) { 371 snprintf(buf, sizeof(buf), 372 "hw.acpi.cpu.px_dom%d.available", i); 373 if (sysctlbyname(buf, NULL, NULL, NULL, 0) < 0) 374 continue; 375 376 dom = calloc(1, sizeof(*dom)); 377 dom->dom_id = i; 378 379 if (cpu_pwrdomain[i] != NULL) { 380 fprintf(stderr, "cpu power domain %d exists\n", i); 381 exit(1); 382 } 383 cpu_pwrdomain[i] = dom; 384 CPUMASK_ORBIT(cpu_pwrdom_mask, i); 385 } 386 pwrdom_mask = cpu_pwrdom_mask; 387 388 while (CPUMASK_TESTNZERO(pwrdom_mask)) { 389 dom_id = BSFCPUMASK(pwrdom_mask); 390 CPUMASK_NANDBIT(pwrdom_mask, dom_id); 391 dom = cpu_pwrdomain[dom_id]; 392 393 CPUMASK_ASSZERO(dom->dom_cpumask); 394 395 snprintf(buf, sizeof(buf), 396 "hw.acpi.cpu.px_dom%d.members", dom->dom_id); 397 msize = sizeof(members); 398 if (sysctlbyname(buf, members, &msize, NULL, 0) < 0) { 399 cpu_pwrdomain[dom_id] = NULL; 400 free(dom); 401 continue; 402 } 403 404 members[msize] = 0; 405 for (str = strtok(members, " "); str; str = strtok(NULL, " ")) { 406 n = -1; 407 sscanf(str, "cpu%d", &n); 408 if (n >= 0) { 409 ++ncpu; 410 ++dom->dom_ncpus; 411 CPUMASK_ORBIT(dom->dom_cpumask, n); 412 cpu2pwrdom[n] = dom->dom_id; 413 } 414 } 415 if (dom->dom_ncpus == 0) { 416 cpu_pwrdomain[dom_id] = NULL; 417 free(dom); 418 continue; 419 } 420 if (DebugOpt) { 421 printf("dom%d cpumask: ", dom->dom_id); 422 for (i = 0; i < (int)NELEM(dom->dom_cpumask.ary); ++i) { 423 printf("%jx ", 424 (uintmax_t)dom->dom_cpumask.ary[i]); 425 } 426 printf("\n"); 427 } 428 } 429 430 if (ncpu != NCpus) { 431 if (DebugOpt) 432 printf("Found %d cpus, expecting %d\n", ncpu, NCpus); 433 434 pwrdom_mask = cpu_pwrdom_mask; 435 while (CPUMASK_TESTNZERO(pwrdom_mask)) { 436 dom_id = BSFCPUMASK(pwrdom_mask); 437 CPUMASK_NANDBIT(pwrdom_mask, dom_id); 438 dom = cpu_pwrdomain[dom_id]; 439 if (dom != NULL) 440 free(dom); 441 } 442 return 0; 443 } 444 return 1; 445 } 446 447 /* 448 * Save per-cpu load and sum of per-cpu load. 449 */ 450 static void 451 get_cputime(double pollrate) 452 { 453 static struct kinfo_cputime ocpu_time[MAXCPU]; 454 static struct kinfo_cputime ncpu_time[MAXCPU]; 455 size_t slen; 456 int ncpu; 457 int cpu; 458 uint64_t delta; 459 460 bcopy(ncpu_time, ocpu_time, sizeof(struct kinfo_cputime) * NCpus); 461 462 slen = sizeof(ncpu_time); 463 if (sysctlbyname("kern.cputime", &ncpu_time, &slen, NULL, 0) < 0) { 464 fprintf(stderr, "kern.cputime sysctl not available\n"); 465 exit(1); 466 } 467 ncpu = slen / sizeof(ncpu_time[0]); 468 469 delta = 0; 470 for (cpu = 0; cpu < ncpu; ++cpu) { 471 uint64_t d; 472 473 d = (ncpu_time[cpu].cp_user + ncpu_time[cpu].cp_sys + 474 ncpu_time[cpu].cp_nice + ncpu_time[cpu].cp_intr) - 475 (ocpu_time[cpu].cp_user + ocpu_time[cpu].cp_sys + 476 ocpu_time[cpu].cp_nice + ocpu_time[cpu].cp_intr); 477 pcpu_state[cpu].cpu_qavg = (double)d / (pollrate * 1000000.0); 478 479 delta += d; 480 } 481 global_cpu_state.cpu_qavg = (double)delta / (pollrate * 1000000.0); 482 } 483 484 static void 485 acpi_getcpufreq_str(int dom_id, int *highest0, int *lowest0) 486 { 487 char buf[256], sysid[64]; 488 size_t buflen; 489 char *ptr; 490 int v, highest, lowest; 491 492 /* 493 * Retrieve availability list 494 */ 495 snprintf(sysid, sizeof(sysid), "hw.acpi.cpu.px_dom%d.available", 496 dom_id); 497 buflen = sizeof(buf) - 1; 498 if (sysctlbyname(sysid, buf, &buflen, NULL, 0) < 0) 499 return; 500 buf[buflen] = 0; 501 502 /* 503 * Parse out the highest and lowest cpu frequencies 504 */ 505 ptr = buf; 506 highest = lowest = 0; 507 while (ptr && (v = strtol(ptr, &ptr, 10)) > 0) { 508 if (lowest == 0 || lowest > v) 509 lowest = v; 510 if (highest == 0 || highest < v) 511 highest = v; 512 /* 513 * Detect turbo mode 514 */ 515 if (!TurboOpt && highest - v == 1) 516 highest = v; 517 } 518 519 *highest0 = highest; 520 *lowest0 = lowest; 521 } 522 523 static int 524 acpi_getcpufreq_bin(int dom_id, int *highest0, int *lowest0) 525 { 526 char sysid[64]; 527 int freq[MAXFREQ]; 528 size_t freqlen; 529 int freqcnt; 530 531 /* 532 * Retrieve availability list 533 */ 534 snprintf(sysid, sizeof(sysid), "hw.acpi.cpu.px_dom%d.avail", dom_id); 535 freqlen = sizeof(freq); 536 if (sysctlbyname(sysid, freq, &freqlen, NULL, 0) < 0) 537 return 0; 538 539 freqcnt = freqlen / sizeof(freq[0]); 540 if (freqcnt == 0) 541 return 0; 542 543 *lowest0 = freq[freqcnt - 1]; 544 545 *highest0 = freq[0]; 546 if (!TurboOpt && freqcnt > 1 && freq[0] - freq[1] == 1) 547 *highest0 = freq[1]; 548 return 1; 549 } 550 551 static void 552 acpi_get_cpufreq(int dom_id, int *highest, int *lowest) 553 { 554 *highest = 0; 555 *lowest = 0; 556 557 if (acpi_getcpufreq_bin(dom_id, highest, lowest)) 558 return; 559 acpi_getcpufreq_str(dom_id, highest, lowest); 560 } 561 562 static 563 void 564 usage(void) 565 { 566 fprintf(stderr, "usage: powerd [-cdeftQ] [-p hysteresis] " 567 "[-r poll_interval] [-u trigger_up] " 568 "[-B min_battery_life] [-L low_battery_linger] " 569 "[-P battery_poll_interval] [-T sample_interval]\n"); 570 exit(1); 571 } 572 573 #ifndef timespecsub 574 #define timespecsub(vvp, uvp) \ 575 do { \ 576 (vvp)->tv_sec -= (uvp)->tv_sec; \ 577 (vvp)->tv_nsec -= (uvp)->tv_nsec; \ 578 if ((vvp)->tv_nsec < 0) { \ 579 (vvp)->tv_sec--; \ 580 (vvp)->tv_nsec += 1000000000; \ 581 } \ 582 } while (0) 583 #endif 584 585 #define BAT_SYSCTL_TIME_MAX 50000000 /* unit: nanosecond */ 586 587 static int 588 has_battery(void) 589 { 590 struct timespec s, e; 591 size_t len; 592 int val; 593 594 clock_gettime(CLOCK_MONOTONIC_FAST, &s); 595 BatLifePrevT = s; 596 597 len = sizeof(val); 598 if (sysctlbyname("hw.acpi.acline", &val, &len, NULL, 0) < 0) { 599 /* No AC line information */ 600 return 0; 601 } 602 clock_gettime(CLOCK_MONOTONIC_FAST, &e); 603 604 timespecsub(&e, &s); 605 if (e.tv_sec > 0 || e.tv_nsec > BAT_SYSCTL_TIME_MAX) { 606 /* hw.acpi.acline takes to long to be useful */ 607 syslog(LOG_NOTICE, "hw.acpi.acline takes too long"); 608 return 0; 609 } 610 611 clock_gettime(CLOCK_MONOTONIC_FAST, &s); 612 len = sizeof(val); 613 if (sysctlbyname("hw.acpi.battery.life", &val, &len, NULL, 0) < 0) { 614 /* No battery life */ 615 return 0; 616 } 617 clock_gettime(CLOCK_MONOTONIC_FAST, &e); 618 619 timespecsub(&e, &s); 620 if (e.tv_sec > 0 || e.tv_nsec > BAT_SYSCTL_TIME_MAX) { 621 /* hw.acpi.battery.life takes to long to be useful */ 622 syslog(LOG_NOTICE, "hw.acpi.battery.life takes too long"); 623 return 0; 624 } 625 return 1; 626 } 627 628 static void 629 low_battery_alert(int life) 630 { 631 int fmt, stereo, freq; 632 int fd; 633 634 syslog(LOG_ALERT, "low battery life %d%%, please plugin AC line, #%d", 635 life, BatShutdownLingerCnt); 636 ++BatShutdownLingerCnt; 637 638 if (!BatShutdownAudioAlert) 639 return; 640 641 fd = open("/dev/dsp", O_WRONLY); 642 if (fd < 0) 643 return; 644 645 fmt = AFMT_S16_LE; 646 if (ioctl(fd, SNDCTL_DSP_SETFMT, &fmt, sizeof(fmt)) < 0) 647 goto done; 648 649 stereo = 0; 650 if (ioctl(fd, SNDCTL_DSP_STEREO, &stereo, sizeof(stereo)) < 0) 651 goto done; 652 653 freq = 44100; 654 if (ioctl(fd, SNDCTL_DSP_SPEED, &freq, sizeof(freq)) < 0) 655 goto done; 656 657 write(fd, alert1, sizeof(alert1)); 658 write(fd, alert1, sizeof(alert1)); 659 660 done: 661 close(fd); 662 } 663 664 static int 665 mon_battery(void) 666 { 667 struct timespec cur, ts; 668 int acline, life; 669 size_t len; 670 671 clock_gettime(CLOCK_MONOTONIC_FAST, &cur); 672 ts = cur; 673 timespecsub(&ts, &BatLifePrevT); 674 if (ts.tv_sec < BatLifePollIntvl) 675 return 1; 676 BatLifePrevT = cur; 677 678 len = sizeof(acline); 679 if (sysctlbyname("hw.acpi.acline", &acline, &len, NULL, 0) < 0) 680 return 1; 681 if (acline) { 682 BatShutdownLinger = -1; 683 BatShutdownLingerCnt = 0; 684 return 1; 685 } 686 687 len = sizeof(life); 688 if (sysctlbyname("hw.acpi.battery.life", &life, &len, NULL, 0) < 0) 689 return 1; 690 691 if (BatShutdownLinger > 0) { 692 ts = cur; 693 timespecsub(&ts, &BatShutdownStartT); 694 if (ts.tv_sec > BatShutdownLinger) 695 BatShutdownLinger = 0; 696 } 697 698 if (life <= BatLifeMin) { 699 if (BatShutdownLinger == 0 || BatShutdownLingerSet == 0) { 700 syslog(LOG_ALERT, "low battery life %d%%, " 701 "shutting down", life); 702 if (vfork() == 0) 703 execlp("poweroff", "poweroff", NULL); 704 return 0; 705 } else if (BatShutdownLinger < 0) { 706 BatShutdownLinger = BatShutdownLingerSet; 707 BatShutdownStartT = cur; 708 } 709 low_battery_alert(life); 710 } 711 return 1; 712 } 713 714 static void 715 get_ncpus(void) 716 { 717 size_t slen; 718 719 slen = sizeof(NCpus); 720 if (sysctlbyname("hw.ncpu", &NCpus, &slen, NULL, 0) < 0) 721 err(1, "sysctlbyname hw.ncpu failed"); 722 if (DebugOpt) 723 printf("hw.ncpu %d\n", NCpus); 724 } 725 726 static void 727 get_uschedcpus(void) 728 { 729 size_t slen; 730 731 slen = sizeof(usched_cpu_used); 732 if (sysctlbyname("kern.usched_global_cpumask", &usched_cpu_used, &slen, 733 NULL, 0) < 0) 734 err(1, "sysctlbyname kern.usched_global_cpumask failed"); 735 if (DebugOpt) { 736 int i; 737 738 printf("usched cpumask was: "); 739 for (i = 0; i < (int)NELEM(usched_cpu_used.ary); ++i) 740 printf("%jx ", (uintmax_t)usched_cpu_used.ary[i]); 741 printf("\n"); 742 } 743 } 744 745 static void 746 set_uschedcpus(void) 747 { 748 if (DebugOpt) { 749 int i; 750 751 printf("usched cpumask: "); 752 for (i = 0; i < (int)NELEM(usched_cpu_used.ary); ++i) { 753 printf("%jx ", 754 (uintmax_t)usched_cpu_used.ary[i]); 755 } 756 printf("\n"); 757 } 758 sysctlbyname("kern.usched_global_cpumask", NULL, 0, 759 &usched_cpu_used, sizeof(usched_cpu_used)); 760 } 761 762 static int 763 has_perfbias(void) 764 { 765 size_t len; 766 int hint; 767 768 len = sizeof(hint); 769 if (sysctlbyname("machdep.perfbias0.hint", &hint, &len, NULL, 0) < 0) 770 return 0; 771 return 1; 772 } 773 774 static void 775 set_perfbias(int cpu, int inc) 776 { 777 int hint = inc ? 0 : 15; 778 char sysid[64]; 779 780 if (DebugOpt) 781 printf("cpu%d set perfbias hint %d\n", cpu, hint); 782 snprintf(sysid, sizeof(sysid), "machdep.perfbias%d.hint", cpu); 783 sysctlbyname(sysid, NULL, NULL, &hint, sizeof(hint)); 784 } 785 786 static void 787 init_perf(void) 788 { 789 struct cpu_state *state; 790 int cpu; 791 792 /* Get usched cpumask */ 793 get_uschedcpus(); 794 795 /* 796 * Assume everything are used and are maxed out, before we 797 * start. 798 */ 799 800 CPUMASK_ASSBMASK(cpu_used, NCpus); 801 cpu_pwrdom_used = cpu_pwrdom_mask; 802 global_pcpu_limit = NCpus; 803 804 for (cpu = 0; cpu < NCpus; ++cpu) { 805 state = &pcpu_state[cpu]; 806 807 state->cpu_uavg = 0.0; 808 state->cpu_davg = 0.0; 809 state->cpu_limit = 1; 810 state->cpu_count = 1; 811 snprintf(state->cpu_name, sizeof(state->cpu_name), "cpu%d", 812 cpu); 813 } 814 815 state = &global_cpu_state; 816 state->cpu_uavg = 0.0; 817 state->cpu_davg = 0.0; 818 state->cpu_limit = NCpus; 819 state->cpu_count = NCpus; 820 strlcpy(state->cpu_name, "global", sizeof(state->cpu_name)); 821 } 822 823 static int 824 get_nstate(struct cpu_state *state, double srt) 825 { 826 int ustate, dstate, nstate; 827 828 /* speeding up */ 829 state->cpu_uavg = (state->cpu_uavg * 2.0 + state->cpu_qavg) / 3.0; 830 /* slowing down */ 831 state->cpu_davg = (state->cpu_davg * srt + state->cpu_qavg) / (srt + 1); 832 if (state->cpu_davg < state->cpu_uavg) 833 state->cpu_davg = state->cpu_uavg; 834 835 ustate = state->cpu_uavg / TriggerUp; 836 if (ustate < state->cpu_limit) 837 ustate = state->cpu_uavg / TriggerDown; 838 dstate = state->cpu_davg / TriggerUp; 839 if (dstate < state->cpu_limit) 840 dstate = state->cpu_davg / TriggerDown; 841 842 nstate = (ustate > dstate) ? ustate : dstate; 843 if (nstate > state->cpu_count) 844 nstate = state->cpu_count; 845 846 if (DebugOpt) { 847 printf("%s qavg=%5.2f uavg=%5.2f davg=%5.2f " 848 "%2d ncpus=%d\n", state->cpu_name, 849 state->cpu_qavg, state->cpu_uavg, state->cpu_davg, 850 state->cpu_limit, nstate); 851 } 852 return nstate; 853 } 854 855 static void 856 mon_perf(double srt) 857 { 858 cpumask_t ocpu_used, ocpu_pwrdom_used; 859 int pnstate = 0, nstate; 860 int cpu; 861 862 /* 863 * Find cpus requiring performance and their cooresponding power 864 * domains. Save the number of cpus requiring performance in 865 * pnstate. 866 */ 867 ocpu_used = cpu_used; 868 ocpu_pwrdom_used = cpu_pwrdom_used; 869 870 CPUMASK_ASSZERO(cpu_used); 871 CPUMASK_ASSZERO(cpu_pwrdom_used); 872 873 for (cpu = 0; cpu < NCpus; ++cpu) { 874 struct cpu_state *state = &pcpu_state[cpu]; 875 int s; 876 877 s = get_nstate(state, srt); 878 if (s) { 879 CPUMASK_ORBIT(cpu_used, cpu); 880 CPUMASK_ORBIT(cpu_pwrdom_used, cpu2pwrdom[cpu]); 881 } 882 pnstate += s; 883 884 state->cpu_limit = s; 885 } 886 887 /* 888 * Calculate nstate, the number of cpus we wish to run at max 889 * performance. 890 */ 891 nstate = get_nstate(&global_cpu_state, srt); 892 893 if (nstate == global_cpu_state.cpu_limit && 894 (pnstate == global_pcpu_limit || nstate > pnstate)) { 895 /* Nothing changed; keep the sets */ 896 cpu_used = ocpu_used; 897 cpu_pwrdom_used = ocpu_pwrdom_used; 898 899 global_pcpu_limit = pnstate; 900 return; 901 } 902 global_pcpu_limit = pnstate; 903 904 if (nstate > pnstate) { 905 /* 906 * Add spare cpus to meet global performance requirement. 907 */ 908 add_spare_cpus(ocpu_used, nstate - pnstate); 909 } 910 911 global_cpu_state.cpu_limit = nstate; 912 913 /* 914 * Adjust cpu and cpu power domain performance 915 */ 916 adj_perf(ocpu_used, ocpu_pwrdom_used); 917 } 918 919 static void 920 add_spare_cpus(const cpumask_t ocpu_used, int ncpu) 921 { 922 cpumask_t saved_pwrdom, xcpu_used; 923 int done = 0, cpu; 924 925 /* 926 * Find more cpus in the previous cpu set. 927 */ 928 xcpu_used = cpu_used; 929 CPUMASK_XORMASK(xcpu_used, ocpu_used); 930 while (CPUMASK_TESTNZERO(xcpu_used)) { 931 cpu = BSFCPUMASK(xcpu_used); 932 CPUMASK_NANDBIT(xcpu_used, cpu); 933 934 if (CPUMASK_TESTBIT(ocpu_used, cpu)) { 935 CPUMASK_ORBIT(cpu_pwrdom_used, cpu2pwrdom[cpu]); 936 CPUMASK_ORBIT(cpu_used, cpu); 937 --ncpu; 938 if (ncpu == 0) 939 return; 940 } 941 } 942 943 /* 944 * Find more cpus in the used cpu power domains. 945 */ 946 saved_pwrdom = cpu_pwrdom_used; 947 again: 948 while (CPUMASK_TESTNZERO(saved_pwrdom)) { 949 cpumask_t unused_cpumask; 950 int dom; 951 952 dom = BSFCPUMASK(saved_pwrdom); 953 CPUMASK_NANDBIT(saved_pwrdom, dom); 954 955 unused_cpumask = cpu_pwrdomain[dom]->dom_cpumask; 956 CPUMASK_NANDMASK(unused_cpumask, cpu_used); 957 958 while (CPUMASK_TESTNZERO(unused_cpumask)) { 959 cpu = BSFCPUMASK(unused_cpumask); 960 CPUMASK_NANDBIT(unused_cpumask, cpu); 961 962 CPUMASK_ORBIT(cpu_pwrdom_used, dom); 963 CPUMASK_ORBIT(cpu_used, cpu); 964 --ncpu; 965 if (ncpu == 0) 966 return; 967 } 968 } 969 if (!done) { 970 done = 1; 971 /* 972 * Find more cpus in unused cpu power domains 973 */ 974 saved_pwrdom = cpu_pwrdom_mask; 975 CPUMASK_NANDMASK(saved_pwrdom, cpu_pwrdom_used); 976 goto again; 977 } 978 if (DebugOpt) 979 printf("%d cpus not found\n", ncpu); 980 } 981 982 static void 983 acpi_set_cpufreq(int dom, int inc) 984 { 985 int lowest, highest, desired; 986 char sysid[64]; 987 988 acpi_get_cpufreq(dom, &highest, &lowest); 989 if (highest == 0 || lowest == 0) 990 return; 991 desired = inc ? highest : lowest; 992 993 if (DebugOpt) 994 printf("dom%d set frequency %d\n", dom, desired); 995 snprintf(sysid, sizeof(sysid), "hw.acpi.cpu.px_dom%d.select", dom); 996 sysctlbyname(sysid, NULL, NULL, &desired, sizeof(desired)); 997 } 998 999 static void 1000 adj_cpu_pwrdom(int dom, int inc) 1001 { 1002 if (AdjustCpuFreq) 1003 acpi_set_cpufreq(dom, inc); 1004 } 1005 1006 static void 1007 adj_cpu_perf(int cpu, int inc) 1008 { 1009 if (DebugOpt) { 1010 if (inc) 1011 printf("cpu%d increase perf\n", cpu); 1012 else 1013 printf("cpu%d decrease perf\n", cpu); 1014 } 1015 1016 if (HasPerfbias) 1017 set_perfbias(cpu, inc); 1018 if (AdjustCstate) 1019 set_cstate(cpu, inc); 1020 } 1021 1022 static void 1023 adj_perf(cpumask_t xcpu_used, cpumask_t xcpu_pwrdom_used) 1024 { 1025 cpumask_t old_usched_used; 1026 int cpu, inc; 1027 1028 /* 1029 * Set cpus requiring performance to the userland process 1030 * scheduler. Leave the rest of cpus unmapped. 1031 */ 1032 old_usched_used = usched_cpu_used; 1033 usched_cpu_used = cpu_used; 1034 if (CPUMASK_TESTZERO(usched_cpu_used)) 1035 CPUMASK_ORBIT(usched_cpu_used, 0); 1036 if (CPUMASK_CMPMASKNEQ(usched_cpu_used, old_usched_used)) 1037 set_uschedcpus(); 1038 1039 /* 1040 * Adjust per-cpu performance. 1041 */ 1042 CPUMASK_XORMASK(xcpu_used, cpu_used); 1043 while (CPUMASK_TESTNZERO(xcpu_used)) { 1044 cpu = BSFCPUMASK(xcpu_used); 1045 CPUMASK_NANDBIT(xcpu_used, cpu); 1046 1047 if (CPUMASK_TESTBIT(cpu_used, cpu)) { 1048 /* Increase cpu performance */ 1049 inc = 1; 1050 } else { 1051 /* Decrease cpu performance */ 1052 inc = 0; 1053 } 1054 adj_cpu_perf(cpu, inc); 1055 } 1056 1057 /* 1058 * Adjust cpu power domain performance. This could affect 1059 * a set of cpus. 1060 */ 1061 CPUMASK_XORMASK(xcpu_pwrdom_used, cpu_pwrdom_used); 1062 while (CPUMASK_TESTNZERO(xcpu_pwrdom_used)) { 1063 int dom; 1064 1065 dom = BSFCPUMASK(xcpu_pwrdom_used); 1066 CPUMASK_NANDBIT(xcpu_pwrdom_used, dom); 1067 1068 if (CPUMASK_TESTBIT(cpu_pwrdom_used, dom)) { 1069 /* Increase cpu power domain performance */ 1070 inc = 1; 1071 } else { 1072 /* Decrease cpu power domain performance */ 1073 inc = 0; 1074 } 1075 adj_cpu_pwrdom(dom, inc); 1076 } 1077 } 1078 1079 static void 1080 restore_perf(void) 1081 { 1082 cpumask_t ocpu_used, ocpu_pwrdom_used; 1083 1084 ocpu_used = cpu_used; 1085 ocpu_pwrdom_used = cpu_pwrdom_used; 1086 1087 /* Max out all cpus and cpu power domains performance */ 1088 CPUMASK_ASSBMASK(cpu_used, NCpus); 1089 cpu_pwrdom_used = cpu_pwrdom_mask; 1090 1091 adj_perf(ocpu_used, ocpu_pwrdom_used); 1092 1093 if (AdjustCstate) { 1094 /* 1095 * Restore the original mwait C-state 1096 */ 1097 if (DebugOpt) 1098 printf("global set cstate %s\n", orig_global_cx); 1099 sysctlbyname("machdep.mwait.CX.idle", NULL, NULL, 1100 orig_global_cx, strlen(orig_global_cx) + 1); 1101 } 1102 } 1103 1104 static int 1105 probe_cstate(void) 1106 { 1107 char cx_supported[1024]; 1108 const char *target; 1109 char *ptr; 1110 int idle_hlt, deep = 1; 1111 size_t len; 1112 1113 len = sizeof(idle_hlt); 1114 if (sysctlbyname("machdep.cpu_idle_hlt", &idle_hlt, &len, NULL, 0) < 0) 1115 return 0; 1116 if (idle_hlt != 1) 1117 return 0; 1118 1119 len = sizeof(cx_supported); 1120 if (sysctlbyname("machdep.mwait.CX.supported", cx_supported, &len, 1121 NULL, 0) < 0) 1122 return 0; 1123 1124 len = sizeof(orig_global_cx); 1125 if (sysctlbyname("machdep.mwait.CX.idle", orig_global_cx, &len, 1126 NULL, 0) < 0) 1127 return 0; 1128 1129 strlcpy(cpu_perf_cx, "AUTODEEP", sizeof(cpu_perf_cx)); 1130 cpu_perf_cxlen = strlen(cpu_perf_cx) + 1; 1131 if (sysctlbyname("machdep.mwait.CX.idle", NULL, NULL, 1132 cpu_perf_cx, cpu_perf_cxlen) < 0) { 1133 /* AUTODEEP is not supported; try AUTO */ 1134 deep = 0; 1135 strlcpy(cpu_perf_cx, "AUTO", sizeof(cpu_perf_cx)); 1136 cpu_perf_cxlen = strlen(cpu_perf_cx) + 1; 1137 if (sysctlbyname("machdep.mwait.CX.idle", NULL, NULL, 1138 cpu_perf_cx, cpu_perf_cxlen) < 0) 1139 return 0; 1140 } 1141 1142 if (!deep) 1143 target = "C2/0"; 1144 else 1145 target = NULL; 1146 for (ptr = strtok(cx_supported, " "); ptr != NULL; 1147 ptr = strtok(NULL, " ")) { 1148 if (target == NULL || 1149 (target != NULL && strcmp(ptr, target) == 0)) { 1150 strlcpy(cpu_idle_cx, ptr, sizeof(cpu_idle_cx)); 1151 cpu_idle_cxlen = strlen(cpu_idle_cx) + 1; 1152 if (target != NULL) 1153 break; 1154 } 1155 } 1156 if (cpu_idle_cxlen == 0) 1157 return 0; 1158 1159 if (DebugOpt) { 1160 printf("cstate orig %s, perf %s, idle %s\n", 1161 orig_global_cx, cpu_perf_cx, cpu_idle_cx); 1162 } 1163 return 1; 1164 } 1165 1166 static void 1167 set_cstate(int cpu, int inc) 1168 { 1169 const char *cst; 1170 char sysid[64]; 1171 size_t len; 1172 1173 if (inc) { 1174 cst = cpu_perf_cx; 1175 len = cpu_perf_cxlen; 1176 } else { 1177 cst = cpu_idle_cx; 1178 len = cpu_idle_cxlen; 1179 } 1180 1181 if (DebugOpt) 1182 printf("cpu%d set cstate %s\n", cpu, cst); 1183 snprintf(sysid, sizeof(sysid), "machdep.mwait.CX.idle%d", cpu); 1184 sysctlbyname(sysid, NULL, NULL, cst, len); 1185 } 1186