1 /* 2 * Copyright (c) 2010 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@backplane.com> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 3. Neither the name of The DragonFly Project nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific, prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 35 /* 36 * The powerd daemon monitors the cpu load and adjusts cpu frequencies 37 * via hw.acpi.cpu.px_dom*. 38 */ 39 40 #define _KERNEL_STRUCTURES 41 #include <sys/types.h> 42 #include <sys/sysctl.h> 43 #include <sys/kinfo.h> 44 #include <sys/file.h> 45 #include <stdio.h> 46 #include <stdlib.h> 47 #include <unistd.h> 48 #include <string.h> 49 #include <syslog.h> 50 51 static void usage(void); 52 static double getcputime(void); 53 static void acpi_setcpufreq(int nstate); 54 static void setupdominfo(void); 55 56 int DebugOpt; 57 int TurboOpt = 1; 58 int CpuLimit; /* # of cpus at max frequency */ 59 int DomLimit; /* # of domains at max frequency */ 60 int PowerFd; 61 int DomBeg; 62 int DomEnd; 63 int NCpus; 64 int CpuCount[256]; /* # of cpus in any given domain */ 65 int CpuToDom[256]; /* domain a particular cpu belongs to */ 66 int Hysteresis = 10; 67 double TriggerUp = 0.25;/* load per cpu to force max freq */ 68 double TriggerDown; /* load per cpu to force the min freq */ 69 70 static void sigintr(int signo); 71 72 int 73 main(int ac, char **av) 74 { 75 double qavg; 76 double savg; 77 int ch; 78 int nstate; 79 char buf[64]; 80 81 while ((ch = getopt(ac, av, "dp:tu:")) != -1) { 82 switch(ch) { 83 case 'd': 84 DebugOpt = 1; 85 break; 86 case 'p': 87 Hysteresis = (int)strtol(optarg, NULL, 10); 88 break; 89 case 't': 90 TurboOpt = 0; 91 break; 92 case 'u': 93 TriggerUp = (double)strtol(optarg, NULL, 10) / 100; 94 break; 95 default: 96 usage(); 97 /* NOT REACHED */ 98 } 99 } 100 ac -= optind; 101 av += optind; 102 103 if (0 > Hysteresis || Hysteresis > 99) { 104 fprintf(stderr, "Invalid hysteresis value\n"); 105 exit(1); 106 } 107 108 if (0 > TriggerUp || TriggerUp > 1) { 109 fprintf(stderr, "Invalid load limit value\n"); 110 exit(1); 111 } 112 113 TriggerDown = TriggerUp - (TriggerUp * (double) Hysteresis / 100); 114 115 /* 116 * Make sure powerd is not already running. 117 */ 118 PowerFd = open("/var/run/powerd.pid", O_CREAT|O_RDWR, 0644); 119 if (PowerFd < 0) { 120 fprintf(stderr, 121 "Cannot create /var/run/powerd.pid, " 122 "continuing anyway\n"); 123 } else { 124 if (flock(PowerFd, LOCK_EX|LOCK_NB) < 0) { 125 fprintf(stderr, "powerd is already running\n"); 126 exit(1); 127 } 128 } 129 130 /* 131 * Demonize and set pid 132 */ 133 if (DebugOpt == 0) { 134 daemon(0, 0); 135 openlog("powerd", LOG_CONS | LOG_PID, LOG_DAEMON); 136 } 137 138 if (PowerFd >= 0) { 139 ftruncate(PowerFd, 0); 140 snprintf(buf, sizeof(buf), "%d\n", (int)getpid()); 141 write(PowerFd, buf, strlen(buf)); 142 } 143 144 /* 145 * Wait hw.acpi.cpu.px_dom* sysctl to be created by kernel 146 * 147 * Since hw.acpi.cpu.px_dom* creation is queued into ACPI 148 * taskqueue and ACPI taskqueue is shared across various 149 * ACPI modules, any delay in other modules may cause 150 * hw.acpi.cpu.px_dom* to be created at quite a later time 151 * (e.g. cmbat module's task could take quite a lot of time). 152 */ 153 for (;;) { 154 /* 155 * Prime delta cputime calculation, make sure at least 156 * dom0 exists. 157 */ 158 getcputime(); 159 savg = 0.0; 160 161 setupdominfo(); 162 if (DomBeg >= DomEnd) { 163 sleep(1); 164 continue; 165 } 166 167 DomLimit = DomEnd; 168 CpuLimit = NCpus; 169 break; 170 } 171 172 /* 173 * Set to maximum performance if killed. 174 */ 175 signal(SIGINT, sigintr); 176 signal(SIGTERM, sigintr); 177 178 /* 179 * Monitoring loop 180 * 181 * Calculate nstate, the number of cpus we wish to run at max 182 * frequency. All remaining cpus will be set to their lowest 183 * frequency and mapped out of the user process scheduler. 184 */ 185 for (;;) { 186 qavg = getcputime(); 187 savg = (savg * 7.0 + qavg) / 8.0; 188 189 nstate = savg / TriggerUp; 190 if (nstate < CpuLimit) 191 nstate = savg / TriggerDown; 192 if (nstate > NCpus) 193 nstate = NCpus; 194 if (DebugOpt) { 195 printf("\rqavg=%5.2f savg=%5.2f %2d/%2d ncpus=%d\r", 196 qavg, savg, CpuLimit, DomLimit, nstate); 197 fflush(stdout); 198 } 199 if (nstate != CpuLimit) 200 acpi_setcpufreq(nstate); 201 sleep(1); 202 } 203 } 204 205 static 206 void 207 sigintr(int signo __unused) 208 { 209 syslog(LOG_INFO, "killed, setting max and exiting"); 210 acpi_setcpufreq(NCpus); 211 exit(1); 212 } 213 214 /* 215 * Figure out the domains and calculate the CpuCount[] and CpuToDom[] 216 * arrays. 217 */ 218 static 219 void 220 setupdominfo(void) 221 { 222 char buf[64]; 223 char members[1024]; 224 char *str; 225 size_t msize; 226 int i; 227 int n; 228 229 for (i = 0; i < 256; ++i) { 230 snprintf(buf, sizeof(buf), 231 "hw.acpi.cpu.px_dom%d.available", i); 232 if (sysctlbyname(buf, NULL, NULL, NULL, 0) >= 0) 233 break; 234 } 235 DomBeg = i; 236 237 for (i = 255; i >= DomBeg; --i) { 238 snprintf(buf, sizeof(buf), 239 "hw.acpi.cpu.px_dom%d.available", i); 240 if (sysctlbyname(buf, NULL, NULL, NULL, 0) >= 0) { 241 ++i; 242 break; 243 } 244 } 245 DomEnd = i; 246 247 for (i = DomBeg; i < DomEnd; ++i) { 248 snprintf(buf, sizeof(buf), 249 "hw.acpi.cpu.px_dom%d.members", i); 250 msize = sizeof(members); 251 if (sysctlbyname(buf, members, &msize, NULL, 0) == 0) { 252 members[msize] = 0; 253 for (str = strtok(members, " "); str; 254 str = strtok(NULL, " ")) { 255 n = -1; 256 sscanf(str, "cpu%d", &n); 257 if (n >= 0) { 258 ++NCpus; 259 ++CpuCount[i]; 260 CpuToDom[n]= i; 261 } 262 } 263 } 264 } 265 } 266 267 /* 268 * Return the one-second cpu load. One cpu at 100% will return a value 269 * of 1.0. On a SMP system N cpus running at 100% will return a value of N. 270 */ 271 static 272 double 273 getcputime(void) 274 { 275 static struct kinfo_cputime ocpu_time[64]; 276 static struct kinfo_cputime ncpu_time[64]; 277 size_t slen; 278 int ncpu; 279 int cpu; 280 uint64_t delta; 281 282 bcopy(ncpu_time, ocpu_time, sizeof(ncpu_time)); 283 slen = sizeof(ncpu_time); 284 if (sysctlbyname("kern.cputime", &ncpu_time, &slen, NULL, 0) < 0) { 285 fprintf(stderr, "kern.cputime sysctl not available\n"); 286 exit(1); 287 } 288 ncpu = slen / sizeof(ncpu_time[0]); 289 delta = 0; 290 291 for (cpu = 0; cpu < ncpu; ++cpu) { 292 delta += (ncpu_time[cpu].cp_user + ncpu_time[cpu].cp_sys + 293 ncpu_time[cpu].cp_nice + ncpu_time[cpu].cp_intr) - 294 (ocpu_time[cpu].cp_user + ocpu_time[cpu].cp_sys + 295 ocpu_time[cpu].cp_nice + ocpu_time[cpu].cp_intr); 296 } 297 return((double)delta / 1000000.0); 298 } 299 300 /* 301 * nstate is the requested number of cpus that we wish to run at full 302 * frequency. We calculate how many domains we have to adjust to reach 303 * this goal. 304 * 305 * This function also sets the user scheduler global cpu mask. 306 */ 307 static 308 void 309 acpi_setcpufreq(int nstate) 310 { 311 int ncpus = 0; 312 int increasing = (nstate > CpuLimit); 313 int dom; 314 int domBeg; 315 int domEnd; 316 int lowest; 317 int highest; 318 int desired; 319 int v; 320 char *sysid; 321 char *ptr; 322 char buf[256]; 323 size_t buflen; 324 cpumask_t global_cpumask; 325 326 /* 327 * Calculate the ending domain if the number of operating cpus 328 * has increased. 329 * 330 * Calculate the starting domain if the number of operating cpus 331 * has decreased. 332 */ 333 for (dom = DomBeg; dom < DomEnd; ++dom) { 334 if (ncpus >= nstate) 335 break; 336 ncpus += CpuCount[dom]; 337 } 338 339 syslog(LOG_INFO, "using %d cpus", nstate); 340 341 /* 342 * Set the mask of cpus the userland scheduler is allowed to use. 343 */ 344 CPUMASK_ASSBMASK(global_cpumask, nstate); 345 sysctlbyname("kern.usched_global_cpumask", NULL, 0, 346 &global_cpumask, sizeof(global_cpumask)); 347 348 if (increasing) { 349 domBeg = DomLimit; 350 domEnd = dom; 351 } else { 352 domBeg = dom; 353 domEnd = DomLimit; 354 } 355 DomLimit = dom; 356 CpuLimit = nstate; 357 358 /* 359 * Adjust the cpu frequency 360 */ 361 if (DebugOpt) 362 printf("\n"); 363 for (dom = domBeg; dom < domEnd; ++dom) { 364 /* 365 * Retrieve availability list 366 */ 367 asprintf(&sysid, "hw.acpi.cpu.px_dom%d.available", dom); 368 buflen = sizeof(buf) - 1; 369 v = sysctlbyname(sysid, buf, &buflen, NULL, 0); 370 free(sysid); 371 if (v < 0) 372 continue; 373 buf[buflen] = 0; 374 375 /* 376 * Parse out the highest and lowest cpu frequencies 377 */ 378 ptr = buf; 379 highest = lowest = 0; 380 while (ptr && (v = strtol(ptr, &ptr, 10)) > 0) { 381 if (lowest == 0 || lowest > v) 382 lowest = v; 383 if (highest == 0 || highest < v) 384 highest = v; 385 /* 386 * Detect turbo mode 387 */ 388 if ((highest - v == 1) && ! TurboOpt) 389 highest = v; 390 391 } 392 393 /* 394 * Calculate the desired cpu frequency, test, and set. 395 */ 396 desired = increasing ? highest : lowest; 397 398 asprintf(&sysid, "hw.acpi.cpu.px_dom%d.select", dom); 399 buflen = sizeof(v); 400 v = 0; 401 sysctlbyname(sysid, &v, &buflen, NULL, 0); 402 { 403 if (DebugOpt) { 404 printf("dom%d set frequency %d\n", 405 dom, desired); 406 } 407 sysctlbyname(sysid, NULL, NULL, 408 &desired, sizeof(desired)); 409 } 410 free(sysid); 411 } 412 } 413 414 static 415 void 416 usage(void) 417 { 418 fprintf(stderr, "usage: powerd [-dt] [-p hysteresis] [-u trigger_up]\n"); 419 exit(1); 420 } 421