1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 #include <stdio.h>
28 #include <sys/types.h>
29 #include <fcntl.h>
30 #include <string.h>
31 #include <stdlib.h>
32 #include <unistd.h>
33 #include <errno.h>
34
35 #include <sys/stat.h>
36 #include <poll.h>
37 #include <signal.h>
38 #include <pthread.h>
39 #include <thread.h>
40 #include <time.h>
41 #include <sys/systeminfo.h>
42 #include <sys/cred.h>
43 #include <dirent.h>
44 #include <libdevinfo.h>
45 #include <sys/pm.h>
46 #include <sys/ppmio.h>
47 #include <locale.h>
48
49 #include "fpsapi.h"
50 #include "fpsd.h"
51 #include "messages.h"
52
53
54 #define DEV_PM "/devices/pseudo/pm@0:pm"
55 #define DEFAULT_CPU_FULL_POWER 3
56
57 int is_estar_system = 0; /* Not an E* system, by default */
58 int sys_pm_state = PM_SYSTEM_PM_DISABLED; /* By default autopm disabled */
59
60
61 static di_node_t fps_di_root = DI_NODE_NIL;
62 static di_prom_handle_t fps_di_prom = DI_PROM_HANDLE_NIL;
63 static char **cpu_dpaths = NULL; /* Used only on E* system */
64 static int *proc_ids = NULL; /* Used only on E* system */
65 static int num_cpus = 0; /* Used only on E* system */
66 static int devpm_fd = -1; /* Used only on E* system */
67 static int full_pwr = DEFAULT_CPU_FULL_POWER;
68
69 /*
70 * Initialize system PM state enable/disable and
71 * enable system default info logging accordingly.
72 * Note: Even for systems for which CPU PM is not enabled by
73 * default, disk PM may be enabled explicitly using power.conf;
74 * If power management is enabled, disable informational logging
75 * by default.
76 * Some platforms don't have /dev/pm entry. It is perfectly OK.
77 * Don't complain if there is no /dev/pm entry.
78 * The platforms on which CPU PM is enabled by default, would
 * of course have /dev/pm entry.
80 *
81 * Note: open_dev_pm() should have been called initially before
82 * calling this function.
83 *
84 */
85
86 void
update_pm_state()87 update_pm_state()
88 {
89 int pm_stat;
90
91 if (devpm_fd == -1)
92 return;
93
94 pm_stat = ioctl(devpm_fd, PM_GET_PM_STATE);
95
96 if (pm_stat == -1)
97 return;
98
99 sys_pm_state = pm_stat;
100
101 }
102
103 /*
104 * Some platforms don't support power management. (neither CPU nor disk)
105 * Those platforms don't have /dev/pm entry. Don't complain in such case.
 * Some platforms support PM only for disks. (they have /dev/pm entry.
107 * and logging is disabled on those platforms.)
108 * Some platforms support PM for both disks and CPUs (apart from others).
109 * Those platforms also have /dev/pm entry.
110 * Note that even desktops which support CPU PM E* can be custom
111 * configured to remove power management drivers. In that case,
112 * there won't be any /dev/pm entry and it is valid config.
113 *
114 */
115
open_dev_pm()116 static void open_dev_pm()
117 {
118 devpm_fd = open(DEV_PM, O_RDWR);
119
120 }
121
122 /*
123 * Initialize Estar info database.
124 *
125 */
126
127 void
init_estar_db()128 init_estar_db()
129 {
130 di_node_t fnode, node;
131 di_prop_t nextp;
132 char *path = NULL;
133 int cpu_i;
134 int is_pmprop_found = 0;
135 pm_req_t pmreq;
136 uchar_t *prop_data = NULL;
137
138 /*
139 * First open /dev/pm and keep it open for later uses.
140 * Note that this needs to be open on all power management supported
141 * systems. Some systems support power mgmt on only some
142 * devices like disk, but not CPU. /dev/pm does not exist on
143 * some platforms. Also PM drivers can be removed on custom
144 * configurations.
145 */
146 open_dev_pm();
147
148 if (devpm_fd == -1)
149 return;
150
151 fps_di_root = di_init("/", DINFOCPYALL);
152
153 if (DI_NODE_NIL == fps_di_root) {
154 fpsd_message(FPSD_EXIT_ERROR, FPS_WARNING, DI_INIT_FAIL);
155 }
156
157 fps_di_prom = di_prom_init();
158
159 if (DI_PROM_HANDLE_NIL == fps_di_prom) {
160 fpsd_message(FPSD_EXIT_ERROR, FPS_WARNING, DI_PROM_INIT_FAIL);
161 di_fini(fps_di_root);
162 }
163
164 if (di_prom_prop_lookup_bytes(fps_di_prom, fps_di_root,
165 "energystar-v3", &prop_data) == -1)
166 goto exit_es;
167
168 /*
169 * As a final check, also check for "us" driver property pm-components
170 * On Estar systems, the driver should define this property.
171 */
172
173 fnode = node = di_drv_first_node("us", fps_di_root);
174
175 if (DI_NODE_NIL == node) {
176 goto exit_es;
177 }
178
179 is_pmprop_found = 0;
180 for (nextp = di_prop_next(node, DI_PROP_NIL); nextp != DI_PROP_NIL;
181 nextp = di_prop_next(node, nextp)) {
182 if (strcmp(di_prop_name(nextp), "pm-components") == 0) {
183 is_pmprop_found = 1;
184 break;
185 }
186 }
187
188 if (!is_pmprop_found)
189 goto exit_es;
190
191 is_estar_system = 1; /* CPU power mgmt supported E* system */
192
193 num_cpus = 0;
194 while (node != DI_NODE_NIL) {
195 num_cpus++;
196 node = di_drv_next_node(node);
197 }
198
199 cpu_dpaths = (char **)calloc(num_cpus+1, sizeof (char *));
200 proc_ids = (int *)calloc(num_cpus+1, sizeof (int));
201 proc_ids[num_cpus] = -1; /* Terminate processor ids by -1 */
202
203 cpu_i = 0;
204 for (node = fnode; node != DI_NODE_NIL; node = di_drv_next_node(node)) {
205 proc_ids[cpu_i] = -1;
206 cpu_dpaths[cpu_i] = NULL;
207
208 path = di_devfs_path(node);
209 if (NULL == path)
210 continue;
211 cpu_dpaths[cpu_i] = strdup(path);
212 di_devfs_path_free(path);
213 /*
214 * Keep the mapping between path and processor IDs.
215 * Currently, processor IDs are not used.
216 * But may be used in future.
217 */
218
219 /*
220 * On workstation platforms (where CPU E* supported),
221 * processor ID and instance numbers are same.
222 * This may change in future. So watch out.
223 */
224
225 proc_ids[cpu_i] = di_instance(node); /* Currently unused. */
226 cpu_i++;
227 }
228
229 proc_ids[cpu_i] = -1;
230 cpu_dpaths[cpu_i] = NULL;
231
232 /* Initialize what "FULL POWER" mode is. */
233 full_pwr = DEFAULT_CPU_FULL_POWER;
234
235 pmreq.physpath = cpu_dpaths[0];
236 pmreq.component = 0;
237 pmreq.value = 0;
238 pmreq.data = NULL;
239 pmreq.datasize = 0;
240
241
242 full_pwr = ioctl(devpm_fd, PM_GET_FULL_POWER, &pmreq);
243 if (full_pwr == -1)
244 full_pwr = DEFAULT_CPU_FULL_POWER;
245 exit_es:
246
247 if (fps_di_root != DI_NODE_NIL) {
248 di_fini(fps_di_root);
249 fps_di_root = DI_NODE_NIL;
250 }
251 if (DI_PROM_HANDLE_NIL != fps_di_prom) {
252 di_prom_fini(fps_di_prom);
253 fps_di_prom = DI_PROM_HANDLE_NIL;
254 }
255 }
256
257 /*
258 * Return the min(idle_times), min(remaining_times), max(rem_time) for all
259 * CPUs in full power mode. The "remain time" is the remaining
260 * threshold time after which the CPU will make next lower level
261 * power transition if left idle.
262 * If the CPUs are not in full power mode or could not exactly determine
263 * the power mode then return -1.
264 * return 0 if CPUs are in full power mode.
265 */
266
267 int
get_idle_rem_stats(int * min_idle,int * min_rem,int * max_rem)268 get_idle_rem_stats(int *min_idle, int *min_rem, int *max_rem)
269 {
270 int idle_time;
271 int pmstats[2];
272 int i;
273 pm_req_t pmreq;
274 int ret;
275
276 *min_idle = -1;
277 *min_rem = -1;
278 *max_rem = -1;
279
280 for (i = 0; i < num_cpus; i++) {
281
282 pmreq.physpath = cpu_dpaths[i];
283 pmreq.component = 0;
284 pmreq.value = 0;
285 pmreq.data = pmstats;
286 pmreq.datasize = sizeof (pmstats);
287 idle_time = ioctl(devpm_fd, PM_GET_TIME_IDLE, &pmreq);
288 if (idle_time == -1)
289 continue;
290 ret = ioctl(devpm_fd, PM_GET_STATS, &pmreq);
291
292 /* Now pmstats[0] = cur power level; pmstats[1]=remain time */
293 if (ret == -1)
294 continue;
295 if (pmstats[0] != full_pwr)
296 continue;
297
298 if ((*min_idle == -1) || (idle_time < *min_idle))
299 *min_idle = idle_time;
300 if (*min_rem == -1 || pmstats[1] < *min_rem) {
301 *min_rem = pmstats[1];
302
303 /*
304 * The remain time can be negative if there are 2 cpus
305 * and 1 cpu is ready to transition
306 * and the other one is not
307 */
308 if (*min_rem < 0)
309 *min_rem = 0;
310 }
311 if (*max_rem == -1 || pmstats[1] > *max_rem)
312 *max_rem = pmstats[1];
313 }
314
315 return
316 ((*min_idle == -1 || *min_rem == -1 || *max_rem == -1) ? -1 : 0);
317 }
318
319 /*
320 * Wait until CPU comes to full power state or timeout occurs.
321 * If multiple threads call this function, execute the
322 * PM ioctl system call only once.
323 * This is better than all 3 threads polling cpu pwr state same time.
324 *
325 * Callers of this function should not assume that on returning from
326 * this function CPU will be in full power state.
327 * (They should check again).
328 * This function just optimizes for performance during wait.
329 *
330 *
331 */
332
void
wait_for_pm_state_change()
{
	int res;
	/*
	 * NOTE(review): relies on zero-initialization of a static mutex;
	 * on Solaris this is equivalent to a default-initialized mutex,
	 * but PTHREAD_MUTEX_INITIALIZER would make the intent explicit.
	 */
	static pthread_mutex_t wrlck;
	static int is_active = 0;	/* 1 while some thread runs the ioctl loop */
	static pm_req_t pmreq;
	static pm_state_change_t pmsc;
	static char path[MAXPATHLEN];

	int pwr = 0;
	int cur_lvl = 0; /* 0 = unknown. 1=low, 3=full power */

	/* Query/wait against the first CPU; all CPUs transition together. */
	pmreq.physpath = cpu_dpaths[0];
	pmreq.component = 0;
	pmreq.value = 0;
	pmreq.data = NULL;
	pmreq.datasize = 0;


	(void) pthread_mutex_lock(&wrlck);

	if (!is_active) { /* This is the first thread trying to wait */
		is_active = 1;
		(void) pthread_mutex_unlock(&wrlck);

		pmsc.physpath = path;
		pmsc.size = MAXPATHLEN;
		path[0] = 0; /* init not required. Just in case... */

		/*
		 * PM starts buffering the state changes after the first call to
		 * PM_GET_STATE_CHANGE/PM_GET_STATE_CHANGE_WAIT
		 *
		 * The PM_GET_STATE_CHANGE is a non-blocking call where as
		 * _WAIT is blocking call. The PM_GET_STATE_CHANGE also
		 * returns all the info * about the latest buffered state
		 * change if already buffered event is available. So it is
		 * important to drain out all old events,
		 * if you are only interested in future events.
		 *
		 * After the state changes the exact information/timestamp about
		 * state changes are reflected in the ioctl struct.
		 * To keep things simple, after draining out all buffered info,
		 * we issue get current power to get the current power level and
		 * then we issue another _WAIT command to get the
		 * next power change.
		 *
		 */

		/*
		 * Drain loop: keep issuing the non-blocking ioctl until it
		 * fails with EWOULDBLOCK (no more buffered events).
		 * NOTE(review): the loop condition reads errno even after a
		 * successful ioctl (which leaves errno untouched) — appears
		 * to rely on errno being distinct from EWOULDBLOCK on entry;
		 * confirm against the pm(4D) driver behavior.
		 */
		do {

			res = ioctl(devpm_fd, PM_GET_STATE_CHANGE, &pmsc);

			if (res == -1 && errno != EWOULDBLOCK) {
				fpsd_message(FPSD_NO_EXIT, FPS_WARNING,
				    INTERNAL_FAILURE_WARN,
				    strerror(errno));
				/* 1 second sleep. Avoid busy loop */
				(void) poll(NULL, 0, 1000);
				/* Probably will succeed in next call. */
				goto psc_complete;
			}

		} while (errno != EWOULDBLOCK);

		/* drain out all buffered state changes */

		/* If current state is full power, then get out. */

		do {
			pwr = ioctl(devpm_fd, PM_GET_CURRENT_POWER, &pmreq);
			if (pwr != -1) break;
			if (errno == EAGAIN) {
				/* Transient failure: retry after a pause. */
				(void) poll(NULL, 0, 1000); /* 1 sec sleep */
				continue;
			} else {
				fpsd_message(FPSD_NO_EXIT, FPS_WARNING,
				    INTERNAL_FAILURE_WARN1,
				    strerror(errno));
				(void) poll(NULL, 0, 1000); /* 1 sec sleep */
				goto psc_complete;
			}
			/*CONSTCOND*/
		} while (1);

		if (pwr == full_pwr)
			goto psc_complete;

		/* Block until a state change reports the CPU at full power. */
		while (cur_lvl != full_pwr) {
			pmsc.physpath = path;
			pmsc.size = MAXPATHLEN;
			path[0] = 0; /* init not required. Just in case... */

			do {
				res = ioctl(devpm_fd,
				    PM_GET_STATE_CHANGE_WAIT, &pmsc);
				if (res == -1 && errno == EINTR) {
					/* 1 second sleep */
					(void) poll(NULL, 0, 1000);
				}
			} while (res == -1 && errno == EINTR);

			if (res == -1) {
				fpsd_message(FPSD_NO_EXIT, FPS_WARNING,
				    INTERNAL_FAILURE_WARN2,
				    strerror(errno));
				/*
				 * If there are failures in state change ioctl,
				 * just would fall back to normal polling of
				 * status later. get out quiet.
				 */
				/* avoid busy loop -- 1 second sleep */
				(void) poll(NULL, 0, 1000);
				goto psc_complete;
			}

			/* Only events for the first CPU's path count. */
			if (strcmp(pmsc.physpath, cpu_dpaths[0]) == 0 &&
			    pmsc.new_level == full_pwr)
				cur_lvl = full_pwr;
		}

psc_complete:
		/* Let waiting threads observe that the ioctl loop is done. */
		(void) pthread_mutex_lock(&wrlck);
		is_active = 0;
		(void) pthread_mutex_unlock(&wrlck);

	} else {
		/* Release the lock first */
		(void) pthread_mutex_unlock(&wrlck);
		/*
		 * Already one other thread is active issuing ioctl call.
		 * Just poll here to check the local flag without any expensive
		 * ioctl calls until the transition is complete.
		 */
		(void) poll(NULL, 0, 1000); /* first time 1 second wait */
		for (;;) {
			(void) pthread_mutex_lock(&wrlck);
			if (!is_active) {
				(void) pthread_mutex_unlock(&wrlck);
				break;
			}
			(void) pthread_mutex_unlock(&wrlck);
			(void) poll(NULL, 0, 4000); /* 4 seconds wait */
		}
	}
}
480