xref: /onnv-gate/usr/src/cmd/fps/fpsd/fpsd_esutil.c (revision 7435:f05e6d8813dc)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <stdio.h>
28 #include <sys/types.h>
29 #include <fcntl.h>
30 #include <string.h>
31 #include <stdlib.h>
32 #include <unistd.h>
33 #include <errno.h>
34 
35 #include <sys/stat.h>
36 #include <poll.h>
37 #include <signal.h>
38 #include <pthread.h>
39 #include <thread.h>
40 #include <time.h>
41 #include <sys/systeminfo.h>
42 #include <sys/cred.h>
43 #include <dirent.h>
44 #include <libdevinfo.h>
45 #include <sys/pm.h>
46 #include <sys/ppmio.h>
47 #include <locale.h>
48 
49 #include "fpsapi.h"
50 #include "fpsd.h"
51 #include "messages.h"
52 
53 
54 #define	DEV_PM	"/devices/pseudo/pm@0:pm"
55 #define	DEFAULT_CPU_FULL_POWER	3
56 
57 int  is_estar_system = 0;   /* Not an E* system, by default */
58 int  sys_pm_state = PM_SYSTEM_PM_DISABLED; /* By default autopm disabled */
59 
60 
61 static di_node_t  fps_di_root = DI_NODE_NIL;
62 static di_prom_handle_t  fps_di_prom = DI_PROM_HANDLE_NIL;
63 static char **cpu_dpaths = NULL;  /* Used only on E* system */
64 static	int	*proc_ids = NULL;	/* Used only on E* system */
65 static	int	num_cpus = 0;	/* Used only on E* system */
66 static int  devpm_fd = -1;	/* Used only on E* system */
67 static int  full_pwr = DEFAULT_CPU_FULL_POWER;
68 
69 /*
70  * Initialize system PM state enable/disable and
71  * enable system default info logging accordingly.
72  * Note: Even for systems for which CPU PM is not enabled by
73  * default, disk PM may be enabled explicitly using power.conf;
74  * If power management is enabled, disable informational logging
75  * by default.
76  *   Some platforms don't have /dev/pm entry. It is perfectly OK.
77  * Don't complain if there is no /dev/pm entry.
78  * The platforms on which CPU PM is enabled by default, would
 * of course have /dev/pm entry.
80  *
81  * Note: open_dev_pm() should have been called initially before
82  *       calling this function.
83  *
84  */
85 
86 void
update_pm_state()87 update_pm_state()
88 {
89 	int pm_stat;
90 
91 	if (devpm_fd == -1)
92 		return;
93 
94 	pm_stat = ioctl(devpm_fd, PM_GET_PM_STATE);
95 
96 	if (pm_stat == -1)
97 		return;
98 
99 	sys_pm_state = pm_stat;
100 
101 }
102 
103 /*
104  * Some platforms don't support power management. (neither CPU nor disk)
105  * Those platforms don't have /dev/pm entry. Don't complain in such case.
 * Some platforms support PM only for disks (they have a /dev/pm entry,
 * and logging is disabled on those platforms).
108  * Some platforms support PM for both disks and CPUs (apart from others).
109  * Those platforms also have /dev/pm entry.
110  * Note that even desktops which support CPU PM E* can be custom
111  * configured to remove power management drivers. In that case,
112  * there won't be any /dev/pm entry and it is valid config.
113  *
114  */
115 
/*
 * Open the pseudo PM device and cache the descriptor in devpm_fd.
 * devpm_fd stays -1 when /dev/pm is absent — a valid configuration
 * on platforms without PM drivers (see block comment above); all
 * callers check devpm_fd for -1 before using it.
 */
static  void  open_dev_pm()
{
	devpm_fd = open(DEV_PM, O_RDWR);

}
121 
122 /*
123  * Initialize Estar info database.
124  *
125  */
126 
127 void
init_estar_db()128 init_estar_db()
129 {
130 	di_node_t  fnode, node;
131 	di_prop_t  nextp;
132 	char *path = NULL;
133 	int cpu_i;
134 	int  is_pmprop_found = 0;
135 	pm_req_t  pmreq;
136 	uchar_t  *prop_data = NULL;
137 
138 	/*
139 	 * First open /dev/pm and keep it open for later uses.
140 	 * Note that this needs to be open on all power management supported
141 	 * systems. Some systems support power mgmt on only some
142 	 * devices like disk, but not CPU. /dev/pm does not exist on
143 	 * some platforms. Also PM drivers can be removed on custom
144 	 * configurations.
145 	 */
146 	open_dev_pm();
147 
148 	if (devpm_fd == -1)
149 		return;
150 
151 	fps_di_root = di_init("/", DINFOCPYALL);
152 
153 	if (DI_NODE_NIL == fps_di_root) {
154 		fpsd_message(FPSD_EXIT_ERROR, FPS_WARNING, DI_INIT_FAIL);
155 	}
156 
157 	fps_di_prom = di_prom_init();
158 
159 	if (DI_PROM_HANDLE_NIL == fps_di_prom) {
160 		fpsd_message(FPSD_EXIT_ERROR, FPS_WARNING, DI_PROM_INIT_FAIL);
161 		di_fini(fps_di_root);
162 	}
163 
164 	if (di_prom_prop_lookup_bytes(fps_di_prom, fps_di_root,
165 	    "energystar-v3", &prop_data) == -1)
166 		goto exit_es;
167 
168 	/*
169 	 * As a final check, also check for "us" driver property pm-components
170 	 * On Estar systems, the driver should define this property.
171 	 */
172 
173 	fnode = node = di_drv_first_node("us", fps_di_root);
174 
175 	if (DI_NODE_NIL == node) {
176 		goto exit_es;
177 	}
178 
179 	is_pmprop_found = 0;
180 	for (nextp = di_prop_next(node, DI_PROP_NIL); nextp != DI_PROP_NIL;
181 	    nextp = di_prop_next(node, nextp)) {
182 		if (strcmp(di_prop_name(nextp), "pm-components") == 0) {
183 			is_pmprop_found = 1;
184 			break;
185 		}
186 	}
187 
188 	if (!is_pmprop_found)
189 		goto exit_es;
190 
191 	is_estar_system = 1;  /* CPU power mgmt supported E* system */
192 
193 	num_cpus = 0;
194 	while (node != DI_NODE_NIL) {
195 		num_cpus++;
196 		node = di_drv_next_node(node);
197 	}
198 
199 	cpu_dpaths = (char **)calloc(num_cpus+1, sizeof (char *));
200 	proc_ids = (int *)calloc(num_cpus+1, sizeof (int));
201 	proc_ids[num_cpus] = -1;  /* Terminate processor ids by -1 */
202 
203 	cpu_i = 0;
204 	for (node = fnode; node != DI_NODE_NIL; node = di_drv_next_node(node)) {
205 		proc_ids[cpu_i] = -1;
206 		cpu_dpaths[cpu_i] = NULL;
207 
208 		path = di_devfs_path(node);
209 		if (NULL == path)
210 			continue;
211 		cpu_dpaths[cpu_i] = strdup(path);
212 		di_devfs_path_free(path);
213 		/*
214 		 * Keep the mapping between path and processor IDs.
215 		 * Currently, processor IDs are not used.
216 		 * But may be used in future.
217 		 */
218 
219 		/*
220 		 * On workstation platforms (where CPU E* supported),
221 		 * processor ID and instance numbers are same.
222 		 * This may change in future. So watch out.
223 		 */
224 
225 		proc_ids[cpu_i]  = di_instance(node); /* Currently unused. */
226 		cpu_i++;
227 	}
228 
229 	proc_ids[cpu_i] = -1;
230 	cpu_dpaths[cpu_i] = NULL;
231 
232 	/* Initialize what "FULL POWER" mode is. */
233 	full_pwr = DEFAULT_CPU_FULL_POWER;
234 
235 	pmreq.physpath = cpu_dpaths[0];
236 	pmreq.component = 0;
237 	pmreq.value = 0;
238 	pmreq.data  = NULL;
239 	pmreq.datasize  = 0;
240 
241 
242 	full_pwr = ioctl(devpm_fd, PM_GET_FULL_POWER, &pmreq);
243 	if (full_pwr == -1)
244 		full_pwr = DEFAULT_CPU_FULL_POWER;
245 exit_es:
246 
247 	if (fps_di_root != DI_NODE_NIL) {
248 		di_fini(fps_di_root);
249 		fps_di_root = DI_NODE_NIL;
250 	}
251 	if (DI_PROM_HANDLE_NIL != fps_di_prom) {
252 		di_prom_fini(fps_di_prom);
253 		fps_di_prom = DI_PROM_HANDLE_NIL;
254 	}
255 }
256 
257 /*
258  *  Return the min(idle_times), min(remaining_times), max(rem_time) for all
259  *  CPUs in full power mode. The "remain time" is the remaining
260  *  threshold time after which the CPU will make next lower level
261  *  power transition if left idle.
262  *  If the CPUs are not in full power mode or could not exactly determine
263  *  the power mode then return -1.
264  *  return 0 if CPUs are in full power mode.
265  */
266 
267 int
get_idle_rem_stats(int * min_idle,int * min_rem,int * max_rem)268 get_idle_rem_stats(int *min_idle, int *min_rem, int *max_rem)
269 {
270 	int idle_time;
271 	int pmstats[2];
272 	int i;
273 	pm_req_t  pmreq;
274 	int ret;
275 
276 	*min_idle = -1;
277 	*min_rem = -1;
278 	*max_rem = -1;
279 
280 	for (i = 0; i < num_cpus; i++) {
281 
282 		pmreq.physpath = cpu_dpaths[i];
283 		pmreq.component = 0;
284 		pmreq.value = 0;
285 		pmreq.data  = pmstats;
286 		pmreq.datasize  = sizeof (pmstats);
287 		idle_time = ioctl(devpm_fd, PM_GET_TIME_IDLE, &pmreq);
288 		if (idle_time == -1)
289 			continue;
290 		ret = ioctl(devpm_fd, PM_GET_STATS, &pmreq);
291 
292 		/* Now pmstats[0] = cur power level; pmstats[1]=remain time */
293 		if (ret == -1)
294 			continue;
295 		if (pmstats[0] != full_pwr)
296 			continue;
297 
298 		if ((*min_idle == -1) || (idle_time < *min_idle))
299 			*min_idle = idle_time;
300 		if (*min_rem == -1 || pmstats[1] < *min_rem) {
301 			*min_rem = pmstats[1];
302 
303 			/*
304 			 * The remain time can be negative if there are 2 cpus
305 			 * and 1 cpu is ready to transition
306 			 * and the other one is not
307 			 */
308 			if (*min_rem < 0)
309 				*min_rem = 0;
310 		}
311 		if (*max_rem == -1 || pmstats[1] > *max_rem)
312 			*max_rem = pmstats[1];
313 	}
314 
315 	return
316 	    ((*min_idle == -1 || *min_rem == -1 || *max_rem == -1) ? -1 : 0);
317 }
318 
319 /*
320  * Wait until CPU comes to full power state or timeout occurs.
321  * If multiple threads call this function, execute the
322  * PM ioctl system call only once.
323  * This is better than all 3 threads polling cpu pwr state same time.
324  *
325  * Callers of this function should not assume that on returning from
326  * this function CPU will be in full power state.
327  * (They should check again).
328  * This function just optimizes for performance during wait.
329  *
330  *
331  */
332 
333 void
wait_for_pm_state_change()334 wait_for_pm_state_change()
335 {
336 	int res;
337 	static pthread_mutex_t wrlck;
338 	static int  is_active = 0;
339 	static pm_req_t  pmreq;
340 	static pm_state_change_t  pmsc;
341 	static char  path[MAXPATHLEN];
342 
343 	int pwr = 0;
344 	int cur_lvl = 0; /* 0 = unknown. 1=low, 3=full power */
345 
346 	pmreq.physpath = cpu_dpaths[0];
347 	pmreq.component = 0;
348 	pmreq.value = 0;
349 	pmreq.data  = NULL;
350 	pmreq.datasize  = 0;
351 
352 
353 	(void) pthread_mutex_lock(&wrlck);
354 
355 	if (!is_active) {    /* This is the first thread trying to wait */
356 		is_active = 1;
357 		(void) pthread_mutex_unlock(&wrlck);
358 
359 		pmsc.physpath = path;
360 		pmsc.size = MAXPATHLEN;
361 		path[0] = 0; /* init not required. Just in case... */
362 
363 		/*
364 		 * PM starts buffering the state changes after the first call to
365 		 * PM_GET_STATE_CHANGE/PM_GET_STATE_CHANGE_WAIT
366 		 *
367 		 * The PM_GET_STATE_CHANGE is a non-blocking call where as
368 		 * _WAIT is blocking call. The PM_GET_STATE_CHANGE also
369 		 * returns all the info * about the latest buffered state
370 		 * change if already buffered event is available. So it is
371 		 * important to drain out all old events,
372 		 * if you are only interested in future events.
373 		 *
374 		 * After the state changes the exact information/timestamp about
375 		 * state changes are reflected in the ioctl struct.
376 		 * To keep things simple, after draining out all buffered info,
377 		 * we issue get current power to get the current power level and
378 		 * then we issue another _WAIT command to get the
379 		 * next power change.
380 		 *
381 		 */
382 
383 		do {
384 
385 			res =  ioctl(devpm_fd, PM_GET_STATE_CHANGE, &pmsc);
386 
387 			if (res == -1 && errno != EWOULDBLOCK) {
388 				fpsd_message(FPSD_NO_EXIT, FPS_WARNING,
389 				    INTERNAL_FAILURE_WARN,
390 				    strerror(errno));
391 				/* 1 second sleep. Avoid busy loop */
392 				(void) poll(NULL, 0, 1000);
393 				/* Probably will succeed in next call. */
394 				goto psc_complete;
395 			}
396 
397 		} while (errno != EWOULDBLOCK);
398 
399 		/* drain out all buffered state changes */
400 
401 		/* If current state is full power, then get out. */
402 
403 		do {
404 			pwr = ioctl(devpm_fd, PM_GET_CURRENT_POWER, &pmreq);
405 			if (pwr != -1) break;
406 			if (errno == EAGAIN) {
407 				(void) poll(NULL, 0, 1000);  /* 1 sec sleep */
408 				continue;
409 			} else {
410 				fpsd_message(FPSD_NO_EXIT, FPS_WARNING,
411 				    INTERNAL_FAILURE_WARN1,
412 				    strerror(errno));
413 				(void) poll(NULL, 0, 1000);  /* 1 sec sleep */
414 				goto psc_complete;
415 			}
416 			/*CONSTCOND*/
417 		} while (1);
418 
419 		if (pwr == full_pwr)
420 			goto psc_complete;
421 
422 		while (cur_lvl != full_pwr) {
423 			pmsc.physpath = path;
424 			pmsc.size = MAXPATHLEN;
425 			path[0] = 0; /* init not required. Just in case... */
426 
427 			do {
428 				res = ioctl(devpm_fd,
429 				    PM_GET_STATE_CHANGE_WAIT, &pmsc);
430 				if (res == -1 && errno == EINTR) {
431 					/* 1 second sleep */
432 					(void) poll(NULL, 0, 1000);
433 				}
434 			} while (res == -1 && errno == EINTR);
435 
436 			if (res == -1) {
437 				fpsd_message(FPSD_NO_EXIT, FPS_WARNING,
438 				    INTERNAL_FAILURE_WARN2,
439 				    strerror(errno));
440 			/*
441 			 * If there are failures in state change ioctl,
442 			 * just would fall back to normal polling of
443 			 * status later. get out quiet.
444 			 */
445 			/* avoid busy loop -- 1 second sleep */
446 			(void) poll(NULL, 0, 1000);
447 			goto psc_complete;
448 		}
449 
450 		if (strcmp(pmsc.physpath, cpu_dpaths[0]) == 0 &&
451 		    pmsc.new_level == full_pwr)
452 			cur_lvl = full_pwr;
453 		}
454 
455 psc_complete:
456 		(void) pthread_mutex_lock(&wrlck);
457 		is_active = 0;
458 		(void) pthread_mutex_unlock(&wrlck);
459 
460 	} else {
461 		/* Release the lock first */
462 		(void) pthread_mutex_unlock(&wrlck);
463 		/*
464 		 * Already one other thread is active issuing ioctl call.
465 		 * Just poll here to check the local flag without any expensive
466 		 * ioctl calls until the transition is complete.
467 		 */
468 		(void) poll(NULL, 0, 1000); /* first time 1 second wait */
469 		for (;;) {
470 			(void) pthread_mutex_lock(&wrlck);
471 			if (!is_active) {
472 				(void) pthread_mutex_unlock(&wrlck);
473 				break;
474 			}
475 			(void) pthread_mutex_unlock(&wrlck);
476 			(void) poll(NULL, 0, 4000); /* 4 seconds wait */
477 		}
478 	}
479 }
480