xref: /onnv-gate/usr/src/cmd/fps/fpsd/fpsd_main.c (revision 7435:f05e6d8813dc)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <stdio.h>
28 #include <string.h>
29 #include <sys/types.h>
30 #include <dirent.h>
31 #include <stdarg.h>
32 #include <stddef.h>
33 #include <stdlib.h>
34 #include <dlfcn.h>
35 #include <door.h>
36 #include <errno.h>
37 #include <fcntl.h>
38 #include <strings.h>
39 #include <unistd.h>
40 #include <synch.h>
41 #include <syslog.h>
42 #include <pthread.h>
43 #include <thread.h>
44 #include <signal.h>
45 #include <limits.h>
46 #include <locale.h>
47 #include <sys/stat.h>
48 #include <sys/systeminfo.h>
49 #include <sys/wait.h>
50 #include <sys/processor.h>
51 #include <ctype.h>
52 #include <poll.h>
53 #include <sys/wait.h>
54 #include <dirent.h>
55 #include <kstat.h>
56 #include <libscf.h>
57 #include <sys/pset.h>
58 #include <sys/param.h>
59 #include <sys/corectl.h>
60 #include <libgen.h>
61 #include <priv_utils.h>
62 #include <fpsapi.h>
63 
64 #include "fpsd.h"
65 #include "messages.h"
66 
/* Name of the SMF snapshot that read_conf_props() reads properties from */
#define	SMF_SNAPSHOT_RUNNING	"running"

/* Only messages of priority 'debug_level' and lower will be logged */
int debug_level = DFLT_DBG_LVL;

/* Daemon-wide state; fpsd.d_conf is pointed at fpsd_conf by fpsd_init() */
fpsd_t  fpsd;
mach_conf_t fpsd_conf;
/* Scratch path of the fptest binary, filled by check_if_supported_CPU() */
char  fps_tst_path[MAXPATHLEN + MAXNAMELEN];

void terminate_process();

/* Local Static Variables */

/* Door descriptor returned by door_create(); -1 until the door exists */
static int  door_id = -1;
/* Value of the SMF_FMRI environment variable; NULL if it is not set */
static char *str_fps_fmri = NULL;

/* Local static functions */

static int check_if_supported_CPU(char *cpu_brand, char *arch);
static int read_conf_props();
static  void fpsd_fini();
static int reprobe_and_reread_config();
static int fpsd_probe_config();
static int fpsd_probe(mach_conf_t *m_stat);
91 
92 
93 /* ARGSUSED */
94 void
sig_hup_handler(int sig,siginfo_t * siginfo,void * sigctx)95 sig_hup_handler(int sig, siginfo_t *siginfo, void *sigctx)
96 {
97 	fpsd_message(FPSD_NO_EXIT, FPS_INFO,
98 	    SIGNAL_INFO, "HUP", SIGHUP);
99 	fpsd.d_conf->m_reprobe = 1;
100 }
101 
102 void
fpsd_read_config()103 fpsd_read_config()
104 {
105 	int ret;
106 
107 	ret = reprobe_and_reread_config();
108 	if (NO_CPUS_2_TEST == ret) {
109 		while (NO_CPUS_2_TEST == ret) {
110 			if (!fpsd.d_conf->m_reprobe) {
111 				(void) sleep(600);
112 			}
113 			ret = reprobe_and_reread_config();
114 		}
115 	}
116 }
117 
118 static int
reprobe_and_reread_config()119 reprobe_and_reread_config()
120 {
121 	int ret;
122 	static int first_time = 1;
123 
124 	if (!first_time) {
125 		fpsd.d_conf->m_reprobe = 1;
126 		if (fpsd_probe(fpsd.d_conf) != 0) {
127 			(void) fpsd_message(FPSD_EXIT_ERROR,
128 			    FPS_WARNING, REPROBE_FAILURE);
129 		}
130 	} else {
131 		first_time = 0;
132 	}
133 	ret = fpsd_probe_config();
134 	if (ZERO_INTERVAL == ret) {
135 		fpsd_message(FPSD_EXIT_ERROR, FPS_WARNING,
136 		    FPSD_ZERO_INTVL, fpsd.d_interval);
137 	}
138 	return (ret);
139 
140 }
141 
142 static int
daemon_exists()143 daemon_exists()
144 {
145 	int door_fd;
146 	struct door_info dinfo;
147 
148 	door_fd = open(FPS_DOOR_FILE, O_RDONLY);
149 	if (door_fd < 0)
150 		return (NO_DAEMON);
151 	if (door_info(door_fd, &dinfo) < 0) {
152 		(void) close(door_fd);
153 		return (NO_DAEMON);
154 	}
155 	if ((dinfo.di_attributes & DOOR_REVOKED) ||
156 	    (dinfo.di_data != (uintptr_t)FPS_DOOR_COOKIE)) {
157 		(void) close(door_fd);
158 		return (NO_DAEMON);
159 	}
160 	if (dinfo.di_target != getpid()) {
161 		/* Daemon exists; different process */
162 		(void) close(door_fd);
163 		return (DAEMON_EXISTS);
164 	} else {
165 		(void) close(door_fd);
166 		return (DAEMON_EXISTS_AND_SAME_PROC); /* Same process */
167 	}
168 
169 }
170 
171 static  int
fps_setup_door(void)172 fps_setup_door(void)
173 {
174 
175 	struct stat	stbuf;
176 	int newfd;
177 
178 	/*  Create the door */
179 	door_id = door_create(fps_door_handler, FPS_DOOR_COOKIE, 0);
180 
181 	if (door_id < 0) {
182 		fpsd_message(FPSD_NO_EXIT, FPS_WARNING, DAEMON_DOOR_FAIL,
183 		    strerror(errno));
184 		return (-1);
185 	}
186 
187 	if (stat(FPS_DOOR_FILE, &stbuf) < 0) {
188 		if ((newfd = creat(FPS_DOOR_FILE, 0600)) < 0) {
189 			fpsd_message(FPSD_NO_EXIT, FPS_ERROR,
190 			    DAEMON_DOOR_FILE_FAIL, strerror(errno));
191 			return (-1);
192 		}
193 		(void) close(newfd);
194 	}
195 
196 	if (fattach(door_id, FPS_DOOR_FILE) < 0) {
197 		if ((errno != EBUSY) || (fdetach(FPS_DOOR_FILE) < 0) ||
198 		    (fattach(door_id, FPS_DOOR_FILE) < 0)) {
199 			fpsd_message(FPSD_NO_EXIT, FPS_ERROR,
200 			    DAEMON_DOOR_FATTACH_FAIL,
201 			    strerror(errno));
202 			return (-1);
203 		}
204 	}
205 
206 	return (0);
207 }
208 
209 void
terminate_process()210 terminate_process()
211 {
212 	fpsd_fini();
213 	if (door_id >= 0) {
214 		(void) door_revoke(door_id);
215 		(void) unlink(FPS_DOOR_FILE);
216 	}
217 }
218 
219 static int
become_daemon_init()220 become_daemon_init()
221 {
222 	int pfds[2];
223 	pid_t pid;
224 	int status;
225 	sigset_t set, oset;
226 
227 	/*
228 	 * Block all signals prior to the fork and leave them blocked in
229 	 * the parent so we don't get in a situation where the parent gets
230 	 * SIGINT and returns non-zero exit status and the child is
231 	 * actually running. In the child, restore the signal mask once
232 	 * we've done our setsid().
233 	 */
234 	(void) sigfillset(&set);
235 	(void) sigdelset(&set, SIGABRT);
236 	(void) sigdelset(&set, SIGHUP);
237 	(void) sigprocmask(SIG_BLOCK, &set, &oset);
238 
239 
240 	if (pipe(pfds) == -1)
241 		fpsd_message(FPSD_EXIT_ERROR, FPS_ERROR, DAEMON_PIPE_FAIL,
242 		    strerror(errno));
243 
244 	if ((pid = fork()) == -1)
245 		fpsd_message(FPSD_EXIT_ERROR, FPS_ERROR, DAEMON_FORK_FAIL,
246 		    strerror(errno));
247 
248 	/*
249 	 * If we're the parent process, wait for either the child to send
250 	 * us the appropriate exit status over the pipe or for the read to
251 	 * fail (presumably with 0 for EOF if our child terminated
252 	 * abnormally). If the read fails, exit with either the child's
253 	 * exit status if it exited or with FPSD_EXIT_ERROR if it died
254 	 * from a fatal signal.
255 	 */
256 	if (pid != 0) { /* Parent */
257 		(void) close(pfds[1]);
258 
259 		if (read(pfds[0], &status, sizeof (status)) == sizeof (status))
260 			_exit(status);
261 
262 		if (waitpid(pid, &status, 0) == pid && WIFEXITED(status))
263 			_exit(WEXITSTATUS(status));
264 
265 		_exit(FPSD_EXIT_ERROR);
266 	}
267 
268 	fpsd.d_pid = getpid();
269 	(void) sigprocmask(SIG_SETMASK, &oset, NULL); /* Restore signal mask */
270 	(void) setsid();
271 	(void) chdir("/");
272 	(void) umask(022);
273 	(void) close(pfds[0]);
274 	return (pfds[1]);
275 }
276 
277 
278 static void
become_daemon_fini(int fd)279 become_daemon_fini(int fd)
280 {
281 	(void) close(fd);
282 	if ((fd = open("/dev/null", O_RDWR)) >= 0) {
283 		(void) fcntl(fd, F_DUP2FD, STDIN_FILENO);
284 		(void) fcntl(fd, F_DUP2FD, STDOUT_FILENO);
285 		(void) fcntl(fd, F_DUP2FD, STDERR_FILENO);
286 		(void) close(fd);
287 	}
288 
289 }
290 
291 /*
292  * Calculates the number of iterations needed for each testable cpu
293  * based on the frequency and using the following table. This table
294  * tells how much time it takes for the matrix sizes on a processor
295  * with frequencies upto 1000MHz/1500 MHz/ 2000 MHz. This data is
296  * based on profiling done earlier.
297  *
298  * f\p\t| 100  200   300   400    500     600     700     800     900 ms
299  * ======================================================================
300  * 1000  1-28 29-50 51-62 63-72  73-81   82-90   91-98   99-105  106-112
301  * 1500  1-36 37-64 65-80 81-93  94-106  107-115 116-126 127-134 135-144
302  * 2000  1-39 40-70 71-87 88-102 103-113 114-126 127-137 138-148 149-157
303  *
304  * If asc is 0, these iterations will be executed in the descending of
305  * of matrix size; else the iterations will be executed in the increasing
306  * order of matrix sizes. This is done to average out the execution time
307  * as large matrices mean more time to complete the test.
308  */
309 
310 static void
calculateTotalIterations(mach_conf_t * m_stat)311 calculateTotalIterations(mach_conf_t *m_stat)
312 {
313 	const int num_iterations_1K = 112;
314 	const int num_iterations_1500 = 144;
315 	const int num_iterations_2K = 157;
316 
317 	int total_iterations = 0;
318 	int asc = 1;
319 	int i;
320 	int freq;
321 
322 	if (m_stat->m_cpuids_size <= 0) {
323 		fpsd_message(FPSD_EXIT_ERROR, FPS_WARNING,
324 		    ZERO_CPUS_2_TST);
325 	}
326 	m_stat->m_num_cpus_to_test = 0;
327 	for (i = 0; i < m_stat->m_cpuids_size; i++) {
328 		if (m_stat->m_cpus[i].disable_test)
329 			continue;
330 		freq = m_stat->m_cpus[i].frequency;
331 		m_stat->m_cpus[i].asc = asc;
332 		if (freq < 1500) {
333 			total_iterations += num_iterations_1K;
334 			m_stat->m_cpus[i].total_iterations = num_iterations_1K;
335 		} else if (freq < 2000) {
336 			total_iterations += num_iterations_1500;
337 			m_stat->m_cpus[i].total_iterations =
338 			    num_iterations_1500;
339 		} else {
340 			total_iterations += num_iterations_2K;
341 			m_stat->m_cpus[i].total_iterations = num_iterations_2K;
342 		}
343 		if (asc) {
344 			m_stat->m_cpus[i].previous_iteration = 0;
345 			asc = 0;
346 		} else {
347 			m_stat->m_cpus[i].previous_iteration =
348 			    m_stat->m_cpus[i].total_iterations + 1;
349 			asc = 1;
350 		}
351 		m_stat->m_num_cpus_to_test++;
352 	}
353 	fpsd_message(FPSD_NO_EXIT, FPS_DEBUG, TOT_ITERS,
354 	    total_iterations, m_stat->m_num_cpus_to_test);
355 	fpsd.d_conf->total_iter = total_iterations;
356 }
357 
358 /*
359  * Calculates the time interval between the tests invocation in seconds.
360  * The goal is to complete once all iterations for all cpus in a 24hr
361  * period.
362  */
363 
364 static int
calculateTimeInterval()365 calculateTimeInterval()
366 {
367 	int total_iterations = fpsd.d_conf->total_iter;
368 	int intvl;
369 
370 	if (total_iterations <= 0) {
371 		fpsd_message(FPSD_EXIT_ERROR, FPS_WARNING,
372 		    FPSD_MIS_CALCULATIONS, total_iterations);
373 	}
374 	intvl = (24*60*60) / (total_iterations);
375 	fpsd.d_interval = intvl;
376 	return (1);
377 }
378 
379 /*
380  * Checks if a platform is supported by looking for the corresponding
381  * binary under /usr/lib/fps/ARCH/CPU_BRAND/fptest; (e.g) ARCH = sun4u,
382  * CPU_BRAND = UltraSPARC-III;
383  */
384 
385 static int
check_if_supported_CPU(char * cpu_brand,char * arch)386 check_if_supported_CPU(char *cpu_brand, char *arch)
387 {
388 	if ((NULL == cpu_brand) || (NULL == arch)) {
389 		return (0);
390 	}
391 	(void) snprintf(fps_tst_path, sizeof (fps_tst_path), "%s/%s/%s/%s",
392 	    FPS_DIR, arch, cpu_brand, FPS_FPUTST_NAME);
393 	fpsd_message(FPSD_NO_EXIT, FPS_DEBUG, FPTST_BIN_PTH, fps_tst_path);
394 	if (access(fps_tst_path, X_OK) == 0)
395 		return (1);
396 	else
397 		return (0);
398 }
399 
400 /*
401  * fpsd_probe(): probes system configuration and
402  * sets up the fpsd_t structure.
403  * Returns 0 on success, non-zero on failure.
404  *
405  */
406 static int
fpsd_probe(mach_conf_t * m_stat)407 fpsd_probe(mach_conf_t *m_stat)
408 {
409 	kstat_ctl_t *kstat_ctl;
410 	kstat_t *fps_kstat;
411 	kstat_named_t *kstat_cpu_name;
412 	kstat_named_t *kstat_cpu_freq;
413 	char *cpu_brand = NULL;
414 	int cpu_freq;
415 	int supported;
416 	int i;
417 	int cpuid_index;
418 
419 	processorid_t *cpuid_list = NULL;
420 	kid_t ret;
421 	int total_onln = sysconf(_SC_NPROCESSORS_ONLN);
422 
423 	/* probe the system and fill in mach_conf_t elements */
424 
425 	(void) sysinfo(SI_MACHINE, m_stat->m_machine,
426 	    sizeof (m_stat->m_machine) - 1);
427 
428 	if (1 == m_stat->m_reprobe) {
429 		/* Reprobe request */
430 		fpsd_message(FPSD_NO_EXIT, FPS_DEBUG, REPRBE_REQ);
431 		fpsd.d_iteration = 0;
432 		fpsd.d_interval = 0;
433 		fpsd.d_fpuid_index = 0;
434 		m_stat->m_num_on_fpuids = 0;
435 		m_stat->m_cpuids_size = 0;
436 		m_stat->total_iter = 0;
437 		m_stat->m_reprobe = 0;
438 		m_stat->m_num_cpus_to_test = 0;
439 
440 		if (NULL != fpsd.d_ignore_cpuid) {
441 			free(fpsd.d_ignore_cpuid);
442 		}
443 	}
444 
445 	/*
446 	 * Find number of online FPUs, and initialize
447 	 * m_stat->m_num_on_fpuids. Then collect kstat
448 	 * cpu_info for each.
449 	 */
450 
451 	cpuid_list = (processorid_t *)malloc(m_stat->m_num_fpus *
452 	    sizeof (processorid_t));
453 	if (NULL == cpuid_list) {
454 		fpsd_message(FPSD_NO_EXIT, FPS_INFO, LIBRARY_CALL_FAIL,
455 		    "malloc", strerror(errno));
456 		return (-1);
457 	}
458 	/* Initialize cpuid_list */
459 	for (i = 0; i < m_stat->m_num_fpus; i++) {
460 		cpuid_list[i] = -1;
461 	}
462 
463 	cpuid_index = 0;
464 	for (i = 0; i < m_stat->m_max_cpuid; i++) {
465 		if (p_online(i, P_STATUS) == P_ONLINE) {
466 			cpuid_list[cpuid_index++] = i;
467 		}
468 		if (cpuid_index == total_onln) {
469 			/* Break after all onln cpuids found */
470 			break;
471 		}
472 	}
473 	m_stat->m_num_on_fpuids = (uint_t)cpuid_index;
474 	fpsd_message(FPSD_NO_EXIT, FPS_DEBUG, NUM_ONLN_CPUS,
475 	    m_stat->m_num_on_fpuids);
476 
477 	/*
478 	 * Get cpu-brand info all valid cpuids using kstat.
479 	 * This is needed to take care
480 	 * of mixed cpu scenario
481 	 */
482 
483 	kstat_ctl = kstat_open();
484 	if (NULL == kstat_ctl) {
485 		fpsd_message(FPSD_NO_EXIT, FPS_WARNING, LIBRARY_CALL_FAIL,
486 		    "kstat_open", strerror(errno));
487 		free(cpuid_list);
488 		return (-1);
489 	}
490 
491 
492 	for (i = 0; i < m_stat->m_num_on_fpuids; i++) {
493 
494 		supported = 0;
495 		fps_kstat = NULL;
496 
497 		if ((cpuid_list[i] < 0) ||
498 		    (cpuid_list[i] >= m_stat->m_max_cpuid)) {
499 			fpsd_message(FPSD_NO_EXIT, FPS_INFO,
500 			    INVALID_CPUID, cpuid_list[i]);
501 			free(cpuid_list);
502 			return (-1);
503 		}
504 		fps_kstat = kstat_lookup(kstat_ctl, "cpu_info",
505 		    cpuid_list[i], NULL);
506 		if (NULL == fps_kstat) {
507 			fpsd_message(FPSD_NO_EXIT, FPS_INFO,
508 			    LIBRARY_CALL_FAIL, "kstat_lookup",
509 			    strerror(errno));
510 			(void) kstat_close(kstat_ctl);
511 			free(cpuid_list);
512 			return (-1);
513 		}
514 		ret = kstat_read(kstat_ctl, fps_kstat, NULL);
515 		if (ret != -1) {
516 			kstat_cpu_name = kstat_data_lookup(fps_kstat,
517 			    "brand");
518 			if (NULL != kstat_cpu_name) {
519 				cpu_brand = KSTAT_NAMED_STR_PTR(
520 				    kstat_cpu_name);
521 
522 				supported = check_if_supported_CPU(
523 				    cpu_brand, m_stat->m_machine);
524 			}
525 		} else {
526 			fpsd_message(FPSD_NO_EXIT, FPS_INFO,
527 			    CPU_BRAND_PROBE_FAIL, cpuid_list[i]);
528 			(void) kstat_close(kstat_ctl);
529 			free(cpuid_list);
530 			return (-1);
531 		}
532 		if (!supported) {
533 			fpsd_message(FPSD_NO_EXIT, FPS_INFO,
534 			    CPU_NOT_SUPPORTED, cpu_brand,
535 			    cpuid_list[i]);
536 			m_stat->m_cpus[i].disable_test = 1;
537 			(void) strcpy(m_stat->m_cpus[i].fptest_path, "");
538 		} else {
539 			m_stat->m_cpus[i].disable_test = 0;
540 			m_stat->m_num_cpus_to_test++;
541 			(void) strlcpy(m_stat->m_cpus[i].fptest_path,
542 			    fps_tst_path,
543 			    sizeof (m_stat->m_cpus[i].fptest_path));
544 		}
545 
546 		/* Get frequency */
547 
548 		kstat_cpu_freq = kstat_data_lookup(fps_kstat,
549 		    "clock_MHz");
550 		if (NULL != kstat_cpu_freq) {
551 			cpu_freq = (int)kstat_cpu_freq->value.l;
552 		} else {
553 			fpsd_message(FPSD_NO_EXIT, FPS_INFO,
554 			    FREQ_PROBE_FAIL, cpuid_list[i]);
555 			(void) kstat_close(kstat_ctl);
556 			free(cpuid_list);
557 			return (-1);
558 		}
559 
560 		m_stat->m_cpus[i].cpuid = cpuid_list[i];
561 		m_stat->m_cpus[i].frequency = cpu_freq;
562 		(void) strncpy(m_stat->m_cpus[i].brand, cpu_brand,
563 		    sizeof (m_stat->m_cpus[i].brand));
564 		m_stat->m_cpus[i].num_failures = 0;
565 
566 		fpsd_message(FPSD_NO_EXIT, FPS_DEBUG, CPU_INFO,
567 		    cpuid_list[i], m_stat->m_cpus[i].brand,
568 		    cpu_freq);
569 	}
570 	m_stat->m_cpuids_size = (int)m_stat->m_num_on_fpuids;
571 	fpsd_message(FPSD_NO_EXIT, FPS_DEBUG,
572 	    NUM_CPUS_2_TST, m_stat->m_cpuids_size);
573 	free(cpuid_list);
574 	(void) kstat_close(kstat_ctl);
575 	if (m_stat->m_num_cpus_to_test <= 0) {
576 		fpsd_message(FPSD_NO_EXIT, FPS_DEBUG,
577 		    FPSD_NO_CPUS_TO_TEST);
578 		return (-1);
579 	}
580 	return (0);
581 }
582 
583 /*
584  * returns 1 if cpuid is found in the list of cpus to be
585  * excluded from testing.
586  */
587 static int
ignore_cpu(int cpuid)588 ignore_cpu(int cpuid)
589 {
590 	int found = 0;
591 	int i;
592 	processorid_t   *ignore_cpus = fpsd.d_ignore_cpuid;
593 	for (i = 0; (i < fpsd.num_ignore_cpus) && (!found); i++) {
594 		if (ignore_cpus[i] == cpuid) {
595 			found = 1;
596 		}
597 	}
598 	return (found);
599 }
600 
/*
 * This function checks if the string consists of one run of contiguous
 * decimal digits. Leading and trailing blanks are O.K.; a sign, embedded
 * blanks or any other character are not. Returns 0 if the string is NULL,
 * empty or not a valid integer, and 1 if it is valid.
 */

static int
valid_integer(char *cpu_str)
{
	/*
	 * The <ctype.h> classifiers are only defined for values
	 * representable as unsigned char (or EOF), so walk the string
	 * through an unsigned view rather than passing plain char.
	 */
	const unsigned char *p = (const unsigned char *)cpu_str;

	if ((NULL == cpu_str) || (*cpu_str == '\0')) {
		return (0);
	}

	/* Optional leading blanks */
	while (isblank(*p))
		p++;

	/* At least one digit is required */
	if (!isdigit(*p))
		return (0);
	while (isdigit(*p))
		p++;

	/* Optional trailing blanks, then nothing else */
	while (isblank(*p))
		p++;

	return ((*p == '\0') ? 1 : 0);
}
646 
647 /*
648  * This function parses the string of cpu-ids separated by
649  * "," , constructs the list and disables testing on those
650  * cpus. This function assumes fpsd_probe has been called and all
651  * the machine config info is available in structure fpsd.
652  */
653 
654 static int
parse_and_set_cpu_id_list(char * strCPUs)655 parse_and_set_cpu_id_list(char *strCPUs)
656 {
657 	char *last;
658 	int num_cpus = 0, invalid = 0;
659 	int *tmp_cpus;
660 	int num_cpus_to_test = 0;
661 	int i;
662 	int t_cpuid;
663 	char *cpu_id;
664 	static int first_time = 1;
665 
666 	tmp_cpus = (int *)malloc((int)fpsd.d_conf->m_num_fpus * sizeof (int));
667 	if (NULL == tmp_cpus)
668 		return (-1);
669 	cpu_id = strtok_r(strCPUs, ",", &last);
670 
671 	while ((NULL != cpu_id) && (!invalid)) {
672 		if (valid_integer(cpu_id)) {
673 			tmp_cpus[num_cpus++] =
674 			    (int)strtol(cpu_id, (char **)NULL, 10);
675 			cpu_id = strtok_r(NULL, ",", &last);
676 		} else {
677 			fpsd_message(FPSD_NO_EXIT, FPS_INFO,
678 			    INVAL_PROP_VALUE, cpu_id);
679 			invalid = 1;
680 		}
681 		if (num_cpus > fpsd.d_conf->m_num_fpus) {
682 			/* More than max supported cpus */
683 			fpsd_message(FPSD_NO_EXIT, FPS_INFO,
684 			    INVAL_PROP_VALUE, strCPUs);
685 			invalid = 1;
686 		}
687 	}
688 	if ((!invalid) && (num_cpus > 0)) {
689 		fpsd.d_ignore_cpuid = (processorid_t *)malloc(
690 		    sizeof (processorid_t) * (int) num_cpus);
691 		if (NULL != fpsd.d_ignore_cpuid) {
692 			for (i = 0; i < num_cpus; i++) {
693 				fpsd.d_ignore_cpuid[i] = tmp_cpus[i];
694 			}
695 			fpsd.num_ignore_cpus = num_cpus;
696 		} else {
697 			fpsd.num_ignore_cpus = 0;
698 		}
699 	} else {
700 		fpsd.d_ignore_cpuid = NULL;
701 		fpsd.num_ignore_cpus = 0;
702 	}
703 	free(tmp_cpus);
704 	fpsd_message(FPSD_NO_EXIT, FPS_DEBUG, NUM_IGN_CPUS,
705 	    fpsd.num_ignore_cpus);
706 	if ((fpsd.num_ignore_cpus > 0) && (fpsd.d_conf->m_cpuids_size > 0)) {
707 		for (i = 0; i < fpsd.d_conf->m_cpuids_size; i++) {
708 			t_cpuid = fpsd.d_conf->m_cpus[i].cpuid;
709 			if (ignore_cpu(t_cpuid)) {
710 				fpsd.d_conf->m_cpus[i].disable_test = 1;
711 				fpsd_message(FPSD_NO_EXIT, FPS_DEBUG,
712 				    IGN_CPUS, t_cpuid);
713 			} else {
714 				num_cpus_to_test++;
715 			}
716 		}
717 		fpsd.d_conf->m_num_cpus_to_test = num_cpus_to_test;
718 		if (num_cpus_to_test <= 0)  {
719 			if (1 == first_time) {
720 				fpsd_message(FPSD_NO_EXIT, FPS_INFO,
721 				    ALL_CPUS_EXCLDED);
722 				first_time = 0;
723 			} else {
724 				fpsd_message(FPSD_NO_EXIT, FPS_INFO,
725 				    ALL_CPUS_EXCLDED);
726 			}
727 			return (NO_CPUS_2_TEST);
728 		}
729 	}
730 	first_time = 1;
731 	return (0);
732 }
733 
/*
 * Releases the libscf objects used by read_conf_props().  Expands to a
 * braced block (so it is written without a trailing semicolon) that
 * refers to the enclosing function's locals scf_handle_p, inst, pg,
 * scf_prop_p, value and scf_snapshot_p by name.  Any of them that is NULL
 * is skipped.
 */
#define	CLEAN_UP_SCF_STUFF	{	\
	if (scf_handle_p != NULL) {	\
		(void) scf_handle_unbind(scf_handle_p);	\
		scf_handle_destroy(scf_handle_p);	\
	}	\
	if (inst != NULL) {	\
		scf_instance_destroy(inst);	\
	}	\
	if (pg != NULL) {	\
		scf_pg_destroy(pg);	\
	}	\
	if (scf_prop_p != NULL) {	\
		scf_property_destroy(scf_prop_p);	\
	}	\
	if (value != NULL) {	\
		scf_value_destroy(value);	\
	}	\
	if (scf_snapshot_p != NULL) {	\
		scf_snapshot_destroy(scf_snapshot_p);	\
	}	\
}
750 
751 /* Read properties from SMF configuration repository using libscf APIs */
752 
753 static int
read_conf_props()754 read_conf_props()
755 {
756 	scf_handle_t *scf_handle_p;
757 	scf_property_t *scf_prop_p = NULL;
758 	scf_instance_t *inst = NULL;
759 	scf_propertygroup_t *pg = NULL;
760 	scf_value_t	*value = NULL;
761 	int ret_val = -1;
762 	int val;
763 	int64_t intvl;
764 	int name_len;
765 	char *strCPUs;
766 	scf_snapshot_t *scf_snapshot_p = NULL;
767 
768 	scf_handle_p = scf_handle_create(SCF_VERSION);
769 	if ((NULL != scf_handle_p) && (NULL != str_fps_fmri)) {
770 		if (scf_handle_bind(scf_handle_p) != -1) {
771 			inst = scf_instance_create(scf_handle_p);
772 			pg = scf_pg_create(scf_handle_p);
773 			scf_prop_p = scf_property_create(scf_handle_p);
774 			if ((NULL == inst) || (NULL == pg) ||
775 			    (NULL == scf_prop_p)) {
776 				fpsd_message(FPSD_NO_EXIT, FPS_DEBUG,
777 				    CREATE_FAIL,
778 				    scf_strerror(scf_error()));
779 				CLEAN_UP_SCF_STUFF
780 				return (-1);
781 			}
782 			val = scf_handle_decode_fmri(scf_handle_p,
783 			    str_fps_fmri,
784 			    NULL, NULL, inst, pg, scf_prop_p, 0);
785 			if (val != 0) {
786 				fpsd_message(FPSD_NO_EXIT, FPS_DEBUG,
787 				    HANDLE_DECODE_FAIL,
788 				    scf_strerror(scf_error()));
789 				CLEAN_UP_SCF_STUFF
790 				return (-1);
791 			}
792 			scf_snapshot_p = scf_snapshot_create(scf_handle_p);
793 			if (NULL == scf_snapshot_p) {
794 				fpsd_message(FPSD_NO_EXIT, FPS_DEBUG,
795 				    SNAPSHOT_CREAT_FAIL,
796 				    scf_strerror(scf_error()));
797 				CLEAN_UP_SCF_STUFF
798 				return (-1);
799 			}
800 			val = scf_instance_get_snapshot(inst,
801 			    SMF_SNAPSHOT_RUNNING, scf_snapshot_p);
802 			if (val == -1) {
803 				fpsd_message(FPSD_NO_EXIT, FPS_DEBUG,
804 				    INST_SNAPSHOT_GET_FAIL,
805 				    scf_strerror(scf_error()));
806 				CLEAN_UP_SCF_STUFF
807 				return (-1);
808 			}
809 			val = scf_instance_get_pg_composed(inst, scf_snapshot_p,
810 			    SMF_FPS_PROP_GRP_NAME, pg);
811 			if (val != 0) {
812 				fpsd_message(FPSD_NO_EXIT, FPS_DEBUG,
813 				    INSTANCE_PG_GET_FAIL,
814 				    scf_strerror(scf_error()));
815 				CLEAN_UP_SCF_STUFF
816 				return (-1);
817 			}
818 			val = scf_pg_get_property(pg, SMF_PROP_INTVL,
819 			    scf_prop_p);
820 			/* Read interval property if defined */
821 			if (val == 0) {
822 				value = scf_value_create(scf_handle_p);
823 				if (NULL != value) {
824 					val = scf_property_get_value(scf_prop_p,
825 					    value);
826 					if (0 == val) {
827 						val =
828 						    scf_value_get_integer(value,
829 						    &intvl);
830 						if ((0 == val) && (intvl > 0)) {
831 							fpsd.d_interval =
832 							    (int)intvl;
833 							fpsd_message(
834 							    FPSD_NO_EXIT,
835 							    FPS_DEBUG,
836 							    INTVL_VAL, intvl);
837 							ret_val = 0;
838 						}
839 					}
840 				}
841 			} else {
842 				fpsd_message(FPSD_NO_EXIT, FPS_INFO,
843 				    PROP_UNDEFINED, SMF_PROP_INTVL,
844 				    scf_strerror(scf_error()));
845 			}
846 			/*
847 			 * Read property "exclude_cpus" if defined - this is
848 			 * the array of cpu-ids to be excluded from testing.
849 			 */
850 			val = scf_pg_get_property(pg, SMF_PROP_EXCLD_CPUS,
851 			    scf_prop_p);
852 			if (val == 0) {
853 				val = scf_property_get_value(scf_prop_p, value);
854 				if (0 == val) {
855 					name_len =
856 					    scf_limit(
857 					    SCF_LIMIT_MAX_NAME_LENGTH);
858 					strCPUs = malloc(name_len +1);
859 					if (NULL == strCPUs) {
860 						fpsd_message(FPSD_NO_EXIT,
861 						    FPS_DEBUG,
862 						    LIBRARY_CALL_FAIL,
863 						    "malloc");
864 						CLEAN_UP_SCF_STUFF
865 						return (-1);
866 					}
867 					val = scf_value_get_astring(value,
868 					    strCPUs, name_len+1);
869 					if ((val != -1) &&
870 					    (strlen(strCPUs) > 0)) {
871 						fpsd_message(FPSD_NO_EXIT,
872 						    FPS_DEBUG,
873 						    EXCL_CPUS, strCPUs);
874 						ret_val =
875 						    parse_and_set_cpu_id_list(
876 						    strCPUs);
877 					}
878 					free(strCPUs);
879 				}
880 			} else {
881 				fpsd_message(FPSD_NO_EXIT, FPS_DEBUG,
882 				    PROP_UNDEFINED,
883 				    SMF_PROP_EXCLD_CPUS,
884 				    scf_strerror(scf_error()));
885 			}
886 		}
887 	}
888 
889 	/* Clean up */
890 
891 	CLEAN_UP_SCF_STUFF
892 	return (ret_val);
893 }
894 
895 static int
fpsd_init()896 fpsd_init()
897 {
898 	mach_conf_t *m_conf_p;
899 
900 	debug_level = DFLT_DBG_LVL;
901 	fpsd.d_fg = 0;
902 	fpsd.d_daemon = 0;
903 	fpsd.d_ignore_cpuid = NULL;
904 	fpsd.d_iteration = 0;
905 	fpsd.d_interval = 0;
906 	fpsd.d_fpuid_index = 0;
907 	fpsd.d_rootdir = "/";
908 	fpsd.d_pid = getpid();
909 	fpsd.d_conf = &fpsd_conf;
910 	fpsd.d_ts_hup = 0;
911 
912 	m_conf_p = fpsd.d_conf;
913 	m_conf_p->m_machine[0] = '\0';
914 	m_conf_p->m_num_on_fpuids = 0;
915 	m_conf_p->m_cpuids_size = 0;
916 	m_conf_p->total_iter = 0;
917 	m_conf_p->m_reprobe = 0;
918 	m_conf_p->m_num_cpus_to_test = 0;
919 	m_conf_p->m_num_fpus = (uint_t)sysconf(_SC_NPROCESSORS_MAX);
920 
921 	(void) pthread_mutex_init(&log_mutex, NULL);
922 
923 	m_conf_p->m_max_cpuid = (int)sysconf(_SC_CPUID_MAX) + 1;
924 
925 	/*
926 	 * Allocate enough memory to accomodate maximum number of CPUs
927 	 * supported by this platform.
928 	 */
929 	m_conf_p->m_cpus = malloc(sizeof (fps_cpu_t) *
930 	    m_conf_p->m_num_fpus);
931 	if (NULL == m_conf_p->m_cpus)
932 		return (1);
933 	else
934 		return (0);
935 
936 }
937 
938 static void
fpsd_fini()939 fpsd_fini()
940 {
941 	if (fpsd.d_ignore_cpuid)
942 		free(fpsd.d_ignore_cpuid);
943 	if (fpsd.d_conf->m_cpus)
944 		free(fpsd.d_conf->m_cpus);
945 }
946 
947 static int
fpsd_probe_config()948 fpsd_probe_config()
949 {
950 	int smf_invoked = 0;
951 	int ret = 0;
952 
953 	/*
954 	 * Use smf_get_state to get the status of the service to see
955 	 * if the status is "online" by now. If so, read the proper-
956 	 * ties defined using SCF.
957 	 */
958 
959 	if (NULL != str_fps_fmri) {
960 		smf_invoked = 1;
961 
962 		/* Read SMF properties if invoked thro' SMF */
963 		ret = read_conf_props();
964 		if (ret == NO_CPUS_2_TEST) {
965 			return (ret);
966 		}
967 	}
968 	calculateTotalIterations(fpsd.d_conf);
969 	if ((ret == -1) || (!smf_invoked) || (fpsd.d_interval <= 0)) {
970 		ret = calculateTimeInterval();
971 		fpsd_message(FPSD_NO_EXIT, FPS_DEBUG,
972 		    PRINT_INTVL, fpsd.d_interval);
973 		if ((ret != 1) || (fpsd.d_interval <= 0)) {
974 			return (ZERO_INTERVAL);
975 		}
976 	}
977 	return (0);
978 }
979 
980 int
main(int argc,char ** argv)981 main(int argc, char **argv)
982 {
983 
984 	int sig;
985 	sigset_t  sigs;
986 	/* Pipe fd to write the status back to parent after becoming daemon */
987 	int pfd = -1;
988 	int status = FPSD_INIT_SUCCESS;
989 	char rcvsigstr[32];
990 	int c;
991 	int ret;
992 	struct rlimit rlim;
993 	char path[MAXPATHLEN];
994 	int probe_status = -1;
995 	const char *progname;
996 	struct	sigaction	act;
997 
998 	progname = strrchr(argv[0], '/');
999 	if (NULL != progname)
1000 		progname++;
1001 	else
1002 		progname = argv[0];
1003 
1004 #ifndef TEXT_DOMAIN		/* Should be defined via Makefile */
1005 #define	TEXT_DOMAIN  "SUNW_FPS"
1006 #endif
1007 
1008 	(void) setlocale(LC_ALL, "");
1009 	(void) textdomain(TEXT_DOMAIN);
1010 
1011 	openlog(FPS_DAEMON_NAME, LOG_PID, LOG_DAEMON);
1012 
1013 	if (fpsd_init()) {
1014 		fpsd_message(FPSD_EXIT_ERROR, FPS_ERROR, DAEMON_INIT_FAIL);
1015 	}
1016 
1017 	/*
1018 	 * Set our per-process core file path to leave core files in
1019 	 * var/fps/core directory, named after the PID to aid in
1020 	 * debugging, and make sure that there is no restriction on core
1021 	 * file size.
1022 	 */
1023 
1024 	if ((ret = access(FPS_CORE_DIR, W_OK)) != 0) {
1025 		if ((ret = mkdirp(FPS_CORE_DIR, 0755)) != 0) {
1026 			fpsd_message(FPSD_NO_EXIT, FPS_ERROR,
1027 			    CORE_DIR_CREATION_FAILED,
1028 			    FPS_CORE_DIR, strerror(errno));
1029 		}
1030 	}
1031 
1032 	if (ret == 0) {
1033 		(void) snprintf(path, sizeof (path), "%s/core.%s.%%p",
1034 		    FPS_CORE_DIR, progname);
1035 		(void) core_set_process_path(path, strlen(path) + 1,
1036 		    fpsd.d_pid);
1037 		rlim.rlim_cur = RLIM_INFINITY;
1038 		rlim.rlim_max = RLIM_INFINITY;
1039 
1040 		(void) setrlimit(RLIMIT_CORE, &rlim);
1041 	}
1042 
1043 
1044 	/*  parse arguments */
1045 	while ((c = getopt(argc, argv, "dl:")) != EOF) {
1046 		switch (c) {
1047 		case 'd':
1048 			fpsd.d_fg = 1;
1049 			break;
1050 
1051 		case 'l':
1052 			debug_level = atoi(optarg);
1053 			if (debug_level < 0)
1054 				debug_level = DFLT_DBG_LVL;
1055 			break;
1056 
1057 		default:
1058 			fpsd_message(FPSD_EXIT_USAGE, FPS_ERROR, USAGE_MSG,
1059 			    progname);
1060 			break;
1061 		}
1062 	}
1063 
1064 
1065 	/*
1066 	 * Reset all of our privilege sets to the minimum set of required
1067 	 * privileges.  We continue to run as root so that files we create
1068 	 * such as logs and checkpoints are secured in the /var
1069 	 * filesystem.
1070 	 */
1071 	if (__init_daemon_priv(PU_RESETGROUPS | PU_LIMITPRIVS | PU_INHERITPRIVS,
1072 	    0, 0, /* run as uid 0 and gid 0 */
1073 	    PRIV_FILE_DAC_EXECUTE, PRIV_FILE_DAC_READ, PRIV_FILE_DAC_SEARCH,
1074 	    PRIV_FILE_DAC_WRITE, PRIV_FILE_OWNER, PRIV_PROC_OWNER,
1075 	    PRIV_PROC_PRIOCNTL, PRIV_SYS_ADMIN, PRIV_SYS_CONFIG,
1076 	    PRIV_SYS_DEVICES, PRIV_SYS_RES_CONFIG,
1077 	    PRIV_NET_PRIVADDR, NULL) != 0) {
1078 
1079 		(void) fpsd_message(FPSD_EXIT_ERROR, FPS_ERROR,
1080 		    INSUFFICIENT_PRIVS, progname);
1081 	}
1082 
1083 
1084 	if (!fpsd.d_fg) {   /* Now become daemon */
1085 		pfd = become_daemon_init();
1086 	} else {
1087 		(void) chdir(FPS_DIR);
1088 	}
1089 
1090 	if (daemon_exists()) {
1091 		/*
1092 		 * If another instance of fpsd daemon is already running;
1093 		 * exit. Should not clean up door file
1094 		 */
1095 		fpsd_message(FPSD_EXIT_ERROR, FPS_ERROR,
1096 		    DAEMON_ALREADY_RUNNING);
1097 	}
1098 
1099 	/*
1100 	 * Setup door prevents any more instances of fpsd from running.
1101 	 */
1102 	if (fps_setup_door() == -1) {
1103 		fpsd_message(FPSD_EXIT_ERROR, FPS_ERROR, DOOR_SETUP_FAIL);
1104 	}
1105 
1106 	/*
1107 	 * Some desktop platforms satisfy E* guidelines. Here CPU power
1108 	 * management is enabled by default. The scheduling algorithms
1109 	 * change on these platforms so that no testing is done on an
1110 	 * idle system, in order to save power.
1111 	 */
1112 	init_estar_db();    /* Initialize Estar config data base */
1113 	/* Print message on CPU E* enabled system */
1114 	if (is_estar_system)
1115 		fpsd_message(FPSD_NO_EXIT, FPS_DEBUG, ESTAR_INFO);
1116 
1117 	if ((probe_status = fpsd_probe(fpsd.d_conf)) != 0) {
1118 		(void) fpsd_message(FPSD_NO_EXIT, FPS_DEBUG,
1119 		    UNSUPPORTED_SYSTEM);
1120 	}
1121 
1122 	if (!fpsd.d_fg) {   /* Complete daemonize proces */
1123 
1124 		fpsd.d_daemon = 1;
1125 		/*
1126 		 * Now that we're running, if a pipe fd was specified, write an
1127 		 * exit status to it to indicate that our parent process can
1128 		 * safely detach.
1129 		 */
1130 		if (pfd >= 0) {
1131 			(void) write(pfd, &status, sizeof (status));
1132 		}
1133 		become_daemon_fini(pfd);
1134 
1135 	} else {
1136 		/*
1137 		 * Mask all signals before creating sched thread. We will
1138 		 * unmask selective signals from main thread. This ensures
1139 		 * that only main thread handles signals. This is done in
1140 		 * become_daemon() if we had to daemonize.
1141 		 */
1142 
1143 		(void) sigfillset(&sigs);
1144 		(void) sigprocmask(SIG_BLOCK, &sigs, NULL);
1145 	}
1146 
1147 	/*
1148 	 * Give some time for SMF to read the exit status
1149 	 * of parent and update fpsd fmri state
1150 	 */
1151 	(void) poll(NULL, 0, 3*1000);
1152 
1153 	str_fps_fmri = getenv("SMF_FMRI");
1154 	if (NULL != str_fps_fmri) {
1155 		fpsd_message(FPSD_NO_EXIT, FPS_DEBUG, SMF_INVKD, str_fps_fmri);
1156 	} else {
1157 		fpsd_message(FPSD_NO_EXIT, FPS_DEBUG, CLI_INVKD);
1158 	}
1159 
1160 	if (probe_status != 0) {
1161 		/* Exit child process too */
1162 
1163 		if (NULL != str_fps_fmri) {
1164 			const char *smf_state;
1165 			ret =  smf_disable_instance(str_fps_fmri,
1166 			    SMF_TEMPORARY);
1167 			if (0 == ret) {
1168 				(void) fpsd_message(FPSD_NO_EXIT,
1169 				    FPS_DEBUG, FPSD_STATE);
1170 			} else {
1171 				/* Unable to disable the service. */
1172 				smf_state = smf_get_state(str_fps_fmri);
1173 				if (NULL == smf_state) {
1174 					smf_state = " ";
1175 					(void) fpsd_message(FPSD_NO_EXIT,
1176 					    FPS_ERROR, DISABLE_SVC_FAILED,
1177 					    smf_state);
1178 				}
1179 				(void) poll(NULL, 0, 3*1000);
1180 			}
1181 		}
1182 		terminate_process();
1183 		_exit(FPSD_EXIT_ERROR);
1184 	}
1185 
1186 	act.sa_sigaction = sig_hup_handler;
1187 	(void) sigemptyset(&act.sa_mask);
1188 	act.sa_flags = SA_SIGINFO;
1189 	(void) sigaction(SIGHUP, &act, NULL);
1190 	fpsd_read_config();
1191 	(void) sigfillset(&sigs);
1192 	(void) sigprocmask(SIG_BLOCK, &sigs, NULL);
1193 
1194 	/*
1195 	 * On estar-systems, if interval < MIN_INTERVAL, scheduling tests will
1196 	 * reset the idle counter and prevent system from going to sleep.
1197 	 * To avoid this, the interval is set to MIN_INTERVAL.
1198 	 */
1199 
1200 	if ((is_estar_system) && (fpsd.d_interval < MIN_INTERVAL)) {
1201 		fpsd.d_interval = MIN_INTERVAL;
1202 		fpsd_message(FPSD_NO_EXIT, FPS_DEBUG, MIN_INTERVAL_MSG,
1203 		    fpsd.d_interval, MIN_INTERVAL);
1204 	}
1205 
1206 	/* Run scheduling thread */
1207 	if ((ret == 0) && thr_create(NULL, 0,
1208 	    test_fpu_thr, (void *) NULL, THR_BOUND, NULL) != 0) {
1209 		fpsd_message(FPSD_EXIT_ERROR, FPS_WARNING, THR_CREATION_FAIL);
1210 	}
1211 
1212 	/*
1213 	 * We unmask selective signals here. Besides terminating on
1214 	 * SIGINT & SIGTERM, we handle SIGHUP that is used to cause
1215 	 * daemon to re-read the SMF properties.
1216 	 */
1217 	(void) sigemptyset(&sigs);
1218 	(void) sigaddset(&sigs, SIGINT);
1219 	(void) sigaddset(&sigs, SIGTERM);
1220 	(void) sigaddset(&sigs, SIGHUP);
1221 	(void) sigprocmask(SIG_UNBLOCK, &sigs, NULL);
1222 
1223 	for (;;) {
1224 		(void) sigwait(&sigs, &sig);
1225 		(void) sig2str(sig, rcvsigstr);
1226 
1227 		if (sig != -1) {
1228 			fpsd_message(FPSD_NO_EXIT, FPS_INFO,
1229 			    SIGNAL_INFO, rcvsigstr, sig);
1230 			switch (sig) {
1231 				case SIGINT:
1232 				case SIGTERM:
1233 					terminate_process();
1234 					_exit(FPSD_EXIT_ERROR);
1235 					break;
1236 				case SIGHUP:
1237 					fpsd.d_ts_hup = gethrtime();
1238 					break;
1239 				default: break;
1240 			}
1241 		}
1242 	}
1243 #pragma error_messages(off, E_STATEMENT_NOT_REACHED)
1244 	/* NOTREACHED */
1245 	return (0);
1246 }
1247