xref: /onnv-gate/usr/src/cmd/svc/startd/wait.c (revision 111:347ecf011416)
10Sstevel@tonic-gate /*
20Sstevel@tonic-gate  * CDDL HEADER START
30Sstevel@tonic-gate  *
40Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
50Sstevel@tonic-gate  * Common Development and Distribution License, Version 1.0 only
60Sstevel@tonic-gate  * (the "License").  You may not use this file except in compliance
70Sstevel@tonic-gate  * with the License.
80Sstevel@tonic-gate  *
90Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
100Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
110Sstevel@tonic-gate  * See the License for the specific language governing permissions
120Sstevel@tonic-gate  * and limitations under the License.
130Sstevel@tonic-gate  *
140Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
150Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
160Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
170Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
180Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
190Sstevel@tonic-gate  *
200Sstevel@tonic-gate  * CDDL HEADER END
210Sstevel@tonic-gate  */
220Sstevel@tonic-gate /*
23*111Srm88369  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
240Sstevel@tonic-gate  * Use is subject to license terms.
250Sstevel@tonic-gate  */
260Sstevel@tonic-gate 
270Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
280Sstevel@tonic-gate 
290Sstevel@tonic-gate /*
300Sstevel@tonic-gate  * wait.c - asynchronous monitoring of "wait registered" start methods
310Sstevel@tonic-gate  *
320Sstevel@tonic-gate  * Use event ports to poll on the set of fds representing the /proc/[pid]/psinfo
330Sstevel@tonic-gate  * files.  If one of these fds returns an event, then we inform the restarter
340Sstevel@tonic-gate  * that it has stopped.
350Sstevel@tonic-gate  *
360Sstevel@tonic-gate  * The wait_info_list holds the series of processes currently being monitored
370Sstevel@tonic-gate  * for exit.  The wi_fd member, which contains the file descriptor of the psinfo
380Sstevel@tonic-gate  * file being polled upon ("event ported upon"), will be set to -1 if the file
390Sstevel@tonic-gate  * descriptor is inactive (already closed or not yet opened).
400Sstevel@tonic-gate  */
410Sstevel@tonic-gate 
420Sstevel@tonic-gate #ifdef _FILE_OFFSET_BITS
430Sstevel@tonic-gate #undef _FILE_OFFSET_BITS
440Sstevel@tonic-gate #endif /* _FILE_OFFSET_BITS */
450Sstevel@tonic-gate 
460Sstevel@tonic-gate #include <sys/resource.h>
470Sstevel@tonic-gate #include <sys/stat.h>
480Sstevel@tonic-gate #include <sys/types.h>
490Sstevel@tonic-gate #include <sys/uio.h>
500Sstevel@tonic-gate #include <sys/wait.h>
510Sstevel@tonic-gate 
520Sstevel@tonic-gate #include <assert.h>
530Sstevel@tonic-gate #include <errno.h>
540Sstevel@tonic-gate #include <fcntl.h>
550Sstevel@tonic-gate #include <libuutil.h>
560Sstevel@tonic-gate #include <poll.h>
570Sstevel@tonic-gate #include <port.h>
580Sstevel@tonic-gate #include <pthread.h>
590Sstevel@tonic-gate #include <procfs.h>
600Sstevel@tonic-gate #include <string.h>
610Sstevel@tonic-gate #include <stropts.h>
620Sstevel@tonic-gate #include <unistd.h>
630Sstevel@tonic-gate 
640Sstevel@tonic-gate #include "startd.h"
650Sstevel@tonic-gate 
660Sstevel@tonic-gate #define	WAIT_FILES	262144		/* reasonably high maximum */
670Sstevel@tonic-gate 
680Sstevel@tonic-gate static int port_fd;
690Sstevel@tonic-gate static scf_handle_t *wait_hndl;
700Sstevel@tonic-gate static struct rlimit init_fd_rlimit;
710Sstevel@tonic-gate 
720Sstevel@tonic-gate static uu_list_pool_t *wait_info_pool;
730Sstevel@tonic-gate static uu_list_t *wait_info_list;
740Sstevel@tonic-gate 
750Sstevel@tonic-gate static pthread_mutex_t wait_info_lock;
760Sstevel@tonic-gate 
770Sstevel@tonic-gate /*
780Sstevel@tonic-gate  * void wait_remove(wait_info_t *, int)
790Sstevel@tonic-gate  *   Remove the given wait_info structure from our list, performing various
800Sstevel@tonic-gate  *   cleanup operations along the way.  If the direct flag is false (meaning
810Sstevel@tonic-gate  *   that we are being called with from restarter instance list context), then
820Sstevel@tonic-gate  *   notify the restarter that the associated instance has exited.
830Sstevel@tonic-gate  *
840Sstevel@tonic-gate  *   Since we may no longer be the startd that started this process, we only are
850Sstevel@tonic-gate  *   concerned with a waitpid(3C) failure if the wi_parent field is non-zero.
860Sstevel@tonic-gate  */
870Sstevel@tonic-gate static void
880Sstevel@tonic-gate wait_remove(wait_info_t *wi, int direct)
890Sstevel@tonic-gate {
900Sstevel@tonic-gate 	int status;
910Sstevel@tonic-gate 
920Sstevel@tonic-gate 	if (waitpid(wi->wi_pid, &status, 0) == -1) {
930Sstevel@tonic-gate 		if (wi->wi_parent)
940Sstevel@tonic-gate 			log_framework(LOG_INFO,
950Sstevel@tonic-gate 			    "instance %s waitpid failure: %s\n", wi->wi_fmri,
960Sstevel@tonic-gate 			    strerror(errno));
970Sstevel@tonic-gate 	} else {
980Sstevel@tonic-gate 		if (WEXITSTATUS(status) != 0) {
990Sstevel@tonic-gate 			log_framework(LOG_NOTICE,
1000Sstevel@tonic-gate 			    "instance %s exited with status %d\n", wi->wi_fmri,
1010Sstevel@tonic-gate 			    WEXITSTATUS(status));
1020Sstevel@tonic-gate 		}
1030Sstevel@tonic-gate 	}
1040Sstevel@tonic-gate 
1050Sstevel@tonic-gate 	MUTEX_LOCK(&wait_info_lock);
106*111Srm88369 	if (wi->wi_fd != -1) {
107*111Srm88369 		startd_close(wi->wi_fd);
108*111Srm88369 		wi->wi_fd = -1;
109*111Srm88369 	}
1100Sstevel@tonic-gate 	uu_list_remove(wait_info_list, wi);
1110Sstevel@tonic-gate 	MUTEX_UNLOCK(&wait_info_lock);
1120Sstevel@tonic-gate 
1130Sstevel@tonic-gate 	/*
1140Sstevel@tonic-gate 	 * Make an attempt to clear out any utmpx record associated with this
1150Sstevel@tonic-gate 	 * PID.
1160Sstevel@tonic-gate 	 */
1170Sstevel@tonic-gate 	utmpx_mark_dead(wi->wi_pid, status, B_FALSE);
1180Sstevel@tonic-gate 
1190Sstevel@tonic-gate 	if (!direct) {
1200Sstevel@tonic-gate 		/*
1210Sstevel@tonic-gate 		 * Bind wait_hndl lazily.
1220Sstevel@tonic-gate 		 */
1230Sstevel@tonic-gate 		if (wait_hndl == NULL) {
1240Sstevel@tonic-gate 			for (wait_hndl =
1250Sstevel@tonic-gate 			    libscf_handle_create_bound(SCF_VERSION);
1260Sstevel@tonic-gate 			    wait_hndl == NULL;
1270Sstevel@tonic-gate 			    wait_hndl =
1280Sstevel@tonic-gate 			    libscf_handle_create_bound(SCF_VERSION)) {
1290Sstevel@tonic-gate 				log_error(LOG_INFO, "[wait_remove] Unable to "
1300Sstevel@tonic-gate 				    "bind a new repository handle: %s\n",
1310Sstevel@tonic-gate 				    scf_strerror(scf_error()));
1320Sstevel@tonic-gate 				(void) sleep(2);
1330Sstevel@tonic-gate 			}
1340Sstevel@tonic-gate 		}
1350Sstevel@tonic-gate 
1360Sstevel@tonic-gate 		log_framework(LOG_DEBUG,
1370Sstevel@tonic-gate 		    "wait_remove requesting stop of %s\n", wi->wi_fmri);
1380Sstevel@tonic-gate 		(void) stop_instance_fmri(wait_hndl, wi->wi_fmri, RSTOP_EXIT);
1390Sstevel@tonic-gate 	}
1400Sstevel@tonic-gate 
1410Sstevel@tonic-gate 	uu_list_node_fini(wi, &wi->wi_link, wait_info_pool);
1420Sstevel@tonic-gate 	startd_free(wi, sizeof (wait_info_t));
1430Sstevel@tonic-gate }
1440Sstevel@tonic-gate 
1450Sstevel@tonic-gate /*
1460Sstevel@tonic-gate  * int wait_register(pid_t, char *, int, int)
1470Sstevel@tonic-gate  *   wait_register is called after we have called fork(2), and know which pid we
1480Sstevel@tonic-gate  *   wish to monitor.  However, since the child may have already exited by the
1490Sstevel@tonic-gate  *   time we are called, we must handle the error cases from open(2)
1500Sstevel@tonic-gate  *   appropriately.  The am_parent flag is recorded to handle waitpid(2)
1510Sstevel@tonic-gate  *   behaviour on removal; similarly, the direct flag is passed through to a
1520Sstevel@tonic-gate  *   potential call to wait_remove() to govern its behaviour in different
1530Sstevel@tonic-gate  *   contexts.
1540Sstevel@tonic-gate  *
1550Sstevel@tonic-gate  *   Returns 0 if registration successful, 1 if child pid did not exist, and -1
1560Sstevel@tonic-gate  *   if a different error occurred.
1570Sstevel@tonic-gate  */
1580Sstevel@tonic-gate int
1590Sstevel@tonic-gate wait_register(pid_t pid, const char *inst_fmri, int am_parent, int direct)
1600Sstevel@tonic-gate {
1610Sstevel@tonic-gate 	char *fname = uu_msprintf("/proc/%ld/psinfo", pid);
1620Sstevel@tonic-gate 	int fd;
1630Sstevel@tonic-gate 	wait_info_t *wi;
1640Sstevel@tonic-gate 
1650Sstevel@tonic-gate 	assert(pid != 0);
1660Sstevel@tonic-gate 
1670Sstevel@tonic-gate 	if (fname == NULL)
1680Sstevel@tonic-gate 		return (-1);
1690Sstevel@tonic-gate 
1700Sstevel@tonic-gate 	wi = startd_alloc(sizeof (wait_info_t));
1710Sstevel@tonic-gate 
1720Sstevel@tonic-gate 	uu_list_node_init(wi, &wi->wi_link, wait_info_pool);
1730Sstevel@tonic-gate 
1740Sstevel@tonic-gate 	wi->wi_fd = -1;
1750Sstevel@tonic-gate 	wi->wi_pid = pid;
1760Sstevel@tonic-gate 	wi->wi_fmri = inst_fmri;
1770Sstevel@tonic-gate 	wi->wi_parent = am_parent;
1780Sstevel@tonic-gate 
1790Sstevel@tonic-gate 	MUTEX_LOCK(&wait_info_lock);
1800Sstevel@tonic-gate 	(void) uu_list_insert_before(wait_info_list, NULL, wi);
1810Sstevel@tonic-gate 	MUTEX_UNLOCK(&wait_info_lock);
1820Sstevel@tonic-gate 
1830Sstevel@tonic-gate 	if ((fd = open(fname, O_RDONLY)) == -1) {
1840Sstevel@tonic-gate 		if (errno == ENOENT) {
1850Sstevel@tonic-gate 			/*
1860Sstevel@tonic-gate 			 * Child has already exited.
1870Sstevel@tonic-gate 			 */
1880Sstevel@tonic-gate 			wait_remove(wi, direct);
1890Sstevel@tonic-gate 			uu_free(fname);
1900Sstevel@tonic-gate 			return (1);
1910Sstevel@tonic-gate 		} else {
1920Sstevel@tonic-gate 			log_error(LOG_WARNING,
1930Sstevel@tonic-gate 			    "open %s failed; not monitoring %s: %s\n", fname,
1940Sstevel@tonic-gate 			    inst_fmri, strerror(errno));
1950Sstevel@tonic-gate 			uu_free(fname);
1960Sstevel@tonic-gate 			return (-1);
1970Sstevel@tonic-gate 		}
1980Sstevel@tonic-gate 	}
1990Sstevel@tonic-gate 
2000Sstevel@tonic-gate 	uu_free(fname);
2010Sstevel@tonic-gate 
2020Sstevel@tonic-gate 	wi->wi_fd = fd;
2030Sstevel@tonic-gate 
2040Sstevel@tonic-gate 	if (port_associate(port_fd, PORT_SOURCE_FD, fd, 0, wi)) {
2050Sstevel@tonic-gate 		log_error(LOG_WARNING,
2060Sstevel@tonic-gate 		    "initial port_association of %d / %s failed: %s\n", fd,
2070Sstevel@tonic-gate 		    inst_fmri, strerror(errno));
2080Sstevel@tonic-gate 		return (-1);
2090Sstevel@tonic-gate 	}
2100Sstevel@tonic-gate 
2110Sstevel@tonic-gate 	log_framework(LOG_DEBUG, "monitoring PID %ld on fd %d (%s)\n", pid, fd,
2120Sstevel@tonic-gate 	    inst_fmri);
2130Sstevel@tonic-gate 
2140Sstevel@tonic-gate 	return (0);
2150Sstevel@tonic-gate }
2160Sstevel@tonic-gate 
2170Sstevel@tonic-gate /*ARGSUSED*/
2180Sstevel@tonic-gate void *
2190Sstevel@tonic-gate wait_thread(void *args)
2200Sstevel@tonic-gate {
2210Sstevel@tonic-gate 	for (;;) {
2220Sstevel@tonic-gate 		port_event_t pe;
2230Sstevel@tonic-gate 		int fd;
2240Sstevel@tonic-gate 		wait_info_t *wi;
2250Sstevel@tonic-gate 
226*111Srm88369 		if (port_get(port_fd, &pe, NULL) != 0) {
227*111Srm88369 			if (errno == EINTR)
2280Sstevel@tonic-gate 				continue;
229*111Srm88369 			else {
2300Sstevel@tonic-gate 				log_error(LOG_WARNING,
231*111Srm88369 				    "port_get() failed with %s\n",
232*111Srm88369 				    strerror(errno));
233*111Srm88369 				bad_error("port_get", errno);
234*111Srm88369 			}
235*111Srm88369 		}
2360Sstevel@tonic-gate 
2370Sstevel@tonic-gate 		fd = pe.portev_object;
2380Sstevel@tonic-gate 		wi = pe.portev_user;
239*111Srm88369 		assert(wi != NULL);
240*111Srm88369 		assert(fd == wi->wi_fd);
2410Sstevel@tonic-gate 
2420Sstevel@tonic-gate 		if ((pe.portev_events & POLLHUP) == POLLHUP) {
2430Sstevel@tonic-gate 			psinfo_t psi;
2440Sstevel@tonic-gate 
2450Sstevel@tonic-gate 			if (lseek(fd, 0, SEEK_SET) != 0 ||
2460Sstevel@tonic-gate 			    read(fd, &psi, sizeof (psinfo_t)) !=
2470Sstevel@tonic-gate 			    sizeof (psinfo_t)) {
2480Sstevel@tonic-gate 				log_framework(LOG_WARNING,
2490Sstevel@tonic-gate 				    "couldn't get psinfo data for %s (%s); "
2500Sstevel@tonic-gate 				    "assuming failed\n", wi->wi_fmri,
2510Sstevel@tonic-gate 				    strerror(errno));
2520Sstevel@tonic-gate 				    goto err_remove;
2530Sstevel@tonic-gate 			}
2540Sstevel@tonic-gate 
2550Sstevel@tonic-gate 			if (psi.pr_nlwp != 0 ||
2560Sstevel@tonic-gate 			    psi.pr_nzomb != 0 ||
2570Sstevel@tonic-gate 			    psi.pr_lwp.pr_lwpid != 0) {
2580Sstevel@tonic-gate 				/*
2590Sstevel@tonic-gate 				 * We have determined, in accordance with the
2600Sstevel@tonic-gate 				 * definition in proc(4), this process is not a
2610Sstevel@tonic-gate 				 * zombie.  Reassociate.
2620Sstevel@tonic-gate 				 */
2630Sstevel@tonic-gate 				if (port_associate(port_fd, PORT_SOURCE_FD, fd,
2640Sstevel@tonic-gate 					0, wi))
2650Sstevel@tonic-gate 					log_error(LOG_WARNING,
2660Sstevel@tonic-gate 					    "port_association of %d / %s "
2670Sstevel@tonic-gate 					    "failed\n", fd, wi->wi_fmri);
2680Sstevel@tonic-gate 				continue;
2690Sstevel@tonic-gate 			}
2700Sstevel@tonic-gate 		} else if (
2710Sstevel@tonic-gate 		    (pe.portev_events & POLLERR) == 0) {
2720Sstevel@tonic-gate 			if (port_associate(port_fd, PORT_SOURCE_FD, fd, 0, wi))
2730Sstevel@tonic-gate 				log_error(LOG_WARNING,
2740Sstevel@tonic-gate 				    "port_association of %d / %s "
2750Sstevel@tonic-gate 				    "failed\n", fd, wi->wi_fmri);
2760Sstevel@tonic-gate 			continue;
2770Sstevel@tonic-gate 		}
2780Sstevel@tonic-gate 
2790Sstevel@tonic-gate err_remove:
2800Sstevel@tonic-gate 		wait_remove(wi, 0);
2810Sstevel@tonic-gate 	}
2820Sstevel@tonic-gate 
2830Sstevel@tonic-gate 	/*LINTED E_FUNC_HAS_NO_RETURN_STMT*/
2840Sstevel@tonic-gate }
2850Sstevel@tonic-gate 
2860Sstevel@tonic-gate void
2870Sstevel@tonic-gate wait_prefork()
2880Sstevel@tonic-gate {
2890Sstevel@tonic-gate 	MUTEX_LOCK(&wait_info_lock);
2900Sstevel@tonic-gate }
2910Sstevel@tonic-gate 
2920Sstevel@tonic-gate void
2930Sstevel@tonic-gate wait_postfork(pid_t pid)
2940Sstevel@tonic-gate {
2950Sstevel@tonic-gate 	wait_info_t *wi;
2960Sstevel@tonic-gate 
2970Sstevel@tonic-gate 	MUTEX_UNLOCK(&wait_info_lock);
2980Sstevel@tonic-gate 
2990Sstevel@tonic-gate 	if (pid != 0)
3000Sstevel@tonic-gate 		return;
3010Sstevel@tonic-gate 
3020Sstevel@tonic-gate 	/*
3030Sstevel@tonic-gate 	 * Close all of the child's wait-related fds.  The wait_thread() is
3040Sstevel@tonic-gate 	 * gone, so no need to worry about returning events.  We always exec(2)
3050Sstevel@tonic-gate 	 * after a fork request, so we needn't free the list elements
3060Sstevel@tonic-gate 	 * themselves.
3070Sstevel@tonic-gate 	 */
3080Sstevel@tonic-gate 
3090Sstevel@tonic-gate 	for (wi = uu_list_first(wait_info_list);
3100Sstevel@tonic-gate 	    wi != NULL;
3110Sstevel@tonic-gate 	    wi = uu_list_next(wait_info_list, wi)) {
3120Sstevel@tonic-gate 		if (wi->wi_fd != -1)
3130Sstevel@tonic-gate 			startd_close(wi->wi_fd);
3140Sstevel@tonic-gate 	}
3150Sstevel@tonic-gate 
3160Sstevel@tonic-gate 	startd_close(port_fd);
3170Sstevel@tonic-gate 
3180Sstevel@tonic-gate 	(void) setrlimit(RLIMIT_NOFILE, &init_fd_rlimit);
3190Sstevel@tonic-gate }
3200Sstevel@tonic-gate 
3210Sstevel@tonic-gate void
3220Sstevel@tonic-gate wait_init()
3230Sstevel@tonic-gate {
3240Sstevel@tonic-gate 	struct rlimit fd_new;
3250Sstevel@tonic-gate 
3260Sstevel@tonic-gate 	(void) getrlimit(RLIMIT_NOFILE, &init_fd_rlimit);
3270Sstevel@tonic-gate 	(void) getrlimit(RLIMIT_NOFILE, &fd_new);
3280Sstevel@tonic-gate 
3290Sstevel@tonic-gate 	fd_new.rlim_max = fd_new.rlim_cur = WAIT_FILES;
3300Sstevel@tonic-gate 
3310Sstevel@tonic-gate 	(void) setrlimit(RLIMIT_NOFILE, &fd_new);
3320Sstevel@tonic-gate 
3330Sstevel@tonic-gate 	if ((port_fd = port_create()) == -1)
3340Sstevel@tonic-gate 		uu_die("wait_init couldn't port_create");
3350Sstevel@tonic-gate 
3360Sstevel@tonic-gate 	wait_info_pool = uu_list_pool_create("wait_info", sizeof (wait_info_t),
3370Sstevel@tonic-gate 	    offsetof(wait_info_t, wi_link), NULL, UU_LIST_POOL_DEBUG);
3380Sstevel@tonic-gate 	if (wait_info_pool == NULL)
3390Sstevel@tonic-gate 		uu_die("wait_init couldn't create wait_info_pool");
3400Sstevel@tonic-gate 
3410Sstevel@tonic-gate 	wait_info_list = uu_list_create(wait_info_pool, wait_info_list, 0);
3420Sstevel@tonic-gate 	if (wait_info_list == NULL)
3430Sstevel@tonic-gate 		uu_die("wait_init couldn't create wait_info_list");
3440Sstevel@tonic-gate 
3450Sstevel@tonic-gate 	(void) pthread_mutex_init(&wait_info_lock, &mutex_attrs);
3460Sstevel@tonic-gate }
347