xref: /onnv-gate/usr/src/cmd/svc/startd/wait.c (revision 7219:343338355d03)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
250Sstevel@tonic-gate 
260Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
270Sstevel@tonic-gate 
/*
 * wait.c - asynchronous monitoring of "wait registered" start methods
 *
 * Use event ports to poll on the set of fds representing the /proc/[pid]/psinfo
 * files.  If one of these fds returns an event, then we inform the restarter
 * that it has stopped.
 *
 * The wait_info_list holds the series of processes currently being monitored
 * for exit.  The wi_fd member, which contains the file descriptor of the psinfo
 * file being polled upon ("event ported upon"), will be set to -1 if the file
 * descriptor is inactive (already closed or not yet opened).
 */
400Sstevel@tonic-gate 
410Sstevel@tonic-gate #ifdef _FILE_OFFSET_BITS
420Sstevel@tonic-gate #undef _FILE_OFFSET_BITS
430Sstevel@tonic-gate #endif /* _FILE_OFFSET_BITS */
440Sstevel@tonic-gate 
450Sstevel@tonic-gate #include <sys/resource.h>
460Sstevel@tonic-gate #include <sys/stat.h>
470Sstevel@tonic-gate #include <sys/types.h>
480Sstevel@tonic-gate #include <sys/uio.h>
490Sstevel@tonic-gate #include <sys/wait.h>
500Sstevel@tonic-gate 
510Sstevel@tonic-gate #include <assert.h>
520Sstevel@tonic-gate #include <errno.h>
530Sstevel@tonic-gate #include <fcntl.h>
540Sstevel@tonic-gate #include <libuutil.h>
550Sstevel@tonic-gate #include <poll.h>
560Sstevel@tonic-gate #include <port.h>
570Sstevel@tonic-gate #include <pthread.h>
580Sstevel@tonic-gate #include <procfs.h>
590Sstevel@tonic-gate #include <string.h>
600Sstevel@tonic-gate #include <stropts.h>
610Sstevel@tonic-gate #include <unistd.h>
620Sstevel@tonic-gate 
630Sstevel@tonic-gate #include "startd.h"
640Sstevel@tonic-gate 
650Sstevel@tonic-gate #define	WAIT_FILES	262144		/* reasonably high maximum */
660Sstevel@tonic-gate 
670Sstevel@tonic-gate static int port_fd;
680Sstevel@tonic-gate static scf_handle_t *wait_hndl;
690Sstevel@tonic-gate static struct rlimit init_fd_rlimit;
700Sstevel@tonic-gate 
710Sstevel@tonic-gate static uu_list_pool_t *wait_info_pool;
720Sstevel@tonic-gate static uu_list_t *wait_info_list;
730Sstevel@tonic-gate 
740Sstevel@tonic-gate static pthread_mutex_t wait_info_lock;
750Sstevel@tonic-gate 
760Sstevel@tonic-gate /*
770Sstevel@tonic-gate  * void wait_remove(wait_info_t *, int)
780Sstevel@tonic-gate  *   Remove the given wait_info structure from our list, performing various
790Sstevel@tonic-gate  *   cleanup operations along the way.  If the direct flag is false (meaning
80*7219Srm88369  *   that we are being called with from restarter instance list context) and
81*7219Srm88369  *   the instance should not be ignored, then notify the restarter that the
82*7219Srm88369  *   associated instance has exited. If the wi_ignore flag is true then it
83*7219Srm88369  *   means that the stop was initiated from within svc.startd, rather than
84*7219Srm88369  *   from outside it.
850Sstevel@tonic-gate  *
860Sstevel@tonic-gate  *   Since we may no longer be the startd that started this process, we only are
870Sstevel@tonic-gate  *   concerned with a waitpid(3C) failure if the wi_parent field is non-zero.
880Sstevel@tonic-gate  */
890Sstevel@tonic-gate static void
wait_remove(wait_info_t * wi,int direct)900Sstevel@tonic-gate wait_remove(wait_info_t *wi, int direct)
910Sstevel@tonic-gate {
920Sstevel@tonic-gate 	int status;
930Sstevel@tonic-gate 
940Sstevel@tonic-gate 	if (waitpid(wi->wi_pid, &status, 0) == -1) {
950Sstevel@tonic-gate 		if (wi->wi_parent)
960Sstevel@tonic-gate 			log_framework(LOG_INFO,
970Sstevel@tonic-gate 			    "instance %s waitpid failure: %s\n", wi->wi_fmri,
980Sstevel@tonic-gate 			    strerror(errno));
990Sstevel@tonic-gate 	} else {
1000Sstevel@tonic-gate 		if (WEXITSTATUS(status) != 0) {
1010Sstevel@tonic-gate 			log_framework(LOG_NOTICE,
1020Sstevel@tonic-gate 			    "instance %s exited with status %d\n", wi->wi_fmri,
1030Sstevel@tonic-gate 			    WEXITSTATUS(status));
1040Sstevel@tonic-gate 		}
1050Sstevel@tonic-gate 	}
1060Sstevel@tonic-gate 
1070Sstevel@tonic-gate 	MUTEX_LOCK(&wait_info_lock);
108111Srm88369 	if (wi->wi_fd != -1) {
109111Srm88369 		startd_close(wi->wi_fd);
110111Srm88369 		wi->wi_fd = -1;
111111Srm88369 	}
1120Sstevel@tonic-gate 	uu_list_remove(wait_info_list, wi);
1130Sstevel@tonic-gate 	MUTEX_UNLOCK(&wait_info_lock);
1140Sstevel@tonic-gate 
1150Sstevel@tonic-gate 	/*
1160Sstevel@tonic-gate 	 * Make an attempt to clear out any utmpx record associated with this
1170Sstevel@tonic-gate 	 * PID.
1180Sstevel@tonic-gate 	 */
1190Sstevel@tonic-gate 	utmpx_mark_dead(wi->wi_pid, status, B_FALSE);
1200Sstevel@tonic-gate 
121*7219Srm88369 	if (!direct && !wi->wi_ignore) {
1220Sstevel@tonic-gate 		/*
1230Sstevel@tonic-gate 		 * Bind wait_hndl lazily.
1240Sstevel@tonic-gate 		 */
1250Sstevel@tonic-gate 		if (wait_hndl == NULL) {
1260Sstevel@tonic-gate 			for (wait_hndl =
1270Sstevel@tonic-gate 			    libscf_handle_create_bound(SCF_VERSION);
1280Sstevel@tonic-gate 			    wait_hndl == NULL;
1290Sstevel@tonic-gate 			    wait_hndl =
1300Sstevel@tonic-gate 			    libscf_handle_create_bound(SCF_VERSION)) {
1310Sstevel@tonic-gate 				log_error(LOG_INFO, "[wait_remove] Unable to "
1320Sstevel@tonic-gate 				    "bind a new repository handle: %s\n",
1330Sstevel@tonic-gate 				    scf_strerror(scf_error()));
1340Sstevel@tonic-gate 				(void) sleep(2);
1350Sstevel@tonic-gate 			}
1360Sstevel@tonic-gate 		}
1370Sstevel@tonic-gate 
1380Sstevel@tonic-gate 		log_framework(LOG_DEBUG,
1390Sstevel@tonic-gate 		    "wait_remove requesting stop of %s\n", wi->wi_fmri);
1400Sstevel@tonic-gate 		(void) stop_instance_fmri(wait_hndl, wi->wi_fmri, RSTOP_EXIT);
1410Sstevel@tonic-gate 	}
1420Sstevel@tonic-gate 
1430Sstevel@tonic-gate 	uu_list_node_fini(wi, &wi->wi_link, wait_info_pool);
1440Sstevel@tonic-gate 	startd_free(wi, sizeof (wait_info_t));
1450Sstevel@tonic-gate }
1460Sstevel@tonic-gate 
1470Sstevel@tonic-gate /*
148*7219Srm88369  * void wait_ignore_by_fmri(const char *)
149*7219Srm88369  *   wait_ignore_by_fmri is called when svc.startd is going to stop the
150*7219Srm88369  *   instance. Since we need to wait on the process and close the utmpx record,
151*7219Srm88369  *   we're going to set the wi_ignore flag, so that when the process exits we
152*7219Srm88369  *   clean up, but don't tell the restarter to stop it.
153*7219Srm88369  */
154*7219Srm88369 void
wait_ignore_by_fmri(const char * fmri)155*7219Srm88369 wait_ignore_by_fmri(const char *fmri)
156*7219Srm88369 {
157*7219Srm88369 	wait_info_t *wi;
158*7219Srm88369 
159*7219Srm88369 	MUTEX_LOCK(&wait_info_lock);
160*7219Srm88369 
161*7219Srm88369 	for (wi = uu_list_first(wait_info_list); wi != NULL;
162*7219Srm88369 	    wi = uu_list_next(wait_info_list, wi)) {
163*7219Srm88369 		if (strcmp(wi->wi_fmri, fmri) == 0)
164*7219Srm88369 			break;
165*7219Srm88369 	}
166*7219Srm88369 
167*7219Srm88369 	if (wi != NULL) {
168*7219Srm88369 		wi->wi_ignore = 1;
169*7219Srm88369 	}
170*7219Srm88369 
171*7219Srm88369 	MUTEX_UNLOCK(&wait_info_lock);
172*7219Srm88369 }
173*7219Srm88369 
174*7219Srm88369 /*
1750Sstevel@tonic-gate  * int wait_register(pid_t, char *, int, int)
1760Sstevel@tonic-gate  *   wait_register is called after we have called fork(2), and know which pid we
1770Sstevel@tonic-gate  *   wish to monitor.  However, since the child may have already exited by the
1780Sstevel@tonic-gate  *   time we are called, we must handle the error cases from open(2)
1790Sstevel@tonic-gate  *   appropriately.  The am_parent flag is recorded to handle waitpid(2)
1800Sstevel@tonic-gate  *   behaviour on removal; similarly, the direct flag is passed through to a
1810Sstevel@tonic-gate  *   potential call to wait_remove() to govern its behaviour in different
1820Sstevel@tonic-gate  *   contexts.
1830Sstevel@tonic-gate  *
1840Sstevel@tonic-gate  *   Returns 0 if registration successful, 1 if child pid did not exist, and -1
1850Sstevel@tonic-gate  *   if a different error occurred.
1860Sstevel@tonic-gate  */
1870Sstevel@tonic-gate int
wait_register(pid_t pid,const char * inst_fmri,int am_parent,int direct)1880Sstevel@tonic-gate wait_register(pid_t pid, const char *inst_fmri, int am_parent, int direct)
1890Sstevel@tonic-gate {
1900Sstevel@tonic-gate 	char *fname = uu_msprintf("/proc/%ld/psinfo", pid);
1910Sstevel@tonic-gate 	int fd;
1920Sstevel@tonic-gate 	wait_info_t *wi;
1930Sstevel@tonic-gate 
1940Sstevel@tonic-gate 	assert(pid != 0);
1950Sstevel@tonic-gate 
1960Sstevel@tonic-gate 	if (fname == NULL)
1970Sstevel@tonic-gate 		return (-1);
1980Sstevel@tonic-gate 
1990Sstevel@tonic-gate 	wi = startd_alloc(sizeof (wait_info_t));
2000Sstevel@tonic-gate 
2010Sstevel@tonic-gate 	uu_list_node_init(wi, &wi->wi_link, wait_info_pool);
2020Sstevel@tonic-gate 
2030Sstevel@tonic-gate 	wi->wi_fd = -1;
2040Sstevel@tonic-gate 	wi->wi_pid = pid;
2050Sstevel@tonic-gate 	wi->wi_fmri = inst_fmri;
2060Sstevel@tonic-gate 	wi->wi_parent = am_parent;
207*7219Srm88369 	wi->wi_ignore = 0;
2080Sstevel@tonic-gate 
2090Sstevel@tonic-gate 	MUTEX_LOCK(&wait_info_lock);
2100Sstevel@tonic-gate 	(void) uu_list_insert_before(wait_info_list, NULL, wi);
2110Sstevel@tonic-gate 	MUTEX_UNLOCK(&wait_info_lock);
2120Sstevel@tonic-gate 
2130Sstevel@tonic-gate 	if ((fd = open(fname, O_RDONLY)) == -1) {
2140Sstevel@tonic-gate 		if (errno == ENOENT) {
2150Sstevel@tonic-gate 			/*
2160Sstevel@tonic-gate 			 * Child has already exited.
2170Sstevel@tonic-gate 			 */
2180Sstevel@tonic-gate 			wait_remove(wi, direct);
2190Sstevel@tonic-gate 			uu_free(fname);
2200Sstevel@tonic-gate 			return (1);
2210Sstevel@tonic-gate 		} else {
2220Sstevel@tonic-gate 			log_error(LOG_WARNING,
2230Sstevel@tonic-gate 			    "open %s failed; not monitoring %s: %s\n", fname,
2240Sstevel@tonic-gate 			    inst_fmri, strerror(errno));
2250Sstevel@tonic-gate 			uu_free(fname);
2260Sstevel@tonic-gate 			return (-1);
2270Sstevel@tonic-gate 		}
2280Sstevel@tonic-gate 	}
2290Sstevel@tonic-gate 
2300Sstevel@tonic-gate 	uu_free(fname);
2310Sstevel@tonic-gate 
2320Sstevel@tonic-gate 	wi->wi_fd = fd;
2330Sstevel@tonic-gate 
2340Sstevel@tonic-gate 	if (port_associate(port_fd, PORT_SOURCE_FD, fd, 0, wi)) {
2350Sstevel@tonic-gate 		log_error(LOG_WARNING,
2360Sstevel@tonic-gate 		    "initial port_association of %d / %s failed: %s\n", fd,
2370Sstevel@tonic-gate 		    inst_fmri, strerror(errno));
2380Sstevel@tonic-gate 		return (-1);
2390Sstevel@tonic-gate 	}
2400Sstevel@tonic-gate 
2410Sstevel@tonic-gate 	log_framework(LOG_DEBUG, "monitoring PID %ld on fd %d (%s)\n", pid, fd,
2420Sstevel@tonic-gate 	    inst_fmri);
2430Sstevel@tonic-gate 
2440Sstevel@tonic-gate 	return (0);
2450Sstevel@tonic-gate }
2460Sstevel@tonic-gate 
2470Sstevel@tonic-gate /*ARGSUSED*/
2480Sstevel@tonic-gate void *
wait_thread(void * args)2490Sstevel@tonic-gate wait_thread(void *args)
2500Sstevel@tonic-gate {
2510Sstevel@tonic-gate 	for (;;) {
2520Sstevel@tonic-gate 		port_event_t pe;
2530Sstevel@tonic-gate 		int fd;
2540Sstevel@tonic-gate 		wait_info_t *wi;
2550Sstevel@tonic-gate 
256111Srm88369 		if (port_get(port_fd, &pe, NULL) != 0) {
257111Srm88369 			if (errno == EINTR)
2580Sstevel@tonic-gate 				continue;
259111Srm88369 			else {
2600Sstevel@tonic-gate 				log_error(LOG_WARNING,
261111Srm88369 				    "port_get() failed with %s\n",
262111Srm88369 				    strerror(errno));
263111Srm88369 				bad_error("port_get", errno);
264111Srm88369 			}
265111Srm88369 		}
2660Sstevel@tonic-gate 
2670Sstevel@tonic-gate 		fd = pe.portev_object;
2680Sstevel@tonic-gate 		wi = pe.portev_user;
269111Srm88369 		assert(wi != NULL);
270111Srm88369 		assert(fd == wi->wi_fd);
2710Sstevel@tonic-gate 
2720Sstevel@tonic-gate 		if ((pe.portev_events & POLLHUP) == POLLHUP) {
2730Sstevel@tonic-gate 			psinfo_t psi;
2740Sstevel@tonic-gate 
2750Sstevel@tonic-gate 			if (lseek(fd, 0, SEEK_SET) != 0 ||
2760Sstevel@tonic-gate 			    read(fd, &psi, sizeof (psinfo_t)) !=
2770Sstevel@tonic-gate 			    sizeof (psinfo_t)) {
2780Sstevel@tonic-gate 				log_framework(LOG_WARNING,
2790Sstevel@tonic-gate 				    "couldn't get psinfo data for %s (%s); "
2800Sstevel@tonic-gate 				    "assuming failed\n", wi->wi_fmri,
2810Sstevel@tonic-gate 				    strerror(errno));
282*7219Srm88369 				goto err_remove;
2830Sstevel@tonic-gate 			}
2840Sstevel@tonic-gate 
2850Sstevel@tonic-gate 			if (psi.pr_nlwp != 0 ||
2860Sstevel@tonic-gate 			    psi.pr_nzomb != 0 ||
2870Sstevel@tonic-gate 			    psi.pr_lwp.pr_lwpid != 0) {
2880Sstevel@tonic-gate 				/*
2890Sstevel@tonic-gate 				 * We have determined, in accordance with the
2900Sstevel@tonic-gate 				 * definition in proc(4), this process is not a
2910Sstevel@tonic-gate 				 * zombie.  Reassociate.
2920Sstevel@tonic-gate 				 */
2930Sstevel@tonic-gate 				if (port_associate(port_fd, PORT_SOURCE_FD, fd,
294*7219Srm88369 				    0, wi))
2950Sstevel@tonic-gate 					log_error(LOG_WARNING,
2960Sstevel@tonic-gate 					    "port_association of %d / %s "
2970Sstevel@tonic-gate 					    "failed\n", fd, wi->wi_fmri);
2980Sstevel@tonic-gate 				continue;
2990Sstevel@tonic-gate 			}
3000Sstevel@tonic-gate 		} else if (
3010Sstevel@tonic-gate 		    (pe.portev_events & POLLERR) == 0) {
3020Sstevel@tonic-gate 			if (port_associate(port_fd, PORT_SOURCE_FD, fd, 0, wi))
3030Sstevel@tonic-gate 				log_error(LOG_WARNING,
3040Sstevel@tonic-gate 				    "port_association of %d / %s "
3050Sstevel@tonic-gate 				    "failed\n", fd, wi->wi_fmri);
3060Sstevel@tonic-gate 			continue;
3070Sstevel@tonic-gate 		}
3080Sstevel@tonic-gate 
3090Sstevel@tonic-gate err_remove:
3100Sstevel@tonic-gate 		wait_remove(wi, 0);
3110Sstevel@tonic-gate 	}
3120Sstevel@tonic-gate 
3130Sstevel@tonic-gate 	/*LINTED E_FUNC_HAS_NO_RETURN_STMT*/
3140Sstevel@tonic-gate }
3150Sstevel@tonic-gate 
3160Sstevel@tonic-gate void
wait_prefork()3170Sstevel@tonic-gate wait_prefork()
3180Sstevel@tonic-gate {
3190Sstevel@tonic-gate 	MUTEX_LOCK(&wait_info_lock);
3200Sstevel@tonic-gate }
3210Sstevel@tonic-gate 
3220Sstevel@tonic-gate void
wait_postfork(pid_t pid)3230Sstevel@tonic-gate wait_postfork(pid_t pid)
3240Sstevel@tonic-gate {
3250Sstevel@tonic-gate 	wait_info_t *wi;
3260Sstevel@tonic-gate 
3270Sstevel@tonic-gate 	MUTEX_UNLOCK(&wait_info_lock);
3280Sstevel@tonic-gate 
3290Sstevel@tonic-gate 	if (pid != 0)
3300Sstevel@tonic-gate 		return;
3310Sstevel@tonic-gate 
3320Sstevel@tonic-gate 	/*
3330Sstevel@tonic-gate 	 * Close all of the child's wait-related fds.  The wait_thread() is
3340Sstevel@tonic-gate 	 * gone, so no need to worry about returning events.  We always exec(2)
3350Sstevel@tonic-gate 	 * after a fork request, so we needn't free the list elements
3360Sstevel@tonic-gate 	 * themselves.
3370Sstevel@tonic-gate 	 */
3380Sstevel@tonic-gate 
3390Sstevel@tonic-gate 	for (wi = uu_list_first(wait_info_list);
3400Sstevel@tonic-gate 	    wi != NULL;
3410Sstevel@tonic-gate 	    wi = uu_list_next(wait_info_list, wi)) {
3420Sstevel@tonic-gate 		if (wi->wi_fd != -1)
3430Sstevel@tonic-gate 			startd_close(wi->wi_fd);
3440Sstevel@tonic-gate 	}
3450Sstevel@tonic-gate 
3460Sstevel@tonic-gate 	startd_close(port_fd);
3470Sstevel@tonic-gate 
3480Sstevel@tonic-gate 	(void) setrlimit(RLIMIT_NOFILE, &init_fd_rlimit);
3490Sstevel@tonic-gate }
3500Sstevel@tonic-gate 
3510Sstevel@tonic-gate void
wait_init()3520Sstevel@tonic-gate wait_init()
3530Sstevel@tonic-gate {
3540Sstevel@tonic-gate 	struct rlimit fd_new;
3550Sstevel@tonic-gate 
3560Sstevel@tonic-gate 	(void) getrlimit(RLIMIT_NOFILE, &init_fd_rlimit);
3570Sstevel@tonic-gate 	(void) getrlimit(RLIMIT_NOFILE, &fd_new);
3580Sstevel@tonic-gate 
3590Sstevel@tonic-gate 	fd_new.rlim_max = fd_new.rlim_cur = WAIT_FILES;
3600Sstevel@tonic-gate 
3610Sstevel@tonic-gate 	(void) setrlimit(RLIMIT_NOFILE, &fd_new);
3620Sstevel@tonic-gate 
3630Sstevel@tonic-gate 	if ((port_fd = port_create()) == -1)
3640Sstevel@tonic-gate 		uu_die("wait_init couldn't port_create");
3650Sstevel@tonic-gate 
3660Sstevel@tonic-gate 	wait_info_pool = uu_list_pool_create("wait_info", sizeof (wait_info_t),
3670Sstevel@tonic-gate 	    offsetof(wait_info_t, wi_link), NULL, UU_LIST_POOL_DEBUG);
3680Sstevel@tonic-gate 	if (wait_info_pool == NULL)
3690Sstevel@tonic-gate 		uu_die("wait_init couldn't create wait_info_pool");
3700Sstevel@tonic-gate 
3710Sstevel@tonic-gate 	wait_info_list = uu_list_create(wait_info_pool, wait_info_list, 0);
3720Sstevel@tonic-gate 	if (wait_info_list == NULL)
3730Sstevel@tonic-gate 		uu_die("wait_init couldn't create wait_info_list");
3740Sstevel@tonic-gate 
3750Sstevel@tonic-gate 	(void) pthread_mutex_init(&wait_info_lock, &mutex_attrs);
3760Sstevel@tonic-gate }
377