/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
250Sstevel@tonic-gate
260Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI"
270Sstevel@tonic-gate
/*
 * wait.c - asynchronous monitoring of "wait registered" start methods
 *
 * Use event ports to poll on the set of fds representing the /proc/[pid]/psinfo
 * files.  If one of these fds returns an event, then we inform the restarter
 * that it has stopped.
 *
 * The wait_info_list holds the series of processes currently being monitored
 * for exit.  The wi_fd member, which contains the file descriptor of the psinfo
 * file being polled upon ("event ported upon"), will be set to -1 if the file
 * descriptor is inactive (already closed or not yet opened).
 */
400Sstevel@tonic-gate
/*
 * Drop any _FILE_OFFSET_BITS setting before the system headers are included.
 * NOTE(review): presumably required so that <procfs.h> is compiled outside
 * the large file compilation environment -- confirm against <sys/procfs.h>.
 */
#if defined(_FILE_OFFSET_BITS)
#undef	_FILE_OFFSET_BITS
#endif	/* defined(_FILE_OFFSET_BITS) */
440Sstevel@tonic-gate
450Sstevel@tonic-gate #include <sys/resource.h>
460Sstevel@tonic-gate #include <sys/stat.h>
470Sstevel@tonic-gate #include <sys/types.h>
480Sstevel@tonic-gate #include <sys/uio.h>
490Sstevel@tonic-gate #include <sys/wait.h>
500Sstevel@tonic-gate
510Sstevel@tonic-gate #include <assert.h>
520Sstevel@tonic-gate #include <errno.h>
530Sstevel@tonic-gate #include <fcntl.h>
540Sstevel@tonic-gate #include <libuutil.h>
550Sstevel@tonic-gate #include <poll.h>
560Sstevel@tonic-gate #include <port.h>
570Sstevel@tonic-gate #include <pthread.h>
580Sstevel@tonic-gate #include <procfs.h>
590Sstevel@tonic-gate #include <string.h>
600Sstevel@tonic-gate #include <stropts.h>
610Sstevel@tonic-gate #include <unistd.h>
620Sstevel@tonic-gate
630Sstevel@tonic-gate #include "startd.h"
640Sstevel@tonic-gate
650Sstevel@tonic-gate #define WAIT_FILES 262144 /* reasonably high maximum */
660Sstevel@tonic-gate
670Sstevel@tonic-gate static int port_fd;
680Sstevel@tonic-gate static scf_handle_t *wait_hndl;
690Sstevel@tonic-gate static struct rlimit init_fd_rlimit;
700Sstevel@tonic-gate
710Sstevel@tonic-gate static uu_list_pool_t *wait_info_pool;
720Sstevel@tonic-gate static uu_list_t *wait_info_list;
730Sstevel@tonic-gate
740Sstevel@tonic-gate static pthread_mutex_t wait_info_lock;
750Sstevel@tonic-gate
760Sstevel@tonic-gate /*
770Sstevel@tonic-gate * void wait_remove(wait_info_t *, int)
780Sstevel@tonic-gate * Remove the given wait_info structure from our list, performing various
790Sstevel@tonic-gate * cleanup operations along the way. If the direct flag is false (meaning
80*7219Srm88369 * that we are being called with from restarter instance list context) and
81*7219Srm88369 * the instance should not be ignored, then notify the restarter that the
82*7219Srm88369 * associated instance has exited. If the wi_ignore flag is true then it
83*7219Srm88369 * means that the stop was initiated from within svc.startd, rather than
84*7219Srm88369 * from outside it.
850Sstevel@tonic-gate *
860Sstevel@tonic-gate * Since we may no longer be the startd that started this process, we only are
870Sstevel@tonic-gate * concerned with a waitpid(3C) failure if the wi_parent field is non-zero.
880Sstevel@tonic-gate */
890Sstevel@tonic-gate static void
wait_remove(wait_info_t * wi,int direct)900Sstevel@tonic-gate wait_remove(wait_info_t *wi, int direct)
910Sstevel@tonic-gate {
920Sstevel@tonic-gate int status;
930Sstevel@tonic-gate
940Sstevel@tonic-gate if (waitpid(wi->wi_pid, &status, 0) == -1) {
950Sstevel@tonic-gate if (wi->wi_parent)
960Sstevel@tonic-gate log_framework(LOG_INFO,
970Sstevel@tonic-gate "instance %s waitpid failure: %s\n", wi->wi_fmri,
980Sstevel@tonic-gate strerror(errno));
990Sstevel@tonic-gate } else {
1000Sstevel@tonic-gate if (WEXITSTATUS(status) != 0) {
1010Sstevel@tonic-gate log_framework(LOG_NOTICE,
1020Sstevel@tonic-gate "instance %s exited with status %d\n", wi->wi_fmri,
1030Sstevel@tonic-gate WEXITSTATUS(status));
1040Sstevel@tonic-gate }
1050Sstevel@tonic-gate }
1060Sstevel@tonic-gate
1070Sstevel@tonic-gate MUTEX_LOCK(&wait_info_lock);
108111Srm88369 if (wi->wi_fd != -1) {
109111Srm88369 startd_close(wi->wi_fd);
110111Srm88369 wi->wi_fd = -1;
111111Srm88369 }
1120Sstevel@tonic-gate uu_list_remove(wait_info_list, wi);
1130Sstevel@tonic-gate MUTEX_UNLOCK(&wait_info_lock);
1140Sstevel@tonic-gate
1150Sstevel@tonic-gate /*
1160Sstevel@tonic-gate * Make an attempt to clear out any utmpx record associated with this
1170Sstevel@tonic-gate * PID.
1180Sstevel@tonic-gate */
1190Sstevel@tonic-gate utmpx_mark_dead(wi->wi_pid, status, B_FALSE);
1200Sstevel@tonic-gate
121*7219Srm88369 if (!direct && !wi->wi_ignore) {
1220Sstevel@tonic-gate /*
1230Sstevel@tonic-gate * Bind wait_hndl lazily.
1240Sstevel@tonic-gate */
1250Sstevel@tonic-gate if (wait_hndl == NULL) {
1260Sstevel@tonic-gate for (wait_hndl =
1270Sstevel@tonic-gate libscf_handle_create_bound(SCF_VERSION);
1280Sstevel@tonic-gate wait_hndl == NULL;
1290Sstevel@tonic-gate wait_hndl =
1300Sstevel@tonic-gate libscf_handle_create_bound(SCF_VERSION)) {
1310Sstevel@tonic-gate log_error(LOG_INFO, "[wait_remove] Unable to "
1320Sstevel@tonic-gate "bind a new repository handle: %s\n",
1330Sstevel@tonic-gate scf_strerror(scf_error()));
1340Sstevel@tonic-gate (void) sleep(2);
1350Sstevel@tonic-gate }
1360Sstevel@tonic-gate }
1370Sstevel@tonic-gate
1380Sstevel@tonic-gate log_framework(LOG_DEBUG,
1390Sstevel@tonic-gate "wait_remove requesting stop of %s\n", wi->wi_fmri);
1400Sstevel@tonic-gate (void) stop_instance_fmri(wait_hndl, wi->wi_fmri, RSTOP_EXIT);
1410Sstevel@tonic-gate }
1420Sstevel@tonic-gate
1430Sstevel@tonic-gate uu_list_node_fini(wi, &wi->wi_link, wait_info_pool);
1440Sstevel@tonic-gate startd_free(wi, sizeof (wait_info_t));
1450Sstevel@tonic-gate }
1460Sstevel@tonic-gate
1470Sstevel@tonic-gate /*
148*7219Srm88369 * void wait_ignore_by_fmri(const char *)
149*7219Srm88369 * wait_ignore_by_fmri is called when svc.startd is going to stop the
150*7219Srm88369 * instance. Since we need to wait on the process and close the utmpx record,
151*7219Srm88369 * we're going to set the wi_ignore flag, so that when the process exits we
152*7219Srm88369 * clean up, but don't tell the restarter to stop it.
153*7219Srm88369 */
154*7219Srm88369 void
wait_ignore_by_fmri(const char * fmri)155*7219Srm88369 wait_ignore_by_fmri(const char *fmri)
156*7219Srm88369 {
157*7219Srm88369 wait_info_t *wi;
158*7219Srm88369
159*7219Srm88369 MUTEX_LOCK(&wait_info_lock);
160*7219Srm88369
161*7219Srm88369 for (wi = uu_list_first(wait_info_list); wi != NULL;
162*7219Srm88369 wi = uu_list_next(wait_info_list, wi)) {
163*7219Srm88369 if (strcmp(wi->wi_fmri, fmri) == 0)
164*7219Srm88369 break;
165*7219Srm88369 }
166*7219Srm88369
167*7219Srm88369 if (wi != NULL) {
168*7219Srm88369 wi->wi_ignore = 1;
169*7219Srm88369 }
170*7219Srm88369
171*7219Srm88369 MUTEX_UNLOCK(&wait_info_lock);
172*7219Srm88369 }
173*7219Srm88369
174*7219Srm88369 /*
1750Sstevel@tonic-gate * int wait_register(pid_t, char *, int, int)
1760Sstevel@tonic-gate * wait_register is called after we have called fork(2), and know which pid we
1770Sstevel@tonic-gate * wish to monitor. However, since the child may have already exited by the
1780Sstevel@tonic-gate * time we are called, we must handle the error cases from open(2)
1790Sstevel@tonic-gate * appropriately. The am_parent flag is recorded to handle waitpid(2)
1800Sstevel@tonic-gate * behaviour on removal; similarly, the direct flag is passed through to a
1810Sstevel@tonic-gate * potential call to wait_remove() to govern its behaviour in different
1820Sstevel@tonic-gate * contexts.
1830Sstevel@tonic-gate *
1840Sstevel@tonic-gate * Returns 0 if registration successful, 1 if child pid did not exist, and -1
1850Sstevel@tonic-gate * if a different error occurred.
1860Sstevel@tonic-gate */
1870Sstevel@tonic-gate int
wait_register(pid_t pid,const char * inst_fmri,int am_parent,int direct)1880Sstevel@tonic-gate wait_register(pid_t pid, const char *inst_fmri, int am_parent, int direct)
1890Sstevel@tonic-gate {
1900Sstevel@tonic-gate char *fname = uu_msprintf("/proc/%ld/psinfo", pid);
1910Sstevel@tonic-gate int fd;
1920Sstevel@tonic-gate wait_info_t *wi;
1930Sstevel@tonic-gate
1940Sstevel@tonic-gate assert(pid != 0);
1950Sstevel@tonic-gate
1960Sstevel@tonic-gate if (fname == NULL)
1970Sstevel@tonic-gate return (-1);
1980Sstevel@tonic-gate
1990Sstevel@tonic-gate wi = startd_alloc(sizeof (wait_info_t));
2000Sstevel@tonic-gate
2010Sstevel@tonic-gate uu_list_node_init(wi, &wi->wi_link, wait_info_pool);
2020Sstevel@tonic-gate
2030Sstevel@tonic-gate wi->wi_fd = -1;
2040Sstevel@tonic-gate wi->wi_pid = pid;
2050Sstevel@tonic-gate wi->wi_fmri = inst_fmri;
2060Sstevel@tonic-gate wi->wi_parent = am_parent;
207*7219Srm88369 wi->wi_ignore = 0;
2080Sstevel@tonic-gate
2090Sstevel@tonic-gate MUTEX_LOCK(&wait_info_lock);
2100Sstevel@tonic-gate (void) uu_list_insert_before(wait_info_list, NULL, wi);
2110Sstevel@tonic-gate MUTEX_UNLOCK(&wait_info_lock);
2120Sstevel@tonic-gate
2130Sstevel@tonic-gate if ((fd = open(fname, O_RDONLY)) == -1) {
2140Sstevel@tonic-gate if (errno == ENOENT) {
2150Sstevel@tonic-gate /*
2160Sstevel@tonic-gate * Child has already exited.
2170Sstevel@tonic-gate */
2180Sstevel@tonic-gate wait_remove(wi, direct);
2190Sstevel@tonic-gate uu_free(fname);
2200Sstevel@tonic-gate return (1);
2210Sstevel@tonic-gate } else {
2220Sstevel@tonic-gate log_error(LOG_WARNING,
2230Sstevel@tonic-gate "open %s failed; not monitoring %s: %s\n", fname,
2240Sstevel@tonic-gate inst_fmri, strerror(errno));
2250Sstevel@tonic-gate uu_free(fname);
2260Sstevel@tonic-gate return (-1);
2270Sstevel@tonic-gate }
2280Sstevel@tonic-gate }
2290Sstevel@tonic-gate
2300Sstevel@tonic-gate uu_free(fname);
2310Sstevel@tonic-gate
2320Sstevel@tonic-gate wi->wi_fd = fd;
2330Sstevel@tonic-gate
2340Sstevel@tonic-gate if (port_associate(port_fd, PORT_SOURCE_FD, fd, 0, wi)) {
2350Sstevel@tonic-gate log_error(LOG_WARNING,
2360Sstevel@tonic-gate "initial port_association of %d / %s failed: %s\n", fd,
2370Sstevel@tonic-gate inst_fmri, strerror(errno));
2380Sstevel@tonic-gate return (-1);
2390Sstevel@tonic-gate }
2400Sstevel@tonic-gate
2410Sstevel@tonic-gate log_framework(LOG_DEBUG, "monitoring PID %ld on fd %d (%s)\n", pid, fd,
2420Sstevel@tonic-gate inst_fmri);
2430Sstevel@tonic-gate
2440Sstevel@tonic-gate return (0);
2450Sstevel@tonic-gate }
2460Sstevel@tonic-gate
2470Sstevel@tonic-gate /*ARGSUSED*/
2480Sstevel@tonic-gate void *
wait_thread(void * args)2490Sstevel@tonic-gate wait_thread(void *args)
2500Sstevel@tonic-gate {
2510Sstevel@tonic-gate for (;;) {
2520Sstevel@tonic-gate port_event_t pe;
2530Sstevel@tonic-gate int fd;
2540Sstevel@tonic-gate wait_info_t *wi;
2550Sstevel@tonic-gate
256111Srm88369 if (port_get(port_fd, &pe, NULL) != 0) {
257111Srm88369 if (errno == EINTR)
2580Sstevel@tonic-gate continue;
259111Srm88369 else {
2600Sstevel@tonic-gate log_error(LOG_WARNING,
261111Srm88369 "port_get() failed with %s\n",
262111Srm88369 strerror(errno));
263111Srm88369 bad_error("port_get", errno);
264111Srm88369 }
265111Srm88369 }
2660Sstevel@tonic-gate
2670Sstevel@tonic-gate fd = pe.portev_object;
2680Sstevel@tonic-gate wi = pe.portev_user;
269111Srm88369 assert(wi != NULL);
270111Srm88369 assert(fd == wi->wi_fd);
2710Sstevel@tonic-gate
2720Sstevel@tonic-gate if ((pe.portev_events & POLLHUP) == POLLHUP) {
2730Sstevel@tonic-gate psinfo_t psi;
2740Sstevel@tonic-gate
2750Sstevel@tonic-gate if (lseek(fd, 0, SEEK_SET) != 0 ||
2760Sstevel@tonic-gate read(fd, &psi, sizeof (psinfo_t)) !=
2770Sstevel@tonic-gate sizeof (psinfo_t)) {
2780Sstevel@tonic-gate log_framework(LOG_WARNING,
2790Sstevel@tonic-gate "couldn't get psinfo data for %s (%s); "
2800Sstevel@tonic-gate "assuming failed\n", wi->wi_fmri,
2810Sstevel@tonic-gate strerror(errno));
282*7219Srm88369 goto err_remove;
2830Sstevel@tonic-gate }
2840Sstevel@tonic-gate
2850Sstevel@tonic-gate if (psi.pr_nlwp != 0 ||
2860Sstevel@tonic-gate psi.pr_nzomb != 0 ||
2870Sstevel@tonic-gate psi.pr_lwp.pr_lwpid != 0) {
2880Sstevel@tonic-gate /*
2890Sstevel@tonic-gate * We have determined, in accordance with the
2900Sstevel@tonic-gate * definition in proc(4), this process is not a
2910Sstevel@tonic-gate * zombie. Reassociate.
2920Sstevel@tonic-gate */
2930Sstevel@tonic-gate if (port_associate(port_fd, PORT_SOURCE_FD, fd,
294*7219Srm88369 0, wi))
2950Sstevel@tonic-gate log_error(LOG_WARNING,
2960Sstevel@tonic-gate "port_association of %d / %s "
2970Sstevel@tonic-gate "failed\n", fd, wi->wi_fmri);
2980Sstevel@tonic-gate continue;
2990Sstevel@tonic-gate }
3000Sstevel@tonic-gate } else if (
3010Sstevel@tonic-gate (pe.portev_events & POLLERR) == 0) {
3020Sstevel@tonic-gate if (port_associate(port_fd, PORT_SOURCE_FD, fd, 0, wi))
3030Sstevel@tonic-gate log_error(LOG_WARNING,
3040Sstevel@tonic-gate "port_association of %d / %s "
3050Sstevel@tonic-gate "failed\n", fd, wi->wi_fmri);
3060Sstevel@tonic-gate continue;
3070Sstevel@tonic-gate }
3080Sstevel@tonic-gate
3090Sstevel@tonic-gate err_remove:
3100Sstevel@tonic-gate wait_remove(wi, 0);
3110Sstevel@tonic-gate }
3120Sstevel@tonic-gate
3130Sstevel@tonic-gate /*LINTED E_FUNC_HAS_NO_RETURN_STMT*/
3140Sstevel@tonic-gate }
3150Sstevel@tonic-gate
3160Sstevel@tonic-gate void
wait_prefork()3170Sstevel@tonic-gate wait_prefork()
3180Sstevel@tonic-gate {
3190Sstevel@tonic-gate MUTEX_LOCK(&wait_info_lock);
3200Sstevel@tonic-gate }
3210Sstevel@tonic-gate
3220Sstevel@tonic-gate void
wait_postfork(pid_t pid)3230Sstevel@tonic-gate wait_postfork(pid_t pid)
3240Sstevel@tonic-gate {
3250Sstevel@tonic-gate wait_info_t *wi;
3260Sstevel@tonic-gate
3270Sstevel@tonic-gate MUTEX_UNLOCK(&wait_info_lock);
3280Sstevel@tonic-gate
3290Sstevel@tonic-gate if (pid != 0)
3300Sstevel@tonic-gate return;
3310Sstevel@tonic-gate
3320Sstevel@tonic-gate /*
3330Sstevel@tonic-gate * Close all of the child's wait-related fds. The wait_thread() is
3340Sstevel@tonic-gate * gone, so no need to worry about returning events. We always exec(2)
3350Sstevel@tonic-gate * after a fork request, so we needn't free the list elements
3360Sstevel@tonic-gate * themselves.
3370Sstevel@tonic-gate */
3380Sstevel@tonic-gate
3390Sstevel@tonic-gate for (wi = uu_list_first(wait_info_list);
3400Sstevel@tonic-gate wi != NULL;
3410Sstevel@tonic-gate wi = uu_list_next(wait_info_list, wi)) {
3420Sstevel@tonic-gate if (wi->wi_fd != -1)
3430Sstevel@tonic-gate startd_close(wi->wi_fd);
3440Sstevel@tonic-gate }
3450Sstevel@tonic-gate
3460Sstevel@tonic-gate startd_close(port_fd);
3470Sstevel@tonic-gate
3480Sstevel@tonic-gate (void) setrlimit(RLIMIT_NOFILE, &init_fd_rlimit);
3490Sstevel@tonic-gate }
3500Sstevel@tonic-gate
3510Sstevel@tonic-gate void
wait_init()3520Sstevel@tonic-gate wait_init()
3530Sstevel@tonic-gate {
3540Sstevel@tonic-gate struct rlimit fd_new;
3550Sstevel@tonic-gate
3560Sstevel@tonic-gate (void) getrlimit(RLIMIT_NOFILE, &init_fd_rlimit);
3570Sstevel@tonic-gate (void) getrlimit(RLIMIT_NOFILE, &fd_new);
3580Sstevel@tonic-gate
3590Sstevel@tonic-gate fd_new.rlim_max = fd_new.rlim_cur = WAIT_FILES;
3600Sstevel@tonic-gate
3610Sstevel@tonic-gate (void) setrlimit(RLIMIT_NOFILE, &fd_new);
3620Sstevel@tonic-gate
3630Sstevel@tonic-gate if ((port_fd = port_create()) == -1)
3640Sstevel@tonic-gate uu_die("wait_init couldn't port_create");
3650Sstevel@tonic-gate
3660Sstevel@tonic-gate wait_info_pool = uu_list_pool_create("wait_info", sizeof (wait_info_t),
3670Sstevel@tonic-gate offsetof(wait_info_t, wi_link), NULL, UU_LIST_POOL_DEBUG);
3680Sstevel@tonic-gate if (wait_info_pool == NULL)
3690Sstevel@tonic-gate uu_die("wait_init couldn't create wait_info_pool");
3700Sstevel@tonic-gate
3710Sstevel@tonic-gate wait_info_list = uu_list_create(wait_info_pool, wait_info_list, 0);
3720Sstevel@tonic-gate if (wait_info_list == NULL)
3730Sstevel@tonic-gate uu_die("wait_init couldn't create wait_info_list");
3740Sstevel@tonic-gate
3750Sstevel@tonic-gate (void) pthread_mutex_init(&wait_info_lock, &mutex_attrs);
3760Sstevel@tonic-gate }
377