10Sstevel@tonic-gate /* 20Sstevel@tonic-gate * CDDL HEADER START 30Sstevel@tonic-gate * 40Sstevel@tonic-gate * The contents of this file are subject to the terms of the 50Sstevel@tonic-gate * Common Development and Distribution License, Version 1.0 only 60Sstevel@tonic-gate * (the "License"). You may not use this file except in compliance 70Sstevel@tonic-gate * with the License. 80Sstevel@tonic-gate * 90Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 100Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 110Sstevel@tonic-gate * See the License for the specific language governing permissions 120Sstevel@tonic-gate * and limitations under the License. 130Sstevel@tonic-gate * 140Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 150Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 160Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 170Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 180Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 190Sstevel@tonic-gate * 200Sstevel@tonic-gate * CDDL HEADER END 210Sstevel@tonic-gate */ 220Sstevel@tonic-gate /* 23*111Srm88369 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 240Sstevel@tonic-gate * Use is subject to license terms. 250Sstevel@tonic-gate */ 260Sstevel@tonic-gate 270Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI" 280Sstevel@tonic-gate 290Sstevel@tonic-gate /* 300Sstevel@tonic-gate * wait.c - asynchronous monitoring of "wait registered" start methods 310Sstevel@tonic-gate * 320Sstevel@tonic-gate * Use event ports to poll on the set of fds representing the /proc/[pid]/psinfo 330Sstevel@tonic-gate * files. If one of these fds returns an event, then we inform the restarter 340Sstevel@tonic-gate * that it has stopped. 350Sstevel@tonic-gate * 360Sstevel@tonic-gate * The wait_info_list holds the series of processes currently being monitored 370Sstevel@tonic-gate * for exit. The wi_fd member, which contains the file descriptor of the psinfo 380Sstevel@tonic-gate * file being polled upon ("event ported upon"), will be set to -1 if the file 390Sstevel@tonic-gate * descriptor is inactive (already closed or not yet opened). 400Sstevel@tonic-gate */ 410Sstevel@tonic-gate 420Sstevel@tonic-gate #ifdef _FILE_OFFSET_BITS 430Sstevel@tonic-gate #undef _FILE_OFFSET_BITS 440Sstevel@tonic-gate #endif /* _FILE_OFFSET_BITS */ 450Sstevel@tonic-gate 460Sstevel@tonic-gate #include <sys/resource.h> 470Sstevel@tonic-gate #include <sys/stat.h> 480Sstevel@tonic-gate #include <sys/types.h> 490Sstevel@tonic-gate #include <sys/uio.h> 500Sstevel@tonic-gate #include <sys/wait.h> 510Sstevel@tonic-gate 520Sstevel@tonic-gate #include <assert.h> 530Sstevel@tonic-gate #include <errno.h> 540Sstevel@tonic-gate #include <fcntl.h> 550Sstevel@tonic-gate #include <libuutil.h> 560Sstevel@tonic-gate #include <poll.h> 570Sstevel@tonic-gate #include <port.h> 580Sstevel@tonic-gate #include <pthread.h> 590Sstevel@tonic-gate #include <procfs.h> 600Sstevel@tonic-gate #include <string.h> 610Sstevel@tonic-gate #include <stropts.h> 620Sstevel@tonic-gate #include <unistd.h> 630Sstevel@tonic-gate 640Sstevel@tonic-gate #include "startd.h" 650Sstevel@tonic-gate 660Sstevel@tonic-gate #define WAIT_FILES 262144 /* reasonably high maximum */ 670Sstevel@tonic-gate 680Sstevel@tonic-gate static int port_fd; 690Sstevel@tonic-gate static scf_handle_t *wait_hndl; 700Sstevel@tonic-gate static struct rlimit init_fd_rlimit; 710Sstevel@tonic-gate 720Sstevel@tonic-gate static uu_list_pool_t *wait_info_pool; 730Sstevel@tonic-gate static uu_list_t *wait_info_list; 740Sstevel@tonic-gate 750Sstevel@tonic-gate static pthread_mutex_t wait_info_lock; 760Sstevel@tonic-gate 770Sstevel@tonic-gate /* 780Sstevel@tonic-gate * void wait_remove(wait_info_t *, int) 790Sstevel@tonic-gate * Remove the given wait_info structure from our list, performing various 800Sstevel@tonic-gate * cleanup operations along the way. If the direct flag is false (meaning 810Sstevel@tonic-gate * that we are being called with from restarter instance list context), then 820Sstevel@tonic-gate * notify the restarter that the associated instance has exited. 830Sstevel@tonic-gate * 840Sstevel@tonic-gate * Since we may no longer be the startd that started this process, we only are 850Sstevel@tonic-gate * concerned with a waitpid(3C) failure if the wi_parent field is non-zero. 860Sstevel@tonic-gate */ 870Sstevel@tonic-gate static void 880Sstevel@tonic-gate wait_remove(wait_info_t *wi, int direct) 890Sstevel@tonic-gate { 900Sstevel@tonic-gate int status; 910Sstevel@tonic-gate 920Sstevel@tonic-gate if (waitpid(wi->wi_pid, &status, 0) == -1) { 930Sstevel@tonic-gate if (wi->wi_parent) 940Sstevel@tonic-gate log_framework(LOG_INFO, 950Sstevel@tonic-gate "instance %s waitpid failure: %s\n", wi->wi_fmri, 960Sstevel@tonic-gate strerror(errno)); 970Sstevel@tonic-gate } else { 980Sstevel@tonic-gate if (WEXITSTATUS(status) != 0) { 990Sstevel@tonic-gate log_framework(LOG_NOTICE, 1000Sstevel@tonic-gate "instance %s exited with status %d\n", wi->wi_fmri, 1010Sstevel@tonic-gate WEXITSTATUS(status)); 1020Sstevel@tonic-gate } 1030Sstevel@tonic-gate } 1040Sstevel@tonic-gate 1050Sstevel@tonic-gate MUTEX_LOCK(&wait_info_lock); 106*111Srm88369 if (wi->wi_fd != -1) { 107*111Srm88369 startd_close(wi->wi_fd); 108*111Srm88369 wi->wi_fd = -1; 109*111Srm88369 } 1100Sstevel@tonic-gate uu_list_remove(wait_info_list, wi); 1110Sstevel@tonic-gate MUTEX_UNLOCK(&wait_info_lock); 1120Sstevel@tonic-gate 1130Sstevel@tonic-gate /* 1140Sstevel@tonic-gate * Make an attempt to clear out any utmpx record associated with this 1150Sstevel@tonic-gate * PID. 1160Sstevel@tonic-gate */ 1170Sstevel@tonic-gate utmpx_mark_dead(wi->wi_pid, status, B_FALSE); 1180Sstevel@tonic-gate 1190Sstevel@tonic-gate if (!direct) { 1200Sstevel@tonic-gate /* 1210Sstevel@tonic-gate * Bind wait_hndl lazily. 1220Sstevel@tonic-gate */ 1230Sstevel@tonic-gate if (wait_hndl == NULL) { 1240Sstevel@tonic-gate for (wait_hndl = 1250Sstevel@tonic-gate libscf_handle_create_bound(SCF_VERSION); 1260Sstevel@tonic-gate wait_hndl == NULL; 1270Sstevel@tonic-gate wait_hndl = 1280Sstevel@tonic-gate libscf_handle_create_bound(SCF_VERSION)) { 1290Sstevel@tonic-gate log_error(LOG_INFO, "[wait_remove] Unable to " 1300Sstevel@tonic-gate "bind a new repository handle: %s\n", 1310Sstevel@tonic-gate scf_strerror(scf_error())); 1320Sstevel@tonic-gate (void) sleep(2); 1330Sstevel@tonic-gate } 1340Sstevel@tonic-gate } 1350Sstevel@tonic-gate 1360Sstevel@tonic-gate log_framework(LOG_DEBUG, 1370Sstevel@tonic-gate "wait_remove requesting stop of %s\n", wi->wi_fmri); 1380Sstevel@tonic-gate (void) stop_instance_fmri(wait_hndl, wi->wi_fmri, RSTOP_EXIT); 1390Sstevel@tonic-gate } 1400Sstevel@tonic-gate 1410Sstevel@tonic-gate uu_list_node_fini(wi, &wi->wi_link, wait_info_pool); 1420Sstevel@tonic-gate startd_free(wi, sizeof (wait_info_t)); 1430Sstevel@tonic-gate } 1440Sstevel@tonic-gate 1450Sstevel@tonic-gate /* 1460Sstevel@tonic-gate * int wait_register(pid_t, char *, int, int) 1470Sstevel@tonic-gate * wait_register is called after we have called fork(2), and know which pid we 1480Sstevel@tonic-gate * wish to monitor. However, since the child may have already exited by the 1490Sstevel@tonic-gate * time we are called, we must handle the error cases from open(2) 1500Sstevel@tonic-gate * appropriately. The am_parent flag is recorded to handle waitpid(2) 1510Sstevel@tonic-gate * behaviour on removal; similarly, the direct flag is passed through to a 1520Sstevel@tonic-gate * potential call to wait_remove() to govern its behaviour in different 1530Sstevel@tonic-gate * contexts. 1540Sstevel@tonic-gate * 1550Sstevel@tonic-gate * Returns 0 if registration successful, 1 if child pid did not exist, and -1 1560Sstevel@tonic-gate * if a different error occurred. 1570Sstevel@tonic-gate */ 1580Sstevel@tonic-gate int 1590Sstevel@tonic-gate wait_register(pid_t pid, const char *inst_fmri, int am_parent, int direct) 1600Sstevel@tonic-gate { 1610Sstevel@tonic-gate char *fname = uu_msprintf("/proc/%ld/psinfo", pid); 1620Sstevel@tonic-gate int fd; 1630Sstevel@tonic-gate wait_info_t *wi; 1640Sstevel@tonic-gate 1650Sstevel@tonic-gate assert(pid != 0); 1660Sstevel@tonic-gate 1670Sstevel@tonic-gate if (fname == NULL) 1680Sstevel@tonic-gate return (-1); 1690Sstevel@tonic-gate 1700Sstevel@tonic-gate wi = startd_alloc(sizeof (wait_info_t)); 1710Sstevel@tonic-gate 1720Sstevel@tonic-gate uu_list_node_init(wi, &wi->wi_link, wait_info_pool); 1730Sstevel@tonic-gate 1740Sstevel@tonic-gate wi->wi_fd = -1; 1750Sstevel@tonic-gate wi->wi_pid = pid; 1760Sstevel@tonic-gate wi->wi_fmri = inst_fmri; 1770Sstevel@tonic-gate wi->wi_parent = am_parent; 1780Sstevel@tonic-gate 1790Sstevel@tonic-gate MUTEX_LOCK(&wait_info_lock); 1800Sstevel@tonic-gate (void) uu_list_insert_before(wait_info_list, NULL, wi); 1810Sstevel@tonic-gate MUTEX_UNLOCK(&wait_info_lock); 1820Sstevel@tonic-gate 1830Sstevel@tonic-gate if ((fd = open(fname, O_RDONLY)) == -1) { 1840Sstevel@tonic-gate if (errno == ENOENT) { 1850Sstevel@tonic-gate /* 1860Sstevel@tonic-gate * Child has already exited. 1870Sstevel@tonic-gate */ 1880Sstevel@tonic-gate wait_remove(wi, direct); 1890Sstevel@tonic-gate uu_free(fname); 1900Sstevel@tonic-gate return (1); 1910Sstevel@tonic-gate } else { 1920Sstevel@tonic-gate log_error(LOG_WARNING, 1930Sstevel@tonic-gate "open %s failed; not monitoring %s: %s\n", fname, 1940Sstevel@tonic-gate inst_fmri, strerror(errno)); 1950Sstevel@tonic-gate uu_free(fname); 1960Sstevel@tonic-gate return (-1); 1970Sstevel@tonic-gate } 1980Sstevel@tonic-gate } 1990Sstevel@tonic-gate 2000Sstevel@tonic-gate uu_free(fname); 2010Sstevel@tonic-gate 2020Sstevel@tonic-gate wi->wi_fd = fd; 2030Sstevel@tonic-gate 2040Sstevel@tonic-gate if (port_associate(port_fd, PORT_SOURCE_FD, fd, 0, wi)) { 2050Sstevel@tonic-gate log_error(LOG_WARNING, 2060Sstevel@tonic-gate "initial port_association of %d / %s failed: %s\n", fd, 2070Sstevel@tonic-gate inst_fmri, strerror(errno)); 2080Sstevel@tonic-gate return (-1); 2090Sstevel@tonic-gate } 2100Sstevel@tonic-gate 2110Sstevel@tonic-gate log_framework(LOG_DEBUG, "monitoring PID %ld on fd %d (%s)\n", pid, fd, 2120Sstevel@tonic-gate inst_fmri); 2130Sstevel@tonic-gate 2140Sstevel@tonic-gate return (0); 2150Sstevel@tonic-gate } 2160Sstevel@tonic-gate 2170Sstevel@tonic-gate /*ARGSUSED*/ 2180Sstevel@tonic-gate void * 2190Sstevel@tonic-gate wait_thread(void *args) 2200Sstevel@tonic-gate { 2210Sstevel@tonic-gate for (;;) { 2220Sstevel@tonic-gate port_event_t pe; 2230Sstevel@tonic-gate int fd; 2240Sstevel@tonic-gate wait_info_t *wi; 2250Sstevel@tonic-gate 226*111Srm88369 if (port_get(port_fd, &pe, NULL) != 0) { 227*111Srm88369 if (errno == EINTR) 2280Sstevel@tonic-gate continue; 229*111Srm88369 else { 2300Sstevel@tonic-gate log_error(LOG_WARNING, 231*111Srm88369 "port_get() failed with %s\n", 232*111Srm88369 strerror(errno)); 233*111Srm88369 bad_error("port_get", errno); 234*111Srm88369 } 235*111Srm88369 } 2360Sstevel@tonic-gate 2370Sstevel@tonic-gate fd = pe.portev_object; 2380Sstevel@tonic-gate wi = pe.portev_user; 239*111Srm88369 assert(wi != NULL); 240*111Srm88369 assert(fd == wi->wi_fd); 2410Sstevel@tonic-gate 2420Sstevel@tonic-gate if ((pe.portev_events & POLLHUP) == POLLHUP) { 2430Sstevel@tonic-gate psinfo_t psi; 2440Sstevel@tonic-gate 2450Sstevel@tonic-gate if (lseek(fd, 0, SEEK_SET) != 0 || 2460Sstevel@tonic-gate read(fd, &psi, sizeof (psinfo_t)) != 2470Sstevel@tonic-gate sizeof (psinfo_t)) { 2480Sstevel@tonic-gate log_framework(LOG_WARNING, 2490Sstevel@tonic-gate "couldn't get psinfo data for %s (%s); " 2500Sstevel@tonic-gate "assuming failed\n", wi->wi_fmri, 2510Sstevel@tonic-gate strerror(errno)); 2520Sstevel@tonic-gate goto err_remove; 2530Sstevel@tonic-gate } 2540Sstevel@tonic-gate 2550Sstevel@tonic-gate if (psi.pr_nlwp != 0 || 2560Sstevel@tonic-gate psi.pr_nzomb != 0 || 2570Sstevel@tonic-gate psi.pr_lwp.pr_lwpid != 0) { 2580Sstevel@tonic-gate /* 2590Sstevel@tonic-gate * We have determined, in accordance with the 2600Sstevel@tonic-gate * definition in proc(4), this process is not a 2610Sstevel@tonic-gate * zombie. Reassociate. 2620Sstevel@tonic-gate */ 2630Sstevel@tonic-gate if (port_associate(port_fd, PORT_SOURCE_FD, fd, 2640Sstevel@tonic-gate 0, wi)) 2650Sstevel@tonic-gate log_error(LOG_WARNING, 2660Sstevel@tonic-gate "port_association of %d / %s " 2670Sstevel@tonic-gate "failed\n", fd, wi->wi_fmri); 2680Sstevel@tonic-gate continue; 2690Sstevel@tonic-gate } 2700Sstevel@tonic-gate } else if ( 2710Sstevel@tonic-gate (pe.portev_events & POLLERR) == 0) { 2720Sstevel@tonic-gate if (port_associate(port_fd, PORT_SOURCE_FD, fd, 0, wi)) 2730Sstevel@tonic-gate log_error(LOG_WARNING, 2740Sstevel@tonic-gate "port_association of %d / %s " 2750Sstevel@tonic-gate "failed\n", fd, wi->wi_fmri); 2760Sstevel@tonic-gate continue; 2770Sstevel@tonic-gate } 2780Sstevel@tonic-gate 2790Sstevel@tonic-gate err_remove: 2800Sstevel@tonic-gate wait_remove(wi, 0); 2810Sstevel@tonic-gate } 2820Sstevel@tonic-gate 2830Sstevel@tonic-gate /*LINTED E_FUNC_HAS_NO_RETURN_STMT*/ 2840Sstevel@tonic-gate } 2850Sstevel@tonic-gate 2860Sstevel@tonic-gate void 2870Sstevel@tonic-gate wait_prefork() 2880Sstevel@tonic-gate { 2890Sstevel@tonic-gate MUTEX_LOCK(&wait_info_lock); 2900Sstevel@tonic-gate } 2910Sstevel@tonic-gate 2920Sstevel@tonic-gate void 2930Sstevel@tonic-gate wait_postfork(pid_t pid) 2940Sstevel@tonic-gate { 2950Sstevel@tonic-gate wait_info_t *wi; 2960Sstevel@tonic-gate 2970Sstevel@tonic-gate MUTEX_UNLOCK(&wait_info_lock); 2980Sstevel@tonic-gate 2990Sstevel@tonic-gate if (pid != 0) 3000Sstevel@tonic-gate return; 3010Sstevel@tonic-gate 3020Sstevel@tonic-gate /* 3030Sstevel@tonic-gate * Close all of the child's wait-related fds. The wait_thread() is 3040Sstevel@tonic-gate * gone, so no need to worry about returning events. We always exec(2) 3050Sstevel@tonic-gate * after a fork request, so we needn't free the list elements 3060Sstevel@tonic-gate * themselves. 3070Sstevel@tonic-gate */ 3080Sstevel@tonic-gate 3090Sstevel@tonic-gate for (wi = uu_list_first(wait_info_list); 3100Sstevel@tonic-gate wi != NULL; 3110Sstevel@tonic-gate wi = uu_list_next(wait_info_list, wi)) { 3120Sstevel@tonic-gate if (wi->wi_fd != -1) 3130Sstevel@tonic-gate startd_close(wi->wi_fd); 3140Sstevel@tonic-gate } 3150Sstevel@tonic-gate 3160Sstevel@tonic-gate startd_close(port_fd); 3170Sstevel@tonic-gate 3180Sstevel@tonic-gate (void) setrlimit(RLIMIT_NOFILE, &init_fd_rlimit); 3190Sstevel@tonic-gate } 3200Sstevel@tonic-gate 3210Sstevel@tonic-gate void 3220Sstevel@tonic-gate wait_init() 3230Sstevel@tonic-gate { 3240Sstevel@tonic-gate struct rlimit fd_new; 3250Sstevel@tonic-gate 3260Sstevel@tonic-gate (void) getrlimit(RLIMIT_NOFILE, &init_fd_rlimit); 3270Sstevel@tonic-gate (void) getrlimit(RLIMIT_NOFILE, &fd_new); 3280Sstevel@tonic-gate 3290Sstevel@tonic-gate fd_new.rlim_max = fd_new.rlim_cur = WAIT_FILES; 3300Sstevel@tonic-gate 3310Sstevel@tonic-gate (void) setrlimit(RLIMIT_NOFILE, &fd_new); 3320Sstevel@tonic-gate 3330Sstevel@tonic-gate if ((port_fd = port_create()) == -1) 3340Sstevel@tonic-gate uu_die("wait_init couldn't port_create"); 3350Sstevel@tonic-gate 3360Sstevel@tonic-gate wait_info_pool = uu_list_pool_create("wait_info", sizeof (wait_info_t), 3370Sstevel@tonic-gate offsetof(wait_info_t, wi_link), NULL, UU_LIST_POOL_DEBUG); 3380Sstevel@tonic-gate if (wait_info_pool == NULL) 3390Sstevel@tonic-gate uu_die("wait_init couldn't create wait_info_pool"); 3400Sstevel@tonic-gate 3410Sstevel@tonic-gate wait_info_list = uu_list_create(wait_info_pool, wait_info_list, 0); 3420Sstevel@tonic-gate if (wait_info_list == NULL) 3430Sstevel@tonic-gate uu_die("wait_init couldn't create wait_info_list"); 3440Sstevel@tonic-gate 3450Sstevel@tonic-gate (void) pthread_mutex_init(&wait_info_lock, &mutex_attrs); 3460Sstevel@tonic-gate } 347