1*0Sstevel@tonic-gate /* 2*0Sstevel@tonic-gate * CDDL HEADER START 3*0Sstevel@tonic-gate * 4*0Sstevel@tonic-gate * The contents of this file are subject to the terms of the 5*0Sstevel@tonic-gate * Common Development and Distribution License, Version 1.0 only 6*0Sstevel@tonic-gate * (the "License"). You may not use this file except in compliance 7*0Sstevel@tonic-gate * with the License. 8*0Sstevel@tonic-gate * 9*0Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10*0Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 11*0Sstevel@tonic-gate * See the License for the specific language governing permissions 12*0Sstevel@tonic-gate * and limitations under the License. 13*0Sstevel@tonic-gate * 14*0Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 15*0Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16*0Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 17*0Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 18*0Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 19*0Sstevel@tonic-gate * 20*0Sstevel@tonic-gate * CDDL HEADER END 21*0Sstevel@tonic-gate */ 22*0Sstevel@tonic-gate /* 23*0Sstevel@tonic-gate * Copyright 2004 Sun Microsystems, Inc. All rights reserved. 24*0Sstevel@tonic-gate * Use is subject to license terms. 25*0Sstevel@tonic-gate */ 26*0Sstevel@tonic-gate 27*0Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI" 28*0Sstevel@tonic-gate 29*0Sstevel@tonic-gate /* 30*0Sstevel@tonic-gate * wait.c - asynchronous monitoring of "wait registered" start methods 31*0Sstevel@tonic-gate * 32*0Sstevel@tonic-gate * Use event ports to poll on the set of fds representing the /proc/[pid]/psinfo 33*0Sstevel@tonic-gate * files. If one of these fds returns an event, then we inform the restarter 34*0Sstevel@tonic-gate * that it has stopped. 35*0Sstevel@tonic-gate * 36*0Sstevel@tonic-gate * The wait_info_list holds the series of processes currently being monitored 37*0Sstevel@tonic-gate * for exit. The wi_fd member, which contains the file descriptor of the psinfo 38*0Sstevel@tonic-gate * file being polled upon ("event ported upon"), will be set to -1 if the file 39*0Sstevel@tonic-gate * descriptor is inactive (already closed or not yet opened). 40*0Sstevel@tonic-gate */ 41*0Sstevel@tonic-gate 42*0Sstevel@tonic-gate #ifdef _FILE_OFFSET_BITS 43*0Sstevel@tonic-gate #undef _FILE_OFFSET_BITS 44*0Sstevel@tonic-gate #endif /* _FILE_OFFSET_BITS */ 45*0Sstevel@tonic-gate 46*0Sstevel@tonic-gate #include <sys/resource.h> 47*0Sstevel@tonic-gate #include <sys/stat.h> 48*0Sstevel@tonic-gate #include <sys/types.h> 49*0Sstevel@tonic-gate #include <sys/uio.h> 50*0Sstevel@tonic-gate #include <sys/wait.h> 51*0Sstevel@tonic-gate 52*0Sstevel@tonic-gate #include <assert.h> 53*0Sstevel@tonic-gate #include <errno.h> 54*0Sstevel@tonic-gate #include <fcntl.h> 55*0Sstevel@tonic-gate #include <libuutil.h> 56*0Sstevel@tonic-gate #include <poll.h> 57*0Sstevel@tonic-gate #include <port.h> 58*0Sstevel@tonic-gate #include <pthread.h> 59*0Sstevel@tonic-gate #include <procfs.h> 60*0Sstevel@tonic-gate #include <string.h> 61*0Sstevel@tonic-gate #include <stropts.h> 62*0Sstevel@tonic-gate #include <unistd.h> 63*0Sstevel@tonic-gate 64*0Sstevel@tonic-gate #include "startd.h" 65*0Sstevel@tonic-gate 66*0Sstevel@tonic-gate #define WAIT_FILES 262144 /* reasonably high maximum */ 67*0Sstevel@tonic-gate 68*0Sstevel@tonic-gate static int port_fd; 69*0Sstevel@tonic-gate static scf_handle_t *wait_hndl; 70*0Sstevel@tonic-gate static struct rlimit init_fd_rlimit; 71*0Sstevel@tonic-gate 72*0Sstevel@tonic-gate static uu_list_pool_t *wait_info_pool; 73*0Sstevel@tonic-gate static uu_list_t *wait_info_list; 74*0Sstevel@tonic-gate 75*0Sstevel@tonic-gate static pthread_mutex_t wait_info_lock; 76*0Sstevel@tonic-gate 77*0Sstevel@tonic-gate /* 78*0Sstevel@tonic-gate * void wait_remove(wait_info_t *, int) 79*0Sstevel@tonic-gate * Remove the given wait_info structure from our list, performing various 80*0Sstevel@tonic-gate * cleanup operations along the way. If the direct flag is false (meaning 81*0Sstevel@tonic-gate * that we are being called with from restarter instance list context), then 82*0Sstevel@tonic-gate * notify the restarter that the associated instance has exited. 83*0Sstevel@tonic-gate * 84*0Sstevel@tonic-gate * Since we may no longer be the startd that started this process, we only are 85*0Sstevel@tonic-gate * concerned with a waitpid(3C) failure if the wi_parent field is non-zero. 86*0Sstevel@tonic-gate */ 87*0Sstevel@tonic-gate static void 88*0Sstevel@tonic-gate wait_remove(wait_info_t *wi, int direct) 89*0Sstevel@tonic-gate { 90*0Sstevel@tonic-gate int status; 91*0Sstevel@tonic-gate 92*0Sstevel@tonic-gate if (waitpid(wi->wi_pid, &status, 0) == -1) { 93*0Sstevel@tonic-gate if (wi->wi_parent) 94*0Sstevel@tonic-gate log_framework(LOG_INFO, 95*0Sstevel@tonic-gate "instance %s waitpid failure: %s\n", wi->wi_fmri, 96*0Sstevel@tonic-gate strerror(errno)); 97*0Sstevel@tonic-gate } else { 98*0Sstevel@tonic-gate if (WEXITSTATUS(status) != 0) { 99*0Sstevel@tonic-gate log_framework(LOG_NOTICE, 100*0Sstevel@tonic-gate "instance %s exited with status %d\n", wi->wi_fmri, 101*0Sstevel@tonic-gate WEXITSTATUS(status)); 102*0Sstevel@tonic-gate } 103*0Sstevel@tonic-gate } 104*0Sstevel@tonic-gate 105*0Sstevel@tonic-gate MUTEX_LOCK(&wait_info_lock); 106*0Sstevel@tonic-gate uu_list_remove(wait_info_list, wi); 107*0Sstevel@tonic-gate MUTEX_UNLOCK(&wait_info_lock); 108*0Sstevel@tonic-gate 109*0Sstevel@tonic-gate /* 110*0Sstevel@tonic-gate * Make an attempt to clear out any utmpx record associated with this 111*0Sstevel@tonic-gate * PID. 112*0Sstevel@tonic-gate */ 113*0Sstevel@tonic-gate utmpx_mark_dead(wi->wi_pid, status, B_FALSE); 114*0Sstevel@tonic-gate 115*0Sstevel@tonic-gate if (!direct) { 116*0Sstevel@tonic-gate /* 117*0Sstevel@tonic-gate * Bind wait_hndl lazily. 118*0Sstevel@tonic-gate */ 119*0Sstevel@tonic-gate if (wait_hndl == NULL) { 120*0Sstevel@tonic-gate for (wait_hndl = 121*0Sstevel@tonic-gate libscf_handle_create_bound(SCF_VERSION); 122*0Sstevel@tonic-gate wait_hndl == NULL; 123*0Sstevel@tonic-gate wait_hndl = 124*0Sstevel@tonic-gate libscf_handle_create_bound(SCF_VERSION)) { 125*0Sstevel@tonic-gate log_error(LOG_INFO, "[wait_remove] Unable to " 126*0Sstevel@tonic-gate "bind a new repository handle: %s\n", 127*0Sstevel@tonic-gate scf_strerror(scf_error())); 128*0Sstevel@tonic-gate (void) sleep(2); 129*0Sstevel@tonic-gate } 130*0Sstevel@tonic-gate } 131*0Sstevel@tonic-gate 132*0Sstevel@tonic-gate log_framework(LOG_DEBUG, 133*0Sstevel@tonic-gate "wait_remove requesting stop of %s\n", wi->wi_fmri); 134*0Sstevel@tonic-gate (void) stop_instance_fmri(wait_hndl, wi->wi_fmri, RSTOP_EXIT); 135*0Sstevel@tonic-gate } 136*0Sstevel@tonic-gate 137*0Sstevel@tonic-gate uu_list_node_fini(wi, &wi->wi_link, wait_info_pool); 138*0Sstevel@tonic-gate startd_free(wi, sizeof (wait_info_t)); 139*0Sstevel@tonic-gate } 140*0Sstevel@tonic-gate 141*0Sstevel@tonic-gate /* 142*0Sstevel@tonic-gate * int wait_register(pid_t, char *, int, int) 143*0Sstevel@tonic-gate * wait_register is called after we have called fork(2), and know which pid we 144*0Sstevel@tonic-gate * wish to monitor. However, since the child may have already exited by the 145*0Sstevel@tonic-gate * time we are called, we must handle the error cases from open(2) 146*0Sstevel@tonic-gate * appropriately. The am_parent flag is recorded to handle waitpid(2) 147*0Sstevel@tonic-gate * behaviour on removal; similarly, the direct flag is passed through to a 148*0Sstevel@tonic-gate * potential call to wait_remove() to govern its behaviour in different 149*0Sstevel@tonic-gate * contexts. 150*0Sstevel@tonic-gate * 151*0Sstevel@tonic-gate * Returns 0 if registration successful, 1 if child pid did not exist, and -1 152*0Sstevel@tonic-gate * if a different error occurred. 153*0Sstevel@tonic-gate */ 154*0Sstevel@tonic-gate int 155*0Sstevel@tonic-gate wait_register(pid_t pid, const char *inst_fmri, int am_parent, int direct) 156*0Sstevel@tonic-gate { 157*0Sstevel@tonic-gate char *fname = uu_msprintf("/proc/%ld/psinfo", pid); 158*0Sstevel@tonic-gate int fd; 159*0Sstevel@tonic-gate wait_info_t *wi; 160*0Sstevel@tonic-gate 161*0Sstevel@tonic-gate assert(pid != 0); 162*0Sstevel@tonic-gate 163*0Sstevel@tonic-gate if (fname == NULL) 164*0Sstevel@tonic-gate return (-1); 165*0Sstevel@tonic-gate 166*0Sstevel@tonic-gate wi = startd_alloc(sizeof (wait_info_t)); 167*0Sstevel@tonic-gate 168*0Sstevel@tonic-gate uu_list_node_init(wi, &wi->wi_link, wait_info_pool); 169*0Sstevel@tonic-gate 170*0Sstevel@tonic-gate wi->wi_fd = -1; 171*0Sstevel@tonic-gate wi->wi_pid = pid; 172*0Sstevel@tonic-gate wi->wi_fmri = inst_fmri; 173*0Sstevel@tonic-gate wi->wi_parent = am_parent; 174*0Sstevel@tonic-gate 175*0Sstevel@tonic-gate MUTEX_LOCK(&wait_info_lock); 176*0Sstevel@tonic-gate (void) uu_list_insert_before(wait_info_list, NULL, wi); 177*0Sstevel@tonic-gate MUTEX_UNLOCK(&wait_info_lock); 178*0Sstevel@tonic-gate 179*0Sstevel@tonic-gate if ((fd = open(fname, O_RDONLY)) == -1) { 180*0Sstevel@tonic-gate if (errno == ENOENT) { 181*0Sstevel@tonic-gate /* 182*0Sstevel@tonic-gate * Child has already exited. 183*0Sstevel@tonic-gate */ 184*0Sstevel@tonic-gate wait_remove(wi, direct); 185*0Sstevel@tonic-gate uu_free(fname); 186*0Sstevel@tonic-gate return (1); 187*0Sstevel@tonic-gate } else { 188*0Sstevel@tonic-gate log_error(LOG_WARNING, 189*0Sstevel@tonic-gate "open %s failed; not monitoring %s: %s\n", fname, 190*0Sstevel@tonic-gate inst_fmri, strerror(errno)); 191*0Sstevel@tonic-gate uu_free(fname); 192*0Sstevel@tonic-gate return (-1); 193*0Sstevel@tonic-gate } 194*0Sstevel@tonic-gate } 195*0Sstevel@tonic-gate 196*0Sstevel@tonic-gate uu_free(fname); 197*0Sstevel@tonic-gate 198*0Sstevel@tonic-gate wi->wi_fd = fd; 199*0Sstevel@tonic-gate 200*0Sstevel@tonic-gate if (port_associate(port_fd, PORT_SOURCE_FD, fd, 0, wi)) { 201*0Sstevel@tonic-gate log_error(LOG_WARNING, 202*0Sstevel@tonic-gate "initial port_association of %d / %s failed: %s\n", fd, 203*0Sstevel@tonic-gate inst_fmri, strerror(errno)); 204*0Sstevel@tonic-gate return (-1); 205*0Sstevel@tonic-gate } 206*0Sstevel@tonic-gate 207*0Sstevel@tonic-gate log_framework(LOG_DEBUG, "monitoring PID %ld on fd %d (%s)\n", pid, fd, 208*0Sstevel@tonic-gate inst_fmri); 209*0Sstevel@tonic-gate 210*0Sstevel@tonic-gate return (0); 211*0Sstevel@tonic-gate } 212*0Sstevel@tonic-gate 213*0Sstevel@tonic-gate /*ARGSUSED*/ 214*0Sstevel@tonic-gate void * 215*0Sstevel@tonic-gate wait_thread(void *args) 216*0Sstevel@tonic-gate { 217*0Sstevel@tonic-gate for (;;) { 218*0Sstevel@tonic-gate port_event_t pe; 219*0Sstevel@tonic-gate int fd; 220*0Sstevel@tonic-gate wait_info_t *wi; 221*0Sstevel@tonic-gate struct timespec ts; 222*0Sstevel@tonic-gate 223*0Sstevel@tonic-gate ts.tv_sec = 1; 224*0Sstevel@tonic-gate ts.tv_nsec = 0; 225*0Sstevel@tonic-gate 226*0Sstevel@tonic-gate if (port_get(port_fd, &pe, &ts) == -1) 227*0Sstevel@tonic-gate if (errno == EINTR || errno == ETIME) 228*0Sstevel@tonic-gate continue; 229*0Sstevel@tonic-gate else 230*0Sstevel@tonic-gate log_error(LOG_WARNING, 231*0Sstevel@tonic-gate "port_get returned %s\n", strerror(errno)); 232*0Sstevel@tonic-gate 233*0Sstevel@tonic-gate fd = pe.portev_object; 234*0Sstevel@tonic-gate wi = pe.portev_user; 235*0Sstevel@tonic-gate 236*0Sstevel@tonic-gate if ((pe.portev_events & POLLHUP) == POLLHUP) { 237*0Sstevel@tonic-gate psinfo_t psi; 238*0Sstevel@tonic-gate 239*0Sstevel@tonic-gate if (lseek(fd, 0, SEEK_SET) != 0 || 240*0Sstevel@tonic-gate read(fd, &psi, sizeof (psinfo_t)) != 241*0Sstevel@tonic-gate sizeof (psinfo_t)) { 242*0Sstevel@tonic-gate log_framework(LOG_WARNING, 243*0Sstevel@tonic-gate "couldn't get psinfo data for %s (%s); " 244*0Sstevel@tonic-gate "assuming failed\n", wi->wi_fmri, 245*0Sstevel@tonic-gate strerror(errno)); 246*0Sstevel@tonic-gate goto err_remove; 247*0Sstevel@tonic-gate } 248*0Sstevel@tonic-gate 249*0Sstevel@tonic-gate if (psi.pr_nlwp != 0 || 250*0Sstevel@tonic-gate psi.pr_nzomb != 0 || 251*0Sstevel@tonic-gate psi.pr_lwp.pr_lwpid != 0) { 252*0Sstevel@tonic-gate /* 253*0Sstevel@tonic-gate * We have determined, in accordance with the 254*0Sstevel@tonic-gate * definition in proc(4), this process is not a 255*0Sstevel@tonic-gate * zombie. Reassociate. 256*0Sstevel@tonic-gate */ 257*0Sstevel@tonic-gate if (port_associate(port_fd, PORT_SOURCE_FD, fd, 258*0Sstevel@tonic-gate 0, wi)) 259*0Sstevel@tonic-gate log_error(LOG_WARNING, 260*0Sstevel@tonic-gate "port_association of %d / %s " 261*0Sstevel@tonic-gate "failed\n", fd, wi->wi_fmri); 262*0Sstevel@tonic-gate continue; 263*0Sstevel@tonic-gate } 264*0Sstevel@tonic-gate } else if ( 265*0Sstevel@tonic-gate (pe.portev_events & POLLERR) == 0) { 266*0Sstevel@tonic-gate if (port_associate(port_fd, PORT_SOURCE_FD, fd, 0, wi)) 267*0Sstevel@tonic-gate log_error(LOG_WARNING, 268*0Sstevel@tonic-gate "port_association of %d / %s " 269*0Sstevel@tonic-gate "failed\n", fd, wi->wi_fmri); 270*0Sstevel@tonic-gate continue; 271*0Sstevel@tonic-gate } 272*0Sstevel@tonic-gate 273*0Sstevel@tonic-gate err_remove: 274*0Sstevel@tonic-gate startd_close(fd); 275*0Sstevel@tonic-gate wi->wi_fd = -1; 276*0Sstevel@tonic-gate 277*0Sstevel@tonic-gate wait_remove(wi, 0); 278*0Sstevel@tonic-gate } 279*0Sstevel@tonic-gate 280*0Sstevel@tonic-gate /*LINTED E_FUNC_HAS_NO_RETURN_STMT*/ 281*0Sstevel@tonic-gate } 282*0Sstevel@tonic-gate 283*0Sstevel@tonic-gate void 284*0Sstevel@tonic-gate wait_prefork() 285*0Sstevel@tonic-gate { 286*0Sstevel@tonic-gate MUTEX_LOCK(&wait_info_lock); 287*0Sstevel@tonic-gate } 288*0Sstevel@tonic-gate 289*0Sstevel@tonic-gate void 290*0Sstevel@tonic-gate wait_postfork(pid_t pid) 291*0Sstevel@tonic-gate { 292*0Sstevel@tonic-gate wait_info_t *wi; 293*0Sstevel@tonic-gate 294*0Sstevel@tonic-gate MUTEX_UNLOCK(&wait_info_lock); 295*0Sstevel@tonic-gate 296*0Sstevel@tonic-gate if (pid != 0) 297*0Sstevel@tonic-gate return; 298*0Sstevel@tonic-gate 299*0Sstevel@tonic-gate /* 300*0Sstevel@tonic-gate * Close all of the child's wait-related fds. The wait_thread() is 301*0Sstevel@tonic-gate * gone, so no need to worry about returning events. We always exec(2) 302*0Sstevel@tonic-gate * after a fork request, so we needn't free the list elements 303*0Sstevel@tonic-gate * themselves. 304*0Sstevel@tonic-gate */ 305*0Sstevel@tonic-gate 306*0Sstevel@tonic-gate for (wi = uu_list_first(wait_info_list); 307*0Sstevel@tonic-gate wi != NULL; 308*0Sstevel@tonic-gate wi = uu_list_next(wait_info_list, wi)) { 309*0Sstevel@tonic-gate if (wi->wi_fd != -1) 310*0Sstevel@tonic-gate startd_close(wi->wi_fd); 311*0Sstevel@tonic-gate } 312*0Sstevel@tonic-gate 313*0Sstevel@tonic-gate startd_close(port_fd); 314*0Sstevel@tonic-gate 315*0Sstevel@tonic-gate (void) setrlimit(RLIMIT_NOFILE, &init_fd_rlimit); 316*0Sstevel@tonic-gate } 317*0Sstevel@tonic-gate 318*0Sstevel@tonic-gate void 319*0Sstevel@tonic-gate wait_init() 320*0Sstevel@tonic-gate { 321*0Sstevel@tonic-gate struct rlimit fd_new; 322*0Sstevel@tonic-gate 323*0Sstevel@tonic-gate (void) getrlimit(RLIMIT_NOFILE, &init_fd_rlimit); 324*0Sstevel@tonic-gate (void) getrlimit(RLIMIT_NOFILE, &fd_new); 325*0Sstevel@tonic-gate 326*0Sstevel@tonic-gate fd_new.rlim_max = fd_new.rlim_cur = WAIT_FILES; 327*0Sstevel@tonic-gate 328*0Sstevel@tonic-gate (void) setrlimit(RLIMIT_NOFILE, &fd_new); 329*0Sstevel@tonic-gate 330*0Sstevel@tonic-gate if ((port_fd = port_create()) == -1) 331*0Sstevel@tonic-gate uu_die("wait_init couldn't port_create"); 332*0Sstevel@tonic-gate 333*0Sstevel@tonic-gate wait_info_pool = uu_list_pool_create("wait_info", sizeof (wait_info_t), 334*0Sstevel@tonic-gate offsetof(wait_info_t, wi_link), NULL, UU_LIST_POOL_DEBUG); 335*0Sstevel@tonic-gate if (wait_info_pool == NULL) 336*0Sstevel@tonic-gate uu_die("wait_init couldn't create wait_info_pool"); 337*0Sstevel@tonic-gate 338*0Sstevel@tonic-gate wait_info_list = uu_list_create(wait_info_pool, wait_info_list, 0); 339*0Sstevel@tonic-gate if (wait_info_list == NULL) 340*0Sstevel@tonic-gate uu_die("wait_init couldn't create wait_info_list"); 341*0Sstevel@tonic-gate 342*0Sstevel@tonic-gate (void) pthread_mutex_init(&wait_info_lock, &mutex_attrs); 343*0Sstevel@tonic-gate } 344