xref: /onnv-gate/usr/src/cmd/svc/startd/restarter.c (revision 12967:ab9ae749152f)
10Sstevel@tonic-gate /*
20Sstevel@tonic-gate  * CDDL HEADER START
30Sstevel@tonic-gate  *
40Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
51273Sgm149974  * Common Development and Distribution License (the "License").
61273Sgm149974  * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate  *
80Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate  * See the License for the specific language governing permissions
110Sstevel@tonic-gate  * and limitations under the License.
120Sstevel@tonic-gate  *
130Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate  *
190Sstevel@tonic-gate  * CDDL HEADER END
200Sstevel@tonic-gate  */
2111466SRoger.Faulkner@Sun.COM 
220Sstevel@tonic-gate /*
2312412SSean.Wilcox@Sun.COM  * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
240Sstevel@tonic-gate  */
250Sstevel@tonic-gate 
260Sstevel@tonic-gate /*
270Sstevel@tonic-gate  * restarter.c - service manipulation
280Sstevel@tonic-gate  *
290Sstevel@tonic-gate  * This component manages services whose restarter is svc.startd, the standard
300Sstevel@tonic-gate  * restarter.  It translates restarter protocol events from the graph engine
310Sstevel@tonic-gate  * into actions on processes, as a delegated restarter would do.
320Sstevel@tonic-gate  *
330Sstevel@tonic-gate  * The master restarter manages a number of always-running threads:
340Sstevel@tonic-gate  *   - restarter event thread: events from the graph engine
350Sstevel@tonic-gate  *   - timeout thread: thread to fire queued timeouts
360Sstevel@tonic-gate  *   - contract thread: thread to handle contract events
370Sstevel@tonic-gate  *   - wait thread: thread to handle wait-based services
380Sstevel@tonic-gate  *
390Sstevel@tonic-gate  * The other threads are created as-needed:
400Sstevel@tonic-gate  *   - per-instance method threads
410Sstevel@tonic-gate  *   - per-instance event processing threads
420Sstevel@tonic-gate  *
430Sstevel@tonic-gate  * The interaction of all threads must result in the following conditions
440Sstevel@tonic-gate  * being satisfied (on a per-instance basis):
450Sstevel@tonic-gate  *   - restarter events must be processed in order
460Sstevel@tonic-gate  *   - method execution must be serialized
470Sstevel@tonic-gate  *   - instance delete must be held until outstanding methods are complete
480Sstevel@tonic-gate  *   - contract events shouldn't be processed while a method is running
490Sstevel@tonic-gate  *   - timeouts should fire even when a method is running
500Sstevel@tonic-gate  *
510Sstevel@tonic-gate  * Service instances are represented by restarter_inst_t's and are kept in the
520Sstevel@tonic-gate  * instance_list list.
530Sstevel@tonic-gate  *
540Sstevel@tonic-gate  * Service States
550Sstevel@tonic-gate  *   The current state of a service instance is kept in
560Sstevel@tonic-gate  *   restarter_inst_t->ri_i.i_state.  If transition to a new state could take
570Sstevel@tonic-gate  *   some time, then before we effect the transition we set
580Sstevel@tonic-gate  *   restarter_inst_t->ri_i.i_next_state to the target state, and afterwards we
590Sstevel@tonic-gate  *   rotate i_next_state to i_state and set i_next_state to
600Sstevel@tonic-gate  *   RESTARTER_STATE_NONE.  So usually i_next_state is _NONE when ri_lock is not
610Sstevel@tonic-gate  *   held.  The exception is when we launch methods, which are done with
620Sstevel@tonic-gate  *   a separate thread.  To keep any other threads from grabbing ri_lock before
630Sstevel@tonic-gate  *   method_thread() does, we set ri_method_thread to the thread id of the
640Sstevel@tonic-gate  *   method thread, and when it is nonzero any thread with a different thread id
650Sstevel@tonic-gate  *   waits on ri_method_cv.
660Sstevel@tonic-gate  *
670Sstevel@tonic-gate  * Method execution is serialized by blocking on ri_method_cv in
680Sstevel@tonic-gate  * inst_lookup_by_id() and waiting for a 0 value of ri_method_thread.  This
690Sstevel@tonic-gate  * also prevents the instance structure from being deleted until all
700Sstevel@tonic-gate  * outstanding operations such as method_thread() have finished.
710Sstevel@tonic-gate  *
720Sstevel@tonic-gate  * Lock ordering:
730Sstevel@tonic-gate  *
740Sstevel@tonic-gate  * dgraph_lock [can be held when taking:]
750Sstevel@tonic-gate  *   utmpx_lock
760Sstevel@tonic-gate  *   dictionary->dict_lock
770Sstevel@tonic-gate  *   st->st_load_lock
780Sstevel@tonic-gate  *   wait_info_lock
790Sstevel@tonic-gate  *   ru->restarter_update_lock
800Sstevel@tonic-gate  *     restarter_queue->rpeq_lock
810Sstevel@tonic-gate  *   instance_list.ril_lock
820Sstevel@tonic-gate  *     inst->ri_lock
830Sstevel@tonic-gate  *   st->st_configd_live_lock
840Sstevel@tonic-gate  *
850Sstevel@tonic-gate  * instance_list.ril_lock
860Sstevel@tonic-gate  *   graph_queue->gpeq_lock
870Sstevel@tonic-gate  *   gu->gu_lock
880Sstevel@tonic-gate  *   st->st_configd_live_lock
890Sstevel@tonic-gate  *   dictionary->dict_lock
900Sstevel@tonic-gate  *   inst->ri_lock
910Sstevel@tonic-gate  *     graph_queue->gpeq_lock
920Sstevel@tonic-gate  *     gu->gu_lock
930Sstevel@tonic-gate  *     tu->tu_lock
940Sstevel@tonic-gate  *     tq->tq_lock
950Sstevel@tonic-gate  *     inst->ri_queue_lock
960Sstevel@tonic-gate  *       wait_info_lock
970Sstevel@tonic-gate  *       bp->cb_lock
980Sstevel@tonic-gate  *     utmpx_lock
990Sstevel@tonic-gate  *
1000Sstevel@tonic-gate  * single_user_thread_lock
1010Sstevel@tonic-gate  *   wait_info_lock
1020Sstevel@tonic-gate  *   utmpx_lock
1030Sstevel@tonic-gate  *
1040Sstevel@tonic-gate  * gu_freeze_lock
1050Sstevel@tonic-gate  *
1060Sstevel@tonic-gate  * logbuf_mutex nests inside pretty much everything.
1070Sstevel@tonic-gate  */
1080Sstevel@tonic-gate 
1090Sstevel@tonic-gate #include <sys/contract/process.h>
1100Sstevel@tonic-gate #include <sys/ctfs.h>
1110Sstevel@tonic-gate #include <sys/stat.h>
1120Sstevel@tonic-gate #include <sys/time.h>
1130Sstevel@tonic-gate #include <sys/types.h>
1140Sstevel@tonic-gate #include <sys/uio.h>
1150Sstevel@tonic-gate #include <sys/wait.h>
1160Sstevel@tonic-gate #include <assert.h>
1170Sstevel@tonic-gate #include <errno.h>
1180Sstevel@tonic-gate #include <fcntl.h>
1190Sstevel@tonic-gate #include <libcontract.h>
1200Sstevel@tonic-gate #include <libcontract_priv.h>
1210Sstevel@tonic-gate #include <libintl.h>
1220Sstevel@tonic-gate #include <librestart.h>
1230Sstevel@tonic-gate #include <librestart_priv.h>
1240Sstevel@tonic-gate #include <libuutil.h>
1250Sstevel@tonic-gate #include <limits.h>
1260Sstevel@tonic-gate #include <poll.h>
1270Sstevel@tonic-gate #include <port.h>
1280Sstevel@tonic-gate #include <pthread.h>
1290Sstevel@tonic-gate #include <stdarg.h>
1300Sstevel@tonic-gate #include <stdio.h>
1310Sstevel@tonic-gate #include <strings.h>
1320Sstevel@tonic-gate #include <unistd.h>
1330Sstevel@tonic-gate 
1340Sstevel@tonic-gate #include "startd.h"
1350Sstevel@tonic-gate #include "protocol.h"
1360Sstevel@tonic-gate 
1370Sstevel@tonic-gate static uu_list_pool_t *restarter_instance_pool;
1380Sstevel@tonic-gate static restarter_instance_list_t instance_list;
1390Sstevel@tonic-gate 
1400Sstevel@tonic-gate static uu_list_pool_t *restarter_queue_pool;
1410Sstevel@tonic-gate 
14211482SSean.Wilcox@Sun.COM /*
14311482SSean.Wilcox@Sun.COM  * Function used to reset the restart times for an instance, when
14411482SSean.Wilcox@Sun.COM  * an administrative task comes along and essentially makes the times
14511482SSean.Wilcox@Sun.COM  * in this array ineffective.
14611482SSean.Wilcox@Sun.COM  */
14711482SSean.Wilcox@Sun.COM static void
reset_start_times(restarter_inst_t * inst)14811482SSean.Wilcox@Sun.COM reset_start_times(restarter_inst_t *inst)
14911482SSean.Wilcox@Sun.COM {
15011482SSean.Wilcox@Sun.COM 	inst->ri_start_index = 0;
15111482SSean.Wilcox@Sun.COM 	bzero(inst->ri_start_time, sizeof (inst->ri_start_time));
15211482SSean.Wilcox@Sun.COM }
15311482SSean.Wilcox@Sun.COM 
1540Sstevel@tonic-gate /*ARGSUSED*/
1550Sstevel@tonic-gate static int
restarter_instance_compare(const void * lc_arg,const void * rc_arg,void * private)1560Sstevel@tonic-gate restarter_instance_compare(const void *lc_arg, const void *rc_arg,
1570Sstevel@tonic-gate     void *private)
1580Sstevel@tonic-gate {
1590Sstevel@tonic-gate 	int lc_id = ((const restarter_inst_t *)lc_arg)->ri_id;
1600Sstevel@tonic-gate 	int rc_id = *(int *)rc_arg;
1610Sstevel@tonic-gate 
1620Sstevel@tonic-gate 	if (lc_id > rc_id)
1630Sstevel@tonic-gate 		return (1);
1640Sstevel@tonic-gate 	if (lc_id < rc_id)
1650Sstevel@tonic-gate 		return (-1);
1660Sstevel@tonic-gate 	return (0);
1670Sstevel@tonic-gate }
1680Sstevel@tonic-gate 
1690Sstevel@tonic-gate static restarter_inst_t *
inst_lookup_by_name(const char * name)1700Sstevel@tonic-gate inst_lookup_by_name(const char *name)
1710Sstevel@tonic-gate {
1720Sstevel@tonic-gate 	int id;
1730Sstevel@tonic-gate 
1740Sstevel@tonic-gate 	id = dict_lookup_byname(name);
1750Sstevel@tonic-gate 	if (id == -1)
1760Sstevel@tonic-gate 		return (NULL);
1770Sstevel@tonic-gate 
1780Sstevel@tonic-gate 	return (inst_lookup_by_id(id));
1790Sstevel@tonic-gate }
1800Sstevel@tonic-gate 
1810Sstevel@tonic-gate restarter_inst_t *
inst_lookup_by_id(int id)1820Sstevel@tonic-gate inst_lookup_by_id(int id)
1830Sstevel@tonic-gate {
1840Sstevel@tonic-gate 	restarter_inst_t *inst;
1850Sstevel@tonic-gate 
1860Sstevel@tonic-gate 	MUTEX_LOCK(&instance_list.ril_lock);
1870Sstevel@tonic-gate 	inst = uu_list_find(instance_list.ril_instance_list, &id, NULL, NULL);
1880Sstevel@tonic-gate 	if (inst != NULL)
1890Sstevel@tonic-gate 		MUTEX_LOCK(&inst->ri_lock);
1900Sstevel@tonic-gate 	MUTEX_UNLOCK(&instance_list.ril_lock);
1910Sstevel@tonic-gate 
1920Sstevel@tonic-gate 	if (inst != NULL) {
1930Sstevel@tonic-gate 		while (inst->ri_method_thread != 0 &&
1940Sstevel@tonic-gate 		    !pthread_equal(inst->ri_method_thread, pthread_self())) {
1950Sstevel@tonic-gate 			++inst->ri_method_waiters;
1960Sstevel@tonic-gate 			(void) pthread_cond_wait(&inst->ri_method_cv,
1970Sstevel@tonic-gate 			    &inst->ri_lock);
1980Sstevel@tonic-gate 			assert(inst->ri_method_waiters > 0);
1990Sstevel@tonic-gate 			--inst->ri_method_waiters;
2000Sstevel@tonic-gate 		}
2010Sstevel@tonic-gate 	}
2020Sstevel@tonic-gate 
2030Sstevel@tonic-gate 	return (inst);
2040Sstevel@tonic-gate }
2050Sstevel@tonic-gate 
2060Sstevel@tonic-gate static restarter_inst_t *
inst_lookup_queue(const char * name)2070Sstevel@tonic-gate inst_lookup_queue(const char *name)
2080Sstevel@tonic-gate {
2090Sstevel@tonic-gate 	int id;
2100Sstevel@tonic-gate 	restarter_inst_t *inst;
2110Sstevel@tonic-gate 
2120Sstevel@tonic-gate 	id = dict_lookup_byname(name);
2130Sstevel@tonic-gate 	if (id == -1)
2140Sstevel@tonic-gate 		return (NULL);
2150Sstevel@tonic-gate 
2160Sstevel@tonic-gate 	MUTEX_LOCK(&instance_list.ril_lock);
2170Sstevel@tonic-gate 	inst = uu_list_find(instance_list.ril_instance_list, &id, NULL, NULL);
2180Sstevel@tonic-gate 	if (inst != NULL)
2190Sstevel@tonic-gate 		MUTEX_LOCK(&inst->ri_queue_lock);
2200Sstevel@tonic-gate 	MUTEX_UNLOCK(&instance_list.ril_lock);
2210Sstevel@tonic-gate 
2220Sstevel@tonic-gate 	return (inst);
2230Sstevel@tonic-gate }
2240Sstevel@tonic-gate 
2250Sstevel@tonic-gate const char *
service_style(int flags)2260Sstevel@tonic-gate service_style(int flags)
2270Sstevel@tonic-gate {
2280Sstevel@tonic-gate 	switch (flags & RINST_STYLE_MASK) {
2290Sstevel@tonic-gate 	case RINST_CONTRACT:	return ("contract");
2300Sstevel@tonic-gate 	case RINST_TRANSIENT:	return ("transient");
2310Sstevel@tonic-gate 	case RINST_WAIT:	return ("wait");
2320Sstevel@tonic-gate 
2330Sstevel@tonic-gate 	default:
2340Sstevel@tonic-gate #ifndef NDEBUG
2350Sstevel@tonic-gate 		uu_warn("%s:%d: Bad flags 0x%x.\n", __FILE__, __LINE__, flags);
2360Sstevel@tonic-gate #endif
2370Sstevel@tonic-gate 		abort();
2380Sstevel@tonic-gate 		/* NOTREACHED */
2390Sstevel@tonic-gate 	}
2400Sstevel@tonic-gate }
2410Sstevel@tonic-gate 
2420Sstevel@tonic-gate /*
2430Sstevel@tonic-gate  * Fails with ECONNABORTED or ECANCELED.
2440Sstevel@tonic-gate  */
2450Sstevel@tonic-gate static int
check_contract(restarter_inst_t * inst,boolean_t primary,scf_instance_t * scf_inst)2460Sstevel@tonic-gate check_contract(restarter_inst_t *inst, boolean_t primary,
2470Sstevel@tonic-gate     scf_instance_t *scf_inst)
2480Sstevel@tonic-gate {
2490Sstevel@tonic-gate 	ctid_t *ctidp;
2500Sstevel@tonic-gate 	int fd, r;
2510Sstevel@tonic-gate 
2520Sstevel@tonic-gate 	ctidp = primary ? &inst->ri_i.i_primary_ctid :
2530Sstevel@tonic-gate 	    &inst->ri_i.i_transient_ctid;
2540Sstevel@tonic-gate 
2550Sstevel@tonic-gate 	assert(*ctidp >= 1);
2560Sstevel@tonic-gate 
2570Sstevel@tonic-gate 	fd = contract_open(*ctidp, NULL, "status", O_RDONLY);
2580Sstevel@tonic-gate 	if (fd >= 0) {
2590Sstevel@tonic-gate 		r = close(fd);
2600Sstevel@tonic-gate 		assert(r == 0);
2610Sstevel@tonic-gate 		return (0);
2620Sstevel@tonic-gate 	}
2630Sstevel@tonic-gate 
2640Sstevel@tonic-gate 	r = restarter_remove_contract(scf_inst, *ctidp, primary ?
2650Sstevel@tonic-gate 	    RESTARTER_CONTRACT_PRIMARY : RESTARTER_CONTRACT_TRANSIENT);
2660Sstevel@tonic-gate 	switch (r) {
2670Sstevel@tonic-gate 	case 0:
2680Sstevel@tonic-gate 	case ECONNABORTED:
2690Sstevel@tonic-gate 	case ECANCELED:
2700Sstevel@tonic-gate 		*ctidp = 0;
2710Sstevel@tonic-gate 		return (r);
2720Sstevel@tonic-gate 
2730Sstevel@tonic-gate 	case ENOMEM:
2740Sstevel@tonic-gate 		uu_die("Out of memory\n");
2750Sstevel@tonic-gate 		/* NOTREACHED */
2760Sstevel@tonic-gate 
2770Sstevel@tonic-gate 	case EPERM:
2780Sstevel@tonic-gate 		uu_die("Insufficient privilege.\n");
2790Sstevel@tonic-gate 		/* NOTREACHED */
2800Sstevel@tonic-gate 
2810Sstevel@tonic-gate 	case EACCES:
2820Sstevel@tonic-gate 		uu_die("Repository backend access denied.\n");
2830Sstevel@tonic-gate 		/* NOTREACHED */
2840Sstevel@tonic-gate 
2850Sstevel@tonic-gate 	case EROFS:
2860Sstevel@tonic-gate 		log_error(LOG_INFO, "Could not remove unusable contract id %ld "
2870Sstevel@tonic-gate 		    "for %s from repository.\n", *ctidp, inst->ri_i.i_fmri);
2880Sstevel@tonic-gate 		return (0);
2890Sstevel@tonic-gate 
2900Sstevel@tonic-gate 	case EINVAL:
2910Sstevel@tonic-gate 	case EBADF:
2920Sstevel@tonic-gate 	default:
2930Sstevel@tonic-gate 		assert(0);
2940Sstevel@tonic-gate 		abort();
2950Sstevel@tonic-gate 		/* NOTREACHED */
2960Sstevel@tonic-gate 	}
2970Sstevel@tonic-gate }
2980Sstevel@tonic-gate 
2990Sstevel@tonic-gate static int stop_instance(scf_handle_t *, restarter_inst_t *, stop_cause_t);
3000Sstevel@tonic-gate 
3010Sstevel@tonic-gate /*
3020Sstevel@tonic-gate  * int restarter_insert_inst(scf_handle_t *, char *)
3030Sstevel@tonic-gate  *   If the inst is already in the restarter list, return its id.  If the inst
3040Sstevel@tonic-gate  *   is not in the restarter list, initialize a restarter_inst_t, initialize its
3050Sstevel@tonic-gate  *   states, insert it into the list, and return 0.
3060Sstevel@tonic-gate  *
3070Sstevel@tonic-gate  *   Fails with
3080Sstevel@tonic-gate  *     ENOENT - name is not in the repository
3090Sstevel@tonic-gate  */
3100Sstevel@tonic-gate static int
restarter_insert_inst(scf_handle_t * h,const char * name)3110Sstevel@tonic-gate restarter_insert_inst(scf_handle_t *h, const char *name)
3120Sstevel@tonic-gate {
3130Sstevel@tonic-gate 	int id, r;
3140Sstevel@tonic-gate 	restarter_inst_t *inst;
3150Sstevel@tonic-gate 	uu_list_index_t idx;
3160Sstevel@tonic-gate 	scf_service_t *scf_svc;
3170Sstevel@tonic-gate 	scf_instance_t *scf_inst;
318837Srm88369 	scf_snapshot_t *snap = NULL;
3190Sstevel@tonic-gate 	scf_propertygroup_t *pg;
3200Sstevel@tonic-gate 	char *svc_name, *inst_name;
3210Sstevel@tonic-gate 	char logfilebuf[PATH_MAX];
3220Sstevel@tonic-gate 	char *c;
3230Sstevel@tonic-gate 	boolean_t do_commit_states;
3240Sstevel@tonic-gate 	restarter_instance_state_t state, next_state;
3250Sstevel@tonic-gate 	protocol_states_t *ps;
3260Sstevel@tonic-gate 	pid_t start_pid;
327*12967Sgavin.maltby@oracle.com 	restarter_str_t reason = restarter_str_insert_in_graph;
3280Sstevel@tonic-gate 
3290Sstevel@tonic-gate 	MUTEX_LOCK(&instance_list.ril_lock);
3300Sstevel@tonic-gate 
3310Sstevel@tonic-gate 	/*
3320Sstevel@tonic-gate 	 * We don't use inst_lookup_by_name() here because we want the lookup
3330Sstevel@tonic-gate 	 * & insert to be atomic.
3340Sstevel@tonic-gate 	 */
3350Sstevel@tonic-gate 	id = dict_lookup_byname(name);
3360Sstevel@tonic-gate 	if (id != -1) {
3370Sstevel@tonic-gate 		inst = uu_list_find(instance_list.ril_instance_list, &id, NULL,
3380Sstevel@tonic-gate 		    &idx);
3390Sstevel@tonic-gate 		if (inst != NULL) {
3400Sstevel@tonic-gate 			MUTEX_UNLOCK(&instance_list.ril_lock);
3410Sstevel@tonic-gate 			return (0);
3420Sstevel@tonic-gate 		}
3430Sstevel@tonic-gate 	}
3440Sstevel@tonic-gate 
3450Sstevel@tonic-gate 	/* Allocate an instance */
3460Sstevel@tonic-gate 	inst = startd_zalloc(sizeof (restarter_inst_t));
3470Sstevel@tonic-gate 	inst->ri_utmpx_prefix = startd_alloc(max_scf_value_size);
3480Sstevel@tonic-gate 	inst->ri_utmpx_prefix[0] = '\0';
3490Sstevel@tonic-gate 
3500Sstevel@tonic-gate 	inst->ri_i.i_fmri = startd_alloc(strlen(name) + 1);
3510Sstevel@tonic-gate 	(void) strcpy((char *)inst->ri_i.i_fmri, name);
3520Sstevel@tonic-gate 
3530Sstevel@tonic-gate 	inst->ri_queue = startd_list_create(restarter_queue_pool, inst, 0);
3540Sstevel@tonic-gate 
3550Sstevel@tonic-gate 	/*
3560Sstevel@tonic-gate 	 * id shouldn't be -1 since we use the same dictionary as graph.c, but
3570Sstevel@tonic-gate 	 * just in case.
3580Sstevel@tonic-gate 	 */
3590Sstevel@tonic-gate 	inst->ri_id = (id != -1 ? id : dict_insert(name));
3600Sstevel@tonic-gate 
3610Sstevel@tonic-gate 	special_online_hooks_get(name, &inst->ri_pre_online_hook,
3620Sstevel@tonic-gate 	    &inst->ri_post_online_hook, &inst->ri_post_offline_hook);
3630Sstevel@tonic-gate 
3640Sstevel@tonic-gate 	scf_svc = safe_scf_service_create(h);
3650Sstevel@tonic-gate 	scf_inst = safe_scf_instance_create(h);
3660Sstevel@tonic-gate 	pg = safe_scf_pg_create(h);
3670Sstevel@tonic-gate 	svc_name = startd_alloc(max_scf_name_size);
3680Sstevel@tonic-gate 	inst_name = startd_alloc(max_scf_name_size);
3690Sstevel@tonic-gate 
3700Sstevel@tonic-gate rep_retry:
371837Srm88369 	if (snap != NULL)
372837Srm88369 		scf_snapshot_destroy(snap);
373837Srm88369 	if (inst->ri_logstem != NULL)
374837Srm88369 		startd_free(inst->ri_logstem, PATH_MAX);
375837Srm88369 	if (inst->ri_common_name != NULL)
376837Srm88369 		startd_free(inst->ri_common_name, max_scf_value_size);
377837Srm88369 	if (inst->ri_C_common_name != NULL)
378837Srm88369 		startd_free(inst->ri_C_common_name, max_scf_value_size);
379837Srm88369 	snap = NULL;
380837Srm88369 	inst->ri_logstem = NULL;
381837Srm88369 	inst->ri_common_name = NULL;
382837Srm88369 	inst->ri_C_common_name = NULL;
383837Srm88369 
3840Sstevel@tonic-gate 	if (scf_handle_decode_fmri(h, name, NULL, scf_svc, scf_inst, NULL,
3850Sstevel@tonic-gate 	    NULL, SCF_DECODE_FMRI_EXACT) != 0) {
3860Sstevel@tonic-gate 		switch (scf_error()) {
3870Sstevel@tonic-gate 		case SCF_ERROR_CONNECTION_BROKEN:
3880Sstevel@tonic-gate 			libscf_handle_rebind(h);
3890Sstevel@tonic-gate 			goto rep_retry;
3900Sstevel@tonic-gate 
3910Sstevel@tonic-gate 		case SCF_ERROR_NOT_FOUND:
392837Srm88369 			goto deleted;
3930Sstevel@tonic-gate 		}
3940Sstevel@tonic-gate 
3950Sstevel@tonic-gate 		uu_die("Can't decode FMRI %s: %s\n", name,
3960Sstevel@tonic-gate 		    scf_strerror(scf_error()));
3970Sstevel@tonic-gate 	}
3980Sstevel@tonic-gate 
3990Sstevel@tonic-gate 	/*
4000Sstevel@tonic-gate 	 * If there's no running snapshot, then we execute using the editing
4010Sstevel@tonic-gate 	 * snapshot.  Pending snapshots will be taken later.
4020Sstevel@tonic-gate 	 */
4030Sstevel@tonic-gate 	snap = libscf_get_running_snapshot(scf_inst);
4040Sstevel@tonic-gate 
4050Sstevel@tonic-gate 	if ((scf_service_get_name(scf_svc, svc_name, max_scf_name_size) < 0) ||
4060Sstevel@tonic-gate 	    (scf_instance_get_name(scf_inst, inst_name, max_scf_name_size) <
4070Sstevel@tonic-gate 	    0)) {
4080Sstevel@tonic-gate 		switch (scf_error()) {
4090Sstevel@tonic-gate 		case SCF_ERROR_NOT_SET:
4100Sstevel@tonic-gate 			break;
4110Sstevel@tonic-gate 
4120Sstevel@tonic-gate 		case SCF_ERROR_CONNECTION_BROKEN:
4130Sstevel@tonic-gate 			libscf_handle_rebind(h);
4140Sstevel@tonic-gate 			goto rep_retry;
4150Sstevel@tonic-gate 
4160Sstevel@tonic-gate 		default:
4170Sstevel@tonic-gate 			assert(0);
4180Sstevel@tonic-gate 			abort();
4190Sstevel@tonic-gate 		}
4200Sstevel@tonic-gate 
4210Sstevel@tonic-gate 		goto deleted;
4220Sstevel@tonic-gate 	}
4230Sstevel@tonic-gate 
424345Slianep 	(void) snprintf(logfilebuf, PATH_MAX, "%s:%s", svc_name, inst_name);
425345Slianep 	for (c = logfilebuf; *c != '\0'; c++)
426345Slianep 		if (*c == '/')
427345Slianep 			*c = '-';
428345Slianep 
429345Slianep 	inst->ri_logstem = startd_alloc(PATH_MAX);
430345Slianep 	(void) snprintf(inst->ri_logstem, PATH_MAX, "%s%s", logfilebuf,
431345Slianep 	    LOG_SUFFIX);
432345Slianep 
4330Sstevel@tonic-gate 	/*
4340Sstevel@tonic-gate 	 * If the restarter group is missing, use uninit/none.  Otherwise,
4350Sstevel@tonic-gate 	 * we're probably being restarted & don't want to mess up the states
4360Sstevel@tonic-gate 	 * that are there.
4370Sstevel@tonic-gate 	 */
4380Sstevel@tonic-gate 	state = RESTARTER_STATE_UNINIT;
4390Sstevel@tonic-gate 	next_state = RESTARTER_STATE_NONE;
4400Sstevel@tonic-gate 
4410Sstevel@tonic-gate 	r = scf_instance_get_pg(scf_inst, SCF_PG_RESTARTER, pg);
4420Sstevel@tonic-gate 	if (r != 0) {
4430Sstevel@tonic-gate 		switch (scf_error()) {
4440Sstevel@tonic-gate 		case SCF_ERROR_CONNECTION_BROKEN:
4450Sstevel@tonic-gate 			libscf_handle_rebind(h);
4460Sstevel@tonic-gate 			goto rep_retry;
4470Sstevel@tonic-gate 
4480Sstevel@tonic-gate 		case SCF_ERROR_NOT_SET:
4490Sstevel@tonic-gate 			goto deleted;
4500Sstevel@tonic-gate 
4510Sstevel@tonic-gate 		case SCF_ERROR_NOT_FOUND:
4520Sstevel@tonic-gate 			/*
4530Sstevel@tonic-gate 			 * This shouldn't happen since the graph engine should
4540Sstevel@tonic-gate 			 * have initialized the state to uninitialized/none if
4550Sstevel@tonic-gate 			 * there was no restarter pg.  In case somebody
4560Sstevel@tonic-gate 			 * deleted it, though....
4570Sstevel@tonic-gate 			 */
4580Sstevel@tonic-gate 			do_commit_states = B_TRUE;
4590Sstevel@tonic-gate 			break;
4600Sstevel@tonic-gate 
4610Sstevel@tonic-gate 		default:
4620Sstevel@tonic-gate 			assert(0);
4630Sstevel@tonic-gate 			abort();
4640Sstevel@tonic-gate 		}
4650Sstevel@tonic-gate 	} else {
4660Sstevel@tonic-gate 		r = libscf_read_states(pg, &state, &next_state);
4670Sstevel@tonic-gate 		if (r != 0) {
4680Sstevel@tonic-gate 			do_commit_states = B_TRUE;
4690Sstevel@tonic-gate 		} else {
4700Sstevel@tonic-gate 			if (next_state != RESTARTER_STATE_NONE) {
4710Sstevel@tonic-gate 				/*
4720Sstevel@tonic-gate 				 * Force next_state to _NONE since we
4730Sstevel@tonic-gate 				 * don't look for method processes.
4740Sstevel@tonic-gate 				 */
4750Sstevel@tonic-gate 				next_state = RESTARTER_STATE_NONE;
4760Sstevel@tonic-gate 				do_commit_states = B_TRUE;
4770Sstevel@tonic-gate 			} else {
4780Sstevel@tonic-gate 				/*
479*12967Sgavin.maltby@oracle.com 				 * The reason for transition will depend on
480*12967Sgavin.maltby@oracle.com 				 * state.
481*12967Sgavin.maltby@oracle.com 				 */
482*12967Sgavin.maltby@oracle.com 				if (st->st_initial == 0)
483*12967Sgavin.maltby@oracle.com 					reason = restarter_str_startd_restart;
484*12967Sgavin.maltby@oracle.com 				else if (state == RESTARTER_STATE_MAINT)
485*12967Sgavin.maltby@oracle.com 					reason = restarter_str_bad_repo_state;
486*12967Sgavin.maltby@oracle.com 				/*
4870Sstevel@tonic-gate 				 * Inform the restarter of our state without
4880Sstevel@tonic-gate 				 * changing the STIME in the repository.
4890Sstevel@tonic-gate 				 */
4900Sstevel@tonic-gate 				ps = startd_alloc(sizeof (*ps));
4910Sstevel@tonic-gate 				inst->ri_i.i_state = ps->ps_state = state;
4920Sstevel@tonic-gate 				inst->ri_i.i_next_state = ps->ps_state_next =
4930Sstevel@tonic-gate 				    next_state;
494*12967Sgavin.maltby@oracle.com 				ps->ps_reason = reason;
4950Sstevel@tonic-gate 
4960Sstevel@tonic-gate 				graph_protocol_send_event(inst->ri_i.i_fmri,
4970Sstevel@tonic-gate 				    GRAPH_UPDATE_STATE_CHANGE, ps);
4980Sstevel@tonic-gate 
4990Sstevel@tonic-gate 				do_commit_states = B_FALSE;
5000Sstevel@tonic-gate 			}
5010Sstevel@tonic-gate 		}
5020Sstevel@tonic-gate 	}
5030Sstevel@tonic-gate 
5040Sstevel@tonic-gate 	switch (libscf_get_startd_properties(scf_inst, snap, &inst->ri_flags,
5050Sstevel@tonic-gate 	    &inst->ri_utmpx_prefix)) {
5060Sstevel@tonic-gate 	case 0:
5070Sstevel@tonic-gate 		break;
5080Sstevel@tonic-gate 
5090Sstevel@tonic-gate 	case ECONNABORTED:
5100Sstevel@tonic-gate 		libscf_handle_rebind(h);
5110Sstevel@tonic-gate 		goto rep_retry;
5120Sstevel@tonic-gate 
5130Sstevel@tonic-gate 	case ECANCELED:
5140Sstevel@tonic-gate 		goto deleted;
5150Sstevel@tonic-gate 
5160Sstevel@tonic-gate 	case ENOENT:
5170Sstevel@tonic-gate 		/*
5180Sstevel@tonic-gate 		 * This is odd, because the graph engine should have required
5190Sstevel@tonic-gate 		 * the general property group.  So we'll just use default
5200Sstevel@tonic-gate 		 * flags in anticipation of the graph engine sending us
5210Sstevel@tonic-gate 		 * REMOVE_INSTANCE when it finds out that the general property
5220Sstevel@tonic-gate 		 * group has been deleted.
5230Sstevel@tonic-gate 		 */
5240Sstevel@tonic-gate 		inst->ri_flags = RINST_CONTRACT;
5250Sstevel@tonic-gate 		break;
5260Sstevel@tonic-gate 
5270Sstevel@tonic-gate 	default:
5280Sstevel@tonic-gate 		assert(0);
5290Sstevel@tonic-gate 		abort();
5300Sstevel@tonic-gate 	}
5310Sstevel@tonic-gate 
5320Sstevel@tonic-gate 	switch (libscf_get_template_values(scf_inst, snap,
5330Sstevel@tonic-gate 	    &inst->ri_common_name, &inst->ri_C_common_name)) {
5340Sstevel@tonic-gate 	case 0:
5350Sstevel@tonic-gate 		break;
5360Sstevel@tonic-gate 
5370Sstevel@tonic-gate 	case ECONNABORTED:
5380Sstevel@tonic-gate 		libscf_handle_rebind(h);
5390Sstevel@tonic-gate 		goto rep_retry;
5400Sstevel@tonic-gate 
5410Sstevel@tonic-gate 	case ECANCELED:
5420Sstevel@tonic-gate 		goto deleted;
5430Sstevel@tonic-gate 
5440Sstevel@tonic-gate 	case ECHILD:
5450Sstevel@tonic-gate 	case ENOENT:
5460Sstevel@tonic-gate 		break;
5470Sstevel@tonic-gate 
5480Sstevel@tonic-gate 	default:
5490Sstevel@tonic-gate 		assert(0);
5500Sstevel@tonic-gate 		abort();
5510Sstevel@tonic-gate 	}
5520Sstevel@tonic-gate 
5530Sstevel@tonic-gate 	switch (libscf_read_method_ids(h, scf_inst, inst->ri_i.i_fmri,
5540Sstevel@tonic-gate 	    &inst->ri_i.i_primary_ctid, &inst->ri_i.i_transient_ctid,
5550Sstevel@tonic-gate 	    &start_pid)) {
5560Sstevel@tonic-gate 	case 0:
5570Sstevel@tonic-gate 		break;
5580Sstevel@tonic-gate 
5590Sstevel@tonic-gate 	case ECONNABORTED:
5600Sstevel@tonic-gate 		libscf_handle_rebind(h);
5610Sstevel@tonic-gate 		goto rep_retry;
5620Sstevel@tonic-gate 
5630Sstevel@tonic-gate 	case ECANCELED:
5640Sstevel@tonic-gate 		goto deleted;
5650Sstevel@tonic-gate 
5660Sstevel@tonic-gate 	default:
5670Sstevel@tonic-gate 		assert(0);
5680Sstevel@tonic-gate 		abort();
5690Sstevel@tonic-gate 	}
5700Sstevel@tonic-gate 
5710Sstevel@tonic-gate 	if (inst->ri_i.i_primary_ctid >= 1) {
5720Sstevel@tonic-gate 		contract_hash_store(inst->ri_i.i_primary_ctid, inst->ri_id);
5730Sstevel@tonic-gate 
5740Sstevel@tonic-gate 		switch (check_contract(inst, B_TRUE, scf_inst)) {
5750Sstevel@tonic-gate 		case 0:
5760Sstevel@tonic-gate 			break;
5770Sstevel@tonic-gate 
5780Sstevel@tonic-gate 		case ECONNABORTED:
5790Sstevel@tonic-gate 			libscf_handle_rebind(h);
5800Sstevel@tonic-gate 			goto rep_retry;
5810Sstevel@tonic-gate 
5820Sstevel@tonic-gate 		case ECANCELED:
5830Sstevel@tonic-gate 			goto deleted;
5840Sstevel@tonic-gate 
5850Sstevel@tonic-gate 		default:
5860Sstevel@tonic-gate 			assert(0);
5870Sstevel@tonic-gate 			abort();
5880Sstevel@tonic-gate 		}
5890Sstevel@tonic-gate 	}
5900Sstevel@tonic-gate 
5910Sstevel@tonic-gate 	if (inst->ri_i.i_transient_ctid >= 1) {
5920Sstevel@tonic-gate 		switch (check_contract(inst, B_FALSE, scf_inst)) {
5930Sstevel@tonic-gate 		case 0:
5940Sstevel@tonic-gate 			break;
5950Sstevel@tonic-gate 
5960Sstevel@tonic-gate 		case ECONNABORTED:
5970Sstevel@tonic-gate 			libscf_handle_rebind(h);
5980Sstevel@tonic-gate 			goto rep_retry;
5990Sstevel@tonic-gate 
6000Sstevel@tonic-gate 		case ECANCELED:
6010Sstevel@tonic-gate 			goto deleted;
6020Sstevel@tonic-gate 
6030Sstevel@tonic-gate 		default:
6040Sstevel@tonic-gate 			assert(0);
6050Sstevel@tonic-gate 			abort();
6060Sstevel@tonic-gate 		}
6070Sstevel@tonic-gate 	}
6080Sstevel@tonic-gate 
6090Sstevel@tonic-gate 	/* No more failures we live through, so add it to the list. */
6100Sstevel@tonic-gate 	(void) pthread_mutex_init(&inst->ri_lock, &mutex_attrs);
6110Sstevel@tonic-gate 	(void) pthread_mutex_init(&inst->ri_queue_lock, &mutex_attrs);
6120Sstevel@tonic-gate 	MUTEX_LOCK(&inst->ri_lock);
6130Sstevel@tonic-gate 	MUTEX_LOCK(&inst->ri_queue_lock);
6140Sstevel@tonic-gate 
6150Sstevel@tonic-gate 	(void) pthread_cond_init(&inst->ri_method_cv, NULL);
6160Sstevel@tonic-gate 
6170Sstevel@tonic-gate 	uu_list_node_init(inst, &inst->ri_link, restarter_instance_pool);
6180Sstevel@tonic-gate 	uu_list_insert(instance_list.ril_instance_list, inst, idx);
6190Sstevel@tonic-gate 	MUTEX_UNLOCK(&instance_list.ril_lock);
6200Sstevel@tonic-gate 
6210Sstevel@tonic-gate 	if (start_pid != -1 &&
6220Sstevel@tonic-gate 	    (inst->ri_flags & RINST_STYLE_MASK) == RINST_WAIT) {
6230Sstevel@tonic-gate 		int ret;
6240Sstevel@tonic-gate 		ret = wait_register(start_pid, inst->ri_i.i_fmri, 0, 1);
6250Sstevel@tonic-gate 		if (ret == -1) {
6260Sstevel@tonic-gate 			/*
6270Sstevel@tonic-gate 			 * Implication:  if we can't reregister the
6280Sstevel@tonic-gate 			 * instance, we will start another one.  Two
6290Sstevel@tonic-gate 			 * instances may or may not result in a resource
6300Sstevel@tonic-gate 			 * conflict.
6310Sstevel@tonic-gate 			 */
6320Sstevel@tonic-gate 			log_error(LOG_WARNING,
6330Sstevel@tonic-gate 			    "%s: couldn't reregister %ld for wait\n",
6340Sstevel@tonic-gate 			    inst->ri_i.i_fmri, start_pid);
6350Sstevel@tonic-gate 		} else if (ret == 1) {
6360Sstevel@tonic-gate 			/*
6370Sstevel@tonic-gate 			 * Leading PID has exited.
6380Sstevel@tonic-gate 			 */
6390Sstevel@tonic-gate 			(void) stop_instance(h, inst, RSTOP_EXIT);
6400Sstevel@tonic-gate 		}
6410Sstevel@tonic-gate 	}
6420Sstevel@tonic-gate 
6430Sstevel@tonic-gate 
6440Sstevel@tonic-gate 	scf_pg_destroy(pg);
6450Sstevel@tonic-gate 
6460Sstevel@tonic-gate 	if (do_commit_states)
6470Sstevel@tonic-gate 		(void) restarter_instance_update_states(h, inst, state,
648*12967Sgavin.maltby@oracle.com 		    next_state, RERR_NONE, reason);
6490Sstevel@tonic-gate 
6500Sstevel@tonic-gate 	log_framework(LOG_DEBUG, "%s is a %s-style service\n", name,
6510Sstevel@tonic-gate 	    service_style(inst->ri_flags));
6520Sstevel@tonic-gate 
6530Sstevel@tonic-gate 	MUTEX_UNLOCK(&inst->ri_queue_lock);
6540Sstevel@tonic-gate 	MUTEX_UNLOCK(&inst->ri_lock);
6550Sstevel@tonic-gate 
6560Sstevel@tonic-gate 	startd_free(svc_name, max_scf_name_size);
6570Sstevel@tonic-gate 	startd_free(inst_name, max_scf_name_size);
6580Sstevel@tonic-gate 	scf_snapshot_destroy(snap);
6590Sstevel@tonic-gate 	scf_instance_destroy(scf_inst);
6600Sstevel@tonic-gate 	scf_service_destroy(scf_svc);
6610Sstevel@tonic-gate 
6620Sstevel@tonic-gate 	log_framework(LOG_DEBUG, "%s: inserted instance into restarter list\n",
6630Sstevel@tonic-gate 	    name);
6640Sstevel@tonic-gate 
6650Sstevel@tonic-gate 	return (0);
666837Srm88369 
667837Srm88369 deleted:
668837Srm88369 	MUTEX_UNLOCK(&instance_list.ril_lock);
669837Srm88369 	startd_free(inst_name, max_scf_name_size);
670837Srm88369 	startd_free(svc_name, max_scf_name_size);
671837Srm88369 	if (snap != NULL)
672837Srm88369 		scf_snapshot_destroy(snap);
673837Srm88369 	scf_pg_destroy(pg);
674837Srm88369 	scf_instance_destroy(scf_inst);
675837Srm88369 	scf_service_destroy(scf_svc);
676837Srm88369 	startd_free((void *)inst->ri_i.i_fmri, strlen(inst->ri_i.i_fmri) + 1);
677837Srm88369 	uu_list_destroy(inst->ri_queue);
678837Srm88369 	if (inst->ri_logstem != NULL)
679837Srm88369 		startd_free(inst->ri_logstem, PATH_MAX);
680837Srm88369 	if (inst->ri_common_name != NULL)
681837Srm88369 		startd_free(inst->ri_common_name, max_scf_value_size);
682837Srm88369 	if (inst->ri_C_common_name != NULL)
683837Srm88369 		startd_free(inst->ri_C_common_name, max_scf_value_size);
684837Srm88369 	startd_free(inst->ri_utmpx_prefix, max_scf_value_size);
685837Srm88369 	startd_free(inst, sizeof (restarter_inst_t));
686837Srm88369 	return (ENOENT);
6870Sstevel@tonic-gate }
6880Sstevel@tonic-gate 
6890Sstevel@tonic-gate static void
restarter_delete_inst(restarter_inst_t * ri)6900Sstevel@tonic-gate restarter_delete_inst(restarter_inst_t *ri)
6910Sstevel@tonic-gate {
6920Sstevel@tonic-gate 	int id;
6930Sstevel@tonic-gate 	restarter_inst_t *rip;
6940Sstevel@tonic-gate 	void *cookie = NULL;
6950Sstevel@tonic-gate 	restarter_instance_qentry_t *e;
6960Sstevel@tonic-gate 
69711466SRoger.Faulkner@Sun.COM 	assert(MUTEX_HELD(&ri->ri_lock));
6980Sstevel@tonic-gate 
6990Sstevel@tonic-gate 	/*
7000Sstevel@tonic-gate 	 * Must drop the instance lock so we can pick up the instance_list
7010Sstevel@tonic-gate 	 * lock & remove the instance.
7020Sstevel@tonic-gate 	 */
7030Sstevel@tonic-gate 	id = ri->ri_id;
7040Sstevel@tonic-gate 	MUTEX_UNLOCK(&ri->ri_lock);
7050Sstevel@tonic-gate 
7060Sstevel@tonic-gate 	MUTEX_LOCK(&instance_list.ril_lock);
7070Sstevel@tonic-gate 
7080Sstevel@tonic-gate 	rip = uu_list_find(instance_list.ril_instance_list, &id, NULL, NULL);
7090Sstevel@tonic-gate 	if (rip == NULL) {
7100Sstevel@tonic-gate 		MUTEX_UNLOCK(&instance_list.ril_lock);
7110Sstevel@tonic-gate 		return;
7120Sstevel@tonic-gate 	}
7130Sstevel@tonic-gate 
7140Sstevel@tonic-gate 	assert(ri == rip);
7150Sstevel@tonic-gate 
7160Sstevel@tonic-gate 	uu_list_remove(instance_list.ril_instance_list, ri);
7170Sstevel@tonic-gate 
7180Sstevel@tonic-gate 	log_framework(LOG_DEBUG, "%s: deleted instance from restarter list\n",
7190Sstevel@tonic-gate 	    ri->ri_i.i_fmri);
7200Sstevel@tonic-gate 
7210Sstevel@tonic-gate 	MUTEX_UNLOCK(&instance_list.ril_lock);
7220Sstevel@tonic-gate 
7230Sstevel@tonic-gate 	/*
7240Sstevel@tonic-gate 	 * We can lock the instance without holding the instance_list lock
7250Sstevel@tonic-gate 	 * since we removed the instance from the list.
7260Sstevel@tonic-gate 	 */
7270Sstevel@tonic-gate 	MUTEX_LOCK(&ri->ri_lock);
7280Sstevel@tonic-gate 	MUTEX_LOCK(&ri->ri_queue_lock);
7290Sstevel@tonic-gate 
7300Sstevel@tonic-gate 	if (ri->ri_i.i_primary_ctid >= 1)
7310Sstevel@tonic-gate 		contract_hash_remove(ri->ri_i.i_primary_ctid);
7320Sstevel@tonic-gate 
7330Sstevel@tonic-gate 	while (ri->ri_method_thread != 0 || ri->ri_method_waiters > 0)
7340Sstevel@tonic-gate 		(void) pthread_cond_wait(&ri->ri_method_cv, &ri->ri_lock);
7350Sstevel@tonic-gate 
7360Sstevel@tonic-gate 	while ((e = uu_list_teardown(ri->ri_queue, &cookie)) != NULL)
7370Sstevel@tonic-gate 		startd_free(e, sizeof (*e));
7380Sstevel@tonic-gate 	uu_list_destroy(ri->ri_queue);
7390Sstevel@tonic-gate 
7400Sstevel@tonic-gate 	startd_free((void *)ri->ri_i.i_fmri, strlen(ri->ri_i.i_fmri) + 1);
741345Slianep 	startd_free(ri->ri_logstem, PATH_MAX);
7421753Srm88369 	if (ri->ri_common_name != NULL)
7431753Srm88369 		startd_free(ri->ri_common_name, max_scf_value_size);
7441753Srm88369 	if (ri->ri_C_common_name != NULL)
7451753Srm88369 		startd_free(ri->ri_C_common_name, max_scf_value_size);
7460Sstevel@tonic-gate 	startd_free(ri->ri_utmpx_prefix, max_scf_value_size);
7470Sstevel@tonic-gate 	(void) pthread_mutex_destroy(&ri->ri_lock);
7480Sstevel@tonic-gate 	(void) pthread_mutex_destroy(&ri->ri_queue_lock);
7490Sstevel@tonic-gate 	startd_free(ri, sizeof (restarter_inst_t));
7500Sstevel@tonic-gate }
7510Sstevel@tonic-gate 
7520Sstevel@tonic-gate /*
7530Sstevel@tonic-gate  * instance_is_wait_style()
7540Sstevel@tonic-gate  *
7550Sstevel@tonic-gate  *   Returns 1 if the given instance is a "wait-style" service instance.
7560Sstevel@tonic-gate  */
7570Sstevel@tonic-gate int
instance_is_wait_style(restarter_inst_t * inst)7580Sstevel@tonic-gate instance_is_wait_style(restarter_inst_t *inst)
7590Sstevel@tonic-gate {
76011466SRoger.Faulkner@Sun.COM 	assert(MUTEX_HELD(&inst->ri_lock));
7610Sstevel@tonic-gate 	return ((inst->ri_flags & RINST_STYLE_MASK) == RINST_WAIT);
7620Sstevel@tonic-gate }
7630Sstevel@tonic-gate 
7640Sstevel@tonic-gate /*
7650Sstevel@tonic-gate  * instance_is_transient_style()
7660Sstevel@tonic-gate  *
7670Sstevel@tonic-gate  *   Returns 1 if the given instance is a transient service instance.
7680Sstevel@tonic-gate  */
7690Sstevel@tonic-gate int
instance_is_transient_style(restarter_inst_t * inst)7700Sstevel@tonic-gate instance_is_transient_style(restarter_inst_t *inst)
7710Sstevel@tonic-gate {
77211466SRoger.Faulkner@Sun.COM 	assert(MUTEX_HELD(&inst->ri_lock));
7730Sstevel@tonic-gate 	return ((inst->ri_flags & RINST_STYLE_MASK) == RINST_TRANSIENT);
7740Sstevel@tonic-gate }
7750Sstevel@tonic-gate 
7760Sstevel@tonic-gate /*
7770Sstevel@tonic-gate  * instance_in_transition()
7780Sstevel@tonic-gate  * Returns 1 if instance is in transition, 0 if not
7790Sstevel@tonic-gate  */
7800Sstevel@tonic-gate int
instance_in_transition(restarter_inst_t * inst)7810Sstevel@tonic-gate instance_in_transition(restarter_inst_t *inst)
7820Sstevel@tonic-gate {
78311466SRoger.Faulkner@Sun.COM 	assert(MUTEX_HELD(&inst->ri_lock));
7840Sstevel@tonic-gate 	if (inst->ri_i.i_next_state == RESTARTER_STATE_NONE)
7850Sstevel@tonic-gate 		return (0);
7860Sstevel@tonic-gate 	return (1);
7870Sstevel@tonic-gate }
7880Sstevel@tonic-gate 
7890Sstevel@tonic-gate /*
7901273Sgm149974  * returns 1 if instance is already started, 0 if not
7911273Sgm149974  */
7921273Sgm149974 static int
instance_started(restarter_inst_t * inst)7931273Sgm149974 instance_started(restarter_inst_t *inst)
7941273Sgm149974 {
7951273Sgm149974 	int ret;
7961273Sgm149974 
79711466SRoger.Faulkner@Sun.COM 	assert(MUTEX_HELD(&inst->ri_lock));
7981273Sgm149974 
7991273Sgm149974 	if (inst->ri_i.i_state == RESTARTER_STATE_ONLINE ||
8001273Sgm149974 	    inst->ri_i.i_state == RESTARTER_STATE_DEGRADED)
8011273Sgm149974 		ret = 1;
8021273Sgm149974 	else
8031273Sgm149974 		ret = 0;
8041273Sgm149974 
8051273Sgm149974 	return (ret);
8061273Sgm149974 }
8071273Sgm149974 
8081273Sgm149974 /*
8090Sstevel@tonic-gate  * Returns
8100Sstevel@tonic-gate  *   0 - success
8110Sstevel@tonic-gate  *   ECONNRESET - success, but h was rebound
8120Sstevel@tonic-gate  */
8130Sstevel@tonic-gate int
restarter_instance_update_states(scf_handle_t * h,restarter_inst_t * ri,restarter_instance_state_t new_state,restarter_instance_state_t new_state_next,restarter_error_t err,restarter_str_t reason)8140Sstevel@tonic-gate restarter_instance_update_states(scf_handle_t *h, restarter_inst_t *ri,
8150Sstevel@tonic-gate     restarter_instance_state_t new_state,
816*12967Sgavin.maltby@oracle.com     restarter_instance_state_t new_state_next, restarter_error_t err,
817*12967Sgavin.maltby@oracle.com     restarter_str_t reason)
8180Sstevel@tonic-gate {
8190Sstevel@tonic-gate 	protocol_states_t *states;
8200Sstevel@tonic-gate 	int e;
8210Sstevel@tonic-gate 	uint_t retry_count = 0, msecs = ALLOC_DELAY;
8220Sstevel@tonic-gate 	boolean_t rebound = B_FALSE;
8231273Sgm149974 	int prev_state_online;
8241273Sgm149974 	int state_online;
8250Sstevel@tonic-gate 
82611466SRoger.Faulkner@Sun.COM 	assert(MUTEX_HELD(&ri->ri_lock));
8270Sstevel@tonic-gate 
8281273Sgm149974 	prev_state_online = instance_started(ri);
8291273Sgm149974 
8300Sstevel@tonic-gate retry:
8310Sstevel@tonic-gate 	e = _restarter_commit_states(h, &ri->ri_i, new_state, new_state_next,
832*12967Sgavin.maltby@oracle.com 	    restarter_get_str_short(reason));
8330Sstevel@tonic-gate 	switch (e) {
8340Sstevel@tonic-gate 	case 0:
8350Sstevel@tonic-gate 		break;
8360Sstevel@tonic-gate 
8370Sstevel@tonic-gate 	case ENOMEM:
8380Sstevel@tonic-gate 		++retry_count;
8390Sstevel@tonic-gate 		if (retry_count < ALLOC_RETRY) {
8400Sstevel@tonic-gate 			(void) poll(NULL, 0, msecs);
8410Sstevel@tonic-gate 			msecs *= ALLOC_DELAY_MULT;
8420Sstevel@tonic-gate 			goto retry;
8430Sstevel@tonic-gate 		}
8440Sstevel@tonic-gate 
8450Sstevel@tonic-gate 		/* Like startd_alloc(). */
8460Sstevel@tonic-gate 		uu_die("Insufficient memory.\n");
8470Sstevel@tonic-gate 		/* NOTREACHED */
8480Sstevel@tonic-gate 
8490Sstevel@tonic-gate 	case ECONNABORTED:
8500Sstevel@tonic-gate 		libscf_handle_rebind(h);
8510Sstevel@tonic-gate 		rebound = B_TRUE;
8520Sstevel@tonic-gate 		goto retry;
8530Sstevel@tonic-gate 
8540Sstevel@tonic-gate 	case EPERM:
8550Sstevel@tonic-gate 	case EACCES:
8560Sstevel@tonic-gate 	case EROFS:
8570Sstevel@tonic-gate 		log_error(LOG_NOTICE, "Could not commit state change for %s "
8580Sstevel@tonic-gate 		    "to repository: %s.\n", ri->ri_i.i_fmri, strerror(e));
8590Sstevel@tonic-gate 		/* FALLTHROUGH */
8600Sstevel@tonic-gate 
8610Sstevel@tonic-gate 	case ENOENT:
8620Sstevel@tonic-gate 		ri->ri_i.i_state = new_state;
8630Sstevel@tonic-gate 		ri->ri_i.i_next_state = new_state_next;
8640Sstevel@tonic-gate 		break;
8650Sstevel@tonic-gate 
8660Sstevel@tonic-gate 	case EINVAL:
8670Sstevel@tonic-gate 	default:
8680Sstevel@tonic-gate 		bad_error("_restarter_commit_states", e);
8690Sstevel@tonic-gate 	}
8700Sstevel@tonic-gate 
8710Sstevel@tonic-gate 	states = startd_alloc(sizeof (protocol_states_t));
8720Sstevel@tonic-gate 	states->ps_state = new_state;
8730Sstevel@tonic-gate 	states->ps_state_next = new_state_next;
8740Sstevel@tonic-gate 	states->ps_err = err;
875*12967Sgavin.maltby@oracle.com 	states->ps_reason = reason;
8760Sstevel@tonic-gate 	graph_protocol_send_event(ri->ri_i.i_fmri, GRAPH_UPDATE_STATE_CHANGE,
8770Sstevel@tonic-gate 	    (void *)states);
8780Sstevel@tonic-gate 
8791273Sgm149974 	state_online = instance_started(ri);
8801273Sgm149974 
8811273Sgm149974 	if (prev_state_online && !state_online)
8821273Sgm149974 		ri->ri_post_offline_hook();
8831273Sgm149974 	else if (!prev_state_online && state_online)
8840Sstevel@tonic-gate 		ri->ri_post_online_hook();
8850Sstevel@tonic-gate 
8860Sstevel@tonic-gate 	return (rebound ? ECONNRESET : 0);
8870Sstevel@tonic-gate }
8880Sstevel@tonic-gate 
8890Sstevel@tonic-gate void
restarter_mark_pending_snapshot(const char * fmri,uint_t flag)8900Sstevel@tonic-gate restarter_mark_pending_snapshot(const char *fmri, uint_t flag)
8910Sstevel@tonic-gate {
8920Sstevel@tonic-gate 	restarter_inst_t *inst;
8930Sstevel@tonic-gate 
8940Sstevel@tonic-gate 	assert(flag == RINST_RETAKE_RUNNING || flag == RINST_RETAKE_START);
8950Sstevel@tonic-gate 
8960Sstevel@tonic-gate 	inst = inst_lookup_by_name(fmri);
8970Sstevel@tonic-gate 	if (inst == NULL)
8980Sstevel@tonic-gate 		return;
8990Sstevel@tonic-gate 
9000Sstevel@tonic-gate 	inst->ri_flags |= flag;
9010Sstevel@tonic-gate 
9020Sstevel@tonic-gate 	MUTEX_UNLOCK(&inst->ri_lock);
9030Sstevel@tonic-gate }
9040Sstevel@tonic-gate 
9050Sstevel@tonic-gate static void
restarter_take_pending_snapshots(scf_handle_t * h)9060Sstevel@tonic-gate restarter_take_pending_snapshots(scf_handle_t *h)
9070Sstevel@tonic-gate {
9080Sstevel@tonic-gate 	restarter_inst_t *inst;
9090Sstevel@tonic-gate 	int r;
9100Sstevel@tonic-gate 
9110Sstevel@tonic-gate 	MUTEX_LOCK(&instance_list.ril_lock);
9120Sstevel@tonic-gate 
9130Sstevel@tonic-gate 	for (inst = uu_list_first(instance_list.ril_instance_list);
9140Sstevel@tonic-gate 	    inst != NULL;
9150Sstevel@tonic-gate 	    inst = uu_list_next(instance_list.ril_instance_list, inst)) {
9160Sstevel@tonic-gate 		const char *fmri;
9170Sstevel@tonic-gate 		scf_instance_t *sinst = NULL;
9180Sstevel@tonic-gate 
9190Sstevel@tonic-gate 		MUTEX_LOCK(&inst->ri_lock);
9200Sstevel@tonic-gate 
9210Sstevel@tonic-gate 		/*
9220Sstevel@tonic-gate 		 * This is where we'd check inst->ri_method_thread and if it
9230Sstevel@tonic-gate 		 * were nonzero we'd wait in anticipation of another thread
9240Sstevel@tonic-gate 		 * executing a method for inst.  Doing so with the instance_list
9250Sstevel@tonic-gate 		 * locked, though, leads to deadlock.  Since taking a snapshot
9260Sstevel@tonic-gate 		 * during that window won't hurt anything, we'll just continue.
9270Sstevel@tonic-gate 		 */
9280Sstevel@tonic-gate 
9290Sstevel@tonic-gate 		fmri = inst->ri_i.i_fmri;
9300Sstevel@tonic-gate 
9310Sstevel@tonic-gate 		if (inst->ri_flags & RINST_RETAKE_RUNNING) {
9320Sstevel@tonic-gate 			scf_snapshot_t *rsnap;
9330Sstevel@tonic-gate 
9340Sstevel@tonic-gate 			(void) libscf_fmri_get_instance(h, fmri, &sinst);
9350Sstevel@tonic-gate 
9360Sstevel@tonic-gate 			rsnap = libscf_get_or_make_running_snapshot(sinst,
9370Sstevel@tonic-gate 			    fmri, B_FALSE);
9380Sstevel@tonic-gate 
9390Sstevel@tonic-gate 			scf_instance_destroy(sinst);
9400Sstevel@tonic-gate 
9410Sstevel@tonic-gate 			if (rsnap != NULL)
9420Sstevel@tonic-gate 				inst->ri_flags &= ~RINST_RETAKE_RUNNING;
9430Sstevel@tonic-gate 
9440Sstevel@tonic-gate 			scf_snapshot_destroy(rsnap);
9450Sstevel@tonic-gate 		}
9460Sstevel@tonic-gate 
9470Sstevel@tonic-gate 		if (inst->ri_flags & RINST_RETAKE_START) {
9480Sstevel@tonic-gate 			switch (r = libscf_snapshots_poststart(h, fmri,
9490Sstevel@tonic-gate 			    B_FALSE)) {
9500Sstevel@tonic-gate 			case 0:
9510Sstevel@tonic-gate 			case ENOENT:
9520Sstevel@tonic-gate 				inst->ri_flags &= ~RINST_RETAKE_START;
9530Sstevel@tonic-gate 				break;
9540Sstevel@tonic-gate 
9550Sstevel@tonic-gate 			case ECONNABORTED:
9560Sstevel@tonic-gate 				break;
9570Sstevel@tonic-gate 
9580Sstevel@tonic-gate 			case EACCES:
9590Sstevel@tonic-gate 			default:
9600Sstevel@tonic-gate 				bad_error("libscf_snapshots_poststart", r);
9610Sstevel@tonic-gate 			}
9620Sstevel@tonic-gate 		}
9630Sstevel@tonic-gate 
9640Sstevel@tonic-gate 		MUTEX_UNLOCK(&inst->ri_lock);
9650Sstevel@tonic-gate 	}
9660Sstevel@tonic-gate 
9670Sstevel@tonic-gate 	MUTEX_UNLOCK(&instance_list.ril_lock);
9680Sstevel@tonic-gate }
9690Sstevel@tonic-gate 
9700Sstevel@tonic-gate /* ARGSUSED */
9710Sstevel@tonic-gate void *
restarter_post_fsminimal_thread(void * unused)9720Sstevel@tonic-gate restarter_post_fsminimal_thread(void *unused)
9730Sstevel@tonic-gate {
9740Sstevel@tonic-gate 	scf_handle_t *h;
9750Sstevel@tonic-gate 	int r;
9760Sstevel@tonic-gate 
9770Sstevel@tonic-gate 	h = libscf_handle_create_bound_loop();
9780Sstevel@tonic-gate 
9790Sstevel@tonic-gate 	for (;;) {
9800Sstevel@tonic-gate 		r = libscf_create_self(h);
9810Sstevel@tonic-gate 		if (r == 0)
9820Sstevel@tonic-gate 			break;
9830Sstevel@tonic-gate 
9840Sstevel@tonic-gate 		assert(r == ECONNABORTED);
9850Sstevel@tonic-gate 		libscf_handle_rebind(h);
9860Sstevel@tonic-gate 	}
9870Sstevel@tonic-gate 
9880Sstevel@tonic-gate 	restarter_take_pending_snapshots(h);
9890Sstevel@tonic-gate 
9900Sstevel@tonic-gate 	(void) scf_handle_unbind(h);
9910Sstevel@tonic-gate 	scf_handle_destroy(h);
9920Sstevel@tonic-gate 
9930Sstevel@tonic-gate 	return (NULL);
9940Sstevel@tonic-gate }
9950Sstevel@tonic-gate 
9960Sstevel@tonic-gate /*
9970Sstevel@tonic-gate  * int stop_instance()
9980Sstevel@tonic-gate  *
9990Sstevel@tonic-gate  *   Stop the instance identified by the instance given as the second argument,
10000Sstevel@tonic-gate  *   for the cause stated.
10010Sstevel@tonic-gate  *
10020Sstevel@tonic-gate  *   Returns
10030Sstevel@tonic-gate  *     0 - success
10040Sstevel@tonic-gate  *     -1 - inst is in transition
10050Sstevel@tonic-gate  */
10060Sstevel@tonic-gate static int
stop_instance(scf_handle_t * local_handle,restarter_inst_t * inst,stop_cause_t cause)10070Sstevel@tonic-gate stop_instance(scf_handle_t *local_handle, restarter_inst_t *inst,
10080Sstevel@tonic-gate     stop_cause_t cause)
10090Sstevel@tonic-gate {
10100Sstevel@tonic-gate 	fork_info_t *info;
10110Sstevel@tonic-gate 	const char *cp;
10120Sstevel@tonic-gate 	int err;
10130Sstevel@tonic-gate 	restarter_error_t re;
1014*12967Sgavin.maltby@oracle.com 	restarter_str_t	reason;
10150Sstevel@tonic-gate 
101611466SRoger.Faulkner@Sun.COM 	assert(MUTEX_HELD(&inst->ri_lock));
10170Sstevel@tonic-gate 	assert(inst->ri_method_thread == 0);
10180Sstevel@tonic-gate 
10190Sstevel@tonic-gate 	switch (cause) {
10200Sstevel@tonic-gate 	case RSTOP_EXIT:
10210Sstevel@tonic-gate 		re = RERR_RESTART;
1022*12967Sgavin.maltby@oracle.com 		reason = restarter_str_ct_ev_exit;
10230Sstevel@tonic-gate 		cp = "all processes in service exited";
10240Sstevel@tonic-gate 		break;
10250Sstevel@tonic-gate 	case RSTOP_CORE:
10260Sstevel@tonic-gate 		re = RERR_FAULT;
1027*12967Sgavin.maltby@oracle.com 		reason = restarter_str_ct_ev_core;
10280Sstevel@tonic-gate 		cp = "process dumped core";
10290Sstevel@tonic-gate 		break;
10300Sstevel@tonic-gate 	case RSTOP_SIGNAL:
10310Sstevel@tonic-gate 		re = RERR_FAULT;
1032*12967Sgavin.maltby@oracle.com 		reason = restarter_str_ct_ev_signal;
10330Sstevel@tonic-gate 		cp = "process received fatal signal from outside the service";
10340Sstevel@tonic-gate 		break;
10350Sstevel@tonic-gate 	case RSTOP_HWERR:
10360Sstevel@tonic-gate 		re = RERR_FAULT;
1037*12967Sgavin.maltby@oracle.com 		reason = restarter_str_ct_ev_hwerr;
10380Sstevel@tonic-gate 		cp = "process killed due to uncorrectable hardware error";
10390Sstevel@tonic-gate 		break;
10400Sstevel@tonic-gate 	case RSTOP_DEPENDENCY:
10410Sstevel@tonic-gate 		re = RERR_RESTART;
1042*12967Sgavin.maltby@oracle.com 		reason = restarter_str_dependency_activity;
10430Sstevel@tonic-gate 		cp = "dependency activity requires stop";
10440Sstevel@tonic-gate 		break;
10450Sstevel@tonic-gate 	case RSTOP_DISABLE:
10460Sstevel@tonic-gate 		re = RERR_RESTART;
1047*12967Sgavin.maltby@oracle.com 		reason = restarter_str_disable_request;
10480Sstevel@tonic-gate 		cp = "service disabled";
10490Sstevel@tonic-gate 		break;
10500Sstevel@tonic-gate 	case RSTOP_RESTART:
10510Sstevel@tonic-gate 		re = RERR_RESTART;
1052*12967Sgavin.maltby@oracle.com 		reason = restarter_str_restart_request;
10530Sstevel@tonic-gate 		cp = "service restarting";
10540Sstevel@tonic-gate 		break;
10550Sstevel@tonic-gate 	default:
10560Sstevel@tonic-gate #ifndef NDEBUG
10570Sstevel@tonic-gate 		(void) fprintf(stderr, "Unknown cause %d at %s:%d.\n",
10580Sstevel@tonic-gate 		    cause, __FILE__, __LINE__);
10590Sstevel@tonic-gate #endif
10600Sstevel@tonic-gate 		abort();
10610Sstevel@tonic-gate 	}
10620Sstevel@tonic-gate 
10630Sstevel@tonic-gate 	/* Services in the disabled and maintenance state are ignored */
10640Sstevel@tonic-gate 	if (inst->ri_i.i_state == RESTARTER_STATE_MAINT ||
10650Sstevel@tonic-gate 	    inst->ri_i.i_state == RESTARTER_STATE_DISABLED) {
10660Sstevel@tonic-gate 		log_framework(LOG_DEBUG,
10670Sstevel@tonic-gate 		    "%s: stop_instance -> is maint/disabled\n",
10680Sstevel@tonic-gate 		    inst->ri_i.i_fmri);
10690Sstevel@tonic-gate 		return (0);
10700Sstevel@tonic-gate 	}
10710Sstevel@tonic-gate 
10720Sstevel@tonic-gate 	/* Already stopped instances are left alone */
10730Sstevel@tonic-gate 	if (instance_started(inst) == 0) {
10740Sstevel@tonic-gate 		log_framework(LOG_DEBUG, "Restarter: %s is already stopped.\n",
10750Sstevel@tonic-gate 		    inst->ri_i.i_fmri);
10760Sstevel@tonic-gate 		return (0);
10770Sstevel@tonic-gate 	}
10780Sstevel@tonic-gate 
10790Sstevel@tonic-gate 	if (instance_in_transition(inst)) {
10800Sstevel@tonic-gate 		/* requeue event by returning -1 */
10810Sstevel@tonic-gate 		log_framework(LOG_DEBUG,
10820Sstevel@tonic-gate 		    "Restarter: Not stopping %s, in transition.\n",
10830Sstevel@tonic-gate 		    inst->ri_i.i_fmri);
10840Sstevel@tonic-gate 		return (-1);
10850Sstevel@tonic-gate 	}
10860Sstevel@tonic-gate 
10870Sstevel@tonic-gate 	log_instance(inst, B_TRUE, "Stopping because %s.", cp);
10880Sstevel@tonic-gate 
10890Sstevel@tonic-gate 	log_framework(re == RERR_FAULT ? LOG_INFO : LOG_DEBUG,
10900Sstevel@tonic-gate 	    "%s: Instance stopping because %s.\n", inst->ri_i.i_fmri, cp);
10910Sstevel@tonic-gate 
10920Sstevel@tonic-gate 	if (instance_is_wait_style(inst) && cause == RSTOP_EXIT) {
10930Sstevel@tonic-gate 		/*
10940Sstevel@tonic-gate 		 * No need to stop instance, as child has exited; remove
10950Sstevel@tonic-gate 		 * contract and move the instance to the offline state.
10960Sstevel@tonic-gate 		 */
10970Sstevel@tonic-gate 		switch (err = restarter_instance_update_states(local_handle,
10980Sstevel@tonic-gate 		    inst, inst->ri_i.i_state, RESTARTER_STATE_OFFLINE, re,
1099*12967Sgavin.maltby@oracle.com 		    reason)) {
11000Sstevel@tonic-gate 		case 0:
11010Sstevel@tonic-gate 		case ECONNRESET:
11020Sstevel@tonic-gate 			break;
11030Sstevel@tonic-gate 
11040Sstevel@tonic-gate 		default:
11050Sstevel@tonic-gate 			bad_error("restarter_instance_update_states", err);
11060Sstevel@tonic-gate 		}
11070Sstevel@tonic-gate 
11080Sstevel@tonic-gate 		(void) update_fault_count(inst, FAULT_COUNT_RESET);
110911623SSean.Wilcox@Sun.COM 		reset_start_times(inst);
11100Sstevel@tonic-gate 
11110Sstevel@tonic-gate 		if (inst->ri_i.i_primary_ctid != 0) {
11120Sstevel@tonic-gate 			inst->ri_m_inst =
11130Sstevel@tonic-gate 			    safe_scf_instance_create(local_handle);
11140Sstevel@tonic-gate 			inst->ri_mi_deleted = B_FALSE;
11150Sstevel@tonic-gate 
11160Sstevel@tonic-gate 			libscf_reget_instance(inst);
11170Sstevel@tonic-gate 			method_remove_contract(inst, B_TRUE, B_TRUE);
11180Sstevel@tonic-gate 
11190Sstevel@tonic-gate 			scf_instance_destroy(inst->ri_m_inst);
11200Sstevel@tonic-gate 			inst->ri_m_inst = NULL;
11210Sstevel@tonic-gate 		}
11220Sstevel@tonic-gate 
11230Sstevel@tonic-gate 		switch (err = restarter_instance_update_states(local_handle,
11240Sstevel@tonic-gate 		    inst, inst->ri_i.i_next_state, RESTARTER_STATE_NONE, re,
1125*12967Sgavin.maltby@oracle.com 		    reason)) {
11260Sstevel@tonic-gate 		case 0:
11270Sstevel@tonic-gate 		case ECONNRESET:
11280Sstevel@tonic-gate 			break;
11290Sstevel@tonic-gate 
11300Sstevel@tonic-gate 		default:
11310Sstevel@tonic-gate 			bad_error("restarter_instance_update_states", err);
11320Sstevel@tonic-gate 		}
11330Sstevel@tonic-gate 
11340Sstevel@tonic-gate 		return (0);
11357219Srm88369 	} else if (instance_is_wait_style(inst) && re == RERR_RESTART) {
11367219Srm88369 		/*
11377219Srm88369 		 * Stopping a wait service through means other than the pid
11387219Srm88369 		 * exiting should keep wait_thread() from restarting the
11397219Srm88369 		 * service, by removing it from the wait list.
11407219Srm88369 		 * We cannot remove it right now otherwise the process will
11417219Srm88369 		 * end up <defunct> so mark it to be ignored.
11427219Srm88369 		 */
11437219Srm88369 		wait_ignore_by_fmri(inst->ri_i.i_fmri);
11440Sstevel@tonic-gate 	}
11450Sstevel@tonic-gate 
11460Sstevel@tonic-gate 	switch (err = restarter_instance_update_states(local_handle, inst,
11470Sstevel@tonic-gate 	    inst->ri_i.i_state, inst->ri_i.i_enabled ? RESTARTER_STATE_OFFLINE :
1148*12967Sgavin.maltby@oracle.com 	    RESTARTER_STATE_DISABLED, RERR_NONE, reason)) {
11490Sstevel@tonic-gate 	case 0:
11500Sstevel@tonic-gate 	case ECONNRESET:
11510Sstevel@tonic-gate 		break;
11520Sstevel@tonic-gate 
11530Sstevel@tonic-gate 	default:
11540Sstevel@tonic-gate 		bad_error("restarter_instance_update_states", err);
11550Sstevel@tonic-gate 	}
11560Sstevel@tonic-gate 
11570Sstevel@tonic-gate 	info = startd_zalloc(sizeof (fork_info_t));
11580Sstevel@tonic-gate 
11590Sstevel@tonic-gate 	info->sf_id = inst->ri_id;
11600Sstevel@tonic-gate 	info->sf_method_type = METHOD_STOP;
11610Sstevel@tonic-gate 	info->sf_event_type = re;
1162*12967Sgavin.maltby@oracle.com 	info->sf_reason = reason;
11630Sstevel@tonic-gate 	inst->ri_method_thread = startd_thread_create(method_thread, info);
11640Sstevel@tonic-gate 
11650Sstevel@tonic-gate 	return (0);
11660Sstevel@tonic-gate }
11670Sstevel@tonic-gate 
11680Sstevel@tonic-gate /*
11690Sstevel@tonic-gate  * Returns
11700Sstevel@tonic-gate  *   ENOENT - fmri is not in instance_list
11710Sstevel@tonic-gate  *   0 - success
11720Sstevel@tonic-gate  *   ECONNRESET - success, though handle was rebound
11730Sstevel@tonic-gate  *   -1 - instance is in transition
11740Sstevel@tonic-gate  */
11750Sstevel@tonic-gate int
stop_instance_fmri(scf_handle_t * h,const char * fmri,uint_t flags)11760Sstevel@tonic-gate stop_instance_fmri(scf_handle_t *h, const char *fmri, uint_t flags)
11770Sstevel@tonic-gate {
11780Sstevel@tonic-gate 	restarter_inst_t *rip;
11790Sstevel@tonic-gate 	int r;
11800Sstevel@tonic-gate 
11810Sstevel@tonic-gate 	rip = inst_lookup_by_name(fmri);
11820Sstevel@tonic-gate 	if (rip == NULL)
11830Sstevel@tonic-gate 		return (ENOENT);
11840Sstevel@tonic-gate 
11850Sstevel@tonic-gate 	r = stop_instance(h, rip, flags);
11860Sstevel@tonic-gate 
11870Sstevel@tonic-gate 	MUTEX_UNLOCK(&rip->ri_lock);
11880Sstevel@tonic-gate 
11890Sstevel@tonic-gate 	return (r);
11900Sstevel@tonic-gate }
11910Sstevel@tonic-gate 
11920Sstevel@tonic-gate static void
unmaintain_instance(scf_handle_t * h,restarter_inst_t * rip,unmaint_cause_t cause)11930Sstevel@tonic-gate unmaintain_instance(scf_handle_t *h, restarter_inst_t *rip,
11940Sstevel@tonic-gate     unmaint_cause_t cause)
11950Sstevel@tonic-gate {
11960Sstevel@tonic-gate 	ctid_t ctid;
11970Sstevel@tonic-gate 	scf_instance_t *inst;
11980Sstevel@tonic-gate 	int r;
11990Sstevel@tonic-gate 	uint_t tries = 0, msecs = ALLOC_DELAY;
12000Sstevel@tonic-gate 	const char *cp;
1201*12967Sgavin.maltby@oracle.com 	restarter_str_t	reason;
12020Sstevel@tonic-gate 
120311466SRoger.Faulkner@Sun.COM 	assert(MUTEX_HELD(&rip->ri_lock));
12040Sstevel@tonic-gate 
12050Sstevel@tonic-gate 	if (rip->ri_i.i_state != RESTARTER_STATE_MAINT) {
12060Sstevel@tonic-gate 		log_error(LOG_DEBUG, "Restarter: "
12070Sstevel@tonic-gate 		    "Ignoring maintenance off command because %s is not in the "
12080Sstevel@tonic-gate 		    "maintenance state.\n", rip->ri_i.i_fmri);
12090Sstevel@tonic-gate 		return;
12100Sstevel@tonic-gate 	}
12110Sstevel@tonic-gate 
12120Sstevel@tonic-gate 	switch (cause) {
12130Sstevel@tonic-gate 	case RUNMAINT_CLEAR:
12140Sstevel@tonic-gate 		cp = "clear requested";
1215*12967Sgavin.maltby@oracle.com 		reason = restarter_str_clear_request;
12160Sstevel@tonic-gate 		break;
12170Sstevel@tonic-gate 	case RUNMAINT_DISABLE:
12180Sstevel@tonic-gate 		cp = "disable requested";
1219*12967Sgavin.maltby@oracle.com 		reason = restarter_str_disable_request;
12200Sstevel@tonic-gate 		break;
12210Sstevel@tonic-gate 	default:
12220Sstevel@tonic-gate #ifndef NDEBUG
12230Sstevel@tonic-gate 		(void) fprintf(stderr, "Uncaught case for %d at %s:%d.\n",
12240Sstevel@tonic-gate 		    cause, __FILE__, __LINE__);
12250Sstevel@tonic-gate #endif
12260Sstevel@tonic-gate 		abort();
12270Sstevel@tonic-gate 	}
12280Sstevel@tonic-gate 
12290Sstevel@tonic-gate 	log_instance(rip, B_TRUE, "Leaving maintenance because %s.",
12300Sstevel@tonic-gate 	    cp);
12310Sstevel@tonic-gate 	log_framework(LOG_DEBUG, "%s: Instance leaving maintenance because "
12320Sstevel@tonic-gate 	    "%s.\n", rip->ri_i.i_fmri, cp);
12330Sstevel@tonic-gate 
12340Sstevel@tonic-gate 	(void) restarter_instance_update_states(h, rip, RESTARTER_STATE_UNINIT,
1235*12967Sgavin.maltby@oracle.com 	    RESTARTER_STATE_NONE, RERR_RESTART, reason);
12360Sstevel@tonic-gate 
12370Sstevel@tonic-gate 	/*
12380Sstevel@tonic-gate 	 * If we did ADMIN_MAINT_ON_IMMEDIATE, then there might still be
12390Sstevel@tonic-gate 	 * a primary contract.
12400Sstevel@tonic-gate 	 */
12410Sstevel@tonic-gate 	if (rip->ri_i.i_primary_ctid == 0)
12420Sstevel@tonic-gate 		return;
12430Sstevel@tonic-gate 
12440Sstevel@tonic-gate 	ctid = rip->ri_i.i_primary_ctid;
12450Sstevel@tonic-gate 	contract_abandon(ctid);
12460Sstevel@tonic-gate 	rip->ri_i.i_primary_ctid = 0;
12470Sstevel@tonic-gate 
12480Sstevel@tonic-gate rep_retry:
12490Sstevel@tonic-gate 	switch (r = libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &inst)) {
12500Sstevel@tonic-gate 	case 0:
12510Sstevel@tonic-gate 		break;
12520Sstevel@tonic-gate 
12530Sstevel@tonic-gate 	case ECONNABORTED:
12540Sstevel@tonic-gate 		libscf_handle_rebind(h);
12550Sstevel@tonic-gate 		goto rep_retry;
12560Sstevel@tonic-gate 
12570Sstevel@tonic-gate 	case ENOENT:
12580Sstevel@tonic-gate 		/* Must have been deleted. */
12590Sstevel@tonic-gate 		return;
12600Sstevel@tonic-gate 
12610Sstevel@tonic-gate 	case EINVAL:
12620Sstevel@tonic-gate 	case ENOTSUP:
12630Sstevel@tonic-gate 	default:
12640Sstevel@tonic-gate 		bad_error("libscf_handle_rebind", r);
12650Sstevel@tonic-gate 	}
12660Sstevel@tonic-gate 
12670Sstevel@tonic-gate again:
12680Sstevel@tonic-gate 	r = restarter_remove_contract(inst, ctid, RESTARTER_CONTRACT_PRIMARY);
12690Sstevel@tonic-gate 	switch (r) {
12700Sstevel@tonic-gate 	case 0:
12710Sstevel@tonic-gate 		break;
12720Sstevel@tonic-gate 
12730Sstevel@tonic-gate 	case ENOMEM:
12740Sstevel@tonic-gate 		++tries;
12750Sstevel@tonic-gate 		if (tries < ALLOC_RETRY) {
12760Sstevel@tonic-gate 			(void) poll(NULL, 0, msecs);
12770Sstevel@tonic-gate 			msecs *= ALLOC_DELAY_MULT;
12780Sstevel@tonic-gate 			goto again;
12790Sstevel@tonic-gate 		}
12800Sstevel@tonic-gate 
12810Sstevel@tonic-gate 		uu_die("Insufficient memory.\n");
12820Sstevel@tonic-gate 		/* NOTREACHED */
12830Sstevel@tonic-gate 
12840Sstevel@tonic-gate 	case ECONNABORTED:
12850Sstevel@tonic-gate 		scf_instance_destroy(inst);
12860Sstevel@tonic-gate 		libscf_handle_rebind(h);
12870Sstevel@tonic-gate 		goto rep_retry;
12880Sstevel@tonic-gate 
12890Sstevel@tonic-gate 	case ECANCELED:
12900Sstevel@tonic-gate 		break;
12910Sstevel@tonic-gate 
12920Sstevel@tonic-gate 	case EPERM:
12930Sstevel@tonic-gate 	case EACCES:
12940Sstevel@tonic-gate 	case EROFS:
12950Sstevel@tonic-gate 		log_error(LOG_INFO,
12960Sstevel@tonic-gate 		    "Could not remove contract id %lu for %s (%s).\n", ctid,
12970Sstevel@tonic-gate 		    rip->ri_i.i_fmri, strerror(r));
12980Sstevel@tonic-gate 		break;
12990Sstevel@tonic-gate 
13000Sstevel@tonic-gate 	case EINVAL:
13010Sstevel@tonic-gate 	case EBADF:
13020Sstevel@tonic-gate 	default:
13030Sstevel@tonic-gate 		bad_error("restarter_remove_contract", r);
13040Sstevel@tonic-gate 	}
13050Sstevel@tonic-gate 
13060Sstevel@tonic-gate 	scf_instance_destroy(inst);
13070Sstevel@tonic-gate }
13080Sstevel@tonic-gate 
13090Sstevel@tonic-gate /*
13100Sstevel@tonic-gate  * enable_inst()
13110Sstevel@tonic-gate  *   Set inst->ri_i.i_enabled.  Expects 'e' to be _ENABLE, _DISABLE, or
13120Sstevel@tonic-gate  *   _ADMIN_DISABLE.  If the event is _ENABLE and inst is uninitialized or
13130Sstevel@tonic-gate  *   disabled, move it to offline.  If the event is _DISABLE or
13140Sstevel@tonic-gate  *   _ADMIN_DISABLE, make sure inst will move to disabled.
13150Sstevel@tonic-gate  *
13160Sstevel@tonic-gate  *   Returns
13170Sstevel@tonic-gate  *     0 - success
13180Sstevel@tonic-gate  *     ECONNRESET - h was rebound
13190Sstevel@tonic-gate  */
13200Sstevel@tonic-gate static int
enable_inst(scf_handle_t * h,restarter_inst_t * inst,restarter_instance_qentry_t * riq)1321*12967Sgavin.maltby@oracle.com enable_inst(scf_handle_t *h, restarter_inst_t *inst,
1322*12967Sgavin.maltby@oracle.com     restarter_instance_qentry_t *riq)
13230Sstevel@tonic-gate {
13240Sstevel@tonic-gate 	restarter_instance_state_t state;
1325*12967Sgavin.maltby@oracle.com 	restarter_event_type_t e = riq->riq_type;
1326*12967Sgavin.maltby@oracle.com 	restarter_str_t reason = restarter_str_per_configuration;
13270Sstevel@tonic-gate 	int r;
13280Sstevel@tonic-gate 
132911466SRoger.Faulkner@Sun.COM 	assert(MUTEX_HELD(&inst->ri_lock));
13300Sstevel@tonic-gate 	assert(e == RESTARTER_EVENT_TYPE_ADMIN_DISABLE ||
13310Sstevel@tonic-gate 	    e == RESTARTER_EVENT_TYPE_DISABLE ||
13320Sstevel@tonic-gate 	    e == RESTARTER_EVENT_TYPE_ENABLE);
13330Sstevel@tonic-gate 	assert(instance_in_transition(inst) == 0);
13340Sstevel@tonic-gate 
13350Sstevel@tonic-gate 	state = inst->ri_i.i_state;
13360Sstevel@tonic-gate 
13370Sstevel@tonic-gate 	if (e == RESTARTER_EVENT_TYPE_ENABLE) {
13380Sstevel@tonic-gate 		inst->ri_i.i_enabled = 1;
13390Sstevel@tonic-gate 
13400Sstevel@tonic-gate 		if (state == RESTARTER_STATE_UNINIT ||
13410Sstevel@tonic-gate 		    state == RESTARTER_STATE_DISABLED) {
13420Sstevel@tonic-gate 			/*
13430Sstevel@tonic-gate 			 * B_FALSE: Don't log an error if the log_instance()
13440Sstevel@tonic-gate 			 * fails because it will fail on the miniroot before
13450Sstevel@tonic-gate 			 * install-discovery runs.
13460Sstevel@tonic-gate 			 */
13470Sstevel@tonic-gate 			log_instance(inst, B_FALSE, "Enabled.");
13480Sstevel@tonic-gate 			log_framework(LOG_DEBUG, "%s: Instance enabled.\n",
13490Sstevel@tonic-gate 			    inst->ri_i.i_fmri);
1350*12967Sgavin.maltby@oracle.com 
1351*12967Sgavin.maltby@oracle.com 			/*
1352*12967Sgavin.maltby@oracle.com 			 * If we are coming from DISABLED, it was obviously an
1353*12967Sgavin.maltby@oracle.com 			 * enable request. If we are coming from UNINIT, it may
1354*12967Sgavin.maltby@oracle.com 			 * have been a sevice in MAINT that was cleared.
1355*12967Sgavin.maltby@oracle.com 			 */
1356*12967Sgavin.maltby@oracle.com 			if (riq->riq_reason == restarter_str_clear_request)
1357*12967Sgavin.maltby@oracle.com 				reason = restarter_str_clear_request;
1358*12967Sgavin.maltby@oracle.com 			else if (state == RESTARTER_STATE_DISABLED)
1359*12967Sgavin.maltby@oracle.com 				reason = restarter_str_enable_request;
13600Sstevel@tonic-gate 			(void) restarter_instance_update_states(h, inst,
13610Sstevel@tonic-gate 			    RESTARTER_STATE_OFFLINE, RESTARTER_STATE_NONE,
1362*12967Sgavin.maltby@oracle.com 			    RERR_NONE, reason);
13630Sstevel@tonic-gate 		} else {
13640Sstevel@tonic-gate 			log_framework(LOG_DEBUG, "Restarter: "
13650Sstevel@tonic-gate 			    "Not changing state of %s for enable command.\n",
13660Sstevel@tonic-gate 			    inst->ri_i.i_fmri);
13670Sstevel@tonic-gate 		}
13680Sstevel@tonic-gate 	} else {
13690Sstevel@tonic-gate 		inst->ri_i.i_enabled = 0;
13700Sstevel@tonic-gate 
13710Sstevel@tonic-gate 		switch (state) {
13720Sstevel@tonic-gate 		case RESTARTER_STATE_ONLINE:
13730Sstevel@tonic-gate 		case RESTARTER_STATE_DEGRADED:
13740Sstevel@tonic-gate 			r = stop_instance(h, inst, RSTOP_DISABLE);
13750Sstevel@tonic-gate 			return (r == ECONNRESET ? 0 : r);
13760Sstevel@tonic-gate 
13770Sstevel@tonic-gate 		case RESTARTER_STATE_OFFLINE:
13780Sstevel@tonic-gate 		case RESTARTER_STATE_UNINIT:
13790Sstevel@tonic-gate 			if (inst->ri_i.i_primary_ctid != 0) {
13800Sstevel@tonic-gate 				inst->ri_m_inst = safe_scf_instance_create(h);
13810Sstevel@tonic-gate 				inst->ri_mi_deleted = B_FALSE;
13820Sstevel@tonic-gate 
13830Sstevel@tonic-gate 				libscf_reget_instance(inst);
13840Sstevel@tonic-gate 				method_remove_contract(inst, B_TRUE, B_TRUE);
13850Sstevel@tonic-gate 
13860Sstevel@tonic-gate 				scf_instance_destroy(inst->ri_m_inst);
13870Sstevel@tonic-gate 			}
13880Sstevel@tonic-gate 			/* B_FALSE: See log_instance(..., "Enabled."); above */
13890Sstevel@tonic-gate 			log_instance(inst, B_FALSE, "Disabled.");
13900Sstevel@tonic-gate 			log_framework(LOG_DEBUG, "%s: Instance disabled.\n",
13910Sstevel@tonic-gate 			    inst->ri_i.i_fmri);
1392*12967Sgavin.maltby@oracle.com 
1393*12967Sgavin.maltby@oracle.com 			/*
1394*12967Sgavin.maltby@oracle.com 			 * If we are coming from OFFLINE, it was obviously a
1395*12967Sgavin.maltby@oracle.com 			 * disable request. But if we are coming from
1396*12967Sgavin.maltby@oracle.com 			 * UNINIT, it may have been a disable request for a
1397*12967Sgavin.maltby@oracle.com 			 * service in MAINT.
1398*12967Sgavin.maltby@oracle.com 			 */
1399*12967Sgavin.maltby@oracle.com 			if (riq->riq_reason == restarter_str_disable_request ||
1400*12967Sgavin.maltby@oracle.com 			    state == RESTARTER_STATE_OFFLINE)
1401*12967Sgavin.maltby@oracle.com 				reason = restarter_str_disable_request;
14020Sstevel@tonic-gate 			(void) restarter_instance_update_states(h, inst,
14030Sstevel@tonic-gate 			    RESTARTER_STATE_DISABLED, RESTARTER_STATE_NONE,
1404*12967Sgavin.maltby@oracle.com 			    RERR_RESTART, reason);
14050Sstevel@tonic-gate 			return (0);
14060Sstevel@tonic-gate 
14070Sstevel@tonic-gate 		case RESTARTER_STATE_DISABLED:
14080Sstevel@tonic-gate 			break;
14090Sstevel@tonic-gate 
14100Sstevel@tonic-gate 		case RESTARTER_STATE_MAINT:
14110Sstevel@tonic-gate 			/*
14120Sstevel@tonic-gate 			 * We only want to pull the instance out of maintenance
14130Sstevel@tonic-gate 			 * if the disable is on adminstrative request.  The
14140Sstevel@tonic-gate 			 * graph engine sends _DISABLE events whenever a
14150Sstevel@tonic-gate 			 * service isn't in the disabled state, and we don't
14160Sstevel@tonic-gate 			 * want to pull the service out of maintenance if,
14170Sstevel@tonic-gate 			 * for example, it is there due to a dependency cycle.
14180Sstevel@tonic-gate 			 */
14190Sstevel@tonic-gate 			if (e == RESTARTER_EVENT_TYPE_ADMIN_DISABLE)
14200Sstevel@tonic-gate 				unmaintain_instance(h, inst, RUNMAINT_DISABLE);
14210Sstevel@tonic-gate 			break;
14220Sstevel@tonic-gate 
14230Sstevel@tonic-gate 		default:
14240Sstevel@tonic-gate #ifndef NDEBUG
14250Sstevel@tonic-gate 			(void) fprintf(stderr, "Restarter instance %s has "
14260Sstevel@tonic-gate 			    "unknown state %d.\n", inst->ri_i.i_fmri, state);
14270Sstevel@tonic-gate #endif
14280Sstevel@tonic-gate 			abort();
14290Sstevel@tonic-gate 		}
14300Sstevel@tonic-gate 	}
14310Sstevel@tonic-gate 
14320Sstevel@tonic-gate 	return (0);
14330Sstevel@tonic-gate }
14340Sstevel@tonic-gate 
14350Sstevel@tonic-gate static void
start_instance(scf_handle_t * local_handle,restarter_inst_t * inst,int32_t reason)1436*12967Sgavin.maltby@oracle.com start_instance(scf_handle_t *local_handle, restarter_inst_t *inst,
1437*12967Sgavin.maltby@oracle.com     int32_t reason)
14380Sstevel@tonic-gate {
14390Sstevel@tonic-gate 	fork_info_t *info;
1440*12967Sgavin.maltby@oracle.com 	restarter_str_t	new_reason;
14410Sstevel@tonic-gate 
144211466SRoger.Faulkner@Sun.COM 	assert(MUTEX_HELD(&inst->ri_lock));
14430Sstevel@tonic-gate 	assert(instance_in_transition(inst) == 0);
14440Sstevel@tonic-gate 	assert(inst->ri_method_thread == 0);
14450Sstevel@tonic-gate 
14460Sstevel@tonic-gate 	log_framework(LOG_DEBUG, "%s: trying to start instance\n",
14470Sstevel@tonic-gate 	    inst->ri_i.i_fmri);
14480Sstevel@tonic-gate 
1449*12967Sgavin.maltby@oracle.com 	/*
1450*12967Sgavin.maltby@oracle.com 	 * We want to keep the original reason for restarts and clear actions
1451*12967Sgavin.maltby@oracle.com 	 */
1452*12967Sgavin.maltby@oracle.com 	switch (reason) {
1453*12967Sgavin.maltby@oracle.com 	case restarter_str_restart_request:
1454*12967Sgavin.maltby@oracle.com 	case restarter_str_clear_request:
1455*12967Sgavin.maltby@oracle.com 		new_reason = reason;
1456*12967Sgavin.maltby@oracle.com 		break;
1457*12967Sgavin.maltby@oracle.com 	default:
1458*12967Sgavin.maltby@oracle.com 		new_reason = restarter_str_dependencies_satisfied;
1459*12967Sgavin.maltby@oracle.com 	}
1460*12967Sgavin.maltby@oracle.com 
14610Sstevel@tonic-gate 	/* Services in the disabled and maintenance state are ignored */
14620Sstevel@tonic-gate 	if (inst->ri_i.i_state == RESTARTER_STATE_MAINT ||
14630Sstevel@tonic-gate 	    inst->ri_i.i_state == RESTARTER_STATE_DISABLED ||
14640Sstevel@tonic-gate 	    inst->ri_i.i_enabled == 0) {
14650Sstevel@tonic-gate 		log_framework(LOG_DEBUG,
14660Sstevel@tonic-gate 		    "%s: start_instance -> is maint/disabled\n",
14670Sstevel@tonic-gate 		    inst->ri_i.i_fmri);
14680Sstevel@tonic-gate 		return;
14690Sstevel@tonic-gate 	}
14700Sstevel@tonic-gate 
14710Sstevel@tonic-gate 	/* Already started instances are left alone */
14720Sstevel@tonic-gate 	if (instance_started(inst) == 1) {
14730Sstevel@tonic-gate 		log_framework(LOG_DEBUG,
14740Sstevel@tonic-gate 		    "%s: start_instance -> is already started\n",
14750Sstevel@tonic-gate 		    inst->ri_i.i_fmri);
14760Sstevel@tonic-gate 		return;
14770Sstevel@tonic-gate 	}
14780Sstevel@tonic-gate 
14790Sstevel@tonic-gate 	log_framework(LOG_DEBUG, "%s: starting instance.\n", inst->ri_i.i_fmri);
14800Sstevel@tonic-gate 
14810Sstevel@tonic-gate 	(void) restarter_instance_update_states(local_handle, inst,
1482*12967Sgavin.maltby@oracle.com 	    inst->ri_i.i_state, RESTARTER_STATE_ONLINE, RERR_NONE, new_reason);
14830Sstevel@tonic-gate 
14840Sstevel@tonic-gate 	info = startd_zalloc(sizeof (fork_info_t));
14850Sstevel@tonic-gate 
14860Sstevel@tonic-gate 	info->sf_id = inst->ri_id;
14870Sstevel@tonic-gate 	info->sf_method_type = METHOD_START;
14880Sstevel@tonic-gate 	info->sf_event_type = RERR_NONE;
1489*12967Sgavin.maltby@oracle.com 	info->sf_reason = new_reason;
14900Sstevel@tonic-gate 	inst->ri_method_thread = startd_thread_create(method_thread, info);
14910Sstevel@tonic-gate }
14920Sstevel@tonic-gate 
14938823STruong.Q.Nguyen@Sun.COM static int
event_from_tty(scf_handle_t * h,restarter_inst_t * rip)14948823STruong.Q.Nguyen@Sun.COM event_from_tty(scf_handle_t *h, restarter_inst_t *rip)
14958823STruong.Q.Nguyen@Sun.COM {
14968823STruong.Q.Nguyen@Sun.COM 	scf_instance_t *inst;
14978823STruong.Q.Nguyen@Sun.COM 	int ret = 0;
14988823STruong.Q.Nguyen@Sun.COM 
14998823STruong.Q.Nguyen@Sun.COM 	if (libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &inst))
15008823STruong.Q.Nguyen@Sun.COM 		return (-1);
15018823STruong.Q.Nguyen@Sun.COM 
15028823STruong.Q.Nguyen@Sun.COM 	ret = restarter_inst_ractions_from_tty(inst);
15038823STruong.Q.Nguyen@Sun.COM 
15048823STruong.Q.Nguyen@Sun.COM 	scf_instance_destroy(inst);
15058823STruong.Q.Nguyen@Sun.COM 	return (ret);
15068823STruong.Q.Nguyen@Sun.COM }
15078823STruong.Q.Nguyen@Sun.COM 
15080Sstevel@tonic-gate static void
maintain_instance(scf_handle_t * h,restarter_inst_t * rip,int immediate,restarter_str_t reason)15090Sstevel@tonic-gate maintain_instance(scf_handle_t *h, restarter_inst_t *rip, int immediate,
1510*12967Sgavin.maltby@oracle.com     restarter_str_t reason)
15110Sstevel@tonic-gate {
15120Sstevel@tonic-gate 	fork_info_t *info;
15138823STruong.Q.Nguyen@Sun.COM 	scf_instance_t *scf_inst = NULL;
15140Sstevel@tonic-gate 
151511466SRoger.Faulkner@Sun.COM 	assert(MUTEX_HELD(&rip->ri_lock));
1516*12967Sgavin.maltby@oracle.com 	assert(reason != restarter_str_none);
15170Sstevel@tonic-gate 	assert(rip->ri_method_thread == 0);
15180Sstevel@tonic-gate 
1519*12967Sgavin.maltby@oracle.com 	log_instance(rip, B_TRUE, "Stopping for maintenance due to %s.",
1520*12967Sgavin.maltby@oracle.com 	    restarter_get_str_short(reason));
15210Sstevel@tonic-gate 	log_framework(LOG_DEBUG, "%s: stopping for maintenance due to %s.\n",
1522*12967Sgavin.maltby@oracle.com 	    rip->ri_i.i_fmri, restarter_get_str_short(reason));
15230Sstevel@tonic-gate 
15240Sstevel@tonic-gate 	/* Services in the maintenance state are ignored */
15250Sstevel@tonic-gate 	if (rip->ri_i.i_state == RESTARTER_STATE_MAINT) {
15260Sstevel@tonic-gate 		log_framework(LOG_DEBUG,
15270Sstevel@tonic-gate 		    "%s: maintain_instance -> is already in maintenance\n",
15280Sstevel@tonic-gate 		    rip->ri_i.i_fmri);
15290Sstevel@tonic-gate 		return;
15300Sstevel@tonic-gate 	}
15310Sstevel@tonic-gate 
15328823STruong.Q.Nguyen@Sun.COM 	/*
1533*12967Sgavin.maltby@oracle.com 	 * If reason state is restarter_str_service_request and
15348823STruong.Q.Nguyen@Sun.COM 	 * restarter_actions/auxiliary_fmri property is set with a valid fmri,
15358823STruong.Q.Nguyen@Sun.COM 	 * copy the fmri to restarter/auxiliary_fmri so svcs -x can use.
15368823STruong.Q.Nguyen@Sun.COM 	 */
1537*12967Sgavin.maltby@oracle.com 	if (reason == restarter_str_service_request &&
1538*12967Sgavin.maltby@oracle.com 	    libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &scf_inst) == 0) {
15398823STruong.Q.Nguyen@Sun.COM 		if (restarter_inst_validate_ractions_aux_fmri(scf_inst) == 0) {
15408823STruong.Q.Nguyen@Sun.COM 			if (restarter_inst_set_aux_fmri(scf_inst))
15418823STruong.Q.Nguyen@Sun.COM 				log_framework(LOG_DEBUG, "%s: "
15428823STruong.Q.Nguyen@Sun.COM 				    "restarter_inst_set_aux_fmri failed: ",
15438823STruong.Q.Nguyen@Sun.COM 				    rip->ri_i.i_fmri);
15448823STruong.Q.Nguyen@Sun.COM 		} else {
15458823STruong.Q.Nguyen@Sun.COM 			log_framework(LOG_DEBUG, "%s: "
15468823STruong.Q.Nguyen@Sun.COM 			    "restarter_inst_validate_ractions_aux_fmri "
15478823STruong.Q.Nguyen@Sun.COM 			    "failed: ", rip->ri_i.i_fmri);
15488823STruong.Q.Nguyen@Sun.COM 
15498823STruong.Q.Nguyen@Sun.COM 			if (restarter_inst_reset_aux_fmri(scf_inst))
15508823STruong.Q.Nguyen@Sun.COM 				log_framework(LOG_DEBUG, "%s: "
15518823STruong.Q.Nguyen@Sun.COM 				    "restarter_inst_reset_aux_fmri failed: ",
15528823STruong.Q.Nguyen@Sun.COM 				    rip->ri_i.i_fmri);
15538823STruong.Q.Nguyen@Sun.COM 		}
15548823STruong.Q.Nguyen@Sun.COM 		scf_instance_destroy(scf_inst);
15558823STruong.Q.Nguyen@Sun.COM 	}
15568823STruong.Q.Nguyen@Sun.COM 
15570Sstevel@tonic-gate 	if (immediate || !instance_started(rip)) {
15580Sstevel@tonic-gate 		if (rip->ri_i.i_primary_ctid != 0) {
15590Sstevel@tonic-gate 			rip->ri_m_inst = safe_scf_instance_create(h);
15600Sstevel@tonic-gate 			rip->ri_mi_deleted = B_FALSE;
15610Sstevel@tonic-gate 
15620Sstevel@tonic-gate 			libscf_reget_instance(rip);
15630Sstevel@tonic-gate 			method_remove_contract(rip, B_TRUE, B_TRUE);
15640Sstevel@tonic-gate 
15650Sstevel@tonic-gate 			scf_instance_destroy(rip->ri_m_inst);
15660Sstevel@tonic-gate 		}
15670Sstevel@tonic-gate 
15680Sstevel@tonic-gate 		(void) restarter_instance_update_states(h, rip,
15690Sstevel@tonic-gate 		    RESTARTER_STATE_MAINT, RESTARTER_STATE_NONE, RERR_RESTART,
1570*12967Sgavin.maltby@oracle.com 		    reason);
15710Sstevel@tonic-gate 		return;
15720Sstevel@tonic-gate 	}
15730Sstevel@tonic-gate 
15740Sstevel@tonic-gate 	(void) restarter_instance_update_states(h, rip, rip->ri_i.i_state,
1575*12967Sgavin.maltby@oracle.com 	    RESTARTER_STATE_MAINT, RERR_NONE, reason);
15760Sstevel@tonic-gate 
15771958Slianep 	log_transition(rip, MAINT_REQUESTED);
15781958Slianep 
15790Sstevel@tonic-gate 	info = startd_zalloc(sizeof (*info));
15800Sstevel@tonic-gate 	info->sf_id = rip->ri_id;
15810Sstevel@tonic-gate 	info->sf_method_type = METHOD_STOP;
15820Sstevel@tonic-gate 	info->sf_event_type = RERR_RESTART;
1583*12967Sgavin.maltby@oracle.com 	info->sf_reason = reason;
15840Sstevel@tonic-gate 	rip->ri_method_thread = startd_thread_create(method_thread, info);
15850Sstevel@tonic-gate }
15860Sstevel@tonic-gate 
15870Sstevel@tonic-gate static void
refresh_instance(scf_handle_t * h,restarter_inst_t * rip)15880Sstevel@tonic-gate refresh_instance(scf_handle_t *h, restarter_inst_t *rip)
15890Sstevel@tonic-gate {
15900Sstevel@tonic-gate 	scf_instance_t *inst;
15910Sstevel@tonic-gate 	scf_snapshot_t *snap;
15920Sstevel@tonic-gate 	fork_info_t *info;
15930Sstevel@tonic-gate 	int r;
15940Sstevel@tonic-gate 
159511466SRoger.Faulkner@Sun.COM 	assert(MUTEX_HELD(&rip->ri_lock));
15960Sstevel@tonic-gate 
15970Sstevel@tonic-gate 	log_instance(rip, B_TRUE, "Rereading configuration.");
15980Sstevel@tonic-gate 	log_framework(LOG_DEBUG, "%s: rereading configuration.\n",
15990Sstevel@tonic-gate 	    rip->ri_i.i_fmri);
16000Sstevel@tonic-gate 
16010Sstevel@tonic-gate rep_retry:
16020Sstevel@tonic-gate 	r = libscf_fmri_get_instance(h, rip->ri_i.i_fmri, &inst);
16030Sstevel@tonic-gate 	switch (r) {
16040Sstevel@tonic-gate 	case 0:
16050Sstevel@tonic-gate 		break;
16060Sstevel@tonic-gate 
16070Sstevel@tonic-gate 	case ECONNABORTED:
16080Sstevel@tonic-gate 		libscf_handle_rebind(h);
16090Sstevel@tonic-gate 		goto rep_retry;
16100Sstevel@tonic-gate 
16110Sstevel@tonic-gate 	case ENOENT:
16120Sstevel@tonic-gate 		/* Must have been deleted. */
16130Sstevel@tonic-gate 		return;
16140Sstevel@tonic-gate 
16150Sstevel@tonic-gate 	case EINVAL:
16160Sstevel@tonic-gate 	case ENOTSUP:
16170Sstevel@tonic-gate 	default:
16180Sstevel@tonic-gate 		bad_error("libscf_fmri_get_instance", r);
16190Sstevel@tonic-gate 	}
16200Sstevel@tonic-gate 
16210Sstevel@tonic-gate 	snap = libscf_get_running_snapshot(inst);
16220Sstevel@tonic-gate 
16230Sstevel@tonic-gate 	r = libscf_get_startd_properties(inst, snap, &rip->ri_flags,
16240Sstevel@tonic-gate 	    &rip->ri_utmpx_prefix);
16250Sstevel@tonic-gate 	switch (r) {
16260Sstevel@tonic-gate 	case 0:
16270Sstevel@tonic-gate 		log_framework(LOG_DEBUG, "%s is a %s-style service\n",
16280Sstevel@tonic-gate 		    rip->ri_i.i_fmri, service_style(rip->ri_flags));
16290Sstevel@tonic-gate 		break;
16300Sstevel@tonic-gate 
16310Sstevel@tonic-gate 	case ECONNABORTED:
16320Sstevel@tonic-gate 		scf_instance_destroy(inst);
16330Sstevel@tonic-gate 		scf_snapshot_destroy(snap);
16340Sstevel@tonic-gate 		libscf_handle_rebind(h);
16350Sstevel@tonic-gate 		goto rep_retry;
16360Sstevel@tonic-gate 
16370Sstevel@tonic-gate 	case ECANCELED:
16380Sstevel@tonic-gate 	case ENOENT:
16390Sstevel@tonic-gate 		/* Succeed in anticipation of REMOVE_INSTANCE. */
16400Sstevel@tonic-gate 		break;
16410Sstevel@tonic-gate 
16420Sstevel@tonic-gate 	default:
16430Sstevel@tonic-gate 		bad_error("libscf_get_startd_properties", r);
16440Sstevel@tonic-gate 	}
16450Sstevel@tonic-gate 
16460Sstevel@tonic-gate 	if (instance_started(rip)) {
16470Sstevel@tonic-gate 		/* Refresh does not change the state. */
16480Sstevel@tonic-gate 		(void) restarter_instance_update_states(h, rip,
1649*12967Sgavin.maltby@oracle.com 		    rip->ri_i.i_state, rip->ri_i.i_state, RERR_NONE,
1650*12967Sgavin.maltby@oracle.com 		    restarter_str_refresh);
16510Sstevel@tonic-gate 
16520Sstevel@tonic-gate 		info = startd_zalloc(sizeof (*info));
16530Sstevel@tonic-gate 		info->sf_id = rip->ri_id;
16540Sstevel@tonic-gate 		info->sf_method_type = METHOD_REFRESH;
16550Sstevel@tonic-gate 		info->sf_event_type = RERR_REFRESH;
1656*12967Sgavin.maltby@oracle.com 		info->sf_reason = NULL;
16570Sstevel@tonic-gate 
16580Sstevel@tonic-gate 		assert(rip->ri_method_thread == 0);
16590Sstevel@tonic-gate 		rip->ri_method_thread =
16600Sstevel@tonic-gate 		    startd_thread_create(method_thread, info);
16610Sstevel@tonic-gate 	}
16620Sstevel@tonic-gate 
16630Sstevel@tonic-gate 	scf_snapshot_destroy(snap);
16640Sstevel@tonic-gate 	scf_instance_destroy(inst);
16650Sstevel@tonic-gate }
16660Sstevel@tonic-gate 
/*
 * Printable names for restarter events.  Code in this file looks a name up
 * by indexing with an event's type value.
 * NOTE(review): the order presumably mirrors the restarter event type
 * enumeration -- confirm against its declaration before adding entries.
 */
const char *event_names[] = {
	"INVALID",			/*  0 */
	"ADD_INSTANCE",			/*  1 */
	"REMOVE_INSTANCE",		/*  2 */
	"ENABLE",			/*  3 */
	"DISABLE",			/*  4 */
	"ADMIN_DEGRADED",		/*  5 */
	"ADMIN_REFRESH",		/*  6 */
	"ADMIN_RESTART",		/*  7 */
	"ADMIN_MAINT_OFF",		/*  8 */
	"ADMIN_MAINT_ON",		/*  9 */
	"ADMIN_MAINT_ON_IMMEDIATE",	/* 10 */
	"STOP",				/* 11 */
	"START",			/* 12 */
	"DEPENDENCY_CYCLE",		/* 13 */
	"INVALID_DEPENDENCY",		/* 14 */
	"ADMIN_DISABLE",		/* 15 */
	"STOP_RESET"			/* 16 */
};
16730Sstevel@tonic-gate 
16740Sstevel@tonic-gate /*
16750Sstevel@tonic-gate  * void *restarter_process_events()
16760Sstevel@tonic-gate  *
16770Sstevel@tonic-gate  *   Called in a separate thread to process the events on an instance's
16780Sstevel@tonic-gate  *   queue.  Empties the queue completely, and tries to keep the thread
16790Sstevel@tonic-gate  *   around for a little while after the queue is empty to save on
16800Sstevel@tonic-gate  *   startup costs.
16810Sstevel@tonic-gate  */
16820Sstevel@tonic-gate static void *
restarter_process_events(void * arg)16830Sstevel@tonic-gate restarter_process_events(void *arg)
16840Sstevel@tonic-gate {
16850Sstevel@tonic-gate 	scf_handle_t *h;
16860Sstevel@tonic-gate 	restarter_instance_qentry_t *event;
16870Sstevel@tonic-gate 	restarter_inst_t *rip;
16880Sstevel@tonic-gate 	char *fmri = (char *)arg;
16890Sstevel@tonic-gate 	struct timespec to;
16900Sstevel@tonic-gate 
16910Sstevel@tonic-gate 	assert(fmri != NULL);
16920Sstevel@tonic-gate 
16930Sstevel@tonic-gate 	h = libscf_handle_create_bound_loop();
16940Sstevel@tonic-gate 
16950Sstevel@tonic-gate 	/* grab the queue lock */
16960Sstevel@tonic-gate 	rip = inst_lookup_queue(fmri);
16970Sstevel@tonic-gate 	if (rip == NULL)
16980Sstevel@tonic-gate 		goto out;
16990Sstevel@tonic-gate 
17000Sstevel@tonic-gate again:
17010Sstevel@tonic-gate 
17020Sstevel@tonic-gate 	while ((event = uu_list_first(rip->ri_queue)) != NULL) {
17030Sstevel@tonic-gate 		restarter_inst_t *inst;
17040Sstevel@tonic-gate 
17050Sstevel@tonic-gate 		/* drop the queue lock */
17060Sstevel@tonic-gate 		MUTEX_UNLOCK(&rip->ri_queue_lock);
17070Sstevel@tonic-gate 
17080Sstevel@tonic-gate 		/*
17090Sstevel@tonic-gate 		 * Grab the inst lock -- this waits until any outstanding
17100Sstevel@tonic-gate 		 * method finishes running.
17110Sstevel@tonic-gate 		 */
17120Sstevel@tonic-gate 		inst = inst_lookup_by_name(fmri);
17130Sstevel@tonic-gate 		if (inst == NULL) {
17140Sstevel@tonic-gate 			/* Getting deleted in the middle isn't an error. */
17150Sstevel@tonic-gate 			goto cont;
17160Sstevel@tonic-gate 		}
17170Sstevel@tonic-gate 
17180Sstevel@tonic-gate 		assert(instance_in_transition(inst) == 0);
17190Sstevel@tonic-gate 
17200Sstevel@tonic-gate 		/* process the event */
17210Sstevel@tonic-gate 		switch (event->riq_type) {
17220Sstevel@tonic-gate 		case RESTARTER_EVENT_TYPE_ENABLE:
17230Sstevel@tonic-gate 		case RESTARTER_EVENT_TYPE_DISABLE:
1724*12967Sgavin.maltby@oracle.com 			(void) enable_inst(h, inst, event);
172511482SSean.Wilcox@Sun.COM 			break;
172611482SSean.Wilcox@Sun.COM 
17270Sstevel@tonic-gate 		case RESTARTER_EVENT_TYPE_ADMIN_DISABLE:
1728*12967Sgavin.maltby@oracle.com 			if (enable_inst(h, inst, event) == 0)
172911482SSean.Wilcox@Sun.COM 				reset_start_times(inst);
17300Sstevel@tonic-gate 			break;
17310Sstevel@tonic-gate 
17320Sstevel@tonic-gate 		case RESTARTER_EVENT_TYPE_REMOVE_INSTANCE:
17330Sstevel@tonic-gate 			restarter_delete_inst(inst);
17340Sstevel@tonic-gate 			inst = NULL;
17350Sstevel@tonic-gate 			goto cont;
17360Sstevel@tonic-gate 
173711482SSean.Wilcox@Sun.COM 		case RESTARTER_EVENT_TYPE_STOP_RESET:
173811482SSean.Wilcox@Sun.COM 			reset_start_times(inst);
173911482SSean.Wilcox@Sun.COM 			/* FALLTHROUGH */
17400Sstevel@tonic-gate 		case RESTARTER_EVENT_TYPE_STOP:
17410Sstevel@tonic-gate 			(void) stop_instance(h, inst, RSTOP_DEPENDENCY);
17420Sstevel@tonic-gate 			break;
17430Sstevel@tonic-gate 
17440Sstevel@tonic-gate 		case RESTARTER_EVENT_TYPE_START:
1745*12967Sgavin.maltby@oracle.com 			start_instance(h, inst, event->riq_reason);
17460Sstevel@tonic-gate 			break;
17470Sstevel@tonic-gate 
17480Sstevel@tonic-gate 		case RESTARTER_EVENT_TYPE_DEPENDENCY_CYCLE:
1749*12967Sgavin.maltby@oracle.com 			maintain_instance(h, inst, 0,
1750*12967Sgavin.maltby@oracle.com 			    restarter_str_dependency_cycle);
17510Sstevel@tonic-gate 			break;
17520Sstevel@tonic-gate 
17530Sstevel@tonic-gate 		case RESTARTER_EVENT_TYPE_INVALID_DEPENDENCY:
1754*12967Sgavin.maltby@oracle.com 			maintain_instance(h, inst, 0,
1755*12967Sgavin.maltby@oracle.com 			    restarter_str_invalid_dependency);
17560Sstevel@tonic-gate 			break;
17570Sstevel@tonic-gate 
17580Sstevel@tonic-gate 		case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON:
17598823STruong.Q.Nguyen@Sun.COM 			if (event_from_tty(h, inst) == 0)
17608823STruong.Q.Nguyen@Sun.COM 				maintain_instance(h, inst, 0,
1761*12967Sgavin.maltby@oracle.com 				    restarter_str_service_request);
17628823STruong.Q.Nguyen@Sun.COM 			else
17638823STruong.Q.Nguyen@Sun.COM 				maintain_instance(h, inst, 0,
1764*12967Sgavin.maltby@oracle.com 				    restarter_str_administrative_request);
17650Sstevel@tonic-gate 			break;
17660Sstevel@tonic-gate 
17670Sstevel@tonic-gate 		case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON_IMMEDIATE:
17688823STruong.Q.Nguyen@Sun.COM 			if (event_from_tty(h, inst) == 0)
17698823STruong.Q.Nguyen@Sun.COM 				maintain_instance(h, inst, 1,
1770*12967Sgavin.maltby@oracle.com 				    restarter_str_service_request);
17718823STruong.Q.Nguyen@Sun.COM 			else
17728823STruong.Q.Nguyen@Sun.COM 				maintain_instance(h, inst, 1,
1773*12967Sgavin.maltby@oracle.com 				    restarter_str_administrative_request);
17740Sstevel@tonic-gate 			break;
17750Sstevel@tonic-gate 
17760Sstevel@tonic-gate 		case RESTARTER_EVENT_TYPE_ADMIN_MAINT_OFF:
17770Sstevel@tonic-gate 			unmaintain_instance(h, inst, RUNMAINT_CLEAR);
177812412SSean.Wilcox@Sun.COM 			reset_start_times(inst);
17790Sstevel@tonic-gate 			break;
17800Sstevel@tonic-gate 
17810Sstevel@tonic-gate 		case RESTARTER_EVENT_TYPE_ADMIN_REFRESH:
17820Sstevel@tonic-gate 			refresh_instance(h, inst);
17830Sstevel@tonic-gate 			break;
17840Sstevel@tonic-gate 
17850Sstevel@tonic-gate 		case RESTARTER_EVENT_TYPE_ADMIN_DEGRADED:
17860Sstevel@tonic-gate 			log_framework(LOG_WARNING, "Restarter: "
17870Sstevel@tonic-gate 			    "%s command (for %s) unimplemented.\n",
17880Sstevel@tonic-gate 			    event_names[event->riq_type], inst->ri_i.i_fmri);
17890Sstevel@tonic-gate 			break;
17900Sstevel@tonic-gate 
17910Sstevel@tonic-gate 		case RESTARTER_EVENT_TYPE_ADMIN_RESTART:
17920Sstevel@tonic-gate 			if (!instance_started(inst)) {
17930Sstevel@tonic-gate 				log_framework(LOG_DEBUG, "Restarter: "
17940Sstevel@tonic-gate 				    "Not restarting %s; not running.\n",
17950Sstevel@tonic-gate 				    inst->ri_i.i_fmri);
17960Sstevel@tonic-gate 			} else {
17970Sstevel@tonic-gate 				/*
17980Sstevel@tonic-gate 				 * Stop the instance.  If it can be restarted,
17990Sstevel@tonic-gate 				 * the graph engine will send a new event.
18000Sstevel@tonic-gate 				 */
180111482SSean.Wilcox@Sun.COM 				if (stop_instance(h, inst, RSTOP_RESTART) == 0)
180211482SSean.Wilcox@Sun.COM 					reset_start_times(inst);
18030Sstevel@tonic-gate 			}
18040Sstevel@tonic-gate 			break;
18050Sstevel@tonic-gate 
18060Sstevel@tonic-gate 		case RESTARTER_EVENT_TYPE_ADD_INSTANCE:
18070Sstevel@tonic-gate 		default:
18080Sstevel@tonic-gate #ifndef NDEBUG
18090Sstevel@tonic-gate 			uu_warn("%s:%d: Bad restarter event %d.  "
18100Sstevel@tonic-gate 			    "Aborting.\n", __FILE__, __LINE__, event->riq_type);
18110Sstevel@tonic-gate #endif
18120Sstevel@tonic-gate 			abort();
18130Sstevel@tonic-gate 		}
18140Sstevel@tonic-gate 
18150Sstevel@tonic-gate 		assert(inst != NULL);
18160Sstevel@tonic-gate 		MUTEX_UNLOCK(&inst->ri_lock);
18170Sstevel@tonic-gate 
18180Sstevel@tonic-gate cont:
18190Sstevel@tonic-gate 		/* grab the queue lock */
18200Sstevel@tonic-gate 		rip = inst_lookup_queue(fmri);
18210Sstevel@tonic-gate 		if (rip == NULL)
18220Sstevel@tonic-gate 			goto out;
18230Sstevel@tonic-gate 
18240Sstevel@tonic-gate 		/* delete the event */
18250Sstevel@tonic-gate 		uu_list_remove(rip->ri_queue, event);
18260Sstevel@tonic-gate 		startd_free(event, sizeof (restarter_instance_qentry_t));
18270Sstevel@tonic-gate 	}
18280Sstevel@tonic-gate 
18290Sstevel@tonic-gate 	assert(rip != NULL);
18300Sstevel@tonic-gate 
18310Sstevel@tonic-gate 	/*
18320Sstevel@tonic-gate 	 * Try to preserve the thread for a little while for future use.
18330Sstevel@tonic-gate 	 */
18340Sstevel@tonic-gate 	to.tv_sec = 3;
18350Sstevel@tonic-gate 	to.tv_nsec = 0;
18360Sstevel@tonic-gate 	(void) pthread_cond_reltimedwait_np(&rip->ri_queue_cv,
18370Sstevel@tonic-gate 	    &rip->ri_queue_lock, &to);
18380Sstevel@tonic-gate 
18390Sstevel@tonic-gate 	if (uu_list_first(rip->ri_queue) != NULL)
18400Sstevel@tonic-gate 		goto again;
18410Sstevel@tonic-gate 
18420Sstevel@tonic-gate 	rip->ri_queue_thread = 0;
18430Sstevel@tonic-gate 	MUTEX_UNLOCK(&rip->ri_queue_lock);
18440Sstevel@tonic-gate out:
18450Sstevel@tonic-gate 	(void) scf_handle_unbind(h);
18460Sstevel@tonic-gate 	scf_handle_destroy(h);
18470Sstevel@tonic-gate 	free(fmri);
18480Sstevel@tonic-gate 	return (NULL);
18490Sstevel@tonic-gate }
18500Sstevel@tonic-gate 
18510Sstevel@tonic-gate static int
is_admin_event(restarter_event_type_t t)18520Sstevel@tonic-gate is_admin_event(restarter_event_type_t t) {
18530Sstevel@tonic-gate 
18540Sstevel@tonic-gate 	switch (t) {
18550Sstevel@tonic-gate 	case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON:
18560Sstevel@tonic-gate 	case RESTARTER_EVENT_TYPE_ADMIN_MAINT_ON_IMMEDIATE:
18570Sstevel@tonic-gate 	case RESTARTER_EVENT_TYPE_ADMIN_MAINT_OFF:
18580Sstevel@tonic-gate 	case RESTARTER_EVENT_TYPE_ADMIN_REFRESH:
18590Sstevel@tonic-gate 	case RESTARTER_EVENT_TYPE_ADMIN_DEGRADED:
18600Sstevel@tonic-gate 	case RESTARTER_EVENT_TYPE_ADMIN_RESTART:
18610Sstevel@tonic-gate 		return (1);
18620Sstevel@tonic-gate 	default:
18630Sstevel@tonic-gate 		return (0);
18640Sstevel@tonic-gate 	}
18650Sstevel@tonic-gate }
18660Sstevel@tonic-gate 
18670Sstevel@tonic-gate static void
restarter_queue_event(restarter_inst_t * ri,restarter_protocol_event_t * e)18680Sstevel@tonic-gate restarter_queue_event(restarter_inst_t *ri, restarter_protocol_event_t *e)
18690Sstevel@tonic-gate {
18700Sstevel@tonic-gate 	restarter_instance_qentry_t *qe;
18710Sstevel@tonic-gate 	int r;
18720Sstevel@tonic-gate 
187311466SRoger.Faulkner@Sun.COM 	assert(MUTEX_HELD(&ri->ri_queue_lock));
187411466SRoger.Faulkner@Sun.COM 	assert(!MUTEX_HELD(&ri->ri_lock));
18750Sstevel@tonic-gate 
18760Sstevel@tonic-gate 	qe = startd_zalloc(sizeof (restarter_instance_qentry_t));
18770Sstevel@tonic-gate 	qe->riq_type = e->rpe_type;
1878*12967Sgavin.maltby@oracle.com 	qe->riq_reason = e->rpe_reason;
18790Sstevel@tonic-gate 
18800Sstevel@tonic-gate 	uu_list_node_init(qe, &qe->riq_link, restarter_queue_pool);
18810Sstevel@tonic-gate 	r = uu_list_insert_before(ri->ri_queue, NULL, qe);
18820Sstevel@tonic-gate 	assert(r == 0);
18830Sstevel@tonic-gate }
18840Sstevel@tonic-gate 
/*
 * void *restarter_event_thread()
 *
 *  Handle incoming graph events by placing them on a per-instance
 *  queue.  We can't lock the main part of the instance structure, so
 *  just modify the separately locked event queue portion.
 */
18920Sstevel@tonic-gate /*ARGSUSED*/
18930Sstevel@tonic-gate static void *
restarter_event_thread(void * unused)18940Sstevel@tonic-gate restarter_event_thread(void *unused)
18950Sstevel@tonic-gate {
18960Sstevel@tonic-gate 	scf_handle_t *h;
18970Sstevel@tonic-gate 
18980Sstevel@tonic-gate 	/*
18990Sstevel@tonic-gate 	 * This is a new thread, and thus, gets its own handle
19000Sstevel@tonic-gate 	 * to the repository.
19010Sstevel@tonic-gate 	 */
19020Sstevel@tonic-gate 	h = libscf_handle_create_bound_loop();
19030Sstevel@tonic-gate 
19040Sstevel@tonic-gate 	MUTEX_LOCK(&ru->restarter_update_lock);
19050Sstevel@tonic-gate 
19060Sstevel@tonic-gate 	/*CONSTCOND*/
19070Sstevel@tonic-gate 	while (1) {
19080Sstevel@tonic-gate 		restarter_protocol_event_t *e;
19090Sstevel@tonic-gate 
19100Sstevel@tonic-gate 		while (ru->restarter_update_wakeup == 0)
19110Sstevel@tonic-gate 			(void) pthread_cond_wait(&ru->restarter_update_cv,
19120Sstevel@tonic-gate 			    &ru->restarter_update_lock);
19130Sstevel@tonic-gate 
19140Sstevel@tonic-gate 		ru->restarter_update_wakeup = 0;
19150Sstevel@tonic-gate 
19160Sstevel@tonic-gate 		while ((e = restarter_event_dequeue()) != NULL) {
19170Sstevel@tonic-gate 			restarter_inst_t *rip;
19180Sstevel@tonic-gate 			char *fmri;
19190Sstevel@tonic-gate 
19200Sstevel@tonic-gate 			MUTEX_UNLOCK(&ru->restarter_update_lock);
19210Sstevel@tonic-gate 
19220Sstevel@tonic-gate 			/*
19230Sstevel@tonic-gate 			 * ADD_INSTANCE is special: there's likely no
19240Sstevel@tonic-gate 			 * instance structure yet, so we need to handle the
19250Sstevel@tonic-gate 			 * addition synchronously.
19260Sstevel@tonic-gate 			 */
19270Sstevel@tonic-gate 			switch (e->rpe_type) {
19280Sstevel@tonic-gate 			case RESTARTER_EVENT_TYPE_ADD_INSTANCE:
19290Sstevel@tonic-gate 				if (restarter_insert_inst(h, e->rpe_inst) != 0)
19300Sstevel@tonic-gate 					log_error(LOG_INFO, "Restarter: "
19310Sstevel@tonic-gate 					    "Could not add %s.\n", e->rpe_inst);
19320Sstevel@tonic-gate 
19330Sstevel@tonic-gate 				MUTEX_LOCK(&st->st_load_lock);
19340Sstevel@tonic-gate 				if (--st->st_load_instances == 0)
19350Sstevel@tonic-gate 					(void) pthread_cond_broadcast(
19360Sstevel@tonic-gate 					    &st->st_load_cv);
19370Sstevel@tonic-gate 				MUTEX_UNLOCK(&st->st_load_lock);
19380Sstevel@tonic-gate 
19390Sstevel@tonic-gate 				goto nolookup;
19400Sstevel@tonic-gate 			}
19410Sstevel@tonic-gate 
19420Sstevel@tonic-gate 			/*
19430Sstevel@tonic-gate 			 * Lookup the instance, locking only the event queue.
19440Sstevel@tonic-gate 			 * Can't grab ri_lock here because it might be held
19450Sstevel@tonic-gate 			 * by a long-running method.
19460Sstevel@tonic-gate 			 */
19470Sstevel@tonic-gate 			rip = inst_lookup_queue(e->rpe_inst);
19480Sstevel@tonic-gate 			if (rip == NULL) {
19490Sstevel@tonic-gate 				log_error(LOG_INFO, "Restarter: "
19500Sstevel@tonic-gate 				    "Ignoring %s command for unknown service "
19510Sstevel@tonic-gate 				    "%s.\n", event_names[e->rpe_type],
19520Sstevel@tonic-gate 				    e->rpe_inst);
19530Sstevel@tonic-gate 				goto nolookup;
19540Sstevel@tonic-gate 			}
19550Sstevel@tonic-gate 
19560Sstevel@tonic-gate 			/* Keep ADMIN events from filling up the queue. */
19570Sstevel@tonic-gate 			if (is_admin_event(e->rpe_type) &&
19580Sstevel@tonic-gate 			    uu_list_numnodes(rip->ri_queue) >
19590Sstevel@tonic-gate 			    RINST_QUEUE_THRESHOLD) {
19600Sstevel@tonic-gate 				MUTEX_UNLOCK(&rip->ri_queue_lock);
19610Sstevel@tonic-gate 				log_instance(rip, B_TRUE, "Instance event "
19620Sstevel@tonic-gate 				    "queue overflow.  Dropping administrative "
19630Sstevel@tonic-gate 				    "request.");
19640Sstevel@tonic-gate 				log_framework(LOG_DEBUG, "%s: Instance event "
19650Sstevel@tonic-gate 				    "queue overflow.  Dropping administrative "
19660Sstevel@tonic-gate 				    "request.\n", rip->ri_i.i_fmri);
19670Sstevel@tonic-gate 				goto nolookup;
19680Sstevel@tonic-gate 			}
19690Sstevel@tonic-gate 
19700Sstevel@tonic-gate 			/* Now add the event to the instance queue. */
19710Sstevel@tonic-gate 			restarter_queue_event(rip, e);
19720Sstevel@tonic-gate 
19730Sstevel@tonic-gate 			if (rip->ri_queue_thread == 0) {
19740Sstevel@tonic-gate 				/*
19750Sstevel@tonic-gate 				 * Start a thread if one isn't already
19760Sstevel@tonic-gate 				 * running.
19770Sstevel@tonic-gate 				 */
19780Sstevel@tonic-gate 				fmri = safe_strdup(e->rpe_inst);
19790Sstevel@tonic-gate 				rip->ri_queue_thread =  startd_thread_create(
19800Sstevel@tonic-gate 				    restarter_process_events, (void *)fmri);
19810Sstevel@tonic-gate 			} else {
19820Sstevel@tonic-gate 				/*
19830Sstevel@tonic-gate 				 * Signal the existing thread that there's
19840Sstevel@tonic-gate 				 * a new event.
19850Sstevel@tonic-gate 				 */
19860Sstevel@tonic-gate 				(void) pthread_cond_broadcast(
19870Sstevel@tonic-gate 				    &rip->ri_queue_cv);
19880Sstevel@tonic-gate 			}
19890Sstevel@tonic-gate 
19900Sstevel@tonic-gate 			MUTEX_UNLOCK(&rip->ri_queue_lock);
19910Sstevel@tonic-gate nolookup:
19920Sstevel@tonic-gate 			restarter_event_release(e);
19930Sstevel@tonic-gate 
19940Sstevel@tonic-gate 			MUTEX_LOCK(&ru->restarter_update_lock);
19950Sstevel@tonic-gate 		}
19960Sstevel@tonic-gate 	}
19970Sstevel@tonic-gate 
19980Sstevel@tonic-gate 	/*
19990Sstevel@tonic-gate 	 * Unreachable for now -- there's currently no graceful cleanup
20000Sstevel@tonic-gate 	 * called on exit().
20010Sstevel@tonic-gate 	 */
20020Sstevel@tonic-gate 	(void) scf_handle_unbind(h);
20030Sstevel@tonic-gate 	scf_handle_destroy(h);
20040Sstevel@tonic-gate 	return (NULL);
20050Sstevel@tonic-gate }
20060Sstevel@tonic-gate 
20070Sstevel@tonic-gate static restarter_inst_t *
contract_to_inst(ctid_t ctid)20080Sstevel@tonic-gate contract_to_inst(ctid_t ctid)
20090Sstevel@tonic-gate {
20100Sstevel@tonic-gate 	restarter_inst_t *inst;
20110Sstevel@tonic-gate 	int id;
20120Sstevel@tonic-gate 
20130Sstevel@tonic-gate 	id = lookup_inst_by_contract(ctid);
20140Sstevel@tonic-gate 	if (id == -1)
20150Sstevel@tonic-gate 		return (NULL);
20160Sstevel@tonic-gate 
20170Sstevel@tonic-gate 	inst = inst_lookup_by_id(id);
20180Sstevel@tonic-gate 	if (inst != NULL) {
20190Sstevel@tonic-gate 		/*
20200Sstevel@tonic-gate 		 * Since ri_lock isn't held by the contract id lookup, this
20210Sstevel@tonic-gate 		 * instance may have been restarted and now be in a new
20220Sstevel@tonic-gate 		 * contract, making the old contract no longer valid for this
20230Sstevel@tonic-gate 		 * instance.
20240Sstevel@tonic-gate 		 */
20250Sstevel@tonic-gate 		if (ctid != inst->ri_i.i_primary_ctid) {
20260Sstevel@tonic-gate 			MUTEX_UNLOCK(&inst->ri_lock);
20270Sstevel@tonic-gate 			inst = NULL;
20280Sstevel@tonic-gate 		}
20290Sstevel@tonic-gate 	}
20300Sstevel@tonic-gate 	return (inst);
20310Sstevel@tonic-gate }
20320Sstevel@tonic-gate 
20330Sstevel@tonic-gate /*
20340Sstevel@tonic-gate  * void contract_action()
20350Sstevel@tonic-gate  *   Take action on contract events.
20360Sstevel@tonic-gate  */
20370Sstevel@tonic-gate static void
contract_action(scf_handle_t * h,restarter_inst_t * inst,ctid_t id,uint32_t type)20380Sstevel@tonic-gate contract_action(scf_handle_t *h, restarter_inst_t *inst, ctid_t id,
20390Sstevel@tonic-gate     uint32_t type)
20400Sstevel@tonic-gate {
20410Sstevel@tonic-gate 	const char *fmri = inst->ri_i.i_fmri;
20420Sstevel@tonic-gate 
204311466SRoger.Faulkner@Sun.COM 	assert(MUTEX_HELD(&inst->ri_lock));
20440Sstevel@tonic-gate 
20450Sstevel@tonic-gate 	/*
20460Sstevel@tonic-gate 	 * If startd has stopped this contract, there is no need to
20470Sstevel@tonic-gate 	 * stop it again.
20480Sstevel@tonic-gate 	 */
20490Sstevel@tonic-gate 	if (inst->ri_i.i_primary_ctid > 0 &&
20500Sstevel@tonic-gate 	    inst->ri_i.i_primary_ctid_stopped)
20510Sstevel@tonic-gate 		return;
20520Sstevel@tonic-gate 
20530Sstevel@tonic-gate 	if ((type & (CT_PR_EV_EMPTY | CT_PR_EV_CORE | CT_PR_EV_SIGNAL
20540Sstevel@tonic-gate 	    | CT_PR_EV_HWERR)) == 0) {
20550Sstevel@tonic-gate 		/*
20560Sstevel@tonic-gate 		 * There shouldn't be other events, since that's not how we set
20570Sstevel@tonic-gate 		 * the terms. Thus, just log an error and drive on.
20580Sstevel@tonic-gate 		 */
20590Sstevel@tonic-gate 		log_framework(LOG_NOTICE,
20600Sstevel@tonic-gate 		    "%s: contract %ld received unexpected critical event "
20610Sstevel@tonic-gate 		    "(%d)\n", fmri, id, type);
20625238Slianep 		return;
20630Sstevel@tonic-gate 	}
20640Sstevel@tonic-gate 
20650Sstevel@tonic-gate 	assert(instance_in_transition(inst) == 0);
20660Sstevel@tonic-gate 
20670Sstevel@tonic-gate 	if (instance_is_wait_style(inst)) {
20680Sstevel@tonic-gate 		/*
20690Sstevel@tonic-gate 		 * We ignore all events; if they impact the
20700Sstevel@tonic-gate 		 * process we're monitoring, then the
20710Sstevel@tonic-gate 		 * wait_thread will stop the instance.
20720Sstevel@tonic-gate 		 */
20730Sstevel@tonic-gate 		log_framework(LOG_DEBUG,
20740Sstevel@tonic-gate 		    "%s: ignoring contract event on wait-style service\n",
20750Sstevel@tonic-gate 		    fmri);
20760Sstevel@tonic-gate 	} else {
20770Sstevel@tonic-gate 		/*
20780Sstevel@tonic-gate 		 * A CT_PR_EV_EMPTY event is an RSTOP_EXIT request.
20790Sstevel@tonic-gate 		 */
20800Sstevel@tonic-gate 		switch (type) {
20810Sstevel@tonic-gate 		case CT_PR_EV_EMPTY:
20820Sstevel@tonic-gate 			(void) stop_instance(h, inst, RSTOP_EXIT);
20830Sstevel@tonic-gate 			break;
20840Sstevel@tonic-gate 		case CT_PR_EV_CORE:
20850Sstevel@tonic-gate 			(void) stop_instance(h, inst, RSTOP_CORE);
20860Sstevel@tonic-gate 			break;
20870Sstevel@tonic-gate 		case CT_PR_EV_SIGNAL:
20880Sstevel@tonic-gate 			(void) stop_instance(h, inst, RSTOP_SIGNAL);
20890Sstevel@tonic-gate 			break;
20900Sstevel@tonic-gate 		case CT_PR_EV_HWERR:
20910Sstevel@tonic-gate 			(void) stop_instance(h, inst, RSTOP_HWERR);
20920Sstevel@tonic-gate 			break;
20930Sstevel@tonic-gate 		}
20940Sstevel@tonic-gate 	}
20950Sstevel@tonic-gate }
20960Sstevel@tonic-gate 
20970Sstevel@tonic-gate /*
20980Sstevel@tonic-gate  * void *restarter_contract_event_thread(void *)
20990Sstevel@tonic-gate  *   Listens to the process contract bundle for critical events, taking action
21000Sstevel@tonic-gate  *   on events from contracts we know we are responsible for.
21010Sstevel@tonic-gate  */
21020Sstevel@tonic-gate /*ARGSUSED*/
21030Sstevel@tonic-gate static void *
restarter_contracts_event_thread(void * unused)21040Sstevel@tonic-gate restarter_contracts_event_thread(void *unused)
21050Sstevel@tonic-gate {
21060Sstevel@tonic-gate 	int fd, err;
21070Sstevel@tonic-gate 	scf_handle_t *local_handle;
21080Sstevel@tonic-gate 
21090Sstevel@tonic-gate 	/*
21100Sstevel@tonic-gate 	 * Await graph load completion.  That is, stop here, until we've scanned
21110Sstevel@tonic-gate 	 * the repository for contract - instance associations.
21120Sstevel@tonic-gate 	 */
21130Sstevel@tonic-gate 	MUTEX_LOCK(&st->st_load_lock);
21140Sstevel@tonic-gate 	while (!(st->st_load_complete && st->st_load_instances == 0))
21150Sstevel@tonic-gate 		(void) pthread_cond_wait(&st->st_load_cv, &st->st_load_lock);
21160Sstevel@tonic-gate 	MUTEX_UNLOCK(&st->st_load_lock);
21170Sstevel@tonic-gate 
21180Sstevel@tonic-gate 	/*
21190Sstevel@tonic-gate 	 * This is a new thread, and thus, gets its own handle
21200Sstevel@tonic-gate 	 * to the repository.
21210Sstevel@tonic-gate 	 */
21220Sstevel@tonic-gate 	if ((local_handle = libscf_handle_create_bound(SCF_VERSION)) == NULL)
21230Sstevel@tonic-gate 		uu_die("Unable to bind a new repository handle: %s\n",
21240Sstevel@tonic-gate 		    scf_strerror(scf_error()));
21250Sstevel@tonic-gate 
21260Sstevel@tonic-gate 	fd = open64(CTFS_ROOT "/process/pbundle", O_RDONLY);
21270Sstevel@tonic-gate 	if (fd == -1)
21280Sstevel@tonic-gate 		uu_die("process bundle open failed");
21290Sstevel@tonic-gate 
21300Sstevel@tonic-gate 	/*
21310Sstevel@tonic-gate 	 * Make sure we get all events (including those generated by configd
21320Sstevel@tonic-gate 	 * before this thread was started).
21330Sstevel@tonic-gate 	 */
21340Sstevel@tonic-gate 	err = ct_event_reset(fd);
21350Sstevel@tonic-gate 	assert(err == 0);
21360Sstevel@tonic-gate 
21370Sstevel@tonic-gate 	for (;;) {
21380Sstevel@tonic-gate 		int efd, sfd;
21390Sstevel@tonic-gate 		ct_evthdl_t ev;
21400Sstevel@tonic-gate 		uint32_t type;
21410Sstevel@tonic-gate 		ctevid_t evid;
21420Sstevel@tonic-gate 		ct_stathdl_t status;
21430Sstevel@tonic-gate 		ctid_t ctid;
21440Sstevel@tonic-gate 		restarter_inst_t *inst;
21450Sstevel@tonic-gate 		uint64_t cookie;
21460Sstevel@tonic-gate 
21470Sstevel@tonic-gate 		if (err = ct_event_read_critical(fd, &ev)) {
21480Sstevel@tonic-gate 			log_error(LOG_WARNING,
21490Sstevel@tonic-gate 			    "Error reading next contract event: %s",
21500Sstevel@tonic-gate 			    strerror(err));
21510Sstevel@tonic-gate 			continue;
21520Sstevel@tonic-gate 		}
21530Sstevel@tonic-gate 
21540Sstevel@tonic-gate 		evid = ct_event_get_evid(ev);
21550Sstevel@tonic-gate 		ctid = ct_event_get_ctid(ev);
21560Sstevel@tonic-gate 		type = ct_event_get_type(ev);
21570Sstevel@tonic-gate 
21580Sstevel@tonic-gate 		/* Fetch cookie. */
21590Sstevel@tonic-gate 		if ((sfd = contract_open(ctid, "process", "status", O_RDONLY))
21600Sstevel@tonic-gate 		    < 0) {
21610Sstevel@tonic-gate 			ct_event_free(ev);
21620Sstevel@tonic-gate 			continue;
21630Sstevel@tonic-gate 		}
21640Sstevel@tonic-gate 
21650Sstevel@tonic-gate 		if (err = ct_status_read(sfd, CTD_COMMON, &status)) {
21660Sstevel@tonic-gate 			log_framework(LOG_WARNING, "Could not get status for "
21670Sstevel@tonic-gate 			    "contract %ld: %s\n", ctid, strerror(err));
21680Sstevel@tonic-gate 
21690Sstevel@tonic-gate 			startd_close(sfd);
21700Sstevel@tonic-gate 			ct_event_free(ev);
21710Sstevel@tonic-gate 			continue;
21720Sstevel@tonic-gate 		}
21730Sstevel@tonic-gate 
21740Sstevel@tonic-gate 		cookie = ct_status_get_cookie(status);
21750Sstevel@tonic-gate 
21764244Sjeanm 		log_framework(LOG_DEBUG, "Received event %d for ctid %ld "
21774244Sjeanm 		    "cookie %lld\n", type, ctid, cookie);
21784244Sjeanm 
21790Sstevel@tonic-gate 		ct_status_free(status);
21800Sstevel@tonic-gate 
21810Sstevel@tonic-gate 		startd_close(sfd);
21820Sstevel@tonic-gate 
21830Sstevel@tonic-gate 		/*
21840Sstevel@tonic-gate 		 * svc.configd(1M) restart handling performed by the
21850Sstevel@tonic-gate 		 * fork_configd_thread.  We don't acknowledge, as that thread
21860Sstevel@tonic-gate 		 * will do so.
21870Sstevel@tonic-gate 		 */
21880Sstevel@tonic-gate 		if (cookie == CONFIGD_COOKIE) {
21890Sstevel@tonic-gate 			ct_event_free(ev);
21900Sstevel@tonic-gate 			continue;
21910Sstevel@tonic-gate 		}
21920Sstevel@tonic-gate 
21934244Sjeanm 		inst = NULL;
21944244Sjeanm 		if (storing_contract != 0 &&
21954244Sjeanm 		    (inst = contract_to_inst(ctid)) == NULL) {
21964244Sjeanm 			/*
21974244Sjeanm 			 * This can happen for two reasons:
21984244Sjeanm 			 * - method_run() has not yet stored the
21994244Sjeanm 			 *    the contract into the internal hash table.
22004244Sjeanm 			 * - we receive an EMPTY event for an abandoned
22014244Sjeanm 			 *    contract.
22024244Sjeanm 			 * If there is any contract in the process of
22034244Sjeanm 			 * being stored into the hash table then re-read
22044244Sjeanm 			 * the event later.
22054244Sjeanm 			 */
22064244Sjeanm 			log_framework(LOG_DEBUG,
22074244Sjeanm 			    "Reset event %d for unknown "
22084244Sjeanm 			    "contract id %ld\n", type, ctid);
22094244Sjeanm 
22104244Sjeanm 			/* don't go too fast */
22114244Sjeanm 			(void) poll(NULL, 0, 100);
22124244Sjeanm 
22134244Sjeanm 			(void) ct_event_reset(fd);
22144244Sjeanm 			ct_event_free(ev);
22154244Sjeanm 			continue;
22164244Sjeanm 		}
22174244Sjeanm 
22184244Sjeanm 		/*
22194244Sjeanm 		 * Do not call contract_to_inst() again if first
22204244Sjeanm 		 * call succeeded.
22214244Sjeanm 		 */
22224244Sjeanm 		if (inst == NULL)
22234244Sjeanm 			inst = contract_to_inst(ctid);
22240Sstevel@tonic-gate 		if (inst == NULL) {
22250Sstevel@tonic-gate 			/*
22260Sstevel@tonic-gate 			 * This can happen if we receive an EMPTY
22270Sstevel@tonic-gate 			 * event for an abandoned contract.
22280Sstevel@tonic-gate 			 */
22290Sstevel@tonic-gate 			log_framework(LOG_DEBUG,
22300Sstevel@tonic-gate 			    "Received event %d for unknown contract id "
22310Sstevel@tonic-gate 			    "%ld\n", type, ctid);
22320Sstevel@tonic-gate 		} else {
22330Sstevel@tonic-gate 			log_framework(LOG_DEBUG,
22340Sstevel@tonic-gate 			    "Received event %d for contract id "
22350Sstevel@tonic-gate 			    "%ld (%s)\n", type, ctid,
22360Sstevel@tonic-gate 			    inst->ri_i.i_fmri);
22370Sstevel@tonic-gate 
22380Sstevel@tonic-gate 			contract_action(local_handle, inst, ctid, type);
22390Sstevel@tonic-gate 
22400Sstevel@tonic-gate 			MUTEX_UNLOCK(&inst->ri_lock);
22410Sstevel@tonic-gate 		}
22420Sstevel@tonic-gate 
22430Sstevel@tonic-gate 		efd = contract_open(ct_event_get_ctid(ev), "process", "ctl",
22440Sstevel@tonic-gate 		    O_WRONLY);
22450Sstevel@tonic-gate 		if (efd != -1) {
22460Sstevel@tonic-gate 			(void) ct_ctl_ack(efd, evid);
22470Sstevel@tonic-gate 			startd_close(efd);
22480Sstevel@tonic-gate 		}
22490Sstevel@tonic-gate 
22500Sstevel@tonic-gate 		ct_event_free(ev);
22510Sstevel@tonic-gate 
22520Sstevel@tonic-gate 	}
22530Sstevel@tonic-gate 
22540Sstevel@tonic-gate 	/*NOTREACHED*/
22550Sstevel@tonic-gate 	return (NULL);
22560Sstevel@tonic-gate }
22570Sstevel@tonic-gate 
22580Sstevel@tonic-gate /*
22590Sstevel@tonic-gate  * Timeout queue, processed by restarter_timeouts_event_thread().
22600Sstevel@tonic-gate  */
22610Sstevel@tonic-gate timeout_queue_t *timeouts;
22620Sstevel@tonic-gate static uu_list_pool_t *timeout_pool;
22630Sstevel@tonic-gate 
22640Sstevel@tonic-gate typedef struct timeout_update {
22650Sstevel@tonic-gate 	pthread_mutex_t		tu_lock;
22660Sstevel@tonic-gate 	pthread_cond_t		tu_cv;
22670Sstevel@tonic-gate 	int			tu_wakeup;
22680Sstevel@tonic-gate } timeout_update_t;
22690Sstevel@tonic-gate 
22700Sstevel@tonic-gate timeout_update_t *tu;
22710Sstevel@tonic-gate 
/*
 * NULL-terminated list of instance FMRIs for which is_timeout_ovr() reports
 * a timeout override.
 */
static const char *timeout_ovr_svcs[] = {
	"svc:/system/manifest-import:default",
	"svc:/network/initial:default",
	"svc:/network/service:default",
	"svc:/system/rmtmpfiles:default",
	"svc:/network/loopback:default",
	"svc:/network/physical:default",
	"svc:/system/device/local:default",
	"svc:/system/metainit:default",
	"svc:/system/filesystem/usr:default",
	"svc:/system/filesystem/minimal:default",
	"svc:/system/filesystem/local:default",
	NULL
};
22860Sstevel@tonic-gate 
22870Sstevel@tonic-gate int
is_timeout_ovr(restarter_inst_t * inst)22880Sstevel@tonic-gate is_timeout_ovr(restarter_inst_t *inst)
22890Sstevel@tonic-gate {
22900Sstevel@tonic-gate 	int i;
22910Sstevel@tonic-gate 
22920Sstevel@tonic-gate 	for (i = 0; timeout_ovr_svcs[i] != NULL; ++i) {
22930Sstevel@tonic-gate 		if (strcmp(inst->ri_i.i_fmri, timeout_ovr_svcs[i]) == 0) {
22940Sstevel@tonic-gate 			log_instance(inst, B_TRUE, "Timeout override by "
22955238Slianep 			    "svc.startd.  Using infinite timeout.");
22960Sstevel@tonic-gate 			return (1);
22970Sstevel@tonic-gate 		}
22980Sstevel@tonic-gate 	}
22990Sstevel@tonic-gate 
23000Sstevel@tonic-gate 	return (0);
23010Sstevel@tonic-gate }
23020Sstevel@tonic-gate 
23030Sstevel@tonic-gate /*ARGSUSED*/
23040Sstevel@tonic-gate static int
timeout_compare(const void * lc_arg,const void * rc_arg,void * private)23050Sstevel@tonic-gate timeout_compare(const void *lc_arg, const void *rc_arg, void *private)
23060Sstevel@tonic-gate {
23070Sstevel@tonic-gate 	hrtime_t t1 = ((const timeout_entry_t *)lc_arg)->te_timeout;
23080Sstevel@tonic-gate 	hrtime_t t2 = ((const timeout_entry_t *)rc_arg)->te_timeout;
23090Sstevel@tonic-gate 
23100Sstevel@tonic-gate 	if (t1 > t2)
23110Sstevel@tonic-gate 		return (1);
23120Sstevel@tonic-gate 	else if (t1 < t2)
23130Sstevel@tonic-gate 		return (-1);
23140Sstevel@tonic-gate 	return (0);
23150Sstevel@tonic-gate }
23160Sstevel@tonic-gate 
23170Sstevel@tonic-gate void
timeout_init()23180Sstevel@tonic-gate timeout_init()
23190Sstevel@tonic-gate {
23200Sstevel@tonic-gate 	timeouts = startd_zalloc(sizeof (timeout_queue_t));
23210Sstevel@tonic-gate 
23220Sstevel@tonic-gate 	(void) pthread_mutex_init(&timeouts->tq_lock, &mutex_attrs);
23230Sstevel@tonic-gate 
23240Sstevel@tonic-gate 	timeout_pool = startd_list_pool_create("timeouts",
23250Sstevel@tonic-gate 	    sizeof (timeout_entry_t), offsetof(timeout_entry_t, te_link),
23260Sstevel@tonic-gate 	    timeout_compare, UU_LIST_POOL_DEBUG);
23270Sstevel@tonic-gate 	assert(timeout_pool != NULL);
23280Sstevel@tonic-gate 
23290Sstevel@tonic-gate 	timeouts->tq_list = startd_list_create(timeout_pool,
23300Sstevel@tonic-gate 	    timeouts, UU_LIST_SORTED);
23310Sstevel@tonic-gate 	assert(timeouts->tq_list != NULL);
23320Sstevel@tonic-gate 
23330Sstevel@tonic-gate 	tu = startd_zalloc(sizeof (timeout_update_t));
23340Sstevel@tonic-gate 	(void) pthread_cond_init(&tu->tu_cv, NULL);
23350Sstevel@tonic-gate 	(void) pthread_mutex_init(&tu->tu_lock, &mutex_attrs);
23360Sstevel@tonic-gate }
23370Sstevel@tonic-gate 
23380Sstevel@tonic-gate void
timeout_insert(restarter_inst_t * inst,ctid_t cid,uint64_t timeout_sec)23390Sstevel@tonic-gate timeout_insert(restarter_inst_t *inst, ctid_t cid, uint64_t timeout_sec)
23400Sstevel@tonic-gate {
23410Sstevel@tonic-gate 	hrtime_t now, timeout;
23420Sstevel@tonic-gate 	timeout_entry_t *entry;
23430Sstevel@tonic-gate 	uu_list_index_t idx;
23440Sstevel@tonic-gate 
234511466SRoger.Faulkner@Sun.COM 	assert(MUTEX_HELD(&inst->ri_lock));
23460Sstevel@tonic-gate 
23470Sstevel@tonic-gate 	now = gethrtime();
23480Sstevel@tonic-gate 
23490Sstevel@tonic-gate 	/*
23500Sstevel@tonic-gate 	 * If we overflow LLONG_MAX, we're never timing out anyways, so
23510Sstevel@tonic-gate 	 * just return.
23520Sstevel@tonic-gate 	 */
23530Sstevel@tonic-gate 	if (timeout_sec >= (LLONG_MAX - now) / 1000000000LL) {
23540Sstevel@tonic-gate 		log_instance(inst, B_TRUE, "timeout_seconds too large, "
23550Sstevel@tonic-gate 		    "treating as infinite.");
23560Sstevel@tonic-gate 		return;
23570Sstevel@tonic-gate 	}
23580Sstevel@tonic-gate 
23590Sstevel@tonic-gate 	/* hrtime is in nanoseconds. Convert timeout_sec. */
23600Sstevel@tonic-gate 	timeout = now + (timeout_sec * 1000000000LL);
23610Sstevel@tonic-gate 
23620Sstevel@tonic-gate 	entry = startd_alloc(sizeof (timeout_entry_t));
23630Sstevel@tonic-gate 	entry->te_timeout = timeout;
23640Sstevel@tonic-gate 	entry->te_ctid = cid;
23650Sstevel@tonic-gate 	entry->te_fmri = safe_strdup(inst->ri_i.i_fmri);
23660Sstevel@tonic-gate 	entry->te_logstem = safe_strdup(inst->ri_logstem);
23670Sstevel@tonic-gate 	entry->te_fired = 0;
23680Sstevel@tonic-gate 	/* Insert the calculated timeout time onto the queue. */
23690Sstevel@tonic-gate 	MUTEX_LOCK(&timeouts->tq_lock);
23700Sstevel@tonic-gate 	(void) uu_list_find(timeouts->tq_list, entry, NULL, &idx);
23710Sstevel@tonic-gate 	uu_list_node_init(entry, &entry->te_link, timeout_pool);
23720Sstevel@tonic-gate 	uu_list_insert(timeouts->tq_list, entry, idx);
23730Sstevel@tonic-gate 	MUTEX_UNLOCK(&timeouts->tq_lock);
23740Sstevel@tonic-gate 
23750Sstevel@tonic-gate 	assert(inst->ri_timeout == NULL);
23760Sstevel@tonic-gate 	inst->ri_timeout = entry;
23770Sstevel@tonic-gate 
23780Sstevel@tonic-gate 	MUTEX_LOCK(&tu->tu_lock);
23790Sstevel@tonic-gate 	tu->tu_wakeup = 1;
23800Sstevel@tonic-gate 	(void) pthread_cond_broadcast(&tu->tu_cv);
23810Sstevel@tonic-gate 	MUTEX_UNLOCK(&tu->tu_lock);
23820Sstevel@tonic-gate }
23830Sstevel@tonic-gate 
23840Sstevel@tonic-gate 
23850Sstevel@tonic-gate void
timeout_remove(restarter_inst_t * inst,ctid_t cid)23860Sstevel@tonic-gate timeout_remove(restarter_inst_t *inst, ctid_t cid)
23870Sstevel@tonic-gate {
238811466SRoger.Faulkner@Sun.COM 	assert(MUTEX_HELD(&inst->ri_lock));
23890Sstevel@tonic-gate 
23900Sstevel@tonic-gate 	if (inst->ri_timeout == NULL)
23910Sstevel@tonic-gate 		return;
23920Sstevel@tonic-gate 
23930Sstevel@tonic-gate 	assert(inst->ri_timeout->te_ctid == cid);
23940Sstevel@tonic-gate 
23950Sstevel@tonic-gate 	MUTEX_LOCK(&timeouts->tq_lock);
23960Sstevel@tonic-gate 	uu_list_remove(timeouts->tq_list, inst->ri_timeout);
23970Sstevel@tonic-gate 	MUTEX_UNLOCK(&timeouts->tq_lock);
23980Sstevel@tonic-gate 
23990Sstevel@tonic-gate 	free(inst->ri_timeout->te_fmri);
24000Sstevel@tonic-gate 	free(inst->ri_timeout->te_logstem);
24010Sstevel@tonic-gate 	startd_free(inst->ri_timeout, sizeof (timeout_entry_t));
24020Sstevel@tonic-gate 	inst->ri_timeout = NULL;
24030Sstevel@tonic-gate }
24040Sstevel@tonic-gate 
24050Sstevel@tonic-gate static int
timeout_now()24060Sstevel@tonic-gate timeout_now()
24070Sstevel@tonic-gate {
24080Sstevel@tonic-gate 	timeout_entry_t *e;
24090Sstevel@tonic-gate 	hrtime_t now;
24100Sstevel@tonic-gate 	int ret;
24110Sstevel@tonic-gate 
24120Sstevel@tonic-gate 	now = gethrtime();
24130Sstevel@tonic-gate 
24140Sstevel@tonic-gate 	/*
24150Sstevel@tonic-gate 	 * Walk through the (sorted) timeouts list.  While the timeout
24160Sstevel@tonic-gate 	 * at the head of the list is <= the current time, kill the
24170Sstevel@tonic-gate 	 * method.
24180Sstevel@tonic-gate 	 */
24190Sstevel@tonic-gate 	MUTEX_LOCK(&timeouts->tq_lock);
24200Sstevel@tonic-gate 
24210Sstevel@tonic-gate 	for (e = uu_list_first(timeouts->tq_list);
24220Sstevel@tonic-gate 	    e != NULL && e->te_timeout <= now;
24230Sstevel@tonic-gate 	    e = uu_list_next(timeouts->tq_list, e)) {
24240Sstevel@tonic-gate 		log_framework(LOG_WARNING, "%s: Method or service exit timed "
24250Sstevel@tonic-gate 		    "out.  Killing contract %ld.\n", e->te_fmri, e->te_ctid);
24260Sstevel@tonic-gate 		log_instance_fmri(e->te_fmri, e->te_logstem, B_TRUE,
24275238Slianep 		    "Method or service exit timed out.  Killing contract %ld.",
24280Sstevel@tonic-gate 		    e->te_ctid);
24290Sstevel@tonic-gate 		e->te_fired = 1;
24300Sstevel@tonic-gate 		(void) contract_kill(e->te_ctid, SIGKILL, e->te_fmri);
24310Sstevel@tonic-gate 	}
24320Sstevel@tonic-gate 
24330Sstevel@tonic-gate 	if (uu_list_numnodes(timeouts->tq_list) > 0)
24340Sstevel@tonic-gate 		ret = 0;
24350Sstevel@tonic-gate 	else
24360Sstevel@tonic-gate 		ret = -1;
24370Sstevel@tonic-gate 
24380Sstevel@tonic-gate 	MUTEX_UNLOCK(&timeouts->tq_lock);
24390Sstevel@tonic-gate 
24400Sstevel@tonic-gate 	return (ret);
24410Sstevel@tonic-gate }
24420Sstevel@tonic-gate 
24430Sstevel@tonic-gate /*
24440Sstevel@tonic-gate  * void *restarter_timeouts_event_thread(void *)
24450Sstevel@tonic-gate  *   Responsible for monitoring the method timeouts.  This thread must
24460Sstevel@tonic-gate  *   be started before any methods are called.
24470Sstevel@tonic-gate  */
24480Sstevel@tonic-gate /*ARGSUSED*/
24490Sstevel@tonic-gate static void *
restarter_timeouts_event_thread(void * unused)24500Sstevel@tonic-gate restarter_timeouts_event_thread(void *unused)
24510Sstevel@tonic-gate {
24520Sstevel@tonic-gate 	/*
24530Sstevel@tonic-gate 	 * Timeouts are entered on a priority queue, which is processed by
24540Sstevel@tonic-gate 	 * this thread.  As timeouts are specified in seconds, we'll do
24550Sstevel@tonic-gate 	 * the necessary processing every second, as long as the queue
24560Sstevel@tonic-gate 	 * is not empty.
24570Sstevel@tonic-gate 	 */
24580Sstevel@tonic-gate 
24590Sstevel@tonic-gate 	/*CONSTCOND*/
24600Sstevel@tonic-gate 	while (1) {
24610Sstevel@tonic-gate 		/*
24620Sstevel@tonic-gate 		 * As long as the timeout list isn't empty, process it
24630Sstevel@tonic-gate 		 * every second.
24640Sstevel@tonic-gate 		 */
24650Sstevel@tonic-gate 		if (timeout_now() == 0) {
24660Sstevel@tonic-gate 			(void) sleep(1);
24670Sstevel@tonic-gate 			continue;
24680Sstevel@tonic-gate 		}
24690Sstevel@tonic-gate 
24700Sstevel@tonic-gate 		/* The list is empty, wait until we have more timeouts. */
24710Sstevel@tonic-gate 		MUTEX_LOCK(&tu->tu_lock);
24720Sstevel@tonic-gate 
24730Sstevel@tonic-gate 		while (tu->tu_wakeup == 0)
24740Sstevel@tonic-gate 			(void) pthread_cond_wait(&tu->tu_cv, &tu->tu_lock);
24750Sstevel@tonic-gate 
24760Sstevel@tonic-gate 		tu->tu_wakeup = 0;
24770Sstevel@tonic-gate 		MUTEX_UNLOCK(&tu->tu_lock);
24780Sstevel@tonic-gate 	}
24790Sstevel@tonic-gate 
24800Sstevel@tonic-gate 	return (NULL);
24810Sstevel@tonic-gate }
24820Sstevel@tonic-gate 
24830Sstevel@tonic-gate void
restarter_start()24840Sstevel@tonic-gate restarter_start()
24850Sstevel@tonic-gate {
24860Sstevel@tonic-gate 	(void) startd_thread_create(restarter_timeouts_event_thread, NULL);
24870Sstevel@tonic-gate 	(void) startd_thread_create(restarter_event_thread, NULL);
24880Sstevel@tonic-gate 	(void) startd_thread_create(restarter_contracts_event_thread, NULL);
24890Sstevel@tonic-gate 	(void) startd_thread_create(wait_thread, NULL);
24900Sstevel@tonic-gate }
24910Sstevel@tonic-gate 
24920Sstevel@tonic-gate 
24930Sstevel@tonic-gate void
restarter_init()24940Sstevel@tonic-gate restarter_init()
24950Sstevel@tonic-gate {
24960Sstevel@tonic-gate 	restarter_instance_pool = startd_list_pool_create("restarter_instances",
24970Sstevel@tonic-gate 	    sizeof (restarter_inst_t), offsetof(restarter_inst_t,
24985238Slianep 	    ri_link), restarter_instance_compare, UU_LIST_POOL_DEBUG);
24990Sstevel@tonic-gate 	(void) memset(&instance_list, 0, sizeof (instance_list));
25000Sstevel@tonic-gate 
25010Sstevel@tonic-gate 	(void) pthread_mutex_init(&instance_list.ril_lock, &mutex_attrs);
25020Sstevel@tonic-gate 	instance_list.ril_instance_list = startd_list_create(
25030Sstevel@tonic-gate 	    restarter_instance_pool, &instance_list, UU_LIST_SORTED);
25040Sstevel@tonic-gate 
25050Sstevel@tonic-gate 	restarter_queue_pool = startd_list_pool_create(
25060Sstevel@tonic-gate 	    "restarter_instance_queue", sizeof (restarter_instance_qentry_t),
25070Sstevel@tonic-gate 	    offsetof(restarter_instance_qentry_t,  riq_link), NULL,
25080Sstevel@tonic-gate 	    UU_LIST_POOL_DEBUG);
25090Sstevel@tonic-gate 
25100Sstevel@tonic-gate 	contract_list_pool = startd_list_pool_create(
25110Sstevel@tonic-gate 	    "contract_list", sizeof (contract_entry_t),
25120Sstevel@tonic-gate 	    offsetof(contract_entry_t,  ce_link), NULL,
25130Sstevel@tonic-gate 	    UU_LIST_POOL_DEBUG);
25140Sstevel@tonic-gate 	contract_hash_init();
25150Sstevel@tonic-gate 
25160Sstevel@tonic-gate 	log_framework(LOG_DEBUG, "Initialized restarter\n");
25170Sstevel@tonic-gate }
2518