11958Slianep /*
21958Slianep * CDDL HEADER START
31958Slianep *
41958Slianep * The contents of this file are subject to the terms of the
51958Slianep * Common Development and Distribution License (the "License").
61958Slianep * You may not use this file except in compliance with the License.
71958Slianep *
81958Slianep * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
91958Slianep * or http://www.opensolaris.org/os/licensing.
101958Slianep * See the License for the specific language governing permissions
111958Slianep * and limitations under the License.
121958Slianep *
131958Slianep * When distributing Covered Code, include this CDDL HEADER in each
141958Slianep * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
151958Slianep * If applicable, add the following below this CDDL HEADER, with the
161958Slianep * fields enclosed by brackets "[]" replaced with your own identifying
171958Slianep * information: Portions Copyright [yyyy] [name of copyright owner]
181958Slianep *
191958Slianep * CDDL HEADER END
201958Slianep */
211958Slianep /*
22*9333SRenaud.Manus@Sun.COM * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
231958Slianep * Use is subject to license terms.
241958Slianep */
251958Slianep
261958Slianep
271958Slianep /*
281958Slianep * transition.c - Graph State Machine
291958Slianep *
301958Slianep * The graph state machine is implemented here, with a typical approach
311958Slianep * of a function per state. Separating the implementation allows more
321958Slianep * clarity into the actions taken on notification of state change, as well
331958Slianep * as a place for future expansion including hooks for configurable actions.
341958Slianep * All functions are called with dgraph_lock held.
351958Slianep *
361958Slianep * The start action for this state machine is not explicit. The states
372747Sbustos * (ONLINE and DEGRADED) which need to know when they're entering the state
381958Slianep * due to a daemon restart implement this understanding by checking for
391958Slianep * transition from uninitialized. In the future, this would likely be better
401958Slianep * as an explicit start action instead of relying on an overloaded transition.
411958Slianep *
421958Slianep * All gt_enter functions use the same set of return codes.
431958Slianep * 0 success
441958Slianep * ECONNABORTED repository connection aborted
451958Slianep */
461958Slianep
471958Slianep #include "startd.h"
481958Slianep
491958Slianep static int
gt_running(restarter_instance_state_t state)501958Slianep gt_running(restarter_instance_state_t state)
511958Slianep {
521958Slianep if (state == RESTARTER_STATE_ONLINE ||
531958Slianep state == RESTARTER_STATE_DEGRADED)
541958Slianep return (1);
551958Slianep
561958Slianep return (0);
571958Slianep }
581958Slianep
591958Slianep static int
gt_enter_uninit(scf_handle_t * h,graph_vertex_t * v,restarter_instance_state_t old_state,restarter_error_t rerr)601958Slianep gt_enter_uninit(scf_handle_t *h, graph_vertex_t *v,
611958Slianep restarter_instance_state_t old_state, restarter_error_t rerr)
621958Slianep {
631958Slianep int err;
641958Slianep scf_instance_t *inst;
651958Slianep
661958Slianep /* Initialize instance by refreshing it. */
671958Slianep
681958Slianep err = libscf_fmri_get_instance(h, v->gv_name, &inst);
691958Slianep switch (err) {
701958Slianep case 0:
711958Slianep break;
721958Slianep
731958Slianep case ECONNABORTED:
741958Slianep return (ECONNABORTED);
751958Slianep
761958Slianep case ENOENT:
771958Slianep return (0);
781958Slianep
791958Slianep case EINVAL:
801958Slianep case ENOTSUP:
811958Slianep default:
821958Slianep bad_error("libscf_fmri_get_instance", err);
831958Slianep }
841958Slianep
851958Slianep err = refresh_vertex(v, inst);
861958Slianep if (err == 0)
871958Slianep graph_enable_by_vertex(v, v->gv_flags & GV_ENABLED, 0);
881958Slianep
891958Slianep scf_instance_destroy(inst);
901958Slianep
911958Slianep /* If the service was running, propagate a stop event. */
921958Slianep if (gt_running(old_state)) {
931958Slianep log_framework(LOG_DEBUG, "Propagating stop of %s.\n",
941958Slianep v->gv_name);
951958Slianep
962339Slianep graph_transition_propagate(v, PROPAGATE_STOP, rerr);
971958Slianep }
981958Slianep
991958Slianep graph_transition_sulogin(RESTARTER_STATE_UNINIT, old_state);
1001958Slianep return (0);
1011958Slianep }
1021958Slianep
1032747Sbustos /* ARGSUSED */
1041958Slianep static int
gt_enter_maint(scf_handle_t * h,graph_vertex_t * v,restarter_instance_state_t old_state,restarter_error_t rerr)1051958Slianep gt_enter_maint(scf_handle_t *h, graph_vertex_t *v,
1061958Slianep restarter_instance_state_t old_state, restarter_error_t rerr)
1071958Slianep {
1088354SRenaud.Manus@Sun.COM int to_offline = v->gv_flags & GV_TOOFFLINE;
1098354SRenaud.Manus@Sun.COM
1102339Slianep /*
1112339Slianep * If the service was running, propagate a stop event. If the
1122339Slianep * service was not running the maintenance transition may satisfy
1132339Slianep * optional dependencies and should be propagated to determine
1142339Slianep * whether new dependents are satisfiable.
1157630SRenaud.Manus@Sun.COM * Instances that transition to maintenance and have the GV_TOOFFLINE
1167630SRenaud.Manus@Sun.COM * flag are special because they can expose new subtree leaves so
1177630SRenaud.Manus@Sun.COM * propagate the offline to the instance dependencies.
1182339Slianep */
1198354SRenaud.Manus@Sun.COM
1208354SRenaud.Manus@Sun.COM /* instance transitioning to maintenance is considered disabled */
1218354SRenaud.Manus@Sun.COM v->gv_flags &= ~GV_TODISABLE;
1228354SRenaud.Manus@Sun.COM v->gv_flags &= ~GV_TOOFFLINE;
1238354SRenaud.Manus@Sun.COM
1241958Slianep if (gt_running(old_state)) {
1257630SRenaud.Manus@Sun.COM /*
1267630SRenaud.Manus@Sun.COM * Handle state change during instance disabling.
1277630SRenaud.Manus@Sun.COM * Propagate offline to the new exposed leaves.
1287630SRenaud.Manus@Sun.COM */
1298354SRenaud.Manus@Sun.COM if (to_offline) {
1307630SRenaud.Manus@Sun.COM log_framework(LOG_DEBUG, "%s removed from subtree\n",
1317630SRenaud.Manus@Sun.COM v->gv_name);
1328354SRenaud.Manus@Sun.COM
1337630SRenaud.Manus@Sun.COM graph_offline_subtree_leaves(v, (void *)h);
1347630SRenaud.Manus@Sun.COM }
1357630SRenaud.Manus@Sun.COM
1362339Slianep log_framework(LOG_DEBUG, "Propagating maintenance (stop) of "
1372339Slianep "%s.\n", v->gv_name);
1381958Slianep
1392339Slianep graph_transition_propagate(v, PROPAGATE_STOP, rerr);
1402339Slianep } else {
1411958Slianep log_framework(LOG_DEBUG, "Propagating maintenance of %s.\n",
1421958Slianep v->gv_name);
1431958Slianep
1442339Slianep graph_transition_propagate(v, PROPAGATE_SAT, rerr);
1451958Slianep }
1461958Slianep
1471958Slianep graph_transition_sulogin(RESTARTER_STATE_MAINT, old_state);
1481958Slianep return (0);
1491958Slianep }
1501958Slianep
1512747Sbustos /* ARGSUSED */
1521958Slianep static int
gt_enter_offline(scf_handle_t * h,graph_vertex_t * v,restarter_instance_state_t old_state,restarter_error_t rerr)1531958Slianep gt_enter_offline(scf_handle_t *h, graph_vertex_t *v,
1541958Slianep restarter_instance_state_t old_state, restarter_error_t rerr)
1551958Slianep {
1568354SRenaud.Manus@Sun.COM int to_offline = v->gv_flags & GV_TOOFFLINE;
1578354SRenaud.Manus@Sun.COM
1588354SRenaud.Manus@Sun.COM v->gv_flags &= ~GV_TOOFFLINE;
1598354SRenaud.Manus@Sun.COM
1601958Slianep /*
1611958Slianep * If the instance should be enabled, see if we can start it.
1621958Slianep * Otherwise send a disable command.
1637630SRenaud.Manus@Sun.COM * If a instance has the GV_TOOFFLINE flag set then it must
1647630SRenaud.Manus@Sun.COM * remains offline until the disable process completes.
1651958Slianep */
1661958Slianep if (v->gv_flags & GV_ENABLED) {
1678354SRenaud.Manus@Sun.COM if (to_offline == 0)
1687630SRenaud.Manus@Sun.COM graph_start_if_satisfied(v);
1691958Slianep } else {
1701958Slianep if (gt_running(old_state) && v->gv_post_disable_f)
1711958Slianep v->gv_post_disable_f();
1727630SRenaud.Manus@Sun.COM
1731958Slianep vertex_send_event(v, RESTARTER_EVENT_TYPE_DISABLE);
1741958Slianep }
1751958Slianep
1763639Srm88369 /*
1773639Srm88369 * If the service was running, propagate a stop event. If the
1783639Srm88369 * service was not running the offline transition may satisfy
1793639Srm88369 * optional dependencies and should be propagated to determine
1803639Srm88369 * whether new dependents are satisfiable.
1817630SRenaud.Manus@Sun.COM * Instances that transition to offline and have the GV_TOOFFLINE flag
1827630SRenaud.Manus@Sun.COM * are special because they can expose new subtree leaves so propagate
1837630SRenaud.Manus@Sun.COM * the offline to the instance dependencies.
1843639Srm88369 */
1851958Slianep if (gt_running(old_state)) {
1867630SRenaud.Manus@Sun.COM /*
1877630SRenaud.Manus@Sun.COM * Handle state change during instance disabling.
1887630SRenaud.Manus@Sun.COM * Propagate offline to the new exposed leaves.
1897630SRenaud.Manus@Sun.COM */
1908354SRenaud.Manus@Sun.COM if (to_offline) {
1917630SRenaud.Manus@Sun.COM log_framework(LOG_DEBUG, "%s removed from subtree\n",
1927630SRenaud.Manus@Sun.COM v->gv_name);
1938354SRenaud.Manus@Sun.COM
1947630SRenaud.Manus@Sun.COM graph_offline_subtree_leaves(v, (void *)h);
1957630SRenaud.Manus@Sun.COM }
1967630SRenaud.Manus@Sun.COM
1971958Slianep log_framework(LOG_DEBUG, "Propagating stop of %s.\n",
1981958Slianep v->gv_name);
1991958Slianep
2002339Slianep graph_transition_propagate(v, PROPAGATE_STOP, rerr);
2018354SRenaud.Manus@Sun.COM
2028354SRenaud.Manus@Sun.COM /*
2038354SRenaud.Manus@Sun.COM * The offline transition may satisfy require_any/restart
2048354SRenaud.Manus@Sun.COM * dependencies and should be propagated to determine
2058354SRenaud.Manus@Sun.COM * whether new dependents are satisfiable.
2068354SRenaud.Manus@Sun.COM */
2078354SRenaud.Manus@Sun.COM graph_transition_propagate(v, PROPAGATE_SAT, rerr);
2083639Srm88369 } else {
2093639Srm88369 log_framework(LOG_DEBUG, "Propagating offline of %s.\n",
2103639Srm88369 v->gv_name);
2113639Srm88369
2123639Srm88369 graph_transition_propagate(v, PROPAGATE_SAT, rerr);
2131958Slianep }
2141958Slianep
2151958Slianep graph_transition_sulogin(RESTARTER_STATE_OFFLINE, old_state);
2161958Slianep return (0);
2171958Slianep }
2181958Slianep
2192747Sbustos /* ARGSUSED */
2201958Slianep static int
gt_enter_disabled(scf_handle_t * h,graph_vertex_t * v,restarter_instance_state_t old_state,restarter_error_t rerr)2211958Slianep gt_enter_disabled(scf_handle_t *h, graph_vertex_t *v,
2221958Slianep restarter_instance_state_t old_state, restarter_error_t rerr)
2231958Slianep {
2248354SRenaud.Manus@Sun.COM int to_offline = v->gv_flags & GV_TOOFFLINE;
2258354SRenaud.Manus@Sun.COM
2268354SRenaud.Manus@Sun.COM v->gv_flags &= ~GV_TODISABLE;
2278354SRenaud.Manus@Sun.COM v->gv_flags &= ~GV_TOOFFLINE;
2287630SRenaud.Manus@Sun.COM
2291958Slianep /*
2301958Slianep * If the instance should be disabled, no problem. Otherwise,
2311958Slianep * send an enable command, which should result in the instance
2327630SRenaud.Manus@Sun.COM * moving to OFFLINE unless the instance is part of a subtree
2337630SRenaud.Manus@Sun.COM * (non root) and in this case the result is unpredictable.
2341958Slianep */
2351958Slianep if (v->gv_flags & GV_ENABLED) {
2361958Slianep vertex_send_event(v, RESTARTER_EVENT_TYPE_ENABLE);
2371958Slianep } else if (gt_running(old_state) && v->gv_post_disable_f) {
2381958Slianep v->gv_post_disable_f();
2391958Slianep }
2401958Slianep
2411958Slianep /*
2422339Slianep * If the service was running, propagate this as a stop. If the
2432339Slianep * service was not running the disabled transition may satisfy
2442339Slianep * optional dependencies and should be propagated to determine
2452339Slianep * whether new dependents are satisfiable.
2461958Slianep */
2471958Slianep if (gt_running(old_state)) {
2487630SRenaud.Manus@Sun.COM /*
2497630SRenaud.Manus@Sun.COM * We need to propagate the offline to new exposed leaves in
2507630SRenaud.Manus@Sun.COM * case we've just disabled an instance that was part of a
2517630SRenaud.Manus@Sun.COM * subtree.
2527630SRenaud.Manus@Sun.COM */
2538354SRenaud.Manus@Sun.COM if (to_offline) {
2547630SRenaud.Manus@Sun.COM log_framework(LOG_DEBUG, "%s removed from subtree\n",
2557630SRenaud.Manus@Sun.COM v->gv_name);
2567630SRenaud.Manus@Sun.COM
2577630SRenaud.Manus@Sun.COM /*
2587630SRenaud.Manus@Sun.COM * Handle state change during instance disabling.
2597630SRenaud.Manus@Sun.COM * Propagate offline to the new exposed leaves.
2607630SRenaud.Manus@Sun.COM */
2617630SRenaud.Manus@Sun.COM graph_offline_subtree_leaves(v, (void *)h);
2627630SRenaud.Manus@Sun.COM }
2637630SRenaud.Manus@Sun.COM
2647630SRenaud.Manus@Sun.COM
2651958Slianep log_framework(LOG_DEBUG, "Propagating stop of %s.\n",
2661958Slianep v->gv_name);
2671958Slianep
2682339Slianep graph_transition_propagate(v, PROPAGATE_STOP, rerr);
2691958Slianep
2702339Slianep } else {
2711958Slianep log_framework(LOG_DEBUG, "Propagating disable of %s.\n",
2721958Slianep v->gv_name);
2731958Slianep
2742339Slianep graph_transition_propagate(v, PROPAGATE_SAT, rerr);
2751958Slianep }
2761958Slianep
2771958Slianep graph_transition_sulogin(RESTARTER_STATE_DISABLED, old_state);
2781958Slianep return (0);
2791958Slianep }
2801958Slianep
2811958Slianep static int
gt_internal_online_or_degraded(scf_handle_t * h,graph_vertex_t * v,restarter_instance_state_t old_state,restarter_error_t rerr)2821958Slianep gt_internal_online_or_degraded(scf_handle_t *h, graph_vertex_t *v,
2831958Slianep restarter_instance_state_t old_state, restarter_error_t rerr)
2841958Slianep {
2851958Slianep int r;
2861958Slianep
2871958Slianep /*
2881958Slianep * If the instance has just come up, update the start
2891958Slianep * snapshot.
2901958Slianep */
2911958Slianep if (gt_running(old_state) == 0) {
2921958Slianep /*
2931958Slianep * Don't fire if we're just recovering state
2941958Slianep * after a restart.
2951958Slianep */
2961958Slianep if (old_state != RESTARTER_STATE_UNINIT &&
2971958Slianep v->gv_post_online_f)
2981958Slianep v->gv_post_online_f();
2991958Slianep
3001958Slianep r = libscf_snapshots_poststart(h, v->gv_name, B_TRUE);
3011958Slianep switch (r) {
3021958Slianep case 0:
3031958Slianep case ENOENT:
3041958Slianep /*
3051958Slianep * If ENOENT, the instance must have been
3061958Slianep * deleted. Pretend we were successful since
3071958Slianep * we should get a delete event later.
3081958Slianep */
3091958Slianep break;
3101958Slianep
3111958Slianep case ECONNABORTED:
3121958Slianep return (ECONNABORTED);
3131958Slianep
3141958Slianep case EACCES:
3151958Slianep case ENOTSUP:
3161958Slianep default:
3171958Slianep bad_error("libscf_snapshots_poststart", r);
3181958Slianep }
3191958Slianep }
320*9333SRenaud.Manus@Sun.COM
321*9333SRenaud.Manus@Sun.COM if (!(v->gv_flags & GV_ENABLED)) {
3221958Slianep vertex_send_event(v, RESTARTER_EVENT_TYPE_DISABLE);
323*9333SRenaud.Manus@Sun.COM } else if (v->gv_flags & GV_TOOFFLINE) {
324*9333SRenaud.Manus@Sun.COM /*
325*9333SRenaud.Manus@Sun.COM * If the vertex has the GV_TOOFFLINE flag set then that's
326*9333SRenaud.Manus@Sun.COM * because the instance was transitioning from offline to
327*9333SRenaud.Manus@Sun.COM * online and the reverse disable algorithm doesn't offline
328*9333SRenaud.Manus@Sun.COM * those instances because it was already appearing offline.
329*9333SRenaud.Manus@Sun.COM * So do it now.
330*9333SRenaud.Manus@Sun.COM */
331*9333SRenaud.Manus@Sun.COM offline_vertex(v);
332*9333SRenaud.Manus@Sun.COM }
3331958Slianep
3341958Slianep if (gt_running(old_state) == 0) {
3351958Slianep log_framework(LOG_DEBUG, "Propagating start of %s.\n",
3361958Slianep v->gv_name);
3371958Slianep
3382339Slianep graph_transition_propagate(v, PROPAGATE_START, rerr);
3391958Slianep } else if (rerr == RERR_REFRESH) {
3401958Slianep /* For refresh we'll get a message sans state change */
3411958Slianep
3421958Slianep log_framework(LOG_DEBUG, "Propagating refresh of %s.\n",
3431958Slianep v->gv_name);
3441958Slianep
3452339Slianep graph_transition_propagate(v, PROPAGATE_STOP, rerr);
3461958Slianep }
3471958Slianep
3481958Slianep return (0);
3491958Slianep }
3501958Slianep
3511958Slianep static int
gt_enter_online(scf_handle_t * h,graph_vertex_t * v,restarter_instance_state_t old_state,restarter_error_t rerr)3521958Slianep gt_enter_online(scf_handle_t *h, graph_vertex_t *v,
3531958Slianep restarter_instance_state_t old_state, restarter_error_t rerr)
3541958Slianep {
3551958Slianep int r;
3561958Slianep
3571958Slianep r = gt_internal_online_or_degraded(h, v, old_state, rerr);
3581958Slianep if (r != 0)
3591958Slianep return (r);
3601958Slianep
3611958Slianep graph_transition_sulogin(RESTARTER_STATE_ONLINE, old_state);
3621958Slianep return (0);
3631958Slianep }
3641958Slianep
3651958Slianep static int
gt_enter_degraded(scf_handle_t * h,graph_vertex_t * v,restarter_instance_state_t old_state,restarter_error_t rerr)3661958Slianep gt_enter_degraded(scf_handle_t *h, graph_vertex_t *v,
3671958Slianep restarter_instance_state_t old_state, restarter_error_t rerr)
3681958Slianep {
3691958Slianep int r;
3701958Slianep
3711958Slianep r = gt_internal_online_or_degraded(h, v, old_state, rerr);
3721958Slianep if (r != 0)
3731958Slianep return (r);
3741958Slianep
3751958Slianep graph_transition_sulogin(RESTARTER_STATE_DEGRADED, old_state);
3761958Slianep return (0);
3771958Slianep }
3781958Slianep
3791958Slianep /*
3801958Slianep * gt_transition() implements the state transition for the graph
3811958Slianep * state machine. It can return:
3821958Slianep * 0 success
3831958Slianep * ECONNABORTED repository connection aborted
3842339Slianep *
3852339Slianep * v->gv_state should be set to the state we're transitioning to before
3862339Slianep * calling this function.
3871958Slianep */
3881958Slianep int
gt_transition(scf_handle_t * h,graph_vertex_t * v,restarter_error_t rerr,restarter_instance_state_t old_state)3891958Slianep gt_transition(scf_handle_t *h, graph_vertex_t *v, restarter_error_t rerr,
3902339Slianep restarter_instance_state_t old_state)
3911958Slianep {
3922747Sbustos int err;
3932747Sbustos int lost_repository = 0;
3941958Slianep
3951958Slianep /*
3961958Slianep * If there's a common set of work to be done on exit from the
3971958Slianep * old_state, include it as a separate set of functions here. For
3981958Slianep * now there's no such work, so there are no gt_exit functions.
3991958Slianep */
4001958Slianep
4012747Sbustos err = vertex_subgraph_dependencies_shutdown(h, v, old_state);
4022747Sbustos switch (err) {
4032747Sbustos case 0:
4042747Sbustos break;
4052747Sbustos
4062747Sbustos case ECONNABORTED:
4072747Sbustos lost_repository = 1;
4082747Sbustos break;
4092747Sbustos
4102747Sbustos default:
4112747Sbustos bad_error("vertex_subgraph_dependencies_shutdown", err);
4122747Sbustos }
4132747Sbustos
4141958Slianep /*
4151958Slianep * Now call the appropriate gt_enter function for the new state.
4161958Slianep */
4172339Slianep switch (v->gv_state) {
4181958Slianep case RESTARTER_STATE_UNINIT:
4191958Slianep err = gt_enter_uninit(h, v, old_state, rerr);
4201958Slianep break;
4211958Slianep
4221958Slianep case RESTARTER_STATE_DISABLED:
4231958Slianep err = gt_enter_disabled(h, v, old_state, rerr);
4241958Slianep break;
4251958Slianep
4261958Slianep case RESTARTER_STATE_OFFLINE:
4271958Slianep err = gt_enter_offline(h, v, old_state, rerr);
4281958Slianep break;
4291958Slianep
4301958Slianep case RESTARTER_STATE_ONLINE:
4311958Slianep err = gt_enter_online(h, v, old_state, rerr);
4321958Slianep break;
4331958Slianep
4341958Slianep case RESTARTER_STATE_DEGRADED:
4351958Slianep err = gt_enter_degraded(h, v, old_state, rerr);
4361958Slianep break;
4371958Slianep
4381958Slianep case RESTARTER_STATE_MAINT:
4391958Slianep err = gt_enter_maint(h, v, old_state, rerr);
4401958Slianep break;
4411958Slianep
4421958Slianep default:
4432339Slianep /* Shouldn't be in an invalid state. */
4441958Slianep #ifndef NDEBUG
4452747Sbustos uu_warn("%s:%d: Invalid state %d.\n", __FILE__, __LINE__,
4462339Slianep v->gv_state);
4471958Slianep #endif
4481958Slianep abort();
4491958Slianep }
4501958Slianep
4512747Sbustos switch (err) {
4522747Sbustos case 0:
4532747Sbustos break;
4542747Sbustos
4552747Sbustos case ECONNABORTED:
4562747Sbustos lost_repository = 1;
4572747Sbustos break;
4582747Sbustos
4592747Sbustos default:
4602747Sbustos #ifndef NDEBUG
4612747Sbustos uu_warn("%s:%d: "
4622747Sbustos "gt_enter_%s() failed with unexpected error %d.\n",
4632747Sbustos __FILE__, __LINE__, instance_state_str[v->gv_state], err);
4642747Sbustos #endif
4652747Sbustos abort();
4662747Sbustos }
4672747Sbustos
4682747Sbustos return (lost_repository ? ECONNABORTED : 0);
4691958Slianep }
470