11708Sstevel /*
21708Sstevel * CDDL HEADER START
31708Sstevel *
41708Sstevel * The contents of this file are subject to the terms of the
51708Sstevel * Common Development and Distribution License (the "License").
61708Sstevel * You may not use this file except in compliance with the License.
71708Sstevel *
81708Sstevel * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
91708Sstevel * or http://www.opensolaris.org/os/licensing.
101708Sstevel * See the License for the specific language governing permissions
111708Sstevel * and limitations under the License.
121708Sstevel *
131708Sstevel * When distributing Covered Code, include this CDDL HEADER in each
141708Sstevel * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
151708Sstevel * If applicable, add the following below this CDDL HEADER, with the
161708Sstevel * fields enclosed by brackets "[]" replaced with your own identifying
171708Sstevel * information: Portions Copyright [yyyy] [name of copyright owner]
181708Sstevel *
191708Sstevel * CDDL HEADER END
201708Sstevel */
211708Sstevel
221708Sstevel /*
23*11311SSurya.Prakki@Sun.COM * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
241708Sstevel * Use is subject to license terms.
251708Sstevel */
261708Sstevel
271708Sstevel /*
281708Sstevel * ntwdt driver
291708Sstevel * ------------
301708Sstevel *
311708Sstevel * Subsystem Overview
321708Sstevel * ------------------
331708Sstevel *
341708Sstevel * This is a pseudo driver for the Netra-1280 watchdog
351708Sstevel * timer (WDT). It provides for an *application-driven*
361708Sstevel * WDT (AWDT), not a traditional, hardware-based WDT. A
371708Sstevel * hardware-based feature is already present on the
381708Sstevel * Netra-1280, and it is referred to here as the
391708Sstevel * System WDT (SWDT).
401708Sstevel *
411708Sstevel * ScApp and Solaris cooperate to provide either a SWDT or
421708Sstevel * an AWDT; they are mutually-exclusive. Once in AWDT
431708Sstevel * mode, one can only transition to SWDT mode via a reboot.
441708Sstevel * This obviously gives priority to the AWDT and was done
451708Sstevel * to handle scenarios where the customer might temporarily
461708Sstevel * terminate their wdog-app in order to do some debugging,
471708Sstevel * or even to load a new version of the wdog-app.
481708Sstevel *
491708Sstevel * The wdog-app does an open() of the /dev/ntwdt device node
501708Sstevel * and then issues ioctl's to control the state of the AWDT.
511708Sstevel * The ioctl's are implemented by this driver. Only one
521708Sstevel * concurrent instance of open() is allowed. On the close(),
531708Sstevel * a watchdog timer still in progress is NOT terminated.
541708Sstevel * This allows the global state machine to monitor the
551708Sstevel * progress of a Solaris reboot. ScApp will reset Solaris
561708Sstevel * (eg, send an XIR) if the actual boot/crashdump latency
571708Sstevel * is larger than the current AWDT timeout.
581708Sstevel *
591708Sstevel * The rationale for implementing an AWDT (vs a SWDT) is
601708Sstevel * that it is more sensitive to system outage scenarios than
611708Sstevel * a SWDT. Eg, a system could be in such a failed state that
621708Sstevel * even though its clock-interrupt could still run (and the
631708Sstevel * SWDT's watchdog timer therefore re-armed), the system could
641708Sstevel * in effect have a corrupt or very poor dispatch latency.
651708Sstevel * An AWDT would be sensitive to dispatch latency issues, as
661708Sstevel * well as problems with its own execution (eg, a hang or
671708Sstevel * crash).
681708Sstevel *
691708Sstevel * Subsystem Interface Overview
701708Sstevel * ----------------------------
711708Sstevel *
721708Sstevel * This pseudo-driver does not have any 'extern' functions.
731708Sstevel *
741708Sstevel * All system interaction is done via the traditional driver
751708Sstevel * entry points (eg, attach(9e), _init(9e)).
761708Sstevel *
771708Sstevel * All interaction with user is via the entry points in the
781708Sstevel * 'struct cb_ops' vector (eg, open(9e), ioctl(9e), and
791708Sstevel * close(9e)).
801708Sstevel *
811708Sstevel * Subsystem Implementation Overview
821708Sstevel * ---------------------------------
831708Sstevel *
841708Sstevel * ScApp and Solaris (eg, ntwdt) cooperate so that a state
851708Sstevel * machine global to ScApp and ntwdt is either in AWDT mode
861708Sstevel * or in SWDT mode. These two peers communicate via the SBBC
871708Sstevel * Mailbox that resides in IOSRAM (SBBC_MAILBOX_KEY).
881708Sstevel * They use two new mailbox messages (LW8_MBOX_WDT_GET and
891708Sstevel * LW8_MBOX_WDT_SET) and one new event (LW8_EVENT_SC_RESTARTED).
901708Sstevel *
911708Sstevel * ntwdt implements the AWDT by implementing a "virtual
921708Sstevel * WDT" (VWDT). Eg, the watchdog timer is not a traditional
931708Sstevel * counter in hardware, it is a variable in ntwdt's
941708Sstevel * softstate. The wdog-app's actions cause changes to this
951708Sstevel * and other variables in ntwdt's softstate.
961708Sstevel *
971708Sstevel * The wdog-app uses the LOMIOCDOGTIME ioctl to specify
981708Sstevel * the number of seconds in the watchdog timeout (and
991708Sstevel * therefore the VWDT). The wdog-app then uses the
1001708Sstevel * LOMIOCDOGCTL ioctl to enable the wdog. This causes
1011708Sstevel * ntwdt to create a Cyclic that will both decrement
1021708Sstevel * the VWDT and check to see if it has expired. To keep
1031708Sstevel * the VWDT from expiring, the wdog-app uses the
1041708Sstevel * LOMIOCDOGPAT ioctl to re-arm (or "pat") the watchdog.
1051708Sstevel * This sets the VWDT value to that specified in the
1061708Sstevel * last LOMIOCDOGTIME ioctl. The wdog-app can use the
1071708Sstevel * LOMIOCDOGSTATE ioctl to query the state of the VWDT.
1081708Sstevel *
1091708Sstevel * The wdog-app can also specify how Recovery is to be
1101708Sstevel * done. The only choice is whether to do a crashdump
1111708Sstevel * or not. If ntwdt computes a VWDT expiration, then
1121708Sstevel * ntwdt initiates the Recovery, else ScApp will. Eg,
1131708Sstevel * a hang in Solaris will be sensed by ScApp and not
1141708Sstevel * ntwdt. The wdog-app specifies the Recovery policy
1151708Sstevel * via the DOGCTL ioctl.
1161708Sstevel *
1171708Sstevel * Timeout Expiration
1181708Sstevel * ------------------
1191708Sstevel * In our implementation, ScApp senses a watchdog
1201708Sstevel * expiration the same way it historically has:
1211708Sstevel * by reading a well-known area of IOSRAM (SBBC_TOD_KEY)
1221708Sstevel * to see if the timestamp associated with a
1231708Sstevel * Solaris-generated "heartbeat" field is older
1241708Sstevel * than the currently specified timeout (which is
1251708Sstevel * also specified in this same IOSRAM section).
1261708Sstevel *
1271708Sstevel * What is different when ntwdt is running is that
1281708Sstevel * ntwdt is responsible for updating the Heartbeat,
1291708Sstevel * and not the normal client (todsg). When ntwdt
1301708Sstevel * puts the system in AWDT mode, it disables todsg's
1311708Sstevel * updating of the Heartbeat by changing the state of
1321708Sstevel * a pair of kernel tunables (watchdog_activated and
1331708Sstevel * watchdog_enable). ntwdt then takes responsibility
1341708Sstevel * for updating the Heartbeat. It does this by
1351708Sstevel * updating the Heartbeat from the Cyclic that is
1361708Sstevel * created when the user enables the AWDT (DOGCTL)
1371708Sstevel * or specifies a new timeout value (DOGTIME).
1381708Sstevel *
1391708Sstevel * As long as the AWDT is enabled, ntwdt will update
1401708Sstevel * the real system Heartbeat. As a result, ScApp
1411708Sstevel * will conclude that Solaris is still running. If
1421708Sstevel * the user stops re-arming the VWDT or Solaris
1431708Sstevel * hangs (eg), ntwdt will stop updating the Heartbeat.
1441708Sstevel *
1451708Sstevel * Note that ntwdt computes expiration via the
1461708Sstevel * repeatedly firing Cyclic, and ScApp computes
1471708Sstevel * expiration via a cessation of Heartbeat update.
1481708Sstevel * Since Heartbeat update stops once user stops
1491708Sstevel * re-arming the VWDT (ie, DOGPAT ioctl), ntwdt
1501708Sstevel * will compute a timeout at t(x), and ScApp will
1511708Sstevel * compute a timeout at t(2x), where 'x' is the
1521708Sstevel * current timeout value. When ntwdt computes
1531708Sstevel * the expiration, ntwdt masks this asymmetry.
1541708Sstevel *
1551708Sstevel * Lifecycle Events
1561708Sstevel * ----------------
1571708Sstevel *
1581708Sstevel * ntwdt only handles one of the coarse-grained
1591708Sstevel * "lifecycle events" (eg, entering OBP, shutdown,
1601708Sstevel * power-down, DR) that are possible during a Solaris
1611708Sstevel * session: a panic. (Note that ScApp handles one
1621708Sstevel * of the others: "entering OBP"). Other than these,
1631708Sstevel * a user choosing such a state transition must first
1641708Sstevel * use the wdog-app to disable the watchdog, else
1651708Sstevel * an expiration could occur.
1661708Sstevel *
1671708Sstevel * Solaris handles a panic by registering a handler
1681708Sstevel * that's called during the panic. The handler will
1691708Sstevel * set the watchdog timeout to the value specified
1701708Sstevel * in the NTWDT_BOOT_TIMEOUT_PROP driver Property.
1711708Sstevel * Again, this value should be greater than the actual
1721708Sstevel * Solaris reboot/crashdump latency.
1731708Sstevel *
1741708Sstevel * When the user enters OBP via the System Controller,
1751708Sstevel * ScApp will disable the watchdog (from ScApp's
1761708Sstevel * perspective), but it will not communicate this to
1771708Sstevel * ntwdt. After having exited OBP, the wdog-app can
1781708Sstevel * be used to enable or disable the watchdog (which
1791708Sstevel * will get both ScApp and ntwdt in-sync).
1801708Sstevel *
1811708Sstevel * Locking
1821708Sstevel * -------
1831708Sstevel *
1841708Sstevel * ntwdt has code running at three interrupt levels as
1851708Sstevel * well as base level.
1861708Sstevel *
1871708Sstevel * The ioctls run at base level in User Context. The
1881708Sstevel * driver's entry points run at base level in Kernel
1891708Sstevel * Context.
1901708Sstevel *
1911708Sstevel * ntwdt's three interrupt levels are used by:
1921708Sstevel *
1931708Sstevel * o LOCK_LEVEL :
1941708Sstevel * the Cyclic used to manage the VWDT is initialized
1951708Sstevel * to CY_LOCK_LEVEL
1961708Sstevel *
1971708Sstevel * o DDI_SOFTINT_MED :
1981708Sstevel * the SBBC mailbox implementation registers the
1991708Sstevel * specified handlers at this level
2001708Sstevel *
2011708Sstevel * o DDI_SOFTINT_LOW :
2021708Sstevel * this level is used by two handlers. One handler
2031708Sstevel * is triggered by the LOCK_LEVEL Cyclic. The other
2041708Sstevel * handler is triggered by the DDI_SOFTINT_MED
2051708Sstevel * handler registered to handle SBBC mailbox events.
2061708Sstevel *
2071708Sstevel * The centralizing concept is that the ntwdt_wdog_mutex
2081708Sstevel * in the driver's softstate is initialized to have an
2091708Sstevel * interrupt-block-cookie corresponding to DDI_SOFTINT_LOW.
2101708Sstevel *
2111708Sstevel * As a result, any base level code grabs ntwdt_wdog_mutex
2121708Sstevel * before doing work. Also, any handler running at interrupt
2131708Sstevel * level higher than DDI_SOFTINT_LOW "posts down" so that
2141708Sstevel * a DDI_SOFTINT_LOW handler is responsible for executing
2151708Sstevel * the "real work". Each DDI_SOFTINT_LOW handler also
2161708Sstevel * first grabs ntwdt_wdog_mutex, and so base level is
2171708Sstevel * synchronized with all interrupt levels.
2181708Sstevel *
2191708Sstevel * Note there's another mutex in the softstate: ntwdt_mutex.
2201708Sstevel * This mutex has few responsibilities. However, this
2211708Sstevel * locking order must be followed: ntwdt_wdog_mutex is
2221708Sstevel * held first, and then ntwdt_mutex. This choice results
2231708Sstevel * from the fact that the number of dynamic call sites
2241708Sstevel * for ntwdt_wdog_mutex is MUCH greater than that of
2251708Sstevel * ntwdt_mutex. As a result, almost all uses of
2261708Sstevel * ntwdt_wdog_mutex do not even require ntwdt_mutex to
2271708Sstevel * be held, which saves resources.
2281708Sstevel *
2291708Sstevel * Driver Properties
2301708Sstevel * -----------------
2311708Sstevel *
2321708Sstevel * "ddi-forceattach=1;"
2331708Sstevel * ------------------
2341708Sstevel *
2351708Sstevel * Using this allows our driver to be automatically
2361708Sstevel * loaded at boot-time AND to not be removed from memory
2371708Sstevel * solely due to memory-pressure.
2381708Sstevel *
2391708Sstevel * Being loaded at boot allows ntwdt to (as soon as
2401708Sstevel * possible) tell ScApp of the current mode of the
2411708Sstevel * state-machine (eg, SWDT). This is needed for the case
2421708Sstevel * when Solaris is re-loaded while in AWDT mode; having
2431708Sstevel * Solaris communicate ASAP with ScApp reduces the duration
2441708Sstevel * of any "split-brain" scenario where ScApp and Solaris
2451708Sstevel * are not in the same mode.
2461708Sstevel *
2471708Sstevel * Having ntwdt remain in memory even after a close()
2481708Sstevel * allows ntwdt to answer any SBBC mailbox commands
2491708Sstevel * that ScApp sends (as the mailbox infrastructure is
2501708Sstevel * not torn down until ntwdt is detach()'d). Specifically,
2511708Sstevel * ScApp could be re-loaded after AWDT mode had been
2521708Sstevel * entered and the wdog-app had close()'d ntwdt. ScApp
 * will then eventually send a LW8_EVENT_SC_RESTARTED
 * mailbox event in order to learn the current state of
 * the state-machine. Having ntwdt remain loaded ensures
 * that this event never goes unanswered.
2571708Sstevel *
2581708Sstevel * "ntwdt-boottimeout=600;"
2591708Sstevel * ----------------------
2601708Sstevel *
2611708Sstevel * This specifies the watchdog timeout value (in seconds) to
2621708Sstevel * use when ntwdt is aware of the need to reboot/reload Solaris.
2631708Sstevel *
2641708Sstevel * ntwdt will update ScApp by setting the watchdog timeout
2651708Sstevel * to the specified number of seconds when either a) Solaris
2661708Sstevel * panics or b) the VWDT expires. Note that this is only done
2671708Sstevel * if the user has chosen to enable Reset.
2681708Sstevel *
2691708Sstevel * ntwdt boundary-checks the specified value, and if out-of-range,
2701708Sstevel * it initializes the watchdog timeout to a default value of
2711708Sstevel * NTWDT_DEFAULT_BOOT_TIMEOUT seconds. Note that this is a
2721708Sstevel * default value and is not a *minimum* value. The valid range
2731708Sstevel * for the watchdog timeout is between one second and
2741708Sstevel * NTWDT_MAX_TIMEOUT seconds, inclusive.
2751708Sstevel *
2761708Sstevel * If ntwdt-boottimeout is set to a value less than an actual
2771708Sstevel * Solaris boot's latency, ScApp will reset Solaris during boot.
2781708Sstevel * Note that a continuous series of ScApp-induced resets will
2791708Sstevel * not occur; ScApp only resets Solaris on the first transition
2801708Sstevel * into the watchdog-expired state.
2811708Sstevel */
2821708Sstevel
2831708Sstevel #include <sys/note.h>
2841708Sstevel #include <sys/types.h>
2851708Sstevel #include <sys/callb.h>
2861708Sstevel #include <sys/stat.h>
2871708Sstevel #include <sys/conf.h>
2881708Sstevel #include <sys/ddi.h>
2891708Sstevel #include <sys/sunddi.h>
2901708Sstevel #include <sys/modctl.h>
2911708Sstevel #include <sys/ddi_impldefs.h>
2921708Sstevel #include <sys/kmem.h>
2931708Sstevel #include <sys/devops.h>
2941708Sstevel #include <sys/cyclic.h>
2951708Sstevel #include <sys/uadmin.h>
2961708Sstevel #include <sys/lw8_impl.h>
2971708Sstevel #include <sys/sgsbbc.h>
2981708Sstevel #include <sys/sgsbbc_iosram.h>
2991708Sstevel #include <sys/sgsbbc_mailbox.h>
3001708Sstevel #include <sys/todsg.h>
3011708Sstevel #include <sys/mem_config.h>
3021708Sstevel #include <sys/lom_io.h>
3031708Sstevel #include <sys/reboot.h>
3041708Sstevel #include <sys/clock.h>
3051708Sstevel
3061708Sstevel
/*
 * Tunables.
 *
 * ntwdt_disable_timeout_action starts out as zero.
 * NOTE(review): its consumers are outside this view; presumably a non-zero
 * value suppresses the recovery action taken on a VWDT expiration -- confirm.
 */
int ntwdt_disable_timeout_action = 0;

#ifdef DEBUG
/*
 * Tunable that simulates a Solaris hang.  If it is non-zero, then no
 * system heartbeats ("hardware patting") will be done, even though all
 * of the AWDT machinery is functioning OK.
 */
int ntwdt_stop_heart;
#endif
3191708Sstevel
/*
 * Name of the driver Property (read from the .conf file) that supplies
 * the watchdog timeout to use across a reboot.
 */
#define	NTWDT_BOOT_TIMEOUT_PROP	"ntwdt-boottimeout"

/*
 * watchdog-timeout values (in seconds):
 *
 * NTWDT_DEFAULT_BOOT_TIMEOUT: the default value used if
 *                             this driver is aware of the
 *                             reboot.
 *
 * NTWDT_MAX_TIMEOUT: max value settable by app (via the
 *                    LOMIOCDOGTIME ioctl)
 */
#define	NTWDT_DEFAULT_BOOT_TIMEOUT	(10*60)
#define	NTWDT_MAX_TIMEOUT		(180*60)


#define	NTWDT_CYCLIC_CHK_PERCENT	(20)

/* name of the minor node created by attach(9e) */
#define	NTWDT_MINOR_NODE		"awdt"

/*
 * Byte offset of 'field' within the object 'base'.
 *
 * 'base' is parenthesized so that an lvalue expression such as '*ptr'
 * can be passed; without the parentheses '&*ptr.field' would bind the
 * member access to 'ptr' rather than to '*ptr'.
 */
#define	OFFSET(base, field)	((char *)&(base).field - (char *)&(base))

/* return values used internally by this driver */
#define	NTWDT_SUCCESS	0
#define	NTWDT_FAILURE	1
3451708Sstevel
3461708Sstevel typedef struct {
3471708Sstevel callb_id_t ntwdt_panic_cb;
3481708Sstevel } ntwdt_callback_ids_t;
3491708Sstevel static ntwdt_callback_ids_t ntwdt_callback_ids;
3501708Sstevel
3511708Sstevel /* MBOX_EVENT_LW8 that is sent in IOSRAM Mailbox: */
3521708Sstevel static lw8_event_t lw8_event; /* payload */
3531708Sstevel static sbbc_msg_t sbbc_msg; /* message */
3541708Sstevel
3551708Sstevel static ddi_softintr_t ntwdt_mbox_softint_id;
3561708Sstevel static ddi_softintr_t ntwdt_cyclic_softint_id;
3571708Sstevel
3581708Sstevel /*
3591708Sstevel * VWDT (i.e., Virtual Watchdog Timer) state
3601708Sstevel */
3611708Sstevel typedef struct {
3621708Sstevel kmutex_t ntwdt_wdog_mutex;
3631708Sstevel ddi_iblock_cookie_t ntwdt_wdog_mtx_cookie;
3641708Sstevel int ntwdt_wdog_enabled; /* wdog enabled ? */
3651708Sstevel int ntwdt_reset_enabled; /* reset enabled ? */
3661708Sstevel int ntwdt_timer_running; /* wdog running ? */
3671708Sstevel int ntwdt_wdog_expired; /* wdog expired ? */
3681708Sstevel int ntwdt_is_initial_enable; /* 1st wdog-enable? */
3691708Sstevel uint32_t ntwdt_boot_timeout; /* timeout for boot */
3701708Sstevel uint32_t ntwdt_secs_remaining; /* expiration timer */
3711708Sstevel uint8_t ntwdt_wdog_action; /* Reset action */
3721708Sstevel uint32_t ntwdt_wdog_timeout; /* timeout in seconds */
3731708Sstevel hrtime_t ntwdt_cyclic_interval; /* cyclic interval */
3741708Sstevel cyc_handler_t ntwdt_cycl_hdlr;
3751708Sstevel cyc_time_t ntwdt_cycl_time;
3761708Sstevel kmutex_t ntwdt_event_lock; /* lock */
3771708Sstevel uint64_t ntwdt_wdog_flags;
3781708Sstevel } ntwdt_wdog_t;
3791708Sstevel
/* bits in ntwdt_wdog_flags */
#define	NTWDT_FLAG_SKIP_CYCLIC		0x1	/* skip next Cyclic */

/*
 * Macros to set/clear one bit in ntwdt_wdog_flags.
 *
 * 'p' points at an object with a 64-bit ntwdt_wdog_flags member and 'f'
 * is the flag name without its NTWDT_FLAG_ prefix.  The flag is widened
 * to 64 bits before it is used (and, for CLR, before it is complemented)
 * so that a flag whose constant has a 32-bit unsigned type cannot clear
 * the upper half of the flags word as a side effect.
 */
#define	NTWDT_FLAG_SET(p, f)\
	((p)->ntwdt_wdog_flags |= (uint64_t)NTWDT_FLAG_##f)
#define	NTWDT_FLAG_CLR(p, f)\
	((p)->ntwdt_wdog_flags &= ~(uint64_t)NTWDT_FLAG_##f)
3881708Sstevel
3891708Sstevel
3901708Sstevel /* softstate */
3911708Sstevel typedef struct {
3921708Sstevel kmutex_t ntwdt_mutex;
3931708Sstevel dev_info_t *ntwdt_dip; /* dip */
3941708Sstevel int ntwdt_open_flag; /* file open ? */
3951708Sstevel ntwdt_wdog_t *ntwdt_wdog_state; /* wdog state */
3961708Sstevel cyclic_id_t ntwdt_cycl_id;
3971708Sstevel } ntwdt_state_t;
3981708Sstevel
3991708Sstevel static void *ntwdt_statep; /* softstate */
4001708Sstevel static dev_info_t *ntwdt_dip;
4011708Sstevel /*
4021708Sstevel * if non-zero, then the app-wdog feature is available on
4031708Sstevel * this system configuration.
4041708Sstevel */
4051708Sstevel static int ntwdt_watchdog_available;
4061708Sstevel /*
4071708Sstevel * if non-zero, then application has used the LOMIOCDOGCTL
4081708Sstevel * ioctl at least once in order to Enable the app-wdog.
4091708Sstevel * Also, if this is non-zero, then system is in AWDT mode,
4101708Sstevel * else it is in SWDT mode.
4111708Sstevel */
4121708Sstevel static int ntwdt_watchdog_activated;
4131708Sstevel
4141708Sstevel #define getstate(minor) \
4151708Sstevel ((ntwdt_state_t *)ddi_get_soft_state(ntwdt_statep, (minor)))
4161708Sstevel
4171708Sstevel static int ntwdt_attach(dev_info_t *dip, ddi_attach_cmd_t cmd);
4181708Sstevel static int ntwdt_detach(dev_info_t *dip, ddi_detach_cmd_t cmd);
4191708Sstevel static int ntwdt_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg,
4201708Sstevel void **result);
4211708Sstevel static int ntwdt_open(dev_t *, int, int, cred_t *);
4221708Sstevel static int ntwdt_close(dev_t, int, int, cred_t *);
4231708Sstevel static int ntwdt_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
4241708Sstevel
4251708Sstevel static void ntwdt_reprogram_wd(ntwdt_state_t *);
4261708Sstevel static boolean_t ntwdt_panic_cb(void *arg, int code);
4271708Sstevel static void ntwdt_start_timer(ntwdt_state_t *);
4281708Sstevel static void ntwdt_stop_timer(void *);
4291708Sstevel static void ntwdt_stop_timer_lock(void *arg);
4301708Sstevel static void ntwdt_add_callbacks(ntwdt_state_t *ntwdt_ptr);
4311708Sstevel static void ntwdt_remove_callbacks();
4321708Sstevel static void ntwdt_cyclic_pat(void *arg);
4331708Sstevel static void ntwdt_enforce_timeout();
4341708Sstevel static void ntwdt_pat_hw_watchdog();
4351708Sstevel static int ntwdt_set_cfgvar(int var, int val);
4361708Sstevel static void ntwdt_set_cfgvar_noreply(int var, int val);
4371708Sstevel static int ntwdt_read_props(ntwdt_state_t *);
4381708Sstevel static int ntwdt_add_mbox_handlers(ntwdt_state_t *);
4391708Sstevel static int ntwdt_set_hw_timeout(uint32_t period);
4401708Sstevel static int ntwdt_remove_mbox_handlers(void);
4411708Sstevel static uint_t ntwdt_event_data_handler(char *arg);
4421708Sstevel static uint_t ntwdt_mbox_softint(char *arg);
4431708Sstevel static uint_t ntwdt_cyclic_softint(char *arg);
4441708Sstevel static int ntwdt_lomcmd(int cmd, intptr_t arg);
4451708Sstevel static int ntwdt_chk_wdog_support();
4461708Sstevel static int ntwdt_chk_sc_support();
4471708Sstevel static int ntwdt_set_swdt_state();
4481708Sstevel static void ntwdt_swdt_to_awdt(ntwdt_wdog_t *);
4491708Sstevel static void ntwdt_arm_vwdt(ntwdt_wdog_t *wdog_state);
4501708Sstevel #ifdef DEBUG
4511708Sstevel static int ntwdt_get_cfgvar(int var, int *val);
4521708Sstevel #endif
4531708Sstevel
4541708Sstevel struct cb_ops ntwdt_cb_ops = {
4551708Sstevel ntwdt_open, /* open */
4561708Sstevel ntwdt_close, /* close */
4571708Sstevel nulldev, /* strategy */
4581708Sstevel nulldev, /* print */
4591708Sstevel nulldev, /* dump */
4601708Sstevel nulldev, /* read */
4611708Sstevel nulldev, /* write */
4621708Sstevel ntwdt_ioctl, /* ioctl */
4631708Sstevel nulldev, /* devmap */
4641708Sstevel nulldev, /* mmap */
4651708Sstevel nulldev, /* segmap */
4661708Sstevel nochpoll, /* poll */
4671708Sstevel ddi_prop_op, /* cb_prop_op */
4681708Sstevel NULL, /* streamtab */
4691708Sstevel D_MP | D_NEW
4701708Sstevel };
4711708Sstevel
4721708Sstevel static struct dev_ops ntwdt_ops = {
4731708Sstevel DEVO_REV, /* Devo_rev */
4741708Sstevel 0, /* Refcnt */
4751708Sstevel ntwdt_info, /* Info */
4761708Sstevel nulldev, /* Identify */
4771708Sstevel nulldev, /* Probe */
4781708Sstevel ntwdt_attach, /* Attach */
4791708Sstevel ntwdt_detach, /* Detach */
4801708Sstevel nodev, /* Reset */
4811708Sstevel &ntwdt_cb_ops, /* Driver operations */
4821708Sstevel 0, /* Bus operations */
4831708Sstevel NULL /* Power */
4841708Sstevel };
4851708Sstevel
4861708Sstevel static struct modldrv modldrv = {
4871708Sstevel &mod_driverops, /* This one is a driver */
4887799SRichard.Bean@Sun.COM "ntwdt-Netra-T12", /* Name of the module. */
4891708Sstevel &ntwdt_ops, /* Driver ops */
4901708Sstevel };
4911708Sstevel
4921708Sstevel static struct modlinkage modlinkage = {
4931708Sstevel MODREV_1, (void *)&modldrv, NULL
4941708Sstevel };
4951708Sstevel
4961708Sstevel
/*
 * Flags to set in ntwdt_debug.
 *
 * Use either the NTWDT_DBG or NTWDT_NDBG macros
 */
#define	WDT_DBG_ENTRY	0x00000001	/* drv entry points */
#define	WDT_DBG_HEART	0x00000002	/* system heartbeat */
#define	WDT_DBG_VWDT	0x00000004	/* virtual WDT */
#define	WDT_DBG_EVENT	0x00000010	/* SBBC Mbox events */
#define	WDT_DBG_PROT	0x00000020	/* SC/Solaris protocol */
#define	WDT_DBG_IOCTL	0x00000040	/* ioctl's */

uint64_t ntwdt_debug;	/* enables tracing of module's activity */

/*
 * Tracing macros.
 *
 * 'flag' is one or more WDT_DBG_* bits and 'msg' is a parenthesized
 * printf argument list, eg: NTWDT_NDBG(WDT_DBG_VWDT, ("x: %d", x));
 * The message is printed only if at least one bit of 'flag' is set in
 * ntwdt_debug.
 *
 * The bodies are wrapped in do { } while (0) so that an invocation
 * followed by ';' is exactly one statement and can be used as the
 * unbraced body of an if/else.
 */

/* used in non-debug version of module */
#define	NTWDT_NDBG(flag, msg)	do { \
	if ((ntwdt_debug & (flag)) != 0) \
		(void) printf msg; \
} while (0)

#ifdef DEBUG
/* argument of the DEBUG-only NTWDTIOCSTATE ioctl */
typedef struct {
	uint32_t	ntwdt_wd1;
	uint8_t		ntwdt_wd2;
} ntwdt_data_t;

#define	NTWDTIOCSTATE	_IOWR('a', 0xa, ntwdt_data_t)
#define	NTWDTIOCPANIC	_IOR('a',  0xb, uint32_t)

/* used in debug version of module */
#define	NTWDT_DBG(flag, msg)	do { \
	if ((ntwdt_debug & (flag)) != 0) \
		(void) printf msg; \
} while (0)
#else
/* compiled out of the non-debug version of module */
#define	NTWDT_DBG(flag, msg)
#endif
5301708Sstevel
5311708Sstevel
5321708Sstevel int
_init(void)5331708Sstevel _init(void)
5341708Sstevel {
5351708Sstevel int error = 0;
5361708Sstevel
5371708Sstevel NTWDT_DBG(WDT_DBG_ENTRY, ("_init"));
5381708Sstevel
5391708Sstevel /* Initialize the soft state structures */
5401708Sstevel if ((error = ddi_soft_state_init(&ntwdt_statep,
5411708Sstevel sizeof (ntwdt_state_t), 1)) != 0) {
5421708Sstevel return (error);
5431708Sstevel }
5441708Sstevel
5451708Sstevel /* Install the loadable module */
5461708Sstevel if ((error = mod_install(&modlinkage)) != 0) {
5471708Sstevel ddi_soft_state_fini(&ntwdt_statep);
5481708Sstevel }
5491708Sstevel return (error);
5501708Sstevel }
5511708Sstevel
5521708Sstevel int
_info(struct modinfo * modinfop)5531708Sstevel _info(struct modinfo *modinfop)
5541708Sstevel {
5551708Sstevel NTWDT_DBG(WDT_DBG_ENTRY, ("_info"));
5561708Sstevel
5571708Sstevel return (mod_info(&modlinkage, modinfop));
5581708Sstevel }
5591708Sstevel
5601708Sstevel int
_fini(void)5611708Sstevel _fini(void)
5621708Sstevel {
5631708Sstevel int error;
5641708Sstevel
5651708Sstevel NTWDT_DBG(WDT_DBG_ENTRY, ("_fini"));
5661708Sstevel
5671708Sstevel error = mod_remove(&modlinkage);
5681708Sstevel if (error == 0) {
5691708Sstevel ddi_soft_state_fini(&ntwdt_statep);
5701708Sstevel }
5711708Sstevel
5721708Sstevel return (error);
5731708Sstevel }
5741708Sstevel
5751708Sstevel static int
ntwdt_attach(dev_info_t * dip,ddi_attach_cmd_t cmd)5761708Sstevel ntwdt_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
5771708Sstevel {
5781708Sstevel int instance;
5791708Sstevel ntwdt_state_t *ntwdt_ptr = NULL;
5801708Sstevel ntwdt_wdog_t *wdog_state = NULL;
5811708Sstevel cyc_handler_t *hdlr = NULL;
5821708Sstevel
5831708Sstevel NTWDT_DBG(WDT_DBG_ENTRY, ("attach: dip/cmd: 0x%p/%d",
584*11311SSurya.Prakki@Sun.COM (void *)dip, cmd));
5851708Sstevel
5861708Sstevel switch (cmd) {
5871708Sstevel case DDI_ATTACH:
5881708Sstevel break;
5891708Sstevel
5901708Sstevel case DDI_RESUME:
5911708Sstevel return (DDI_SUCCESS);
5921708Sstevel
5931708Sstevel default:
5941708Sstevel return (DDI_FAILURE);
5951708Sstevel }
5961708Sstevel
5971708Sstevel /* see if app-wdog is supported on our config */
5981708Sstevel if (ntwdt_chk_wdog_support() != 0)
5991708Sstevel return (DDI_FAILURE);
6001708Sstevel
6011708Sstevel /* (unsolicitedly) send SWDT state to ScApp via mailbox */
602*11311SSurya.Prakki@Sun.COM (void) ntwdt_set_swdt_state();
6031708Sstevel
6041708Sstevel instance = ddi_get_instance(dip);
6051708Sstevel ASSERT(instance == 0);
6061708Sstevel
6071708Sstevel if (ddi_soft_state_zalloc(ntwdt_statep, instance)
6081708Sstevel != DDI_SUCCESS) {
6091708Sstevel return (DDI_FAILURE);
6101708Sstevel }
6111708Sstevel ntwdt_ptr = ddi_get_soft_state(ntwdt_statep, instance);
6121708Sstevel ASSERT(ntwdt_ptr != NULL);
6131708Sstevel
6141708Sstevel ntwdt_dip = dip;
6151708Sstevel
6161708Sstevel ntwdt_ptr->ntwdt_dip = dip;
6171708Sstevel ntwdt_ptr->ntwdt_cycl_id = CYCLIC_NONE;
6181708Sstevel mutex_init(&ntwdt_ptr->ntwdt_mutex, NULL,
6191708Sstevel MUTEX_DRIVER, NULL);
6201708Sstevel
6211708Sstevel /*
6221708Sstevel * Initialize the watchdog structure
6231708Sstevel */
6241708Sstevel ntwdt_ptr->ntwdt_wdog_state =
6251708Sstevel kmem_zalloc(sizeof (ntwdt_wdog_t), KM_SLEEP);
6261708Sstevel wdog_state = ntwdt_ptr->ntwdt_wdog_state;
6271708Sstevel
6281708Sstevel /*
6291708Sstevel * Create an iblock-cookie so that ntwdt_wdog_mutex can be
6301708Sstevel * used at User Context and Interrupt Context.
6311708Sstevel */
6321708Sstevel if (ddi_get_soft_iblock_cookie(dip, DDI_SOFTINT_LOW,
6331708Sstevel &wdog_state->ntwdt_wdog_mtx_cookie) != DDI_SUCCESS) {
6341708Sstevel cmn_err(CE_WARN, "init of iblock cookie failed "
6351708Sstevel "for ntwdt_wdog_mutex");
6361708Sstevel goto err1;
6371708Sstevel } else {
6381708Sstevel mutex_init(&wdog_state->ntwdt_wdog_mutex, NULL, MUTEX_DRIVER,
6391708Sstevel (void *)wdog_state->ntwdt_wdog_mtx_cookie);
6401708Sstevel }
6411708Sstevel
6421708Sstevel mutex_init(&wdog_state->ntwdt_event_lock, NULL,
6431708Sstevel MUTEX_DRIVER, NULL);
6441708Sstevel
6451708Sstevel /* Cyclic fires once per second: */
6461708Sstevel wdog_state->ntwdt_cyclic_interval = NANOSEC;
6471708Sstevel
6481708Sstevel /* interpret our .conf file. */
6491708Sstevel (void) ntwdt_read_props(ntwdt_ptr);
6501708Sstevel
6511708Sstevel /* init the Cyclic that drives the VWDT */
6521708Sstevel hdlr = &wdog_state->ntwdt_cycl_hdlr;
6531708Sstevel hdlr->cyh_level = CY_LOCK_LEVEL;
6541708Sstevel hdlr->cyh_func = ntwdt_cyclic_pat;
6551708Sstevel hdlr->cyh_arg = (void *)ntwdt_ptr;
6561708Sstevel
6571708Sstevel /* Register handler for SBBC Mailbox events */
6581708Sstevel if (ntwdt_add_mbox_handlers(ntwdt_ptr) != DDI_SUCCESS)
6591708Sstevel goto err2;
6601708Sstevel
6611708Sstevel /* Softint that will be triggered by Cyclic that drives VWDT */
6621708Sstevel if (ddi_add_softintr(dip, DDI_SOFTINT_LOW, &ntwdt_cyclic_softint_id,
6631708Sstevel NULL, NULL, ntwdt_cyclic_softint, (caddr_t)ntwdt_ptr)
6641708Sstevel != DDI_SUCCESS) {
6651708Sstevel cmn_err(CE_WARN, "failed to add cyclic softintr");
6661708Sstevel goto err3;
6671708Sstevel }
6681708Sstevel
6691708Sstevel /* Register callbacks for various system events, e.g. panic */
6701708Sstevel ntwdt_add_callbacks(ntwdt_ptr);
6711708Sstevel
6721708Sstevel /*
6731708Sstevel * Create Minor Node as last activity. This prevents
6741708Sstevel * application from accessing our implementation until it
6751708Sstevel * is initialized.
6761708Sstevel */
6771708Sstevel if (ddi_create_minor_node(dip, NTWDT_MINOR_NODE, S_IFCHR, 0,
6781708Sstevel DDI_PSEUDO, NULL) == DDI_FAILURE) {
6791708Sstevel cmn_err(CE_WARN, "failed to create Minor Node: %s",
6801708Sstevel NTWDT_MINOR_NODE);
6811708Sstevel goto err4;
6821708Sstevel }
6831708Sstevel
6841708Sstevel /* Display our driver info in the banner */
6851708Sstevel ddi_report_dev(dip);
6861708Sstevel
6871708Sstevel return (DDI_SUCCESS);
6881708Sstevel
6891708Sstevel err4:
6901708Sstevel ntwdt_remove_callbacks();
6911708Sstevel ddi_remove_softintr(ntwdt_cyclic_softint_id);
6921708Sstevel err3:
693*11311SSurya.Prakki@Sun.COM (void) ntwdt_remove_mbox_handlers();
6941708Sstevel err2:
6951708Sstevel mutex_destroy(&wdog_state->ntwdt_event_lock);
6961708Sstevel mutex_destroy(&wdog_state->ntwdt_wdog_mutex);
6971708Sstevel err1:
6981708Sstevel kmem_free(wdog_state, sizeof (ntwdt_wdog_t));
6991708Sstevel ntwdt_ptr->ntwdt_wdog_state = NULL;
7001708Sstevel
7011708Sstevel mutex_destroy(&ntwdt_ptr->ntwdt_mutex);
7021708Sstevel ddi_soft_state_free(ntwdt_statep, instance);
7031708Sstevel
7041708Sstevel ntwdt_dip = NULL;
7051708Sstevel
7061708Sstevel return (DDI_FAILURE);
7071708Sstevel }
7081708Sstevel
7091708Sstevel /*
7101708Sstevel * Do static checks to see if the app-wdog feature is supported in
7111708Sstevel * the current configuration.
7121708Sstevel *
7131708Sstevel * If the kernel debugger was booted, then we disallow the app-wdog
7141708Sstevel * feature, as we assume the user will be interested more in
7151708Sstevel * debuggability of system than its ability to support an app-wdog.
7161708Sstevel * (Note that the System Watchdog (SWDT) can still be available).
7171708Sstevel *
7181708Sstevel * If the currently loaded version of ScApp does not understand one
7191708Sstevel * of the IOSRAM mailbox messages that is specific to the app-wdog
7201708Sstevel * protocol, then we disallow use of the app-wdog feature (else
7211708Sstevel * we could have a "split-brain" scenario where Solaris supports
7221708Sstevel * app-wdog but ScApp doesn't).
7231708Sstevel *
7241708Sstevel * Note that there is no *dynamic* checking of whether ScApp supports
7251708Sstevel * the wdog protocol. Eg, if a new version of ScApp was loaded out
7261708Sstevel * from under Solaris, then once in AWDT mode, Solaris has no way
7271708Sstevel * of knowing that (a possibly older version of) ScApp was loaded.
7281708Sstevel */
7291708Sstevel static int
ntwdt_chk_wdog_support()7301708Sstevel ntwdt_chk_wdog_support()
7311708Sstevel {
7321708Sstevel int retval = ENOTSUP;
7331708Sstevel int rv;
7341708Sstevel
7351708Sstevel if ((boothowto & RB_DEBUG) != 0) {
7361708Sstevel cmn_err(CE_WARN, "kernel debugger was booted; "
7371708Sstevel "application watchdog is not available.");
7381708Sstevel return (retval);
7391708Sstevel }
7401708Sstevel
7411708Sstevel /*
7421708Sstevel * if ScApp does not support the MBOX_GET cmd, then
7431708Sstevel * it does not support the app-wdog feature. Also,
7441708Sstevel * if there is *any* type of SBBC Mailbox error at
7451708Sstevel * this point, we will disable the app watchdog
7461708Sstevel * feature.
7471708Sstevel */
7481708Sstevel if ((rv = ntwdt_chk_sc_support()) != 0) {
7491708Sstevel if (rv == EINVAL)
7501708Sstevel cmn_err(CE_WARN, "ScApp does not support "
7511708Sstevel "the application watchdog feature.");
7521708Sstevel else
7531708Sstevel cmn_err(CE_WARN, "SBBC mailbox had error;"
7541708Sstevel "application watchdog is not available.");
7551708Sstevel retval = rv;
7561708Sstevel } else {
7571708Sstevel ntwdt_watchdog_available = 1;
7581708Sstevel retval = 0;
7591708Sstevel }
7601708Sstevel
7611708Sstevel NTWDT_DBG(WDT_DBG_PROT, ("app-wdog is %savailable",
7621708Sstevel (ntwdt_watchdog_available != 0) ? "" : "not "));
7631708Sstevel
7641708Sstevel return (retval);
7651708Sstevel }
7661708Sstevel
7671708Sstevel /*
7681708Sstevel * Check to see if ScApp supports the app-watchdog feature.
7691708Sstevel *
7701708Sstevel * Do this by sending one of the mailbox commands that is
7711708Sstevel * specific to the app-wdog protocol. If ScApp does not
7721708Sstevel * return an error code, we will assume it understands it
7731708Sstevel * (as well as the remainder of the app-wdog protocol).
7741708Sstevel *
7751708Sstevel * Notes:
7761708Sstevel * ntwdt_lomcmd() will return EINVAL if ScApp does not
7771708Sstevel * understand the message. The underlying sbbc_mbox_
7781708Sstevel * utility function returns SG_MBOX_STATUS_ILLEGAL_PARAMETER
7791708Sstevel * ("illegal ioctl parameter").
7801708Sstevel */
7811708Sstevel static int
ntwdt_chk_sc_support()7821708Sstevel ntwdt_chk_sc_support()
7831708Sstevel {
7841708Sstevel lw8_get_wdt_t get_wdt;
7851708Sstevel
7861708Sstevel return (ntwdt_lomcmd(LW8_MBOX_WDT_GET, (intptr_t)&get_wdt));
7871708Sstevel }
7881708Sstevel
7891708Sstevel static int
ntwdt_detach(dev_info_t * dip,ddi_detach_cmd_t cmd)7901708Sstevel ntwdt_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
7911708Sstevel {
7921708Sstevel int instance = ddi_get_instance(dip);
7931708Sstevel ntwdt_state_t *ntwdt_ptr = NULL;
7941708Sstevel
7951708Sstevel NTWDT_DBG(WDT_DBG_ENTRY, ("detach: dip/cmd: 0x%p/%d",
796*11311SSurya.Prakki@Sun.COM (void *)dip, cmd));
7971708Sstevel
7981708Sstevel ntwdt_ptr = ddi_get_soft_state(ntwdt_statep, instance);
7991708Sstevel if (ntwdt_ptr == NULL) {
8001708Sstevel return (DDI_FAILURE);
8011708Sstevel }
8021708Sstevel
8031708Sstevel switch (cmd) {
8041708Sstevel case DDI_SUSPEND:
8051708Sstevel return (DDI_SUCCESS);
8061708Sstevel
8071708Sstevel case DDI_DETACH:
8081708Sstevel /*
8091708Sstevel * release resources in opposite (LIFO) order as
8101708Sstevel * were allocated in attach(9f).
8111708Sstevel */
8121708Sstevel ddi_remove_minor_node(dip, NULL);
8131708Sstevel
8141708Sstevel ntwdt_stop_timer_lock((void *)ntwdt_ptr);
8151708Sstevel
816*11311SSurya.Prakki@Sun.COM ntwdt_remove_callbacks();
8171708Sstevel
8181708Sstevel ddi_remove_softintr(ntwdt_cyclic_softint_id);
8191708Sstevel
820*11311SSurya.Prakki@Sun.COM (void) ntwdt_remove_mbox_handlers();
8211708Sstevel
8221708Sstevel mutex_destroy(&ntwdt_ptr->ntwdt_wdog_state->ntwdt_event_lock);
8231708Sstevel mutex_destroy(&ntwdt_ptr->ntwdt_wdog_state->ntwdt_wdog_mutex);
8241708Sstevel kmem_free(ntwdt_ptr->ntwdt_wdog_state,
8251708Sstevel sizeof (ntwdt_wdog_t));
8261708Sstevel ntwdt_ptr->ntwdt_wdog_state = NULL;
8271708Sstevel
8281708Sstevel mutex_destroy(&ntwdt_ptr->ntwdt_mutex);
8291708Sstevel
8301708Sstevel ddi_soft_state_free(ntwdt_statep, instance);
8311708Sstevel
8321708Sstevel ntwdt_dip = NULL;
8331708Sstevel return (DDI_SUCCESS);
8341708Sstevel
8351708Sstevel default:
8361708Sstevel return (DDI_FAILURE);
8371708Sstevel }
8381708Sstevel }
8391708Sstevel
8401708Sstevel /*
8411708Sstevel * Register the SBBC Mailbox handlers.
8421708Sstevel *
8431708Sstevel * Currently, only one handler is used. It processes the MBOX_EVENT_LW8
8441708Sstevel * Events that are sent by ScApp. Of the Events that are sent, only
8451708Sstevel * the Event declaring that ScApp is coming up from a reboot
8461708Sstevel * (LW8_EVENT_SC_RESTARTED) is processed.
8471708Sstevel *
8481708Sstevel * sbbc_mbox_reg_intr registers the handler so that it executes at
8491708Sstevel * a DDI_SOFTINT_MED priority.
8501708Sstevel */
8511708Sstevel static int
ntwdt_add_mbox_handlers(ntwdt_state_t * ntwdt_ptr)8521708Sstevel ntwdt_add_mbox_handlers(ntwdt_state_t *ntwdt_ptr)
8531708Sstevel {
8541708Sstevel int err;
8551708Sstevel
8561708Sstevel /*
8571708Sstevel * We need two interrupt handlers to handle the SBBC mbox
8581708Sstevel * events. The sbbc_mbox_xxx implementation will
8591708Sstevel * trigger our ntwdt_event_data_handler, which itself will
8601708Sstevel * trigger our ntwdt_mbox_softint. As a result, we'll
8611708Sstevel * register ntwdt_mbox_softint first, to ensure it cannot
8621708Sstevel * be called (until its caller, ntwdt_event_data_handler)
8631708Sstevel * is registered.
8641708Sstevel */
8651708Sstevel
8661708Sstevel /*
8671708Sstevel * add the softint that will do the real work of handling the
8681708Sstevel * LW8_SC_RESTARTED_EVENT sent from ScApp.
8691708Sstevel */
8701708Sstevel if (ddi_add_softintr(ntwdt_ptr->ntwdt_dip, DDI_SOFTINT_LOW,
8711708Sstevel &ntwdt_mbox_softint_id, NULL, NULL, ntwdt_mbox_softint,
8721708Sstevel (caddr_t)ntwdt_ptr) != DDI_SUCCESS) {
8731708Sstevel cmn_err(CE_WARN, "Failed to add MBOX_EVENT_LW8 softintr");
8741708Sstevel return (DDI_FAILURE);
8751708Sstevel }
8761708Sstevel
8771708Sstevel /*
8781708Sstevel * Register an interrupt handler with the SBBC mailbox utility.
8791708Sstevel * This handler will get called on each event of each type of
8801708Sstevel * MBOX_EVENT_LW8 events. However, it will only conditionally
8811708Sstevel * trigger the worker-handler (ntwdt_mbox_softintr).
8821708Sstevel */
8831708Sstevel sbbc_msg.msg_buf = (caddr_t)&lw8_event;
8841708Sstevel sbbc_msg.msg_len = sizeof (lw8_event);
8851708Sstevel
8861708Sstevel err = sbbc_mbox_reg_intr(MBOX_EVENT_LW8, ntwdt_event_data_handler,
8871708Sstevel &sbbc_msg, NULL, &ntwdt_ptr->ntwdt_wdog_state->ntwdt_event_lock);
8881708Sstevel if (err != 0) {
8891708Sstevel cmn_err(CE_WARN, "Failed to register SBBC MBOX_EVENT_LW8"
8901708Sstevel " handler. err=%d", err);
8911708Sstevel
8921708Sstevel ddi_remove_softintr(ntwdt_mbox_softint_id);
8931708Sstevel return (DDI_FAILURE);
8941708Sstevel }
8951708Sstevel
8961708Sstevel return (DDI_SUCCESS);
8971708Sstevel }
8981708Sstevel
8991708Sstevel /*
9001708Sstevel * Unregister the SBBC Mailbox handlers that were registered
9011708Sstevel * by ntwdt_add_mbox_handlers.
9021708Sstevel */
9031708Sstevel static int
ntwdt_remove_mbox_handlers(void)9041708Sstevel ntwdt_remove_mbox_handlers(void)
9051708Sstevel {
9061708Sstevel int rv = DDI_SUCCESS;
9071708Sstevel int err;
9081708Sstevel
9091708Sstevel /*
9101708Sstevel * unregister the two handlers that cooperate to handle
9111708Sstevel * the LW8_SC_RESTARTED_EVENT. Note that they are unregistered
9121708Sstevel * in LIFO order (as compared to how they were registered).
9131708Sstevel */
9141708Sstevel err = sbbc_mbox_unreg_intr(MBOX_EVENT_LW8, ntwdt_event_data_handler);
9151708Sstevel if (err != 0) {
9161708Sstevel cmn_err(CE_WARN, "Failed to unregister sbbc MBOX_EVENT_LW8 "
9171708Sstevel "handler. Err=%d", err);
9181708Sstevel rv = DDI_FAILURE;
9191708Sstevel }
9201708Sstevel
9211708Sstevel /* remove the associated softint */
9221708Sstevel ddi_remove_softintr(ntwdt_mbox_softint_id);
9231708Sstevel
9241708Sstevel return (rv);
9251708Sstevel }
9261708Sstevel
9271708Sstevel _NOTE(ARGSUSED(0))
9281708Sstevel static int
ntwdt_info(dev_info_t * dip,ddi_info_cmd_t infocmd,void * arg,void ** result)9291708Sstevel ntwdt_info(dev_info_t *dip, ddi_info_cmd_t infocmd,
9301708Sstevel void *arg, void **result)
9311708Sstevel {
9321708Sstevel dev_t dev;
9331708Sstevel int instance;
9341708Sstevel int error = DDI_SUCCESS;
9351708Sstevel
9361708Sstevel if (result == NULL)
9371708Sstevel return (DDI_FAILURE);
9381708Sstevel
9391708Sstevel switch (infocmd) {
9401708Sstevel case DDI_INFO_DEVT2DEVINFO:
9411708Sstevel dev = (dev_t)arg;
9421708Sstevel if (getminor(dev) == 0)
9431708Sstevel *result = (void *)ntwdt_dip;
9441708Sstevel else
9451708Sstevel error = DDI_FAILURE;
9461708Sstevel break;
9471708Sstevel
9481708Sstevel case DDI_INFO_DEVT2INSTANCE:
9491708Sstevel dev = (dev_t)arg;
9501708Sstevel instance = getminor(dev);
9511708Sstevel *result = (void *)(uintptr_t)instance;
9521708Sstevel break;
9531708Sstevel
9541708Sstevel default:
9551708Sstevel error = DDI_FAILURE;
9561708Sstevel }
9571708Sstevel
9581708Sstevel return (error);
9591708Sstevel }
9601708Sstevel
9611708Sstevel /*
9621708Sstevel * Open the device this driver manages.
9631708Sstevel *
9641708Sstevel * Ensure the caller is a privileged process, else
9651708Sstevel * a non-privileged user could cause denial-of-service
9661708Sstevel * and/or negatively impact reliability/availability.
9671708Sstevel *
9681708Sstevel * Ensure there is only one concurrent open().
9691708Sstevel */
9701708Sstevel _NOTE(ARGSUSED(1))
9711708Sstevel static int
ntwdt_open(dev_t * devp,int flag,int otyp,cred_t * credp)9721708Sstevel ntwdt_open(dev_t *devp, int flag, int otyp, cred_t *credp)
9731708Sstevel {
9741708Sstevel int inst = getminor(*devp);
9751708Sstevel int ret = 0;
9761708Sstevel ntwdt_state_t *ntwdt_ptr = getstate(inst);
9771708Sstevel
9781708Sstevel NTWDT_DBG(WDT_DBG_ENTRY, ("open: inst/soft: %d/0x%p",
979*11311SSurya.Prakki@Sun.COM inst, (void *)ntwdt_ptr));
9801708Sstevel
9811708Sstevel /* ensure caller is a privileged process */
9821708Sstevel if (drv_priv(credp) != 0)
9831708Sstevel return (EPERM);
9841708Sstevel
9851708Sstevel /*
9861708Sstevel * Check for a Deferred Attach scenario.
9871708Sstevel * Return ENXIO so DDI framework will call
9881708Sstevel * attach() and then retry the open().
9891708Sstevel */
9901708Sstevel if (ntwdt_ptr == NULL)
9911708Sstevel return (ENXIO);
9921708Sstevel
9931708Sstevel mutex_enter(&ntwdt_ptr->ntwdt_wdog_state->ntwdt_wdog_mutex);
9941708Sstevel mutex_enter(&ntwdt_ptr->ntwdt_mutex);
9951708Sstevel if (ntwdt_ptr->ntwdt_open_flag != 0)
9961708Sstevel ret = EAGAIN;
9971708Sstevel else
9981708Sstevel ntwdt_ptr->ntwdt_open_flag = 1;
9991708Sstevel mutex_exit(&ntwdt_ptr->ntwdt_mutex);
10001708Sstevel mutex_exit(&ntwdt_ptr->ntwdt_wdog_state->ntwdt_wdog_mutex);
10011708Sstevel
10021708Sstevel return (ret);
10031708Sstevel }
10041708Sstevel
10051708Sstevel /*
10061708Sstevel * Close the device this driver manages.
10071708Sstevel *
10081708Sstevel * Notes:
10091708Sstevel *
10101708Sstevel * The close() can happen while the AWDT is running !
10111708Sstevel * (and nothing is done, eg, to disable the watchdog
10121708Sstevel * or to stop updating the system heartbeat). This
10131708Sstevel * is the desired behavior, as this allows for the
10141708Sstevel * case of monitoring a Solaris reboot in terms
10151708Sstevel * of watchdog expiration.
10161708Sstevel */
10171708Sstevel _NOTE(ARGSUSED(1))
10181708Sstevel static int
ntwdt_close(dev_t dev,int flag,int otyp,cred_t * credp)10191708Sstevel ntwdt_close(dev_t dev, int flag, int otyp, cred_t *credp)
10201708Sstevel {
10211708Sstevel int inst = getminor(dev);
10221708Sstevel ntwdt_state_t *ntwdt_ptr = getstate(inst);
10231708Sstevel
10241708Sstevel NTWDT_DBG(WDT_DBG_ENTRY, ("close: inst/soft: %d/0x%p",
1025*11311SSurya.Prakki@Sun.COM inst, (void *)ntwdt_ptr));
10261708Sstevel
10271708Sstevel if (ntwdt_ptr == NULL)
10281708Sstevel return (ENXIO);
10291708Sstevel
10301708Sstevel mutex_enter(&ntwdt_ptr->ntwdt_wdog_state->ntwdt_wdog_mutex);
10311708Sstevel mutex_enter(&ntwdt_ptr->ntwdt_mutex);
10321708Sstevel if (ntwdt_ptr->ntwdt_open_flag != 0) {
10331708Sstevel ntwdt_ptr->ntwdt_open_flag = 0;
10341708Sstevel }
10351708Sstevel mutex_exit(&ntwdt_ptr->ntwdt_mutex);
10361708Sstevel mutex_exit(&ntwdt_ptr->ntwdt_wdog_state->ntwdt_wdog_mutex);
10371708Sstevel
10381708Sstevel return (0);
10391708Sstevel }
10401708Sstevel
10411708Sstevel _NOTE(ARGSUSED(4))
10421708Sstevel static int
ntwdt_ioctl(dev_t dev,int cmd,intptr_t arg,int mode,cred_t * credp,int * rvalp)10431708Sstevel ntwdt_ioctl(dev_t dev, int cmd, intptr_t arg, int mode,
10441708Sstevel cred_t *credp, int *rvalp)
10451708Sstevel {
10461708Sstevel int inst = getminor(dev);
10471708Sstevel int retval = 0;
10481708Sstevel ntwdt_state_t *ntwdt_ptr = NULL;
10491708Sstevel ntwdt_wdog_t *wdog_state;
10501708Sstevel
10511708Sstevel if ((ntwdt_ptr = getstate(inst)) == NULL)
10521708Sstevel return (ENXIO);
10531708Sstevel
10541708Sstevel /* Only allow ioctl's if Solaris/ScApp support app-wdog */
10551708Sstevel if (ntwdt_watchdog_available == 0)
10561708Sstevel return (ENXIO);
10571708Sstevel
10581708Sstevel wdog_state = ntwdt_ptr->ntwdt_wdog_state;
10591708Sstevel
10601708Sstevel switch (cmd) {
10611708Sstevel case LOMIOCDOGSTATE: {
10621708Sstevel /*
10631708Sstevel * Return the state of the AWDT to the application.
10641708Sstevel */
10651708Sstevel lom_dogstate_t lom_dogstate;
10661708Sstevel
10671708Sstevel mutex_enter(&wdog_state->ntwdt_wdog_mutex);
10681708Sstevel lom_dogstate.reset_enable =
10691708Sstevel wdog_state->ntwdt_reset_enabled;
10701708Sstevel lom_dogstate.dog_enable =
10711708Sstevel wdog_state->ntwdt_wdog_enabled;
10721708Sstevel lom_dogstate.dog_timeout =
10731708Sstevel wdog_state->ntwdt_wdog_timeout;
10741708Sstevel mutex_exit(&wdog_state->ntwdt_wdog_mutex);
10751708Sstevel
10761708Sstevel NTWDT_DBG(WDT_DBG_IOCTL, ("DOGSTATE: wdog/reset/timeout:"
10771708Sstevel " %d/%d/%d", lom_dogstate.dog_enable,
10781708Sstevel lom_dogstate.reset_enable, lom_dogstate.dog_timeout));
10791708Sstevel
10801708Sstevel if (ddi_copyout((caddr_t)&lom_dogstate, (caddr_t)arg,
10811708Sstevel sizeof (lom_dogstate_t), mode) != 0) {
10821708Sstevel retval = EFAULT;
10831708Sstevel }
10841708Sstevel break;
10851708Sstevel }
10861708Sstevel
10871708Sstevel case LOMIOCDOGCTL: {
10881708Sstevel /*
10891708Sstevel * Allow application to control whether watchdog
10901708Sstevel * is {dis,en}abled and whether Reset is
10911708Sstevel * {dis,en}abled.
10921708Sstevel */
10931708Sstevel lom_dogctl_t lom_dogctl;
10941708Sstevel
10951708Sstevel if (ddi_copyin((caddr_t)arg, (caddr_t)&lom_dogctl,
10961708Sstevel sizeof (lom_dogctl_t), mode) != 0) {
10971708Sstevel retval = EFAULT;
10981708Sstevel break;
10991708Sstevel }
11001708Sstevel
11011708Sstevel NTWDT_DBG(WDT_DBG_IOCTL, ("DOGCTL: wdog/reset:"
11021708Sstevel " %d/%d", lom_dogctl.dog_enable,
11031708Sstevel lom_dogctl.reset_enable));
11041708Sstevel
11051708Sstevel mutex_enter(&wdog_state->ntwdt_wdog_mutex);
11061708Sstevel
11071708Sstevel if (wdog_state->ntwdt_wdog_timeout == 0) {
11081708Sstevel /*
11091708Sstevel * then LOMIOCDOGTIME has never been used
11101708Sstevel * to setup a valid timeout.
11111708Sstevel */
11121708Sstevel retval = EINVAL;
11131708Sstevel goto end;
11141708Sstevel }
11151708Sstevel
11161708Sstevel /*
11171708Sstevel * Return error for the non-sensical combination:
11181708Sstevel * "enable Reset" and "disable watchdog".
11191708Sstevel */
11201708Sstevel if (lom_dogctl.dog_enable == 0 &&
11211708Sstevel lom_dogctl.reset_enable != 0) {
11221708Sstevel retval = EINVAL;
11231708Sstevel goto end;
11241708Sstevel }
11251708Sstevel
11261708Sstevel /*
11271708Sstevel * Store the user-specified state in our softstate.
11281708Sstevel * Note that our implementation here is stateless.
11291708Sstevel * Eg, we do not disallow an "enable the watchdog"
11301708Sstevel * command when the watchdog is currently enabled.
11311708Sstevel * This is needed (at least in the case) when
11321708Sstevel * the user enters OBP via ScApp/lom. In that case,
11331708Sstevel * ScApp disables the watchdog, but does not inform
11341708Sstevel * Solaris. As a result, an ensuing, unfiltered DOGCTL
11351708Sstevel * to enable the watchdog is required.
11361708Sstevel */
11371708Sstevel wdog_state->ntwdt_reset_enabled =
11381708Sstevel lom_dogctl.reset_enable;
11391708Sstevel wdog_state->ntwdt_wdog_enabled =
11401708Sstevel lom_dogctl.dog_enable;
11411708Sstevel
11421708Sstevel if (wdog_state->ntwdt_wdog_enabled != 0) {
11431708Sstevel /*
11441708Sstevel * then user wants to enable watchdog.
11451708Sstevel * Arm the watchdog timer and start the
11461708Sstevel * Cyclic, if it is not running.
11471708Sstevel */
11481708Sstevel ntwdt_arm_vwdt(wdog_state);
11491708Sstevel
11501708Sstevel if (wdog_state->ntwdt_timer_running == 0) {
11511708Sstevel ntwdt_start_timer(ntwdt_ptr);
11521708Sstevel }
11531708Sstevel } else {
11541708Sstevel /*
11551708Sstevel * user wants to disable the watchdog.
11561708Sstevel * Note that we do not set ntwdt_secs_remaining
11571708Sstevel * to zero; that could cause a false expiration.
11581708Sstevel */
11591708Sstevel if (wdog_state->ntwdt_timer_running != 0) {
11601708Sstevel ntwdt_stop_timer(ntwdt_ptr);
11611708Sstevel }
11621708Sstevel }
11631708Sstevel
11641708Sstevel /*
11651708Sstevel * Send a permutation of mailbox commands to
11661708Sstevel * ScApp that describes the current state of the
11671708Sstevel * watchdog timer. Note that the permutation
11681708Sstevel * depends on whether this is the first
11691708Sstevel * Enabling of the watchdog or not.
11701708Sstevel */
11711708Sstevel if (wdog_state->ntwdt_wdog_enabled != 0 &&
11721708Sstevel wdog_state->ntwdt_is_initial_enable == 0) {
11731708Sstevel
11741708Sstevel /* switch from SWDT to AWDT mode */
11751708Sstevel ntwdt_swdt_to_awdt(wdog_state);
11761708Sstevel
11771708Sstevel /* Tell ScApp we're in AWDT mode */
1178*11311SSurya.Prakki@Sun.COM (void) ntwdt_set_cfgvar(LW8_WDT_PROP_MODE,
11791708Sstevel LW8_PROP_MODE_AWDT);
11801708Sstevel }
11811708Sstevel
11821708Sstevel /* Inform ScApp of the choices made by the app */
1183*11311SSurya.Prakki@Sun.COM (void) ntwdt_set_cfgvar(LW8_WDT_PROP_WDT,
11841708Sstevel wdog_state->ntwdt_wdog_enabled);
1185*11311SSurya.Prakki@Sun.COM (void) ntwdt_set_cfgvar(LW8_WDT_PROP_RECOV,
11861708Sstevel wdog_state->ntwdt_reset_enabled);
11871708Sstevel
11881708Sstevel if (wdog_state->ntwdt_wdog_enabled != 0 &&
11891708Sstevel wdog_state->ntwdt_is_initial_enable == 0) {
11901708Sstevel /*
11911708Sstevel * Clear tod_iosram_t.tod_timeout_period,
11921708Sstevel * which is used in SWDT part of state
11931708Sstevel * machine. (If this field is non-zero,
11941708Sstevel * ScApp assumes that Solaris' SWDT is active).
11951708Sstevel *
11961708Sstevel * Clearing this is useful in case SC reboots
11971708Sstevel * while Solaris is running, as ScApp will read
11981708Sstevel * a zero and not assume SWDT is running.
11991708Sstevel */
1200*11311SSurya.Prakki@Sun.COM (void) ntwdt_set_hw_timeout(0);
12011708Sstevel
12021708Sstevel /* "the first watchdog-enable has been seen" */
12031708Sstevel wdog_state->ntwdt_is_initial_enable = 1;
12041708Sstevel }
12051708Sstevel
12061708Sstevel mutex_exit(&wdog_state->ntwdt_wdog_mutex);
12071708Sstevel break;
12081708Sstevel }
12091708Sstevel
12101708Sstevel case LOMIOCDOGTIME: {
12111708Sstevel /*
12121708Sstevel * Allow application to set the period (in seconds)
12131708Sstevel * of the watchdog timeout.
12141708Sstevel */
12151708Sstevel uint32_t lom_dogtime;
12161708Sstevel
12171708Sstevel if (ddi_copyin((caddr_t)arg, (caddr_t)&lom_dogtime,
12181708Sstevel sizeof (uint32_t), mode) != 0) {
12191708Sstevel retval = EFAULT;
12201708Sstevel break;
12211708Sstevel }
12221708Sstevel
12231708Sstevel NTWDT_DBG(WDT_DBG_IOCTL, ("DOGTIME: %u seconds",
12241708Sstevel lom_dogtime));
12251708Sstevel
12261708Sstevel /* Ensure specified timeout is within range. */
12271708Sstevel if ((lom_dogtime == 0) ||
12281708Sstevel (lom_dogtime > NTWDT_MAX_TIMEOUT)) {
12291708Sstevel retval = EINVAL;
12301708Sstevel break;
12311708Sstevel }
12321708Sstevel
12331708Sstevel mutex_enter(&wdog_state->ntwdt_wdog_mutex);
12341708Sstevel
12351708Sstevel wdog_state->ntwdt_wdog_timeout = lom_dogtime;
12361708Sstevel
12371708Sstevel /*
12381708Sstevel * If watchdog is currently running, re-arm the
12391708Sstevel * watchdog timeout with the specified value.
12401708Sstevel */
12411708Sstevel if (wdog_state->ntwdt_timer_running != 0) {
12421708Sstevel ntwdt_arm_vwdt(wdog_state);
12431708Sstevel }
12441708Sstevel
12451708Sstevel /* Tell ScApp of the specified timeout */
1246*11311SSurya.Prakki@Sun.COM (void) ntwdt_set_cfgvar(LW8_WDT_PROP_TO, lom_dogtime);
12471708Sstevel
12481708Sstevel mutex_exit(&wdog_state->ntwdt_wdog_mutex);
12491708Sstevel break;
12501708Sstevel }
12511708Sstevel
12521708Sstevel case LOMIOCDOGPAT: {
12531708Sstevel /*
12541708Sstevel * Allow user to re-arm ("pat") the watchdog.
12551708Sstevel */
12561708Sstevel NTWDT_DBG(WDT_DBG_IOCTL, ("DOGPAT"));
12571708Sstevel
12581708Sstevel mutex_enter(&wdog_state->ntwdt_wdog_mutex);
12591708Sstevel
12601708Sstevel /*
12611708Sstevel * If watchdog is not enabled or underlying
12621708Sstevel * Cyclic timer is not running, exit.
12631708Sstevel */
12641708Sstevel if (!(wdog_state->ntwdt_wdog_enabled &&
12651708Sstevel wdog_state->ntwdt_timer_running))
12661708Sstevel goto end;
12671708Sstevel
12681708Sstevel if (wdog_state->ntwdt_wdog_expired == 0) {
12691708Sstevel /* then VWDT has not expired; re-arm it */
12701708Sstevel ntwdt_arm_vwdt(wdog_state);
12711708Sstevel
12721708Sstevel NTWDT_DBG(WDT_DBG_VWDT, ("VWDT re-armed:"
12731708Sstevel " %d seconds",
12741708Sstevel wdog_state->ntwdt_secs_remaining));
12751708Sstevel }
12761708Sstevel
12771708Sstevel mutex_exit(&wdog_state->ntwdt_wdog_mutex);
12781708Sstevel break;
12791708Sstevel }
12801708Sstevel
12811708Sstevel #ifdef DEBUG
12821708Sstevel case NTWDTIOCPANIC: {
12831708Sstevel /*
12841708Sstevel * Use in unit/integration testing to test our
12851708Sstevel * panic-handler code.
12861708Sstevel */
12871708Sstevel cmn_err(CE_PANIC, "NTWDTIOCPANIC: force a panic");
12881708Sstevel break;
12891708Sstevel }
12901708Sstevel
12911708Sstevel case NTWDTIOCSTATE: {
12921708Sstevel /*
12931708Sstevel * Allow application to read wdog state from the
12941708Sstevel * SC (and *not* the driver's softstate).
12951708Sstevel *
12961708Sstevel * Return state of:
12971708Sstevel * o recovery-enabled
12981708Sstevel * o current timeout value
12991708Sstevel */
13001708Sstevel ntwdt_data_t ntwdt_data;
13011708Sstevel int action;
13021708Sstevel int timeout;
13031708Sstevel int ret;
13041708Sstevel
13051708Sstevel mutex_enter(&wdog_state->ntwdt_wdog_mutex);
13061708Sstevel ret = ntwdt_get_cfgvar(LW8_WDT_PROP_TO, &timeout);
13071708Sstevel ret |= ntwdt_get_cfgvar(LW8_WDT_PROP_RECOV, &action);
13081708Sstevel mutex_exit(&wdog_state->ntwdt_wdog_mutex);
13091708Sstevel
13101708Sstevel bzero((caddr_t)&ntwdt_data, sizeof (ntwdt_data));
13111708Sstevel
13121708Sstevel if (ret != NTWDT_SUCCESS) {
13131708Sstevel retval = EIO;
13141708Sstevel break;
13151708Sstevel }
13161708Sstevel
13171708Sstevel NTWDT_DBG(WDT_DBG_IOCTL, ("NTWDTIOCSTATE:"
13181708Sstevel " timeout/action: %d/%d", timeout, action));
13191708Sstevel
13201708Sstevel ntwdt_data.ntwdt_wd1 = (uint32_t)timeout;
13211708Sstevel ntwdt_data.ntwdt_wd2 = (uint8_t)action;
13221708Sstevel
13231708Sstevel if (ddi_copyout((caddr_t)&ntwdt_data, (caddr_t)arg,
13241708Sstevel sizeof (ntwdt_data_t), mode) != 0) {
13251708Sstevel retval = EFAULT;
13261708Sstevel }
13271708Sstevel break;
13281708Sstevel }
13291708Sstevel #endif
13301708Sstevel default:
13311708Sstevel retval = EINVAL;
13321708Sstevel break;
13331708Sstevel }
13341708Sstevel
13351708Sstevel return (retval);
13361708Sstevel end:
13371708Sstevel mutex_exit(&wdog_state->ntwdt_wdog_mutex);
13381708Sstevel return (retval);
13391708Sstevel }
13401708Sstevel
13411708Sstevel /*
13421708Sstevel * Arm the Virtual Watchdog Timer (VWDT).
13431708Sstevel *
13441708Sstevel * Assign the current watchdog timeout (ntwdt_wdog_timeout)
13451708Sstevel * to the softstate variable representing the watchdog
13461708Sstevel * timer (ntwdt_secs_remaining).
13471708Sstevel *
13481708Sstevel * To ensure (from ntwdt's perspective) that any actual
13491708Sstevel * timeout expiration is at least as large as the expected
13501708Sstevel * timeout, conditionally set/clear a bit that will be
13511708Sstevel * checked in the Cyclic's softint.
13521708Sstevel *
13531708Sstevel * If the Cyclic has been started, the goal is to ignore
13541708Sstevel * the _next_ firing of the Cyclic, as that firing will
13551708Sstevel * NOT represent a full, one-second period. If the Cyclic
13561708Sstevel * has NOT been started yet, then do not ignore the next
13571708Sstevel * Cyclic's firing, as that's the First One, and it was
13581708Sstevel * programmed to fire at a specific time (see ntwdt_start_timer).
13591708Sstevel */
13601708Sstevel static void
ntwdt_arm_vwdt(ntwdt_wdog_t * wdog_state)13611708Sstevel ntwdt_arm_vwdt(ntwdt_wdog_t *wdog_state)
13621708Sstevel {
13631708Sstevel /* arm the watchdog timer (VWDT) */
13641708Sstevel wdog_state->ntwdt_secs_remaining =
13651708Sstevel wdog_state->ntwdt_wdog_timeout;
13661708Sstevel
13671708Sstevel if (wdog_state->ntwdt_timer_running != 0)
13681708Sstevel NTWDT_FLAG_SET(wdog_state, SKIP_CYCLIC);
13691708Sstevel else
13701708Sstevel NTWDT_FLAG_CLR(wdog_state, SKIP_CYCLIC);
13711708Sstevel }
13721708Sstevel
13731708Sstevel /*
13741708Sstevel * Switch from SWDT mode to AWDT mode.
13751708Sstevel */
13761708Sstevel _NOTE(ARGSUSED(0))
13771708Sstevel static void
ntwdt_swdt_to_awdt(ntwdt_wdog_t * wdog_state)13781708Sstevel ntwdt_swdt_to_awdt(ntwdt_wdog_t *wdog_state)
13791708Sstevel {
13801708Sstevel ASSERT(wdog_state->ntwdt_is_initial_enable == 0);
13811708Sstevel
13821708Sstevel /*
13831708Sstevel * Disable SWDT. If SWDT is currently active,
13841708Sstevel * display a message so user knows that SWDT Mode
13851708Sstevel * has terminated.
13861708Sstevel */
13871708Sstevel if (watchdog_enable != 0 ||
13881708Sstevel watchdog_activated != 0)
13891708Sstevel cmn_err(CE_NOTE, "Hardware watchdog disabled");
13901708Sstevel watchdog_enable = 0;
13911708Sstevel watchdog_activated = 0;
13921708Sstevel
13931708Sstevel /* "we are in AWDT mode" */
13941708Sstevel ntwdt_watchdog_activated = 1;
13951708Sstevel NTWDT_DBG(WDT_DBG_VWDT, ("AWDT is enabled"));
13961708Sstevel }
13971708Sstevel
13981708Sstevel /*
13991708Sstevel * This is the Cyclic that runs at a multiple of the
14001708Sstevel * AWDT's watchdog-timeout period. This Cyclic runs at
14011708Sstevel * LOCK_LEVEL (eg, CY_LOCK_LEVEL) and will post a
14021708Sstevel * soft-interrupt in order to complete all processing.
14031708Sstevel *
14041708Sstevel * Executing at LOCK_LEVEL gives this function a high
14051708Sstevel * interrupt priority, while performing its work via
14061708Sstevel * a soft-interrupt allows for a consistent (eg, MT-safe)
14071708Sstevel * view of driver softstate between User and Interrupt
14081708Sstevel * context.
14091708Sstevel *
14101708Sstevel * Context:
14111708Sstevel * interrupt context: Cyclic framework calls at
14121708Sstevel * CY_LOCK_LEVEL (=> 10)
14131708Sstevel */
14141708Sstevel _NOTE(ARGSUSED(0))
14151708Sstevel static void
ntwdt_cyclic_pat(void * arg)14161708Sstevel ntwdt_cyclic_pat(void *arg)
14171708Sstevel {
14181708Sstevel /* post-down to DDI_SOFTINT_LOW */
14191708Sstevel ddi_trigger_softintr(ntwdt_cyclic_softint_id);
14201708Sstevel }
14211708Sstevel
14221708Sstevel /*
14231708Sstevel * This is the soft-interrupt triggered by the AWDT
14241708Sstevel * Cyclic.
14251708Sstevel *
14261708Sstevel * This softint does all the work re: computing whether
14271708Sstevel * the VWDT expired. It grabs ntwdt_wdog_mutex
14281708Sstevel * so User Context code (eg, the IOCTLs) cannot run,
14291708Sstevel * and then it tests whether the VWDT expired. If it
14301708Sstevel * hasn't, it decrements the VWDT timer by the amount
14311708Sstevel * of the Cyclic's period. If the timer has expired,
14321708Sstevel * it initiates Recovery (based on what user specified
14331708Sstevel * in LOMIOCDOGCTL).
14341708Sstevel *
14351708Sstevel * This function also updates the normal system "heartbeat".
14361708Sstevel *
14371708Sstevel * Context:
14381708Sstevel * interrupt-context: DDI_SOFTINT_LOW
14391708Sstevel */
14401708Sstevel static uint_t
ntwdt_cyclic_softint(char * arg)14411708Sstevel ntwdt_cyclic_softint(char *arg)
14421708Sstevel {
14431708Sstevel ntwdt_state_t *ntwdt_ptr = (ntwdt_state_t *)arg;
14441708Sstevel ntwdt_wdog_t *wdog_state;
14451708Sstevel
14461708Sstevel wdog_state = ntwdt_ptr->ntwdt_wdog_state;
14471708Sstevel
14481708Sstevel mutex_enter(&wdog_state->ntwdt_wdog_mutex);
14491708Sstevel
14501708Sstevel if ((wdog_state->ntwdt_wdog_flags &
14511708Sstevel NTWDT_FLAG_SKIP_CYCLIC) != 0) {
14521708Sstevel /*
14531708Sstevel * then skip all processing by this interrupt.
14541708Sstevel * (see ntwdt_arm_vwdt()).
14551708Sstevel */
14561708Sstevel wdog_state->ntwdt_wdog_flags &= ~NTWDT_FLAG_SKIP_CYCLIC;
14571708Sstevel goto end;
14581708Sstevel }
14591708Sstevel
14601708Sstevel if (wdog_state->ntwdt_timer_running == 0 ||
14611708Sstevel (ntwdt_ptr->ntwdt_cycl_id == CYCLIC_NONE) ||
14621708Sstevel (wdog_state->ntwdt_wdog_enabled == 0))
14631708Sstevel goto end;
14641708Sstevel
14651708Sstevel /* re-arm ("pat") the hardware watchdog */
14661708Sstevel ntwdt_pat_hw_watchdog();
14671708Sstevel
14681708Sstevel /* Decrement the VWDT and see if it has expired. */
14691708Sstevel if (--wdog_state->ntwdt_secs_remaining == 0) {
14701708Sstevel
14711708Sstevel cmn_err(CE_WARN, "application-watchdog expired");
14721708Sstevel
14731708Sstevel wdog_state->ntwdt_wdog_expired = 1;
14741708Sstevel
14751708Sstevel if (wdog_state->ntwdt_reset_enabled != 0) {
14761708Sstevel /*
14771708Sstevel * Update ScApp so that the new wdog-timeout
14781708Sstevel * value is as specified in the
14791708Sstevel * NTWDT_BOOT_TIMEOUT_PROP driver Property.
14801708Sstevel * This timeout is assumedly larger than the
14811708Sstevel * actual Solaris reboot time. This will allow
14821708Sstevel * our forced-reboot to not cause an unplanned
14831708Sstevel * (series of) watchdog expiration(s).
14841708Sstevel */
14851708Sstevel if (ntwdt_disable_timeout_action == 0)
14861708Sstevel ntwdt_reprogram_wd(ntwdt_ptr);
14871708Sstevel
14881708Sstevel mutex_exit(&wdog_state->ntwdt_wdog_mutex);
14891708Sstevel
14901708Sstevel NTWDT_DBG(WDT_DBG_VWDT, ("recovery being done"));
14911708Sstevel
14921708Sstevel ntwdt_enforce_timeout();
14931708Sstevel } else {
14941708Sstevel NTWDT_DBG(WDT_DBG_VWDT, ("no recovery being done"));
14951708Sstevel
14961708Sstevel wdog_state->ntwdt_wdog_enabled = 0;
14971708Sstevel
14981708Sstevel /*
14991708Sstevel * Tell ScApp to disable wdog; this prevents
15001708Sstevel * the "2x-timeout" artifact. Eg, Solaris
15011708Sstevel * times-out at t(x) and ScApp times-out at t(2x),
15021708Sstevel * where (x==ntwdt_wdog_timeout).
15031708Sstevel */
15041708Sstevel (void) ntwdt_set_cfgvar(LW8_WDT_PROP_WDT,
15051708Sstevel wdog_state->ntwdt_wdog_enabled);
15061708Sstevel }
15071708Sstevel
15081708Sstevel /* Schedule Callout to stop this Cyclic */
1509*11311SSurya.Prakki@Sun.COM (void) timeout(ntwdt_stop_timer_lock, ntwdt_ptr, 0);
15101708Sstevel
15111708Sstevel } else {
15121708Sstevel _NOTE(EMPTY)
15131708Sstevel NTWDT_DBG(WDT_DBG_VWDT, ("time remaining in VWDT: %d"
15141708Sstevel " seconds", wdog_state->ntwdt_secs_remaining));
15151708Sstevel }
15161708Sstevel end:
15171708Sstevel mutex_exit(&wdog_state->ntwdt_wdog_mutex);
15181708Sstevel
15191708Sstevel return (DDI_INTR_CLAIMED);
15201708Sstevel }
15211708Sstevel
15221708Sstevel /*
15231708Sstevel * Program the AWDT watchdog-timeout value to that specified
15241708Sstevel * in the NTWDT_BOOT_TIMEOUT_PROP driver Property. However,
15251708Sstevel * only do this if the AWDT is in the correct state.
15261708Sstevel *
15271708Sstevel * Caller's Context:
15281708Sstevel * o interrupt context: (from software-interrupt)
15291708Sstevel * o during a panic
15301708Sstevel */
15311708Sstevel static void
ntwdt_reprogram_wd(ntwdt_state_t * ntwdt_ptr)15321708Sstevel ntwdt_reprogram_wd(ntwdt_state_t *ntwdt_ptr)
15331708Sstevel {
15341708Sstevel ntwdt_wdog_t *wdog_state = ntwdt_ptr->ntwdt_wdog_state;
15351708Sstevel
15361708Sstevel /*
15371708Sstevel * Program the AWDT watchdog-timeout value only if the
15381708Sstevel * watchdog is enabled, the user wants to do recovery,
15391708Sstevel * ("reset is enabled") and the AWDT timer is currently
15401708Sstevel * running.
15411708Sstevel */
15421708Sstevel if (wdog_state->ntwdt_wdog_enabled != 0 &&
15431708Sstevel wdog_state->ntwdt_reset_enabled != 0 &&
15441708Sstevel wdog_state->ntwdt_timer_running != 0) {
15451708Sstevel if (ddi_in_panic() != 0)
1546*11311SSurya.Prakki@Sun.COM (void) ntwdt_set_cfgvar_noreply(LW8_WDT_PROP_TO,
15471708Sstevel wdog_state->ntwdt_boot_timeout);
15481708Sstevel else
15491708Sstevel (void) ntwdt_set_cfgvar(LW8_WDT_PROP_TO,
15501708Sstevel wdog_state->ntwdt_boot_timeout);
15511708Sstevel }
15521708Sstevel }
15531708Sstevel
15541708Sstevel /*
15551708Sstevel * This is the callback that was registered to run during a panic.
15561708Sstevel * It will set the watchdog-timeout value to be that as specified
15571708Sstevel * in the NTWDT_BOOT_TIMEOUT_PROP driver Property.
15581708Sstevel *
15591708Sstevel * Note that unless this Property's value specifies a timeout
15601708Sstevel * that's larger than the actual reboot latency, ScApp will
15611708Sstevel * experience a timeout and initiate Recovery.
15621708Sstevel */
15631708Sstevel _NOTE(ARGSUSED(1))
15641708Sstevel static boolean_t
ntwdt_panic_cb(void * arg,int code)15651708Sstevel ntwdt_panic_cb(void *arg, int code)
15661708Sstevel {
15671708Sstevel ASSERT(ddi_in_panic() != 0);
15681708Sstevel
15691708Sstevel ntwdt_reprogram_wd((ntwdt_state_t *)arg);
15701708Sstevel
15711708Sstevel return (B_TRUE);
15721708Sstevel }
15731708Sstevel
15741708Sstevel /*
15751708Sstevel * Initialize the Cyclic that is used to monitor the VWDT.
15761708Sstevel */
15771708Sstevel static void
ntwdt_start_timer(ntwdt_state_t * ntwdt_ptr)15781708Sstevel ntwdt_start_timer(ntwdt_state_t *ntwdt_ptr)
15791708Sstevel {
15801708Sstevel ntwdt_wdog_t *wdog_state = ntwdt_ptr->ntwdt_wdog_state;
15811708Sstevel cyc_handler_t *hdlr = &wdog_state->ntwdt_cycl_hdlr;
15821708Sstevel cyc_time_t *when = &wdog_state->ntwdt_cycl_time;
15831708Sstevel
15841708Sstevel /*
15851708Sstevel * Init Cyclic so its first expiry occurs wdog-timeout
15861708Sstevel * seconds from the current, absolute time.
15871708Sstevel */
15881708Sstevel when->cyt_interval = wdog_state->ntwdt_cyclic_interval;
15891708Sstevel when->cyt_when = gethrtime() + when->cyt_interval;
15901708Sstevel
15911708Sstevel wdog_state->ntwdt_wdog_expired = 0;
15921708Sstevel wdog_state->ntwdt_timer_running = 1;
15931708Sstevel
15941708Sstevel mutex_enter(&cpu_lock);
15951708Sstevel if (ntwdt_ptr->ntwdt_cycl_id == CYCLIC_NONE)
15961708Sstevel ntwdt_ptr->ntwdt_cycl_id = cyclic_add(hdlr, when);
15971708Sstevel mutex_exit(&cpu_lock);
15981708Sstevel
15991708Sstevel NTWDT_DBG(WDT_DBG_VWDT, ("AWDT's cyclic-driven timer is started"));
16001708Sstevel }
16011708Sstevel
16021708Sstevel /*
16031708Sstevel * Stop the cyclic that is used to monitor the VWDT (and
16041708Sstevel * was Started by ntwdt_start_timer).
16051708Sstevel *
16061708Sstevel * Context: per the Cyclic API, cyclic_remove cannot be called
16071708Sstevel * from interrupt-context. Note that when this is
16081708Sstevel * called via a Callout, it's called from base level.
16091708Sstevel */
16101708Sstevel static void
ntwdt_stop_timer(void * arg)16111708Sstevel ntwdt_stop_timer(void *arg)
16121708Sstevel {
16131708Sstevel ntwdt_state_t *ntwdt_ptr = (void *)arg;
16141708Sstevel ntwdt_wdog_t *wdog_state = ntwdt_ptr->ntwdt_wdog_state;
16151708Sstevel
16161708Sstevel mutex_enter(&cpu_lock);
16171708Sstevel if (ntwdt_ptr->ntwdt_cycl_id != CYCLIC_NONE)
16181708Sstevel cyclic_remove(ntwdt_ptr->ntwdt_cycl_id);
16191708Sstevel mutex_exit(&cpu_lock);
16201708Sstevel
16211708Sstevel wdog_state->ntwdt_timer_running = 0;
16221708Sstevel ntwdt_ptr->ntwdt_cycl_id = CYCLIC_NONE;
16231708Sstevel
16241708Sstevel NTWDT_DBG(WDT_DBG_VWDT, ("AWDT's cyclic-driven timer is stopped"));
16251708Sstevel }
16261708Sstevel
16271708Sstevel /*
16281708Sstevel * Stop the cyclic that is used to monitor the VWDT (and
16291708Sstevel * do it in a thread-safe manner).
16301708Sstevel *
16311708Sstevel * This is a wrapper function for the core function,
16321708Sstevel * ntwdt_stop_timer. Both functions are useful, as some
16331708Sstevel * callers will already have the appropriate mutex locked, and
16341708Sstevel * other callers will not.
16351708Sstevel */
16361708Sstevel static void
ntwdt_stop_timer_lock(void * arg)16371708Sstevel ntwdt_stop_timer_lock(void *arg)
16381708Sstevel {
16391708Sstevel ntwdt_state_t *ntwdt_ptr = (void *)arg;
16401708Sstevel ntwdt_wdog_t *wdog_state = ntwdt_ptr->ntwdt_wdog_state;
16411708Sstevel
16421708Sstevel mutex_enter(&wdog_state->ntwdt_wdog_mutex);
16431708Sstevel ntwdt_stop_timer(arg);
16441708Sstevel mutex_exit(&wdog_state->ntwdt_wdog_mutex);
16451708Sstevel }
16461708Sstevel
16471708Sstevel /*
16481708Sstevel * Add callbacks needed to react to major system state transitions.
16491708Sstevel */
16501708Sstevel static void
ntwdt_add_callbacks(ntwdt_state_t * ntwdt_ptr)16511708Sstevel ntwdt_add_callbacks(ntwdt_state_t *ntwdt_ptr)
16521708Sstevel {
16531708Sstevel /* register a callback that's called during a panic */
16541708Sstevel ntwdt_callback_ids.ntwdt_panic_cb = callb_add(ntwdt_panic_cb,
16551708Sstevel (void *)ntwdt_ptr, CB_CL_PANIC, "ntwdt_panic_cb");
16561708Sstevel }
16571708Sstevel
16581708Sstevel /*
16591708Sstevel * Remove callbacks added by ntwdt_add_callbacks.
16601708Sstevel */
16611708Sstevel static void
ntwdt_remove_callbacks()16621708Sstevel ntwdt_remove_callbacks()
16631708Sstevel {
1664*11311SSurya.Prakki@Sun.COM (void) callb_delete(ntwdt_callback_ids.ntwdt_panic_cb);
16651708Sstevel }
16661708Sstevel
16671708Sstevel /*
16681708Sstevel * Initiate a Reset (as a result of the VWDT timeout expiring).
16691708Sstevel */
16701708Sstevel static void
ntwdt_enforce_timeout()16711708Sstevel ntwdt_enforce_timeout()
16721708Sstevel {
16731708Sstevel if (ntwdt_disable_timeout_action != 0) {
16741708Sstevel cmn_err(CE_NOTE, "OS timeout expired, taking no action");
16751708Sstevel return;
16761708Sstevel }
16771708Sstevel
16781708Sstevel NTWDT_DBG(WDT_DBG_VWDT, ("VWDT expired; do a crashdump"));
16791708Sstevel
16801708Sstevel (void) kadmin(A_DUMP, AD_BOOT, NULL, kcred);
16811708Sstevel cmn_err(CE_PANIC, "kadmin(A_DUMP, AD_BOOT) failed");
16821708Sstevel _NOTE(NOTREACHED)
16831708Sstevel }
16841708Sstevel
16851708Sstevel /*
16861708Sstevel * Interpret the Properties from driver's config file.
16871708Sstevel */
16881708Sstevel static int
ntwdt_read_props(ntwdt_state_t * ntwdt_ptr)16891708Sstevel ntwdt_read_props(ntwdt_state_t *ntwdt_ptr)
16901708Sstevel {
16911708Sstevel ntwdt_wdog_t *wdog_state;
16921708Sstevel int boot_timeout;
16931708Sstevel
16941708Sstevel wdog_state = ntwdt_ptr->ntwdt_wdog_state;
16951708Sstevel
16961708Sstevel /*
16971708Sstevel * interpret Property that specifies how long
16981708Sstevel * the watchdog-timeout should be set to when
16991708Sstevel * Solaris panics. Assumption is that this value
17001708Sstevel * is larger than the amount of time it takes
17011708Sstevel * to reboot and write crashdump. If not,
17021708Sstevel * ScApp could induce a reset, due to an expired
17031708Sstevel * watchdog-timeout.
17041708Sstevel */
17051708Sstevel wdog_state->ntwdt_boot_timeout =
17061708Sstevel NTWDT_DEFAULT_BOOT_TIMEOUT;
17071708Sstevel
17081708Sstevel boot_timeout = ddi_prop_get_int(DDI_DEV_T_ANY,
17091708Sstevel ntwdt_ptr->ntwdt_dip, DDI_PROP_DONTPASS,
17101708Sstevel NTWDT_BOOT_TIMEOUT_PROP, -1);
17111708Sstevel
17121708Sstevel if (boot_timeout != -1 && boot_timeout > 0 &&
17131708Sstevel boot_timeout <= NTWDT_MAX_TIMEOUT) {
17141708Sstevel wdog_state->ntwdt_boot_timeout =
17151708Sstevel boot_timeout;
17161708Sstevel } else {
17171708Sstevel _NOTE(EMPTY)
17181708Sstevel NTWDT_DBG(WDT_DBG_ENTRY, (NTWDT_BOOT_TIMEOUT_PROP
17191708Sstevel ": using default of %d seconds.",
17201708Sstevel wdog_state->ntwdt_boot_timeout));
17211708Sstevel }
17221708Sstevel
17231708Sstevel return (DDI_SUCCESS);
17241708Sstevel }
17251708Sstevel
17261708Sstevel /*
17271708Sstevel * Write state of SWDT to ScApp.
17281708Sstevel *
17291708Sstevel * Currently, this function is only called on attach()
17301708Sstevel * of our driver.
17311708Sstevel *
17321708Sstevel * Note that we do not need to call this function, eg,
17331708Sstevel * in response to a solicitation from ScApp (eg,
17341708Sstevel * the LW8_SC_RESTARTED_EVENT).
17351708Sstevel *
17361708Sstevel * Context:
17371708Sstevel * called in Kernel Context
17381708Sstevel */
17391708Sstevel static int
ntwdt_set_swdt_state()17401708Sstevel ntwdt_set_swdt_state()
17411708Sstevel {
17421708Sstevel /*
17431708Sstevel * note that ScApp only needs this one
17441708Sstevel * variable when system is in SWDT mode.
17451708Sstevel */
1746*11311SSurya.Prakki@Sun.COM (void) ntwdt_set_cfgvar(LW8_WDT_PROP_MODE,
17471708Sstevel LW8_PROP_MODE_SWDT);
17481708Sstevel
17491708Sstevel return (0);
17501708Sstevel }
17511708Sstevel
17521708Sstevel /*
17531708Sstevel * Write all AWDT state to ScApp via the SBBC mailbox
17541708Sstevel * in IOSRAM. Note that the permutation of Writes
17551708Sstevel * is as specified in the design spec.
17561708Sstevel *
17571708Sstevel * Notes: caller must perform synchronization so that
17581708Sstevel * this series of Writes is consistent as viewed
17591708Sstevel * by ScApp (eg, there is no LW8_WDT_xxx mailbox
17601708Sstevel * command that contains "all Properties"; each
17611708Sstevel * Property must be written individually).
17621708Sstevel */
17631708Sstevel static int
ntwdt_set_awdt_state(ntwdt_wdog_t * rstatep)17641708Sstevel ntwdt_set_awdt_state(ntwdt_wdog_t *rstatep)
17651708Sstevel {
17661708Sstevel /* ScApp expects values in this order: */
1767*11311SSurya.Prakki@Sun.COM (void) ntwdt_set_cfgvar(LW8_WDT_PROP_MODE,
17681708Sstevel ntwdt_watchdog_activated != 0);
1769*11311SSurya.Prakki@Sun.COM (void) ntwdt_set_cfgvar(LW8_WDT_PROP_TO,
17701708Sstevel rstatep->ntwdt_wdog_timeout);
1771*11311SSurya.Prakki@Sun.COM (void) ntwdt_set_cfgvar(LW8_WDT_PROP_RECOV,
17721708Sstevel rstatep->ntwdt_reset_enabled);
1773*11311SSurya.Prakki@Sun.COM (void) ntwdt_set_cfgvar(LW8_WDT_PROP_WDT,
17741708Sstevel rstatep->ntwdt_wdog_enabled);
17751708Sstevel
17761708Sstevel return (NTWDT_SUCCESS);
17771708Sstevel }
17781708Sstevel
17791708Sstevel /*
17801708Sstevel * Write a specified WDT Property (and Value) to ScApp.
17811708Sstevel *
17821708Sstevel * <Property, Value> is passed in the LW8_MBOX_WDT_SET
17831708Sstevel * (SBBC) mailbox message. The SBBC mailbox resides in
17841708Sstevel * IOSRAM.
17851708Sstevel *
17861708Sstevel * Note that this function is responsible for ensuring that
17871708Sstevel * a driver-specific representation of a mailbox <Value> is
17881708Sstevel * mapped into the representation that is expected by ScApp
17891708Sstevel * (eg, see LW8_WDT_PROP_RECOV).
17901708Sstevel */
17911708Sstevel static int
ntwdt_set_cfgvar(int var,int val)17921708Sstevel ntwdt_set_cfgvar(int var, int val)
17931708Sstevel {
17941708Sstevel int rv;
17951708Sstevel int mbox_val;
17961708Sstevel lw8_set_wdt_t set_wdt;
17971708Sstevel
17981708Sstevel switch (var) {
17991708Sstevel case LW8_WDT_PROP_RECOV:
18001708Sstevel #ifdef DEBUG
18011708Sstevel NTWDT_DBG(WDT_DBG_PROT, ("MBOX_SET of 'recovery-enabled':"
18021708Sstevel " %s (%d)", (val != 0) ? "enabled" : "disabled", val));
18031708Sstevel #endif
18041708Sstevel mbox_val = (val != 0) ? LW8_PROP_RECOV_ENABLED :
18051708Sstevel LW8_PROP_RECOV_DISABLED;
18061708Sstevel break;
18071708Sstevel
18081708Sstevel case LW8_WDT_PROP_WDT:
18091708Sstevel #ifdef DEBUG
18101708Sstevel NTWDT_DBG(WDT_DBG_PROT, ("MBOX_SET of 'wdog-enabled':"
18111708Sstevel " %s (%d)", (val != 0) ? "enabled" : "disabled", val));
18121708Sstevel #endif
18131708Sstevel mbox_val = (val != 0) ? LW8_PROP_WDT_ENABLED :
18141708Sstevel LW8_PROP_WDT_DISABLED;
18151708Sstevel break;
18161708Sstevel
18171708Sstevel case LW8_WDT_PROP_TO:
18181708Sstevel #ifdef DEBUG
18191708Sstevel NTWDT_DBG(WDT_DBG_PROT, ("MBOX_SET of 'wdog-timeout':"
18201708Sstevel " %d seconds", val));
18211708Sstevel #endif
18221708Sstevel mbox_val = val;
18231708Sstevel break;
18241708Sstevel
18251708Sstevel case LW8_WDT_PROP_MODE:
18261708Sstevel #ifdef DEBUG
18271708Sstevel NTWDT_DBG(WDT_DBG_PROT, ("MBOX_SET of 'wdog-mode':"
18281708Sstevel " %s (%d)", (val != LW8_PROP_MODE_SWDT) ?
18291708Sstevel "AWDT" : "SWDT", val));
18301708Sstevel #endif
18311708Sstevel mbox_val = val;
18321708Sstevel break;
18331708Sstevel
18341708Sstevel default:
18351708Sstevel ASSERT(0);
18361708Sstevel _NOTE(NOTREACHED)
18371708Sstevel }
18381708Sstevel
18391708Sstevel set_wdt.property_id = var;
18401708Sstevel set_wdt.value = mbox_val;
18411708Sstevel
18421708Sstevel rv = ntwdt_lomcmd(LW8_MBOX_WDT_SET, (intptr_t)&set_wdt);
18431708Sstevel if (rv != 0) {
18441708Sstevel _NOTE(EMPTY)
18451708Sstevel NTWDT_DBG(WDT_DBG_PROT, ("MBOX_SET of prop/val %d/%d "
18461708Sstevel "failed: %d", var, mbox_val, rv));
18471708Sstevel }
18481708Sstevel
18491708Sstevel return (rv);
18501708Sstevel }
18511708Sstevel
/*
 * Variant of ntwdt_set_cfgvar for callers that do not want a
 * result: it forwards to ntwdt_set_cfgvar and discards the
 * return value.
 */
static void
ntwdt_set_cfgvar_noreply(int var, int val)
{
	(void) ntwdt_set_cfgvar(var, val);
}
18571708Sstevel
#ifdef DEBUG
/*
 * Read one WDT Property from ScApp (DEBUG builds only).
 *
 * An LW8_MBOX_WDT_GET (SBBC) mailbox message is sent; its
 * Response carries the watchdog state, and the field selected
 * by 'var' is stored through 'val'.  The SBBC mailbox resides
 * in IOSRAM.
 *
 * Returns the result of ntwdt_lomcmd (0 on success).  On
 * failure '*val' is not written.
 */
static int
ntwdt_get_cfgvar(int var, int *val)
{
	lw8_get_wdt_t	reply;
	int		err;

	err = ntwdt_lomcmd(LW8_MBOX_WDT_GET, (intptr_t)&reply);
	if (err != 0) {
		NTWDT_DBG(WDT_DBG_PROT, ("MBOX_GET failed: %d", err));
		return (err);
	}

	switch (var) {
	case LW8_WDT_PROP_RECOV:
		*val = (uint8_t)reply.recovery_enabled;
		NTWDT_DBG(WDT_DBG_PROT, ("MBOX_GET of 'reset-enabled':"
		    " %s (%d)", (*val != 0) ? "enabled" : "disabled",
		    *val));
		break;

	case LW8_WDT_PROP_WDT:
		*val = (uint8_t)reply.watchdog_enabled;
		NTWDT_DBG(WDT_DBG_PROT, ("MBOX_GET of 'wdog-enabled':"
		    " %s (%d)", (*val != 0) ? "enabled" : "disabled",
		    *val));
		break;

	case LW8_WDT_PROP_TO:
		/*
		 * NOTE(review): the cast keeps only the low 8 bits;
		 * if lw8_get_wdt_t's timeout field is wider than
		 * that, values above 255 are truncated -- confirm.
		 */
		*val = (uint8_t)reply.timeout;
		NTWDT_DBG(WDT_DBG_PROT, ("MBOX_GET of 'wdog-timeout':"
		    " %d seconds", *val));
		break;

	default:
		/* only the three Properties above can be read back */
		ASSERT(0);
		_NOTE(NOTREACHED)
	}

	return (err);
}
#endif
19081708Sstevel
19091708Sstevel /*
19101708Sstevel * Update the real system "heartbeat", which resides in IOSRAM.
19111708Sstevel * This "heartbeat" is normally used in SWDT Mode, but when
19121708Sstevel * in AWDT Mode, ScApp also uses its value to determine if Solaris
19131708Sstevel * is up-and-running.
19141708Sstevel */
19151708Sstevel static void
ntwdt_pat_hw_watchdog()19161708Sstevel ntwdt_pat_hw_watchdog()
19171708Sstevel {
19181708Sstevel tod_iosram_t tod_buf;
19191708Sstevel static uint32_t i_am_alive = 0;
19201708Sstevel #ifdef DEBUG
19211708Sstevel if (ntwdt_stop_heart != 0)
19221708Sstevel return;
19231708Sstevel #endif
19241708Sstevel /* Update the system heartbeat */
19251708Sstevel if (i_am_alive == UINT32_MAX)
19261708Sstevel i_am_alive = 0;
19271708Sstevel else
19281708Sstevel i_am_alive++;
19291708Sstevel
19301708Sstevel NTWDT_DBG(WDT_DBG_HEART, ("update heartbeat: %d",
19311708Sstevel i_am_alive));
19321708Sstevel
19331708Sstevel if (iosram_write(SBBC_TOD_KEY, OFFSET(tod_buf, tod_i_am_alive),
19347799SRichard.Bean@Sun.COM (char *)&i_am_alive, sizeof (uint32_t))) {
19351708Sstevel cmn_err(CE_WARN, "ntwdt_pat_hw_watchdog(): "
19361708Sstevel "write heartbeat failed");
19371708Sstevel }
19381708Sstevel }
19391708Sstevel
19401708Sstevel /*
19411708Sstevel * Write the specified value to the system's normal (IOSRAM)
19421708Sstevel * location that's used to specify Solaris' watchdog-timeout
19431708Sstevel * on Serengeti platforms.
19441708Sstevel *
19451708Sstevel * In SWDT Mode, this location can hold values [0,n).
19461708Sstevel * In AWDT Mode, this location must have value 0 (else
19471708Sstevel * after a ScApp-reboot, ScApp could mistakenly interpret
19481708Sstevel * that the system is in SWDT Mode).
19491708Sstevel */
19501708Sstevel static int
ntwdt_set_hw_timeout(uint32_t period)19511708Sstevel ntwdt_set_hw_timeout(uint32_t period)
19521708Sstevel {
19531708Sstevel tod_iosram_t tod_buf;
19541708Sstevel int rv;
19551708Sstevel
19561708Sstevel rv = iosram_write(SBBC_TOD_KEY, OFFSET(tod_buf, tod_timeout_period),
19571708Sstevel (char *)&period, sizeof (uint32_t));
19581708Sstevel if (rv != 0)
19591708Sstevel cmn_err(CE_WARN, "write of %d for TOD timeout "
19601708Sstevel "period failed: %d", period, rv);
19611708Sstevel
19621708Sstevel return (rv);
19631708Sstevel }
19641708Sstevel
19651708Sstevel /*
19661708Sstevel * Soft-interrupt handler that is triggered when ScApp wants
19671708Sstevel * to know the current state of the app-wdog.
19681708Sstevel *
19691708Sstevel * Grab ntwdt_wdog_mutex so that we synchronize with any
19701708Sstevel * concurrent User Context and Interrupt Context activity. Call
19711708Sstevel * a function that writes a permutation of the watchdog state
19721708Sstevel * to the SC, then release the mutex.
19731708Sstevel *
19741708Sstevel * We grab the mutex not only so that each variable is consistent
19751708Sstevel * but also so that the *permutation* of variables is consistent.
19761708Sstevel * I.e., any set of one or more variables (that we write to SC
19771708Sstevel * using multiple mailbox commands) will truly be seen as a
19781708Sstevel * consistent snapshot. Note that if our protocol had a MBOX_SET
19791708Sstevel * command that allowed writing all watchdog state in one
19801708Sstevel * command, then the lock-hold latency would be greatly reduced.
19811708Sstevel * To our advantage, this softint normally executes very
19821708Sstevel * infrequently.
19831708Sstevel *
19841708Sstevel * Context:
19851708Sstevel * called at Interrupt Context (DDI_SOFTINT_LOW)
19861708Sstevel */
19871708Sstevel static uint_t
ntwdt_mbox_softint(char * arg)19881708Sstevel ntwdt_mbox_softint(char *arg)
19891708Sstevel {
19901708Sstevel ntwdt_wdog_t *wdog_state;
19911708Sstevel
19921708Sstevel wdog_state = ((ntwdt_state_t *)arg)->ntwdt_wdog_state;
19931708Sstevel
19941708Sstevel ASSERT(wdog_state != NULL);
19951708Sstevel
19961708Sstevel mutex_enter(&wdog_state->ntwdt_wdog_mutex);
19971708Sstevel
19981708Sstevel /* tell ScApp state of AWDT */
1999*11311SSurya.Prakki@Sun.COM (void) ntwdt_set_awdt_state(wdog_state);
20001708Sstevel
20011708Sstevel mutex_exit(&wdog_state->ntwdt_wdog_mutex);
20021708Sstevel
20031708Sstevel return (DDI_INTR_CLAIMED);
20041708Sstevel }
20051708Sstevel
20061708Sstevel /*
20071708Sstevel * Handle MBOX_EVENT_LW8 Events that are sent from ScApp.
20081708Sstevel *
20091708Sstevel * The only (sub-)type of Event we handle is the
20101708Sstevel * LW8_EVENT_SC_RESTARTED Event. We handle this by triggering
20111708Sstevel * a soft-interrupt only if we are in AWDT mode.
20121708Sstevel *
20131708Sstevel * ScApp sends this Event when it wants to learn the current
20141708Sstevel * state of the AWDT variables. Design-wise, this is used to
20151708Sstevel * handle the case where the SC reboots while the system is in
20161708Sstevel * AWDT mode (if the SC reboots in SWDT mode, then ScApp
20171708Sstevel * already knows all necessary info and therefore won't send
20181708Sstevel * this Event).
20191708Sstevel *
20201708Sstevel * Context:
20211708Sstevel * function is called in Interrupt Context (at DDI_SOFTINT_MED)
20221708Sstevel * and we conditionally trigger a softint that will run at
20231708Sstevel * DDI_SOFTINT_LOW. Note that function executes at
20241708Sstevel * DDI_SOFTINT_MED due to how this handler was registered by
20251708Sstevel * the implementation of sbbc_mbox_reg_intr().
20261708Sstevel *
20271708Sstevel * Notes:
20281708Sstevel * Currently, the LW8_EVENT_SC_RESTARTED Event is only sent
20291708Sstevel * by SC when in AWDT mode.
20301708Sstevel */
20311708Sstevel static uint_t
ntwdt_event_data_handler(char * arg)20321708Sstevel ntwdt_event_data_handler(char *arg)
20331708Sstevel {
20341708Sstevel lw8_event_t *payload;
20351708Sstevel sbbc_msg_t *msg;
20361708Sstevel
20371708Sstevel if (arg == NULL) {
20381708Sstevel return (DDI_INTR_CLAIMED);
20391708Sstevel }
20401708Sstevel
20411708Sstevel msg = (sbbc_msg_t *)arg;
20421708Sstevel if (msg->msg_buf == NULL) {
20431708Sstevel return (DDI_INTR_CLAIMED);
20441708Sstevel }
20451708Sstevel
20461708Sstevel payload = (lw8_event_t *)msg->msg_buf;
20471708Sstevel
20481708Sstevel switch (payload->event_type) {
20491708Sstevel case LW8_EVENT_SC_RESTARTED:
20501708Sstevel /*
20511708Sstevel * then SC probably was rebooted, and it therefore
20521708Sstevel * needs to know what the current state of AWDT is.
20531708Sstevel */
20541708Sstevel NTWDT_DBG(WDT_DBG_EVENT, ("LW8_EVENT_SC_RESTARTED "
20551708Sstevel "received in %s mode",
20561708Sstevel (ntwdt_watchdog_activated != 0) ? "AWDT" : "SWDT"));
20571708Sstevel
20581708Sstevel if (ntwdt_watchdog_activated != 0) {
20591708Sstevel /* then system is in AWDT mode */
20601708Sstevel ddi_trigger_softintr(ntwdt_mbox_softint_id);
20611708Sstevel }
20621708Sstevel break;
20631708Sstevel
20641708Sstevel default:
20651708Sstevel NTWDT_DBG(WDT_DBG_EVENT,
20661708Sstevel ("MBOX_EVENT_LW8: %d", payload->event_type));
20671708Sstevel break;
20681708Sstevel }
20691708Sstevel
20701708Sstevel return (DDI_INTR_CLAIMED);
20711708Sstevel }
20721708Sstevel
20731708Sstevel /*
20741708Sstevel * Send an SBBC Mailbox command to ScApp.
20751708Sstevel *
20761708Sstevel * Use the sbbc_mbox_request_response utility function to
20771708Sstevel * send the Request and receive the optional Response.
20781708Sstevel *
20791708Sstevel * Context:
20801708Sstevel * can be called from Interrupt Context or User Context.
20811708Sstevel */
20821708Sstevel static int
ntwdt_lomcmd(int cmd,intptr_t arg)20831708Sstevel ntwdt_lomcmd(int cmd, intptr_t arg)
20841708Sstevel {
20851708Sstevel sbbc_msg_t request;
20861708Sstevel sbbc_msg_t *reqp;
20871708Sstevel sbbc_msg_t response;
20881708Sstevel sbbc_msg_t *resp;
20891708Sstevel int rv = 0;
20901708Sstevel
20911708Sstevel reqp = &request;
20921708Sstevel bzero((caddr_t)&request, sizeof (request));
20931708Sstevel reqp->msg_type.type = LW8_MBOX;
20941708Sstevel reqp->msg_type.sub_type = (uint16_t)cmd;
20951708Sstevel
20961708Sstevel resp = &response;
20971708Sstevel bzero((caddr_t)&response, sizeof (response));
20981708Sstevel resp->msg_type.type = LW8_MBOX;
20991708Sstevel resp->msg_type.sub_type = (uint16_t)cmd;
21001708Sstevel
21011708Sstevel switch (cmd) {
21021708Sstevel case LW8_MBOX_WDT_GET:
21031708Sstevel reqp->msg_len = 0;
21041708Sstevel reqp->msg_buf = (caddr_t)NULL;
21051708Sstevel resp->msg_len = sizeof (lw8_get_wdt_t);
21061708Sstevel resp->msg_buf = (caddr_t)arg;
21071708Sstevel break;
21081708Sstevel
21091708Sstevel case LW8_MBOX_WDT_SET:
21101708Sstevel reqp->msg_len = sizeof (lw8_set_wdt_t);
21111708Sstevel reqp->msg_buf = (caddr_t)arg;
21121708Sstevel resp->msg_len = 0;
21131708Sstevel resp->msg_buf = (caddr_t)NULL;
21141708Sstevel break;
21151708Sstevel
21161708Sstevel default:
21171708Sstevel return (EINVAL);
21181708Sstevel }
21191708Sstevel
21201708Sstevel rv = sbbc_mbox_request_response(reqp, resp,
21217799SRichard.Bean@Sun.COM LW8_DEFAULT_MAX_MBOX_WAIT_TIME);
21221708Sstevel
21231708Sstevel if ((rv) || (resp->msg_status != SG_MBOX_STATUS_SUCCESS)) {
21241708Sstevel
21251708Sstevel NTWDT_NDBG(WDT_DBG_PROT, ("SBBC mailbox error:"
21261708Sstevel " (rv/msg_status)=(%d/%d)", rv, resp->msg_status));
21271708Sstevel
21281708Sstevel /* errors from sgsbbc */
21291708Sstevel if (resp->msg_status > 0) {
21301708Sstevel return (resp->msg_status);
21311708Sstevel }
21321708Sstevel
21331708Sstevel /* errors from ScApp */
21341708Sstevel switch (resp->msg_status) {
21351708Sstevel case SG_MBOX_STATUS_ILLEGAL_PARAMETER:
21361708Sstevel /* illegal ioctl parameter */
21371708Sstevel return (EINVAL);
21381708Sstevel
21391708Sstevel default:
21401708Sstevel return (EIO);
21411708Sstevel }
21421708Sstevel }
21431708Sstevel return (0);
21441708Sstevel }
2145