1*730b8a0aSriastradh /* $NetBSD: kern_heartbeat.c,v 1.14 2024/08/25 01:14:01 riastradh Exp $ */ 212861a66Sriastradh 312861a66Sriastradh /*- 412861a66Sriastradh * Copyright (c) 2023 The NetBSD Foundation, Inc. 512861a66Sriastradh * All rights reserved. 612861a66Sriastradh * 712861a66Sriastradh * Redistribution and use in source and binary forms, with or without 812861a66Sriastradh * modification, are permitted provided that the following conditions 912861a66Sriastradh * are met: 1012861a66Sriastradh * 1. Redistributions of source code must retain the above copyright 1112861a66Sriastradh * notice, this list of conditions and the following disclaimer. 1212861a66Sriastradh * 2. Redistributions in binary form must reproduce the above copyright 1312861a66Sriastradh * notice, this list of conditions and the following disclaimer in the 1412861a66Sriastradh * documentation and/or other materials provided with the distribution. 1512861a66Sriastradh * 1612861a66Sriastradh * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 1712861a66Sriastradh * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 1812861a66Sriastradh * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 1912861a66Sriastradh * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 2012861a66Sriastradh * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 2112861a66Sriastradh * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 2212861a66Sriastradh * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 2312861a66Sriastradh * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 2412861a66Sriastradh * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 2512861a66Sriastradh * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 2612861a66Sriastradh * POSSIBILITY OF SUCH DAMAGE. 2712861a66Sriastradh */ 2812861a66Sriastradh 2912861a66Sriastradh /* 3012861a66Sriastradh * heartbeat(9) -- periodic checks to ensure CPUs are making progress 3112861a66Sriastradh * 3212861a66Sriastradh * Manual tests to run when changing this file. Magic numbers are for 3312861a66Sriastradh * evbarm; adjust for other platforms. Tests involving cpuctl 3412861a66Sriastradh * online/offline assume a 2-CPU system -- for full testing on a >2-CPU 3512861a66Sriastradh * system, offline all but one CPU. 3612861a66Sriastradh * 3712861a66Sriastradh * 1. cpuctl offline 0 3812861a66Sriastradh * sleep 20 3912861a66Sriastradh * cpuctl online 0 4012861a66Sriastradh * 4112861a66Sriastradh * 2. cpuctl offline 1 4212861a66Sriastradh * sleep 20 4312861a66Sriastradh * cpuctl online 1 4412861a66Sriastradh * 4512861a66Sriastradh * 3. cpuctl offline 0 4612861a66Sriastradh * sysctl -w kern.heartbeat.max_period=5 4712861a66Sriastradh * sleep 10 4812861a66Sriastradh * sysctl -w kern.heartbeat.max_period=0 4912861a66Sriastradh * sleep 10 5012861a66Sriastradh * sysctl -w kern.heartbeat.max_period=5 5112861a66Sriastradh * sleep 10 5212861a66Sriastradh * cpuctl online 0 5312861a66Sriastradh * 5412861a66Sriastradh * 4. sysctl -w debug.crashme_enable=1 5512861a66Sriastradh * sysctl -w debug.crashme.spl_spinout=1 # IPL_SOFTCLOCK 5611062fecSriastradh * # verify system panics after 15sec, with a stack trace through 5711062fecSriastradh * # crashme_spl_spinout 5812861a66Sriastradh * 5912861a66Sriastradh * 5. sysctl -w debug.crashme_enable=1 6012861a66Sriastradh * sysctl -w debug.crashme.spl_spinout=6 # IPL_SCHED 6111062fecSriastradh * # verify system panics after 15sec, with a stack trace through 6211062fecSriastradh * # crashme_spl_spinout 6312861a66Sriastradh * 6412861a66Sriastradh * 6. cpuctl offline 0 6512861a66Sriastradh * sysctl -w debug.crashme_enable=1 6612861a66Sriastradh * sysctl -w debug.crashme.spl_spinout=1 # IPL_SOFTCLOCK 6711062fecSriastradh * # verify system panics after 15sec, with a stack trace through 6811062fecSriastradh * # crashme_spl_spinout 6912861a66Sriastradh * 7012861a66Sriastradh * 7. cpuctl offline 0 7112861a66Sriastradh * sysctl -w debug.crashme_enable=1 7212861a66Sriastradh * sysctl -w debug.crashme.spl_spinout=5 # IPL_VM 7311062fecSriastradh * # verify system panics after 15sec, with a stack trace through 7411062fecSriastradh * # crashme_spl_spinout 7512861a66Sriastradh * 7612861a66Sriastradh * # Not this -- IPL_SCHED and IPL_HIGH spinout on a single CPU 7712861a66Sriastradh * # require a hardware watchdog timer. 7812861a66Sriastradh * #cpuctl offline 0 7912861a66Sriastradh * #sysctl -w debug.crashme_enable 8012861a66Sriastradh * #sysctl -w debug.crashme.spl_spinout=6 # IPL_SCHED 8112861a66Sriastradh * # hope watchdog timer kicks in 8212861a66Sriastradh */ 8312861a66Sriastradh 8412861a66Sriastradh #include <sys/cdefs.h> 85*730b8a0aSriastradh __KERNEL_RCSID(0, "$NetBSD: kern_heartbeat.c,v 1.14 2024/08/25 01:14:01 riastradh Exp $"); 8612861a66Sriastradh 8712861a66Sriastradh #ifdef _KERNEL_OPT 8812861a66Sriastradh #include "opt_ddb.h" 8912861a66Sriastradh #include "opt_heartbeat.h" 9012861a66Sriastradh #endif 9112861a66Sriastradh 9212861a66Sriastradh #include "heartbeat.h" 9312861a66Sriastradh 9412861a66Sriastradh #include <sys/param.h> 9512861a66Sriastradh #include <sys/types.h> 9612861a66Sriastradh 9712861a66Sriastradh #include <sys/atomic.h> 9812861a66Sriastradh #include <sys/cpu.h> 9912861a66Sriastradh #include <sys/errno.h> 10012861a66Sriastradh #include <sys/heartbeat.h> 10112861a66Sriastradh #include <sys/ipi.h> 102fc3ac8aeSriastradh #include <sys/kernel.h> 10312861a66Sriastradh #include <sys/mutex.h> 10412861a66Sriastradh #include <sys/sysctl.h> 10512861a66Sriastradh #include <sys/systm.h> 10612861a66Sriastradh #include <sys/xcall.h> 10712861a66Sriastradh 10812861a66Sriastradh #ifdef DDB 10912861a66Sriastradh #include <ddb/ddb.h> 11012861a66Sriastradh #endif 11112861a66Sriastradh 11212861a66Sriastradh /* 11312861a66Sriastradh * Global state. 11412861a66Sriastradh * 11512861a66Sriastradh * heartbeat_lock serializes access to heartbeat_max_period_secs 11612861a66Sriastradh * and heartbeat_max_period_ticks. Two separate variables so we 11712861a66Sriastradh * can avoid multiplication or division in the heartbeat routine. 11812861a66Sriastradh * 11912861a66Sriastradh * heartbeat_sih is stable after initialization in 12012861a66Sriastradh * heartbeat_start. 12112861a66Sriastradh */ 12212861a66Sriastradh kmutex_t heartbeat_lock __cacheline_aligned; 12312861a66Sriastradh unsigned heartbeat_max_period_secs __read_mostly; 12412861a66Sriastradh unsigned heartbeat_max_period_ticks __read_mostly; 12512861a66Sriastradh 12612861a66Sriastradh void *heartbeat_sih __read_mostly; 12712861a66Sriastradh 12812861a66Sriastradh /* 12912861a66Sriastradh * heartbeat_suspend() 13012861a66Sriastradh * 13112861a66Sriastradh * Suspend heartbeat monitoring of the current CPU. 13212861a66Sriastradh * 13312861a66Sriastradh * Called after the current CPU has been marked offline but before 134572220daSriastradh * it has stopped running, or after IPL has been raised for 13569b2327dSriastradh * polling-mode console input. Nestable (but only 2^32 times, so 13669b2327dSriastradh * don't do this in a loop). Reversed by heartbeat_resume. 137d6d81014Sriastradh * 138d6d81014Sriastradh * Caller must be bound to the CPU, i.e., curcpu_stable() must be 139d6d81014Sriastradh * true. This function does not assert curcpu_stable() since it 140d6d81014Sriastradh * is used in the ddb entry path, where any assertions risk 141d6d81014Sriastradh * infinite regress into undebuggable chaos, so callers must be 142d6d81014Sriastradh * careful. 14312861a66Sriastradh */ 14412861a66Sriastradh void 14512861a66Sriastradh heartbeat_suspend(void) 14612861a66Sriastradh { 1475c3232dbSriastradh unsigned *p; 14812861a66Sriastradh 1495c3232dbSriastradh p = &curcpu()->ci_heartbeat_suspend; 1505c3232dbSriastradh atomic_store_relaxed(p, *p + 1); 15112861a66Sriastradh } 15212861a66Sriastradh 15312861a66Sriastradh /* 154fc3ac8aeSriastradh * heartbeat_resume_cpu(ci) 155fc3ac8aeSriastradh * 156fc3ac8aeSriastradh * Resume heartbeat monitoring of ci. 157fc3ac8aeSriastradh * 158fc3ac8aeSriastradh * Called at startup while cold, and whenever heartbeat monitoring 159fc3ac8aeSriastradh * is re-enabled after being disabled or the period is changed. 160fc3ac8aeSriastradh * When not cold, ci must be the current CPU. 161572220daSriastradh * 162572220daSriastradh * Must be run at splsched. 163fc3ac8aeSriastradh */ 164fc3ac8aeSriastradh static void 165fc3ac8aeSriastradh heartbeat_resume_cpu(struct cpu_info *ci) 166fc3ac8aeSriastradh { 167fc3ac8aeSriastradh 168fc3ac8aeSriastradh KASSERT(__predict_false(cold) || curcpu_stable()); 169fc3ac8aeSriastradh KASSERT(__predict_false(cold) || ci == curcpu()); 170572220daSriastradh /* XXX KASSERT IPL_SCHED */ 171fc3ac8aeSriastradh 172fc3ac8aeSriastradh ci->ci_heartbeat_count = 0; 173*730b8a0aSriastradh ci->ci_heartbeat_uptime_cache = time_uptime32; 174fc3ac8aeSriastradh ci->ci_heartbeat_uptime_stamp = 0; 175fc3ac8aeSriastradh } 176fc3ac8aeSriastradh 177fc3ac8aeSriastradh /* 17812861a66Sriastradh * heartbeat_resume() 17912861a66Sriastradh * 18012861a66Sriastradh * Resume heartbeat monitoring of the current CPU. 18112861a66Sriastradh * 18212861a66Sriastradh * Called after the current CPU has started running but before it 183572220daSriastradh * has been marked online, or when ending polling-mode input 1845c3232dbSriastradh * before IPL is restored. Reverses heartbeat_suspend. 185d6d81014Sriastradh * 186d6d81014Sriastradh * Caller must be bound to the CPU, i.e., curcpu_stable() must be 187d6d81014Sriastradh * true. 18812861a66Sriastradh */ 18912861a66Sriastradh void 19012861a66Sriastradh heartbeat_resume(void) 19112861a66Sriastradh { 19212861a66Sriastradh struct cpu_info *ci = curcpu(); 1935c3232dbSriastradh unsigned *p; 19412861a66Sriastradh int s; 19512861a66Sriastradh 196d6d81014Sriastradh KASSERT(curcpu_stable()); 197d6d81014Sriastradh 19812861a66Sriastradh /* 1995c3232dbSriastradh * Reset the state so nobody spuriously thinks we had a heart 2005c3232dbSriastradh * attack as soon as the heartbeat checks resume. 20112861a66Sriastradh */ 20212861a66Sriastradh s = splsched(); 203fc3ac8aeSriastradh heartbeat_resume_cpu(ci); 20412861a66Sriastradh splx(s); 2055c3232dbSriastradh 2065c3232dbSriastradh p = &ci->ci_heartbeat_suspend; 2075c3232dbSriastradh atomic_store_relaxed(p, *p - 1); 20812861a66Sriastradh } 20912861a66Sriastradh 21012861a66Sriastradh /* 21195d8ae3cSriastradh * heartbeat_timecounter_suspended() 21295d8ae3cSriastradh * 21395d8ae3cSriastradh * True if timecounter heartbeat checks are suspended because the 21495d8ae3cSriastradh * timecounter may not be advancing, false if heartbeat checks 21595d8ae3cSriastradh * should check for timecounter progress. 21695d8ae3cSriastradh */ 21795d8ae3cSriastradh static bool 21895d8ae3cSriastradh heartbeat_timecounter_suspended(void) 21995d8ae3cSriastradh { 22095d8ae3cSriastradh CPU_INFO_ITERATOR cii; 22195d8ae3cSriastradh struct cpu_info *ci; 22295d8ae3cSriastradh 22395d8ae3cSriastradh /* 22495d8ae3cSriastradh * The timecounter ticks only on the primary CPU. Check 22595d8ae3cSriastradh * whether it's suspended. 22695d8ae3cSriastradh * 22795d8ae3cSriastradh * XXX Would be nice if we could find the primary CPU without 22895d8ae3cSriastradh * iterating over all CPUs. 22995d8ae3cSriastradh */ 23095d8ae3cSriastradh for (CPU_INFO_FOREACH(cii, ci)) { 2315c3232dbSriastradh if (CPU_IS_PRIMARY(ci)) 2325c3232dbSriastradh return atomic_load_relaxed(&ci->ci_heartbeat_suspend); 23395d8ae3cSriastradh } 23495d8ae3cSriastradh 23595d8ae3cSriastradh /* 23695d8ae3cSriastradh * This should be unreachable -- there had better be a primary 23795d8ae3cSriastradh * CPU in the system! If not, the timecounter will be busted 23895d8ae3cSriastradh * anyway. 23995d8ae3cSriastradh */ 24095d8ae3cSriastradh panic("no primary CPU"); 24195d8ae3cSriastradh } 24295d8ae3cSriastradh 24395d8ae3cSriastradh /* 24412861a66Sriastradh * heartbeat_reset_xc(a, b) 24512861a66Sriastradh * 24612861a66Sriastradh * Cross-call handler to reset heartbeat state just prior to 24712861a66Sriastradh * enabling heartbeat checks. 24812861a66Sriastradh */ 24912861a66Sriastradh static void 25012861a66Sriastradh heartbeat_reset_xc(void *a, void *b) 25112861a66Sriastradh { 252572220daSriastradh int s; 25312861a66Sriastradh 254572220daSriastradh s = splsched(); 255572220daSriastradh heartbeat_resume_cpu(curcpu()); 256572220daSriastradh splx(s); 25712861a66Sriastradh } 25812861a66Sriastradh 25912861a66Sriastradh /* 26012861a66Sriastradh * set_max_period(max_period) 26112861a66Sriastradh * 26212861a66Sriastradh * Set the maximum period, in seconds, for heartbeat checks. 26312861a66Sriastradh * 26412861a66Sriastradh * - If max_period is zero, disable them. 26512861a66Sriastradh * 26612861a66Sriastradh * - If the max period was zero and max_period is nonzero, ensure 26712861a66Sriastradh * all CPUs' heartbeat uptime caches are up-to-date before 26812861a66Sriastradh * re-enabling them. 26912861a66Sriastradh * 27012861a66Sriastradh * max_period must be below UINT_MAX/4/hz to avoid arithmetic 27112861a66Sriastradh * overflow and give room for slop. 27212861a66Sriastradh * 27312861a66Sriastradh * Caller must hold heartbeat_lock. 27412861a66Sriastradh */ 27512861a66Sriastradh static void 27612861a66Sriastradh set_max_period(unsigned max_period) 27712861a66Sriastradh { 27812861a66Sriastradh 27912861a66Sriastradh KASSERTMSG(max_period <= UINT_MAX/4/hz, 28012861a66Sriastradh "max_period=%u must not exceed UINT_MAX/4/hz=%u (hz=%u)", 28112861a66Sriastradh max_period, UINT_MAX/4/hz, hz); 28212861a66Sriastradh KASSERT(mutex_owned(&heartbeat_lock)); 28312861a66Sriastradh 28412861a66Sriastradh /* 28512861a66Sriastradh * If we're enabling heartbeat checks, make sure we have a 286*730b8a0aSriastradh * reasonably up-to-date time_uptime32 cache on all CPUs so we 28712861a66Sriastradh * don't think we had an instant heart attack. 28812861a66Sriastradh */ 289fc3ac8aeSriastradh if (heartbeat_max_period_secs == 0 && max_period != 0) { 290fc3ac8aeSriastradh if (cold) { 291fc3ac8aeSriastradh CPU_INFO_ITERATOR cii; 292fc3ac8aeSriastradh struct cpu_info *ci; 293fc3ac8aeSriastradh 294fc3ac8aeSriastradh for (CPU_INFO_FOREACH(cii, ci)) 295fc3ac8aeSriastradh heartbeat_resume_cpu(ci); 296fc3ac8aeSriastradh } else { 297fc3ac8aeSriastradh const uint64_t ticket = 298fc3ac8aeSriastradh xc_broadcast(0, &heartbeat_reset_xc, NULL, NULL); 299fc3ac8aeSriastradh xc_wait(ticket); 300fc3ac8aeSriastradh } 301fc3ac8aeSriastradh } 30212861a66Sriastradh 30312861a66Sriastradh /* 30412861a66Sriastradh * Once the heartbeat state has been updated on all (online) 30512861a66Sriastradh * CPUs, set the period. At this point, heartbeat checks can 30612861a66Sriastradh * begin. 30712861a66Sriastradh */ 30812861a66Sriastradh atomic_store_relaxed(&heartbeat_max_period_secs, max_period); 30912861a66Sriastradh atomic_store_relaxed(&heartbeat_max_period_ticks, max_period*hz); 31012861a66Sriastradh } 31112861a66Sriastradh 31212861a66Sriastradh /* 31312861a66Sriastradh * heartbeat_max_period_ticks(SYSCTLFN_ARGS) 31412861a66Sriastradh * 31512861a66Sriastradh * Sysctl handler for sysctl kern.heartbeat.max_period. Verifies 31612861a66Sriastradh * it lies within a reasonable interval and sets it. 31712861a66Sriastradh */ 31812861a66Sriastradh static int 31912861a66Sriastradh heartbeat_max_period_sysctl(SYSCTLFN_ARGS) 32012861a66Sriastradh { 32112861a66Sriastradh struct sysctlnode node; 32212861a66Sriastradh unsigned max_period; 32312861a66Sriastradh int error; 32412861a66Sriastradh 32512861a66Sriastradh mutex_enter(&heartbeat_lock); 32612861a66Sriastradh 32712861a66Sriastradh max_period = heartbeat_max_period_secs; 32812861a66Sriastradh node = *rnode; 32912861a66Sriastradh node.sysctl_data = &max_period; 33012861a66Sriastradh error = sysctl_lookup(SYSCTLFN_CALL(&node)); 33112861a66Sriastradh if (error || newp == NULL) 33212861a66Sriastradh goto out; 33312861a66Sriastradh 33412861a66Sriastradh /* 33512861a66Sriastradh * Ensure there's plenty of slop between heartbeats. 33612861a66Sriastradh */ 33712861a66Sriastradh if (max_period > UINT_MAX/4/hz) { 33812861a66Sriastradh error = EOVERFLOW; 33912861a66Sriastradh goto out; 34012861a66Sriastradh } 34112861a66Sriastradh 34212861a66Sriastradh /* 34312861a66Sriastradh * Success! Set the period. This enables heartbeat checks if 34412861a66Sriastradh * we went from zero period to nonzero period, or disables them 34512861a66Sriastradh * if the other way around. 34612861a66Sriastradh */ 34712861a66Sriastradh set_max_period(max_period); 34812861a66Sriastradh error = 0; 34912861a66Sriastradh 35012861a66Sriastradh out: mutex_exit(&heartbeat_lock); 35112861a66Sriastradh return error; 35212861a66Sriastradh } 35312861a66Sriastradh 35412861a66Sriastradh /* 35512861a66Sriastradh * sysctl_heartbeat_setup() 35612861a66Sriastradh * 35712861a66Sriastradh * Set up the kern.heartbeat.* sysctl subtree. 35812861a66Sriastradh */ 35912861a66Sriastradh SYSCTL_SETUP(sysctl_heartbeat_setup, "sysctl kern.heartbeat setup") 36012861a66Sriastradh { 36112861a66Sriastradh const struct sysctlnode *rnode; 36212861a66Sriastradh int error; 36312861a66Sriastradh 36412861a66Sriastradh mutex_init(&heartbeat_lock, MUTEX_DEFAULT, IPL_NONE); 36512861a66Sriastradh 36612861a66Sriastradh /* kern.heartbeat */ 36712861a66Sriastradh error = sysctl_createv(NULL, 0, NULL, &rnode, 36812861a66Sriastradh CTLFLAG_PERMANENT, 36912861a66Sriastradh CTLTYPE_NODE, "heartbeat", 37012861a66Sriastradh SYSCTL_DESCR("Kernel heartbeat parameters"), 37112861a66Sriastradh NULL, 0, NULL, 0, 37212861a66Sriastradh CTL_KERN, CTL_CREATE, CTL_EOL); 37312861a66Sriastradh if (error) { 37412861a66Sriastradh printf("%s: failed to create kern.heartbeat: %d\n", 37512861a66Sriastradh __func__, error); 37612861a66Sriastradh return; 37712861a66Sriastradh } 37812861a66Sriastradh 37912861a66Sriastradh /* kern.heartbeat.max_period */ 38012861a66Sriastradh error = sysctl_createv(NULL, 0, &rnode, NULL, 38112861a66Sriastradh CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 38212861a66Sriastradh CTLTYPE_INT, "max_period", 38312861a66Sriastradh SYSCTL_DESCR("Max seconds between heartbeats before panic"), 38412861a66Sriastradh &heartbeat_max_period_sysctl, 0, NULL, 0, 38512861a66Sriastradh CTL_CREATE, CTL_EOL); 38612861a66Sriastradh if (error) { 38712861a66Sriastradh printf("%s: failed to create kern.heartbeat.max_period: %d\n", 38812861a66Sriastradh __func__, error); 38912861a66Sriastradh return; 39012861a66Sriastradh } 39112861a66Sriastradh } 39212861a66Sriastradh 39312861a66Sriastradh /* 39412861a66Sriastradh * heartbeat_intr(cookie) 39512861a66Sriastradh * 39612861a66Sriastradh * Soft interrupt handler to update the local CPU's view of the 39712861a66Sriastradh * system uptime. This runs at the same priority level as 39812861a66Sriastradh * callouts, so if callouts are stuck on this CPU, it won't run, 39912861a66Sriastradh * and eventually another CPU will notice that this one is stuck. 40012861a66Sriastradh * 40112861a66Sriastradh * Don't do spl* here -- keep it to a minimum so if anything goes 40212861a66Sriastradh * wrong we don't end up with hard interrupts blocked and unable 40312861a66Sriastradh * to detect a missed heartbeat. 40412861a66Sriastradh */ 40512861a66Sriastradh static void 40612861a66Sriastradh heartbeat_intr(void *cookie) 40712861a66Sriastradh { 40812861a66Sriastradh unsigned count = atomic_load_relaxed(&curcpu()->ci_heartbeat_count); 409*730b8a0aSriastradh unsigned uptime = time_uptime32; 41012861a66Sriastradh 41112861a66Sriastradh atomic_store_relaxed(&curcpu()->ci_heartbeat_uptime_stamp, count); 41212861a66Sriastradh atomic_store_relaxed(&curcpu()->ci_heartbeat_uptime_cache, uptime); 41312861a66Sriastradh } 41412861a66Sriastradh 41512861a66Sriastradh /* 41612861a66Sriastradh * heartbeat_start() 41712861a66Sriastradh * 41812861a66Sriastradh * Start system heartbeat monitoring. 41912861a66Sriastradh */ 42012861a66Sriastradh void 42112861a66Sriastradh heartbeat_start(void) 42212861a66Sriastradh { 423*730b8a0aSriastradh enum { max_period = HEARTBEAT_MAX_PERIOD_DEFAULT }; 424*730b8a0aSriastradh 425*730b8a0aSriastradh /* 426*730b8a0aSriastradh * Ensure the maximum period is small enough that we never have 427*730b8a0aSriastradh * to worry about 32-bit wraparound even if there's a lot of 428*730b8a0aSriastradh * slop. (In fact this is required to be less than 429*730b8a0aSriastradh * UINT_MAX/4/hz, but that's not a compile-time constant.) 430*730b8a0aSriastradh */ 431*730b8a0aSriastradh __CTASSERT(max_period < UINT_MAX/4); 43212861a66Sriastradh 43312861a66Sriastradh /* 43412861a66Sriastradh * Establish a softint so we can schedule it once ready. This 43512861a66Sriastradh * should be at the lowest softint priority level so that we 43612861a66Sriastradh * ensure all softint priorities are making progress. 43712861a66Sriastradh */ 43812861a66Sriastradh heartbeat_sih = softint_establish(SOFTINT_CLOCK|SOFTINT_MPSAFE, 43912861a66Sriastradh &heartbeat_intr, NULL); 44012861a66Sriastradh 44112861a66Sriastradh /* 44212861a66Sriastradh * Now that the softint is established, kick off heartbeat 44312861a66Sriastradh * monitoring with the default period. This will initialize 444*730b8a0aSriastradh * the per-CPU state to an up-to-date cache of time_uptime32. 44512861a66Sriastradh */ 44612861a66Sriastradh mutex_enter(&heartbeat_lock); 44712861a66Sriastradh set_max_period(max_period); 44812861a66Sriastradh mutex_exit(&heartbeat_lock); 44912861a66Sriastradh } 45012861a66Sriastradh 45112861a66Sriastradh /* 45212861a66Sriastradh * defibrillator(cookie) 45312861a66Sriastradh * 45412861a66Sriastradh * IPI handler for defibrillation. If the CPU's heart has stopped 45512861a66Sriastradh * beating normally, but the CPU can still execute things, 45612861a66Sriastradh * acknowledge the IPI to the doctor and then panic so we at least 45712861a66Sriastradh * get a stack trace from whatever the current CPU is stuck doing, 45812861a66Sriastradh * if not a core dump. 45912861a66Sriastradh * 46012861a66Sriastradh * (This metaphor is a little stretched, since defibrillation is 46112861a66Sriastradh * usually administered when the heart is beating errattically but 46212861a66Sriastradh * hasn't stopped, and causes the heart to stop temporarily, and 46312861a66Sriastradh * one hopes it is not fatal. But we're (software) engineers, so 46412861a66Sriastradh * we can stretch metaphors like silly putty in a blender.) 46512861a66Sriastradh */ 46612861a66Sriastradh static void 46712861a66Sriastradh defibrillator(void *cookie) 46812861a66Sriastradh { 46912861a66Sriastradh bool *ack = cookie; 47012861a66Sriastradh 471c0abd507Sriastradh /* 472c0abd507Sriastradh * Acknowledge the interrupt so the doctor CPU won't trigger a 473c0abd507Sriastradh * new panic for defibrillation timeout. 474c0abd507Sriastradh */ 47512861a66Sriastradh atomic_store_relaxed(ack, true); 476c0abd507Sriastradh 477c0abd507Sriastradh /* 478c0abd507Sriastradh * If a panic is already in progress, we may have interrupted 479c0abd507Sriastradh * the logic that prints a stack trace on this CPU -- so let's 480c0abd507Sriastradh * not make it worse by giving the misapprehension of a 481c0abd507Sriastradh * recursive panic. 482c0abd507Sriastradh */ 483c0abd507Sriastradh if (atomic_load_relaxed(&panicstr) != NULL) 484c0abd507Sriastradh return; 485c0abd507Sriastradh 48612861a66Sriastradh panic("%s[%d %s]: heart stopped beating", cpu_name(curcpu()), 48712861a66Sriastradh curlwp->l_lid, 48812861a66Sriastradh curlwp->l_name ? curlwp->l_name : curproc->p_comm); 48912861a66Sriastradh } 49012861a66Sriastradh 49112861a66Sriastradh /* 49212861a66Sriastradh * defibrillate(ci, unsigned d) 49312861a66Sriastradh * 49412861a66Sriastradh * The patient CPU ci's heart has stopped beating after d seconds. 49512861a66Sriastradh * Force the patient CPU ci to panic, or panic on this CPU if the 49612861a66Sriastradh * patient CPU doesn't respond within 1sec. 49712861a66Sriastradh */ 49812861a66Sriastradh static void __noinline 49912861a66Sriastradh defibrillate(struct cpu_info *ci, unsigned d) 50012861a66Sriastradh { 50112861a66Sriastradh bool ack = false; 50212861a66Sriastradh ipi_msg_t msg = { 50312861a66Sriastradh .func = &defibrillator, 50412861a66Sriastradh .arg = &ack, 50512861a66Sriastradh }; 50612861a66Sriastradh unsigned countdown = 1000; /* 1sec */ 50712861a66Sriastradh 5089401f7d8Sriastradh KASSERT(curcpu_stable()); 50912861a66Sriastradh 51012861a66Sriastradh /* 51112861a66Sriastradh * First notify the console that the patient CPU's heart seems 51212861a66Sriastradh * to have stopped beating. 51312861a66Sriastradh */ 51412861a66Sriastradh printf("%s: found %s heart stopped beating after %u seconds\n", 51512861a66Sriastradh cpu_name(curcpu()), cpu_name(ci), d); 51612861a66Sriastradh 51712861a66Sriastradh /* 51812861a66Sriastradh * Next, give the patient CPU a chance to panic, so we get a 51912861a66Sriastradh * stack trace on that CPU even if we don't get a crash dump. 52012861a66Sriastradh */ 52112861a66Sriastradh ipi_unicast(&msg, ci); 52212861a66Sriastradh 52312861a66Sriastradh /* 52412861a66Sriastradh * Busy-wait up to 1sec for the patient CPU to print a stack 52512861a66Sriastradh * trace and panic. If the patient CPU acknowledges the IPI, 526c0abd507Sriastradh * just give up and stop here -- the system is coming down soon 527c0abd507Sriastradh * and we should avoid getting in the way. 52812861a66Sriastradh */ 52912861a66Sriastradh while (countdown --> 0) { 530c0abd507Sriastradh if (atomic_load_relaxed(&ack)) 53112861a66Sriastradh return; 53212861a66Sriastradh DELAY(1000); /* 1ms */ 53312861a66Sriastradh } 53412861a66Sriastradh 53512861a66Sriastradh /* 53612861a66Sriastradh * The patient CPU failed to acknowledge the panic request. 53712861a66Sriastradh * Panic now; with any luck, we'll get a crash dump. 53812861a66Sriastradh */ 53912861a66Sriastradh panic("%s: found %s heart stopped beating and unresponsive", 54012861a66Sriastradh cpu_name(curcpu()), cpu_name(ci)); 54112861a66Sriastradh } 54212861a66Sriastradh 54312861a66Sriastradh /* 54412861a66Sriastradh * select_patient() 54512861a66Sriastradh * 54612861a66Sriastradh * Select another CPU to check the heartbeat of. Returns NULL if 54712861a66Sriastradh * there are no other online CPUs. Never returns curcpu(). 54812861a66Sriastradh * Caller must have kpreemption disabled. 54912861a66Sriastradh */ 55012861a66Sriastradh static struct cpu_info * 55112861a66Sriastradh select_patient(void) 55212861a66Sriastradh { 55312861a66Sriastradh CPU_INFO_ITERATOR cii; 55412861a66Sriastradh struct cpu_info *first = NULL, *patient = NULL, *ci; 55512861a66Sriastradh bool passedcur = false; 55612861a66Sriastradh 5579401f7d8Sriastradh KASSERT(curcpu_stable()); 55812861a66Sriastradh 55912861a66Sriastradh /* 56012861a66Sriastradh * In the iteration order of all CPUs, find the next online CPU 56112861a66Sriastradh * after curcpu(), or the first online one if curcpu() is last 56212861a66Sriastradh * in the iteration order. 56312861a66Sriastradh */ 56412861a66Sriastradh for (CPU_INFO_FOREACH(cii, ci)) { 5655c3232dbSriastradh if (atomic_load_relaxed(&ci->ci_heartbeat_suspend)) 56612861a66Sriastradh continue; 56712861a66Sriastradh if (passedcur) { 56812861a66Sriastradh /* 56912861a66Sriastradh * (...|curcpu()|ci|...) 57012861a66Sriastradh * 57112861a66Sriastradh * Found the patient right after curcpu(). 57212861a66Sriastradh */ 57312861a66Sriastradh KASSERT(patient != ci); 57412861a66Sriastradh patient = ci; 57512861a66Sriastradh break; 57612861a66Sriastradh } 57712861a66Sriastradh if (ci == curcpu()) { 57812861a66Sriastradh /* 57912861a66Sriastradh * (...|prev|ci=curcpu()|next|...) 58012861a66Sriastradh * 58112861a66Sriastradh * Note that we want next (or first, if there's 58212861a66Sriastradh * nothing after curcpu()). 58312861a66Sriastradh */ 58412861a66Sriastradh passedcur = true; 58512861a66Sriastradh continue; 58612861a66Sriastradh } 58712861a66Sriastradh if (first == NULL) { 58812861a66Sriastradh /* 58912861a66Sriastradh * (ci|...|curcpu()|...) 59012861a66Sriastradh * 59112861a66Sriastradh * Record ci as first in case there's nothing 59212861a66Sriastradh * after curcpu(). 59312861a66Sriastradh */ 59412861a66Sriastradh first = ci; 59512861a66Sriastradh continue; 59612861a66Sriastradh } 59712861a66Sriastradh } 59812861a66Sriastradh 59912861a66Sriastradh /* 60012861a66Sriastradh * If we hit the end, wrap around to the beginning. 60112861a66Sriastradh */ 60212861a66Sriastradh if (patient == NULL) { 60312861a66Sriastradh KASSERT(passedcur); 60412861a66Sriastradh patient = first; 60512861a66Sriastradh } 60612861a66Sriastradh 60712861a66Sriastradh return patient; 60812861a66Sriastradh } 60912861a66Sriastradh 61012861a66Sriastradh /* 61112861a66Sriastradh * heartbeat() 61212861a66Sriastradh * 61312861a66Sriastradh * 1. Count a heartbeat on the local CPU. 61412861a66Sriastradh * 61512861a66Sriastradh * 2. Panic if the system uptime doesn't seem to have advanced in 61612861a66Sriastradh * a while. 61712861a66Sriastradh * 61812861a66Sriastradh * 3. Panic if the soft interrupt on this CPU hasn't advanced the 61912861a66Sriastradh * local view of the system uptime. 62012861a66Sriastradh * 62112861a66Sriastradh * 4. Schedule the soft interrupt to advance the local view of the 62212861a66Sriastradh * system uptime. 62312861a66Sriastradh * 62412861a66Sriastradh * 5. Select another CPU to check the heartbeat of. 62512861a66Sriastradh * 62612861a66Sriastradh * 6. Panic if the other CPU hasn't advanced its view of the 62712861a66Sriastradh * system uptime in a while. 62812861a66Sriastradh */ 62912861a66Sriastradh void 63012861a66Sriastradh heartbeat(void) 63112861a66Sriastradh { 63212861a66Sriastradh unsigned period_ticks, period_secs; 63312861a66Sriastradh unsigned count, uptime, cache, stamp, d; 63412861a66Sriastradh struct cpu_info *patient; 63512861a66Sriastradh 6369401f7d8Sriastradh KASSERT(curcpu_stable()); 63712861a66Sriastradh 638c4eba877Sriastradh /* 639c4eba877Sriastradh * If heartbeat checks are disabled globally, or if they are 640c4eba877Sriastradh * suspended locally, or if we're already panicking so it's not 641c4eba877Sriastradh * helpful to trigger more panics for more reasons, do nothing. 642c4eba877Sriastradh */ 64312861a66Sriastradh period_ticks = atomic_load_relaxed(&heartbeat_max_period_ticks); 64412861a66Sriastradh period_secs = atomic_load_relaxed(&heartbeat_max_period_secs); 64512861a66Sriastradh if (__predict_false(period_ticks == 0) || 64612861a66Sriastradh __predict_false(period_secs == 0) || 647c4eba877Sriastradh __predict_false(curcpu()->ci_heartbeat_suspend) || 648c4eba877Sriastradh __predict_false(panicstr != NULL)) 64912861a66Sriastradh return; 65012861a66Sriastradh 65112861a66Sriastradh /* 65212861a66Sriastradh * Count a heartbeat on this CPU. 65312861a66Sriastradh */ 65412861a66Sriastradh count = curcpu()->ci_heartbeat_count++; 65512861a66Sriastradh 65612861a66Sriastradh /* 65712861a66Sriastradh * If the uptime hasn't changed, make sure that we haven't 65812861a66Sriastradh * counted too many of our own heartbeats since the uptime last 65912861a66Sriastradh * changed, and stop here -- we only do the cross-CPU work once 66012861a66Sriastradh * per second. 66112861a66Sriastradh */ 662*730b8a0aSriastradh uptime = time_uptime32; 66312861a66Sriastradh cache = atomic_load_relaxed(&curcpu()->ci_heartbeat_uptime_cache); 66412861a66Sriastradh if (__predict_true(cache == uptime)) { 66512861a66Sriastradh /* 66612861a66Sriastradh * Timecounter hasn't advanced by more than a second. 66712861a66Sriastradh * Make sure the timecounter isn't stuck according to 66895d8ae3cSriastradh * our heartbeats -- unless timecounter heartbeats are 66995d8ae3cSriastradh * suspended too. 67012861a66Sriastradh * 67112861a66Sriastradh * Our own heartbeat count can't roll back, and 672*730b8a0aSriastradh * time_uptime32 should be updated before it wraps 67312861a66Sriastradh * around, so d should never go negative; hence no 67412861a66Sriastradh * check for d < UINT_MAX/2. 67512861a66Sriastradh */ 67612861a66Sriastradh stamp = 67712861a66Sriastradh atomic_load_relaxed(&curcpu()->ci_heartbeat_uptime_stamp); 67812861a66Sriastradh d = count - stamp; 67995d8ae3cSriastradh if (__predict_false(d > period_ticks) && 68095d8ae3cSriastradh !heartbeat_timecounter_suspended()) { 68112861a66Sriastradh panic("%s: time has not advanced in %u heartbeats", 68212861a66Sriastradh cpu_name(curcpu()), d); 68312861a66Sriastradh } 68412861a66Sriastradh return; 68512861a66Sriastradh } 68612861a66Sriastradh 68712861a66Sriastradh /* 68812861a66Sriastradh * If the uptime has changed, make sure that it hasn't changed 68912861a66Sriastradh * so much that softints must be stuck on this CPU. Since 690*730b8a0aSriastradh * time_uptime32 is monotonic and our cache of it is updated at 691*730b8a0aSriastradh * most every UINT_MAX/4/hz sec (hence no concern about 692*730b8a0aSriastradh * wraparound even after 68 or 136 years), this can't go 693*730b8a0aSriastradh * negative, hence no check for d < UINT_MAX/2. 69412861a66Sriastradh * 69512861a66Sriastradh * This uses the hard timer interrupt handler on the current 69612861a66Sriastradh * CPU to ensure soft interrupts at all priority levels have 69712861a66Sriastradh * made progress. 69812861a66Sriastradh */ 69912861a66Sriastradh d = uptime - cache; 70012861a66Sriastradh if (__predict_false(d > period_secs)) { 70112861a66Sriastradh panic("%s: softints stuck for %u seconds", 70212861a66Sriastradh cpu_name(curcpu()), d); 70312861a66Sriastradh } 70412861a66Sriastradh 70512861a66Sriastradh /* 70612861a66Sriastradh * Schedule a softint to update our cache of the system uptime 70712861a66Sriastradh * so the next call to heartbeat, on this or another CPU, can 70812861a66Sriastradh * detect progress on this one. 70912861a66Sriastradh */ 71012861a66Sriastradh softint_schedule(heartbeat_sih); 71112861a66Sriastradh 71212861a66Sriastradh /* 71312861a66Sriastradh * Select a patient to check the heartbeat of. If there's no 71412861a66Sriastradh * other online CPU, nothing to do. 71512861a66Sriastradh */ 71612861a66Sriastradh patient = select_patient(); 71712861a66Sriastradh if (patient == NULL) 71812861a66Sriastradh return; 71912861a66Sriastradh 72012861a66Sriastradh /* 72112861a66Sriastradh * Verify that time is advancing on the patient CPU. If the 72212861a66Sriastradh * delta exceeds UINT_MAX/2, that means it is already ahead by 72312861a66Sriastradh * a little on the other CPU, and the subtraction went 724572220daSriastradh * negative, which is OK. If the CPU's heartbeats have been 725572220daSriastradh * suspended since we selected it, no worries. 72612861a66Sriastradh * 72712861a66Sriastradh * This uses the current CPU to ensure the other CPU has made 72812861a66Sriastradh * progress, even if the other CPU's hard timer interrupt 72912861a66Sriastradh * handler is stuck for some reason. 73012861a66Sriastradh * 73112861a66Sriastradh * XXX Maybe confirm it hasn't gone negative by more than 73212861a66Sriastradh * max_period? 73312861a66Sriastradh */ 73412861a66Sriastradh d = uptime - atomic_load_relaxed(&patient->ci_heartbeat_uptime_cache); 73512861a66Sriastradh if (__predict_false(d > period_secs) && 73612861a66Sriastradh __predict_false(d < UINT_MAX/2) && 7375c3232dbSriastradh atomic_load_relaxed(&patient->ci_heartbeat_suspend) == 0) 73812861a66Sriastradh defibrillate(patient, d); 73912861a66Sriastradh } 74012861a66Sriastradh 74112861a66Sriastradh /* 74212861a66Sriastradh * heartbeat_dump() 74312861a66Sriastradh * 74412861a66Sriastradh * Print the heartbeat data of all CPUs. Can be called from ddb. 74512861a66Sriastradh */ 74612861a66Sriastradh #ifdef DDB 74712861a66Sriastradh static unsigned 748572220daSriastradh db_read_unsigned(const volatile unsigned *p) 74912861a66Sriastradh { 75012861a66Sriastradh unsigned x; 75112861a66Sriastradh 752572220daSriastradh db_read_bytes((db_addr_t)(uintptr_t)p, sizeof(x), (char *)&x); 753572220daSriastradh 754572220daSriastradh return x; 755572220daSriastradh } 756572220daSriastradh 75712861a66Sriastradh void 75812861a66Sriastradh heartbeat_dump(void) 75912861a66Sriastradh { 76012861a66Sriastradh struct cpu_info *ci; 76112861a66Sriastradh 76212861a66Sriastradh db_printf("Heartbeats:\n"); 76312861a66Sriastradh for (ci = db_cpu_first(); ci != NULL; ci = db_cpu_next(ci)) { 7645c3232dbSriastradh db_printf("cpu%u: count %u uptime %u stamp %u suspend %u\n", 76512861a66Sriastradh db_read_unsigned(&ci->ci_index), 76612861a66Sriastradh db_read_unsigned(&ci->ci_heartbeat_count), 76712861a66Sriastradh db_read_unsigned(&ci->ci_heartbeat_uptime_cache), 768572220daSriastradh db_read_unsigned(&ci->ci_heartbeat_uptime_stamp), 7695c3232dbSriastradh db_read_unsigned(&ci->ci_heartbeat_suspend)); 77012861a66Sriastradh } 77112861a66Sriastradh } 77212861a66Sriastradh #endif 773