1 /* $NetBSD: watchdog.c,v 1.3 2020/03/18 19:05:22 christos Exp $ */ 2 3 /*++ 4 /* NAME 5 /* watchdog 3 6 /* SUMMARY 7 /* watchdog timer 8 /* SYNOPSIS 9 /* #include <watchdog.h> 10 /* 11 /* WATCHDOG *watchdog_create(timeout, action, context) 12 /* unsigned timeout; 13 /* void (*action)(WATCHDOG *watchdog, char *context); 14 /* char *context; 15 /* 16 /* void watchdog_start(watchdog) 17 /* WATCHDOG *watchdog; 18 /* 19 /* void watchdog_stop(watchdog) 20 /* WATCHDOG *watchdog; 21 /* 22 /* void watchdog_destroy(watchdog) 23 /* WATCHDOG *watchdog; 24 /* 25 /* void watchdog_pat() 26 /* DESCRIPTION 27 /* This module implements watchdog timers that are based on ugly 28 /* UNIX alarm timers. The module is designed to survive systems 29 /* with clocks that jump occasionally. 30 /* 31 /* Watchdog timers can be stacked. Only one watchdog timer can be 32 /* active at a time. Only the last created watchdog timer can be 33 /* manipulated. Watchdog timers must be destroyed in reverse order 34 /* of creation. 35 /* 36 /* watchdog_create() suspends the current watchdog timer, if any, 37 /* and instantiates a new watchdog timer. 38 /* 39 /* watchdog_start() starts or restarts the watchdog timer. 40 /* 41 /* watchdog_stop() stops the watchdog timer. 42 /* 43 /* watchdog_destroy() stops the watchdog timer, and resumes the 44 /* watchdog timer instance that was suspended by watchdog_create(). 45 /* 46 /* watchdog_pat() pats the watchdog, so it stays quiet. 47 /* 48 /* Arguments: 49 /* .IP timeout 50 /* The watchdog time limit. When the watchdog timer runs, the 51 /* process must invoke watchdog_start(), watchdog_stop() or 52 /* watchdog_destroy() before the time limit is reached. 53 /* .IP action 54 /* A null pointer, or pointer to function that is called when the 55 /* watchdog alarm goes off. The default action is to terminate 56 /* the process with a fatal error. 57 /* .IP context 58 /* Application context that is passed to the action routine. 59 /* .IP watchdog 60 /* Must be a pointer to the most recently created watchdog instance. 61 /* This argument is checked upon each call. 62 /* BUGS 63 /* UNIX alarm timers are not stackable, so there can be at most one 64 /* watchdog instance active at any given time. 65 /* SEE ALSO 66 /* msg(3) diagnostics interface 67 /* DIAGNOSTICS 68 /* Fatal errors: memory allocation problem, system call failure. 69 /* Panics: interface violations. 70 /* LICENSE 71 /* .ad 72 /* .fi 73 /* The Secure Mailer license must be distributed with this software. 74 /* AUTHOR(S) 75 /* Wietse Venema 76 /* IBM T.J. Watson Research 77 /* P.O. Box 704 78 /* Yorktown Heights, NY 10598, USA 79 /* 80 /* Wietse Venema 81 /* Google, Inc. 82 /* 111 8th Avenue 83 /* New York, NY 10011, USA 84 /*--*/ 85 86 /* System library. */ 87 88 #include <sys_defs.h> 89 #include <unistd.h> 90 #include <signal.h> 91 #include <posix_signals.h> 92 93 /* Utility library. */ 94 95 #include <msg.h> 96 #include <mymalloc.h> 97 #include <killme_after.h> 98 #include <watchdog.h> 99 100 /* Application-specific. */ 101 102 /* 103 * Rather than having one timer that goes off when it is too late, we break 104 * up the time limit into smaller intervals so that we can deal with clocks 105 * that jump occasionally. 106 */ 107 #define WATCHDOG_STEPS 3 108 109 /* 110 * UNIX alarms are not stackable, but we can save and restore state, so that 111 * watchdogs can at least be nested, sort of. 112 */ 113 struct WATCHDOG { 114 unsigned timeout; /* our time resolution */ 115 WATCHDOG_FN action; /* application routine */ 116 char *context; /* application context */ 117 int trip_run; /* number of successive timeouts */ 118 WATCHDOG *saved_watchdog; /* saved state */ 119 struct sigaction saved_action; /* saved state */ 120 unsigned saved_time; /* saved state */ 121 }; 122 123 /* 124 * However, only one watchdog instance can be current, and the caller has to 125 * restore state before a prior watchdog instance can be manipulated. 126 */ 127 static WATCHDOG *watchdog_curr; 128 129 /* 130 * Workaround for systems where the alarm signal does not wakeup the event 131 * machinery, and therefore does not restart the watchdog timer in the 132 * single_server etc. skeletons. The symptom is that programs abort when the 133 * watchdog timeout is less than the max_idle time. 134 */ 135 #ifdef USE_WATCHDOG_PIPE 136 #include <errno.h> 137 #include <iostuff.h> 138 #include <events.h> 139 140 static int watchdog_pipe[2]; 141 142 /* watchdog_read - read event pipe */ 143 144 static void watchdog_read(int unused_event, void *unused_context) 145 { 146 char ch; 147 148 while (read(watchdog_pipe[0], &ch, 1) > 0) 149 /* void */ ; 150 } 151 152 #endif /* USE_WATCHDOG_PIPE */ 153 154 /* watchdog_event - handle timeout event */ 155 156 static void watchdog_event(int unused_sig) 157 { 158 const char *myname = "watchdog_event"; 159 WATCHDOG *wp; 160 161 /* 162 * This routine runs as a signal handler. We should not do anything that 163 * could involve memory allocation/deallocation, but exiting without 164 * proper explanation would be unacceptable. For this reason, msg(3) was 165 * made safe for usage by signal handlers that terminate the process. 166 */ 167 if ((wp = watchdog_curr) == 0) 168 msg_panic("%s: no instance", myname); 169 if (msg_verbose > 1) 170 msg_info("%s: %p %d", myname, (void *) wp, wp->trip_run); 171 if (++(wp->trip_run) < WATCHDOG_STEPS) { 172 #ifdef USE_WATCHDOG_PIPE 173 int saved_errno = errno; 174 175 /* Wake up the events(3) engine. */ 176 if (write(watchdog_pipe[1], "", 1) != 1) 177 msg_warn("%s: write watchdog_pipe: %m", myname); 178 errno = saved_errno; 179 #endif 180 alarm(wp->timeout); 181 } else { 182 if (wp->action) 183 wp->action(wp, wp->context); 184 else { 185 killme_after(5); 186 #ifdef TEST 187 pause(); 188 #endif 189 msg_fatal("watchdog timeout"); 190 } 191 } 192 } 193 194 /* watchdog_create - create watchdog instance */ 195 196 WATCHDOG *watchdog_create(unsigned timeout, WATCHDOG_FN action, char *context) 197 { 198 const char *myname = "watchdog_create"; 199 struct sigaction sig_action; 200 WATCHDOG *wp; 201 202 wp = (WATCHDOG *) mymalloc(sizeof(*wp)); 203 if ((wp->timeout = timeout / WATCHDOG_STEPS) == 0) 204 msg_panic("%s: timeout %d is too small", myname, timeout); 205 wp->action = action; 206 wp->context = context; 207 wp->saved_watchdog = watchdog_curr; 208 wp->saved_time = alarm(0); 209 sigemptyset(&sig_action.sa_mask); 210 #ifdef SA_RESTART 211 sig_action.sa_flags = SA_RESTART; 212 #else 213 sig_action.sa_flags = 0; 214 #endif 215 sig_action.sa_handler = watchdog_event; 216 if (sigaction(SIGALRM, &sig_action, &wp->saved_action) < 0) 217 msg_fatal("%s: sigaction(SIGALRM): %m", myname); 218 if (msg_verbose > 1) 219 msg_info("%s: %p %d", myname, (void *) wp, timeout); 220 #ifdef USE_WATCHDOG_PIPE 221 if (watchdog_curr == 0) { 222 if (pipe(watchdog_pipe) < 0) 223 msg_fatal("%s: pipe: %m", myname); 224 non_blocking(watchdog_pipe[0], NON_BLOCKING); 225 non_blocking(watchdog_pipe[1], NON_BLOCKING); 226 close_on_exec(watchdog_pipe[0], CLOSE_ON_EXEC); /* Fix 20190126 */ 227 close_on_exec(watchdog_pipe[1], CLOSE_ON_EXEC); /* Fix 20190126 */ 228 event_enable_read(watchdog_pipe[0], watchdog_read, (void *) 0); 229 } 230 #endif 231 return (watchdog_curr = wp); 232 } 233 234 /* watchdog_destroy - destroy watchdog instance, restore state */ 235 236 void watchdog_destroy(WATCHDOG *wp) 237 { 238 const char *myname = "watchdog_destroy"; 239 240 watchdog_stop(wp); 241 watchdog_curr = wp->saved_watchdog; 242 if (sigaction(SIGALRM, &wp->saved_action, (struct sigaction *) 0) < 0) 243 msg_fatal("%s: sigaction(SIGALRM): %m", myname); 244 if (wp->saved_time) 245 alarm(wp->saved_time); 246 myfree((void *) wp); 247 #ifdef USE_WATCHDOG_PIPE 248 if (watchdog_curr == 0) { 249 event_disable_readwrite(watchdog_pipe[0]); 250 (void) close(watchdog_pipe[0]); 251 (void) close(watchdog_pipe[1]); 252 } 253 #endif 254 if (msg_verbose > 1) 255 msg_info("%s: %p", myname, (void *) wp); 256 } 257 258 /* watchdog_start - enable watchdog timer */ 259 260 void watchdog_start(WATCHDOG *wp) 261 { 262 const char *myname = "watchdog_start"; 263 264 if (wp != watchdog_curr) 265 msg_panic("%s: wrong watchdog instance", myname); 266 wp->trip_run = 0; 267 alarm(wp->timeout); 268 if (msg_verbose > 1) 269 msg_info("%s: %p", myname, (void *) wp); 270 } 271 272 /* watchdog_stop - disable watchdog timer */ 273 274 void watchdog_stop(WATCHDOG *wp) 275 { 276 const char *myname = "watchdog_stop"; 277 278 if (wp != watchdog_curr) 279 msg_panic("%s: wrong watchdog instance", myname); 280 alarm(0); 281 if (msg_verbose > 1) 282 msg_info("%s: %p", myname, (void *) wp); 283 } 284 285 /* watchdog_pat - pat the dog so it stays quiet */ 286 287 void watchdog_pat(void) 288 { 289 const char *myname = "watchdog_pat"; 290 291 if (watchdog_curr) 292 watchdog_curr->trip_run = 0; 293 if (msg_verbose > 1) 294 msg_info("%s: %p", myname, (void *) watchdog_curr); 295 } 296 297 #ifdef TEST 298 299 #include <vstream.h> 300 301 int main(int unused_argc, char **unused_argv) 302 { 303 WATCHDOG *wp; 304 305 msg_verbose = 2; 306 307 wp = watchdog_create(10, (WATCHDOG_FN) 0, (void *) 0); 308 watchdog_start(wp); 309 do { 310 watchdog_pat(); 311 } while (VSTREAM_GETCHAR() != VSTREAM_EOF); 312 watchdog_destroy(wp); 313 return (0); 314 } 315 316 #endif 317