1 /* $NetBSD: watchdog.c,v 1.2 2017/02/14 01:16:49 christos Exp $ */ 2 3 /*++ 4 /* NAME 5 /* watchdog 3 6 /* SUMMARY 7 /* watchdog timer 8 /* SYNOPSIS 9 /* #include <watchdog.h> 10 /* 11 /* WATCHDOG *watchdog_create(timeout, action, context) 12 /* unsigned timeout; 13 /* void (*action)(WATCHDOG *watchdog, char *context); 14 /* char *context; 15 /* 16 /* void watchdog_start(watchdog) 17 /* WATCHDOG *watchdog; 18 /* 19 /* void watchdog_stop(watchdog) 20 /* WATCHDOG *watchdog; 21 /* 22 /* void watchdog_destroy(watchdog) 23 /* WATCHDOG *watchdog; 24 /* 25 /* void watchdog_pat() 26 /* DESCRIPTION 27 /* This module implements watchdog timers that are based on ugly 28 /* UNIX alarm timers. The module is designed to survive systems 29 /* with clocks that jump occasionally. 30 /* 31 /* Watchdog timers can be stacked. Only one watchdog timer can be 32 /* active at a time. Only the last created watchdog timer can be 33 /* manipulated. Watchdog timers must be destroyed in reverse order 34 /* of creation. 35 /* 36 /* watchdog_create() suspends the current watchdog timer, if any, 37 /* and instantiates a new watchdog timer. 38 /* 39 /* watchdog_start() starts or restarts the watchdog timer. 40 /* 41 /* watchdog_stop() stops the watchdog timer. 42 /* 43 /* watchdog_destroy() stops the watchdog timer, and resumes the 44 /* watchdog timer instance that was suspended by watchdog_create(). 45 /* 46 /* watchdog_pat() pats the watchdog, so it stays quiet. 47 /* 48 /* Arguments: 49 /* .IP timeout 50 /* The watchdog time limit. When the watchdog timer runs, the 51 /* process must invoke watchdog_start(), watchdog_stop() or 52 /* watchdog_destroy() before the time limit is reached. 53 /* .IP action 54 /* A null pointer, or pointer to function that is called when the 55 /* watchdog alarm goes off. The default action is to terminate 56 /* the process with a fatal error. 57 /* .IP context 58 /* Application context that is passed to the action routine. 59 /* .IP watchdog 60 /* Must be a pointer to the most recently created watchdog instance. 61 /* This argument is checked upon each call. 62 /* BUGS 63 /* UNIX alarm timers are not stackable, so there can be at most one 64 /* watchdog instance active at any given time. 65 /* SEE ALSO 66 /* msg(3) diagnostics interface 67 /* DIAGNOSTICS 68 /* Fatal errors: memory allocation problem, system call failure. 69 /* Panics: interface violations. 70 /* LICENSE 71 /* .ad 72 /* .fi 73 /* The Secure Mailer license must be distributed with this software. 74 /* AUTHOR(S) 75 /* Wietse Venema 76 /* IBM T.J. Watson Research 77 /* P.O. Box 704 78 /* Yorktown Heights, NY 10598, USA 79 /*--*/ 80 81 /* System library. */ 82 83 #include <sys_defs.h> 84 #include <unistd.h> 85 #include <signal.h> 86 #include <posix_signals.h> 87 88 /* Utility library. */ 89 90 #include <msg.h> 91 #include <mymalloc.h> 92 #include <killme_after.h> 93 #include <watchdog.h> 94 95 /* Application-specific. */ 96 97 /* 98 * Rather than having one timer that goes off when it is too late, we break 99 * up the time limit into smaller intervals so that we can deal with clocks 100 * that jump occasionally. 101 */ 102 #define WATCHDOG_STEPS 3 103 104 /* 105 * UNIX alarms are not stackable, but we can save and restore state, so that 106 * watchdogs can at least be nested, sort of. 107 */ 108 struct WATCHDOG { 109 unsigned timeout; /* our time resolution */ 110 WATCHDOG_FN action; /* application routine */ 111 char *context; /* application context */ 112 int trip_run; /* number of successive timeouts */ 113 WATCHDOG *saved_watchdog; /* saved state */ 114 struct sigaction saved_action; /* saved state */ 115 unsigned saved_time; /* saved state */ 116 }; 117 118 /* 119 * However, only one watchdog instance can be current, and the caller has to 120 * restore state before a prior watchdog instance can be manipulated. 121 */ 122 static WATCHDOG *watchdog_curr; 123 124 /* 125 * Workaround for systems where the alarm signal does not wakeup the event 126 * machinery, and therefore does not restart the watchdog timer in the 127 * single_server etc. skeletons. The symptom is that programs abort when the 128 * watchdog timeout is less than the max_idle time. 129 */ 130 #ifdef USE_WATCHDOG_PIPE 131 #include <errno.h> 132 #include <iostuff.h> 133 #include <events.h> 134 135 static int watchdog_pipe[2]; 136 137 /* watchdog_read - read event pipe */ 138 139 static void watchdog_read(int unused_event, void *unused_context) 140 { 141 char ch; 142 143 while (read(watchdog_pipe[0], &ch, 1) > 0) 144 /* void */ ; 145 } 146 147 #endif /* USE_WATCHDOG_PIPE */ 148 149 /* watchdog_event - handle timeout event */ 150 151 static void watchdog_event(int unused_sig) 152 { 153 const char *myname = "watchdog_event"; 154 WATCHDOG *wp; 155 156 /* 157 * This routine runs as a signal handler. We should not do anything that 158 * could involve memory allocation/deallocation, but exiting without 159 * proper explanation would be unacceptable. For this reason, msg(3) was 160 * made safe for usage by signal handlers that terminate the process. 161 */ 162 if ((wp = watchdog_curr) == 0) 163 msg_panic("%s: no instance", myname); 164 if (msg_verbose > 1) 165 msg_info("%s: %p %d", myname, (void *) wp, wp->trip_run); 166 if (++(wp->trip_run) < WATCHDOG_STEPS) { 167 #ifdef USE_WATCHDOG_PIPE 168 int saved_errno = errno; 169 170 /* Wake up the events(3) engine. */ 171 if (write(watchdog_pipe[1], "", 1) != 1) 172 msg_warn("%s: write watchdog_pipe: %m", myname); 173 errno = saved_errno; 174 #endif 175 alarm(wp->timeout); 176 } else { 177 if (wp->action) 178 wp->action(wp, wp->context); 179 else { 180 killme_after(5); 181 #ifdef TEST 182 pause(); 183 #endif 184 msg_fatal("watchdog timeout"); 185 } 186 } 187 } 188 189 /* watchdog_create - create watchdog instance */ 190 191 WATCHDOG *watchdog_create(unsigned timeout, WATCHDOG_FN action, char *context) 192 { 193 const char *myname = "watchdog_create"; 194 struct sigaction sig_action; 195 WATCHDOG *wp; 196 197 wp = (WATCHDOG *) mymalloc(sizeof(*wp)); 198 if ((wp->timeout = timeout / WATCHDOG_STEPS) == 0) 199 msg_panic("%s: timeout %d is too small", myname, timeout); 200 wp->action = action; 201 wp->context = context; 202 wp->saved_watchdog = watchdog_curr; 203 wp->saved_time = alarm(0); 204 sigemptyset(&sig_action.sa_mask); 205 #ifdef SA_RESTART 206 sig_action.sa_flags = SA_RESTART; 207 #else 208 sig_action.sa_flags = 0; 209 #endif 210 sig_action.sa_handler = watchdog_event; 211 if (sigaction(SIGALRM, &sig_action, &wp->saved_action) < 0) 212 msg_fatal("%s: sigaction(SIGALRM): %m", myname); 213 if (msg_verbose > 1) 214 msg_info("%s: %p %d", myname, (void *) wp, timeout); 215 #ifdef USE_WATCHDOG_PIPE 216 if (watchdog_curr == 0) { 217 if (pipe(watchdog_pipe) < 0) 218 msg_fatal("%s: pipe: %m", myname); 219 non_blocking(watchdog_pipe[0], NON_BLOCKING); 220 non_blocking(watchdog_pipe[1], NON_BLOCKING); 221 event_enable_read(watchdog_pipe[0], watchdog_read, (void *) 0); 222 } 223 #endif 224 return (watchdog_curr = wp); 225 } 226 227 /* watchdog_destroy - destroy watchdog instance, restore state */ 228 229 void watchdog_destroy(WATCHDOG *wp) 230 { 231 const char *myname = "watchdog_destroy"; 232 233 watchdog_stop(wp); 234 watchdog_curr = wp->saved_watchdog; 235 if (sigaction(SIGALRM, &wp->saved_action, (struct sigaction *) 0) < 0) 236 msg_fatal("%s: sigaction(SIGALRM): %m", myname); 237 if (wp->saved_time) 238 alarm(wp->saved_time); 239 myfree((void *) wp); 240 #ifdef USE_WATCHDOG_PIPE 241 if (watchdog_curr == 0) { 242 event_disable_readwrite(watchdog_pipe[0]); 243 (void) close(watchdog_pipe[0]); 244 (void) close(watchdog_pipe[1]); 245 } 246 #endif 247 if (msg_verbose > 1) 248 msg_info("%s: %p", myname, (void *) wp); 249 } 250 251 /* watchdog_start - enable watchdog timer */ 252 253 void watchdog_start(WATCHDOG *wp) 254 { 255 const char *myname = "watchdog_start"; 256 257 if (wp != watchdog_curr) 258 msg_panic("%s: wrong watchdog instance", myname); 259 wp->trip_run = 0; 260 alarm(wp->timeout); 261 if (msg_verbose > 1) 262 msg_info("%s: %p", myname, (void *) wp); 263 } 264 265 /* watchdog_stop - disable watchdog timer */ 266 267 void watchdog_stop(WATCHDOG *wp) 268 { 269 const char *myname = "watchdog_stop"; 270 271 if (wp != watchdog_curr) 272 msg_panic("%s: wrong watchdog instance", myname); 273 alarm(0); 274 if (msg_verbose > 1) 275 msg_info("%s: %p", myname, (void *) wp); 276 } 277 278 /* watchdog_pat - pat the dog so it stays quiet */ 279 280 void watchdog_pat(void) 281 { 282 const char *myname = "watchdog_pat"; 283 284 if (watchdog_curr) 285 watchdog_curr->trip_run = 0; 286 if (msg_verbose > 1) 287 msg_info("%s: %p", myname, (void *) watchdog_curr); 288 } 289 290 #ifdef TEST 291 292 #include <vstream.h> 293 294 int main(int unused_argc, char **unused_argv) 295 { 296 WATCHDOG *wp; 297 298 msg_verbose = 2; 299 300 wp = watchdog_create(10, (WATCHDOG_FN) 0, (void *) 0); 301 watchdog_start(wp); 302 do { 303 watchdog_pat(); 304 } while (VSTREAM_GETCHAR() != VSTREAM_EOF); 305 watchdog_destroy(wp); 306 return (0); 307 } 308 309 #endif 310