1 /* $NetBSD: watchdog.c,v 1.3 2020/03/18 19:05:22 christos Exp $ */
2
3 /*++
4 /* NAME
5 /* watchdog 3
6 /* SUMMARY
7 /* watchdog timer
8 /* SYNOPSIS
9 /* #include <watchdog.h>
10 /*
11 /* WATCHDOG *watchdog_create(timeout, action, context)
12 /* unsigned timeout;
13 /* void (*action)(WATCHDOG *watchdog, char *context);
14 /* char *context;
15 /*
16 /* void watchdog_start(watchdog)
17 /* WATCHDOG *watchdog;
18 /*
19 /* void watchdog_stop(watchdog)
20 /* WATCHDOG *watchdog;
21 /*
22 /* void watchdog_destroy(watchdog)
23 /* WATCHDOG *watchdog;
24 /*
25 /* void watchdog_pat()
26 /* DESCRIPTION
27 /* This module implements watchdog timers that are based on ugly
28 /* UNIX alarm timers. The module is designed to survive systems
29 /* with clocks that jump occasionally.
30 /*
31 /* Watchdog timers can be stacked. Only one watchdog timer can be
32 /* active at a time. Only the last created watchdog timer can be
33 /* manipulated. Watchdog timers must be destroyed in reverse order
34 /* of creation.
35 /*
36 /* watchdog_create() suspends the current watchdog timer, if any,
37 /* and instantiates a new watchdog timer.
38 /*
39 /* watchdog_start() starts or restarts the watchdog timer.
40 /*
41 /* watchdog_stop() stops the watchdog timer.
42 /*
43 /* watchdog_destroy() stops the watchdog timer, and resumes the
44 /* watchdog timer instance that was suspended by watchdog_create().
45 /*
46 /* watchdog_pat() pats the watchdog, so it stays quiet.
47 /*
48 /* Arguments:
49 /* .IP timeout
50 /* The watchdog time limit. When the watchdog timer runs, the
51 /* process must invoke watchdog_start(), watchdog_stop() or
52 /* watchdog_destroy() before the time limit is reached.
53 /* .IP action
54 /* A null pointer, or pointer to function that is called when the
55 /* watchdog alarm goes off. The default action is to terminate
56 /* the process with a fatal error.
57 /* .IP context
58 /* Application context that is passed to the action routine.
59 /* .IP watchdog
60 /* Must be a pointer to the most recently created watchdog instance.
61 /* This argument is checked upon each call.
62 /* BUGS
63 /* UNIX alarm timers are not stackable, so there can be at most one
64 /* watchdog instance active at any given time.
65 /* SEE ALSO
66 /* msg(3) diagnostics interface
67 /* DIAGNOSTICS
68 /* Fatal errors: memory allocation problem, system call failure.
69 /* Panics: interface violations.
70 /* LICENSE
71 /* .ad
72 /* .fi
73 /* The Secure Mailer license must be distributed with this software.
74 /* AUTHOR(S)
75 /* Wietse Venema
76 /* IBM T.J. Watson Research
77 /* P.O. Box 704
78 /* Yorktown Heights, NY 10598, USA
79 /*
80 /* Wietse Venema
81 /* Google, Inc.
82 /* 111 8th Avenue
83 /* New York, NY 10011, USA
84 /*--*/
85
86 /* System library. */
87
88 #include <sys_defs.h>
89 #include <unistd.h>
90 #include <signal.h>
91 #include <posix_signals.h>
92
93 /* Utility library. */
94
95 #include <msg.h>
96 #include <mymalloc.h>
97 #include <killme_after.h>
98 #include <watchdog.h>
99
100 /* Application-specific. */
101
102 /*
103 * Rather than having one timer that goes off when it is too late, we break
104 * up the time limit into smaller intervals so that we can deal with clocks
105 * that jump occasionally.
106 */
107 #define WATCHDOG_STEPS 3
108
109 /*
110 * UNIX alarms are not stackable, but we can save and restore state, so that
111 * watchdogs can at least be nested, sort of.
112 */
113 struct WATCHDOG {
114 unsigned timeout; /* our time resolution */
115 WATCHDOG_FN action; /* application routine */
116 char *context; /* application context */
117 int trip_run; /* number of successive timeouts */
118 WATCHDOG *saved_watchdog; /* saved state */
119 struct sigaction saved_action; /* saved state */
120 unsigned saved_time; /* saved state */
121 };
122
123 /*
124 * However, only one watchdog instance can be current, and the caller has to
125 * restore state before a prior watchdog instance can be manipulated.
126 */
127 static WATCHDOG *watchdog_curr;
128
/*
 * Workaround for systems where the alarm signal does not wake up the event
 * machinery, and therefore does not restart the watchdog timer in the
 * single_server etc. skeletons. The symptom is that programs abort when the
 * watchdog timeout is less than the max_idle time.
 */
135 #ifdef USE_WATCHDOG_PIPE
136 #include <errno.h>
137 #include <iostuff.h>
138 #include <events.h>
139
140 static int watchdog_pipe[2];
141
142 /* watchdog_read - read event pipe */
143
watchdog_read(int unused_event,void * unused_context)144 static void watchdog_read(int unused_event, void *unused_context)
145 {
146 char ch;
147
148 while (read(watchdog_pipe[0], &ch, 1) > 0)
149 /* void */ ;
150 }
151
152 #endif /* USE_WATCHDOG_PIPE */
153
154 /* watchdog_event - handle timeout event */
155
watchdog_event(int unused_sig)156 static void watchdog_event(int unused_sig)
157 {
158 const char *myname = "watchdog_event";
159 WATCHDOG *wp;
160
161 /*
162 * This routine runs as a signal handler. We should not do anything that
163 * could involve memory allocation/deallocation, but exiting without
164 * proper explanation would be unacceptable. For this reason, msg(3) was
165 * made safe for usage by signal handlers that terminate the process.
166 */
167 if ((wp = watchdog_curr) == 0)
168 msg_panic("%s: no instance", myname);
169 if (msg_verbose > 1)
170 msg_info("%s: %p %d", myname, (void *) wp, wp->trip_run);
171 if (++(wp->trip_run) < WATCHDOG_STEPS) {
172 #ifdef USE_WATCHDOG_PIPE
173 int saved_errno = errno;
174
175 /* Wake up the events(3) engine. */
176 if (write(watchdog_pipe[1], "", 1) != 1)
177 msg_warn("%s: write watchdog_pipe: %m", myname);
178 errno = saved_errno;
179 #endif
180 alarm(wp->timeout);
181 } else {
182 if (wp->action)
183 wp->action(wp, wp->context);
184 else {
185 killme_after(5);
186 #ifdef TEST
187 pause();
188 #endif
189 msg_fatal("watchdog timeout");
190 }
191 }
192 }
193
194 /* watchdog_create - create watchdog instance */
195
watchdog_create(unsigned timeout,WATCHDOG_FN action,char * context)196 WATCHDOG *watchdog_create(unsigned timeout, WATCHDOG_FN action, char *context)
197 {
198 const char *myname = "watchdog_create";
199 struct sigaction sig_action;
200 WATCHDOG *wp;
201
202 wp = (WATCHDOG *) mymalloc(sizeof(*wp));
203 if ((wp->timeout = timeout / WATCHDOG_STEPS) == 0)
204 msg_panic("%s: timeout %d is too small", myname, timeout);
205 wp->action = action;
206 wp->context = context;
207 wp->saved_watchdog = watchdog_curr;
208 wp->saved_time = alarm(0);
209 sigemptyset(&sig_action.sa_mask);
210 #ifdef SA_RESTART
211 sig_action.sa_flags = SA_RESTART;
212 #else
213 sig_action.sa_flags = 0;
214 #endif
215 sig_action.sa_handler = watchdog_event;
216 if (sigaction(SIGALRM, &sig_action, &wp->saved_action) < 0)
217 msg_fatal("%s: sigaction(SIGALRM): %m", myname);
218 if (msg_verbose > 1)
219 msg_info("%s: %p %d", myname, (void *) wp, timeout);
220 #ifdef USE_WATCHDOG_PIPE
221 if (watchdog_curr == 0) {
222 if (pipe(watchdog_pipe) < 0)
223 msg_fatal("%s: pipe: %m", myname);
224 non_blocking(watchdog_pipe[0], NON_BLOCKING);
225 non_blocking(watchdog_pipe[1], NON_BLOCKING);
226 close_on_exec(watchdog_pipe[0], CLOSE_ON_EXEC); /* Fix 20190126 */
227 close_on_exec(watchdog_pipe[1], CLOSE_ON_EXEC); /* Fix 20190126 */
228 event_enable_read(watchdog_pipe[0], watchdog_read, (void *) 0);
229 }
230 #endif
231 return (watchdog_curr = wp);
232 }
233
234 /* watchdog_destroy - destroy watchdog instance, restore state */
235
watchdog_destroy(WATCHDOG * wp)236 void watchdog_destroy(WATCHDOG *wp)
237 {
238 const char *myname = "watchdog_destroy";
239
240 watchdog_stop(wp);
241 watchdog_curr = wp->saved_watchdog;
242 if (sigaction(SIGALRM, &wp->saved_action, (struct sigaction *) 0) < 0)
243 msg_fatal("%s: sigaction(SIGALRM): %m", myname);
244 if (wp->saved_time)
245 alarm(wp->saved_time);
246 myfree((void *) wp);
247 #ifdef USE_WATCHDOG_PIPE
248 if (watchdog_curr == 0) {
249 event_disable_readwrite(watchdog_pipe[0]);
250 (void) close(watchdog_pipe[0]);
251 (void) close(watchdog_pipe[1]);
252 }
253 #endif
254 if (msg_verbose > 1)
255 msg_info("%s: %p", myname, (void *) wp);
256 }
257
258 /* watchdog_start - enable watchdog timer */
259
watchdog_start(WATCHDOG * wp)260 void watchdog_start(WATCHDOG *wp)
261 {
262 const char *myname = "watchdog_start";
263
264 if (wp != watchdog_curr)
265 msg_panic("%s: wrong watchdog instance", myname);
266 wp->trip_run = 0;
267 alarm(wp->timeout);
268 if (msg_verbose > 1)
269 msg_info("%s: %p", myname, (void *) wp);
270 }
271
272 /* watchdog_stop - disable watchdog timer */
273
watchdog_stop(WATCHDOG * wp)274 void watchdog_stop(WATCHDOG *wp)
275 {
276 const char *myname = "watchdog_stop";
277
278 if (wp != watchdog_curr)
279 msg_panic("%s: wrong watchdog instance", myname);
280 alarm(0);
281 if (msg_verbose > 1)
282 msg_info("%s: %p", myname, (void *) wp);
283 }
284
285 /* watchdog_pat - pat the dog so it stays quiet */
286
watchdog_pat(void)287 void watchdog_pat(void)
288 {
289 const char *myname = "watchdog_pat";
290
291 if (watchdog_curr)
292 watchdog_curr->trip_run = 0;
293 if (msg_verbose > 1)
294 msg_info("%s: %p", myname, (void *) watchdog_curr);
295 }
296
297 #ifdef TEST
298
299 #include <vstream.h>
300
main(int unused_argc,char ** unused_argv)301 int main(int unused_argc, char **unused_argv)
302 {
303 WATCHDOG *wp;
304
305 msg_verbose = 2;
306
307 wp = watchdog_create(10, (WATCHDOG_FN) 0, (void *) 0);
308 watchdog_start(wp);
309 do {
310 watchdog_pat();
311 } while (VSTREAM_GETCHAR() != VSTREAM_EOF);
312 watchdog_destroy(wp);
313 return (0);
314 }
315
316 #endif
317