xref: /netbsd-src/external/bsd/ntp/dist/libntp/work_fork.c (revision deb6f0161a9109e7de9b519dc8dfb9478668dcdd)
1 /*	$NetBSD: work_fork.c,v 1.13 2018/09/29 21:52:33 christos Exp $	*/
2 
3 /*
4  * work_fork.c - fork implementation for blocking worker child.
5  */
6 #include <config.h>
7 #include "ntp_workimpl.h"
8 
9 #ifdef WORK_FORK
10 #include <stdio.h>
11 #include <ctype.h>
12 #include <signal.h>
13 #include <sys/wait.h>
14 
15 #include "iosignal.h"
16 #include "ntp_stdlib.h"
17 #include "ntp_malloc.h"
18 #include "ntp_syslog.h"
19 #include "ntpd.h"
20 #include "ntp_io.h"
21 #include "ntp_assert.h"
22 #include "ntp_unixtime.h"
23 #include "ntp_worker.h"
24 
25 /* === variables === */
26 	int			worker_process;
27 	addremove_io_fd_func	addremove_io_fd;
28 static	volatile int		worker_sighup_received;
29 int	saved_argc = 0;
30 char	**saved_argv;
31 
32 /* === function prototypes === */
33 static	void		fork_blocking_child(blocking_child *);
34 static	RETSIGTYPE	worker_sighup(int);
35 static	void		send_worker_home_atexit(void);
36 static	void		cleanup_after_child(blocking_child *);
37 
38 /* === I/O helpers === */
39 /* Since we have signals enabled, there's a good chance that blocking IO
40  * via pipe suffers from EINTR -- and this goes for both directions.
41  * The next two wrappers will loop until either all the data is written
42  * or read, plus handling the EOF condition on read. They may return
43  * zero if no data was transferred at all, and effectively every return
44  * value that differs from the given transfer length signifies an error
45  * condition.
46  */
47 
/*
 * netread() - read exactly l octets from fd, retrying on EINTR.
 *
 * Returns the number of octets actually transferred; anything short
 * of l signifies EOF or a hard error.
 */
static size_t
netread(
	int		fd,
	void *		vb,
	size_t		l
	)
{
	char *		dst = vb;
	size_t		remaining = l;
	ssize_t		got;

	while (remaining > 0) {
		got = read(fd, dst, remaining);
		if (got > 0) {
			dst += got;
			remaining -= got;
		} else if (0 == got || EINTR != errno) {
			break;	/* EOF or non-retryable error */
		}
	}
	return (size_t)(dst - (char *)vb);
}
69 
70 
/*
 * netwrite() - write exactly l octets to fd, retrying on EINTR.
 *
 * Returns the number of octets actually transferred; anything short
 * of l signifies a hard error.
 */
static size_t
netwrite(
	int		fd,
	const void *	vb,
	size_t		l
	)
{
	const char *	src = vb;
	size_t		remaining = l;
	ssize_t		put;

	while (remaining > 0) {
		put = write(fd, src, remaining);
		if (put > 0) {
			src += put;
			remaining -= put;
		} else if (EINTR != errno) {
			break;	/* non-retryable error */
		}
	}
	return (size_t)(src - (const char *)vb);
}
92 
93 
94 int set_user_group_ids(void);
95 
96 /* === functions === */
97 /*
98  * exit_worker()
99  *
100  * On some systems _exit() is preferred to exit() for forked children.
101  * For example, http://netbsd.gw.com/cgi-bin/man-cgi?fork++NetBSD-5.0
102  * recommends _exit() to avoid double-flushing C runtime stream buffers
103  * and also to avoid calling the parent's atexit() routines in the
104  * child.  On those systems WORKER_CHILD_EXIT is _exit.  Since _exit
105  * bypasses CRT cleanup, fflush() files we know might have output
106  * buffered.
107  */
108 void
109 exit_worker(
110 	int	exitcode
111 	)
112 {
113 	if (syslog_file != NULL)
114 		fflush(syslog_file);
115 	fflush(stdout);
116 	fflush(stderr);
117 	WORKER_CHILD_EXIT (exitcode);	/* space before ( required */
118 }
119 
120 
121 static RETSIGTYPE
122 worker_sighup(
123 	int sig
124 	)
125 {
126 	if (SIGHUP == sig)
127 		worker_sighup_received = 1;
128 }
129 
130 
131 int
132 worker_sleep(
133 	blocking_child *	c,
134 	time_t			seconds
135 	)
136 {
137 	u_int sleep_remain;
138 
139 	sleep_remain = (u_int)seconds;
140 	do {
141 		if (!worker_sighup_received)
142 			sleep_remain = sleep(sleep_remain);
143 		if (worker_sighup_received) {
144 			TRACE(1, ("worker SIGHUP with %us left to sleep",
145 				  sleep_remain));
146 			worker_sighup_received = 0;
147 			return -1;
148 		}
149 	} while (sleep_remain);
150 
151 	return 0;
152 }
153 
154 
155 void
156 interrupt_worker_sleep(void)
157 {
158 	u_int			idx;
159 	blocking_child *	c;
160 	int			rc;
161 
162 	for (idx = 0; idx < blocking_children_alloc; idx++) {
163 		c = blocking_children[idx];
164 
165 		if (NULL == c || c->reusable == TRUE)
166 			continue;
167 
168 		rc = kill(c->pid, SIGHUP);
169 		if (rc < 0)
170 			msyslog(LOG_ERR,
171 				"Unable to signal HUP to wake child pid %d: %m",
172 				c->pid);
173 	}
174 }
175 
176 
177 /*
178  * harvest_child_status() runs in the parent.
179  *
180  * Note the error handling -- this is an interaction with SIGCHLD.
181  * SIG_IGN on SIGCHLD on some OSes means do not wait but reap
182  * automatically. Since we're not really interested in the result code,
183  * we simply ignore the error.
184  */
185 static void
186 harvest_child_status(
187 	blocking_child *	c
188 	)
189 {
190 	if (c->pid) {
191 		/* Wait on the child so it can finish terminating */
192 		if (waitpid(c->pid, NULL, 0) == c->pid)
193 			TRACE(4, ("harvested child %d\n", c->pid));
194 		else if (errno != ECHILD)
195 			msyslog(LOG_ERR, "error waiting on child %d: %m", c->pid);
196 		c->pid = 0;
197 	}
198 }
199 
/*
 * req_child_exit() runs in the parent.
 *
 * Ask the worker child to go away by closing the parent's end of the
 * request pipe; the child sees EOF on its next read and terminates.
 * Returns 0 when the pipe was open and has now been closed, -1 when
 * it was already closed, in which case the child is reaped
 * synchronously instead.
 */
int
req_child_exit(
	blocking_child *	c
	)
{
	if (-1 != c->req_write_pipe) {
		/* Closing the pipe forces the child to exit */
		close(c->req_write_pipe);
		c->req_write_pipe = -1;
		return 0;
	}
	/* Pipe already closed: collect the child's exit status now. */
	harvest_child_status(c);
	return -1;
}
217 
218 
/*
 * cleanup_after_child() runs in parent.
 *
 * Reap the dead worker, detach and close its response pipe, and mark
 * the blocking_child slot reusable for a future fork_blocking_child().
 */
static void
cleanup_after_child(
	blocking_child *	c
	)
{
	harvest_child_status(c);
	if (-1 != c->resp_read_pipe) {
		/* unhook the fd from the I/O loop before closing it */
		(*addremove_io_fd)(c->resp_read_pipe, c->ispipe, TRUE);
		close(c->resp_read_pipe);
		c->resp_read_pipe = -1;
	}
	c->resp_read_ctx = NULL;
	/* the child-side pipe ends are never left open in the parent */
	DEBUG_INSIST(-1 == c->req_read_pipe);
	DEBUG_INSIST(-1 == c->resp_write_pipe);
	c->reusable = TRUE;
}
238 
239 
240 static void
241 send_worker_home_atexit(void)
242 {
243 	u_int			idx;
244 	blocking_child *	c;
245 
246 	if (worker_process)
247 		return;
248 
249 	for (idx = 0; idx < blocking_children_alloc; idx++) {
250 		c = blocking_children[idx];
251 		if (NULL == c)
252 			continue;
253 		req_child_exit(c);
254 	}
255 }
256 
257 
/*
 * send_blocking_req_internal() runs in the parent.
 *
 * Write one request (header, then payload) to the worker's request
 * pipe, forking the worker lazily on first use.  Returns 0 on
 * success; a short write is treated as fatal and terminates the
 * daemon via exit(1).
 */
int
send_blocking_req_internal(
	blocking_child *	c,
	blocking_pipe_header *	hdr,
	void *			data
	)
{
	size_t	octets;
	size_t	rc;

	DEBUG_REQUIRE(hdr != NULL);
	DEBUG_REQUIRE(data != NULL);
	DEBUG_REQUIRE(BLOCKING_REQ_MAGIC == hdr->magic_sig);

	/* fork the worker on the first request */
	if (-1 == c->req_write_pipe) {
		fork_blocking_child(c);
		DEBUG_INSIST(-1 != c->req_write_pipe);
	}

	/* header first, then the payload that follows it */
	octets = sizeof(*hdr);
	rc = netwrite(c->req_write_pipe, hdr, octets);

	if (rc == octets) {
		/* hdr->octets covers header plus payload */
		octets = hdr->octets - sizeof(*hdr);
		rc = netwrite(c->req_write_pipe, data, octets);
		if (rc == octets)
			return 0;
	}

	msyslog(LOG_ERR,
		"send_blocking_req_internal: short write (%zu of %zu), %m",
		rc, octets);

	/* Fatal error.  Clean up the child process.  */
	req_child_exit(c);
	exit(1);	/* otherwise would be return -1 */
}
295 
296 
/*
 * receive_blocking_req_internal() runs in the worker child.
 *
 * Block until a complete request arrives on the request pipe and
 * return it in freshly-allocated storage (the caller frees it).
 * Returns NULL when the parent closed the pipe (normal shutdown
 * indication) or when the transfer is short or malformed.
 */
blocking_pipe_header *
receive_blocking_req_internal(
	blocking_child *	c
	)
{
	blocking_pipe_header	hdr;
	blocking_pipe_header *	req;
	size_t			rc;
	size_t			octets;

	DEBUG_REQUIRE(-1 != c->req_read_pipe);

	req = NULL;
	rc = netread(c->req_read_pipe, &hdr, sizeof(hdr));

	if (0 == rc) {
		/* EOF: parent closed its end, time to terminate */
		TRACE(4, ("parent closed request pipe, child %d terminating\n",
			  c->pid));
	} else if (rc != sizeof(hdr)) {
		msyslog(LOG_ERR,
			"receive_blocking_req_internal: short header read (%zu of %zu), %m",
			rc, sizeof(hdr));
	} else {
		/* sanity-bound the advertised size before allocating */
		INSIST(sizeof(hdr) < hdr.octets && hdr.octets < 4 * 1024);
		req = emalloc(hdr.octets);
		memcpy(req, &hdr, sizeof(*req));
		/* read the payload that follows the header in-place */
		octets = hdr.octets - sizeof(hdr);
		rc = netread(c->req_read_pipe, (char *)(req + 1),
			     octets);

		if (rc != octets)
			msyslog(LOG_ERR,
				"receive_blocking_req_internal: short read (%zu of %zu), %m",
				rc, octets);
		else if (BLOCKING_REQ_MAGIC != req->magic_sig)
			msyslog(LOG_ERR,
				"receive_blocking_req_internal: packet header mismatch (0x%x)",
				req->magic_sig);
		else
			return req;
	}

	if (req != NULL)
		free(req);

	return NULL;
}
344 
345 
346 int
347 send_blocking_resp_internal(
348 	blocking_child *	c,
349 	blocking_pipe_header *	resp
350 	)
351 {
352 	size_t	octets;
353 	size_t	rc;
354 
355 	DEBUG_REQUIRE(-1 != c->resp_write_pipe);
356 
357 	octets = resp->octets;
358 	rc = netwrite(c->resp_write_pipe, resp, octets);
359 	free(resp);
360 
361 	if (octets == rc)
362 		return 0;
363 
364 	TRACE(1, ("send_blocking_resp_internal: short write (%zu of %zu), %m\n",
365 		  rc, octets));
366 	return -1;
367 }
368 
369 
/*
 * receive_blocking_resp_internal() runs in the parent.
 *
 * Read one complete response from the worker's response pipe and
 * return it in freshly-allocated storage (the caller frees it).  On
 * EOF (the normal child-exited indication) or any short/malformed
 * transfer, the child slot is cleaned up for reuse and NULL is
 * returned.
 */
blocking_pipe_header *
receive_blocking_resp_internal(
	blocking_child *	c
	)
{
	blocking_pipe_header	hdr;
	blocking_pipe_header *	resp;
	size_t			rc;
	size_t			octets;

	DEBUG_REQUIRE(c->resp_read_pipe != -1);

	resp = NULL;
	rc = netread(c->resp_read_pipe, &hdr, sizeof(hdr));

	if (0 == rc) {
		/* this is the normal child exited indication */
	} else if (rc != sizeof(hdr)) {
		TRACE(1, ("receive_blocking_resp_internal: short header read (%zu of %zu), %m\n",
			  rc, sizeof(hdr)));
	} else if (BLOCKING_RESP_MAGIC != hdr.magic_sig) {
		TRACE(1, ("receive_blocking_resp_internal: header mismatch (0x%x)\n",
			  hdr.magic_sig));
	} else {
		/* sanity-bound the advertised size before allocating */
		INSIST(sizeof(hdr) < hdr.octets &&
		       hdr.octets < 16 * 1024);
		resp = emalloc(hdr.octets);
		memcpy(resp, &hdr, sizeof(*resp));
		/* read the payload that follows the header in-place */
		octets = hdr.octets - sizeof(hdr);
		rc = netread(c->resp_read_pipe, (char *)(resp + 1),
			     octets);

		if (rc != octets)
			TRACE(1, ("receive_blocking_resp_internal: short read (%zu of %zu), %m\n",
				  rc, octets));
		else
			return resp;
	}

	/* EOF or error path: reap the child and recycle the slot */
	cleanup_after_child(c);

	if (resp != NULL)
		free(resp);

	return NULL;
}
416 
417 
418 #if defined(HAVE_DROPROOT) && defined(WORK_FORK)
419 void
420 fork_deferred_worker(void)
421 {
422 	u_int			idx;
423 	blocking_child *	c;
424 
425 	REQUIRE(droproot && root_dropped);
426 
427 	for (idx = 0; idx < blocking_children_alloc; idx++) {
428 		c = blocking_children[idx];
429 		if (NULL == c)
430 			continue;
431 		if (-1 != c->req_write_pipe && 0 == c->pid)
432 			fork_blocking_child(c);
433 	}
434 }
435 #endif
436 
437 #if HAVE_SETPROCTITLE == 0
438 static void
439 setproctitle(const char *fmt, ...)
440 {
441 	va_list ap;
442 	char b1[128];
443 	int argcc, argvlen, l;
444 
445 	if (saved_argc == 0)
446 		return;
447 
448 	va_start(ap, fmt);
449 	vsnprintf(b1, sizeof(b1), fmt, ap);
450 	va_end(ap);
451 
452 	/* Clear argv */
453 	for (argvlen = 0, argcc = 0; argcc < saved_argc; argcc++) {
454 		l = strlen(saved_argv[argcc]);
455 		argvlen += l + 1;
456 		memset(saved_argv[argcc], 0, l);
457 	}
458 	l = snprintf(saved_argv[0], argvlen, "ntpd: %s", b1);
459 	for (argcc = 1; argcc < saved_argc; argcc++)
460 		saved_argv[argcc] = &saved_argv[0][l];
461 }
462 #endif
463 
/*
 * fork_blocking_child() - create the worker child process.
 *
 * Runs in the parent.  On first use it creates both pipe pairs and
 * installs the atexit handler; the fork may then be deferred until
 * root is dropped.  After fork(), the parent arm records the child
 * pid and wires the response pipe into the I/O loop; the child arm
 * closes unneeded descriptors, resets signal handling and logging,
 * adjusts user/group ids, and enters blocking_child_common(), never
 * returning (it leaves via exit_worker()).
 */
static void
fork_blocking_child(
	blocking_child *	c
	)
{
	static int	atexit_installed;
	static int	blocking_pipes[4] = { -1, -1, -1, -1 };
	int		rc;
	int		was_pipe;
	int		is_pipe;
	int		saved_errno = 0;
	int		childpid;
	int		keep_fd;
	int		fd;

	/*
	 * parent and child communicate via a pair of pipes.
	 *
	 * 0 child read request
	 * 1 parent write request
	 * 2 parent read response
	 * 3 child write response
	 */
	if (-1 == c->req_write_pipe) {
		rc = pipe_socketpair(&blocking_pipes[0], &was_pipe);
		if (0 != rc) {
			saved_errno = errno;
		} else {
			rc = pipe_socketpair(&blocking_pipes[2], &is_pipe);
			if (0 != rc) {
				/* undo the first pair before bailing */
				saved_errno = errno;
				close(blocking_pipes[0]);
				close(blocking_pipes[1]);
			} else {
				INSIST(was_pipe == is_pipe);
			}
		}
		if (0 != rc) {
			errno = saved_errno;
			msyslog(LOG_ERR, "unable to create worker pipes: %m");
			exit(1);
		}

		/*
		 * Move the descriptors the parent will keep open out of the
		 * low descriptors preferred by C runtime buffered FILE *.
		 */
		c->req_write_pipe = move_fd(blocking_pipes[1]);
		c->resp_read_pipe = move_fd(blocking_pipes[2]);
		/*
		 * wake any worker child on orderly shutdown of the
		 * daemon so that it can notice the broken pipes and
		 * go away promptly.
		 */
		if (!atexit_installed) {
			atexit(&send_worker_home_atexit);
			atexit_installed = TRUE;
		}
	}

#if defined(HAVE_DROPROOT) && !defined(NEED_EARLY_FORK)
	/* defer the fork until after root is dropped */
	if (droproot && !root_dropped)
		return;
#endif
	/* flush buffered output so the child doesn't re-flush it */
	if (syslog_file != NULL)
		fflush(syslog_file);
	fflush(stdout);
	fflush(stderr);

	/* [BUG 3050] setting SIGCHLD to SIG_IGN likely causes unwanted
	 * or undefined effects. We don't do it and leave SIGCHLD alone.
	 */
	/* signal_no_reset(SIGCHLD, SIG_IGN); */

	childpid = fork();
	if (-1 == childpid) {
		msyslog(LOG_ERR, "unable to fork worker: %m");
		exit(1);
	}

	if (childpid) {
		/* this is the parent */
		TRACE(1, ("forked worker child (pid %d)\n", childpid));
		c->pid = childpid;
		c->ispipe = is_pipe;

		/* close the child's pipe descriptors. */
		close(blocking_pipes[0]);
		close(blocking_pipes[3]);

		memset(blocking_pipes, -1, sizeof(blocking_pipes));

		/* wire into I/O loop */
		(*addremove_io_fd)(c->resp_read_pipe, is_pipe, FALSE);

		return;		/* parent returns */
	}

	/*
	 * The parent gets the child pid as the return value of fork().
	 * The child must work for it.
	 */
	c->pid = getpid();
	worker_process = TRUE;

	/*
	 * Change the process name of the child to avoid confusion
	 * about ntpd running twice.
	 */
	setproctitle("asynchronous dns resolver");

	/*
	 * In the child, close all files except stdin, stdout, stderr,
	 * and the two child ends of the pipes.
	 */
	DEBUG_INSIST(-1 == c->req_read_pipe);
	DEBUG_INSIST(-1 == c->resp_write_pipe);
	c->req_read_pipe = blocking_pipes[0];
	c->resp_write_pipe = blocking_pipes[3];

	kill_asyncio(0);
	closelog();
	if (syslog_file != NULL) {
		/* child switches from file logging back to syslog */
		fclose(syslog_file);
		syslog_file = NULL;
		syslogit = TRUE;
	}
	keep_fd = max(c->req_read_pipe, c->resp_write_pipe);
	for (fd = 3; fd < keep_fd; fd++)
		if (fd != c->req_read_pipe &&
		    fd != c->resp_write_pipe)
			close(fd);
	close_all_beyond(keep_fd);
	/*
	 * We get signals from refclock serial I/O on NetBSD in the
	 * worker if we do not reset SIGIO's handler to the default.
	 * It is not conditionalized for NetBSD alone because on
	 * systems where it is not needed, it is harmless, and that
	 * allows us to handle unknown others with NetBSD behavior.
	 * [Bug 1386]
	 */
#if defined(USE_SIGIO)
	signal_no_reset(SIGIO, SIG_DFL);
#elif defined(USE_SIGPOLL)
	signal_no_reset(SIGPOLL, SIG_DFL);
#endif
	signal_no_reset(SIGHUP, worker_sighup);
	init_logging("ntp_intres", 0, FALSE);
	setup_logfile(NULL);

	(void) set_user_group_ids();

	/*
	 * And now back to the portable code
	 */
	exit_worker(blocking_child_common(c));
}
622 
623 
/*
 * worker_global_lock() - no-op stub; the fork implementation needs
 * no global lock (the thread implementation provides a real one).
 */
void worker_global_lock(int inOrOut)
{
	(void)inOrOut;	/* unused */
}
628 
629 #else	/* !WORK_FORK follows */
630 char work_fork_nonempty_compilation_unit;
631 #endif
632