xref: /netbsd-src/external/bsd/ntp/dist/libntp/ntp_intres.c (revision f8cf1a9151c7af1cb0bd8b09c13c66bca599c027)
1 /*	$NetBSD: ntp_intres.c,v 1.13 2024/08/18 20:47:13 christos Exp $	*/
2 
3 /*
4  * ntp_intres.c - Implements a generic blocking worker child or thread,
5  *		  initially to provide a nonblocking solution for DNS
6  *		  name to address lookups available with getaddrinfo().
7  *
8  * This is a new implementation as of 2009 sharing the filename and
9  * very little else with the prior implementation, which used a
10  * temporary file to receive a single set of requests from the parent,
11  * and a NTP mode 7 authenticated request to push back responses.
12  *
13  * A primary goal in rewriting this code was the need to support the
14  * pool configuration directive's requirement to retrieve multiple
15  * addresses resolving a single name, which has previously been
16  * satisfied with blocking resolver calls from the ntpd mainline code.
17  *
18  * A secondary goal is to provide a generic mechanism for other
19  * blocking operations to be delegated to a worker using a common
20  * model for both Unix and Windows ntpd.  ntp_worker.c, work_fork.c,
21  * and work_thread.c implement the generic mechanism.  This file
22  * implements the two current consumers, getaddrinfo_sometime() and the
23  * presently unused getnameinfo_sometime().
24  *
25  * Both routines deliver results to a callback and manage memory
26  * allocation, meaning there is no freeaddrinfo_sometime().
27  *
28  * The initial implementation for Unix uses a pair of unidirectional
29  * pipes, one each for requests and responses, connecting the forked
30  * blocking child worker with the ntpd mainline.  The threaded code
31  * uses arrays of pointers to queue requests and responses.
32  *
33  * The parent drives the process, including scheduling sleeps between
34  * retries.
35  *
36  * Memory is managed differently for a child process, which mallocs
37  * request buffers to read from the pipe into, whereas the threaded
38  * code mallocs a copy of the request to hand off to the worker via
39  * the queueing array.  The resulting request buffer is free()d by
40  * platform-independent code.  A wrinkle is the request needs to be
41  * available to the requestor during response processing.
42  *
43  * Response memory allocation is also platform-dependent.  With a
44  * separate process and pipes, the response is free()d after being
45  * written to the pipe.  With threads, the same memory is handed
46  * over and the requestor frees it after processing is completed.
47  *
48  * The code should be generalized to support threads on Unix using
49  * much of the same code used for Windows initially.
50  *
51  */
52 #ifdef HAVE_CONFIG_H
53 # include <config.h>
54 #endif
55 
56 #include "ntp_workimpl.h"
57 
58 #ifdef WORKER
59 
60 #include <stdio.h>
61 #include <ctype.h>
62 #include <signal.h>
63 
64 /**/
65 #ifdef HAVE_SYS_TYPES_H
66 # include <sys/types.h>
67 #endif
68 #ifdef HAVE_NETINET_IN_H
69 #include <netinet/in.h>
70 #endif
71 #include <arpa/inet.h>
72 /**/
73 #ifdef HAVE_SYS_PARAM_H
74 # include <sys/param.h>
75 #endif
76 
77 #if !defined(HAVE_RES_INIT) && defined(HAVE___RES_INIT)
78 # define HAVE_RES_INIT
79 #endif
80 
81 #if defined(HAVE_RESOLV_H) && defined(HAVE_RES_INIT)
82 # ifdef HAVE_ARPA_NAMESER_H
83 #  include <arpa/nameser.h> /* DNS HEADER struct */
84 # endif
85 # ifdef HAVE_NETDB_H
86 #  include <netdb.h>
87 # endif
88 # include <resolv.h>
89 #endif
90 
91 #include "ntp.h"
92 #include "ntp_debug.h"
93 #include "ntp_malloc.h"
94 #include "ntp_syslog.h"
95 #include "ntp_unixtime.h"
96 #include "ntp_intres.h"
97 #include "intreswork.h"
98 
99 
100 /*
101  * Following are implementations of getaddrinfo_sometime() and
102  * getnameinfo_sometime().  Each is implemented in three routines:
103  *
104  * getaddrinfo_sometime()		getnameinfo_sometime()
105  * blocking_getaddrinfo()		blocking_getnameinfo()
106  * getaddrinfo_sometime_complete()	getnameinfo_sometime_complete()
107  *
108  * The first runs in the parent and marshalls (or serializes) request
109  * parameters into a request blob which is processed in the child by
110  * the second routine, blocking_*(), which serializes the results into
111  * a response blob unpacked by the third routine, *_complete(), which
112  * calls the callback routine provided with the request and frees
113  * _request_ memory allocated by the first routine.  Response memory
114  * is managed by the code which calls the *_complete routines.
115  */
116 
117 
118 /* === typedefs === */
/*
 * Marshalled getaddrinfo() request.  The fixed-size header below is
 * immediately followed in the same allocation by nodesize bytes of
 * NUL-terminated node name and then servsize bytes of NUL-terminated
 * service name.
 */
typedef struct blocking_gai_req_tag {	/* marshalled args */
	size_t			octets;		/* header plus both trailing strings */
	u_int			dns_idx;	/* index into dnschild_contexts[] */
	time_t			scheduled;	/* when this (re)try was queued */
	time_t			earliest;	/* worker sleeps until this time */
	int			retry;		/* current retry interval, <= 0 means no retry */
	struct addrinfo		hints;		/* copy of caller's hints, zeroed if none */
	u_int			qflags;		/* GAIR_F_* query flags */
	gai_sometime_callback	callback;	/* completion routine */
	void *			context;	/* opaque pointer handed to callback */
	size_t			nodesize;	/* strlen(node) + 1 */
	size_t			servsize;	/* strlen(service) + 1 */
} blocking_gai_req;
132 
/*
 * Marshalled getaddrinfo() response header built by
 * blocking_getaddrinfo() and unpacked by
 * getaddrinfo_sometime_complete().
 */
typedef struct blocking_gai_resp_tag {
	size_t			octets;		/* this header plus all trailing data */
	int			retcode;	/* getaddrinfo() return value */
	int			retry;		/* retry interval copied from the request */
	int			gai_errno; /* for EAI_SYSTEM case */
	int			ai_count;	/* number of addrinfo entries that follow */
	/*
	 * Followed by ai_count struct addrinfo and then ai_count
	 * sockaddr_u and finally the canonical name strings.
	 */
} blocking_gai_resp;
144 
/*
 * Marshalled getnameinfo() request.  Unlike the getaddrinfo request
 * there is no trailing data; the address travels in socku.
 */
typedef struct blocking_gni_req_tag {
	size_t			octets;		/* sizeof(blocking_gni_req) */
	u_int			dns_idx;	/* index into dnschild_contexts[] */
	time_t			scheduled;	/* when this (re)try was queued */
	time_t			earliest;	/* worker sleeps until this time */
	int			retry;		/* current retry interval, <= 0 means no retry */
	size_t			hostoctets;	/* host buffer length passed to getnameinfo() */
	size_t			servoctets;	/* service buffer length passed to getnameinfo() */
	int			flags;		/* getnameinfo() flags */
	gni_sometime_callback	callback;	/* completion routine */
	void *			context;	/* opaque pointer handed to callback */
	sockaddr_u		socku;		/* address to look up */
} blocking_gni_req;
158 
/*
 * Marshalled getnameinfo() response header built by
 * blocking_getnameinfo() and unpacked by
 * getnameinfo_sometime_complete().
 */
typedef struct blocking_gni_resp_tag {
	size_t			octets;		/* this header plus both trailing strings */
	int			retcode;	/* getnameinfo() return value */
	int			gni_errno; /* for EAI_SYSTEM case */
	int			retry;		/* retry interval copied from the request */
	size_t			hostoctets;	/* bytes of host string incl. NUL, 0 on error */
	size_t			servoctets;	/* bytes of service string incl. NUL, 0 on error */
	/*
	 * Followed by hostoctets bytes of null-terminated host,
	 * then servoctets bytes of null-terminated service.
	 */
} blocking_gni_resp;
171 
172 /* per-DNS-worker state in parent */
/*
 * Parent-side bookkeeping for one DNS worker, stored in
 * dnschild_contexts[] and selected by a request's dns_idx.
 */
typedef struct dnschild_ctx_tag {
	u_int	index;			/* not referenced by the code in this file */
	time_t	next_dns_timeslot;	/* earliest time the next request may run */
} dnschild_ctx;
177 
178 /* per-DNS-worker state in worker */
/*
 * Worker-side bookkeeping for one DNS worker, stored in
 * dnsworker_contexts[] and selected by a request's dns_idx.
 */
typedef struct dnsworker_ctx_tag {
	blocking_child *	c;	/* child handle passed to worker_sleep() */
	time_t			ignore_scheduled_before; /* skip sleeps for requests scheduled earlier */
#ifdef HAVE_RES_INIT
	time_t	next_res_init;		/* next time reload_resolv_conf() may call res_init() */
#endif
} dnsworker_ctx;
186 
187 
188 /* === variables === */
/*
 * Both arrays hold pointers indexed by dns_idx and are grown on
 * demand; the *_alloc counters give the number of slots allocated.
 * dnschild_contexts is grown by reserve_dnschild_ctx(),
 * dnsworker_contexts by get_worker_context().
 */
dnschild_ctx **		dnschild_contexts;		/* parent */
u_int			dnschild_contexts_alloc;
dnsworker_ctx **	dnsworker_contexts;		/* child */
u_int			dnsworker_contexts_alloc;

#ifdef HAVE_RES_INIT
/* shared copy of the next res_init() time, see reload_resolv_conf() */
static	time_t		next_res_init;
#endif
197 
198 
199 /* === forward declarations === */
/* parent-side context management */
static	u_int		reserve_dnschild_ctx(void);
static	u_int		get_dnschild_ctx(void);

/* worker-side context management and scheduling */
static	dnsworker_ctx *	get_worker_context(blocking_child *c,
					   u_int idx);
static	void		scheduled_sleep(time_t scheduled,
					time_t earliest,
					dnsworker_ctx *worker_ctx);

/* retry policy helpers used by the *_complete() routines */
static	void		manage_dns_retry_interval(time_t *pscheduled,
						  time_t *pwhen,
						  int *pretry,
						  time_t *pnext_timeslot,
						  int/*BOOL*/ forever);
static	int		should_retry_dns(int rescode, int res_errno);

/* without res_init() reloading resolv.conf degenerates to a no-op */
#ifdef HAVE_RES_INIT
static	void		reload_resolv_conf(dnsworker_ctx *worker_ctx);
#else
# define		reload_resolv_conf(wc)		\
	do {						\
		(void)(wc);				\
	} while (FALSE)
#endif

/* completion handlers run in the parent when a response arrives */
static	void		getaddrinfo_sometime_complete(blocking_work_req rtype,
						      void *context,
						      size_t respsize,
						      void *resp);
static	void		getnameinfo_sometime_complete(blocking_work_req rtype,
						      void *context,
						      size_t respsize,
						      void *resp);
223 
224 
225 /* === functions === */
226 /*
227  * getaddrinfo_sometime - uses blocking child to call getaddrinfo then
228  *			  invokes provided callback completion function.
229  */
230 int
231 getaddrinfo_sometime_ex(
232 	const char *		node,
233 	const char *		service,
234 	const struct addrinfo *	hints,
235 	int			retry,
236 	gai_sometime_callback	callback,
237 	void *			context,
238 	u_int			qflags
239 	)
240 {
241 	blocking_gai_req *	gai_req;
242 	u_int			idx;
243 	dnschild_ctx *		child_ctx;
244 	size_t			req_size;
245 	size_t			nodesize;
246 	size_t			servsize;
247 	time_t			now;
248 
249 	REQUIRE(NULL != node);
250 	if (NULL != hints) {
251 		REQUIRE(0 == hints->ai_addrlen);
252 		REQUIRE(NULL == hints->ai_addr);
253 		REQUIRE(NULL == hints->ai_canonname);
254 		REQUIRE(NULL == hints->ai_next);
255 	}
256 
257 	idx = get_dnschild_ctx();
258 	child_ctx = dnschild_contexts[idx];
259 
260 	nodesize = strlen(node) + 1;
261 	servsize = strlen(service) + 1;
262 	req_size = sizeof(*gai_req) + nodesize + servsize;
263 
264 	gai_req = emalloc_zero(req_size);
265 
266 	gai_req->octets = req_size;
267 	gai_req->dns_idx = idx;
268 	now = time(NULL);
269 	gai_req->scheduled = now;
270 	gai_req->earliest = max(now, child_ctx->next_dns_timeslot);
271 	child_ctx->next_dns_timeslot = gai_req->earliest;
272 	if (hints != NULL)
273 		gai_req->hints = *hints;
274 	gai_req->retry = retry;
275 	gai_req->callback = callback;
276 	gai_req->context = context;
277 	gai_req->nodesize = nodesize;
278 	gai_req->servsize = servsize;
279 	gai_req->qflags = qflags;
280 
281 	memcpy((char *)gai_req + sizeof(*gai_req), node, nodesize);
282 	memcpy((char *)gai_req + sizeof(*gai_req) + nodesize, service,
283 	       servsize);
284 
285 	if (queue_blocking_request(
286 		BLOCKING_GETADDRINFO,
287 		gai_req,
288 		req_size,
289 		&getaddrinfo_sometime_complete,
290 		gai_req)) {
291 
292 		msyslog(LOG_ERR, "unable to queue getaddrinfo request");
293 		errno = EFAULT;
294 		return -1;
295 	}
296 
297 	return 0;
298 }
299 
300 int
301 blocking_getaddrinfo(
302 	blocking_child *	c,
303 	blocking_pipe_header *	req
304 	)
305 {
306 	blocking_gai_req *	gai_req;
307 	dnsworker_ctx *		worker_ctx;
308 	blocking_pipe_header *	resp;
309 	blocking_gai_resp *	gai_resp;
310 	char *			node;
311 	char *			service;
312 	struct addrinfo *	ai_res;
313 	struct addrinfo *	ai;
314 	struct addrinfo *	serialized_ai;
315 	size_t			canons_octets;
316 	size_t			this_octets;
317 	size_t			resp_octets;
318 	char *			cp;
319 	time_t			time_now;
320 
321 	gai_req = (void *)((char *)req + sizeof(*req));
322 	node = (char *)gai_req + sizeof(*gai_req);
323 	service = node + gai_req->nodesize;
324 
325 	worker_ctx = get_worker_context(c, gai_req->dns_idx);
326 	scheduled_sleep(gai_req->scheduled, gai_req->earliest,
327 			worker_ctx);
328 	reload_resolv_conf(worker_ctx);
329 
330 	/*
331 	 * Take a shot at the final size, better to overestimate
332 	 * at first and then realloc to a smaller size.
333 	 */
334 
335 	resp_octets = sizeof(*resp) + sizeof(*gai_resp) +
336 		      16 * (sizeof(struct addrinfo) +
337 			    sizeof(sockaddr_u)) +
338 		      256;
339 	resp = emalloc_zero(resp_octets);
340 	gai_resp = (void *)(resp + 1);
341 
342 	TRACE(2, ("blocking_getaddrinfo given node %s serv %s fam %d flags %x\n",
343 		  node, service, gai_req->hints.ai_family,
344 		  gai_req->hints.ai_flags));
345 #ifdef DEBUG
346 	if (debug >= 2)
347 		fflush(stdout);
348 #endif
349 	ai_res = NULL;
350 	gai_resp->retcode = getaddrinfo(node, service, &gai_req->hints,
351 					&ai_res);
352 	gai_resp->retry = gai_req->retry;
353 #ifdef EAI_SYSTEM
354 	if (EAI_SYSTEM == gai_resp->retcode)
355 		gai_resp->gai_errno = errno;
356 #endif
357 	canons_octets = 0;
358 
359 	if (0 == gai_resp->retcode) {
360 		ai = ai_res;
361 		while (NULL != ai) {
362 			gai_resp->ai_count++;
363 			if (ai->ai_canonname)
364 				canons_octets += strlen(ai->ai_canonname) + 1;
365 			ai = ai->ai_next;
366 		}
367 		/*
368 		 * If this query succeeded only after retrying, DNS may have
369 		 * just become responsive.  Ignore previously-scheduled
370 		 * retry sleeps once for each pending request, similar to
371 		 * the way scheduled_sleep() does when its worker_sleep()
372 		 * is interrupted.
373 		 */
374 		if (gai_resp->retry > INITIAL_DNS_RETRY) {
375 			time_now = time(NULL);
376 			worker_ctx->ignore_scheduled_before = time_now;
377 			TRACE(1, ("DNS success after retry, ignoring sleeps scheduled before now (%s)\n",
378 				  humantime(time_now)));
379 		}
380 	}
381 
382 	/*
383 	 * Our response consists of a header, followed by ai_count
384 	 * addrinfo structs followed by ai_count sockaddr_storage
385 	 * structs followed by the canonical names.
386 	 */
387 	gai_resp->octets = sizeof(*gai_resp)
388 			    + gai_resp->ai_count
389 				* (sizeof(gai_req->hints)
390 				   + sizeof(sockaddr_u))
391 			    + canons_octets;
392 
393 	resp_octets = sizeof(*resp) + gai_resp->octets;
394 	resp = erealloc(resp, resp_octets);
395 	gai_resp = (void *)(resp + 1);
396 
397 	/* cp serves as our current pointer while serializing */
398 	cp = (void *)(gai_resp + 1);
399 	canons_octets = 0;
400 
401 	if (0 == gai_resp->retcode) {
402 		ai = ai_res;
403 		while (NULL != ai) {
404 			memcpy(cp, ai, sizeof(*ai));
405 			serialized_ai = (void *)cp;
406 			cp += sizeof(*ai);
407 
408 			/* transform ai_canonname into offset */
409 			if (NULL != ai->ai_canonname) {
410 				serialized_ai->ai_canonname = (char *)canons_octets;
411 				canons_octets += strlen(ai->ai_canonname) + 1;
412 			}
413 
414 			/* leave fixup of ai_addr pointer for receiver */
415 
416 			ai = ai->ai_next;
417 		}
418 
419 		ai = ai_res;
420 		while (NULL != ai) {
421 			INSIST(ai->ai_addrlen <= sizeof(sockaddr_u));
422 			memcpy(cp, ai->ai_addr, ai->ai_addrlen);
423 			cp += sizeof(sockaddr_u);
424 
425 			ai = ai->ai_next;
426 		}
427 
428 		ai = ai_res;
429 		while (NULL != ai) {
430 			if (NULL != ai->ai_canonname) {
431 				this_octets = strlen(ai->ai_canonname) + 1;
432 				memcpy(cp, ai->ai_canonname, this_octets);
433 				cp += this_octets;
434 			}
435 
436 			ai = ai->ai_next;
437 		}
438 		freeaddrinfo(ai_res);
439 	}
440 
441 	/*
442 	 * make sure our walk and earlier calc match
443 	 */
444 	DEBUG_INSIST((size_t)(cp - (char *)resp) == resp_octets);
445 
446 	if (queue_blocking_response(c, resp, resp_octets, req)) {
447 		msyslog(LOG_ERR, "blocking_getaddrinfo can not queue response");
448 		return -1;
449 	}
450 
451 	return 0;
452 }
453 
454 int
455 getaddrinfo_sometime(
456 	const char *		node,
457 	const char *		service,
458 	const struct addrinfo *	hints,
459 	int			retry,
460 	gai_sometime_callback	callback,
461 	void *			context
462 	)
463 {
464 	return getaddrinfo_sometime_ex(node, service, hints, retry,
465 				       callback, context, 0);
466 }
467 
468 
469 static void
470 getaddrinfo_sometime_complete(
471 	blocking_work_req	rtype,
472 	void *			context,
473 	size_t			respsize,
474 	void *			resp
475 	)
476 {
477 	blocking_gai_req *	gai_req;
478 	blocking_gai_resp *	gai_resp;
479 	dnschild_ctx *		child_ctx;
480 	struct addrinfo *	ai;
481 	struct addrinfo *	next_ai;
482 	sockaddr_u *		psau;
483 	char *			node;
484 	char *			service;
485 	char *			canon_start;
486 	time_t			time_now;
487 	int			again, noerr;
488 	int			af;
489 	const char *		fam_spec;
490 	int			i;
491 
492 	gai_req = context;
493 	gai_resp = resp;
494 
495 	DEBUG_REQUIRE(BLOCKING_GETADDRINFO == rtype);
496 	DEBUG_REQUIRE(respsize == gai_resp->octets);
497 
498 	node = (char *)gai_req + sizeof(*gai_req);
499 	service = node + gai_req->nodesize;
500 
501 	child_ctx = dnschild_contexts[gai_req->dns_idx];
502 
503 	if (0 == gai_resp->retcode) {
504 		/*
505 		 * If this query succeeded only after retrying, DNS may have
506 		 * just become responsive.
507 		 */
508 		if (gai_resp->retry > INITIAL_DNS_RETRY) {
509 			time_now = time(NULL);
510 			child_ctx->next_dns_timeslot = time_now;
511 			TRACE(1, ("DNS success after retry, %u next_dns_timeslot reset (%s)\n",
512 				  gai_req->dns_idx, humantime(time_now)));
513 		}
514 	} else {
515 		noerr = !!(gai_req->qflags & GAIR_F_IGNDNSERR);
516 		again = noerr || should_retry_dns(
517 					gai_resp->retcode, gai_resp->gai_errno);
518 		/*
519 		 * exponential backoff of DNS retries to 64s
520 		 */
521 		if (gai_req->retry > 0 && again) {
522 			/* log the first retry only */
523 			if (INITIAL_DNS_RETRY == gai_req->retry)
524 				NLOG(NLOG_SYSINFO) {
525 					af = gai_req->hints.ai_family;
526 					fam_spec = (AF_INET6 == af)
527 						       ? " (AAAA)"
528 						       : (AF_INET == af)
529 							     ? " (A)"
530 							     : "";
531 #ifdef EAI_SYSTEM
532 					if (EAI_SYSTEM == gai_resp->retcode) {
533 						errno = gai_resp->gai_errno;
534 						msyslog(LOG_INFO,
535 							"retrying DNS %s%s: EAI_SYSTEM %d: %m",
536 							node, fam_spec,
537 							gai_resp->gai_errno);
538 					} else
539 #endif
540 						msyslog(LOG_INFO,
541 							"retrying DNS %s%s: %s (%d)",
542 							node, fam_spec,
543 							gai_strerror(gai_resp->retcode),
544 							gai_resp->retcode);
545 				}
546 			manage_dns_retry_interval(
547 				&gai_req->scheduled, &gai_req->earliest,
548 				&gai_req->retry, &child_ctx->next_dns_timeslot,
549 				noerr);
550 			if (!queue_blocking_request(
551 					BLOCKING_GETADDRINFO,
552 					gai_req,
553 					gai_req->octets,
554 					&getaddrinfo_sometime_complete,
555 					gai_req))
556 				return;
557 			else
558 				msyslog(LOG_ERR,
559 					"unable to retry hostname %s",
560 					node);
561 		}
562 	}
563 
564 	/*
565 	 * fixup pointers in returned addrinfo array
566 	 */
567 	ai = (void *)((char *)gai_resp + sizeof(*gai_resp));
568 	next_ai = NULL;
569 	for (i = gai_resp->ai_count - 1; i >= 0; i--) {
570 		ai[i].ai_next = next_ai;
571 		next_ai = &ai[i];
572 	}
573 
574 	psau = (void *)((char *)ai + gai_resp->ai_count * sizeof(*ai));
575 	canon_start = (char *)psau + gai_resp->ai_count * sizeof(*psau);
576 
577 	for (i = 0; i < gai_resp->ai_count; i++) {
578 		if (NULL != ai[i].ai_addr)
579 			ai[i].ai_addr = &psau->sa;
580 		psau++;
581 		if (NULL != ai[i].ai_canonname)
582 			ai[i].ai_canonname += (size_t)canon_start;
583 	}
584 
585 	ENSURE((char *)psau == canon_start);
586 
587 	if (!gai_resp->ai_count)
588 		ai = NULL;
589 
590 	(*gai_req->callback)(gai_resp->retcode, gai_resp->gai_errno,
591 			     gai_req->context, node, service,
592 			     &gai_req->hints, ai);
593 
594 	free(gai_req);
595 	/* gai_resp is part of block freed by process_blocking_resp() */
596 }
597 
598 
599 #ifdef TEST_BLOCKING_WORKER
600 void gai_test_callback(int rescode, int gai_errno, void *context, const char *name, const char *service, const struct addrinfo *hints, const struct addrinfo *ai_res)
601 {
602 	sockaddr_u addr;
603 
604 	if (rescode) {
605 		TRACE(1, ("gai_test_callback context %p error rescode %d %s serv %s\n",
606 			  context, rescode, name, service));
607 		return;
608 	}
609 	while (!rescode && NULL != ai_res) {
610 		ZERO_SOCK(&addr);
611 		memcpy(&addr, ai_res->ai_addr, ai_res->ai_addrlen);
612 		TRACE(1, ("ctx %p fam %d addr %s canon '%s' type %s at %p ai_addr %p ai_next %p\n",
613 			  context,
614 			  AF(&addr),
615 			  stoa(&addr),
616 			  (ai_res->ai_canonname)
617 			      ? ai_res->ai_canonname
618 			      : "",
619 			  (SOCK_DGRAM == ai_res->ai_socktype)
620 			      ? "DGRAM"
621 			      : (SOCK_STREAM == ai_res->ai_socktype)
622 				    ? "STREAM"
623 				    : "(other)",
624 			  ai_res,
625 			  ai_res->ai_addr,
626 			  ai_res->ai_next));
627 
628 		getnameinfo_sometime((sockaddr_u *)ai_res->ai_addr, 128, 32, 0, gni_test_callback, context);
629 
630 		ai_res = ai_res->ai_next;
631 	}
632 }
633 #endif	/* TEST_BLOCKING_WORKER */
634 
635 
636 int
637 getnameinfo_sometime(
638 	sockaddr_u *		psau,
639 	size_t			hostoctets,
640 	size_t			servoctets,
641 	int			flags,
642 	gni_sometime_callback	callback,
643 	void *			context
644 	)
645 {
646 	blocking_gni_req *	gni_req;
647 	u_int			idx;
648 	dnschild_ctx *		child_ctx;
649 	time_t			time_now;
650 
651 	REQUIRE(hostoctets);
652 	REQUIRE(hostoctets + servoctets < 1024);
653 
654 	idx = get_dnschild_ctx();
655 	child_ctx = dnschild_contexts[idx];
656 
657 	gni_req = emalloc_zero(sizeof(*gni_req));
658 
659 	gni_req->octets = sizeof(*gni_req);
660 	gni_req->dns_idx = idx;
661 	time_now = time(NULL);
662 	gni_req->scheduled = time_now;
663 	gni_req->earliest = max(time_now, child_ctx->next_dns_timeslot);
664 	child_ctx->next_dns_timeslot = gni_req->earliest;
665 	memcpy(&gni_req->socku, psau, SOCKLEN(psau));
666 	gni_req->hostoctets = hostoctets;
667 	gni_req->servoctets = servoctets;
668 	gni_req->flags = flags;
669 	gni_req->retry = INITIAL_DNS_RETRY;
670 	gni_req->callback = callback;
671 	gni_req->context = context;
672 
673 	if (queue_blocking_request(
674 		BLOCKING_GETNAMEINFO,
675 		gni_req,
676 		sizeof(*gni_req),
677 		&getnameinfo_sometime_complete,
678 		gni_req)) {
679 
680 		msyslog(LOG_ERR, "unable to queue getnameinfo request");
681 		errno = EFAULT;
682 		return -1;
683 	}
684 
685 	return 0;
686 }
687 
688 
689 int
690 blocking_getnameinfo(
691 	blocking_child *	c,
692 	blocking_pipe_header *	req
693 	)
694 {
695 	blocking_gni_req *	gni_req;
696 	dnsworker_ctx *		worker_ctx;
697 	blocking_pipe_header *	resp;
698 	blocking_gni_resp *	gni_resp;
699 	size_t			octets;
700 	size_t			resp_octets;
701 	char *			service;
702 	char *			cp;
703 	int			rc;
704 	time_t			time_now;
705 	char			host[1024];
706 
707 	gni_req = (void *)((char *)req + sizeof(*req));
708 
709 	octets = gni_req->hostoctets + gni_req->servoctets;
710 
711 	/*
712 	 * Some alloca() implementations are fragile regarding
713 	 * large allocations.  We only need room for the host
714 	 * and service names.
715 	 */
716 	REQUIRE(octets < sizeof(host));
717 	service = host + gni_req->hostoctets;
718 
719 	worker_ctx = get_worker_context(c, gni_req->dns_idx);
720 	scheduled_sleep(gni_req->scheduled, gni_req->earliest,
721 			worker_ctx);
722 	reload_resolv_conf(worker_ctx);
723 
724 	/*
725 	 * Take a shot at the final size, better to overestimate
726 	 * then realloc to a smaller size.
727 	 */
728 
729 	resp_octets = sizeof(*resp) + sizeof(*gni_resp) + octets;
730 	resp = emalloc_zero(resp_octets);
731 	gni_resp = (void *)((char *)resp + sizeof(*resp));
732 
733 	TRACE(2, ("blocking_getnameinfo given addr %s flags 0x%x hostlen %lu servlen %lu\n",
734 		  stoa(&gni_req->socku), gni_req->flags,
735 		  (u_long)gni_req->hostoctets, (u_long)gni_req->servoctets));
736 
737 	gni_resp->retcode = getnameinfo(&gni_req->socku.sa,
738 					SOCKLEN(&gni_req->socku),
739 					host,
740 					gni_req->hostoctets,
741 					service,
742 					gni_req->servoctets,
743 					gni_req->flags);
744 	gni_resp->retry = gni_req->retry;
745 #ifdef EAI_SYSTEM
746 	if (EAI_SYSTEM == gni_resp->retcode)
747 		gni_resp->gni_errno = errno;
748 #endif
749 
750 	if (0 != gni_resp->retcode) {
751 		gni_resp->hostoctets = 0;
752 		gni_resp->servoctets = 0;
753 	} else {
754 		gni_resp->hostoctets = strlen(host) + 1;
755 		gni_resp->servoctets = strlen(service) + 1;
756 		/*
757 		 * If this query succeeded only after retrying, DNS may have
758 		 * just become responsive.  Ignore previously-scheduled
759 		 * retry sleeps once for each pending request, similar to
760 		 * the way scheduled_sleep() does when its worker_sleep()
761 		 * is interrupted.
762 		 */
763 		if (gni_req->retry > INITIAL_DNS_RETRY) {
764 			time_now = time(NULL);
765 			worker_ctx->ignore_scheduled_before = time_now;
766 			TRACE(1, ("DNS success after retrying, ignoring sleeps scheduled before now (%s)\n",
767 				humantime(time_now)));
768 		}
769 	}
770 	octets = gni_resp->hostoctets + gni_resp->servoctets;
771 	/*
772 	 * Our response consists of a header, followed by the host and
773 	 * service strings, each null-terminated.
774 	 */
775 	resp_octets = sizeof(*resp) + sizeof(*gni_resp) + octets;
776 
777 	resp = erealloc(resp, resp_octets);
778 	gni_resp = (void *)(resp + 1);
779 
780 	gni_resp->octets = sizeof(*gni_resp) + octets;
781 
782 	/* cp serves as our current pointer while serializing */
783 	cp = (void *)(gni_resp + 1);
784 
785 	if (0 == gni_resp->retcode) {
786 		memcpy(cp, host, gni_resp->hostoctets);
787 		cp += gni_resp->hostoctets;
788 		memcpy(cp, service, gni_resp->servoctets);
789 		cp += gni_resp->servoctets;
790 	}
791 
792 	INSIST((size_t)(cp - (char *)resp) == resp_octets);
793 	INSIST(resp_octets - sizeof(*resp) == gni_resp->octets);
794 
795 	rc = queue_blocking_response(c, resp, resp_octets, req);
796 	if (rc)
797 		msyslog(LOG_ERR, "blocking_getnameinfo unable to queue response");
798 	return rc;
799 }
800 
801 
802 static void
803 getnameinfo_sometime_complete(
804 	blocking_work_req	rtype,
805 	void *			context,
806 	size_t			respsize,
807 	void *			resp
808 	)
809 {
810 	blocking_gni_req *	gni_req;
811 	blocking_gni_resp *	gni_resp;
812 	dnschild_ctx *		child_ctx;
813 	char *			host;
814 	char *			service;
815 	time_t			time_now;
816 	int			again;
817 
818 	gni_req = context;
819 	gni_resp = resp;
820 
821 	DEBUG_REQUIRE(BLOCKING_GETNAMEINFO == rtype);
822 	DEBUG_REQUIRE(respsize == gni_resp->octets);
823 
824 	child_ctx = dnschild_contexts[gni_req->dns_idx];
825 
826 	if (0 == gni_resp->retcode) {
827 		/*
828 		 * If this query succeeded only after retrying, DNS may have
829 		 * just become responsive.
830 		 */
831 		if (gni_resp->retry > INITIAL_DNS_RETRY) {
832 			time_now = time(NULL);
833 			child_ctx->next_dns_timeslot = time_now;
834 			TRACE(1, ("DNS success after retry, %u next_dns_timeslot reset (%s)\n",
835 				  gni_req->dns_idx, humantime(time_now)));
836 		}
837 	} else {
838 		again = should_retry_dns(gni_resp->retcode, gni_resp->gni_errno);
839 		/*
840 		 * exponential backoff of DNS retries to 64s
841 		 */
842 		if (gni_req->retry > 0)
843 			manage_dns_retry_interval(&gni_req->scheduled,
844 			    &gni_req->earliest, &gni_req->retry,
845 						  &child_ctx->next_dns_timeslot, FALSE);
846 
847 		if (gni_req->retry > 0 && again) {
848 			if (!queue_blocking_request(
849 				BLOCKING_GETNAMEINFO,
850 				gni_req,
851 				gni_req->octets,
852 				&getnameinfo_sometime_complete,
853 				gni_req))
854 				return;
855 
856 			msyslog(LOG_ERR, "unable to retry reverse lookup of %s", stoa(&gni_req->socku));
857 		}
858 	}
859 
860 	if (!gni_resp->hostoctets) {
861 		host = NULL;
862 		service = NULL;
863 	} else {
864 		host = (char *)gni_resp + sizeof(*gni_resp);
865 		service = (gni_resp->servoctets)
866 			      ? host + gni_resp->hostoctets
867 			      : NULL;
868 	}
869 
870 	(*gni_req->callback)(gni_resp->retcode, gni_resp->gni_errno,
871 			     &gni_req->socku, gni_req->flags, host,
872 			     service, gni_req->context);
873 
874 	free(gni_req);
875 	/* gni_resp is part of block freed by process_blocking_resp() */
876 }
877 
878 
879 #ifdef TEST_BLOCKING_WORKER
880 void gni_test_callback(int rescode, int gni_errno, sockaddr_u *psau, int flags, const char *host, const char *service, void *context)
881 {
882 	if (!rescode)
883 		TRACE(1, ("gni_test_callback got host '%s' serv '%s' for addr %s context %p\n",
884 			  host, service, stoa(psau), context));
885 	else
886 		TRACE(1, ("gni_test_callback context %p rescode %d gni_errno %d flags 0x%x addr %s\n",
887 			  context, rescode, gni_errno, flags, stoa(psau)));
888 }
889 #endif	/* TEST_BLOCKING_WORKER */
890 
891 
892 #ifdef HAVE_RES_INIT
893 static void
894 reload_resolv_conf(
895 	dnsworker_ctx *	worker_ctx
896 	)
897 {
898 	time_t	time_now;
899 
900 	/*
901 	 * This is ad-hoc.  Reload /etc/resolv.conf once per minute
902 	 * to pick up on changes from the DHCP client.  [Bug 1226]
903 	 * When using threads for the workers, this needs to happen
904 	 * only once per minute process-wide.
905 	 */
906 	time_now = time(NULL);
907 # ifdef WORK_THREAD
908 	worker_ctx->next_res_init = next_res_init;
909 # endif
910 	if (worker_ctx->next_res_init <= time_now) {
911 		if (worker_ctx->next_res_init != 0)
912 			res_init();
913 		worker_ctx->next_res_init = time_now + 60;
914 # ifdef WORK_THREAD
915 		next_res_init = worker_ctx->next_res_init;
916 # endif
917 	}
918 }
919 #endif	/* HAVE_RES_INIT */
920 
921 
922 static u_int
923 reserve_dnschild_ctx(void)
924 {
925 	const size_t	ps = sizeof(dnschild_contexts[0]);
926 	const size_t	cs = sizeof(*dnschild_contexts[0]);
927 	u_int		c;
928 	u_int		new_alloc;
929 	size_t		octets;
930 	size_t		new_octets;
931 
932 	c = 0;
933 	while (TRUE) {
934 		for ( ; c < dnschild_contexts_alloc; c++) {
935 			if (NULL == dnschild_contexts[c]) {
936 				dnschild_contexts[c] = emalloc_zero(cs);
937 
938 				return c;
939 			}
940 		}
941 		new_alloc = dnschild_contexts_alloc + 20;
942 		new_octets = new_alloc * ps;
943 		octets = dnschild_contexts_alloc * ps;
944 		dnschild_contexts = erealloc_zero(dnschild_contexts,
945 						  new_octets, octets);
946 		dnschild_contexts_alloc = new_alloc;
947 	}
948 }
949 
950 
951 static u_int
952 get_dnschild_ctx(void)
953 {
954 	static u_int	shared_ctx = UINT_MAX;
955 
956 	if (worker_per_query)
957 		return reserve_dnschild_ctx();
958 
959 	if (UINT_MAX == shared_ctx)
960 		shared_ctx = reserve_dnschild_ctx();
961 
962 	return shared_ctx;
963 }
964 
965 
966 static dnsworker_ctx *
967 get_worker_context(
968 	blocking_child *	c,
969 	u_int			idx
970 	)
971 {
972 	u_int		min_new_alloc;
973 	u_int		new_alloc;
974 	size_t		octets;
975 	size_t		new_octets;
976 	dnsworker_ctx *	retv;
977 
978 	worker_global_lock(TRUE);
979 
980 	if (dnsworker_contexts_alloc <= idx) {
981 		min_new_alloc = 1 + idx;
982 		/* round new_alloc up to nearest multiple of 4 */
983 		new_alloc = (min_new_alloc + 4) & ~(4 - 1);
984 		new_octets = new_alloc * sizeof(dnsworker_ctx*);
985 		octets = dnsworker_contexts_alloc * sizeof(dnsworker_ctx*);
986 		dnsworker_contexts = erealloc_zero(dnsworker_contexts,
987 						   new_octets, octets);
988 		dnsworker_contexts_alloc = new_alloc;
989 		retv = emalloc_zero(sizeof(dnsworker_ctx));
990 		dnsworker_contexts[idx] = retv;
991 	} else if (NULL == (retv = dnsworker_contexts[idx])) {
992 		retv = emalloc_zero(sizeof(dnsworker_ctx));
993 		dnsworker_contexts[idx] = retv;
994 	}
995 
996 	worker_global_lock(FALSE);
997 
998 	ZERO(*retv);
999 	retv->c = c;
1000 	return retv;
1001 }
1002 
1003 
1004 static void
1005 scheduled_sleep(
1006 	time_t		scheduled,
1007 	time_t		earliest,
1008 	dnsworker_ctx *	worker_ctx
1009 	)
1010 {
1011 	time_t now;
1012 
1013 	if (scheduled < worker_ctx->ignore_scheduled_before) {
1014 		TRACE(1, ("ignoring sleep until %s scheduled at %s (before %s)\n",
1015 			  humantime(earliest), humantime(scheduled),
1016 			  humantime(worker_ctx->ignore_scheduled_before)));
1017 		return;
1018 	}
1019 
1020 	now = time(NULL);
1021 
1022 	if (now < earliest) {
1023 		TRACE(1, ("sleep until %s scheduled at %s (>= %s)\n",
1024 			  humantime(earliest), humantime(scheduled),
1025 			  humantime(worker_ctx->ignore_scheduled_before)));
1026 		if (-1 == worker_sleep(worker_ctx->c, earliest - now)) {
1027 			/* our sleep was interrupted */
1028 			now = time(NULL);
1029 			worker_ctx->ignore_scheduled_before = now;
1030 #ifdef HAVE_RES_INIT
1031 			worker_ctx->next_res_init = now + 60;
1032 			next_res_init = worker_ctx->next_res_init;
1033 			res_init();
1034 #endif
1035 			TRACE(1, ("sleep interrupted by daemon, ignoring sleeps scheduled before now (%s)\n",
1036 				  humantime(worker_ctx->ignore_scheduled_before)));
1037 		}
1038 	}
1039 }
1040 
1041 
/*
 * manage_dns_retry_interval - schedule the next attempt of a failed
 *			       lookup and advance its retry interval.
 *
 * Helper for getaddrinfo_sometime_complete() and
 * getnameinfo_sometime_complete().
 *
 * pscheduled		receives the current time
 * pwhen		receives the time of the next attempt: now plus
 *			the current interval, or *pnext_timeslot if
 *			that is later
 * pretry		in: current interval, out: next interval
 * pnext_timeslot	updated to the time of the next attempt
 * forever		nonzero raises the interval cap from 64 to 1024
 */
static void
manage_dns_retry_interval(
	time_t *	pscheduled,
	time_t *	pwhen,
	int *		pretry,
	time_t *	pnext_timeslot,
	int		forever
	)
{
	const time_t	now = time(NULL);
	const int	ceiling = forever ? 1024 : 64;
	time_t		next_try;
	int		interval;
	int		doubled;

	interval = *pretry;

	/* the later of "one interval from now" and the reserved slot */
	next_try = now + interval;
	if (next_try < *pnext_timeslot)
		next_try = *pnext_timeslot;
	*pnext_timeslot = next_try;

	/*
	 * This exponential backoff is slower than plain doubling: the
	 * sequence goes 2-3-4-6-8-12-16-24-32... up to the ceiling.
	 * After doubling, a value with more than one bit set is cut
	 * back to its upper bits by clearing the lowest set bit; a
	 * pure power of two is reduced by a quarter instead.
	 */
	doubled = interval << 1;
	if (0 != (doubled & (doubled - 1)))
		interval = doubled & (doubled - 1);
	else
		interval = doubled - (doubled >> 2);
	if (interval > ceiling)
		interval = ceiling;

	*pscheduled = now;
	*pwhen = next_try;
	*pretry = interval;
}
1083 
/*
 * should_retry_dns - ntpd's DNS retry policy, shared by
 *		      getaddrinfo_sometime_complete() and
 *		      getnameinfo_sometime_complete().
 *
 * rescode	resolver return code (EAI_*)
 * res_errno	errno that accompanied EAI_SYSTEM, used for tracing
 *
 * Returns nonzero when the failure looks temporary and the lookup
 * should be tried again, zero otherwise.
 */
static int
should_retry_dns(
	int	rescode,
	int	res_errno
	)
{
	/* remembers across calls whether EAI_AGAIN was ever returned */
	static int	eai_again_seen;
	int		again;
#if defined (EAI_SYSTEM) && defined(DEBUG)
	char		msg[256];
#endif

	switch (rescode) {

	case EAI_AGAIN:
		eai_again_seen = 1;		/* [Bug 1178] */
		again = 1;
		break;

	case EAI_FAIL:
		again = 1;
		break;

	case EAI_NONAME:
#if defined(EAI_NODATA) && (EAI_NODATA != EAI_NONAME)
	case EAI_NODATA:
#endif
		/* retried only until the first EAI_AGAIN has been seen */
		again = !eai_again_seen;	/* [Bug 1178] */
		break;

#ifdef EAI_SYSTEM
	case EAI_SYSTEM:
		/*
		 * EAI_SYSTEM means the real error is in errno.  We should be more
		 * discriminating about which errno values require retrying, but
		 * this matches existing behavior.
		 */
		again = 1;
# ifdef DEBUG
		errno_to_str(res_errno, msg, sizeof(msg));
		TRACE(1, ("intres: EAI_SYSTEM errno %d (%s) means try again, right?\n",
			  res_errno, msg));
# endif
		break;
#endif

	default:
		/* every other code is treated as permanent */
		again = 0;
		break;
	}

	TRACE(2, ("intres: resolver returned: %s (%d), %sretrying\n",
		  gai_strerror(rescode), rescode, again ? "" : "not "));

	return again;
}
1147 
1148 #else	/* !WORKER follows */
/* the only definition in this file when WORKER is not defined */
int ntp_intres_nonempty_compilation_unit;
1150 #endif
1151