xref: /netbsd-src/external/bsd/ntp/dist/libntp/ntp_intres.c (revision 53b02e147d4ed531c0d2a5ca9b3e8026ba3e99b5)
1 /*	$NetBSD: ntp_intres.c,v 1.12 2020/05/25 20:47:24 christos Exp $	*/
2 
3 /*
4  * ntp_intres.c - Implements a generic blocking worker child or thread,
5  *		  initially to provide a nonblocking solution for DNS
6  *		  name to address lookups available with getaddrinfo().
7  *
8  * This is a new implementation as of 2009 sharing the filename and
9  * very little else with the prior implementation, which used a
10  * temporary file to receive a single set of requests from the parent,
11  * and a NTP mode 7 authenticated request to push back responses.
12  *
13  * A primary goal in rewriting this code was the need to support the
14  * pool configuration directive's requirement to retrieve multiple
15  * addresses resolving a single name, which has previously been
16  * satisfied with blocking resolver calls from the ntpd mainline code.
17  *
18  * A secondary goal is to provide a generic mechanism for other
19  * blocking operations to be delegated to a worker using a common
20  * model for both Unix and Windows ntpd.  ntp_worker.c, work_fork.c,
21  * and work_thread.c implement the generic mechanism.  This file
22  * implements the two current consumers, getaddrinfo_sometime() and the
23  * presently unused getnameinfo_sometime().
24  *
25  * Both routines deliver results to a callback and manage memory
26  * allocation, meaning there is no freeaddrinfo_sometime().
27  *
28  * The initial implementation for Unix uses a pair of unidirectional
29  * pipes, one each for requests and responses, connecting the forked
30  * blocking child worker with the ntpd mainline.  The threaded code
31  * uses arrays of pointers to queue requests and responses.
32  *
33  * The parent drives the process, including scheduling sleeps between
34  * retries.
35  *
36  * Memory is managed differently for a child process, which mallocs
37  * request buffers to read from the pipe into, whereas the threaded
38  * code mallocs a copy of the request to hand off to the worker via
39  * the queueing array.  The resulting request buffer is free()d by
40  * platform-independent code.  A wrinkle is the request needs to be
41  * available to the requestor during response processing.
42  *
43  * Response memory allocation is also platform-dependent.  With a
44  * separate process and pipes, the response is free()d after being
45  * written to the pipe.  With threads, the same memory is handed
46  * over and the requestor frees it after processing is completed.
47  *
48  * The code should be generalized to support threads on Unix using
49  * much of the same code used for Windows initially.
50  *
51  */
52 #ifdef HAVE_CONFIG_H
53 # include <config.h>
54 #endif
55 
56 #include "ntp_workimpl.h"
57 
58 #ifdef WORKER
59 
60 #include <stdio.h>
61 #include <ctype.h>
62 #include <signal.h>
63 
64 /**/
65 #ifdef HAVE_SYS_TYPES_H
66 # include <sys/types.h>
67 #endif
68 #ifdef HAVE_NETINET_IN_H
69 #include <netinet/in.h>
70 #endif
71 #include <arpa/inet.h>
72 /**/
73 #ifdef HAVE_SYS_PARAM_H
74 # include <sys/param.h>
75 #endif
76 
77 #if !defined(HAVE_RES_INIT) && defined(HAVE___RES_INIT)
78 # define HAVE_RES_INIT
79 #endif
80 
81 #if defined(HAVE_RESOLV_H) && defined(HAVE_RES_INIT)
82 # ifdef HAVE_ARPA_NAMESER_H
83 #  include <arpa/nameser.h> /* DNS HEADER struct */
84 # endif
85 # ifdef HAVE_NETDB_H
86 #  include <netdb.h>
87 # endif
88 # include <resolv.h>
89 # ifdef HAVE_INT32_ONLY_WITH_DNS
90 #  define HAVE_INT32
91 # endif
92 # ifdef HAVE_U_INT32_ONLY_WITH_DNS
93 #  define HAVE_U_INT32
94 # endif
95 #endif
96 
97 #include "ntp.h"
98 #include "ntp_debug.h"
99 #include "ntp_malloc.h"
100 #include "ntp_syslog.h"
101 #include "ntp_unixtime.h"
102 #include "ntp_intres.h"
103 #include "intreswork.h"
104 
105 
106 /*
107  * Following are implementations of getaddrinfo_sometime() and
108  * getnameinfo_sometime().  Each is implemented in three routines:
109  *
110  * getaddrinfo_sometime()		getnameinfo_sometime()
111  * blocking_getaddrinfo()		blocking_getnameinfo()
112  * getaddrinfo_sometime_complete()	getnameinfo_sometime_complete()
113  *
114  * The first runs in the parent and marshalls (or serializes) request
115  * parameters into a request blob which is processed in the child by
116  * the second routine, blocking_*(), which serializes the results into
117  * a response blob unpacked by the third routine, *_complete(), which
118  * calls the callback routine provided with the request and frees
119  * _request_ memory allocated by the first routine.  Response memory
120  * is managed by the code which calls the *_complete routines.
121  */
122 
123 
124 /* === typedefs === */
/*
 * Request blob for one getaddrinfo() lookup, built by
 * getaddrinfo_sometime_ex().  The struct is immediately followed in
 * the same allocation by nodesize octets of node name and then
 * servsize octets of service name, both NUL-terminated.
 */
typedef struct blocking_gai_req_tag {	/* marshalled args */
	size_t			octets;		/* struct plus both strings */
	u_int			dns_idx;	/* DNS context index */
	time_t			scheduled;	/* time the request was (re)queued */
	time_t			earliest;	/* worker sleeps until this time */
	int			retry;		/* retry interval; <= 0 means no retry */
	struct addrinfo		hints;		/* copy of caller's hints, or zeros */
	u_int			qflags;		/* GAIR_F_* request flags */
	gai_sometime_callback	callback;	/* completion callback */
	void *			context;	/* opaque, passed to callback */
	size_t			nodesize;	/* strlen(node) + 1 */
	size_t			servsize;	/* strlen(service) + 1 */
} blocking_gai_req;
138 
/*
 * Response blob produced by blocking_getaddrinfo() and unpacked by
 * getaddrinfo_sometime_complete().
 */
typedef struct blocking_gai_resp_tag {
	size_t			octets;		/* this struct plus trailing payload */
	int			retcode;	/* getaddrinfo() return value */
	int			retry;		/* copied from the request's retry */
	int			gai_errno; /* for EAI_SYSTEM case */
	int			ai_count;	/* number of addrinfo entries */
	/*
	 * Followed by ai_count struct addrinfo and then ai_count
	 * sockaddr_u and finally the canonical name strings.
	 */
} blocking_gai_resp;
150 
/*
 * Request blob for one getnameinfo() lookup, built by
 * getnameinfo_sometime().  Unlike the getaddrinfo request it has no
 * trailing variable-length data.
 */
typedef struct blocking_gni_req_tag {
	size_t			octets;		/* sizeof this struct */
	u_int			dns_idx;	/* DNS context index */
	time_t			scheduled;	/* time the request was (re)queued */
	time_t			earliest;	/* worker sleeps until this time */
	int			retry;		/* retry interval; <= 0 means no retry */
	size_t			hostoctets;	/* host buffer size given to getnameinfo() */
	size_t			servoctets;	/* service buffer size given to getnameinfo() */
	int			flags;		/* getnameinfo() flags */
	gni_sometime_callback	callback;	/* completion callback */
	void *			context;	/* opaque, passed to callback */
	sockaddr_u		socku;		/* address to look up */
} blocking_gni_req;
164 
/*
 * Response blob produced by blocking_getnameinfo() and unpacked by
 * getnameinfo_sometime_complete().
 */
typedef struct blocking_gni_resp_tag {
	size_t			octets;		/* this struct plus trailing strings */
	int			retcode;	/* getnameinfo() return value */
	int			gni_errno; /* for EAI_SYSTEM case */
	int			retry;		/* copied from the request's retry */
	size_t			hostoctets;	/* host string size incl. NUL, 0 on failure */
	size_t			servoctets;	/* service string size incl. NUL, 0 if none */
	/*
	 * Followed by hostoctets bytes of null-terminated host,
	 * then servoctets bytes of null-terminated service.
	 */
} blocking_gni_resp;
177 
/*
 * per-DNS-worker state in parent
 *
 * Entries live in dnschild_contexts[] and are created zero-filled by
 * reserve_dnschild_ctx().
 */
typedef struct dnschild_ctx_tag {
	u_int	index;			/* NOTE(review): never assigned in this file */
	time_t	next_dns_timeslot;	/* earliest time for the next queued request */
} dnschild_ctx;
183 
/*
 * per-DNS-worker state in worker
 *
 * Entries live in dnsworker_contexts[] and are handed out by
 * get_worker_context().
 */
typedef struct dnsworker_ctx_tag {
	blocking_child *	c;	/* worker serving the current request */
	time_t			ignore_scheduled_before; /* skip sleeps for requests scheduled before this */
#ifdef HAVE_RES_INIT
	time_t	next_res_init;		/* next time reload_resolv_conf() may call res_init() */
#endif
} dnsworker_ctx;
192 
193 
194 /* === variables === */
/*
 * Context tables indexed by the dns_idx carried in each request.
 * dnschild_contexts is grown by reserve_dnschild_ctx() and
 * dnsworker_contexts by get_worker_context(); the *_alloc counters
 * hold the number of pointer slots currently allocated.
 */
dnschild_ctx **		dnschild_contexts;		/* parent */
u_int			dnschild_contexts_alloc;
dnsworker_ctx **	dnsworker_contexts;		/* child */
u_int			dnsworker_contexts_alloc;

#ifdef HAVE_RES_INIT
/*
 * File-wide copy of the next res_init() time.  reload_resolv_conf()
 * syncs it with the worker context when WORK_THREAD is defined, and
 * scheduled_sleep() refreshes it after an interrupted sleep.
 */
static	time_t		next_res_init;
#endif
203 
204 
205 /* === forward declarations === */
/* context bookkeeping, parent side then worker side */
static	u_int		reserve_dnschild_ctx(void);
static	u_int		get_dnschild_ctx(void);
static	dnsworker_ctx *	get_worker_context(blocking_child *, u_int);
/* scheduling and retry policy helpers */
static	void		scheduled_sleep(time_t, time_t,
					dnsworker_ctx *);
static	void		manage_dns_retry_interval(time_t *, time_t *,
						  int *, time_t *,
						  int/*BOOL*/);
static	int		should_retry_dns(int, int);
#ifdef HAVE_RES_INIT
static	void		reload_resolv_conf(dnsworker_ctx *);
#else
/* without res_init() reloading is a no-op that still evaluates wc */
# define		reload_resolv_conf(wc)		\
	do {						\
		(void)(wc);				\
	} while (FALSE)
#endif
/* completion handlers passed to queue_blocking_request() */
static	void		getaddrinfo_sometime_complete(blocking_work_req,
						      void *, size_t,
						      void *);
static	void		getnameinfo_sometime_complete(blocking_work_req,
						      void *, size_t,
						      void *);
229 
230 
231 /* === functions === */
232 /*
233  * getaddrinfo_sometime - uses blocking child to call getaddrinfo then
234  *			  invokes provided callback completion function.
235  */
236 int
237 getaddrinfo_sometime_ex(
238 	const char *		node,
239 	const char *		service,
240 	const struct addrinfo *	hints,
241 	int			retry,
242 	gai_sometime_callback	callback,
243 	void *			context,
244 	u_int			qflags
245 	)
246 {
247 	blocking_gai_req *	gai_req;
248 	u_int			idx;
249 	dnschild_ctx *		child_ctx;
250 	size_t			req_size;
251 	size_t			nodesize;
252 	size_t			servsize;
253 	time_t			now;
254 
255 	REQUIRE(NULL != node);
256 	if (NULL != hints) {
257 		REQUIRE(0 == hints->ai_addrlen);
258 		REQUIRE(NULL == hints->ai_addr);
259 		REQUIRE(NULL == hints->ai_canonname);
260 		REQUIRE(NULL == hints->ai_next);
261 	}
262 
263 	idx = get_dnschild_ctx();
264 	child_ctx = dnschild_contexts[idx];
265 
266 	nodesize = strlen(node) + 1;
267 	servsize = strlen(service) + 1;
268 	req_size = sizeof(*gai_req) + nodesize + servsize;
269 
270 	gai_req = emalloc_zero(req_size);
271 
272 	gai_req->octets = req_size;
273 	gai_req->dns_idx = idx;
274 	now = time(NULL);
275 	gai_req->scheduled = now;
276 	gai_req->earliest = max(now, child_ctx->next_dns_timeslot);
277 	child_ctx->next_dns_timeslot = gai_req->earliest;
278 	if (hints != NULL)
279 		gai_req->hints = *hints;
280 	gai_req->retry = retry;
281 	gai_req->callback = callback;
282 	gai_req->context = context;
283 	gai_req->nodesize = nodesize;
284 	gai_req->servsize = servsize;
285 	gai_req->qflags = qflags;
286 
287 	memcpy((char *)gai_req + sizeof(*gai_req), node, nodesize);
288 	memcpy((char *)gai_req + sizeof(*gai_req) + nodesize, service,
289 	       servsize);
290 
291 	if (queue_blocking_request(
292 		BLOCKING_GETADDRINFO,
293 		gai_req,
294 		req_size,
295 		&getaddrinfo_sometime_complete,
296 		gai_req)) {
297 
298 		msyslog(LOG_ERR, "unable to queue getaddrinfo request");
299 		errno = EFAULT;
300 		return -1;
301 	}
302 
303 	return 0;
304 }
305 
306 int
307 blocking_getaddrinfo(
308 	blocking_child *	c,
309 	blocking_pipe_header *	req
310 	)
311 {
312 	blocking_gai_req *	gai_req;
313 	dnsworker_ctx *		worker_ctx;
314 	blocking_pipe_header *	resp;
315 	blocking_gai_resp *	gai_resp;
316 	char *			node;
317 	char *			service;
318 	struct addrinfo *	ai_res;
319 	struct addrinfo *	ai;
320 	struct addrinfo *	serialized_ai;
321 	size_t			canons_octets;
322 	size_t			this_octets;
323 	size_t			resp_octets;
324 	char *			cp;
325 	time_t			time_now;
326 
327 	gai_req = (void *)((char *)req + sizeof(*req));
328 	node = (char *)gai_req + sizeof(*gai_req);
329 	service = node + gai_req->nodesize;
330 
331 	worker_ctx = get_worker_context(c, gai_req->dns_idx);
332 	scheduled_sleep(gai_req->scheduled, gai_req->earliest,
333 			worker_ctx);
334 	reload_resolv_conf(worker_ctx);
335 
336 	/*
337 	 * Take a shot at the final size, better to overestimate
338 	 * at first and then realloc to a smaller size.
339 	 */
340 
341 	resp_octets = sizeof(*resp) + sizeof(*gai_resp) +
342 		      16 * (sizeof(struct addrinfo) +
343 			    sizeof(sockaddr_u)) +
344 		      256;
345 	resp = emalloc_zero(resp_octets);
346 	gai_resp = (void *)(resp + 1);
347 
348 	TRACE(2, ("blocking_getaddrinfo given node %s serv %s fam %d flags %x\n",
349 		  node, service, gai_req->hints.ai_family,
350 		  gai_req->hints.ai_flags));
351 #ifdef DEBUG
352 	if (debug >= 2)
353 		fflush(stdout);
354 #endif
355 	ai_res = NULL;
356 	gai_resp->retcode = getaddrinfo(node, service, &gai_req->hints,
357 					&ai_res);
358 	gai_resp->retry = gai_req->retry;
359 #ifdef EAI_SYSTEM
360 	if (EAI_SYSTEM == gai_resp->retcode)
361 		gai_resp->gai_errno = errno;
362 #endif
363 	canons_octets = 0;
364 
365 	if (0 == gai_resp->retcode) {
366 		ai = ai_res;
367 		while (NULL != ai) {
368 			gai_resp->ai_count++;
369 			if (ai->ai_canonname)
370 				canons_octets += strlen(ai->ai_canonname) + 1;
371 			ai = ai->ai_next;
372 		}
373 		/*
374 		 * If this query succeeded only after retrying, DNS may have
375 		 * just become responsive.  Ignore previously-scheduled
376 		 * retry sleeps once for each pending request, similar to
377 		 * the way scheduled_sleep() does when its worker_sleep()
378 		 * is interrupted.
379 		 */
380 		if (gai_resp->retry > INITIAL_DNS_RETRY) {
381 			time_now = time(NULL);
382 			worker_ctx->ignore_scheduled_before = time_now;
383 			TRACE(1, ("DNS success after retry, ignoring sleeps scheduled before now (%s)\n",
384 				  humantime(time_now)));
385 		}
386 	}
387 
388 	/*
389 	 * Our response consists of a header, followed by ai_count
390 	 * addrinfo structs followed by ai_count sockaddr_storage
391 	 * structs followed by the canonical names.
392 	 */
393 	gai_resp->octets = sizeof(*gai_resp)
394 			    + gai_resp->ai_count
395 				* (sizeof(gai_req->hints)
396 				   + sizeof(sockaddr_u))
397 			    + canons_octets;
398 
399 	resp_octets = sizeof(*resp) + gai_resp->octets;
400 	resp = erealloc(resp, resp_octets);
401 	gai_resp = (void *)(resp + 1);
402 
403 	/* cp serves as our current pointer while serializing */
404 	cp = (void *)(gai_resp + 1);
405 	canons_octets = 0;
406 
407 	if (0 == gai_resp->retcode) {
408 		ai = ai_res;
409 		while (NULL != ai) {
410 			memcpy(cp, ai, sizeof(*ai));
411 			serialized_ai = (void *)cp;
412 			cp += sizeof(*ai);
413 
414 			/* transform ai_canonname into offset */
415 			if (NULL != ai->ai_canonname) {
416 				serialized_ai->ai_canonname = (char *)canons_octets;
417 				canons_octets += strlen(ai->ai_canonname) + 1;
418 			}
419 
420 			/* leave fixup of ai_addr pointer for receiver */
421 
422 			ai = ai->ai_next;
423 		}
424 
425 		ai = ai_res;
426 		while (NULL != ai) {
427 			INSIST(ai->ai_addrlen <= sizeof(sockaddr_u));
428 			memcpy(cp, ai->ai_addr, ai->ai_addrlen);
429 			cp += sizeof(sockaddr_u);
430 
431 			ai = ai->ai_next;
432 		}
433 
434 		ai = ai_res;
435 		while (NULL != ai) {
436 			if (NULL != ai->ai_canonname) {
437 				this_octets = strlen(ai->ai_canonname) + 1;
438 				memcpy(cp, ai->ai_canonname, this_octets);
439 				cp += this_octets;
440 			}
441 
442 			ai = ai->ai_next;
443 		}
444 		freeaddrinfo(ai_res);
445 	}
446 
447 	/*
448 	 * make sure our walk and earlier calc match
449 	 */
450 	DEBUG_INSIST((size_t)(cp - (char *)resp) == resp_octets);
451 
452 	if (queue_blocking_response(c, resp, resp_octets, req)) {
453 		msyslog(LOG_ERR, "blocking_getaddrinfo can not queue response");
454 		return -1;
455 	}
456 
457 	return 0;
458 }
459 
460 int
461 getaddrinfo_sometime(
462 	const char *		node,
463 	const char *		service,
464 	const struct addrinfo *	hints,
465 	int			retry,
466 	gai_sometime_callback	callback,
467 	void *			context
468 	)
469 {
470 	return getaddrinfo_sometime_ex(node, service, hints, retry,
471 				       callback, context, 0);
472 }
473 
474 
475 static void
476 getaddrinfo_sometime_complete(
477 	blocking_work_req	rtype,
478 	void *			context,
479 	size_t			respsize,
480 	void *			resp
481 	)
482 {
483 	blocking_gai_req *	gai_req;
484 	blocking_gai_resp *	gai_resp;
485 	dnschild_ctx *		child_ctx;
486 	struct addrinfo *	ai;
487 	struct addrinfo *	next_ai;
488 	sockaddr_u *		psau;
489 	char *			node;
490 	char *			service;
491 	char *			canon_start;
492 	time_t			time_now;
493 	int			again, noerr;
494 	int			af;
495 	const char *		fam_spec;
496 	int			i;
497 
498 	gai_req = context;
499 	gai_resp = resp;
500 
501 	DEBUG_REQUIRE(BLOCKING_GETADDRINFO == rtype);
502 	DEBUG_REQUIRE(respsize == gai_resp->octets);
503 
504 	node = (char *)gai_req + sizeof(*gai_req);
505 	service = node + gai_req->nodesize;
506 
507 	child_ctx = dnschild_contexts[gai_req->dns_idx];
508 
509 	if (0 == gai_resp->retcode) {
510 		/*
511 		 * If this query succeeded only after retrying, DNS may have
512 		 * just become responsive.
513 		 */
514 		if (gai_resp->retry > INITIAL_DNS_RETRY) {
515 			time_now = time(NULL);
516 			child_ctx->next_dns_timeslot = time_now;
517 			TRACE(1, ("DNS success after retry, %u next_dns_timeslot reset (%s)\n",
518 				  gai_req->dns_idx, humantime(time_now)));
519 		}
520 	} else {
521 		noerr = !!(gai_req->qflags & GAIR_F_IGNDNSERR);
522 		again = noerr || should_retry_dns(
523 					gai_resp->retcode, gai_resp->gai_errno);
524 		/*
525 		 * exponential backoff of DNS retries to 64s
526 		 */
527 		if (gai_req->retry > 0 && again) {
528 			/* log the first retry only */
529 			if (INITIAL_DNS_RETRY == gai_req->retry)
530 				NLOG(NLOG_SYSINFO) {
531 					af = gai_req->hints.ai_family;
532 					fam_spec = (AF_INET6 == af)
533 						       ? " (AAAA)"
534 						       : (AF_INET == af)
535 							     ? " (A)"
536 							     : "";
537 #ifdef EAI_SYSTEM
538 					if (EAI_SYSTEM == gai_resp->retcode) {
539 						errno = gai_resp->gai_errno;
540 						msyslog(LOG_INFO,
541 							"retrying DNS %s%s: EAI_SYSTEM %d: %m",
542 							node, fam_spec,
543 							gai_resp->gai_errno);
544 					} else
545 #endif
546 						msyslog(LOG_INFO,
547 							"retrying DNS %s%s: %s (%d)",
548 							node, fam_spec,
549 							gai_strerror(gai_resp->retcode),
550 							gai_resp->retcode);
551 				}
552 			manage_dns_retry_interval(
553 				&gai_req->scheduled, &gai_req->earliest,
554 				&gai_req->retry, &child_ctx->next_dns_timeslot,
555 				noerr);
556 			if (!queue_blocking_request(
557 					BLOCKING_GETADDRINFO,
558 					gai_req,
559 					gai_req->octets,
560 					&getaddrinfo_sometime_complete,
561 					gai_req))
562 				return;
563 			else
564 				msyslog(LOG_ERR,
565 					"unable to retry hostname %s",
566 					node);
567 		}
568 	}
569 
570 	/*
571 	 * fixup pointers in returned addrinfo array
572 	 */
573 	ai = (void *)((char *)gai_resp + sizeof(*gai_resp));
574 	next_ai = NULL;
575 	for (i = gai_resp->ai_count - 1; i >= 0; i--) {
576 		ai[i].ai_next = next_ai;
577 		next_ai = &ai[i];
578 	}
579 
580 	psau = (void *)((char *)ai + gai_resp->ai_count * sizeof(*ai));
581 	canon_start = (char *)psau + gai_resp->ai_count * sizeof(*psau);
582 
583 	for (i = 0; i < gai_resp->ai_count; i++) {
584 		if (NULL != ai[i].ai_addr)
585 			ai[i].ai_addr = &psau->sa;
586 		psau++;
587 		if (NULL != ai[i].ai_canonname)
588 			ai[i].ai_canonname += (size_t)canon_start;
589 	}
590 
591 	ENSURE((char *)psau == canon_start);
592 
593 	if (!gai_resp->ai_count)
594 		ai = NULL;
595 
596 	(*gai_req->callback)(gai_resp->retcode, gai_resp->gai_errno,
597 			     gai_req->context, node, service,
598 			     &gai_req->hints, ai);
599 
600 	free(gai_req);
601 	/* gai_resp is part of block freed by process_blocking_resp() */
602 }
603 
604 
#ifdef TEST_BLOCKING_WORKER
/*
 * gai_test_callback - test-only getaddrinfo_sometime() callback which
 * traces every returned address and starts a reverse lookup for it.
 * On failure only the error is traced.
 */
void
gai_test_callback(
	int			rescode,
	int			gai_errno,
	void *			context,
	const char *		name,
	const char *		service,
	const struct addrinfo *	hints,
	const struct addrinfo *	ai_res
	)
{
	const struct addrinfo *	cur;
	const char *		canon;
	const char *		socktype;
	sockaddr_u		addr;

	if (rescode) {
		TRACE(1, ("gai_test_callback context %p error rescode %d %s serv %s\n",
			  context, rescode, name, service));
		return;
	}

	for (cur = ai_res; NULL != cur; cur = cur->ai_next) {
		ZERO_SOCK(&addr);
		memcpy(&addr, cur->ai_addr, cur->ai_addrlen);

		canon = (cur->ai_canonname) ? cur->ai_canonname : "";
		if (SOCK_DGRAM == cur->ai_socktype)
			socktype = "DGRAM";
		else if (SOCK_STREAM == cur->ai_socktype)
			socktype = "STREAM";
		else
			socktype = "(other)";

		TRACE(1, ("ctx %p fam %d addr %s canon '%s' type %s at %p ai_addr %p ai_next %p\n",
			  context,
			  AF(&addr),
			  stoa(&addr),
			  canon,
			  socktype,
			  cur,
			  cur->ai_addr,
			  cur->ai_next));

		getnameinfo_sometime((sockaddr_u *)cur->ai_addr, 128, 32, 0, gni_test_callback, context);
	}
}
#endif	/* TEST_BLOCKING_WORKER */
640 
641 
642 int
643 getnameinfo_sometime(
644 	sockaddr_u *		psau,
645 	size_t			hostoctets,
646 	size_t			servoctets,
647 	int			flags,
648 	gni_sometime_callback	callback,
649 	void *			context
650 	)
651 {
652 	blocking_gni_req *	gni_req;
653 	u_int			idx;
654 	dnschild_ctx *		child_ctx;
655 	time_t			time_now;
656 
657 	REQUIRE(hostoctets);
658 	REQUIRE(hostoctets + servoctets < 1024);
659 
660 	idx = get_dnschild_ctx();
661 	child_ctx = dnschild_contexts[idx];
662 
663 	gni_req = emalloc_zero(sizeof(*gni_req));
664 
665 	gni_req->octets = sizeof(*gni_req);
666 	gni_req->dns_idx = idx;
667 	time_now = time(NULL);
668 	gni_req->scheduled = time_now;
669 	gni_req->earliest = max(time_now, child_ctx->next_dns_timeslot);
670 	child_ctx->next_dns_timeslot = gni_req->earliest;
671 	memcpy(&gni_req->socku, psau, SOCKLEN(psau));
672 	gni_req->hostoctets = hostoctets;
673 	gni_req->servoctets = servoctets;
674 	gni_req->flags = flags;
675 	gni_req->retry = INITIAL_DNS_RETRY;
676 	gni_req->callback = callback;
677 	gni_req->context = context;
678 
679 	if (queue_blocking_request(
680 		BLOCKING_GETNAMEINFO,
681 		gni_req,
682 		sizeof(*gni_req),
683 		&getnameinfo_sometime_complete,
684 		gni_req)) {
685 
686 		msyslog(LOG_ERR, "unable to queue getnameinfo request");
687 		errno = EFAULT;
688 		return -1;
689 	}
690 
691 	return 0;
692 }
693 
694 
695 int
696 blocking_getnameinfo(
697 	blocking_child *	c,
698 	blocking_pipe_header *	req
699 	)
700 {
701 	blocking_gni_req *	gni_req;
702 	dnsworker_ctx *		worker_ctx;
703 	blocking_pipe_header *	resp;
704 	blocking_gni_resp *	gni_resp;
705 	size_t			octets;
706 	size_t			resp_octets;
707 	char *			service;
708 	char *			cp;
709 	int			rc;
710 	time_t			time_now;
711 	char			host[1024];
712 
713 	gni_req = (void *)((char *)req + sizeof(*req));
714 
715 	octets = gni_req->hostoctets + gni_req->servoctets;
716 
717 	/*
718 	 * Some alloca() implementations are fragile regarding
719 	 * large allocations.  We only need room for the host
720 	 * and service names.
721 	 */
722 	REQUIRE(octets < sizeof(host));
723 	service = host + gni_req->hostoctets;
724 
725 	worker_ctx = get_worker_context(c, gni_req->dns_idx);
726 	scheduled_sleep(gni_req->scheduled, gni_req->earliest,
727 			worker_ctx);
728 	reload_resolv_conf(worker_ctx);
729 
730 	/*
731 	 * Take a shot at the final size, better to overestimate
732 	 * then realloc to a smaller size.
733 	 */
734 
735 	resp_octets = sizeof(*resp) + sizeof(*gni_resp) + octets;
736 	resp = emalloc_zero(resp_octets);
737 	gni_resp = (void *)((char *)resp + sizeof(*resp));
738 
739 	TRACE(2, ("blocking_getnameinfo given addr %s flags 0x%x hostlen %lu servlen %lu\n",
740 		  stoa(&gni_req->socku), gni_req->flags,
741 		  (u_long)gni_req->hostoctets, (u_long)gni_req->servoctets));
742 
743 	gni_resp->retcode = getnameinfo(&gni_req->socku.sa,
744 					SOCKLEN(&gni_req->socku),
745 					host,
746 					gni_req->hostoctets,
747 					service,
748 					gni_req->servoctets,
749 					gni_req->flags);
750 	gni_resp->retry = gni_req->retry;
751 #ifdef EAI_SYSTEM
752 	if (EAI_SYSTEM == gni_resp->retcode)
753 		gni_resp->gni_errno = errno;
754 #endif
755 
756 	if (0 != gni_resp->retcode) {
757 		gni_resp->hostoctets = 0;
758 		gni_resp->servoctets = 0;
759 	} else {
760 		gni_resp->hostoctets = strlen(host) + 1;
761 		gni_resp->servoctets = strlen(service) + 1;
762 		/*
763 		 * If this query succeeded only after retrying, DNS may have
764 		 * just become responsive.  Ignore previously-scheduled
765 		 * retry sleeps once for each pending request, similar to
766 		 * the way scheduled_sleep() does when its worker_sleep()
767 		 * is interrupted.
768 		 */
769 		if (gni_req->retry > INITIAL_DNS_RETRY) {
770 			time_now = time(NULL);
771 			worker_ctx->ignore_scheduled_before = time_now;
772 			TRACE(1, ("DNS success after retrying, ignoring sleeps scheduled before now (%s)\n",
773 				humantime(time_now)));
774 		}
775 	}
776 	octets = gni_resp->hostoctets + gni_resp->servoctets;
777 	/*
778 	 * Our response consists of a header, followed by the host and
779 	 * service strings, each null-terminated.
780 	 */
781 	resp_octets = sizeof(*resp) + sizeof(*gni_resp) + octets;
782 
783 	resp = erealloc(resp, resp_octets);
784 	gni_resp = (void *)(resp + 1);
785 
786 	gni_resp->octets = sizeof(*gni_resp) + octets;
787 
788 	/* cp serves as our current pointer while serializing */
789 	cp = (void *)(gni_resp + 1);
790 
791 	if (0 == gni_resp->retcode) {
792 		memcpy(cp, host, gni_resp->hostoctets);
793 		cp += gni_resp->hostoctets;
794 		memcpy(cp, service, gni_resp->servoctets);
795 		cp += gni_resp->servoctets;
796 	}
797 
798 	INSIST((size_t)(cp - (char *)resp) == resp_octets);
799 	INSIST(resp_octets - sizeof(*resp) == gni_resp->octets);
800 
801 	rc = queue_blocking_response(c, resp, resp_octets, req);
802 	if (rc)
803 		msyslog(LOG_ERR, "blocking_getnameinfo unable to queue response");
804 	return rc;
805 }
806 
807 
808 static void
809 getnameinfo_sometime_complete(
810 	blocking_work_req	rtype,
811 	void *			context,
812 	size_t			respsize,
813 	void *			resp
814 	)
815 {
816 	blocking_gni_req *	gni_req;
817 	blocking_gni_resp *	gni_resp;
818 	dnschild_ctx *		child_ctx;
819 	char *			host;
820 	char *			service;
821 	time_t			time_now;
822 	int			again;
823 
824 	gni_req = context;
825 	gni_resp = resp;
826 
827 	DEBUG_REQUIRE(BLOCKING_GETNAMEINFO == rtype);
828 	DEBUG_REQUIRE(respsize == gni_resp->octets);
829 
830 	child_ctx = dnschild_contexts[gni_req->dns_idx];
831 
832 	if (0 == gni_resp->retcode) {
833 		/*
834 		 * If this query succeeded only after retrying, DNS may have
835 		 * just become responsive.
836 		 */
837 		if (gni_resp->retry > INITIAL_DNS_RETRY) {
838 			time_now = time(NULL);
839 			child_ctx->next_dns_timeslot = time_now;
840 			TRACE(1, ("DNS success after retry, %u next_dns_timeslot reset (%s)\n",
841 				  gni_req->dns_idx, humantime(time_now)));
842 		}
843 	} else {
844 		again = should_retry_dns(gni_resp->retcode, gni_resp->gni_errno);
845 		/*
846 		 * exponential backoff of DNS retries to 64s
847 		 */
848 		if (gni_req->retry > 0)
849 			manage_dns_retry_interval(&gni_req->scheduled,
850 			    &gni_req->earliest, &gni_req->retry,
851 						  &child_ctx->next_dns_timeslot, FALSE);
852 
853 		if (gni_req->retry > 0 && again) {
854 			if (!queue_blocking_request(
855 				BLOCKING_GETNAMEINFO,
856 				gni_req,
857 				gni_req->octets,
858 				&getnameinfo_sometime_complete,
859 				gni_req))
860 				return;
861 
862 			msyslog(LOG_ERR, "unable to retry reverse lookup of %s", stoa(&gni_req->socku));
863 		}
864 	}
865 
866 	if (!gni_resp->hostoctets) {
867 		host = NULL;
868 		service = NULL;
869 	} else {
870 		host = (char *)gni_resp + sizeof(*gni_resp);
871 		service = (gni_resp->servoctets)
872 			      ? host + gni_resp->hostoctets
873 			      : NULL;
874 	}
875 
876 	(*gni_req->callback)(gni_resp->retcode, gni_resp->gni_errno,
877 			     &gni_req->socku, gni_req->flags, host,
878 			     service, gni_req->context);
879 
880 	free(gni_req);
881 	/* gni_resp is part of block freed by process_blocking_resp() */
882 }
883 
884 
#ifdef TEST_BLOCKING_WORKER
/*
 * gni_test_callback - test-only getnameinfo_sometime() callback which
 * traces either the resolved names or the failure details.
 */
void
gni_test_callback(
	int		rescode,
	int		gni_errno,
	sockaddr_u *	psau,
	int		flags,
	const char *	host,
	const char *	service,
	void *		context
	)
{
	if (rescode) {
		TRACE(1, ("gni_test_callback context %p rescode %d gni_errno %d flags 0x%x addr %s\n",
			  context, rescode, gni_errno, flags, stoa(psau)));
		return;
	}

	TRACE(1, ("gni_test_callback got host '%s' serv '%s' for addr %s context %p\n",
		  host, service, stoa(psau), context));
}
#endif	/* TEST_BLOCKING_WORKER */
896 
897 
#ifdef HAVE_RES_INIT
/*
 * reload_resolv_conf - call res_init() at most once per minute so
 * that changes to /etc/resolv.conf (for example from the DHCP
 * client) are noticed.  [Bug 1226]
 *
 * While the context's next_res_init is still zero the timer is only
 * armed; res_init() itself is skipped.  With WORK_THREAD the timer is
 * kept in a file-wide variable so it applies process-wide rather
 * than per worker.
 */
static void
reload_resolv_conf(
	dnsworker_ctx *	worker_ctx
	)
{
	const time_t	now = time(NULL);

# ifdef WORK_THREAD
	/* adopt the process-wide deadline */
	worker_ctx->next_res_init = next_res_init;
# endif
	if (now < worker_ctx->next_res_init)
		return;		/* not due yet */

	if (0 != worker_ctx->next_res_init)
		res_init();

	worker_ctx->next_res_init = now + 60;
# ifdef WORK_THREAD
	/* publish the new deadline */
	next_res_init = worker_ctx->next_res_init;
# endif
}
#endif	/* HAVE_RES_INIT */
926 
927 
928 static u_int
929 reserve_dnschild_ctx(void)
930 {
931 	const size_t	ps = sizeof(dnschild_contexts[0]);
932 	const size_t	cs = sizeof(*dnschild_contexts[0]);
933 	u_int		c;
934 	u_int		new_alloc;
935 	size_t		octets;
936 	size_t		new_octets;
937 
938 	c = 0;
939 	while (TRUE) {
940 		for ( ; c < dnschild_contexts_alloc; c++) {
941 			if (NULL == dnschild_contexts[c]) {
942 				dnschild_contexts[c] = emalloc_zero(cs);
943 
944 				return c;
945 			}
946 		}
947 		new_alloc = dnschild_contexts_alloc + 20;
948 		new_octets = new_alloc * ps;
949 		octets = dnschild_contexts_alloc * ps;
950 		dnschild_contexts = erealloc_zero(dnschild_contexts,
951 						  new_octets, octets);
952 		dnschild_contexts_alloc = new_alloc;
953 	}
954 }
955 
956 
957 static u_int
958 get_dnschild_ctx(void)
959 {
960 	static u_int	shared_ctx = UINT_MAX;
961 
962 	if (worker_per_query)
963 		return reserve_dnschild_ctx();
964 
965 	if (UINT_MAX == shared_ctx)
966 		shared_ctx = reserve_dnschild_ctx();
967 
968 	return shared_ctx;
969 }
970 
971 
972 static dnsworker_ctx *
973 get_worker_context(
974 	blocking_child *	c,
975 	u_int			idx
976 	)
977 {
978 	u_int		min_new_alloc;
979 	u_int		new_alloc;
980 	size_t		octets;
981 	size_t		new_octets;
982 	dnsworker_ctx *	retv;
983 
984 	worker_global_lock(TRUE);
985 
986 	if (dnsworker_contexts_alloc <= idx) {
987 		min_new_alloc = 1 + idx;
988 		/* round new_alloc up to nearest multiple of 4 */
989 		new_alloc = (min_new_alloc + 4) & ~(4 - 1);
990 		new_octets = new_alloc * sizeof(dnsworker_ctx*);
991 		octets = dnsworker_contexts_alloc * sizeof(dnsworker_ctx*);
992 		dnsworker_contexts = erealloc_zero(dnsworker_contexts,
993 						   new_octets, octets);
994 		dnsworker_contexts_alloc = new_alloc;
995 		retv = emalloc_zero(sizeof(dnsworker_ctx));
996 		dnsworker_contexts[idx] = retv;
997 	} else if (NULL == (retv = dnsworker_contexts[idx])) {
998 		retv = emalloc_zero(sizeof(dnsworker_ctx));
999 		dnsworker_contexts[idx] = retv;
1000 	}
1001 
1002 	worker_global_lock(FALSE);
1003 
1004 	ZERO(*retv);
1005 	retv->c = c;
1006 	return retv;
1007 }
1008 
1009 
1010 static void
1011 scheduled_sleep(
1012 	time_t		scheduled,
1013 	time_t		earliest,
1014 	dnsworker_ctx *	worker_ctx
1015 	)
1016 {
1017 	time_t now;
1018 
1019 	if (scheduled < worker_ctx->ignore_scheduled_before) {
1020 		TRACE(1, ("ignoring sleep until %s scheduled at %s (before %s)\n",
1021 			  humantime(earliest), humantime(scheduled),
1022 			  humantime(worker_ctx->ignore_scheduled_before)));
1023 		return;
1024 	}
1025 
1026 	now = time(NULL);
1027 
1028 	if (now < earliest) {
1029 		TRACE(1, ("sleep until %s scheduled at %s (>= %s)\n",
1030 			  humantime(earliest), humantime(scheduled),
1031 			  humantime(worker_ctx->ignore_scheduled_before)));
1032 		if (-1 == worker_sleep(worker_ctx->c, earliest - now)) {
1033 			/* our sleep was interrupted */
1034 			now = time(NULL);
1035 			worker_ctx->ignore_scheduled_before = now;
1036 #ifdef HAVE_RES_INIT
1037 			worker_ctx->next_res_init = now + 60;
1038 			next_res_init = worker_ctx->next_res_init;
1039 			res_init();
1040 #endif
1041 			TRACE(1, ("sleep interrupted by daemon, ignoring sleeps scheduled before now (%s)\n",
1042 				  humantime(worker_ctx->ignore_scheduled_before)));
1043 		}
1044 	}
1045 }
1046 
1047 
/*
 * manage_dns_retry_interval - helper of the *_sometime_complete
 * routines which schedules the next attempt of a failed query and
 * advances its retry interval.
 *
 * pscheduled		out: set to the current time
 * pwhen		out: time of the next attempt, the later of
 *			now + *pretry and *pnext_timeslot
 * pretry		in: current interval, out: next interval
 * pnext_timeslot	in/out: updated to the time of the next attempt
 * forever		nonzero raises the interval cap from 64 to 1024
 */
static void
manage_dns_retry_interval(
	time_t *	pscheduled,
	time_t *	pwhen,
	int *		pretry,
	time_t *	pnext_timeslot,
	int		forever
	)
{
	const time_t	current = time(NULL);
	const int	cap = forever ? 1024 : 64;
	time_t		next_try;
	int		interval;

	interval = *pretry;

	/* honour a timeslot that lies beyond our own retry time */
	next_try = current + interval;
	if (*pnext_timeslot > next_try)
		next_try = *pnext_timeslot;
	*pnext_timeslot = next_try;

	/*
	 * Back off more gently than plain doubling: the intervals run
	 * 2-3-4-6-8-12-16-24-32... up to the cap.  After doubling, a
	 * value with more than one bit set drops its lowest set bit,
	 * while a power of two gives up a quarter of itself.
	 */
	interval <<= 1;
	if (0 != (interval & (interval - 1)))
		interval &= (interval - 1);
	else
		interval -= (interval >> 2);
	if (interval > cap)
		interval = cap;

	*pscheduled = current;
	*pwhen = next_try;
	*pretry = interval;
}
1089 
/*
 * should_retry_dns - ntpd's DNS retry policy, shared by
 * getaddrinfo_sometime_complete and getnameinfo_sometime_complete.
 *
 * rescode	resolver return code (EAI_*)
 * res_errno	errno accompanying EAI_SYSTEM, only used for tracing
 *
 * Returns nonzero if the failure looks temporary and the query
 * should be tried again, zero otherwise.  Unlisted codes are not
 * retried.
 */
static int
should_retry_dns(
	int	rescode,
	int	res_errno
	)
{
	static int	eai_again_seen;	/* sticky once EAI_AGAIN occurred */
	int		retry_ok = 0;
#if defined (EAI_SYSTEM) && defined(DEBUG)
	char		msg[256];
#endif

	switch (rescode) {

	case EAI_AGAIN:
		eai_again_seen = 1;		/* [Bug 1178] */
		/* FALLTHROUGH */
	case EAI_FAIL:
		retry_ok = 1;
		break;

	case EAI_NONAME:
#if defined(EAI_NODATA) && (EAI_NODATA != EAI_NONAME)
	case EAI_NODATA:
#endif
		/* retried only until an EAI_AGAIN has been observed */
		retry_ok = !eai_again_seen;	/* [Bug 1178] */
		break;

#ifdef EAI_SYSTEM
	case EAI_SYSTEM:
		/*
		 * The real error is in errno.  Retrying for every
		 * errno value is indiscriminate, but it matches
		 * existing behavior.
		 */
		retry_ok = 1;
# ifdef DEBUG
		errno_to_str(res_errno, msg, sizeof(msg));
		TRACE(1, ("intres: EAI_SYSTEM errno %d (%s) means try again, right?\n",
			  res_errno, msg));
# endif
		break;
#endif

	default:
		break;
	}

	TRACE(2, ("intres: resolver returned: %s (%d), %sretrying\n",
		  gai_strerror(rescode), rescode, retry_ok ? "" : "not "));

	return retry_ok;
}
1153 
1154 #else	/* !WORKER follows */
/* only definition in this file when WORKER is not defined; presumably keeps the translation unit from being empty */
int ntp_intres_nonempty_compilation_unit;
1156 #endif
1157