xref: /netbsd-src/external/bsd/ntp/dist/ntpd/ntp_io.c (revision f8cf1a9151c7af1cb0bd8b09c13c66bca599c027)
1 /*	$NetBSD: ntp_io.c,v 1.33 2024/08/18 20:47:17 christos Exp $	*/
2 
3 /*
4  * ntp_io.c - input/output routines for ntpd.	The socket-opening code
5  *		   was shamelessly stolen from ntpd.
6  */
7 
8 #ifdef HAVE_CONFIG_H
9 # include <config.h>
10 #endif
11 
12 #include <stdio.h>
13 #include <signal.h>
14 #ifdef HAVE_FNMATCH_H
15 # include <fnmatch.h>
16 # if !defined(FNM_CASEFOLD) && defined(FNM_IGNORECASE)
17 #  define FNM_CASEFOLD FNM_IGNORECASE
18 # endif
19 #endif
20 #ifdef HAVE_SYS_PARAM_H
21 # include <sys/param.h>
22 #endif
23 #ifdef HAVE_SYS_IOCTL_H
24 # include <sys/ioctl.h>
25 #endif
26 #ifdef HAVE_SYS_SOCKIO_H	/* UXPV: SIOC* #defines (Frank Vance <fvance@waii.com>) */
27 # include <sys/sockio.h>
28 #endif
29 #ifdef HAVE_SYS_UIO_H
30 # include <sys/uio.h>
31 #endif
32 
33 #include "ntp_machine.h"
34 #include "ntpd.h"
35 #include "ntp_io.h"
36 #include "iosignal.h"
37 #include "ntp_lists.h"
38 #include "ntp_refclock.h"
39 #include "ntp_stdlib.h"
40 #include "ntp_worker.h"
41 #include "ntp_request.h"
42 #include "ntp_assert.h"
43 #include "timevalops.h"
44 #include "timespecops.h"
45 #include "ntpd-opts.h"
46 #include "safecast.h"
47 
48 /* Don't include ISC's version of IPv6 variables and structures */
49 #define ISC_IPV6_H 1
50 #include <isc/mem.h>
51 #include <isc/interfaceiter.h>
52 #include <isc/netaddr.h>
53 #include <isc/result.h>
54 #include <isc/sockaddr.h>
55 
56 #ifdef SIM
57 #include "ntpsim.h"
58 #endif
59 
60 #ifdef HAS_ROUTING_SOCKET
61 # include <net/route.h>
62 # ifdef HAVE_RTNETLINK
63 #  include <linux/rtnetlink.h>
64 # endif
65 #endif
66 
67 /*
68  * setsockopt does not always have the same arg declaration
69  * across all platforms. If it's not defined we make it empty
70  */
71 
72 #ifndef SETSOCKOPT_ARG_CAST
73 #define SETSOCKOPT_ARG_CAST
74 #endif
75 
76 extern int listen_to_virtual_ips;
77 
78 #ifndef IPTOS_DSCP_EF
79 #define IPTOS_DSCP_EF 0xb8
80 #endif
81 int qos = IPTOS_DSCP_EF;	/* QoS RFC3246 */
82 
83 #ifdef LEAP_SMEAR
/* TODO burnicki: This should be moved to ntp_timer.c, but if we do so
 * we get a linker error. Since we're running out of time before the leap
 * second occurs, we leave it here, where it just works.
 */
88 int leap_smear_intv;
89 #endif
90 
91 /*
92  * NIC rule entry
93  */
94 typedef struct nic_rule_tag nic_rule;
95 
96 struct nic_rule_tag {
97 	nic_rule *	next;
98 	nic_rule_action	action;
99 	nic_rule_match	match_type;
100 	char *		if_name;
101 	sockaddr_u	addr;
102 	int		prefixlen;
103 };
104 
105 /*
106  * NIC rule listhead.  Entries are added at the head so that the first
107  * match in the list is the last matching rule specified.
108  */
109 nic_rule *nic_rule_list;
110 
111 
112 #if defined(SO_BINTIME) && defined(SCM_BINTIME) && defined(CMSG_FIRSTHDR)
113 #  define HAVE_PACKET_TIMESTAMP
114 #  define HAVE_BINTIME
115 #  ifdef BINTIME_CTLMSGBUF_SIZE
116 #   define CMSG_BUFSIZE BINTIME_CTLMSGBUF_SIZE
117 #  else
118 #   define CMSG_BUFSIZE  1536 /* moderate default */
119 #  endif
120 #elif defined(SO_TIMESTAMPNS) && defined(SCM_TIMESTAMPNS) && defined(CMSG_FIRSTHDR)
121 #  define HAVE_PACKET_TIMESTAMP
122 #  define HAVE_TIMESTAMPNS
123 #  ifdef TIMESTAMPNS_CTLMSGBUF_SIZE
124 #   define CMSG_BUFSIZE TIMESTAMPNS_CTLMSGBUF_SIZE
125 #  else
126 #   define CMSG_BUFSIZE  1536 /* moderate default */
127 #  endif
128 #elif defined(SO_TIMESTAMP) && defined(SCM_TIMESTAMP) && defined(CMSG_FIRSTHDR)
129 #  define HAVE_PACKET_TIMESTAMP
130 #  define HAVE_TIMESTAMP
131 #  ifdef TIMESTAMP_CTLMSGBUF_SIZE
132 #   define CMSG_BUFSIZE TIMESTAMP_CTLMSGBUF_SIZE
133 #  else
134 #   define CMSG_BUFSIZE  1536 /* moderate default */
135 #  endif
136 #else
137 /* fill in for old/other timestamp interfaces */
138 #endif
139 
140 #if defined(SYS_WINNT)
141 #include "win32_io.h"
142 #include <isc/win32os.h>
143 #endif
144 
145 /*
146  * We do asynchronous input using the SIGIO facility.  A number of
147  * recvbuf buffers are preallocated for input.	In the signal
148  * handler we poll to see which sockets are ready and read the
149  * packets from them into the recvbuf's along with a time stamp and
150  * an indication of the source host and the interface it was received
151  * through.  This allows us to get as accurate receive time stamps
152  * as possible independent of other processing going on.
153  *
154  * We watch the number of recvbufs available to the signal handler
155  * and allocate more when this number drops below the low water
156  * mark.  If the signal handler should run out of buffers in the
157  * interim it will drop incoming frames, the idea being that it is
158  * better to drop a packet than to be inaccurate.
159  */
160 
161 
162 /*
163  * Other statistics of possible interest
164  */
165 volatile u_long packets_dropped;	/* total number of packets dropped on reception */
166 volatile u_long packets_ignored;	/* packets received on wild card interface */
167 volatile u_long packets_received;	/* total number of packets received */
168 	 u_long packets_sent;		/* total number of packets sent */
169 	 u_long packets_notsent;	/* total number of packets which couldn't be sent */
170 
171 volatile u_long handler_calls;	/* number of calls to interrupt handler */
172 volatile u_long handler_pkts;	/* number of pkts received by handler */
173 u_long io_timereset;		/* time counters were reset */
174 
175 /*
176  * Interface stuff
177  */
178 endpt *	any_interface;		/* wildcard ipv4 interface */
179 endpt *	any6_interface;		/* wildcard ipv6 interface */
180 endpt *	loopback_interface;	/* loopback ipv4 interface */
181 
182 static isc_boolean_t broadcast_client_enabled;
183 u_int sys_ifnum;			/* next .ifnum to assign */
184 int ninterfaces;			/* Total number of interfaces */
185 
186 int no_periodic_scan;		/* network endpoint scans */
187 int scan_addrs_once;		/* because dropped privs */
188 int nonlocal_v4_addr_up;	/* should we try IPv4 pool? */
189 int nonlocal_v6_addr_up;	/* should we try IPv6 pool? */
190 
191 #ifdef REFCLOCK
192 /*
193  * Refclock stuff.	We keep a chain of structures with data concerning
194  * the guys we are doing I/O for.
195  */
196 static	struct refclockio *refio;
197 #endif /* REFCLOCK */
198 
199 /*
200  * File descriptor masks etc. for call to select
201  * Not needed for I/O Completion Ports or anything outside this file
202  */
203 static fd_set activefds;
204 static int maxactivefd;
205 
206 /*
207  * bit alternating value to detect verified interfaces during an update cycle
208  */
209 static  u_short		sys_interphase = 0;
210 
211 static endpt *	new_interface(endpt *);
212 static void	add_interface(endpt *);
213 static int	update_interfaces(u_short, interface_receiver_t,
214 				  void *);
215 static void	remove_interface(endpt *);
216 static endpt *	create_interface(u_short, endpt *);
217 
218 static inline int is_wildcard_addr(const sockaddr_u *psau);
219 
220 /*
221  * Multicast functions
222  */
223 static	isc_boolean_t	addr_ismulticast	(sockaddr_u *);
224 static	isc_boolean_t	is_anycast		(sockaddr_u *,
225 						 const char *);
226 
227 /*
228  * Not all platforms support multicast
229  */
230 #ifdef MCAST
231 static	isc_boolean_t	socket_multicast_enable	(endpt *, sockaddr_u *);
232 static	isc_boolean_t	socket_multicast_disable(endpt *, sockaddr_u *);
233 #endif
234 
235 #ifdef DEBUG
236 static void interface_dump	(const endpt *);
237 static void print_interface	(const endpt *, const char *, const char *);
238 #define DPRINT_INTERFACE(level, args) do { if (debug >= (level)) { print_interface args; } } while (0)
239 #else
240 #define DPRINT_INTERFACE(level, args) do {} while (0)
241 #endif
242 
243 typedef struct vsock vsock_t;
244 enum desc_type { FD_TYPE_SOCKET, FD_TYPE_FILE };
245 
246 struct vsock {
247 	vsock_t	*	link;
248 	SOCKET		fd;
249 	enum desc_type	type;
250 };
251 
252 vsock_t	*fd_list;
253 
254 #if !defined(HAVE_IO_COMPLETION_PORT) && defined(HAS_ROUTING_SOCKET)
255 /*
256  * async notification processing (e. g. routing sockets)
257  */
258 /*
259  * support for receiving data on fd that is not a refclock or a socket
260  * like e. g. routing sockets
261  */
262 struct asyncio_reader {
263 	struct asyncio_reader *link;		    /* the list this is being kept in */
264 	SOCKET fd;				    /* fd to be read */
265 	void  *data;				    /* possibly local data */
266 	void (*receiver)(struct asyncio_reader *);  /* input handler */
267 };
268 
269 struct asyncio_reader *asyncio_reader_list;
270 
271 static void delete_asyncio_reader (struct asyncio_reader *);
272 static struct asyncio_reader *new_asyncio_reader (void);
273 static void add_asyncio_reader (struct asyncio_reader *, enum desc_type);
274 static void remove_asyncio_reader (struct asyncio_reader *);
275 
276 #endif /* !defined(HAVE_IO_COMPLETION_PORT) && defined(HAS_ROUTING_SOCKET) */
277 
278 static void init_async_notifications (void);
279 
280 static	int	addr_eqprefix	(const sockaddr_u *, const sockaddr_u *,
281 				 int);
282 static int	addr_samesubnet	(const sockaddr_u *, const sockaddr_u *,
283 				 const sockaddr_u *, const sockaddr_u *);
284 static	int	create_sockets	(u_short);
285 static	SOCKET	open_socket	(sockaddr_u *, int, int, endpt *);
286 static	void	set_reuseaddr	(int);
287 static	isc_boolean_t	socket_broadcast_enable	 (endpt *, SOCKET, sockaddr_u *);
288 
289 #if !defined(HAVE_IO_COMPLETION_PORT) && !defined(HAVE_SIGNALED_IO)
290 static	char *	fdbits		(int, const fd_set *);
291 #endif
292 #ifdef  OS_MISSES_SPECIFIC_ROUTE_UPDATES
293 static	isc_boolean_t	socket_broadcast_disable (endpt *, sockaddr_u *);
294 #endif
295 
296 typedef struct remaddr remaddr_t;
297 
298 struct remaddr {
299 	remaddr_t *		link;
300 	sockaddr_u		addr;
301 	endpt *			ep;
302 };
303 
304 remaddr_t *	remoteaddr_list;
305 endpt *		ep_list;	/* complete endpt list */
306 endpt *		mc4_list;	/* IPv4 mcast-capable unicast endpts */
307 endpt *		mc6_list;	/* IPv6 mcast-capable unicast endpts */
308 
309 static endpt *	wildipv4;
310 static endpt *	wildipv6;
311 
312 #define		RFC3927_ADDR	0xa9fe0000	/* 169.254. */
313 #define		RFC3927_MASK	0xffff0000
314 #define		IS_AUTOCONF(addr4)					\
315 		((SRCADR(addr4) & RFC3927_MASK) == RFC3927_ADDR)
316 
317 #ifdef SYS_WINNT
318 int accept_wildcard_if_for_winnt;
319 #else
320 const int accept_wildcard_if_for_winnt = FALSE;
321 #define		init_io_completion_port()	do {} while (FALSE)
322 #endif
323 
324 static void	add_fd_to_list		(SOCKET, enum desc_type);
325 static endpt *	find_addr_in_list	(sockaddr_u *);
326 static endpt *	find_flagged_addr_in_list(sockaddr_u *, u_int32);
327 static void	delete_addr_from_list	(sockaddr_u *);
328 static void	delete_interface_from_list(endpt *);
329 static void	close_and_delete_fd_from_list(SOCKET, endpt *);
330 static void	add_addr_to_list	(sockaddr_u *, endpt *);
331 static void	create_wildcards	(u_short);
332 static endpt *	findlocalinterface	(sockaddr_u *, int, int);
333 static endpt *	findclosestinterface	(sockaddr_u *, int);
334 #ifdef DEBUG
335 static const char *	action_text	(nic_rule_action);
336 #endif
337 static nic_rule_action	interface_action(char *, sockaddr_u *, u_int32);
338 static void		convert_isc_if	(isc_interface_t *,
339 					 endpt *, u_short);
340 static void		calc_addr_distance(sockaddr_u *,
341 					   const sockaddr_u *,
342 					   const sockaddr_u *);
343 static int		cmp_addr_distance(const sockaddr_u *,
344 					  const sockaddr_u *);
345 
346 /*
347  * Routines to read the ntp packets
348  */
349 #if !defined(HAVE_IO_COMPLETION_PORT)
350 static inline int	read_network_packet	(SOCKET, endpt *, l_fp);
351 static void		ntpd_addremove_io_fd	(int, int, int);
352 static void 		input_handler_scan	(const l_fp*, const fd_set*);
353 static int/*BOOL*/	sanitize_fdset		(int errc);
354 #ifdef REFCLOCK
355 static inline int	read_refclock_packet	(SOCKET, struct refclockio *, l_fp);
356 #endif
357 #ifdef HAVE_SIGNALED_IO
358 static void 		input_handler		(l_fp*);
359 #endif
360 #endif
361 
362 
363 #ifndef HAVE_IO_COMPLETION_PORT
364 void
365 maintain_activefds(
366 	int fd,
367 	int closing
368 	)
369 {
370 	int i;
371 
372 	if (fd < 0 || fd >= FD_SETSIZE) {
373 		msyslog(LOG_ERR,
374 			"Too many sockets in use, FD_SETSIZE %d exceeded by fd %d",
375 			FD_SETSIZE, fd);
376 		exit(1);
377 	}
378 
379 	if (!closing) {
380 		FD_SET(fd, &activefds);
381 		maxactivefd = max(fd, maxactivefd);
382 	} else {
383 		FD_CLR(fd, &activefds);
384 		if (maxactivefd && fd == maxactivefd) {
385 			for (i = maxactivefd - 1; i >= 0; i--)
386 				if (FD_ISSET(i, &activefds)) {
387 					maxactivefd = i;
388 					break;
389 				}
390 			INSIST(fd != maxactivefd);
391 		}
392 	}
393 }
394 #endif	/* !HAVE_IO_COMPLETION_PORT */
395 
396 
397 #ifdef DEBUG_TIMING
398 /*
399  * collect timing information for various processing
400  * paths. currently we only pass them on to the file
401  * for later processing. this could also do histogram
402  * based analysis in other to reduce the load (and skew)
403  * dur to the file output
404  */
405 void
406 collect_timing(struct recvbuf *rb, const char *tag, int count, l_fp *dts)
407 {
408 	char buf[256];
409 
410 	snprintf(buf, sizeof(buf), "%s %d %s %s",
411 		 (rb != NULL)
412 		     ? ((rb->dstadr != NULL)
413 			    ? stoa(&rb->recv_srcadr)
414 			    : "-REFCLOCK-")
415 		     : "-",
416 		 count, lfptoa(dts, 9), tag);
417 	record_timing_stats(buf);
418 }
419 #endif
420 
421 /*
422  * About dynamic interfaces, sockets, reception and more...
423  *
424  * the code solves following tasks:
425  *
 *   - keep a current list of active interfaces in order
 *     to bind to the interface address on NTP_PORT so that
 *     all wild and specific bindings for NTP_PORT are taken by ntpd
 *     to avoid other daemons messing with the time or sockets.
430  *   - all interfaces keep a list of peers that are referencing
431  *     the interface in order to quickly re-assign the peers to
432  *     new interface in case an interface is deleted (=> gone from system or
433  *     down)
434  *   - have a preconfigured socket ready with the right local address
435  *     for transmission and reception
436  *   - have an address list for all destination addresses used within ntpd
437  *     to find the "right" preconfigured socket.
438  *   - facilitate updating the internal interface list with respect to
439  *     the current kernel state
440  *
441  * special issues:
442  *
443  *   - mapping of multicast addresses to the interface affected is not always
444  *     one to one - especially on hosts with multiple interfaces
445  *     the code here currently allocates a separate interface entry for those
446  *     multicast addresses
447  *     iff it is able to bind to a *new* socket with the multicast address (flags |= MCASTIF)
448  *     in case of failure the multicast address is bound to an existing interface.
449  *   - on some systems it is perfectly legal to assign the same address to
450  *     multiple interfaces. Therefore this code does not keep a list of interfaces
451  *     but a list of interfaces that represent a unique address as determined by the kernel
452  *     by the procedure in findlocalinterface. Thus it is perfectly legal to see only
453  *     one representative of a group of real interfaces if they share the same address.
454  *
455  * Frank Kardel 20050910
456  */
457 
458 /*
459  * init_io - initialize I/O module.
460  */
461 void
462 init_io(void)
463 {
464 	/* Init buffer free list and stat counters */
465 	init_recvbuff(RECV_INIT);
466 	endpt_scan_period = 301;
467 
468 #ifdef WORK_PIPE
469 	addremove_io_fd = &ntpd_addremove_io_fd;
470 #endif
471 
472 	init_io_completion_port();
473 #if defined(HAVE_SIGNALED_IO)
474 	(void) set_signal(input_handler);
475 #endif
476 }
477 
478 
/*
 * ntpd_addremove_io_fd - start or stop watching a descriptor
 *
 * fd         the descriptor
 * is_pipe    ignored here
 * remove_it  nonzero to stop watching fd, zero to start
 */
static void
ntpd_addremove_io_fd(
	int	fd,
	int	is_pipe,
	int	remove_it
	)
{
	UNUSED_ARG(is_pipe);

#ifdef HAVE_SIGNALED_IO
	/* only a descriptor being added goes through init_socket_sig() */
	if (0 == remove_it)
		init_socket_sig(fd);
#endif /* HAVE_SIGNALED_IO */

	maintain_activefds(fd, remove_it);
}
495 
496 
497 /*
498  * io_open_sockets - call socket creation routine
499  */
500 void
501 io_open_sockets(void)
502 {
503 	static int already_opened;
504 
505 	if (already_opened || HAVE_OPT( SAVECONFIGQUIT ))
506 		return;
507 
508 	already_opened = 1;
509 
510 	/*
511 	 * Create the sockets
512 	 */
513 	BLOCKIO();
514 	create_sockets(NTP_PORT);
515 	UNBLOCKIO();
516 
517 	init_async_notifications();
518 
519 	DPRINTF(3, ("io_open_sockets: maxactivefd %d\n", maxactivefd));
520 }
521 
522 
523 #ifdef DEBUG
524 /*
525  * function to dump the contents of the interface structure
526  * for debugging use only.
527  * We face a dilemma here -- sockets are FDs under POSIX and
528  * actually HANDLES under Windows. So we use '%lld' as format
529  * and cast the value to 'long long'; this should not hurt
530  * with UNIX-like systems and does not truncate values on Win64.
531  */
532 void
533 interface_dump(const endpt *itf)
534 {
535 	printf("Dumping interface: %p\n", itf);
536 	printf("fd = %lld\n", (long long)itf->fd);
537 	printf("bfd = %lld\n", (long long)itf->bfd);
538 	printf("sin = %s,\n", stoa(&itf->sin));
539 	printf("bcast = %s,\n", stoa(&itf->bcast));
540 	printf("mask = %s,\n", stoa(&itf->mask));
541 	printf("name = %s\n", itf->name);
542 	printf("flags = 0x%08x\n", itf->flags);
543 	printf("last_ttl = %d\n", itf->last_ttl);
544 	printf("addr_refid = %08x\n", itf->addr_refid);
545 	printf("num_mcast = %d\n", itf->num_mcast);
546 	printf("received = %ld\n", itf->received);
547 	printf("sent = %ld\n", itf->sent);
548 	printf("notsent = %ld\n", itf->notsent);
549 	printf("ifindex = %u\n", itf->ifindex);
550 	printf("peercnt = %u\n", itf->peercnt);
551 	printf("phase = %u\n", itf->phase);
552 }
553 
554 
555 /*
556  * print_interface - helper to output debug information
557  */
558 static void
559 print_interface(const endpt *iface, const char *pfx, const char *sfx)
560 {
561 	printf("%sinterface #%d: fd=%lld, bfd=%lld, name=%s, flags=0x%x, ifindex=%u, sin=%s",
562 	       pfx,
563 	       iface->ifnum,
564 	       (long long)iface->fd,
565 	       (long long)iface->bfd,
566 	       iface->name,
567 	       iface->flags,
568 	       iface->ifindex,
569 	       stoa(&iface->sin));
570 	if (AF_INET == iface->family) {
571 		if (iface->flags & INT_BROADCAST)
572 			printf(", bcast=%s", stoa(&iface->bcast));
573 		printf(", mask=%s", stoa(&iface->mask));
574 	}
575 	printf(", %s:%s",
576 	       (iface->ignore_packets)
577 		   ? "Disabled"
578 		   : "Enabled",
579 	       sfx);
580 	if (debug > 4)	/* in-depth debugging only */
581 		interface_dump(iface);
582 }
583 #endif
584 
585 #if !defined(HAVE_IO_COMPLETION_PORT) && defined(HAS_ROUTING_SOCKET)
586 /*
587  * create an asyncio_reader structure
588  */
589 static struct asyncio_reader *
590 new_asyncio_reader(void)
591 {
592 	struct asyncio_reader *reader;
593 
594 	reader = emalloc_zero(sizeof(*reader));
595 	reader->fd = INVALID_SOCKET;
596 
597 	return reader;
598 }
599 
/*
 * delete_asyncio_reader - release the storage of a reader.
 * Only frees the structure; it does not touch the list or the fd.
 */
static void
delete_asyncio_reader(struct asyncio_reader *reader)
{
	free(reader);
}
610 
611 /*
612  * add asynchio_reader
613  */
614 static void
615 add_asyncio_reader(
616 	struct asyncio_reader *	reader,
617 	enum desc_type		type)
618 {
619 	LINK_SLIST(asyncio_reader_list, reader, link);
620 	add_fd_to_list(reader->fd, type);
621 }
622 
623 /*
624  * remove asyncio_reader
625  */
626 static void
627 remove_asyncio_reader(
628 	struct asyncio_reader *reader
629 	)
630 {
631 	struct asyncio_reader *unlinked;
632 
633 	UNLINK_SLIST(unlinked, asyncio_reader_list, reader, link,
634 	    struct asyncio_reader);
635 
636 	if (reader->fd != INVALID_SOCKET) {
637 		close_and_delete_fd_from_list(reader->fd, NULL);
638 	}
639 	reader->fd = INVALID_SOCKET;
640 }
641 #endif /* !defined(HAVE_IO_COMPLETION_PORT) && defined(HAS_ROUTING_SOCKET) */
642 
643 
644 /* compare two sockaddr prefixes */
645 static int
646 addr_eqprefix(
647 	const sockaddr_u *	a,
648 	const sockaddr_u *	b,
649 	int			prefixlen
650 	)
651 {
652 	isc_netaddr_t		isc_a;
653 	isc_netaddr_t		isc_b;
654 	isc_sockaddr_t		isc_sa;
655 
656 	ZERO(isc_sa);
657 	memcpy(&isc_sa.type, a, min(sizeof(isc_sa.type), sizeof(*a)));
658 	isc_netaddr_fromsockaddr(&isc_a, &isc_sa);
659 
660 	ZERO(isc_sa);
661 	memcpy(&isc_sa.type, b, min(sizeof(isc_sa.type), sizeof(*b)));
662 	isc_netaddr_fromsockaddr(&isc_b, &isc_sa);
663 
664 	return (int)isc_netaddr_eqprefix(&isc_a, &isc_b,
665 					 (u_int)prefixlen);
666 }
667 
668 
669 static int
670 addr_samesubnet(
671 	const sockaddr_u *	a,
672 	const sockaddr_u *	a_mask,
673 	const sockaddr_u *	b,
674 	const sockaddr_u *	b_mask
675 	)
676 {
677 	const u_int32 *	pa;
678 	const u_int32 *	pa_limit;
679 	const u_int32 *	pb;
680 	const u_int32 *	pm;
681 	size_t		loops;
682 
683 	REQUIRE(AF(a) == AF(a_mask));
684 	REQUIRE(AF(b) == AF(b_mask));
685 	/*
686 	 * With address and mask families verified to match, comparing
687 	 * the masks also validates the address's families match.
688 	 */
689 	if (!SOCK_EQ(a_mask, b_mask))
690 		return FALSE;
691 
692 	if (IS_IPV6(a)) {
693 		loops = sizeof(NSRCADR6(a)) / sizeof(*pa);
694 		pa = (const void *)&NSRCADR6(a);
695 		pb = (const void *)&NSRCADR6(b);
696 		pm = (const void *)&NSRCADR6(a_mask);
697 	} else {
698 		loops = sizeof(NSRCADR(a)) / sizeof(*pa);
699 		pa = (const void *)&NSRCADR(a);
700 		pb = (const void *)&NSRCADR(b);
701 		pm = (const void *)&NSRCADR(a_mask);
702 	}
703 	for (pa_limit = pa + loops; pa < pa_limit; pa++, pb++, pm++)
704 		if ((*pa & *pm) != (*pb & *pm))
705 			return FALSE;
706 
707 	return TRUE;
708 }
709 
710 
711 /*
712  * interface list enumerator - visitor pattern
713  */
714 void
715 interface_enumerate(
716 	interface_receiver_t	receiver,
717 	void *			data
718 	)
719 {
720 	interface_info_t ifi;
721 
722 	ifi.action = IFS_EXISTS;
723 	for (ifi.ep = ep_list; ifi.ep != NULL; ifi.ep = ifi.ep->elink)
724 		(*receiver)(data, &ifi);
725 }
726 
727 /*
728  * do standard initialization of interface structure
729  */
730 static inline void
731 init_interface(
732 	endpt *ep
733 	)
734 {
735 	ZERO(*ep);
736 	ep->fd = INVALID_SOCKET;
737 	ep->bfd = INVALID_SOCKET;
738 	ep->phase = sys_interphase;
739 }
740 
741 
742 /*
743  * create new interface structure initialize from
744  * template structure or via standard initialization
745  * function
746  */
747 static endpt *
748 new_interface(
749 	endpt *protot
750 	)
751 {
752 	endpt *	iface;
753 
754 	iface = emalloc(sizeof(*iface));
755 	if (NULL == protot) {
756 		ZERO(*iface);
757 	} else {
758 		memcpy(iface, protot, sizeof(*iface));
759 	}
760 	/* count every new instance of an interface in the system */
761 	iface->ifnum = sys_ifnum++;
762 	iface->starttime = current_time;
763 
764 #   ifdef HAVE_IO_COMPLETION_PORT
765 	if (!io_completion_port_add_interface(iface)) {
766 		msyslog(LOG_EMERG, "cannot register interface with IO engine -- will exit now");
767 		exit(1);
768 	}
769 #   endif
770 	return iface;
771 }
772 
773 
774 /*
775  * return interface storage into free memory pool
776  */
777 static void
778 delete_interface(
779 	endpt *ep
780 	)
781 {
782 #    ifdef HAVE_IO_COMPLETION_PORT
783 	io_completion_port_remove_interface(ep);
784 #    endif
785 	free(ep);
786 }
787 
788 
789 /*
790  * link interface into list of known interfaces
791  */
792 static void
793 add_interface(
794 	endpt *	ep
795 	)
796 {
797 	endpt **	pmclisthead;
798 	endpt *		scan;
799 	endpt *		scan_next;
800 	int		same_subnet;
801 	int		rc;
802 
803 	/* Calculate the refid */
804 	ep->addr_refid = addr2refid(&ep->sin);
805 #    ifdef WORDS_BIGENDIAN
806 	if (IS_IPV6(&ep->sin)) {
807 		ep->old_refid = BYTESWAP32(ep->addr_refid);
808 	}
809 #    endif
810 	/* link at tail so ntpdc -c ifstats index increases each row */
811 	LINK_TAIL_SLIST(ep_list, ep, elink, endpt);
812 	ninterfaces++;
813 #ifdef MCAST
814 	/* the rest is for enabled multicast-capable addresses only */
815 	if (ep->ignore_packets || !(INT_MULTICAST & ep->flags) ||
816 	    INT_LOOPBACK & ep->flags)
817 		return;
818 # ifndef INCLUDE_IPV6_MULTICAST_SUPPORT
819 	if (AF_INET6 == ep->family)
820 		return;
821 # endif
822 	pmclisthead = (AF_INET == ep->family)
823 			 ? &mc4_list
824 			 : &mc6_list;
825 
826 	/*
827 	 * If we have multiple global addresses from the same prefix
828 	 * on the same network interface, multicast from one.
829 	 */
830 	for (scan = *pmclisthead; scan != NULL; scan = scan_next) {
831 		scan_next = scan->mclink;
832 		if (   ep->family != scan->family
833 		    || ep->ifindex != scan->ifindex) {
834 			continue;
835 		}
836 		same_subnet = addr_samesubnet(&ep->sin, &ep->mask,
837 					      &scan->sin, &scan->mask);
838 		if (same_subnet) {
839 			DPRINTF(4, ("did not add %s to multicast-capable list"
840 				    "which already has %s\n",
841 				    stoa(&ep->sin), stoa(&scan->sin)));
842 			return;
843 		}
844 	}
845 	LINK_SLIST(*pmclisthead, ep, mclink);
846 	if (INVALID_SOCKET == ep->fd)
847 		return;
848 
849 	/*
850 	 * select the local address from which to send to multicast.
851 	 */
852 	switch (AF(&ep->sin)) {
853 
854 	case AF_INET :
855 		rc = setsockopt(ep->fd, IPPROTO_IP,
856 				IP_MULTICAST_IF,
857 				(void *)&NSRCADR(&ep->sin),
858 				sizeof(NSRCADR(&ep->sin)));
859 		if (rc)
860 			msyslog(LOG_ERR,
861 				"setsockopt IP_MULTICAST_IF %s fails: %m",
862 				stoa(&ep->sin));
863 		break;
864 
865 # ifdef INCLUDE_IPV6_MULTICAST_SUPPORT
866 	case AF_INET6 :
867 		rc = setsockopt(ep->fd, IPPROTO_IPV6,
868 				 IPV6_MULTICAST_IF,
869 				 (void *)&ep->ifindex,
870 				 sizeof(ep->ifindex));
871 		/* do not complain if bound addr scope is ifindex */
872 		if (rc && ep->ifindex != SCOPE(&ep->sin))
873 			msyslog(LOG_ERR,
874 				"setsockopt IPV6_MULTICAST_IF %u for %s fails: %m",
875 				ep->ifindex, stoa(&ep->sin));
876 		break;
877 # endif
878 	}
879 #endif	/* MCAST */
880 }
881 
882 
883 /*
884  * remove interface from known interface list and clean up
885  * associated resources
886  */
887 static void
888 remove_interface(
889 	endpt *	ep
890 	)
891 {
892 	endpt *		unlinked;
893 	endpt **	pmclisthead;
894 	sockaddr_u	resmask;
895 	int/*BOOL*/	success;
896 
897 	UNLINK_SLIST(unlinked, ep_list, ep, elink, endpt);
898 	if (!ep->ignore_packets && INT_MULTICAST & ep->flags) {
899 		pmclisthead = (AF_INET == ep->family)
900 				 ? &mc4_list
901 				 : &mc6_list;
902 		UNLINK_SLIST(unlinked, *pmclisthead, ep, mclink, endpt);
903 		DPRINTF(4, ("%s %s IPv%s multicast-capable unicast local address list\n",
904 			stoa(&ep->sin),
905 			(unlinked != NULL)
906 			    ? "removed from"
907 			    : "not found on",
908 			(AF_INET == ep->family)
909 			    ? "4"
910 			    : "6"));
911 	}
912 	delete_interface_from_list(ep);
913 
914 	if (ep->fd != INVALID_SOCKET) {
915 		msyslog(LOG_INFO,
916 			"Deleting %d %s, [%s]:%hd, stats:"
917 			" received=%ld, sent=%ld, dropped=%ld,"
918 			" active_time=%ld secs",
919 			ep->ifnum,
920 			ep->name,
921 			stoa(&ep->sin),
922 			SRCPORT(&ep->sin),
923 			ep->received,
924 			ep->sent,
925 			ep->notsent,
926 			current_time - ep->starttime);
927 		close_and_delete_fd_from_list(ep->fd, ep);
928 		ep->fd = INVALID_SOCKET;
929 	}
930 
931 	if (ep->bfd != INVALID_SOCKET) {
932 		msyslog(LOG_INFO,
933 			"stop listening for broadcasts to %s on interface #%d %s",
934 			stoa(&ep->bcast), ep->ifnum, ep->name);
935 		close_and_delete_fd_from_list(ep->bfd, ep);
936 		ep->bfd = INVALID_SOCKET;
937 	}
938 #   ifdef HAVE_IO_COMPLETION_PORT
939 	io_completion_port_remove_interface(ep);
940 #   endif
941 
942 	ninterfaces--;
943 	mon_clearinterface(ep);
944 
945 	/* remove restrict interface entry */
946 	SET_HOSTMASK(&resmask, AF(&ep->sin));
947 	success = hack_restrict(RESTRICT_REMOVEIF, &ep->sin, &resmask, 0,
948 				RESM_NTPONLY | RESM_INTERFACE, 0, 0);
949 	if (!success) {
950 		msyslog(LOG_ERR,
951 			"unable to remove self-restriction for %s",
952 			stoa(&ep->sin));
953 	}
954 
955 }
956 
957 
958 static void
959 log_listen_address(
960 	endpt *	ep
961 	)
962 {
963 	msyslog(LOG_INFO, "%s on %d %s %s",
964 		(ep->ignore_packets)
965 		    ? "Listen and drop"
966 		    : "Listen normally",
967 		ep->ifnum,
968 		ep->name,
969 		sptoa(&ep->sin));
970 }
971 
972 
973 static void
974 create_wildcards(
975 	u_short	port
976 	)
977 {
978 	int			v4wild;
979 #ifdef INCLUDE_IPV6_SUPPORT
980 	int			v6wild;
981 #endif
982 	sockaddr_u		wildaddr;
983 	nic_rule_action		action;
984 	endpt *			wildif;
985 
986 	/*
987 	 * silence "potentially uninitialized" warnings from VC9
988 	 * failing to follow the logic.  Ideally action could remain
989 	 * uninitialized, and the memset be the first statement under
990 	 * the first if (v4wild).
991 	 */
992 	action = ACTION_LISTEN;
993 	ZERO(wildaddr);
994 
995 #ifdef INCLUDE_IPV6_SUPPORT
996 	/*
997 	 * create pseudo-interface with wildcard IPv6 address
998 	 */
999 	v6wild = ipv6_works;
1000 	if (v6wild) {
1001 		/* set wildaddr to the v6 wildcard address :: */
1002 		ZERO(wildaddr);
1003 		AF(&wildaddr) = AF_INET6;
1004 		SET_ADDR6N(&wildaddr, in6addr_any);
1005 		SET_PORT(&wildaddr, port);
1006 		SET_SCOPE(&wildaddr, 0);
1007 
1008 		/* check for interface/nic rules affecting the wildcard */
1009 		action = interface_action(NULL, &wildaddr, 0);
1010 		v6wild = (ACTION_IGNORE != action);
1011 	}
1012 	if (v6wild) {
1013 		wildif = new_interface(NULL);
1014 
1015 		strlcpy(wildif->name, "v6wildcard", sizeof(wildif->name));
1016 		memcpy(&wildif->sin, &wildaddr, sizeof(wildif->sin));
1017 		wildif->family = AF_INET6;
1018 		AF(&wildif->mask) = AF_INET6;
1019 		SET_ONESMASK(&wildif->mask);
1020 
1021 		wildif->flags = INT_UP | INT_WILDCARD;
1022 		wildif->ignore_packets = (ACTION_DROP == action);
1023 
1024 		wildif->fd = open_socket(&wildif->sin, 0, 1, wildif);
1025 
1026 		if (wildif->fd != INVALID_SOCKET) {
1027 			wildipv6 = wildif;
1028 			any6_interface = wildif;
1029 			add_addr_to_list(&wildif->sin, wildif);
1030 			add_interface(wildif);
1031 			log_listen_address(wildif);
1032 		} else {
1033 			msyslog(LOG_ERR,
1034 				"unable to bind to wildcard address %s - another process may be running - EXITING",
1035 				stoa(&wildif->sin));
1036 			exit(1);
1037 		}
1038 		DPRINT_INTERFACE(2, (wildif, "created ", "\n"));
1039 	}
1040 #endif
1041 
1042 	/*
1043 	 * create pseudo-interface with wildcard IPv4 address
1044 	 */
1045 	v4wild = ipv4_works;
1046 	if (v4wild) {
1047 		/* set wildaddr to the v4 wildcard address 0.0.0.0 */
1048 		AF(&wildaddr) = AF_INET;
1049 		SET_ADDR4N(&wildaddr, INADDR_ANY);
1050 		SET_PORT(&wildaddr, port);
1051 
1052 		/* check for interface/nic rules affecting the wildcard */
1053 		action = interface_action(NULL, &wildaddr, 0);
1054 		v4wild = (ACTION_IGNORE != action);
1055 	}
1056 	if (v4wild) {
1057 		wildif = new_interface(NULL);
1058 
1059 		strlcpy(wildif->name, "v4wildcard", sizeof(wildif->name));
1060 		memcpy(&wildif->sin, &wildaddr, sizeof(wildif->sin));
1061 		wildif->family = AF_INET;
1062 		AF(&wildif->mask) = AF_INET;
1063 		SET_ONESMASK(&wildif->mask);
1064 
1065 		wildif->flags = INT_BROADCAST | INT_UP | INT_WILDCARD;
1066 		wildif->ignore_packets = (ACTION_DROP == action);
1067 #if defined(MCAST)
1068 		/*
1069 		 * enable multicast reception on the broadcast socket
1070 		 */
1071 		AF(&wildif->bcast) = AF_INET;
1072 		SET_ADDR4N(&wildif->bcast, INADDR_ANY);
1073 		SET_PORT(&wildif->bcast, port);
1074 #endif /* MCAST */
1075 		wildif->fd = open_socket(&wildif->sin, 0, 1, wildif);
1076 
1077 		if (wildif->fd != INVALID_SOCKET) {
1078 			wildipv4 = wildif;
1079 			any_interface = wildif;
1080 
1081 			add_addr_to_list(&wildif->sin, wildif);
1082 			add_interface(wildif);
1083 			log_listen_address(wildif);
1084 		} else {
1085 			msyslog(LOG_ERR,
1086 				"unable to bind to wildcard address %s - another process may be running - EXITING",
1087 				stoa(&wildif->sin));
1088 			exit(1);
1089 		}
1090 		DPRINT_INTERFACE(2, (wildif, "created ", "\n"));
1091 	}
1092 }
1093 
1094 
1095 /*
1096  * add_nic_rule() -- insert a rule entry at the head of nic_rule_list.
1097  */
1098 void
1099 add_nic_rule(
1100 	nic_rule_match	match_type,
1101 	const char *	if_name,	/* interface name or numeric address */
1102 	int		prefixlen,
1103 	nic_rule_action	action
1104 	)
1105 {
1106 	nic_rule *	rule;
1107 	isc_boolean_t	is_ip;
1108 
1109 	rule = emalloc_zero(sizeof(*rule));
1110 	rule->match_type = match_type;
1111 	rule->prefixlen = prefixlen;
1112 	rule->action = action;
1113 
1114 	if (MATCH_IFNAME == match_type) {
1115 		REQUIRE(NULL != if_name);
1116 		rule->if_name = estrdup(if_name);
1117 	} else if (MATCH_IFADDR == match_type) {
1118 		REQUIRE(NULL != if_name);
1119 		/* set rule->addr */
1120 		is_ip = is_ip_address(if_name, AF_UNSPEC, &rule->addr);
1121 		REQUIRE(is_ip);
1122 	} else
1123 		REQUIRE(NULL == if_name);
1124 
1125 	LINK_SLIST(nic_rule_list, rule, next);
1126 }
1127 
1128 
#ifdef DEBUG
/*
 * action_text() - debug-only helper returning a printable name for a
 *		   nic_rule_action.  An unrecognized value is reported
 *		   via DPRINTF and then trips ENSURE(0).
 */
static const char *
action_text(
	nic_rule_action	action
	)
{
	switch (action) {

	case ACTION_LISTEN:
		return "listen";

	case ACTION_IGNORE:
		return "ignore";

	case ACTION_DROP:
		return "drop";

	default:
		break;
	}

	/* only reached for a value outside the known actions */
	DPRINTF(1, ("fatal: unknown nic_rule_action %d\n",
		    action));
	ENSURE(0);

	return "ERROR";	/* keeps the compiler quiet about a missing return */
}
#endif	/* DEBUG */
1162 
1163 
1164 static nic_rule_action
1165 interface_action(
1166 	char *		if_name,
1167 	sockaddr_u *	if_addr,
1168 	u_int32		if_flags
1169 	)
1170 {
1171 	nic_rule *	rule;
1172 	int		isloopback;
1173 	int		iswildcard;
1174 
1175 	DPRINTF(4, ("interface_action: interface %s ",
1176 		    (if_name != NULL) ? if_name : "wildcard"));
1177 
1178 	iswildcard = is_wildcard_addr(if_addr);
1179 	isloopback = !!(INT_LOOPBACK & if_flags);
1180 
1181 	/*
1182 	 * Find any matching NIC rule from --interface / -I or ntp.conf
1183 	 * interface/nic rules.
1184 	 */
1185 	for (rule = nic_rule_list; rule != NULL; rule = rule->next) {
1186 
1187 		switch (rule->match_type) {
1188 
1189 		case MATCH_ALL:
1190 			/* loopback and wildcard excluded from "all" */
1191 			if (isloopback || iswildcard)
1192 				break;
1193 			DPRINTF(4, ("nic all %s\n",
1194 			    action_text(rule->action)));
1195 			return rule->action;
1196 
1197 		case MATCH_IPV4:
1198 			if (IS_IPV4(if_addr)) {
1199 				DPRINTF(4, ("nic ipv4 %s\n",
1200 				    action_text(rule->action)));
1201 				return rule->action;
1202 			}
1203 			break;
1204 
1205 		case MATCH_IPV6:
1206 			if (IS_IPV6(if_addr)) {
1207 				DPRINTF(4, ("nic ipv6 %s\n",
1208 				    action_text(rule->action)));
1209 				return rule->action;
1210 			}
1211 			break;
1212 
1213 		case MATCH_WILDCARD:
1214 			if (iswildcard) {
1215 				DPRINTF(4, ("nic wildcard %s\n",
1216 				    action_text(rule->action)));
1217 				return rule->action;
1218 			}
1219 			break;
1220 
1221 		case MATCH_IFADDR:
1222 			if (rule->prefixlen != -1) {
1223 				if (addr_eqprefix(if_addr, &rule->addr,
1224 						  rule->prefixlen)) {
1225 
1226 					DPRINTF(4, ("subnet address match - %s\n",
1227 					    action_text(rule->action)));
1228 					return rule->action;
1229 				}
1230 			} else
1231 				if (SOCK_EQ(if_addr, &rule->addr)) {
1232 
1233 					DPRINTF(4, ("address match - %s\n",
1234 					    action_text(rule->action)));
1235 					return rule->action;
1236 				}
1237 			break;
1238 
1239 		case MATCH_IFNAME:
1240 			if (if_name != NULL
1241 #if defined(HAVE_FNMATCH) && defined(FNM_CASEFOLD)
1242 			    && !fnmatch(rule->if_name, if_name, FNM_CASEFOLD)
1243 #else
1244 			    && !strcasecmp(if_name, rule->if_name)
1245 #endif
1246 			    ) {
1247 
1248 				DPRINTF(4, ("interface name match - %s\n",
1249 				    action_text(rule->action)));
1250 				return rule->action;
1251 			}
1252 			break;
1253 		}
1254 	}
1255 
1256 	/*
1257 	 * Unless explicitly disabled such as with "nic ignore ::1"
1258 	 * listen on loopback addresses.  Since ntpq and ntpdc query
1259 	 * "localhost" by default, which typically resolves to ::1 and
1260 	 * 127.0.0.1, it's useful to default to listening on both.
1261 	 */
1262 	if (isloopback) {
1263 		DPRINTF(4, ("default loopback listen\n"));
1264 		return ACTION_LISTEN;
1265 	}
1266 
1267 	/*
1268 	 * Treat wildcard addresses specially.  If there is no explicit
1269 	 * "nic ... wildcard" or "nic ... 0.0.0.0" or "nic ... ::" rule
1270 	 * default to drop.
1271 	 */
1272 	if (iswildcard) {
1273 		DPRINTF(4, ("default wildcard drop\n"));
1274 		return ACTION_DROP;
1275 	}
1276 
1277 	/*
1278 	 * Check for "virtual IP" (colon in the interface name) after
1279 	 * the rules so that "ntpd --interface eth0:1 -novirtualips"
1280 	 * does indeed listen on eth0:1's addresses.
1281 	 */
1282 	if (!listen_to_virtual_ips && if_name != NULL
1283 	    && (strchr(if_name, ':') != NULL)) {
1284 
1285 		DPRINTF(4, ("virtual ip - ignore\n"));
1286 		return ACTION_IGNORE;
1287 	}
1288 
1289 	/*
1290 	 * If there are no --interface/-I command-line options and no
1291 	 * interface/nic rules in ntp.conf, the default action is to
1292 	 * listen.  In the presence of rules from either, the default
1293 	 * is to ignore.  This implements ntpd's traditional listen-
1294 	 * every default with no interface listen configuration, and
1295 	 * ensures a single -I eth0 or "nic listen eth0" means do not
1296 	 * listen on any other addresses.
1297 	 */
1298 	if (NULL == nic_rule_list) {
1299 		DPRINTF(4, ("default listen\n"));
1300 		return ACTION_LISTEN;
1301 	}
1302 
1303 	DPRINTF(4, ("implicit ignore\n"));
1304 	return ACTION_IGNORE;
1305 }
1306 
1307 
1308 static void
1309 convert_isc_if(
1310 	isc_interface_t *isc_if,
1311 	endpt *itf,
1312 	u_short port
1313 	)
1314 {
1315 	strlcpy(itf->name, isc_if->name, sizeof(itf->name));
1316 	itf->ifindex = isc_if->ifindex;
1317 	itf->family = (u_short)isc_if->af;
1318 	AF(&itf->sin) = itf->family;
1319 	AF(&itf->mask) = itf->family;
1320 	AF(&itf->bcast) = itf->family;
1321 	SET_PORT(&itf->sin, port);
1322 	SET_PORT(&itf->mask, port);
1323 	SET_PORT(&itf->bcast, port);
1324 
1325 	if (IS_IPV4(&itf->sin)) {
1326 		NSRCADR(&itf->sin) = isc_if->address.type.in.s_addr;
1327 		NSRCADR(&itf->mask) = isc_if->netmask.type.in.s_addr;
1328 
1329 		if (isc_if->flags & INTERFACE_F_BROADCAST) {
1330 			itf->flags |= INT_BROADCAST;
1331 			NSRCADR(&itf->bcast) =
1332 			    isc_if->broadcast.type.in.s_addr;
1333 		}
1334 	}
1335 #ifdef INCLUDE_IPV6_SUPPORT
1336 	else if (IS_IPV6(&itf->sin)) {
1337 		SET_ADDR6N(&itf->sin, isc_if->address.type.in6);
1338 		SET_ADDR6N(&itf->mask, isc_if->netmask.type.in6);
1339 
1340 		SET_SCOPE(&itf->sin, isc_if->address.zone);
1341 	}
1342 #endif /* INCLUDE_IPV6_SUPPORT */
1343 
1344 
1345 	/* Process the rest of the flags */
1346 
1347 	itf->flags |=
1348 		  ((INTERFACE_F_UP & isc_if->flags)
1349 			? INT_UP : 0)
1350 		| ((INTERFACE_F_LOOPBACK & isc_if->flags)
1351 			? INT_LOOPBACK : 0)
1352 		| ((INTERFACE_F_POINTTOPOINT & isc_if->flags)
1353 			? INT_PPP : 0)
1354 		| ((INTERFACE_F_MULTICAST & isc_if->flags)
1355 			? INT_MULTICAST : 0)
1356 		| ((INTERFACE_F_PRIVACY & isc_if->flags)
1357 			? INT_PRIVACY : 0)
1358 		;
1359 
1360 	/*
1361 	 * Clear the loopback flag if the address is not localhost.
1362 	 * http://bugs.ntp.org/1683
1363 	 */
1364 	if ((INT_LOOPBACK & itf->flags) && !IS_LOOPBACK_ADDR(&itf->sin)) {
1365 		itf->flags &= ~INT_LOOPBACK;
1366 	}
1367 }
1368 
1369 
1370 /*
1371  * refresh_interface
1372  *
1373  * some OSes have been observed to keep
1374  * cached routes even when more specific routes
1375  * become available.
1376  * this can be mitigated by re-binding
1377  * the socket.
1378  */
1379 static int
1380 refresh_interface(
1381 	endpt *	iface
1382 	)
1383 {
1384 #ifdef  OS_MISSES_SPECIFIC_ROUTE_UPDATES
1385 	if (iface->fd != INVALID_SOCKET) {
1386 		int bcast = (iface->flags & INT_BCASTXMIT) != 0;
1387 		/* as we forcibly close() the socket remove the
1388 		   broadcast permission indication */
1389 		if (bcast)
1390 			socket_broadcast_disable(iface, &iface->sin);
1391 
1392 		close_and_delete_fd_from_list(iface->fd, iface);
1393 
1394 		/* create new socket picking up a new first hop binding
1395 		   at connect() time */
1396 		iface->fd = open_socket(&iface->sin,
1397 					    bcast, 0, iface);
1398 		 /*
1399 		  * reset TTL indication so TTL is is set again
1400 		  * next time around
1401 		  */
1402 		iface->last_ttl = 0;
1403 		return (iface->fd != INVALID_SOCKET);
1404 	} else
1405 		return 0;	/* invalid sockets are not refreshable */
1406 #else /* !OS_MISSES_SPECIFIC_ROUTE_UPDATES */
1407 	return (iface->fd != INVALID_SOCKET);
1408 #endif /* !OS_MISSES_SPECIFIC_ROUTE_UPDATES */
1409 }
1410 
1411 /*
1412  * interface_update - externally callable update function
1413  */
1414 void
1415 interface_update(
1416 	interface_receiver_t	receiver,
1417 	void *			data
1418 	)
1419 {
1420 	int new_interface_found;
1421 
1422 	if (scan_addrs_once) {
1423 		return;
1424 	}
1425 	BLOCKIO();
1426 	new_interface_found = update_interfaces(NTP_PORT, receiver, data);
1427 	UNBLOCKIO();
1428 
1429 	if (!new_interface_found) {
1430 		return;
1431 	}
1432 #ifdef DEBUG
1433 	msyslog(LOG_DEBUG, "new interface(s) found: waking up resolver");
1434 #endif
1435 	interrupt_worker_sleep();
1436 }
1437 
1438 
1439 /*
1440  * sau_from_netaddr() - convert network address on-wire formats.
1441  * Convert from libisc's isc_netaddr_t to NTP's sockaddr_u
1442  */
1443 void
1444 sau_from_netaddr(
1445 	sockaddr_u *psau,
1446 	const isc_netaddr_t *pna
1447 	)
1448 {
1449 	ZERO_SOCK(psau);
1450 	AF(psau) = (u_short)pna->family;
1451 	switch (pna->family) {
1452 
1453 	case AF_INET:
1454 		psau->sa4.sin_addr = pna->type.in;
1455 		break;
1456 
1457 	case AF_INET6:
1458 		psau->sa6.sin6_addr = pna->type.in6;
1459 		break;
1460 	}
1461 }
1462 
1463 
1464 static int
1465 is_wildcard_addr(
1466 	const sockaddr_u *psau
1467 	)
1468 {
1469 	if (IS_IPV4(psau) && !NSRCADR(psau))
1470 		return 1;
1471 
1472 #ifdef INCLUDE_IPV6_SUPPORT
1473 	if (IS_IPV6(psau) && S_ADDR6_EQ(psau, &in6addr_any))
1474 		return 1;
1475 #endif
1476 
1477 	return 0;
1478 }
1479 
1480 
1481 isc_boolean_t
1482 is_linklocal(
1483 	sockaddr_u *		psau
1484 )
1485 {
1486 	struct in6_addr *	p6addr;
1487 
1488 	if (IS_IPV6(psau)) {
1489 		p6addr = &psau->sa6.sin6_addr;
1490 		if (   IN6_IS_ADDR_LINKLOCAL(p6addr)
1491 		    || IN6_IS_ADDR_SITELOCAL(p6addr)) {
1492 
1493 			return TRUE;
1494 		}
1495 	} else if (IS_IPV4(psau)) {
1496 		/* autoconf are link-local 169.254.0.0/16 */
1497 		if (IS_AUTOCONF(psau)) {
1498 			return TRUE;
1499 		}
1500 	}
1501 	return FALSE;
1502 }
1503 
1504 
#ifdef OS_NEEDS_REUSEADDR_FOR_IFADDRBIND
/*
 * set_wildcard_reuse() - switch SO_REUSEADDR on or off on the wildcard
 *			  socket of the given address family.
 *
 * Nothing happens when that family has no wildcard endpoint or its
 * socket is invalid.  A setsockopt() failure is logged, not returned.
 */
static void
set_wildcard_reuse(
	u_short	family,
	int	on
	)
{
	endpt *	wild;

	wild = ANY_INTERFACE_BYFAM(family);
	if (NULL == wild || INVALID_SOCKET == wild->fd)
		return;

	if (setsockopt(wild->fd, SOL_SOCKET, SO_REUSEADDR,
		       (void *)&on, sizeof(on)))
		msyslog(LOG_ERR,
			"set_wildcard_reuse: setsockopt(SO_REUSEADDR, %s) failed: %m",
			on ? "on" : "off");

	DPRINTF(4, ("set SO_REUSEADDR to %s on %s\n",
		    on ? "on" : "off",
		    stoa(&wild->sin)));
}
#endif /* OS_NEEDS_REUSEADDR_FOR_IFADDRBIND */
1535 
1536 static isc_boolean_t
1537 check_flags(
1538 	sockaddr_u *psau,
1539 	const char *name,
1540 	u_int32 flags
1541 	)
1542 {
1543 #if defined(SIOCGIFAFLAG_IN)
1544 	struct ifreq ifr;
1545 	int fd;
1546 
1547 	if (psau->sa.sa_family != AF_INET)
1548 		return ISC_FALSE;
1549 	if ((fd = socket(AF_INET, SOCK_DGRAM, 0)) < 0)
1550 		return ISC_FALSE;
1551 	ZERO(ifr);
1552 	memcpy(&ifr.ifr_addr, &psau->sa, sizeof(ifr.ifr_addr));
1553 	strlcpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));
1554 	if (ioctl(fd, SIOCGIFAFLAG_IN, &ifr) < 0) {
1555 		close(fd);
1556 		return ISC_FALSE;
1557 	}
1558 	close(fd);
1559 	if ((ifr.ifr_addrflags & flags) != 0)
1560 		return ISC_TRUE;
1561 #endif	/* SIOCGIFAFLAG_IN */
1562 	return ISC_FALSE;
1563 }
1564 
1565 static isc_boolean_t
1566 check_flags6(
1567 	sockaddr_u *psau,
1568 	const char *name,
1569 	u_int32 flags6
1570 	)
1571 {
1572 #if defined(INCLUDE_IPV6_SUPPORT) && defined(SIOCGIFAFLAG_IN6)
1573 	struct in6_ifreq ifr6;
1574 	int fd;
1575 
1576 	if (psau->sa.sa_family != AF_INET6)
1577 		return ISC_FALSE;
1578 	if ((fd = socket(AF_INET6, SOCK_DGRAM, 0)) < 0)
1579 		return ISC_FALSE;
1580 	ZERO(ifr6);
1581 	memcpy(&ifr6.ifr_addr, &psau->sa6, sizeof(ifr6.ifr_addr));
1582 	strlcpy(ifr6.ifr_name, name, sizeof(ifr6.ifr_name));
1583 	if (ioctl(fd, SIOCGIFAFLAG_IN6, &ifr6) < 0) {
1584 		close(fd);
1585 		return ISC_FALSE;
1586 	}
1587 	close(fd);
1588 	if ((ifr6.ifr_ifru.ifru_flags6 & flags6) != 0)
1589 		return ISC_TRUE;
1590 #endif	/* INCLUDE_IPV6_SUPPORT && SIOCGIFAFLAG_IN6 */
1591 	return ISC_FALSE;
1592 }
1593 
1594 static isc_boolean_t
1595 is_anycast(
1596 	sockaddr_u *psau,
1597 	const char *name
1598 	)
1599 {
1600 #ifdef IN6_IFF_ANYCAST
1601 	return check_flags6(psau, name, IN6_IFF_ANYCAST);
1602 #else
1603 	return ISC_FALSE;
1604 #endif
1605 }
1606 
1607 static isc_boolean_t
1608 is_valid(
1609 	sockaddr_u *psau,
1610 	const char *name
1611 	)
1612 {
1613 	u_int32 flags;
1614 
1615 	flags = 0;
1616 	switch (psau->sa.sa_family) {
1617 	case AF_INET:
1618 #ifdef IN_IFF_DETACHED
1619 		flags |= IN_IFF_DETACHED;
1620 #endif
1621 #ifdef IN_IFF_TENTATIVE
1622 		flags |= IN_IFF_TENTATIVE;
1623 #endif
1624 		return check_flags(psau, name, flags) ? ISC_FALSE : ISC_TRUE;
1625 	case AF_INET6:
1626 #ifdef IN6_IFF_DEPARTED
1627 		flags |= IN6_IFF_DEPARTED;
1628 #endif
1629 #ifdef IN6_IFF_DETACHED
1630 		flags |= IN6_IFF_DETACHED;
1631 #endif
1632 #ifdef IN6_IFF_TENTATIVE
1633 		flags |= IN6_IFF_TENTATIVE;
1634 #endif
1635 		return check_flags6(psau, name, flags) ? ISC_FALSE : ISC_TRUE;
1636 	default:
1637 		return ISC_FALSE;
1638 	}
1639 }
1640 
1641 /*
1642  * update_interface strategy
1643  *
1644  * toggle configuration phase
1645  *
1646  * Phase 1a:
1647  * forall currently existing interfaces
1648  *   if address is known:
1649  *	drop socket - rebind again
1650  *
1651  *   if address is NOT known:
1652  *	Add address to list of new addresses
1653  *
1654  * Phase 1b:
1655  *	Scan the list of new addresses marking IPv6 link-local addresses
1656  *	   which also have a global v6 address using the same OS ifindex.
1657  *	Attempt to create a new interface entry
1658  *
1659  * Phase 2:
1660  * forall currently known non MCAST and WILDCARD interfaces
1661  *   if interface does not match configuration phase (not seen in phase 1):
1662  *	remove interface from known interface list
1663  *	forall peers associated with this interface
1664  *         disconnect peer from this interface
1665  *
1666  * Phase 3:
1667  *   attempt to re-assign interfaces to peers
1668  *
1669  */
1670 
1671 static int
1672 update_interfaces(
1673 	u_short			port,
1674 	interface_receiver_t	receiver,
1675 	void *			data
1676 	)
1677 {
1678 	isc_mem_t *		mctx = (void *)-1;
1679 	interface_info_t	ifi;
1680 	isc_interfaceiter_t *	iter;
1681 	isc_result_t		result;
1682 	isc_interface_t		isc_if;
1683 	int			new_interface_found;
1684 	unsigned int		family;
1685 	endpt			enumep;
1686 	endpt *			ep;
1687 	endpt *			next_ep;
1688 	endpt *			newaddrs;
1689 	endpt *			newaddrs_tail;
1690 	endpt *			ep2;
1691 
1692 	DPRINTF(3, ("update_interfaces(%d)\n", port));
1693 
1694 	/*
1695 	 * phase 1a - scan OS local addresses
1696 	 * - update those that ntpd already knows
1697 	 * - build a list of newly-discovered addresses.
1698 	 */
1699 
1700 	new_interface_found = FALSE;
1701 	nonlocal_v4_addr_up = nonlocal_v6_addr_up = FALSE;
1702 	iter = NULL;
1703 	newaddrs = newaddrs_tail = NULL;
1704 	result = isc_interfaceiter_create(mctx, &iter);
1705 
1706 	if (result != ISC_R_SUCCESS)
1707 		return 0;
1708 
1709 	/*
1710 	 * Toggle system interface scan phase to find untouched
1711 	 * interfaces to be deleted.
1712 	 */
1713 	sys_interphase ^= 0x1;
1714 
1715 	for (result = isc_interfaceiter_first(iter);
1716 	     ISC_R_SUCCESS == result;
1717 	     result = isc_interfaceiter_next(iter)) {
1718 
1719 		result = isc_interfaceiter_current(iter, &isc_if);
1720 
1721 		if (result != ISC_R_SUCCESS) {
1722 			break;
1723 		}
1724 		/* See if we have a valid family to use */
1725 		family = isc_if.address.family;
1726 		if (AF_INET != family && AF_INET6 != family)
1727 			continue;
1728 		if (AF_INET == family && !ipv4_works)
1729 			continue;
1730 		if (AF_INET6 == family && !ipv6_works)
1731 			continue;
1732 
1733 		/* create prototype */
1734 		init_interface(&enumep);
1735 
1736 		convert_isc_if(&isc_if, &enumep, port);
1737 
1738 		DPRINT_INTERFACE(4, (&enumep, "examining ", "\n"));
1739 
1740 		/*
1741 		 * Check if and how we are going to use the interface.
1742 		 */
1743 		switch (interface_action(enumep.name, &enumep.sin,
1744 					 enumep.flags)) {
1745 
1746 		case ACTION_IGNORE:
1747 			DPRINTF(4, ("ignoring interface %s (%s) - by nic rules\n",
1748 				    enumep.name, stoa(&enumep.sin)));
1749 			continue;
1750 
1751 		case ACTION_LISTEN:
1752 			DPRINTF(4, ("listen interface %s (%s) - by nic rules\n",
1753 				    enumep.name, stoa(&enumep.sin)));
1754 			enumep.ignore_packets = ISC_FALSE;
1755 			break;
1756 
1757 		case ACTION_DROP:
1758 			DPRINTF(4, ("drop on interface %s (%s) - by nic rules\n",
1759 				    enumep.name, stoa(&enumep.sin)));
1760 			enumep.ignore_packets = ISC_TRUE;
1761 			break;
1762 		}
1763 
1764 		 /* interfaces must be UP to be usable */
1765 		if (!(enumep.flags & INT_UP)) {
1766 			DPRINTF(4, ("skipping interface %s (%s) - DOWN\n",
1767 				    enumep.name, stoa(&enumep.sin)));
1768 			continue;
1769 		}
1770 
1771 		/*
1772 		 * skip any interfaces UP and bound to a wildcard
1773 		 * address - some dhcp clients produce that in the
1774 		 * wild
1775 		 */
1776 		if (is_wildcard_addr(&enumep.sin))
1777 			continue;
1778 
1779 		if (is_anycast(&enumep.sin, isc_if.name))
1780 			continue;
1781 
1782 		/*
1783 		 * skip any address that is an invalid state to be used
1784 		 */
1785 		if (!is_valid(&enumep.sin, isc_if.name))
1786 			continue;
1787 
1788 		/*
1789 		 * Keep track of having non-linklocal connectivity
1790 		 * for IPv4 and IPv6 so we don't solicit pool hosts
1791 		 * when it can't work.
1792 		 */
1793 		if (   !(INT_LOOPBACK & enumep.flags)
1794 		    && !is_linklocal(&enumep.sin)) {
1795 			if (IS_IPV6(&enumep.sin)) {
1796 				nonlocal_v6_addr_up = TRUE;
1797 			} else {
1798 				nonlocal_v4_addr_up = TRUE;
1799 			}
1800 		}
1801 		/*
1802 		 * map to local *address* in order to map all duplicate
1803 		 * interfaces to an endpt structure with the appropriate
1804 		 * socket.  Our name space is (ip-address), NOT
1805 		 * (interface name, ip-address).
1806 		 */
1807 		ep = getinterface(&enumep.sin, INT_WILDCARD);
1808 
1809 		if (NULL == ep) {
1810 			ep = emalloc(sizeof(*ep));
1811 			memcpy(ep, &enumep, sizeof(*ep));
1812 			if (NULL != newaddrs_tail) {
1813 				newaddrs_tail->elink = ep;
1814 				newaddrs_tail = ep;
1815 			} else {
1816 				newaddrs_tail = newaddrs = ep;
1817 			}
1818 			continue;
1819 		}
1820 
1821 		if (!refresh_interface(ep)) {
1822 			/*
1823 			 * Refreshing failed, we will delete the endpt
1824 			 * in phase 2 because it was not marked current.
1825 			 * We can bind to the address as the refresh
1826 			 * code already closed the endpt's socket.
1827 			*/
1828 			continue;
1829 		}
1830 		/*
1831 		 * found existing and up to date interface -
1832 		 * mark present.
1833 		 */
1834 		if (ep->phase != sys_interphase) {
1835 			/*
1836 			 * On a new round we reset the name so
1837 			 * the interface name shows up again if
1838 			 * this address is no longer shared.
1839 			 * We reset ignore_packets from the
1840 			 * new prototype to respect any runtime
1841 			 * changes to the nic rules.
1842 			 */
1843 			strlcpy(ep->name, enumep.name, sizeof(ep->name));
1844 			ep->ignore_packets = enumep.ignore_packets;
1845 		} else {
1846 			/*
1847 			 * DLH: else branch might be dead code from
1848 			 * when both address and name were compared.
1849 			 */
1850 			msyslog(LOG_INFO, "%s on %u %s -> *multiple*",
1851 				stoa(&ep->sin), ep->ifnum, ep->name);
1852 			/* name collision - rename interface */
1853 			strlcpy(ep->name, "*multiple*", sizeof(ep->name));
1854 		}
1855 
1856 		DPRINT_INTERFACE(4, (ep, "updating ", " present\n"));
1857 
1858 		if (ep->ignore_packets != enumep.ignore_packets) {
1859 			/*
1860 			 * We have conflicting configurations for the
1861 			 * address. This can happen with
1862 			 * -I <interfacename> on the command line for an
1863 			 *  interface that shares its address with other
1864 			 * interfaces. We cannot disambiguate incoming
1865 			 * packets delivered to this socket without extra
1866 			 * syscalls/features.  Note this is an unusual
1867 			 * configuration where several interfaces share
1868 			 * an address but filtering via interface name is
1869 			 * attempted.  We resolve the config conflict by
1870 			 * disabling the processing of received packets.
1871 			 * This leads to no service on the address where
1872 			 * the conflict occurs.
1873 			 */
1874 			msyslog(LOG_WARNING,
1875 				"conflicting listen configuration between"
1876 				" %s and %s for %s, disabled",
1877 				enumep.name, ep->name, stoa(&enumep.sin));
1878 
1879 			ep->ignore_packets = TRUE;
1880 		}
1881 
1882 		ep->phase = sys_interphase;
1883 
1884 		ifi.action = IFS_EXISTS;
1885 		ifi.ep = ep;
1886 		if (receiver != NULL) {
1887 			(*receiver)(data, &ifi);
1888 		}
1889 	}
1890 
1891 	isc_interfaceiter_destroy(&iter);
1892 
1893 	/*
1894 	 * Phase 1b
1895 	 */
1896 	for (ep = newaddrs; ep != NULL; ep = ep->elink) {
1897 		if (IS_IPV6(&ep->sin) && is_linklocal(&ep->sin)) {
1898 			for (ep2 = newaddrs; ep2 != NULL; ep2 = ep2->elink) {
1899 				if (   IS_IPV6(&ep2->sin)
1900 				    && ep != ep2
1901 				    && !is_linklocal(&ep2->sin)) {
1902 
1903 					ep->flags |= INT_LL_OF_GLOB;
1904 					break;
1905 				}
1906 			}
1907 		}
1908 	}
1909 	for (ep2 = newaddrs; ep2 != NULL; ep2 = next_ep) {
1910 		next_ep = ep2->elink;
1911 		ep2->elink = NULL;
1912 		ep = create_interface(port, ep2);
1913 		if (ep != NULL) {
1914 			ifi.action = IFS_CREATED;
1915 			ifi.ep = ep;
1916 			if (receiver != NULL) {
1917 				(*receiver)(data, &ifi);
1918 			}
1919 			new_interface_found = TRUE;
1920 			DPRINT_INTERFACE(3,
1921 				(ep, "updating ", " new - created\n"));
1922 		}
1923 		else {
1924 			DPRINT_INTERFACE(3,
1925 				(ep, "updating ", " new - FAILED"));
1926 
1927 			msyslog(LOG_ERR,
1928 				"cannot bind address %s",
1929 				stoa(&ep->sin));
1930 		}
1931 		free(ep2);
1932 	}
1933 
1934 	/*
1935 	 * phase 2 - delete gone interfaces - reassigning peers to
1936 	 * other interfaces
1937 	 */
1938 	for (ep = ep_list; ep != NULL; ep = next_ep) {
1939 		next_ep = ep->elink;
1940 
1941 		/*
1942 		 * if phase does not match sys_phase this interface was
1943 		 * not enumerated during the last interface scan - so it
1944 		 * is gone and will be deleted here unless it did not
1945 		 * originate from interface enumeration (INT_WILDCARD,
1946 		 * INT_MCASTIF).
1947 		 */
1948 		if (((INT_WILDCARD | INT_MCASTIF) & ep->flags) ||
1949 		    ep->phase == sys_interphase)
1950 			continue;
1951 
1952 		DPRINT_INTERFACE(3, (ep, "updating ",
1953 				     "GONE - deleting\n"));
1954 		remove_interface(ep);
1955 
1956 		ifi.action = IFS_DELETED;
1957 		ifi.ep = ep;
1958 		if (receiver != NULL) {
1959 			(*receiver)(data, &ifi);
1960 		}
1961 		/* disconnect peers from deleted endpt. */
1962 		while (ep->peers != NULL) {
1963 			set_peerdstadr(ep->peers, NULL);
1964 		}
1965 		/*
1966 		 * update globals in case we lose
1967 		 * a loopback interface
1968 		 */
1969 		if (ep == loopback_interface) {
1970 			loopback_interface = NULL;
1971 		}
1972 		delete_interface(ep);
1973 	}
1974 
1975 	/*
1976 	 * phase 3 - re-configure as the world has possibly changed
1977 	 *
1978 	 * never ever make this conditional again - it is needed to track
1979 	 * routing updates. see bug #2506
1980 	 */
1981 	refresh_all_peerinterfaces();
1982 
1983 	if (sys_bclient) {
1984 		io_setbclient();
1985 	}
1986 #ifdef MCAST
1987 	/*
1988 	 * Check multicast interfaces and try to join multicast groups if
1989 	 * not joined yet.
1990 	 */
1991 	for (ep = ep_list; ep != NULL; ep = ep->elink) {
1992 		remaddr_t *entry;
1993 
1994 		if (!(INT_MCASTIF & ep->flags) || (INT_MCASTOPEN & ep->flags)) {
1995 			continue;
1996 		}
1997 		/* Find remote address that was linked to this interface */
1998 		for (entry = remoteaddr_list;
1999 		     entry != NULL;
2000 		     entry = entry->link) {
2001 			if (entry->ep == ep) {
2002 				if (socket_multicast_enable(ep, &entry->addr)) {
2003 					msyslog(LOG_INFO,
2004 						"Joined %s socket to multicast group %s",
2005 						stoa(&ep->sin),
2006 						stoa(&entry->addr));
2007 				}
2008 				break;
2009 			}
2010 		}
2011 	}
2012 #endif /* MCAST */
2013 
2014 	return new_interface_found;
2015 }
2016 
2017 
2018 /*
2019  * create_sockets - create a socket for each interface plus a default
2020  *			socket for when we don't know where to send
2021  */
2022 static int
2023 create_sockets(
2024 	u_short port
2025 	)
2026 {
2027 #ifndef HAVE_IO_COMPLETION_PORT
2028 	/*
2029 	 * I/O Completion Ports don't care about the select and FD_SET
2030 	 */
2031 	maxactivefd = 0;
2032 	FD_ZERO(&activefds);
2033 #endif
2034 
2035 	DPRINTF(2, ("create_sockets(%d)\n", port));
2036 
2037 	create_wildcards(port);
2038 
2039 	update_interfaces(port, NULL, NULL);
2040 
2041 	/*
2042 	 * Now that we have opened all the sockets, turn off the reuse
2043 	 * flag for security.
2044 	 */
2045 	set_reuseaddr(0);
2046 
2047 	DPRINTF(2, ("create_sockets: Total interfaces = %d\n", ninterfaces));
2048 
2049 	return ninterfaces;
2050 }
2051 
2052 /*
2053  * create_interface - create a new interface for a given prototype
2054  *		      binding the socket.
2055  */
2056 static endpt *
2057 create_interface(
2058 	u_short	port,
2059 	endpt *	protot
2060 	)
2061 {
2062 	sockaddr_u	resmask;
2063 	endpt *		iface;
2064 	int/*BOOL*/	success;
2065 #if defined(MCAST) && defined(MULTICAST_NONEWSOCKET)
2066 	remaddr_t *	entry;
2067 	remaddr_t *	next_entry;
2068 #endif
2069 	DPRINTF(2, ("create_interface(%s)\n", sptoa(&protot->sin)));
2070 
2071 	/* build an interface */
2072 	iface = new_interface(protot);
2073 
2074 	/*
2075 	 * create socket
2076 	 */
2077 	iface->fd = open_socket(&iface->sin, 0, 0, iface);
2078 
2079 	if (iface->fd != INVALID_SOCKET)
2080 		log_listen_address(iface);
2081 
2082 	if ((INT_BROADCAST & iface->flags)
2083 	    && iface->bfd != INVALID_SOCKET)
2084 		msyslog(LOG_INFO, "Listening on broadcast address %s",
2085 			sptoa(&iface->bcast));
2086 
2087 	if (INVALID_SOCKET == iface->fd
2088 	    && INVALID_SOCKET == iface->bfd) {
2089 		msyslog(LOG_ERR, "unable to create socket on %s (%d) for %s",
2090 			iface->name,
2091 			iface->ifnum,
2092 			sptoa(&iface->sin));
2093 		delete_interface(iface);
2094 		return NULL;
2095 	}
2096 
2097 	/*
2098 	 * Blacklist our own addresses, no use talking to ourself
2099 	 */
2100 	SET_HOSTMASK(&resmask, AF(&iface->sin));
2101 	success = hack_restrict(RESTRICT_FLAGS, &iface->sin, &resmask,
2102 				-4, RESM_NTPONLY | RESM_INTERFACE,
2103 				RES_IGNORE, 0);
2104 	if (!success) {
2105 		msyslog(LOG_ERR,
2106 			"unable to self-restrict %s", stoa(&iface->sin));
2107 	}
2108 
2109 	/*
2110 	 * set globals with the first found
2111 	 * loopback interface of the appropriate class
2112 	 */
2113 	if (NULL == loopback_interface && AF_INET == iface->family
2114 	    && (INT_LOOPBACK & iface->flags))
2115 		loopback_interface = iface;
2116 
2117 	/*
2118 	 * put into our interface list
2119 	 */
2120 	add_addr_to_list(&iface->sin, iface);
2121 	add_interface(iface);
2122 
2123 #if defined(MCAST) && defined(MULTICAST_NONEWSOCKET)
2124 	/*
2125 	 * Join any previously-configured compatible multicast groups.
2126 	 */
2127 	if (INT_MULTICAST & iface->flags &&
2128 	    !((INT_LOOPBACK | INT_WILDCARD) & iface->flags) &&
2129 	    !iface->ignore_packets) {
2130 		for (entry = remoteaddr_list;
2131 		     entry != NULL;
2132 		     entry = next_entry) {
2133 			next_entry = entry->link;
2134 			if (AF(&iface->sin) != AF(&entry->addr) ||
2135 			    !IS_MCAST(&entry->addr))
2136 				continue;
2137 			if (socket_multicast_enable(iface,
2138 						    &entry->addr))
2139 				msyslog(LOG_INFO,
2140 					"Joined %s socket to multicast group %s",
2141 					stoa(&iface->sin),
2142 					stoa(&entry->addr));
2143 			else
2144 				msyslog(LOG_ERR,
2145 					"Failed to join %s socket to multicast group %s",
2146 					stoa(&iface->sin),
2147 					stoa(&entry->addr));
2148 		}
2149 	}
2150 #endif	/* MCAST && MCAST_NONEWSOCKET */
2151 
2152 	DPRINT_INTERFACE(2, (iface, "created ", "\n"));
2153 	return iface;
2154 }
2155 
2156 
#ifdef DEBUG
/*
 * iflags_str() - debug-only: render a set of INT_* endpoint flags as
 *		  text in a buffer obtained via LIB_GETBUF().
 *
 * Each recognized bit contributes its name through append_flagstr(),
 * in the fixed order of the table below.  Recognized bits are cleared
 * from the local copy with CLEAR_BIT_IF_DEBUG() so DEBUG_INVARIANT()
 * can check that nothing unrecognized was set.
 */
const char *
iflags_str(
	u_int32 iflags
)
{
	/* output order follows table order */
	static const struct {
		u_int32		bit;
		const char *	text;
	} flagnames[] = {
		{ INT_UP,		"up" },
		{ INT_PPP,		"ppp" },
		{ INT_LOOPBACK,		"loopback" },
		{ INT_BROADCAST,	"broadcast" },
		{ INT_MULTICAST,	"multicast" },
		{ INT_BCASTOPEN,	"bcastopen" },
		{ INT_MCASTOPEN,	"mcastopen" },
		{ INT_WILDCARD,		"wildcard" },
		{ INT_MCASTIF,		"mcastif" },
		{ INT_PRIVACY,		"IPv6privacy" },
		{ INT_BCASTXMIT,	"bcastxmit" },
		{ INT_LL_OF_GLOB,	"linklocal-w-global" }
	};
	const size_t	sz = LIB_BUFLENGTH;
	char *		ifs;
	size_t		i;

	LIB_GETBUF(ifs);
	ifs[0] = '\0';

	for (i = 0; i < sizeof(flagnames) / sizeof(flagnames[0]); i++) {
		if (iflags & flagnames[i].bit) {
			CLEAR_BIT_IF_DEBUG(flagnames[i].bit, iflags);
			append_flagstr(ifs, sz, flagnames[i].text);
		}
	}

	DEBUG_INVARIANT(!iflags);

	return ifs;
}
#endif	/* DEBUG */
2234 
2235 
#ifdef SO_EXCLUSIVEADDRUSE
/*
 * set_excladdruse() - turn on SO_EXCLUSIVEADDRUSE for a socket and log
 *		       a failure, except for the pre-XP Windows case
 *		       described below.
 */
static void
set_excladdruse(
	SOCKET fd
	)
{
	int enable = 1;
#ifdef SYS_WINNT
	DWORD err;
#endif

	if (0 == setsockopt(fd, SOL_SOCKET, SO_EXCLUSIVEADDRUSE,
			    (void *)&enable, sizeof(enable)))
		return;

#ifdef SYS_WINNT
	/*
	 * Prior to Windows XP setting SO_EXCLUSIVEADDRUSE can fail with
	 * error WSAINVAL depending on service pack level and whether
	 * the user account is in the Administrators group.  Do not
	 * complain if it fails that way on versions prior to XP (5.1).
	 */
	err = GetLastError();

	if (isc_win32os_versioncheck(5, 1, 0, 0) < 0	/* < 5.1/XP */
	    && WSAEINVAL == err)
		return;

	/* put the error code back so %m below reports it */
	SetLastError(err);
#endif
	msyslog(LOG_ERR,
		"setsockopt(%d, SO_EXCLUSIVEADDRUSE, on): %m",
		(int)fd);
}
#endif  /* SO_EXCLUSIVEADDRUSE */
2274 
2275 
2276 /*
2277  * set_reuseaddr() - set/clear REUSEADDR on all sockets
2278  *			NB possible hole - should we be doing this on broadcast
2279  *			fd's also?
2280  */
2281 static void
2282 set_reuseaddr(
2283 	int flag
2284 	)
2285 {
2286 #ifndef SO_EXCLUSIVEADDRUSE
2287 	endpt *ep;
2288 
2289 	for (ep = ep_list; ep != NULL; ep = ep->elink) {
2290 		if (ep->flags & INT_WILDCARD)
2291 			continue;
2292 
2293 		/*
2294 		 * if ep->fd  is INVALID_SOCKET, we might have a adapter
2295 		 * configured but not present
2296 		 */
2297 		DPRINTF(4, ("setting SO_REUSEADDR on %.16s@%s to %s\n",
2298 			    ep->name, stoa(&ep->sin),
2299 			    flag ? "on" : "off"));
2300 
2301 		if (ep->fd != INVALID_SOCKET) {
2302 			if (setsockopt(ep->fd, SOL_SOCKET, SO_REUSEADDR,
2303 				       (void *)&flag, sizeof(flag))) {
2304 				msyslog(LOG_ERR, "set_reuseaddr: setsockopt(%s, SO_REUSEADDR, %s) failed: %m",
2305 					stoa(&ep->sin), flag ? "on" : "off");
2306 			}
2307 		}
2308 	}
2309 #endif /* ! SO_EXCLUSIVEADDRUSE */
2310 }
2311 
2312 /*
2313  * This is just a wrapper around an internal function so we can
2314  * make other changes as necessary later on
2315  */
2316 void
2317 enable_broadcast(
2318 	endpt *		iface,
2319 	sockaddr_u *	baddr
2320 	)
2321 {
2322 #ifdef OPEN_BCAST_SOCKET
2323 	socket_broadcast_enable(iface, iface->fd, baddr);
2324 #endif
2325 }
2326 
2327 #ifdef OPEN_BCAST_SOCKET
2328 /*
2329  * Enable a broadcast address to a given socket
2330  * The socket is in the ep_list all we need to do is enable
2331  * broadcasting. It is not this function's job to select the socket
2332  */
2333 static isc_boolean_t
2334 socket_broadcast_enable(
2335 	endpt *		iface,
2336 	SOCKET		fd,
2337 	sockaddr_u *	baddr
2338 	)
2339 {
2340 #ifdef SO_BROADCAST
2341 	int on = 1;
2342 
2343 	if (IS_IPV4(baddr)) {
2344 		/* if this interface can support broadcast, set SO_BROADCAST */
2345 		if (setsockopt(fd, SOL_SOCKET, SO_BROADCAST,
2346 			       (void *)&on, sizeof(on)))
2347 			msyslog(LOG_ERR,
2348 				"setsockopt(SO_BROADCAST) enable failure on address %s: %m",
2349 				stoa(baddr));
2350 		else
2351 			DPRINTF(2, ("Broadcast enabled on socket %d for address %s\n",
2352 				    fd, stoa(baddr)));
2353 	}
2354 	iface->flags |= INT_BCASTXMIT;
2355 	return ISC_TRUE;
2356 #else
2357 	return ISC_FALSE;
2358 #endif /* SO_BROADCAST */
2359 }
2360 
2361 #ifdef  OS_MISSES_SPECIFIC_ROUTE_UPDATES
2362 /*
2363  * Remove a broadcast address from a given socket
2364  * The socket is in the ep_list all we need to do is disable
2365  * broadcasting. It is not this function's job to select the socket
2366  */
2367 static isc_boolean_t
2368 socket_broadcast_disable(
2369 	endpt *	iface,
2370 	sockaddr_u *		baddr
2371 	)
2372 {
2373 #ifdef SO_BROADCAST
2374 	int off = 0;	/* This seems to be OK as an int */
2375 
2376 	if (IS_IPV4(baddr) && setsockopt(iface->fd, SOL_SOCKET,
2377 	    SO_BROADCAST, (void *)&off, sizeof(off)))
2378 		msyslog(LOG_ERR,
2379 			"setsockopt(SO_BROADCAST) disable failure on address %s: %m",
2380 			stoa(baddr));
2381 
2382 	iface->flags &= ~INT_BCASTXMIT;
2383 	return ISC_TRUE;
2384 #else
2385 	return ISC_FALSE;
2386 #endif /* SO_BROADCAST */
2387 }
2388 #endif /* OS_MISSES_SPECIFIC_ROUTE_UPDATES */
2389 
2390 #endif /* OPEN_BCAST_SOCKET */
2391 
2392 
2393 /*
2394  * Check to see if the address is a multicast address
2395  */
2396 static isc_boolean_t
2397 addr_ismulticast(
2398 	sockaddr_u *maddr
2399 	)
2400 {
2401 	isc_boolean_t result;
2402 
2403 #ifndef INCLUDE_IPV6_MULTICAST_SUPPORT
2404 	/*
2405 	 * If we don't have IPV6 support any IPV6 addr is not multicast
2406 	 */
2407 	if (IS_IPV6(maddr))
2408 		result = ISC_FALSE;
2409 	else
2410 #endif
2411 		result = IS_MCAST(maddr);
2412 
2413 	if (!result)
2414 		DPRINTF(4, ("address %s is not multicast\n",
2415 			    stoa(maddr)));
2416 
2417 	return result;
2418 }
2419 
2420 /*
2421  * Multicast servers need to set the appropriate Multicast interface
2422  * socket option in order for it to know which interface to use for
2423  * send the multicast packet.
2424  */
2425 void
2426 enable_multicast_if(
2427 	endpt *		iface,
2428 	sockaddr_u *	maddr
2429 	)
2430 {
2431 #ifdef MCAST
2432 #ifdef IP_MULTICAST_LOOP
2433 	TYPEOF_IP_MULTICAST_LOOP off = 0;
2434 #endif
2435 #if defined(INCLUDE_IPV6_MULTICAST_SUPPORT) && defined(IPV6_MULTICAST_LOOP)
2436 	u_int off6 = 0;
2437 #endif
2438 
2439 	REQUIRE(AF(maddr) == AF(&iface->sin));
2440 
2441 	switch (AF(&iface->sin)) {
2442 
2443 	case AF_INET:
2444 #ifdef IP_MULTICAST_LOOP
2445 		/*
2446 		 * Don't send back to itself, but allow failure to set
2447 		 */
2448 		if (setsockopt(iface->fd, IPPROTO_IP,
2449 			       IP_MULTICAST_LOOP,
2450 			       (void *)&off,
2451 			       sizeof(off))) {
2452 
2453 			msyslog(LOG_ERR,
2454 				"setsockopt IP_MULTICAST_LOOP failed: %m on socket %d, addr %s for multicast address %s",
2455 				iface->fd, stoa(&iface->sin),
2456 				stoa(maddr));
2457 		}
2458 #endif
2459 		break;
2460 
2461 	case AF_INET6:
2462 #ifdef INCLUDE_IPV6_MULTICAST_SUPPORT
2463 #ifdef IPV6_MULTICAST_LOOP
2464 		/*
2465 		 * Don't send back to itself, but allow failure to set
2466 		 */
2467 		if (setsockopt(iface->fd, IPPROTO_IPV6,
2468 			       IPV6_MULTICAST_LOOP,
2469 			       (void *) &off6, sizeof(off6))) {
2470 
2471 			msyslog(LOG_ERR,
2472 				"setsockopt IPV6_MULTICAST_LOOP failed: %m on socket %d, addr %s for multicast address %s",
2473 				iface->fd, stoa(&iface->sin),
2474 				stoa(maddr));
2475 		}
2476 #endif
2477 		break;
2478 #else
2479 		return;
2480 #endif	/* INCLUDE_IPV6_MULTICAST_SUPPORT */
2481 	}
2482 	return;
2483 #endif
2484 }
2485 
/*
 * socket_multicast_enable() - join a multicast group on an endpoint
 *			       socket.
 *
 * iface	endpoint whose socket (iface->fd) joins the group
 * maddr	multicast group address (IPv4 or IPv6)
 *
 * The socket is already in the ep_list; all that is done here is to
 * enable multicast reception.  It is not this function's job to
 * select the socket.
 *
 * Returns ISC_TRUE after flagging the endpoint INT_MCASTOPEN and
 * incrementing iface->num_mcast.  Returns ISC_FALSE, leaving the
 * endpoint untouched, when the setsockopt() call fails or when an
 * IPv6 address is given in a build without
 * INCLUDE_IPV6_MULTICAST_SUPPORT.  Failures are reported at debug
 * level 2 only, not via msyslog().
 */
#if defined(MCAST)
static isc_boolean_t
socket_multicast_enable(
	endpt *		iface,
	sockaddr_u *	maddr
	)
{
	struct ip_mreq		mreq;
# ifdef INCLUDE_IPV6_MULTICAST_SUPPORT
	struct ipv6_mreq	mreq6;
# endif
	switch (AF(maddr)) {

	case AF_INET:
		/* join on INADDR_ANY, i.e. no specific local interface */
		ZERO(mreq);
		mreq.imr_multiaddr = SOCK_ADDR4(maddr);
		mreq.imr_interface.s_addr = htonl(INADDR_ANY);
		if (setsockopt(iface->fd,
			       IPPROTO_IP,
			       IP_ADD_MEMBERSHIP,
			       (void *)&mreq,
			       sizeof(mreq))) {
			/* terminate the line like every other debug message */
			DPRINTF(2, (
				"setsockopt IP_ADD_MEMBERSHIP failed: %m on socket %d, addr %s for %x / %x (%s)\n",
				iface->fd, stoa(&iface->sin),
				mreq.imr_multiaddr.s_addr,
				mreq.imr_interface.s_addr,
				stoa(maddr)));
			return ISC_FALSE;
		}
		DPRINTF(4, ("Added IPv4 multicast membership on socket %d, addr %s for %x / %x (%s)\n",
			    iface->fd, stoa(&iface->sin),
			    mreq.imr_multiaddr.s_addr,
			    mreq.imr_interface.s_addr, stoa(maddr)));
		break;

	case AF_INET6:
# ifdef INCLUDE_IPV6_MULTICAST_SUPPORT
		/*
		 * Enable reception of multicast packets.
		 * If the address is link-local we can get the
		 * interface index from the scope id. Don't do this
		 * for other types of multicast addresses. For now let
		 * the kernel figure it out.
		 */
		ZERO(mreq6);
		mreq6.ipv6mr_multiaddr = SOCK_ADDR6(maddr);
		mreq6.ipv6mr_interface = iface->ifindex;

		if (setsockopt(iface->fd, IPPROTO_IPV6,
			       IPV6_JOIN_GROUP, (void *)&mreq6,
			       sizeof(mreq6))) {
			/* terminate the line like every other debug message */
			DPRINTF(2, (
				"setsockopt IPV6_JOIN_GROUP failed: %m on socket %d, addr %s for interface %u (%s)\n",
				iface->fd, stoa(&iface->sin),
				mreq6.ipv6mr_interface, stoa(maddr)));
			return ISC_FALSE;
		}
		DPRINTF(4, ("Added IPv6 multicast group on socket %d, addr %s for interface %u (%s)\n",
			    iface->fd, stoa(&iface->sin),
			    mreq6.ipv6mr_interface, stoa(maddr)));
		break;
# else
		return ISC_FALSE;
# endif	/* INCLUDE_IPV6_MULTICAST_SUPPORT */
	}

	/* book-keeping consulted by socket_multicast_disable() */
	iface->flags |= INT_MCASTOPEN;
	iface->num_mcast++;

	return ISC_TRUE;
}
#endif	/* MCAST */
2562 
2563 
/*
 * socket_multicast_disable() - leave a multicast group on an endpoint
 *				socket.
 *
 * iface	endpoint whose socket (iface->fd) leaves the group
 * maddr	multicast group address (IPv4 or IPv6)
 *
 * The socket is already in the ep_list; selecting it is the caller's
 * job.
 *
 * Returns ISC_TRUE when maddr is not in the address list (nothing to
 * do), or after the membership was dropped, iface->num_mcast was
 * decremented and, once the count reached zero, INT_MCASTOPEN was
 * cleared.  Returns ISC_FALSE without touching the book-keeping when
 * the setsockopt() call fails (logged via msyslog) or when an IPv6
 * address is given in a build without INCLUDE_IPV6_MULTICAST_SUPPORT.
 */
#ifdef MCAST
static isc_boolean_t
socket_multicast_disable(
	endpt *	iface,
	sockaddr_u *		maddr
	)
{
	struct ip_mreq		drop4;
# ifdef INCLUDE_IPV6_MULTICAST_SUPPORT
	struct ipv6_mreq	drop6;
# endif

	if (NULL == find_addr_in_list(maddr)) {
		DPRINTF(4, ("socket_multicast_disable(%s): not found\n",
			    stoa(maddr)));
		return ISC_TRUE;
	}

	if (AF_INET == AF(maddr)) {
		ZERO(drop4);
		drop4.imr_multiaddr = SOCK_ADDR4(maddr);
		drop4.imr_interface = SOCK_ADDR4(&iface->sin);
		if (0 != setsockopt(iface->fd, IPPROTO_IP,
				    IP_DROP_MEMBERSHIP, (void *)&drop4,
				    sizeof(drop4))) {
			msyslog(LOG_ERR,
				"setsockopt IP_DROP_MEMBERSHIP failed: %m on socket %d, addr %s for %x / %x (%s)",
				iface->fd, stoa(&iface->sin),
				SRCADR(maddr), SRCADR(&iface->sin),
				stoa(maddr));
			return ISC_FALSE;
		}
	} else if (AF_INET6 == AF(maddr)) {
# ifdef INCLUDE_IPV6_MULTICAST_SUPPORT
		/*
		 * Disable reception of multicast packets.
		 * If the address is link-local we can get the
		 * interface index from the scope id.  Don't do this
		 * for other types of multicast addresses. For now let
		 * the kernel figure it out.
		 */
		ZERO(drop6);
		drop6.ipv6mr_multiaddr = SOCK_ADDR6(maddr);
		drop6.ipv6mr_interface = iface->ifindex;

		if (0 != setsockopt(iface->fd, IPPROTO_IPV6,
				    IPV6_LEAVE_GROUP, (void *)&drop6,
				    sizeof(drop6))) {
			msyslog(LOG_ERR,
				"setsockopt IPV6_LEAVE_GROUP failure: %m on socket %d, addr %s for %d (%s)",
				iface->fd, stoa(&iface->sin),
				iface->ifindex, stoa(maddr));
			return ISC_FALSE;
		}
# else
		return ISC_FALSE;
# endif	/* INCLUDE_IPV6_MULTICAST_SUPPORT */
	}

	/* one group less; drop the flag with the last one */
	if (--iface->num_mcast <= 0)
		iface->flags &= ~INT_MCASTOPEN;

	return ISC_TRUE;
}
#endif	/* MCAST */
2641 
2642 
2643 /*
2644  * io_setbclient - open the broadcast client sockets
2645  */
2646 void
2647 io_setbclient(void)
2648 {
2649 #ifdef OPEN_BCAST_SOCKET
2650 	endpt *		ep;
2651 	unsigned int	nif, ni4;
2652 
2653 	nif = ni4 = 0;
2654 	set_reuseaddr(1);
2655 
2656 	for (ep = ep_list; ep != NULL; ep = ep->elink) {
2657 		/* count IPv4 interfaces. Needed later to decide
2658 		 * if we should log an error or not.
2659 		 */
2660 		if (AF_INET == ep->family) {
2661 			++ni4;
2662 		}
2663 
2664 		if (ep->flags & (INT_WILDCARD | INT_LOOPBACK))
2665 			continue;
2666 
2667 		/* use only allowed addresses */
2668 		if (ep->ignore_packets)
2669 			continue;
2670 
2671 		/* Need a broadcast-capable interface */
2672 		if (!(ep->flags & INT_BROADCAST))
2673 			continue;
2674 
2675 		/* Only IPv4 addresses are valid for broadcast */
2676 		REQUIRE(IS_IPV4(&ep->bcast));
2677 
2678 		/* Do we already have the broadcast address open? */
2679 		if (ep->flags & INT_BCASTOPEN) {
2680 			/*
2681 			 * account for already open interfaces to avoid
2682 			 * misleading warning below
2683 			 */
2684 			nif++;
2685 			continue;
2686 		}
2687 
2688 		/*
2689 		 * Try to open the broadcast address
2690 		 */
2691 		ep->family = AF_INET;
2692 		ep->bfd = open_socket(&ep->bcast, 1, 0, ep);
2693 
2694 		/*
2695 		 * If we succeeded then we use it otherwise enable
2696 		 * broadcast on the interface address
2697 		 */
2698 		if (ep->bfd != INVALID_SOCKET) {
2699 			nif++;
2700 			ep->flags |= INT_BCASTOPEN;
2701 			msyslog(LOG_INFO,
2702 				"Listen for broadcasts to %s on interface #%d %s",
2703 				stoa(&ep->bcast), ep->ifnum, ep->name);
2704 		} else switch (errno) {
2705 			/* Silently ignore EADDRINUSE as we probably
2706 			 * opened the socket already for an address in
2707 			 * the same network */
2708 		case EADDRINUSE:
2709 			/* Some systems cannot bind a socket to a broadcast
2710 			 * address, as that is not a valid host address. */
2711 		case EADDRNOTAVAIL:
2712 #		    ifdef SYS_WINNT	/*TODO: use for other systems, too? */
2713 			/* avoid recurrence here -- if we already have a
2714 			 * regular socket, it's quite useless to try this
2715 			 * again.
2716 			 */
2717 			if (ep->fd != INVALID_SOCKET) {
2718 				ep->flags |= INT_BCASTOPEN;
2719 				nif++;
2720 			}
2721 #		    endif
2722 			break;
2723 
2724 		default:
2725 			msyslog(LOG_INFO,
2726 				"failed to listen for broadcasts to %s on interface #%d %s",
2727 				stoa(&ep->bcast), ep->ifnum, ep->name);
2728 			break;
2729 		}
2730 	}
2731 	set_reuseaddr(0);
2732 	if (nif != 0) {
2733 		broadcast_client_enabled = ISC_TRUE;
2734 		DPRINTF(1, ("io_setbclient: listening to %d broadcast addresses\n", nif));
2735 	} else {
2736 		broadcast_client_enabled = ISC_FALSE;
2737 		/* This is expected when having only IPv6 interfaces
2738 		 * and no IPv4 interfaces at all. We suppress the error
2739 		 * log in that case... everything else should work!
2740 		 */
2741 		if (ni4) {
2742 			msyslog(LOG_ERR,
2743 				"Unable to listen for broadcasts, no broadcast interfaces available");
2744 		}
2745 	}
2746 #else
2747 	msyslog(LOG_ERR,
2748 		"io_setbclient: Broadcast Client disabled by build");
2749 #endif	/* OPEN_BCAST_SOCKET */
2750 }
2751 
2752 
2753 /*
2754  * io_unsetbclient - close the broadcast client sockets
2755  */
2756 void
2757 io_unsetbclient(void)
2758 {
2759 	endpt *ep;
2760 
2761 	for (ep = ep_list; ep != NULL; ep = ep->elink) {
2762 		if (INT_WILDCARD & ep->flags)
2763 			continue;
2764 		if (!(INT_BCASTOPEN & ep->flags))
2765 			continue;
2766 
2767 		if (ep->bfd != INVALID_SOCKET) {
2768 			/* destroy broadcast listening socket */
2769 			msyslog(LOG_INFO,
2770 				"stop listening for broadcasts to %s on interface #%d %s",
2771 				stoa(&ep->bcast), ep->ifnum, ep->name);
2772 			close_and_delete_fd_from_list(ep->bfd, ep);
2773 			ep->bfd = INVALID_SOCKET;
2774 		}
2775 		ep->flags &= ~INT_BCASTOPEN;
2776 	}
2777 	broadcast_client_enabled = ISC_FALSE;
2778 }
2779 
2780 
2781 /*
2782  * io_multicast_add() - add multicast group address
2783  */
2784 void
2785 io_multicast_add(
2786 	sockaddr_u *addr
2787 	)
2788 {
2789 #ifdef MCAST
2790 	endpt *	ep;
2791 	endpt *	one_ep;
2792 
2793 	/*
2794 	 * Check to see if this is a multicast address
2795 	 */
2796 	if (!addr_ismulticast(addr))
2797 		return;
2798 
2799 	/* If we already have it we can just return */
2800 	if (NULL != find_flagged_addr_in_list(addr, INT_MCASTOPEN)) {
2801 		return;
2802 	}
2803 
2804 # ifndef MULTICAST_NONEWSOCKET
2805 	ep = new_interface(NULL);
2806 
2807 	/*
2808 	 * Open a new socket for the multicast address
2809 	 */
2810 	ep->sin = *addr;
2811 	SET_PORT(&ep->sin, NTP_PORT);
2812 	ep->family = AF(&ep->sin);
2813 	AF(&ep->mask) = ep->family;
2814 	SET_ONESMASK(&ep->mask);
2815 
2816 	set_reuseaddr(1);
2817 	ep->bfd = INVALID_SOCKET;
2818 	ep->fd = open_socket(&ep->sin, 0, 0, ep);
2819 	if (ep->fd != INVALID_SOCKET) {
2820 		ep->ignore_packets = ISC_FALSE;
2821 		ep->flags |= INT_MCASTIF;
2822 		ep->ifindex = SCOPE(addr);
2823 
2824 		strlcpy(ep->name, "multicast", sizeof(ep->name));
2825 		DPRINT_INTERFACE(2, (ep, "multicast add ", "\n"));
2826 		add_interface(ep);
2827 		log_listen_address(ep);
2828 	} else {
2829 		/* bind failed, re-use wildcard interface */
2830 		delete_interface(ep);
2831 
2832 		if (IS_IPV4(addr))
2833 			ep = wildipv4;
2834 		else if (IS_IPV6(addr))
2835 			ep = wildipv6;
2836 		else
2837 			ep = NULL;
2838 
2839 		if (ep != NULL) {
2840 			/* HACK ! -- stuff in an address */
2841 			/* because we don't bind addr? DH */
2842 			ep->bcast = *addr;
2843 			msyslog(LOG_ERR,
2844 				"multicast address %s using wildcard interface #%d %s",
2845 				stoa(addr), ep->ifnum, ep->name);
2846 		} else {
2847 			msyslog(LOG_ERR,
2848 				"No multicast socket available to use for address %s",
2849 				stoa(addr));
2850 			return;
2851 		}
2852 	}
2853 	{	/* in place of the { following for in #else clause */
2854 		one_ep = ep;
2855 # else	/* MULTICAST_NONEWSOCKET follows */
2856 	/*
2857 	 * For the case where we can't use a separate socket (Windows)
2858 	 * join each applicable endpoint socket to the group address.
2859 	 */
2860 	if (IS_IPV4(addr))
2861 		one_ep = wildipv4;
2862 	else
2863 		one_ep = wildipv6;
2864 	for (ep = ep_list; ep != NULL; ep = ep->elink) {
2865 		if (ep->ignore_packets || AF(&ep->sin) != AF(addr) ||
2866 		    !(INT_MULTICAST & ep->flags) ||
2867 		    (INT_LOOPBACK | INT_WILDCARD) & ep->flags)
2868 			continue;
2869 		one_ep = ep;
2870 # endif	/* MULTICAST_NONEWSOCKET */
2871 		if (socket_multicast_enable(ep, addr))
2872 			msyslog(LOG_INFO,
2873 				"Joined %s socket to multicast group %s",
2874 				stoa(&ep->sin),
2875 				stoa(addr));
2876 	}
2877 
2878 	add_addr_to_list(addr, one_ep);
2879 #else	/* !MCAST  follows*/
2880 	msyslog(LOG_ERR,
2881 		"Can not add multicast address %s: no multicast support",
2882 		stoa(addr));
2883 #endif
2884 	return;
2885 }
2886 
2887 
2888 /*
2889  * io_multicast_del() - delete multicast group address
2890  */
2891 void
2892 io_multicast_del(
2893 	sockaddr_u *	addr
2894 	)
2895 {
2896 #ifdef MCAST
2897 	endpt *iface;
2898 
2899 	/*
2900 	 * Check to see if this is a multicast address
2901 	 */
2902 	if (!addr_ismulticast(addr)) {
2903 		msyslog(LOG_ERR, "invalid multicast address %s",
2904 			stoa(addr));
2905 		return;
2906 	}
2907 
2908 	/*
2909 	 * Disable reception of multicast packets
2910 	 */
2911 	while ((iface = find_flagged_addr_in_list(addr, INT_MCASTOPEN))
2912 	       != NULL)
2913 		socket_multicast_disable(iface, addr);
2914 
2915 	delete_addr_from_list(addr);
2916 
2917 #else /* not MCAST */
2918 	msyslog(LOG_ERR,
2919 		"Can not delete multicast address %s: no multicast support",
2920 		stoa(addr));
2921 #endif /* not MCAST */
2922 }
2923 
2924 
2925 /*
2926  * open_socket - open a socket, returning the file descriptor
2927  */
2928 
2929 static SOCKET
2930 open_socket(
2931 	sockaddr_u *	addr,
2932 	int		bcast,
2933 	int		turn_off_reuse,
2934 	endpt *		interf
2935 	)
2936 {
2937 	SOCKET	fd;
2938 	int	errval;
2939 	/*
2940 	 * int is OK for REUSEADR per
2941 	 * http://www.kohala.com/start/mcast.api.txt
2942 	 */
2943 	int	on = 1;
2944 	int	off = 0;
2945 
2946 	if (IS_IPV6(addr) && !ipv6_works)
2947 		return INVALID_SOCKET;
2948 
2949 	/* create a datagram (UDP) socket */
2950 	fd = socket(AF(addr), SOCK_DGRAM, 0);
2951 	if (INVALID_SOCKET == fd) {
2952 		errval = socket_errno();
2953 		msyslog(LOG_ERR,
2954 			"socket(AF_INET%s, SOCK_DGRAM, 0) failed on address %s: %m",
2955 			IS_IPV6(addr) ? "6" : "", stoa(addr));
2956 
2957 		if (errval == EPROTONOSUPPORT ||
2958 		    errval == EAFNOSUPPORT ||
2959 		    errval == EPFNOSUPPORT)
2960 			return (INVALID_SOCKET);
2961 
2962 		errno = errval;
2963 		msyslog(LOG_ERR,
2964 			"unexpected socket() error %m code %d (not EPROTONOSUPPORT nor EAFNOSUPPORT nor EPFNOSUPPORT) - exiting",
2965 			errno);
2966 		exit(1);
2967 	}
2968 
2969 #ifdef SYS_WINNT
2970 	connection_reset_fix(fd, addr);
2971 #endif
2972 	/*
2973 	 * Fixup the file descriptor for some systems
2974 	 * See bug #530 for details of the issue.
2975 	 */
2976 	fd = move_fd(fd);
2977 
2978 	/*
2979 	 * set SO_REUSEADDR since we will be binding the same port
2980 	 * number on each interface according to turn_off_reuse.
2981 	 * This is undesirable on Windows versions starting with
2982 	 * Windows XP (numeric version 5.1).
2983 	 */
2984 #ifdef SYS_WINNT
2985 	if (isc_win32os_versioncheck(5, 1, 0, 0) < 0)  /* before 5.1 */
2986 #endif
2987 		if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR,
2988 			       (void *)((turn_off_reuse)
2989 					    ? &off
2990 					    : &on),
2991 			       sizeof(on))) {
2992 
2993 			msyslog(LOG_ERR,
2994 				"setsockopt SO_REUSEADDR %s fails for address %s: %m",
2995 				(turn_off_reuse)
2996 				    ? "off"
2997 				    : "on",
2998 				stoa(addr));
2999 			closesocket(fd);
3000 			return INVALID_SOCKET;
3001 		}
3002 #ifdef SO_EXCLUSIVEADDRUSE
3003 	/*
3004 	 * setting SO_EXCLUSIVEADDRUSE on the wildcard we open
3005 	 * first will cause more specific binds to fail.
3006 	 */
3007 	if (!(interf->flags & INT_WILDCARD))
3008 		set_excladdruse(fd);
3009 #endif
3010 
3011 	/*
3012 	 * IPv4 specific options go here
3013 	 */
3014 	if (IS_IPV4(addr)) {
3015 #if defined(IPPROTO_IP) && defined(IP_TOS)
3016 		if (setsockopt(fd, IPPROTO_IP, IP_TOS, (void *)&qos,
3017 			       sizeof(qos)))
3018 			msyslog(LOG_ERR,
3019 				"setsockopt IP_TOS (%02x) fails on address %s: %m",
3020 				qos, stoa(addr));
3021 #endif /* IPPROTO_IP && IP_TOS */
3022 		if (bcast)
3023 			socket_broadcast_enable(interf, fd, addr);
3024 	}
3025 
3026 	/*
3027 	 * IPv6 specific options go here
3028 	 */
3029 	if (IS_IPV6(addr)) {
3030 #if defined(IPPROTO_IPV6) && defined(IPV6_TCLASS)
3031 		if (setsockopt(fd, IPPROTO_IPV6, IPV6_TCLASS, (void *)&qos,
3032 			       sizeof(qos)))
3033 			msyslog(LOG_ERR,
3034 				"setsockopt IPV6_TCLASS (%02x) fails on address %s: %m",
3035 				qos, stoa(addr));
3036 #endif /* IPPROTO_IPV6 && IPV6_TCLASS */
3037 #ifdef IPV6_V6ONLY
3038 		if (isc_net_probe_ipv6only() == ISC_R_SUCCESS
3039 		    && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY,
3040 		    (void *)&on, sizeof(on)))
3041 			msyslog(LOG_ERR,
3042 				"setsockopt IPV6_V6ONLY on fails on address %s: %m",
3043 				stoa(addr));
3044 #endif
3045 #ifdef IPV6_BINDV6ONLY
3046 		if (setsockopt(fd, IPPROTO_IPV6, IPV6_BINDV6ONLY,
3047 		    (void *)&on, sizeof(on)))
3048 			msyslog(LOG_ERR,
3049 				"setsockopt IPV6_BINDV6ONLY on fails on address %s: %m",
3050 				stoa(addr));
3051 #endif
3052 	}
3053 
3054 #ifdef OS_NEEDS_REUSEADDR_FOR_IFADDRBIND
3055 	/*
3056 	 * some OSes don't allow binding to more specific
3057 	 * addresses if a wildcard address already bound
3058 	 * to the port and SO_REUSEADDR is not set
3059 	 */
3060 	if (!is_wildcard_addr(addr))
3061 		set_wildcard_reuse(AF(addr), 1);
3062 #endif
3063 
3064 	/*
3065 	 * bind the local address.
3066 	 */
3067 	errval = bind(fd, &addr->sa, SOCKLEN(addr));
3068 
3069 #ifdef OS_NEEDS_REUSEADDR_FOR_IFADDRBIND
3070 	if (!is_wildcard_addr(addr))
3071 		set_wildcard_reuse(AF(addr), 0);
3072 #endif
3073 
3074 	if (errval < 0) {
3075 		/*
3076 		 * Don't log this under all conditions
3077 		 */
3078 		if (turn_off_reuse == 0
3079 #ifdef DEBUG
3080 		    || debug > 1
3081 #endif
3082 		    ) {
3083 			msyslog(LOG_ERR,
3084 				"bind(%d) AF_INET%s %s%s flags 0x%x failed: %m",
3085 				fd, IS_IPV6(addr) ? "6" : "",
3086 				sptoa(addr),
3087 				IS_MCAST(addr) ? " (multicast)" : "",
3088 				interf->flags);
3089 		}
3090 
3091 		closesocket(fd);
3092 
3093 		return INVALID_SOCKET;
3094 	}
3095 
3096 #ifdef HAVE_TIMESTAMP
3097 	{
3098 		if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMP,
3099 			       (void *)&on, sizeof(on)))
3100 			msyslog(LOG_DEBUG,
3101 				"setsockopt SO_TIMESTAMP on fails on address %s: %m",
3102 				stoa(addr));
3103 		else
3104 			DPRINTF(4, ("setsockopt SO_TIMESTAMP enabled on fd %d address %s\n",
3105 				    fd, stoa(addr)));
3106 	}
3107 #endif
3108 #ifdef HAVE_TIMESTAMPNS
3109 	{
3110 		if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPNS,
3111 			       (void *)&on, sizeof(on)))
3112 			msyslog(LOG_DEBUG,
3113 				"setsockopt SO_TIMESTAMPNS on fails on address %s: %m",
3114 				stoa(addr));
3115 		else
3116 			DPRINTF(4, ("setsockopt SO_TIMESTAMPNS enabled on fd %d address %s\n",
3117 				    fd, stoa(addr)));
3118 	}
3119 #endif
3120 #ifdef HAVE_BINTIME
3121 	{
3122 		if (setsockopt(fd, SOL_SOCKET, SO_BINTIME,
3123 			       (void *)&on, sizeof(on)))
3124 			msyslog(LOG_DEBUG,
3125 				"setsockopt SO_BINTIME on fails on address %s: %m",
3126 				stoa(addr));
3127 		else
3128 			DPRINTF(4, ("setsockopt SO_BINTIME enabled on fd %d address %s\n",
3129 				    fd, stoa(addr)));
3130 	}
3131 #endif
3132 
3133 	DPRINTF(4, ("bind(%d) addr %s, flags 0x%x\n",
3134 		    fd, sptoa(addr), interf->flags));
3135 
3136 	make_socket_nonblocking(fd);
3137 
3138 #ifdef HAVE_SIGNALED_IO
3139 	init_socket_sig(fd);
3140 #endif /* not HAVE_SIGNALED_IO */
3141 
3142 	add_fd_to_list(fd, FD_TYPE_SOCKET);
3143 
3144 #if !defined(SYS_WINNT) && !defined(VMS)
3145 	DPRINTF(4, ("flags for fd %d: 0x%x\n", fd,
3146 		    fcntl(fd, F_GETFL, 0)));
3147 #endif /* SYS_WINNT || VMS */
3148 
3149 #if defined(HAVE_IO_COMPLETION_PORT)
3150 /*
3151  * Add the socket to the completion port
3152  */
3153 	if (!io_completion_port_add_socket(fd, interf, bcast)) {
3154 		msyslog(LOG_ERR, "unable to set up io completion port - EXITING");
3155 		exit(1);
3156 	}
3157 #endif
3158 	return fd;
3159 }
3160 
3161 
3162 
3163 /* XXX ELIMINATE sendpkt similar in ntpq.c, ntpdc.c, ntp_io.c, ntptrace.c */
3164 /*
3165  * sendpkt - send a packet to the specified destination from the given endpt
3166  *	     except for multicast, which may be sent from several addresses.
3167  */
3168 void
3169 sendpkt(
3170 	sockaddr_u *	dest,
3171 	endpt *		ep,
3172 	int		ttl,
3173 	struct pkt *	pkt,
3174 	int		len
3175 	)
3176 {
3177 	endpt *	src;
3178 	int	ismcast;
3179 	int	cc;
3180 	int	rc;
3181 	u_char	cttl;
3182 	l_fp	fp_zero = { { 0 }, 0 };
3183 	l_fp	org, rec, xmt;
3184 
3185 	ismcast = IS_MCAST(dest);
3186 	if (!ismcast) {
3187 		src = ep;
3188 	} else {
3189 #ifndef MCAST
3190 		return;
3191 #endif
3192 		src = (IS_IPV4(dest))
3193 			? mc4_list
3194 			: mc6_list;
3195 	}
3196 
3197 	if (NULL == src) {
3198 		/*
3199 		 * unbound peer - drop request and wait for better
3200 		 * network conditions
3201 		 */
3202 		DPRINTF(2, ("%ssendpkt(dst=%s, ttl=%d, len=%d): no interface - IGNORED\n",
3203 			    ismcast ? "\tMCAST\t***** " : "",
3204 			    stoa(dest), ttl, len));
3205 		return;
3206 	}
3207 
3208 	do {
3209 		if (INT_LL_OF_GLOB & src->flags) {
3210 			/* avoid duplicate multicasts on same IPv6 net */
3211 			goto loop;
3212 		}
3213 		DPRINTF(2, ("%ssendpkt(%d, dst=%s, src=%s, ttl=%d, len=%d)\n",
3214 			    ismcast ? "\tMCAST\t***** " : "", src->fd,
3215 			    stoa(dest), stoa(&src->sin), ttl, len));
3216 #ifdef MCAST
3217 		if (ismcast && ttl > 0 && ttl != src->last_ttl) {
3218 			/*
3219 			 * set the multicast ttl for outgoing packets
3220 			 */
3221 			switch (AF(&src->sin)) {
3222 
3223 			case AF_INET :
3224 				cttl = (u_char)ttl;
3225 				rc = setsockopt(src->fd, IPPROTO_IP,
3226 						IP_MULTICAST_TTL,
3227 						(void *)&cttl,
3228 						sizeof(cttl));
3229 				break;
3230 
3231 # ifdef INCLUDE_IPV6_SUPPORT
3232 			case AF_INET6 :
3233 				rc = setsockopt(src->fd, IPPROTO_IPV6,
3234 						 IPV6_MULTICAST_HOPS,
3235 						 (void *)&ttl,
3236 						 sizeof(ttl));
3237 				break;
3238 # endif	/* INCLUDE_IPV6_SUPPORT */
3239 
3240 			default:
3241 				rc = 0;
3242 			}
3243 
3244 			if (!rc)
3245 				src->last_ttl = ttl;
3246 			else
3247 				msyslog(LOG_ERR,
3248 					"setsockopt IP_MULTICAST_TTL/IPV6_MULTICAST_HOPS fails on address %s: %m",
3249 					stoa(&src->sin));
3250 		}
3251 #endif	/* MCAST */
3252 
3253 #ifdef SIM
3254 		cc = simulate_server(dest, src, pkt);
3255 #elif defined(HAVE_IO_COMPLETION_PORT)
3256 		cc = io_completion_port_sendto(src, src->fd, pkt,
3257 			(size_t)len, dest);
3258 #else
3259 		cc = sendto(src->fd, (char *)pkt, (u_int)len, 0,
3260 			    &dest->sa, SOCKLEN(dest));
3261 #endif
3262 		if (cc == -1) {
3263 			src->notsent++;
3264 			packets_notsent++;
3265 		} else	{
3266 			src->sent++;
3267 			packets_sent++;
3268 		}
3269 	    loop:
3270 		if (ismcast)
3271 			src = src->mclink;
3272 	} while (ismcast && src != NULL);
3273 
3274 	/* HMS: pkt->rootdisp is usually random here */
3275 	NTOHL_FP(&pkt->org, &org);
3276 	NTOHL_FP(&pkt->rec, &rec);
3277 	NTOHL_FP(&pkt->xmt, &xmt);
3278 	record_raw_stats(src ? &src->sin : NULL, dest,
3279 			&org, &rec, &xmt, &fp_zero,
3280 			PKT_LEAP(pkt->li_vn_mode),
3281 			PKT_VERSION(pkt->li_vn_mode),
3282 			PKT_MODE(pkt->li_vn_mode),
3283 			pkt->stratum,
3284 			pkt->ppoll, pkt->precision,
3285 			FPTOD(NTOHS_FP(pkt->rootdelay)),
3286 			FPTOD(NTOHS_FP(pkt->rootdisp)),  pkt->refid,
3287 			len - MIN_V4_PKT_LEN, (u_char *)&pkt->exten);
3288 }
3289 
3290 
3291 #if !defined(HAVE_IO_COMPLETION_PORT)
3292 #if !defined(HAVE_SIGNALED_IO)
/*
 * fdbits - generate ascii representation of fd_set (FAU debug support)
 * HFDF format - highest fd first.
 *
 * count	highest descriptor number to show
 * set		descriptor set to render
 *
 * Produces one character per descriptor from count down to 0: '#' for
 * a descriptor that is in the set, '-' for one that is not.  count is
 * limited so that the characters plus the terminating NUL fit the
 * buffer; a negative count gives an empty string.
 *
 * Returns a pointer to a static buffer that the next call overwrites.
 */
static char *
fdbits(
	int		count,
	const fd_set*	set
	)
{
	static char	buffer[256];
	/* descriptors 0..max_fd take max_fd + 1 characters, plus the NUL */
	const int	max_fd = (int)sizeof(buffer) - 2;
	char *		buf = buffer;

	if (count > max_fd)
		count = max_fd;

	while (count >= 0) {
		*buf++ = FD_ISSET(count, set) ? '#' : '-';
		count--;
	}
	*buf = '\0';

	return buffer;
}
3316 #endif
3317 
3318 #ifdef REFCLOCK
3319 /*
3320  * Routine to read the refclock packets for a specific interface
3321  * Return the number of bytes read. That way we know if we should
3322  * read it again or go on to the next one if no bytes returned
3323  */
3324 static inline int
3325 read_refclock_packet(
3326 	SOCKET			fd,
3327 	struct refclockio *	rp,
3328 	l_fp			ts
3329 	)
3330 {
3331 	u_int			read_count;
3332 	int			buflen;
3333 	int			saved_errno;
3334 	int			consumed;
3335 	struct recvbuf *	rb;
3336 
3337 	rb = get_free_recv_buffer(TRUE);
3338 
3339 	if (NULL == rb) {
3340 		/*
3341 		 * No buffer space available - just drop the 'packet'.
3342 		 * Since this is a non-blocking character stream we read
3343 		 * all data that we can.
3344 		 *
3345 		 * ...hmmmm... what about "tcflush(fd,TCIFLUSH)" here?!?
3346 		 */
3347 		char buf[128];
3348 		do
3349 			buflen = read(fd, buf, sizeof(buf));
3350 		while (buflen > 0);
3351 		packets_dropped++;
3352 		return (buflen);
3353 	}
3354 
3355 	/* TALOS-CAN-0064: avoid signed/unsigned clashes that can lead
3356 	 * to buffer overrun and memory corruption
3357 	 */
3358 	if (rp->datalen <= 0 || (size_t)rp->datalen > sizeof(rb->recv_space))
3359 		read_count = sizeof(rb->recv_space);
3360 	else
3361 		read_count = (u_int)rp->datalen;
3362 	do {
3363 		buflen = read(fd, (char *)&rb->recv_space, read_count);
3364 	} while (buflen < 0 && EINTR == errno);
3365 
3366 	if (buflen <= 0) {
3367 		saved_errno = errno;
3368 		freerecvbuf(rb);
3369 		errno = saved_errno;
3370 		return buflen;
3371 	}
3372 
3373 	/*
3374 	 * Got one. Mark how and when it got here,
3375 	 * put it on the full list and do bookkeeping.
3376 	 */
3377 	rb->recv_length = buflen;
3378 	rb->recv_peer = rp->srcclock;
3379 	rb->dstadr = NULL;
3380 	rb->fd = fd;
3381 	rb->recv_time = ts;
3382 	rb->receiver = rp->clock_recv;
3383 
3384 	consumed = indicate_refclock_packet(rp, rb);
3385 	if (!consumed) {
3386 		rp->recvcount++;
3387 		packets_received++;
3388 	}
3389 
3390 	return buflen;
3391 }
3392 #endif	/* REFCLOCK */
3393 
3394 
3395 #ifdef HAVE_PACKET_TIMESTAMP
3396 /*
3397  * extract timestamps from control message buffer
3398  */
3399 static l_fp
3400 fetch_timestamp(
3401 	struct recvbuf *	rb,
3402 	struct msghdr *		msghdr,
3403 	l_fp			ts
3404 	)
3405 {
3406 	struct cmsghdr *	cmsghdr;
3407 	unsigned long		ticks;
3408 	double			fuzz;
3409 	l_fp			lfpfuzz;
3410 	l_fp			nts;
3411 #ifdef DEBUG_TIMING
3412 	l_fp			dts;
3413 #endif
3414 
3415 	cmsghdr = CMSG_FIRSTHDR(msghdr);
3416 	while (cmsghdr != NULL) {
3417 		switch (cmsghdr->cmsg_type)
3418 		{
3419 #ifdef HAVE_BINTIME
3420 		case SCM_BINTIME:
3421 #endif  /* HAVE_BINTIME */
3422 #ifdef HAVE_TIMESTAMPNS
3423 		case SCM_TIMESTAMPNS:
3424 #endif	/* HAVE_TIMESTAMPNS */
3425 #ifdef HAVE_TIMESTAMP
3426 		case SCM_TIMESTAMP:
3427 #endif	/* HAVE_TIMESTAMP */
3428 #if defined(HAVE_BINTIME) || defined (HAVE_TIMESTAMPNS) || defined(HAVE_TIMESTAMP)
3429 			switch (cmsghdr->cmsg_type)
3430 			{
3431 #ifdef HAVE_BINTIME
3432 			case SCM_BINTIME:
3433 				{
3434 					struct bintime	pbt;
3435 					memcpy(&pbt, CMSG_DATA(cmsghdr), sizeof(pbt));
3436 					/*
3437 					 * bintime documentation is at http://phk.freebsd.dk/pubs/timecounter.pdf
3438 					 */
3439 					nts.l_i = pbt.sec + JAN_1970;
3440 					nts.l_uf = (u_int32)(pbt.frac >> 32);
3441 					if (sys_tick > measured_tick &&
3442 					    sys_tick > 1e-9) {
3443 						ticks = (unsigned long)(nts.l_uf / (unsigned long)(sys_tick * FRAC));
3444 						nts.l_uf = (unsigned long)(ticks * (unsigned long)(sys_tick * FRAC));
3445 					}
3446 					DPRINTF(4, ("fetch_timestamp: system bintime network time stamp: %ld.%09lu\n",
3447 						    (long)pbt.sec, (u_long)((nts.l_uf / FRAC) * 1e9)));
3448 				}
3449 				break;
3450 #endif  /* HAVE_BINTIME */
3451 #ifdef HAVE_TIMESTAMPNS
3452 			case SCM_TIMESTAMPNS:
3453 				{
3454 					struct timespec	pts;
3455 					memcpy(&pts, CMSG_DATA(cmsghdr), sizeof(pts));
3456 					if (sys_tick > measured_tick &&
3457 					    sys_tick > 1e-9) {
3458 						ticks = (unsigned long)((pts.tv_nsec * 1e-9) /
3459 									sys_tick);
3460 						pts.tv_nsec = (long)(ticks * 1e9 *
3461 								     sys_tick);
3462 					}
3463 					DPRINTF(4, ("fetch_timestamp: system nsec network time stamp: %ld.%09ld\n",
3464 						    pts.tv_sec, pts.tv_nsec));
3465 					nts = tspec_stamp_to_lfp(pts);
3466 				}
3467 				break;
3468 #endif	/* HAVE_TIMESTAMPNS */
3469 #ifdef HAVE_TIMESTAMP
3470 			case SCM_TIMESTAMP:
3471 				{
3472 					struct timeval	ptv;
3473 					memcpy(&ptv, CMSG_DATA(cmsghdr), sizeof(ptv));
3474 					if (sys_tick > measured_tick &&
3475 					    sys_tick > 1e-6) {
3476 						ticks = (unsigned long)((ptv.tv_usec * 1e-6) /
3477 									sys_tick);
3478 						ptv.tv_usec = (long)(ticks * 1e6 *
3479 								    sys_tick);
3480 					}
3481 					DPRINTF(4, ("fetch_timestamp: system usec network time stamp: %jd.%06ld\n",
3482 						    (intmax_t)ptv.tv_sec, (long)ptv.tv_usec));
3483 					nts = tval_stamp_to_lfp(ptv);
3484 				}
3485 				break;
3486 #endif  /* HAVE_TIMESTAMP */
3487 			}
3488 			fuzz = ntp_uurandom() * sys_fuzz;
3489 			DTOLFP(fuzz, &lfpfuzz);
3490 			L_ADD(&nts, &lfpfuzz);
3491 #ifdef DEBUG_TIMING
3492 			dts = ts;
3493 			L_SUB(&dts, &nts);
3494 			collect_timing(rb, "input processing delay", 1,
3495 				       &dts);
3496 			DPRINTF(4, ("fetch_timestamp: timestamp delta: %s (incl. fuzz)\n",
3497 				    lfptoa(&dts, 9)));
3498 #endif	/* DEBUG_TIMING */
3499 			ts = nts;  /* network time stamp */
3500 			break;
3501 #endif	/* HAVE_BINTIME || HAVE_TIMESTAMPNS || HAVE_TIMESTAMP */
3502 
3503 		default:
3504 			DPRINTF(4, ("fetch_timestamp: skipping control message 0x%x\n",
3505 				    cmsghdr->cmsg_type));
3506 		}
3507 		cmsghdr = CMSG_NXTHDR(msghdr, cmsghdr);
3508 	}
3509 	return ts;
3510 }
3511 #endif	/* HAVE_PACKET_TIMESTAMP */
3512 
3513 
3514 /*
3515  * Routine to read the network NTP packets for a specific interface
3516  * Return the number of bytes read. That way we know if we should
3517  * read it again or go on to the next one if no bytes returned
3518  */
3519 static inline int
3520 read_network_packet(
3521 	SOCKET		fd,
3522 	endpt *		itf,
3523 	l_fp		ts
3524 	)
3525 {
3526 	GETSOCKNAME_SOCKLEN_TYPE fromlen;
3527 	int buflen;
3528 	register struct recvbuf *rb;
3529 #ifdef HAVE_PACKET_TIMESTAMP
3530 	struct msghdr msghdr;
3531 	struct iovec iovec;
3532 	char control[CMSG_BUFSIZE];
3533 #endif
3534 
3535 	/*
3536 	 * Get a buffer and read the frame.  If we haven't got a buffer,
3537 	 * or this is received on a disallowed socket, just dump the
3538 	 * packet.
3539 	 */
3540 
3541 	rb = itf->ignore_packets ? NULL : get_free_recv_buffer(FALSE);
3542 	if (NULL == rb) {
3543 		/* A partial read on a UDP socket truncates the data and
3544 		 * removes the message from the queue. So there's no
3545 		 * need to have a full buffer here on the stack.
3546 		 */
3547 		char buf[16];
3548 		sockaddr_u from;
3549 
3550 		if (rb != NULL)
3551 			freerecvbuf(rb);
3552 
3553 		fromlen = sizeof(from);
3554 		buflen = recvfrom(fd, buf, sizeof(buf), 0,
3555 				  &from.sa, &fromlen);
3556 		DPRINTF(4, ("%s on (%lu) fd=%d from %s\n",
3557 			(itf->ignore_packets)
3558 			    ? "ignore"
3559 			    : "drop",
3560 			free_recvbuffs(), fd, stoa(&from)));
3561 		if (itf->ignore_packets)
3562 			packets_ignored++;
3563 		else
3564 			packets_dropped++;
3565 		return (buflen);
3566 	}
3567 
3568 	fromlen = sizeof(rb->recv_srcadr);
3569 
3570 #ifndef HAVE_PACKET_TIMESTAMP
3571 	rb->recv_length = recvfrom(fd, (char *)&rb->recv_space,
3572 				   sizeof(rb->recv_space), 0,
3573 				   &rb->recv_srcadr.sa, &fromlen);
3574 #else
3575 	iovec.iov_base        = &rb->recv_space;
3576 	iovec.iov_len         = sizeof(rb->recv_space);
3577 	msghdr.msg_name       = &rb->recv_srcadr;
3578 	msghdr.msg_namelen    = fromlen;
3579 	msghdr.msg_iov        = &iovec;
3580 	msghdr.msg_iovlen     = 1;
3581 	msghdr.msg_control    = (void *)&control;
3582 	msghdr.msg_controllen = sizeof(control);
3583 	msghdr.msg_flags      = 0;
3584 	rb->recv_length       = recvmsg(fd, &msghdr, 0);
3585 #endif
3586 
3587 	buflen = rb->recv_length;
3588 
3589 	if (buflen == 0 || (buflen == -1 &&
3590 	    (EWOULDBLOCK == errno
3591 #ifdef EAGAIN
3592 	     || EAGAIN == errno
3593 #endif
3594 	     ))) {
3595 		freerecvbuf(rb);
3596 		return (buflen);
3597 	} else if (buflen < 0) {
3598 		msyslog(LOG_ERR, "recvfrom(%s) fd=%d: %m",
3599 			stoa(&rb->recv_srcadr), fd);
3600 		DPRINTF(5, ("read_network_packet: fd=%d dropped (bad recvfrom)\n",
3601 			    fd));
3602 		freerecvbuf(rb);
3603 		return (buflen);
3604 	}
3605 
3606 	DPRINTF(3, ("read_network_packet: fd=%d length %d from %s\n",
3607 		    fd, buflen, stoa(&rb->recv_srcadr)));
3608 
3609 #ifdef ENABLE_BUG3020_FIX
3610 	if (ISREFCLOCKADR(&rb->recv_srcadr)) {
3611 		msyslog(LOG_ERR, "recvfrom(%s) fd=%d: refclock srcadr on a network interface!",
3612 			stoa(&rb->recv_srcadr), fd);
3613 		DPRINTF(1, ("read_network_packet: fd=%d dropped (refclock srcadr))\n",
3614 			    fd));
3615 		packets_dropped++;
3616 		freerecvbuf(rb);
3617 		return (buflen);
3618 	}
3619 #endif
3620 
3621 	/*
3622 	** Bug 2672: Some OSes (MacOSX and Linux) don't block spoofed ::1
3623 	*/
3624 
3625 	if (   IS_IPV6(&rb->recv_srcadr)
3626 	    && IN6_IS_ADDR_LOOPBACK(PSOCK_ADDR6(&rb->recv_srcadr))
3627 	    && !(INT_LOOPBACK & itf->flags)) {
3628 
3629 		packets_dropped++;
3630 		DPRINTF(2, ("DROPPING pkt with spoofed ::1 source on %s\n", latoa(itf)));
3631 		freerecvbuf(rb);
3632 		return -1;
3633 	}
3634 
3635 	/*
3636 	 * Got one.  Mark how and when it got here,
3637 	 * put it on the full list and do bookkeeping.
3638 	 */
3639 	rb->dstadr = itf;
3640 	rb->fd = fd;
3641 #ifdef HAVE_PACKET_TIMESTAMP
3642 	/* pick up a network time stamp if possible */
3643 	ts = fetch_timestamp(rb, &msghdr, ts);
3644 #endif
3645 	rb->recv_time = ts;
3646 	rb->receiver = receive;
3647 
3648 	add_full_recv_buffer(rb);
3649 
3650 	itf->received++;
3651 	packets_received++;
3652 	return (buflen);
3653 }
3654 
3655 /*
3656  * attempt to handle io (select()/signaled IO)
3657  */
3658 void
3659 io_handler(void)
3660 {
3661 #  ifndef HAVE_SIGNALED_IO
3662 	fd_set rdfdes;
3663 	int nfound;
3664 
3665 	/*
3666 	 * Use select() on all on all input fd's for unlimited
3667 	 * time.  select() will terminate on SIGALARM or on the
3668 	 * reception of input.	Using select() means we can't do
3669 	 * robust signal handling and we get a potential race
3670 	 * between checking for alarms and doing the select().
3671 	 * Mostly harmless, I think.
3672 	 */
3673 	/*
3674 	 * On VMS, I suspect that select() can't be interrupted
3675 	 * by a "signal" either, so I take the easy way out and
3676 	 * have select() time out after one second.
3677 	 * System clock updates really aren't time-critical,
3678 	 * and - lacking a hardware reference clock - I have
3679 	 * yet to learn about anything else that is.
3680 	 */
3681 	++handler_calls;
3682 	rdfdes = activefds;
3683 #   if !defined(VMS) && !defined(SYS_VXWORKS)
3684 	nfound = select(maxactivefd + 1, &rdfdes, NULL,
3685 			NULL, NULL);
3686 #   else	/* VMS, VxWorks */
3687 	/* make select() wake up after one second */
3688 	{
3689 		struct timeval t1;
3690 		t1.tv_sec  = 1;
3691 		t1.tv_usec = 0;
3692 		nfound = select(maxactivefd + 1,
3693 				&rdfdes, NULL, NULL,
3694 				&t1);
3695 	}
3696 #   endif	/* VMS, VxWorks */
3697 	if (nfound < 0 && sanitize_fdset(errno)) {
3698 		struct timeval t1;
3699 		t1.tv_sec  = 0;
3700 		t1.tv_usec = 0;
3701 		rdfdes = activefds;
3702 		nfound = select(maxactivefd + 1,
3703 				&rdfdes, NULL, NULL,
3704 				&t1);
3705 	}
3706 
3707 	if (nfound > 0) {
3708 		l_fp ts;
3709 
3710 		get_systime(&ts);
3711 
3712 		input_handler_scan(&ts, &rdfdes);
3713 	} else if (nfound == -1 && errno != EINTR) {
3714 		msyslog(LOG_ERR, "select() error: %m");
3715 	}
3716 #   ifdef DEBUG
3717 	else if (debug > 4) {
3718 		msyslog(LOG_DEBUG, "select(): nfound=%d, error: %m", nfound);
3719 	} else {
3720 		DPRINTF(3, ("select() returned %d: %m\n", nfound));
3721 	}
3722 #   endif /* DEBUG */
3723 #  else /* HAVE_SIGNALED_IO */
3724 	wait_for_signal();
3725 #  endif /* HAVE_SIGNALED_IO */
3726 }
3727 
3728 #ifdef HAVE_SIGNALED_IO
3729 /*
3730  * input_handler - receive packets asynchronously
3731  *
3732  * ALWAYS IN SIGNAL HANDLER CONTEXT -- only async-safe functions allowed!
3733  */
3734 static RETSIGTYPE
3735 input_handler(
3736 	l_fp *	cts
3737 	)
3738 {
3739 	int		n;
3740 	struct timeval	tvzero;
3741 	fd_set		fds;
3742 
3743 	++handler_calls;
3744 
3745 	/*
3746 	 * Do a poll to see who has data
3747 	 */
3748 
3749 	fds = activefds;
3750 	tvzero.tv_sec = tvzero.tv_usec = 0;
3751 
3752 	n = select(maxactivefd + 1, &fds, NULL, NULL, &tvzero);
3753 	if (n < 0 && sanitize_fdset(errno)) {
3754 		fds = activefds;
3755 		tvzero.tv_sec = tvzero.tv_usec = 0;
3756 		n = select(maxactivefd + 1, &fds, NULL, NULL, &tvzero);
3757 	}
3758 	if (n > 0)
3759 		input_handler_scan(cts, &fds);
3760 }
3761 #endif /* HAVE_SIGNALED_IO */
3762 
3763 
3764 /*
3765  * Try to sanitize the global FD set
3766  *
3767  * SIGNAL HANDLER CONTEXT if HAVE_SIGNALED_IO, ordinary userspace otherwise
3768  */
3769 static int/*BOOL*/
3770 sanitize_fdset(
3771 	int	errc
3772 	)
3773 {
3774 	int j, b, maxscan;
3775 
3776 #  ifndef HAVE_SIGNALED_IO
3777 	/*
3778 	 * extended FAU debugging output
3779 	 */
3780 	if (errc != EINTR) {
3781 		msyslog(LOG_ERR,
3782 			"select(%d, %s, 0L, 0L, &0.0) error: %m",
3783 			maxactivefd + 1,
3784 			fdbits(maxactivefd, &activefds));
3785 	}
3786 #   endif
3787 
3788 	if (errc != EBADF)
3789 		return FALSE;
3790 
3791 	/* if we have oviously bad FDs, try to sanitize the FD set. */
3792 	for (j = 0, maxscan = 0; j <= maxactivefd; j++) {
3793 		if (FD_ISSET(j, &activefds)) {
3794 			if (-1 != read(j, &b, 0)) {
3795 				maxscan = j;
3796 				continue;
3797 			}
3798 #		    ifndef HAVE_SIGNALED_IO
3799 			msyslog(LOG_ERR,
3800 				"Removing bad file descriptor %d from select set",
3801 				j);
3802 #		    endif
3803 			FD_CLR(j, &activefds);
3804 		}
3805 	}
3806 	if (maxactivefd != maxscan)
3807 		maxactivefd = maxscan;
3808 	return TRUE;
3809 }
3810 
3811 /*
3812  * scan the known FDs (clocks, servers, ...) for presence in a 'fd_set'.
3813  *
3814  * SIGNAL HANDLER CONTEXT if HAVE_SIGNALED_IO, ordinary userspace otherwise
3815  */
3816 static void
3817 input_handler_scan(
3818 	const l_fp *	cts,
3819 	const fd_set *	pfds
3820 	)
3821 {
3822 	int		buflen;
3823 	u_int		idx;
3824 	int		doing;
3825 	SOCKET		fd;
3826 	blocking_child *c;
3827 	l_fp		ts;	/* Timestamp at BOselect() gob */
3828 
3829 #if defined(DEBUG_TIMING)
3830 	l_fp		ts_e;	/* Timestamp at EOselect() gob */
3831 #endif
3832 	endpt *		ep;
3833 #ifdef REFCLOCK
3834 	struct refclockio *rp;
3835 	int		saved_errno;
3836 	const char *	clk;
3837 #endif
3838 #ifdef HAS_ROUTING_SOCKET
3839 	struct asyncio_reader *	asyncio_reader;
3840 	struct asyncio_reader *	next_asyncio_reader;
3841 #endif
3842 
3843 	++handler_pkts;
3844 	ts = *cts;
3845 
3846 #ifdef REFCLOCK
3847 	/*
3848 	 * Check out the reference clocks first, if any
3849 	 */
3850 
3851 	for (rp = refio; rp != NULL; rp = rp->next) {
3852 		fd = rp->fd;
3853 
3854 		if (!FD_ISSET(fd, pfds))
3855 			continue;
3856 		buflen = read_refclock_packet(fd, rp, ts);
3857 		/*
3858 		 * The first read must succeed after select() indicates
3859 		 * readability, or we've reached a permanent EOF.
3860 		 * http://bugs.ntp.org/1732 reported ntpd munching CPU
3861 		 * after a USB GPS was unplugged because select was
3862 		 * indicating EOF but ntpd didn't remove the descriptor
3863 		 * from the activefds set.
3864 		 */
3865 		if (buflen < 0 && EAGAIN != errno) {
3866 			saved_errno = errno;
3867 			clk = refnumtoa(&rp->srcclock->srcadr);
3868 			errno = saved_errno;
3869 			msyslog(LOG_ERR, "%s read: %m", clk);
3870 			maintain_activefds(fd, TRUE);
3871 		} else if (0 == buflen) {
3872 			clk = refnumtoa(&rp->srcclock->srcadr);
3873 			msyslog(LOG_ERR, "%s read EOF", clk);
3874 			maintain_activefds(fd, TRUE);
3875 		} else {
3876 			/* drain any remaining refclock input */
3877 			do {
3878 				buflen = read_refclock_packet(fd, rp, ts);
3879 			} while (buflen > 0);
3880 		}
3881 	}
3882 #endif /* REFCLOCK */
3883 
3884 	/*
3885 	 * Loop through the interfaces looking for data to read.
3886 	 */
3887 	for (ep = ep_list; ep != NULL; ep = ep->elink) {
3888 		for (doing = 0; doing < 2; doing++) {
3889 			if (!doing) {
3890 				fd = ep->fd;
3891 			} else {
3892 				if (!(ep->flags & INT_BCASTOPEN))
3893 					break;
3894 				fd = ep->bfd;
3895 			}
3896 			if (fd < 0)
3897 				continue;
3898 			if (FD_ISSET(fd, pfds))
3899 				do {
3900 					buflen = read_network_packet(
3901 							fd, ep, ts);
3902 				} while (buflen > 0);
3903 			/* Check more interfaces */
3904 		}
3905 	}
3906 
3907 #ifdef HAS_ROUTING_SOCKET
3908 	/*
3909 	 * scan list of asyncio readers - currently only used for routing sockets
3910 	 */
3911 	asyncio_reader = asyncio_reader_list;
3912 
3913 	while (asyncio_reader != NULL) {
3914 		/* callback may unlink and free asyncio_reader */
3915 		next_asyncio_reader = asyncio_reader->link;
3916 		if (FD_ISSET(asyncio_reader->fd, pfds))
3917 			(*asyncio_reader->receiver)(asyncio_reader);
3918 		asyncio_reader = next_asyncio_reader;
3919 	}
3920 #endif /* HAS_ROUTING_SOCKET */
3921 
3922 	/*
3923 	 * Check for a response from a blocking child
3924 	 */
3925 	for (idx = 0; idx < blocking_children_alloc; idx++) {
3926 		c = blocking_children[idx];
3927 		if (NULL == c || -1 == c->resp_read_pipe)
3928 			continue;
3929 		if (FD_ISSET(c->resp_read_pipe, pfds)) {
3930 			++c->resp_ready_seen;
3931 			++blocking_child_ready_seen;
3932 		}
3933 	}
3934 
3935 	/* We've done our work */
3936 #if defined(DEBUG_TIMING)
3937 	get_systime(&ts_e);
3938 	/*
3939 	 * (ts_e - ts) is the amount of time we spent
3940 	 * processing this gob of file descriptors.  Log
3941 	 * it.
3942 	 */
3943 	L_SUB(&ts_e, &ts);
3944 	collect_timing(NULL, "input handler", 1, &ts_e);
3945 	if (debug > 3)
3946 		msyslog(LOG_DEBUG,
3947 			"input_handler: Processed a gob of fd's in %s msec",
3948 			lfptoms(&ts_e, 6));
3949 #endif /* DEBUG_TIMING */
3950 }
3951 #endif /* !HAVE_IO_COMPLETION_PORT */
3952 
3953 /*
3954  * find an interface suitable for the src address
3955  */
3956 endpt *
3957 select_peerinterface(
3958 	struct peer *	peer,
3959 	sockaddr_u *	srcadr,
3960 	endpt *		dstadr
3961 	)
3962 {
3963 	endpt *ep;
3964 #ifndef SIM
3965 	endpt *wild;
3966 
3967 	wild = ANY_INTERFACE_CHOOSE(srcadr);
3968 
3969 	/*
3970 	 * Initialize the peer structure and dance the interface jig.
3971 	 * Reference clocks step the loopback waltz, the others
3972 	 * squaredance around the interface list looking for a buddy. If
3973 	 * the dance peters out, there is always the wildcard interface.
3974 	 * This might happen in some systems and would preclude proper
3975 	 * operation with public key cryptography.
3976 	 */
3977 	if (ISREFCLOCKADR(srcadr)) {
3978 		ep = loopback_interface;
3979 	} else if (peer->cast_flags &
3980 		   (MDF_BCLNT | MDF_ACAST | MDF_MCAST | MDF_BCAST)) {
3981 		ep = findbcastinter(srcadr);
3982 		if (ep != NULL)
3983 			DPRINTF(4, ("Found *-cast interface %s for address %s\n",
3984 				stoa(&ep->sin), stoa(srcadr)));
3985 		else
3986 			DPRINTF(4, ("No *-cast local address found for address %s\n",
3987 				stoa(srcadr)));
3988 	} else {
3989 		ep = dstadr;
3990 		if (NULL == ep) {
3991 			ep = wild;
3992 		}
3993 	}
3994 	/*
3995 	 * If it is a multicast address, findbcastinter() may not find
3996 	 * it.  For unicast, we get to find the interface when dstadr is
3997 	 * given to us as the wildcard (ANY_INTERFACE_CHOOSE).  Either
3998 	 * way, try a little harder.
3999 	 */
4000 	if (wild == ep) {
4001 		ep = findinterface(srcadr);
4002 	}
4003 	/*
4004 	 * we do not bind to the wildcard interfaces for output
4005 	 * as our (network) source address would be undefined and
4006 	 * crypto will not work without knowing the own transmit address
4007 	 */
4008 	if (ep != NULL && (INT_WILDCARD & ep->flags)) {
4009 		if (!accept_wildcard_if_for_winnt) {
4010 			ep = NULL;
4011 		}
4012 	}
4013 #else	/* SIM follows */
4014 	ep = loopback_interface;
4015 #endif
4016 
4017 	return ep;
4018 }
4019 
4020 
4021 /*
4022  * findinterface - find local interface corresponding to address
4023  */
4024 endpt *
4025 findinterface(
4026 	sockaddr_u *addr
4027 	)
4028 {
4029 	endpt *iface;
4030 
4031 	iface = findlocalinterface(addr, INT_WILDCARD, 0);
4032 
4033 	if (NULL == iface) {
4034 		DPRINTF(4, ("Found no interface for address %s - returning wildcard\n",
4035 			    stoa(addr)));
4036 
4037 		iface = ANY_INTERFACE_CHOOSE(addr);
4038 	} else
4039 		DPRINTF(4, ("Found interface #%d %s for address %s\n",
4040 			    iface->ifnum, iface->name, stoa(addr)));
4041 
4042 	return iface;
4043 }
4044 
4045 /*
4046  * findlocalinterface - find local interface corresponding to addr,
4047  * which does not have any of flags set.  If bcast is nonzero, addr is
4048  * a broadcast address.
4049  *
4050  * This code attempts to find the local sending address for an outgoing
4051  * address by connecting a new socket to destinationaddress:NTP_PORT
4052  * and reading the sockname of the resulting connect.
4053  * the complicated sequence simulates the routing table lookup
4054  * for to first hop without duplicating any of the routing logic into
4055  * ntpd. preferably we would have used an API call - but its not there -
4056  * so this is the best we can do here short of duplicating to entire routing
4057  * logic in ntpd which would be a silly and really unportable thing to do.
4058  *
4059  */
4060 static endpt *
4061 findlocalinterface(
4062 	sockaddr_u *	addr,
4063 	int		flags,
4064 	int		bcast
4065 	)
4066 {
4067 	GETSOCKNAME_SOCKLEN_TYPE	sockaddrlen;
4068 	endpt *				iface;
4069 	sockaddr_u			saddr;
4070 	SOCKET				s;
4071 	int				rtn;
4072 	int				on;
4073 
4074 	DPRINTF(4, ("Finding interface for addr %s in list of addresses\n",
4075 		    stoa(addr)));
4076 
4077 	/* [Bug 3437] The prototype POOL peer can be AF_UNSPEC.
4078 	 * This is bound to fail, but on the way to nowhere it
4079 	 * triggers a security incident on SELinux.
4080 	 *
4081 	 * Checking the condition and failing early is probably good
4082 	 * advice, and even saves us some syscalls in that case.
4083 	 * Thanks to Miroslav Lichvar for finding this.
4084 	 */
4085 	if (AF_UNSPEC == AF(addr)) {
4086 		return NULL;
4087 	}
4088 	s = socket(AF(addr), SOCK_DGRAM, 0);
4089 	if (INVALID_SOCKET == s) {
4090 		return NULL;
4091 	}
4092 	/*
4093 	 * If we are looking for broadcast interface we need to set this
4094 	 * socket to allow broadcast
4095 	 */
4096 	if (bcast) {
4097 		on = 1;
4098 		if (SOCKET_ERROR == setsockopt(s, SOL_SOCKET,
4099 						SO_BROADCAST,
4100 						(void *)&on,
4101 						sizeof(on))) {
4102 			closesocket(s);
4103 			return NULL;
4104 		}
4105 	}
4106 
4107 	rtn = connect(s, &addr->sa, SOCKLEN(addr));
4108 	if (SOCKET_ERROR == rtn) {
4109 		closesocket(s);
4110 		return NULL;
4111 	}
4112 
4113 	sockaddrlen = sizeof(saddr);
4114 	rtn = getsockname(s, &saddr.sa, &sockaddrlen);
4115 	closesocket(s);
4116 	if (SOCKET_ERROR == rtn)
4117 		return NULL;
4118 
4119 	DPRINTF(4, ("findlocalinterface: kernel maps %s to %s\n",
4120 		    stoa(addr), stoa(&saddr)));
4121 
4122 	iface = getinterface(&saddr, flags);
4123 
4124 	/*
4125 	 * if we didn't find an exact match on saddr, find the closest
4126 	 * available local address.  This handles the case of the
4127 	 * address suggested by the kernel being excluded by nic rules
4128 	 * or the user's -I and -L options to ntpd.
4129 	 * See http://bugs.ntp.org/1184 and http://bugs.ntp.org/1683
4130 	 * for more background.
4131 	 */
4132 	if (NULL == iface || iface->ignore_packets) {
4133 		iface = findclosestinterface(&saddr,
4134 					     flags | INT_LOOPBACK);
4135 	}
4136 	/*
4137 	 * Don't select an interface which will ignore replies, or one
4138 	 * dedicated to multicast receive.
4139 	 */
4140 	if (   iface != NULL
4141 	    && (iface->ignore_packets || (INT_MCASTIF & iface->flags))) {
4142 		iface = NULL;
4143 	}
4144 	return iface;
4145 }
4146 
4147 
4148 /*
4149  * findclosestinterface
4150  *
4151  * If there are -I/--interface or -L/novirtualips command-line options,
4152  * or "nic" or "interface" rules in ntp.conf, findlocalinterface() may
4153  * find the kernel's preferred local address for a given peer address is
4154  * administratively unavailable to ntpd, and punt to this routine's more
4155  * expensive search.
4156  *
4157  * Find the numerically closest local address to the one connect()
4158  * suggested.  This matches an address on the same subnet first, as
4159  * needed by Bug 1184, and provides a consistent choice if there are
4160  * multiple feasible local addresses, regardless of the order ntpd
4161  * enumerated them.
4162  */
4163 endpt *
4164 findclosestinterface(
4165 	sockaddr_u *	addr,
4166 	int		flags
4167 	)
4168 {
4169 	endpt *		ep;
4170 	endpt *		winner;
4171 	sockaddr_u	addr_dist;
4172 	sockaddr_u	min_dist;
4173 
4174 	ZERO_SOCK(&min_dist);
4175 	winner = NULL;
4176 
4177 	for (ep = ep_list; ep != NULL; ep = ep->elink) {
4178 		if (ep->ignore_packets ||
4179 		    AF(addr) != ep->family ||
4180 		    flags & ep->flags)
4181 			continue;
4182 
4183 		calc_addr_distance(&addr_dist, addr, &ep->sin);
4184 		if (NULL == winner ||
4185 		    -1 == cmp_addr_distance(&addr_dist, &min_dist)) {
4186 			min_dist = addr_dist;
4187 			winner = ep;
4188 		}
4189 	}
4190 	if (NULL == winner)
4191 		DPRINTF(4, ("findclosestinterface(%s) failed\n",
4192 			    stoa(addr)));
4193 	else
4194 		DPRINTF(4, ("findclosestinterface(%s) -> %s\n",
4195 			    stoa(addr), stoa(&winner->sin)));
4196 
4197 	return winner;
4198 }
4199 
4200 
4201 /*
4202  * calc_addr_distance - calculate the distance between two addresses,
4203  *			the absolute value of the difference between
4204  *			the addresses numerically, stored as an address.
4205  */
4206 static void
4207 calc_addr_distance(
4208 	sockaddr_u *		dist,
4209 	const sockaddr_u *	a1,
4210 	const sockaddr_u *	a2
4211 	)
4212 {
4213 	u_char *	pdist;
4214 	const u_char *	p1;
4215 	const u_char *	p2;
4216 	size_t		cb;
4217 	int		different;
4218 	int		a1_greater;
4219 	u_int		u;
4220 
4221 	REQUIRE(AF(a1) == AF(a2));
4222 
4223 	ZERO_SOCK(dist);
4224 	AF(dist) = AF(a1);
4225 
4226 	if (IS_IPV4(a1)) {
4227 		pdist = (      u_char *)&NSRCADR(dist);
4228 		p1 =	(const u_char *)&NSRCADR(a1);
4229 		p2 =	(const u_char *)&NSRCADR(a2);
4230 	} else {
4231 		pdist = (      u_char *)&NSRCADR(dist);
4232 		p1 =	(const u_char *)&NSRCADR(a1);
4233 		p2 =	(const u_char *)&NSRCADR(a2);
4234 	}
4235 	cb = SIZEOF_INADDR(AF(dist));
4236 	different = FALSE;
4237 	a1_greater = FALSE;
4238 	for (u = 0; u < cb; u++) {
4239 		if (!different && p1[u] != p2[u]) {
4240 			a1_greater = (p1[u] > p2[u]);
4241 			different = TRUE;
4242 		}
4243 		if (a1_greater) {
4244 			pdist[u] = p1[u] - p2[u];
4245 		} else {
4246 			pdist[u] = p2[u] - p1[u];
4247 		}
4248 	}
4249 }
4250 
4251 
4252 /*
4253  * cmp_addr_distance - compare two address distances, returning -1, 0,
4254  *		       1 to indicate their relationship.
4255  */
4256 static int
4257 cmp_addr_distance(
4258 	const sockaddr_u *	d1,
4259 	const sockaddr_u *	d2
4260 	)
4261 {
4262 	int	i;
4263 
4264 	REQUIRE(AF(d1) == AF(d2));
4265 
4266 	if (IS_IPV4(d1)) {
4267 		if (SRCADR(d1) < SRCADR(d2))
4268 			return -1;
4269 		else if (SRCADR(d1) == SRCADR(d2))
4270 			return 0;
4271 		else
4272 			return 1;
4273 	}
4274 
4275 	for (i = 0; i < (int)sizeof(NSRCADR6(d1)); i++) {
4276 		if (NSRCADR6(d1)[i] < NSRCADR6(d2)[i])
4277 			return -1;
4278 		else if (NSRCADR6(d1)[i] > NSRCADR6(d2)[i])
4279 			return 1;
4280 	}
4281 
4282 	return 0;
4283 }
4284 
4285 
4286 
4287 /*
4288  * fetch an interface structure the matches the
4289  * address and has the given flags NOT set
4290  */
4291 endpt *
4292 getinterface(
4293 	sockaddr_u *	addr,
4294 	u_int32		flags
4295 	)
4296 {
4297 	endpt *iface;
4298 
4299 	iface = find_addr_in_list(addr);
4300 
4301 	if (iface != NULL && (iface->flags & flags))
4302 		iface = NULL;
4303 
4304 	return iface;
4305 }
4306 
4307 
4308 /*
4309  * findbcastinter - find broadcast interface corresponding to address
4310  */
4311 endpt *
4312 findbcastinter(
4313 	sockaddr_u *addr
4314 	)
4315 {
4316 	endpt *	iface;
4317 
4318 	iface = NULL;
4319 #if !defined(MPE) && (defined(SIOCGIFCONF) || defined(SYS_WINNT))
4320 	DPRINTF(4, ("Finding broadcast/multicast interface for addr %s in list of addresses\n",
4321 		    stoa(addr)));
4322 
4323 	iface = findlocalinterface(addr, INT_LOOPBACK | INT_WILDCARD,
4324 				   1);
4325 	if (iface != NULL) {
4326 		DPRINTF(4, ("Easily found bcast-/mcast- interface index #%d %s\n",
4327 			    iface->ifnum, iface->name));
4328 		return iface;
4329 	}
4330 
4331 	/*
4332 	 * plan B - try to find something reasonable in our lists in
4333 	 * case kernel lookup doesn't help
4334 	 */
4335 	for (iface = ep_list; iface != NULL; iface = iface->elink) {
4336 		if (iface->flags & INT_WILDCARD)
4337 			continue;
4338 
4339 		/* Don't bother with ignored interfaces */
4340 		if (iface->ignore_packets)
4341 			continue;
4342 
4343 		/*
4344 		 * First look if this is the correct family
4345 		 */
4346 		if(AF(&iface->sin) != AF(addr))
4347 			continue;
4348 
4349 		/* Skip the loopback addresses */
4350 		if (iface->flags & INT_LOOPBACK)
4351 			continue;
4352 
4353 		/*
4354 		 * If we are looking to match a multicast address and
4355 		 * this interface is one...
4356 		 */
4357 		if (addr_ismulticast(addr)
4358 		    && (iface->flags & INT_MULTICAST)) {
4359 #ifdef INCLUDE_IPV6_SUPPORT
4360 			/*
4361 			 * ...it is the winner unless we're looking for
4362 			 * an interface to use for link-local multicast
4363 			 * and its address is not link-local.
4364 			 */
4365 			if (IS_IPV6(addr)
4366 			    && IN6_IS_ADDR_MC_LINKLOCAL(PSOCK_ADDR6(addr))
4367 			    && !IN6_IS_ADDR_LINKLOCAL(PSOCK_ADDR6(&iface->sin)))
4368 				continue;
4369 #endif
4370 			break;
4371 		}
4372 
4373 		/*
4374 		 * We match only those interfaces marked as
4375 		 * broadcastable and either the explicit broadcast
4376 		 * address or the network portion of the IP address.
4377 		 * Sloppy.
4378 		 */
4379 		if (IS_IPV4(addr)) {
4380 			if (SOCK_EQ(&iface->bcast, addr))
4381 				break;
4382 
4383 			if ((NSRCADR(&iface->sin) & NSRCADR(&iface->mask))
4384 			    == (NSRCADR(addr)	  & NSRCADR(&iface->mask)))
4385 				break;
4386 		}
4387 #ifdef INCLUDE_IPV6_SUPPORT
4388 		else if (IS_IPV6(addr)) {
4389 			if (SOCK_EQ(&iface->bcast, addr))
4390 				break;
4391 
4392 			if (SOCK_EQ(netof(&iface->sin), netof(addr)))
4393 				break;
4394 		}
4395 #endif
4396 	}
4397 #endif /* SIOCGIFCONF */
4398 	if (NULL == iface) {
4399 		DPRINTF(4, ("No bcast interface found for %s\n",
4400 			    stoa(addr)));
4401 		iface = ANY_INTERFACE_CHOOSE(addr);
4402 	} else {
4403 		DPRINTF(4, ("Found bcast-/mcast- interface index #%d %s\n",
4404 			    iface->ifnum, iface->name));
4405 	}
4406 
4407 	return iface;
4408 }
4409 
4410 
4411 /*
4412  * io_clr_stats - clear I/O module statistics
4413  */
4414 void
4415 io_clr_stats(void)
4416 {
4417 	packets_dropped = 0;
4418 	packets_ignored = 0;
4419 	packets_received = 0;
4420 	packets_sent = 0;
4421 	packets_notsent = 0;
4422 
4423 	handler_calls = 0;
4424 	handler_pkts = 0;
4425 	io_timereset = current_time;
4426 }
4427 
4428 
4429 #ifdef REFCLOCK
4430 /*
4431  * io_addclock - add a reference clock to the list and arrange that we
4432  *				 get SIGIO interrupts from it.
4433  */
4434 int
4435 io_addclock(
4436 	struct refclockio *rio
4437 	)
4438 {
4439 	BLOCKIO();
4440 
4441 	/*
4442 	 * Stuff the I/O structure in the list and mark the descriptor
4443 	 * in use.  There is a harmless (I hope) race condition here.
4444 	 */
4445 	rio->active = TRUE;
4446 
4447 # ifdef HAVE_SIGNALED_IO
4448 	if (init_clock_sig(rio)) {
4449 		UNBLOCKIO();
4450 		return 0;
4451 	}
4452 # elif defined(HAVE_IO_COMPLETION_PORT)
4453 	if (!io_completion_port_add_clock_io(rio)) {
4454 		UNBLOCKIO();
4455 		return 0;
4456 	}
4457 # endif
4458 
4459 	/*
4460 	 * enqueue
4461 	 */
4462 	LINK_SLIST(refio, rio, next);
4463 
4464 	/*
4465 	 * register fd
4466 	 */
4467 	add_fd_to_list(rio->fd, FD_TYPE_FILE);
4468 
4469 	UNBLOCKIO();
4470 	return 1;
4471 }
4472 
4473 
4474 /*
4475  * io_closeclock - close the clock in the I/O structure given
4476  */
4477 void
4478 io_closeclock(
4479 	struct refclockio *rio
4480 	)
4481 {
4482 	struct refclockio *unlinked;
4483 
4484 	BLOCKIO();
4485 
4486 	/*
4487 	 * Remove structure from the list
4488 	 */
4489 	rio->active = FALSE;
4490 	UNLINK_SLIST(unlinked, refio, rio, next, struct refclockio);
4491 	if (NULL != unlinked) {
4492 		/* Close the descriptor. The order of operations is
4493 		 * important here in case of async / overlapped IO:
4494 		 * only after we have removed the clock from the
4495 		 * IO completion port we can be sure no further
4496 		 * input is queued. So...
4497 		 *  - we first disable feeding to the queu by removing
4498 		 *    the clock from the IO engine
4499 		 *  - close the file (which brings down any IO on it)
4500 		 *  - clear the buffer from results for this fd
4501 		 */
4502 #	    ifdef HAVE_IO_COMPLETION_PORT
4503 		io_completion_port_remove_clock_io(rio);
4504 #	    endif
4505 		close_and_delete_fd_from_list(rio->fd, NULL);
4506 		purge_recv_buffers_for_fd(rio->fd);
4507 		rio->fd = -1;
4508 	}
4509 
4510 	UNBLOCKIO();
4511 }
4512 #endif	/* REFCLOCK */
4513 
4514 
4515 /*
4516  * On NT a SOCKET is an unsigned int so we cannot possibly keep it in
4517  * an array. So we use one of the ISC_LIST functions to hold the
4518  * socket value and use that when we want to enumerate it.
4519  *
4520  * This routine is called by the forked intres child process to close
4521  * all open sockets.  On Windows there's no need as intres runs in
4522  * the same process as a thread.
4523  */
4524 #ifndef SYS_WINNT
4525 void
4526 kill_asyncio(
4527 	int	startfd
4528 	)
4529 {
4530 	BLOCKIO();
4531 
4532 	/*
4533 	 * In the child process we do not maintain activefds and
4534 	 * maxactivefd.  Zeroing maxactivefd disables code which
4535 	 * maintains it in close_and_delete_fd_from_list().
4536 	 */
4537 	maxactivefd = 0;
4538 
4539 	while (fd_list != NULL)
4540 		close_and_delete_fd_from_list(fd_list->fd, NULL);
4541 
4542 	UNBLOCKIO();
4543 }
4544 #endif	/* !SYS_WINNT */
4545 
4546 
4547 /*
4548  * Add and delete functions for the list of input file descriptors
4549  */
4550 static void
4551 add_fd_to_list(
4552 	SOCKET fd,
4553 	enum desc_type type
4554 	)
4555 {
4556 	vsock_t *lsock = emalloc(sizeof(*lsock));
4557 
4558 	lsock->fd = fd;
4559 	lsock->type = type;
4560 
4561 	LINK_SLIST(fd_list, lsock, link);
4562 	maintain_activefds(fd, 0);
4563 }
4564 
4565 
4566 static void
4567 close_and_delete_fd_from_list(
4568 	SOCKET fd,
4569 	endpt *ep	/* req. if fd is in struct endpt */
4570 	)
4571 {
4572 	vsock_t *lsock;
4573 
4574 	UNLINK_EXPR_SLIST(lsock, fd_list, fd ==
4575 	    UNLINK_EXPR_SLIST_CURRENT()->fd, link, vsock_t);
4576 
4577 	if (NULL == lsock)
4578 		return;
4579 
4580 	switch (lsock->type) {
4581 
4582 	case FD_TYPE_SOCKET:
4583 	    #ifdef HAVE_IO_COMPLETION_PORT
4584 		if (ep != NULL) {
4585 			io_completion_port_remove_socket(fd, ep);
4586 		}
4587 	    #endif
4588 		closesocket(lsock->fd);
4589 		break;
4590 
4591 	case FD_TYPE_FILE:
4592 		closeserial((int)lsock->fd);
4593 		break;
4594 
4595 	default:
4596 		msyslog(LOG_ERR,
4597 			"internal error - illegal descriptor type %d - EXITING",
4598 			(int)lsock->type);
4599 		exit(1);
4600 	}
4601 
4602 	free(lsock);
4603 	/*
4604 	 * remove from activefds
4605 	 */
4606 	maintain_activefds(fd, 1);
4607 }
4608 
4609 
4610 static void
4611 add_addr_to_list(
4612 	sockaddr_u *	addr,
4613 	endpt *		ep
4614 	)
4615 {
4616 	remaddr_t *laddr;
4617 
4618 #ifdef DEBUG
4619 	if (find_addr_in_list(addr) == NULL) {
4620 #endif
4621 		/* not there yet - add to list */
4622 		laddr = emalloc(sizeof(*laddr));
4623 		laddr->addr = *addr;
4624 		laddr->ep = ep;
4625 
4626 		LINK_SLIST(remoteaddr_list, laddr, link);
4627 
4628 		DPRINTF(4, ("Added addr %s to list of addresses\n",
4629 			    stoa(addr)));
4630 #ifdef DEBUG
4631 	} else
4632 		DPRINTF(4, ("WARNING: Attempt to add duplicate addr %s to address list\n",
4633 			    stoa(addr)));
4634 #endif
4635 }
4636 
4637 
4638 static void
4639 delete_addr_from_list(
4640 	sockaddr_u *addr
4641 	)
4642 {
4643 	remaddr_t *unlinked;
4644 
4645 	UNLINK_EXPR_SLIST(unlinked, remoteaddr_list, SOCK_EQ(addr,
4646 		&(UNLINK_EXPR_SLIST_CURRENT()->addr)), link, remaddr_t);
4647 
4648 	if (unlinked != NULL) {
4649 		DPRINTF(4, ("Deleted addr %s from list of addresses\n",
4650 			stoa(addr)));
4651 		free(unlinked);
4652 	}
4653 }
4654 
4655 
4656 static void
4657 delete_interface_from_list(
4658 	endpt *iface
4659 	)
4660 {
4661 	remaddr_t *unlinked;
4662 
4663 	for (;;) {
4664 		UNLINK_EXPR_SLIST(unlinked, remoteaddr_list, iface ==
4665 		    UNLINK_EXPR_SLIST_CURRENT()->ep, link,
4666 		    remaddr_t);
4667 
4668 		if (unlinked == NULL)
4669 			break;
4670 		DPRINTF(4, ("Deleted addr %s for interface #%d %s from list of addresses\n",
4671 			    stoa(&unlinked->addr), iface->ifnum,
4672 			    iface->name));
4673 		free(unlinked);
4674 	}
4675 }
4676 
4677 
4678 static endpt *
4679 find_addr_in_list(
4680 	sockaddr_u *addr
4681 	)
4682 {
4683 	remaddr_t *entry;
4684 
4685 	DPRINTF(4, ("Searching for addr %s in list of addresses - ",
4686 		    stoa(addr)));
4687 
4688 	for (entry = remoteaddr_list;
4689 	     entry != NULL;
4690 	     entry = entry->link)
4691 		if (SOCK_EQ(&entry->addr, addr)) {
4692 			DPRINTF(4, ("FOUND\n"));
4693 			return entry->ep;
4694 		}
4695 
4696 	DPRINTF(4, ("NOT FOUND\n"));
4697 	return NULL;
4698 }
4699 
4700 
4701 /*
4702  * Find the given address with the all given flags set in the list
4703  */
4704 static endpt *
4705 find_flagged_addr_in_list(
4706 	sockaddr_u *	addr,
4707 	u_int32		flags
4708 	)
4709 {
4710 	remaddr_t *entry;
4711 
4712 	DPRINTF(4, ("Finding addr %s with flags %d in list: ",
4713 		    stoa(addr), flags));
4714 
4715 	for (entry = remoteaddr_list;
4716 	     entry != NULL;
4717 	     entry = entry->link)
4718 
4719 		if (SOCK_EQ(&entry->addr, addr)
4720 		    && (entry->ep->flags & flags) == flags) {
4721 
4722 			DPRINTF(4, ("FOUND\n"));
4723 			return entry->ep;
4724 		}
4725 
4726 	DPRINTF(4, ("NOT FOUND\n"));
4727 	return NULL;
4728 }
4729 
4730 
4731 const char *
4732 localaddrtoa(
4733 	endpt *la
4734 	)
4735 {
4736 	return (NULL == la)
4737 		   ? "<null>"
4738 		   : stoa(&la->sin);
4739 }
4740 
4741 
4742 #ifdef HAS_ROUTING_SOCKET
4743 # ifndef UPDATE_GRACE
4744 #  define UPDATE_GRACE	3	/* min. UPDATE_GRACE - 1 seconds before scanning */
4745 # endif
4746 
4747 static void
4748 process_routing_msgs(struct asyncio_reader *reader)
4749 {
4750 	static void *	buffer;
4751 	static size_t	buffsz = 8192;
4752 	int		cnt, new, msg_type;
4753 	socklen_t	len;
4754 #ifdef HAVE_RTNETLINK
4755 	struct nlmsghdr *nh;
4756 #else
4757 	struct rt_msghdr rtm;
4758 	char *p;
4759 	char *endp;
4760 #endif
4761 
4762 	if (scan_addrs_once) {
4763 		/*
4764 		 * discard ourselves if we are not needed any more
4765 		 * usually happens when running unprivileged
4766 		 */
4767 		goto disable;
4768 	}
4769 
4770 	if (NULL == buffer) {
4771 		buffer = emalloc(buffsz);
4772 	}
4773 
4774 	cnt = read(reader->fd, buffer, buffsz);
4775 
4776 	if (cnt < 0) {
4777 		if (errno == ENOBUFS) {
4778 			/* increase socket buffer by 25% */
4779 			len = sizeof cnt;
4780 			if (0 > getsockopt(reader->fd, SOL_SOCKET, SO_RCVBUF, &cnt, &len) ||
4781 			    sizeof cnt != len) {
4782 				msyslog(LOG_ERR,
4783 					"routing getsockopt SO_RCVBUF %u %u: %m - disabling",
4784 					(u_int)cnt, (u_int)sizeof cnt);
4785 				goto disable;
4786 			}
4787 			new = cnt + (cnt / 4);
4788 			if (0 > setsockopt(reader->fd, SOL_SOCKET, SO_RCVBUF, &new, sizeof new)) {
4789 				msyslog(LOG_ERR,
4790 					"routing setsockopt SO_RCVBUF %d -> %d: %m - disabling",
4791 					cnt, new);
4792 				goto disable;
4793 			}
4794 		} else {
4795 			msyslog(LOG_ERR,
4796 				"routing socket reports: %m - disabling");
4797 		    disable:
4798 			remove_asyncio_reader(reader);
4799 			delete_asyncio_reader(reader);
4800 			return;
4801 		}
4802 	}
4803 
4804 	/*
4805 	 * process routing message
4806 	 */
4807 #ifdef HAVE_RTNETLINK
4808 	for (nh = buffer; NLMSG_OK(nh, cnt); nh = NLMSG_NEXT(nh, cnt))
4809 	{
4810 		msg_type = nh->nlmsg_type;
4811 #else
4812 	for (p = buffer, endp = p + cnt;
4813 	     (p + sizeof(struct rt_msghdr)) <= endp;
4814 	     p += rtm.rtm_msglen)
4815 	{
4816 		memcpy(&rtm, p, sizeof(rtm));
4817 		if (rtm.rtm_version != RTM_VERSION) {
4818 			msyslog(LOG_ERR,
4819 				"version mismatch (got %d - expected %d) on routing socket - disabling",
4820 				rtm.rtm_version, RTM_VERSION);
4821 
4822 			remove_asyncio_reader(reader);
4823 			delete_asyncio_reader(reader);
4824 			return;
4825 		}
4826 		msg_type = rtm.rtm_type;
4827 #endif	/* !HAVE_RTNETLINK */
4828 		switch (msg_type) {
4829 #ifdef RTM_NEWADDR
4830 		case RTM_NEWADDR:
4831 #endif
4832 #ifdef RTM_DELADDR
4833 		case RTM_DELADDR:
4834 #endif
4835 #ifdef RTM_ADD
4836 		case RTM_ADD:
4837 #endif
4838 #ifdef RTM_DELETE
4839 		case RTM_DELETE:
4840 #endif
4841 #ifdef RTM_REDIRECT
4842 		case RTM_REDIRECT:
4843 #endif
4844 #ifdef RTM_CHANGE
4845 		case RTM_CHANGE:
4846 #endif
4847 #ifdef RTM_IFINFO
4848 		case RTM_IFINFO:
4849 #endif
4850 #ifdef RTM_NEWLINK
4851 		case RTM_NEWLINK:
4852 #endif
4853 #ifdef RTM_DELLINK
4854 		case RTM_DELLINK:
4855 #endif
4856 #ifdef RTM_NEWROUTE
4857 		case RTM_NEWROUTE:
4858 #endif
4859 #ifdef RTM_DELROUTE
4860 		case RTM_DELROUTE:
4861 #endif
4862 			/*
4863 			 * we are keen on new and deleted addresses and
4864 			 * if an interface goes up and down or routing
4865 			 * changes
4866 			 */
4867 			DPRINTF(3, ("routing message op = %d: scheduling interface update\n",
4868 				    msg_type));
4869 			endpt_scan_timer = UPDATE_GRACE + current_time;
4870 			break;
4871 #ifdef HAVE_RTNETLINK
4872 		case NLMSG_DONE:
4873 			/* end of multipart message */
4874 			return;
4875 #endif
4876 		default:
4877 			/*
4878 			 * the rest doesn't bother us.
4879 			 */
4880 			DPRINTF(4, ("routing message op = %d: ignored\n",
4881 				    msg_type));
4882 			break;
4883 		}
4884 	}
4885 }
4886 
4887 /*
4888  * set up routing notifications
4889  */
4890 static void
4891 init_async_notifications(void)
4892 {
4893 	struct asyncio_reader *reader;
4894 #ifdef HAVE_RTNETLINK
4895 	int fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
4896 	struct sockaddr_nl sa;
4897 #else
4898 	int fd = socket(PF_ROUTE, SOCK_RAW, AF_UNSPEC);
4899 #ifdef SO_RERROR
4900 	int on = 1;
4901 #endif
4902 #endif
4903 #ifdef RO_MSGFILTER
4904 	unsigned char msgfilter[] = {
4905 #ifdef RTM_NEWADDR
4906 		RTM_NEWADDR,
4907 #endif
4908 #ifdef RTM_DELADDR
4909 		RTM_DELADDR,
4910 #endif
4911 #ifdef RTM_ADD
4912 		RTM_ADD,
4913 #endif
4914 #ifdef RTM_DELETE
4915 		RTM_DELETE,
4916 #endif
4917 #ifdef RTM_REDIRECT
4918 		RTM_REDIRECT,
4919 #endif
4920 #ifdef RTM_CHANGE
4921 		RTM_CHANGE,
4922 #endif
4923 #ifdef RTM_IFINFO
4924 		RTM_IFINFO,
4925 #endif
4926 #ifdef RTM_NEWLINK
4927 		RTM_NEWLINK,
4928 #endif
4929 #ifdef RTM_DELLINK
4930 		RTM_DELLINK,
4931 #endif
4932 #ifdef RTM_NEWROUTE
4933 		RTM_NEWROUTE,
4934 #endif
4935 #ifdef RTM_DELROUTE
4936 		RTM_DELROUTE,
4937 #endif
4938 	};
4939 #endif /* !RO_MSGFILTER */
4940 
4941 	if (fd < 0) {
4942 		msyslog(LOG_ERR,
4943 			"unable to open routing socket (%m) - using polled interface update");
4944 		return;
4945 	}
4946 
4947 	fd = move_fd(fd);
4948 #ifdef HAVE_RTNETLINK
4949 	ZERO(sa);
4950 	sa.nl_family = PF_NETLINK;
4951 	sa.nl_groups = RTMGRP_LINK | RTMGRP_IPV4_IFADDR
4952 		       | RTMGRP_IPV6_IFADDR | RTMGRP_IPV4_ROUTE
4953 		       | RTMGRP_IPV4_MROUTE | RTMGRP_IPV6_ROUTE
4954 		       | RTMGRP_IPV6_MROUTE;
4955 	if (bind(fd, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
4956 		msyslog(LOG_ERR,
4957 			"bind failed on routing socket (%m) - using polled interface update");
4958 		return;
4959 	}
4960 #endif
4961 #ifdef RO_MSGFILTER
4962 	if (setsockopt(fd, PF_ROUTE, RO_MSGFILTER,
4963 	    &msgfilter, sizeof(msgfilter)) == -1)
4964 		msyslog(LOG_ERR, "RO_MSGFILTER: %m");
4965 #endif
4966 #ifdef SO_RERROR
4967 	if (setsockopt(fd, SOL_SOCKET, SO_RERROR, &on, sizeof(on)) == -1)
4968 		msyslog(LOG_ERR, "SO_RERROR: %m");
4969 #endif
4970 	make_socket_nonblocking(fd);
4971 #if defined(HAVE_SIGNALED_IO)
4972 	init_socket_sig(fd);
4973 #endif /* HAVE_SIGNALED_IO */
4974 
4975 	reader = new_asyncio_reader();
4976 
4977 	reader->fd = fd;
4978 	reader->receiver = process_routing_msgs;
4979 
4980 	add_asyncio_reader(reader, FD_TYPE_SOCKET);
4981 	msyslog(LOG_INFO,
4982 		"Listening on routing socket on fd #%d for interface updates",
4983 		fd);
4984 }
4985 #else
4986 /* HAS_ROUTING_SOCKET not defined */
/*
 * init_async_notifications - stub used when no routing socket is
 * available; there is nothing to set up.
 */
static void
init_async_notifications(void)
{
	/* intentionally empty */
}
4991 #endif
4992