xref: /onnv-gate/usr/src/cmd/cmd-inet/usr.lib/in.mpathd/mpd_main.c (revision 2496:636492f9c2f8)
10Sstevel@tonic-gate /*
20Sstevel@tonic-gate  * CDDL HEADER START
30Sstevel@tonic-gate  *
40Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
52074Smeem  * Common Development and Distribution License (the "License").
62074Smeem  * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate  *
80Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate  * See the License for the specific language governing permissions
110Sstevel@tonic-gate  * and limitations under the License.
120Sstevel@tonic-gate  *
130Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate  *
190Sstevel@tonic-gate  * CDDL HEADER END
200Sstevel@tonic-gate  */
210Sstevel@tonic-gate /*
222074Smeem  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
230Sstevel@tonic-gate  * Use is subject to license terms.
240Sstevel@tonic-gate  */
250Sstevel@tonic-gate 
260Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
270Sstevel@tonic-gate 
280Sstevel@tonic-gate #include "mpd_defs.h"
290Sstevel@tonic-gate #include "mpd_tables.h"
300Sstevel@tonic-gate 
310Sstevel@tonic-gate int debug = 0;				/* Debug flag */
320Sstevel@tonic-gate static int pollfd_num = 0;		/* Num. of poll descriptors */
330Sstevel@tonic-gate static struct pollfd *pollfds = NULL;	/* Array of poll descriptors */
340Sstevel@tonic-gate 
350Sstevel@tonic-gate 					/* All times below in ms */
360Sstevel@tonic-gate int	user_failure_detection_time;	/* user specified failure detection */
370Sstevel@tonic-gate 					/* time (fdt) */
380Sstevel@tonic-gate int	user_probe_interval;		/* derived from user specified fdt */
390Sstevel@tonic-gate 
400Sstevel@tonic-gate static int	rtsock_v4;		/* AF_INET routing socket */
410Sstevel@tonic-gate static int	rtsock_v6;		/* AF_INET6 routing socket */
420Sstevel@tonic-gate int	ifsock_v4 = -1;			/* IPv4 socket for ioctls  */
430Sstevel@tonic-gate int	ifsock_v6 = -1;			/* IPv6 socket for ioctls  */
440Sstevel@tonic-gate static int	lsock_v4;		/* Listen socket to detect mpathd */
450Sstevel@tonic-gate static int	lsock_v6;		/* Listen socket to detect mpathd */
460Sstevel@tonic-gate static int	mibfd = -1;		/* fd to get mib info */
470Sstevel@tonic-gate static boolean_t force_mcast = _B_FALSE; /* Only for test purposes */
480Sstevel@tonic-gate 
490Sstevel@tonic-gate boolean_t	full_scan_required = _B_FALSE;
500Sstevel@tonic-gate static uint_t	last_initifs_time;	/* Time when initifs was last run */
510Sstevel@tonic-gate static	char **argv0;			/* Saved for re-exec on SIGHUP */
520Sstevel@tonic-gate boolean_t handle_link_notifications = _B_TRUE;
530Sstevel@tonic-gate 
540Sstevel@tonic-gate static void	initlog(void);
550Sstevel@tonic-gate static void	run_timeouts(void);
560Sstevel@tonic-gate static void	initifs(void);
570Sstevel@tonic-gate static void	check_if_removed(struct phyint_instance *pii);
580Sstevel@tonic-gate static void	select_test_ifs(void);
590Sstevel@tonic-gate static void	ire_process_v4(mib2_ipRouteEntry_t *buf, size_t len);
600Sstevel@tonic-gate static void	ire_process_v6(mib2_ipv6RouteEntry_t *buf, size_t len);
610Sstevel@tonic-gate static void	router_add_v4(mib2_ipRouteEntry_t *rp1,
620Sstevel@tonic-gate     struct in_addr nexthop_v4);
630Sstevel@tonic-gate static void	router_add_v6(mib2_ipv6RouteEntry_t *rp1,
640Sstevel@tonic-gate     struct in6_addr nexthop_v6);
650Sstevel@tonic-gate static void	router_add_common(int af, char *ifname,
660Sstevel@tonic-gate     struct in6_addr nexthop);
670Sstevel@tonic-gate static void	init_router_targets();
680Sstevel@tonic-gate static void	cleanup(void);
690Sstevel@tonic-gate static int	setup_listener(int af);
700Sstevel@tonic-gate static void	check_config(void);
71*2496Smeem static void	check_addr_unique(struct phyint_instance *,
72*2496Smeem     struct sockaddr_storage *);
730Sstevel@tonic-gate static void	init_host_targets(void);
740Sstevel@tonic-gate static void	dup_host_targets(struct phyint_instance *desired_pii);
750Sstevel@tonic-gate static void	loopback_cmd(int sock, int family);
760Sstevel@tonic-gate static int	poll_remove(int fd);
770Sstevel@tonic-gate static boolean_t daemonize(void);
780Sstevel@tonic-gate static int	closefunc(void *, int);
790Sstevel@tonic-gate static unsigned int process_cmd(int newfd, union mi_commands *mpi);
800Sstevel@tonic-gate static unsigned int process_query(int fd, mi_query_t *miq);
810Sstevel@tonic-gate static unsigned int send_groupinfo(int fd, ipmp_groupinfo_t *grinfop);
820Sstevel@tonic-gate static unsigned int send_grouplist(int fd, ipmp_grouplist_t *grlistp);
830Sstevel@tonic-gate static unsigned int send_ifinfo(int fd, ipmp_ifinfo_t *ifinfop);
840Sstevel@tonic-gate static unsigned int send_result(int fd, unsigned int error, int syserror);
850Sstevel@tonic-gate 
862250Srk129064 struct local_addr *laddr_list = NULL;
872250Srk129064 
880Sstevel@tonic-gate /*
890Sstevel@tonic-gate  * Return the current time in milliseconds (from an arbitrary reference)
900Sstevel@tonic-gate  * truncated to fit into an int. Truncation is ok since we are interested
910Sstevel@tonic-gate  * only in differences and not the absolute values.
920Sstevel@tonic-gate  */
930Sstevel@tonic-gate uint_t
940Sstevel@tonic-gate getcurrenttime(void)
950Sstevel@tonic-gate {
960Sstevel@tonic-gate 	uint_t	cur_time;	/* In ms */
970Sstevel@tonic-gate 
980Sstevel@tonic-gate 	/*
990Sstevel@tonic-gate 	 * Use of a non-user-adjustable source of time is
1000Sstevel@tonic-gate 	 * required. However millisecond precision is sufficient.
1010Sstevel@tonic-gate 	 * divide by 10^6
1020Sstevel@tonic-gate 	 */
1030Sstevel@tonic-gate 	cur_time = (uint_t)(gethrtime() / 1000000LL);
1040Sstevel@tonic-gate 	return (cur_time);
1050Sstevel@tonic-gate }
1060Sstevel@tonic-gate 
1070Sstevel@tonic-gate /*
1080Sstevel@tonic-gate  * Add fd to the set being polled. Returns 0 if ok; -1 if failed.
1090Sstevel@tonic-gate  */
1100Sstevel@tonic-gate int
1110Sstevel@tonic-gate poll_add(int fd)
1120Sstevel@tonic-gate {
1130Sstevel@tonic-gate 	int i;
1140Sstevel@tonic-gate 	int new_num;
1150Sstevel@tonic-gate 	struct pollfd *newfds;
1160Sstevel@tonic-gate retry:
1170Sstevel@tonic-gate 	/* Check if already present */
1180Sstevel@tonic-gate 	for (i = 0; i < pollfd_num; i++) {
1190Sstevel@tonic-gate 		if (pollfds[i].fd == fd)
1200Sstevel@tonic-gate 			return (0);
1210Sstevel@tonic-gate 	}
1220Sstevel@tonic-gate 	/* Check for empty spot already present */
1230Sstevel@tonic-gate 	for (i = 0; i < pollfd_num; i++) {
1240Sstevel@tonic-gate 		if (pollfds[i].fd == -1) {
1250Sstevel@tonic-gate 			pollfds[i].fd = fd;
1260Sstevel@tonic-gate 			return (0);
1270Sstevel@tonic-gate 		}
1280Sstevel@tonic-gate 	}
1290Sstevel@tonic-gate 
1300Sstevel@tonic-gate 	/* Allocate space for 32 more fds and initialize to -1 */
1310Sstevel@tonic-gate 	new_num = pollfd_num + 32;
1320Sstevel@tonic-gate 	newfds = realloc(pollfds, new_num * sizeof (struct pollfd));
1330Sstevel@tonic-gate 	if (newfds == NULL) {
1340Sstevel@tonic-gate 		logperror("poll_add: realloc");
1350Sstevel@tonic-gate 		return (-1);
1360Sstevel@tonic-gate 	}
1370Sstevel@tonic-gate 	for (i = pollfd_num; i < new_num; i++) {
1380Sstevel@tonic-gate 		newfds[i].fd = -1;
1390Sstevel@tonic-gate 		newfds[i].events = POLLIN;
1400Sstevel@tonic-gate 	}
1410Sstevel@tonic-gate 	pollfd_num = new_num;
1420Sstevel@tonic-gate 	pollfds = newfds;
1430Sstevel@tonic-gate 	goto retry;
1440Sstevel@tonic-gate }
1450Sstevel@tonic-gate 
1460Sstevel@tonic-gate /*
1470Sstevel@tonic-gate  * Remove fd from the set being polled. Returns 0 if ok; -1 if failed.
1480Sstevel@tonic-gate  */
1490Sstevel@tonic-gate static int
1500Sstevel@tonic-gate poll_remove(int fd)
1510Sstevel@tonic-gate {
1520Sstevel@tonic-gate 	int i;
1530Sstevel@tonic-gate 
1540Sstevel@tonic-gate 	/* Check if already present */
1550Sstevel@tonic-gate 	for (i = 0; i < pollfd_num; i++) {
1560Sstevel@tonic-gate 		if (pollfds[i].fd == fd) {
1570Sstevel@tonic-gate 			pollfds[i].fd = -1;
1580Sstevel@tonic-gate 			return (0);
1590Sstevel@tonic-gate 		}
1600Sstevel@tonic-gate 	}
1610Sstevel@tonic-gate 	return (-1);
1620Sstevel@tonic-gate }
1630Sstevel@tonic-gate 
1640Sstevel@tonic-gate /*
1650Sstevel@tonic-gate  * Extract information about the phyint instance. If the phyint instance still
1660Sstevel@tonic-gate  * exists in the kernel then set pii_in_use, else clear it. check_if_removed()
1670Sstevel@tonic-gate  * will use it to detect phyint instances that don't exist any longer and
1680Sstevel@tonic-gate  * remove them, from our database of phyint instances.
1690Sstevel@tonic-gate  * Return value:
1700Sstevel@tonic-gate  *	returns true if the phyint instance exists in the kernel,
1710Sstevel@tonic-gate  *	returns false otherwise
1720Sstevel@tonic-gate  */
1730Sstevel@tonic-gate static boolean_t
1740Sstevel@tonic-gate pii_process(int af, char *name, struct phyint_instance **pii_p)
1750Sstevel@tonic-gate {
1760Sstevel@tonic-gate 	int err;
1770Sstevel@tonic-gate 	struct phyint_instance *pii;
1780Sstevel@tonic-gate 	struct phyint_instance *pii_other;
1790Sstevel@tonic-gate 
1800Sstevel@tonic-gate 	if (debug & D_PHYINT)
1810Sstevel@tonic-gate 		logdebug("pii_process(%s %s)\n", AF_STR(af), name);
1820Sstevel@tonic-gate 
1830Sstevel@tonic-gate 	pii = phyint_inst_lookup(af, name);
1840Sstevel@tonic-gate 	if (pii == NULL) {
1850Sstevel@tonic-gate 		/*
1860Sstevel@tonic-gate 		 * Phyint instance does not exist in our tables,
1870Sstevel@tonic-gate 		 * create new phyint instance
1880Sstevel@tonic-gate 		 */
1890Sstevel@tonic-gate 		pii = phyint_inst_init_from_k(af, name);
1900Sstevel@tonic-gate 	} else {
1910Sstevel@tonic-gate 		/* Phyint exists in our tables */
1920Sstevel@tonic-gate 		err = phyint_inst_update_from_k(pii);
1930Sstevel@tonic-gate 
1940Sstevel@tonic-gate 		switch (err) {
1950Sstevel@tonic-gate 		case PI_IOCTL_ERROR:
1960Sstevel@tonic-gate 			/* Some ioctl error. don't change anything */
1970Sstevel@tonic-gate 			pii->pii_in_use = 1;
1980Sstevel@tonic-gate 			break;
1990Sstevel@tonic-gate 
2000Sstevel@tonic-gate 		case PI_GROUP_CHANGED:
2010Sstevel@tonic-gate 			/*
2020Sstevel@tonic-gate 			 * The phyint has changed group.
2030Sstevel@tonic-gate 			 */
2040Sstevel@tonic-gate 			restore_phyint(pii->pii_phyint);
2050Sstevel@tonic-gate 			/* FALLTHRU */
2060Sstevel@tonic-gate 
2070Sstevel@tonic-gate 		case PI_IFINDEX_CHANGED:
2080Sstevel@tonic-gate 			/*
2090Sstevel@tonic-gate 			 * Interface index has changed. Delete and
2100Sstevel@tonic-gate 			 * recreate the phyint as it is quite likely
2110Sstevel@tonic-gate 			 * the interface has been unplumbed and replumbed.
2120Sstevel@tonic-gate 			 */
2130Sstevel@tonic-gate 			pii_other = phyint_inst_other(pii);
2140Sstevel@tonic-gate 			if (pii_other != NULL)
2150Sstevel@tonic-gate 				phyint_inst_delete(pii_other);
2160Sstevel@tonic-gate 			phyint_inst_delete(pii);
2170Sstevel@tonic-gate 			pii = phyint_inst_init_from_k(af, name);
2180Sstevel@tonic-gate 			break;
2190Sstevel@tonic-gate 
2200Sstevel@tonic-gate 		case PI_DELETED:
2210Sstevel@tonic-gate 			/* Phyint instance has disappeared from kernel */
2220Sstevel@tonic-gate 			pii->pii_in_use = 0;
2230Sstevel@tonic-gate 			break;
2240Sstevel@tonic-gate 
2250Sstevel@tonic-gate 		case PI_OK:
2260Sstevel@tonic-gate 			/* Phyint instance exists and is fine */
2270Sstevel@tonic-gate 			pii->pii_in_use = 1;
2280Sstevel@tonic-gate 			break;
2290Sstevel@tonic-gate 
2300Sstevel@tonic-gate 		default:
2310Sstevel@tonic-gate 			/* Unknown status */
2320Sstevel@tonic-gate 			logerr("pii_process: Unknown status %d\n", err);
2330Sstevel@tonic-gate 			break;
2340Sstevel@tonic-gate 		}
2350Sstevel@tonic-gate 	}
2360Sstevel@tonic-gate 
2370Sstevel@tonic-gate 	*pii_p = pii;
2380Sstevel@tonic-gate 	if (pii != NULL)
2390Sstevel@tonic-gate 		return (pii->pii_in_use ? _B_TRUE : _B_FALSE);
2400Sstevel@tonic-gate 	else
2410Sstevel@tonic-gate 		return (_B_FALSE);
2420Sstevel@tonic-gate }
2430Sstevel@tonic-gate 
2440Sstevel@tonic-gate /*
2450Sstevel@tonic-gate  * This phyint is leaving the group. Try to restore the phyint to its
2460Sstevel@tonic-gate  * initial state. Return the addresses that belong to other group members,
2470Sstevel@tonic-gate  * to the group, and take back any addresses owned by this phyint
2480Sstevel@tonic-gate  */
2490Sstevel@tonic-gate void
2500Sstevel@tonic-gate restore_phyint(struct phyint *pi)
2510Sstevel@tonic-gate {
2520Sstevel@tonic-gate 	if (pi->pi_group == phyint_anongroup)
2530Sstevel@tonic-gate 		return;
2540Sstevel@tonic-gate 
2550Sstevel@tonic-gate 	/*
2560Sstevel@tonic-gate 	 * Move everthing to some other member in the group.
2570Sstevel@tonic-gate 	 * The phyint has changed group in the kernel. But we
2580Sstevel@tonic-gate 	 * have yet to do it in our tables.
2590Sstevel@tonic-gate 	 */
2600Sstevel@tonic-gate 	if (!pi->pi_empty)
2610Sstevel@tonic-gate 		(void) try_failover(pi, FAILOVER_TO_ANY);
2620Sstevel@tonic-gate 	/*
2630Sstevel@tonic-gate 	 * Move all addresses owned by 'pi' back to pi, from each
2640Sstevel@tonic-gate 	 * of the other members of the group
2650Sstevel@tonic-gate 	 */
266*2496Smeem 	(void) try_failback(pi);
2670Sstevel@tonic-gate }
2680Sstevel@tonic-gate 
2690Sstevel@tonic-gate /*
2700Sstevel@tonic-gate  * Scan all interfaces to detect changes as well as new and deleted interfaces
2710Sstevel@tonic-gate  */
2720Sstevel@tonic-gate static void
2730Sstevel@tonic-gate initifs()
2740Sstevel@tonic-gate {
2750Sstevel@tonic-gate 	int	n;
2760Sstevel@tonic-gate 	int	af;
2770Sstevel@tonic-gate 	char	*cp;
2780Sstevel@tonic-gate 	char	*buf;
2790Sstevel@tonic-gate 	int	numifs;
2800Sstevel@tonic-gate 	struct lifnum	lifn;
2810Sstevel@tonic-gate 	struct lifconf	lifc;
2820Sstevel@tonic-gate 	struct lifreq	*lifr;
2830Sstevel@tonic-gate 	struct logint	*li;
2840Sstevel@tonic-gate 	struct phyint_instance *pii;
2850Sstevel@tonic-gate 	struct phyint_instance *next_pii;
2860Sstevel@tonic-gate 	char	pi_name[LIFNAMSIZ + 1];
2870Sstevel@tonic-gate 	boolean_t exists;
2880Sstevel@tonic-gate 	struct phyint	*pi;
2892250Srk129064 	struct local_addr *next;
2900Sstevel@tonic-gate 
2910Sstevel@tonic-gate 	if (debug & D_PHYINT)
2920Sstevel@tonic-gate 		logdebug("initifs: Scanning interfaces\n");
2930Sstevel@tonic-gate 
2940Sstevel@tonic-gate 	last_initifs_time = getcurrenttime();
2950Sstevel@tonic-gate 
2960Sstevel@tonic-gate 	/*
2972250Srk129064 	 * Free the laddr_list before collecting the local addresses.
2982250Srk129064 	 */
2992250Srk129064 	while (laddr_list != NULL) {
3002250Srk129064 		next = laddr_list->next;
3012250Srk129064 		free(laddr_list);
3022250Srk129064 		laddr_list = next;
3032250Srk129064 	}
3042250Srk129064 
3052250Srk129064 	/*
3060Sstevel@tonic-gate 	 * Mark the interfaces so that we can find phyints and logints
3070Sstevel@tonic-gate 	 * which have disappeared from the kernel. pii_process() and
3080Sstevel@tonic-gate 	 * logint_init_from_k() will set {pii,li}_in_use when they find
3090Sstevel@tonic-gate 	 * the interface in the kernel. Also, clear dupaddr bit on probe
3100Sstevel@tonic-gate 	 * logint. check_addr_unique() will set the dupaddr bit on the
3110Sstevel@tonic-gate 	 * probe logint, if the testaddress is not unique.
3120Sstevel@tonic-gate 	 */
3130Sstevel@tonic-gate 	for (pii = phyint_instances; pii != NULL; pii = pii->pii_next) {
3140Sstevel@tonic-gate 		pii->pii_in_use = 0;
3150Sstevel@tonic-gate 		for (li = pii->pii_logint; li != NULL; li = li->li_next) {
3160Sstevel@tonic-gate 			li->li_in_use = 0;
3170Sstevel@tonic-gate 			if (pii->pii_probe_logint == li)
3180Sstevel@tonic-gate 				li->li_dupaddr = 0;
3190Sstevel@tonic-gate 		}
3200Sstevel@tonic-gate 	}
3210Sstevel@tonic-gate 
3220Sstevel@tonic-gate 	lifn.lifn_family = AF_UNSPEC;
3232250Srk129064 	lifn.lifn_flags = LIFC_ALLZONES;
3240Sstevel@tonic-gate 	if (ioctl(ifsock_v4, SIOCGLIFNUM, (char *)&lifn) < 0) {
3250Sstevel@tonic-gate 		logperror("initifs: ioctl (get interface numbers)");
3260Sstevel@tonic-gate 		return;
3270Sstevel@tonic-gate 	}
3280Sstevel@tonic-gate 	numifs = lifn.lifn_count;
3290Sstevel@tonic-gate 
3300Sstevel@tonic-gate 	buf = (char *)calloc(numifs, sizeof (struct lifreq));
3310Sstevel@tonic-gate 	if (buf == NULL) {
3320Sstevel@tonic-gate 		logperror("initifs: calloc");
3330Sstevel@tonic-gate 		return;
3340Sstevel@tonic-gate 	}
3350Sstevel@tonic-gate 
3360Sstevel@tonic-gate 	lifc.lifc_family = AF_UNSPEC;
3372250Srk129064 	lifc.lifc_flags = LIFC_ALLZONES;
3380Sstevel@tonic-gate 	lifc.lifc_len = numifs * sizeof (struct lifreq);
3390Sstevel@tonic-gate 	lifc.lifc_buf = buf;
3400Sstevel@tonic-gate 
3410Sstevel@tonic-gate 	if (ioctl(ifsock_v4, SIOCGLIFCONF, (char *)&lifc) < 0) {
3420Sstevel@tonic-gate 		/*
3430Sstevel@tonic-gate 		 * EINVAL is commonly encountered, when things change
3440Sstevel@tonic-gate 		 * underneath us rapidly, (eg. at boot, when new interfaces
3450Sstevel@tonic-gate 		 * are plumbed successively) and the kernel finds the buffer
3460Sstevel@tonic-gate 		 * size we passed as too small. We will retry again
3470Sstevel@tonic-gate 		 * when we see the next routing socket msg, or at worst after
3480Sstevel@tonic-gate 		 * IF_SCAN_INTERVAL ms.
3490Sstevel@tonic-gate 		 */
3500Sstevel@tonic-gate 		if (errno != EINVAL) {
3510Sstevel@tonic-gate 			logperror("initifs: ioctl"
3520Sstevel@tonic-gate 			    " (get interface configuration)");
3530Sstevel@tonic-gate 		}
3540Sstevel@tonic-gate 		free(buf);
3550Sstevel@tonic-gate 		return;
3560Sstevel@tonic-gate 	}
3570Sstevel@tonic-gate 
3580Sstevel@tonic-gate 	lifr = (struct lifreq *)lifc.lifc_req;
3590Sstevel@tonic-gate 
3600Sstevel@tonic-gate 	/*
3610Sstevel@tonic-gate 	 * For each lifreq returned by SIOGGLIFCONF, call pii_process()
3620Sstevel@tonic-gate 	 * and get the state of the corresponding phyint_instance. If it is
3630Sstevel@tonic-gate 	 * successful, then call logint_init_from_k() to get the state of the
3640Sstevel@tonic-gate 	 * logint.
3650Sstevel@tonic-gate 	 */
3660Sstevel@tonic-gate 	for (n = lifc.lifc_len / sizeof (struct lifreq); n > 0; n--, lifr++) {
3672250Srk129064 		int	sockfd;
3682250Srk129064 		struct local_addr	*taddr;
3692250Srk129064 		struct sockaddr_in	*sin;
3702250Srk129064 		struct sockaddr_in6	*sin6;
3712250Srk129064 		struct lifreq	lifreq;
3722250Srk129064 
3730Sstevel@tonic-gate 		af = lifr->lifr_addr.ss_family;
3740Sstevel@tonic-gate 
3750Sstevel@tonic-gate 		/*
3762250Srk129064 		 * Collect all local addresses.
3772250Srk129064 		 */
3782250Srk129064 		sockfd = (af == AF_INET) ? ifsock_v4 : ifsock_v6;
3792250Srk129064 		(void) memset(&lifreq, 0, sizeof (lifreq));
3802250Srk129064 		(void) strlcpy(lifreq.lifr_name, lifr->lifr_name,
3812250Srk129064 		    sizeof (lifreq.lifr_name));
3822250Srk129064 
3832250Srk129064 		if (ioctl(sockfd, SIOCGLIFFLAGS, &lifreq) == -1) {
3842250Srk129064 			if (errno != ENXIO)
3852250Srk129064 				logperror("initifs: ioctl (SIOCGLIFFLAGS)");
3862250Srk129064 			continue;
3872250Srk129064 		}
3882250Srk129064 
3892250Srk129064 		/*
3902250Srk129064 		 * Add the interface address to laddr_list.
3912250Srk129064 		 * Another node might have the same IP address which is up.
3922250Srk129064 		 * In that case, it is appropriate  to use the address as a
3932250Srk129064 		 * target, even though it is also configured (but not up) on
3942250Srk129064 		 * the local system.
3952250Srk129064 		 * Hence,the interface address is not added to laddr_list
3962250Srk129064 		 * unless it is IFF_UP.
3972250Srk129064 		 */
3982250Srk129064 		if (lifreq.lifr_flags & IFF_UP) {
3992250Srk129064 			taddr = malloc(sizeof (struct local_addr));
4002250Srk129064 			if (taddr == NULL) {
4012250Srk129064 				logperror("initifs: malloc");
4022250Srk129064 				continue;
4032250Srk129064 			}
4042250Srk129064 			if (af == AF_INET) {
4052250Srk129064 				sin = (struct sockaddr_in *)&lifr->lifr_addr;
4062250Srk129064 				IN6_INADDR_TO_V4MAPPED(&sin->sin_addr,
4072250Srk129064 				    &taddr->addr);
4082250Srk129064 			} else {
4092250Srk129064 				sin6 = (struct sockaddr_in6 *)&lifr->lifr_addr;
4102250Srk129064 				taddr->addr = sin6->sin6_addr;
4112250Srk129064 			}
4122250Srk129064 			taddr->next = laddr_list;
4132250Srk129064 			laddr_list = taddr;
4142250Srk129064 		}
4152250Srk129064 
4162250Srk129064 		/*
4170Sstevel@tonic-gate 		 * Need to pass a phyint name to pii_process. Insert the
4180Sstevel@tonic-gate 		 * null where the ':' IF_SEPARATOR is found in the logical
4190Sstevel@tonic-gate 		 * name.
4200Sstevel@tonic-gate 		 */
4212250Srk129064 		(void) strlcpy(pi_name, lifr->lifr_name, sizeof (pi_name));
4220Sstevel@tonic-gate 		if ((cp = strchr(pi_name, IF_SEPARATOR)) != NULL)
4230Sstevel@tonic-gate 			*cp = '\0';
4240Sstevel@tonic-gate 
4250Sstevel@tonic-gate 		exists = pii_process(af, pi_name, &pii);
4260Sstevel@tonic-gate 		if (exists) {
4270Sstevel@tonic-gate 			/* The phyint is fine. So process the logint */
4280Sstevel@tonic-gate 			logint_init_from_k(pii, lifr->lifr_name);
429*2496Smeem 			check_addr_unique(pii, &lifr->lifr_addr);
4300Sstevel@tonic-gate 		}
431*2496Smeem 
4320Sstevel@tonic-gate 	}
4330Sstevel@tonic-gate 
4340Sstevel@tonic-gate 	free(buf);
4350Sstevel@tonic-gate 
4360Sstevel@tonic-gate 	/*
4370Sstevel@tonic-gate 	 * If the test address is now unique, and if it was not unique
4380Sstevel@tonic-gate 	 * previously,	clear the li_dupaddrmsg_printed flag and log a
4390Sstevel@tonic-gate 	 * recovery message
4400Sstevel@tonic-gate 	 */
4410Sstevel@tonic-gate 	for (pii = phyint_instances; pii != NULL; pii = pii->pii_next) {
4420Sstevel@tonic-gate 		struct logint *li;
4430Sstevel@tonic-gate 		char abuf[INET6_ADDRSTRLEN];
4440Sstevel@tonic-gate 
4450Sstevel@tonic-gate 		li = pii->pii_probe_logint;
4460Sstevel@tonic-gate 		if ((li != NULL) && !li->li_dupaddr &&
4470Sstevel@tonic-gate 		    li->li_dupaddrmsg_printed) {
448*2496Smeem 			logerr("Test address %s is unique in group; enabling "
449*2496Smeem 			    "probe-based failure detection on %s\n",
4500Sstevel@tonic-gate 			    pr_addr(pii->pii_af, li->li_addr, abuf,
451*2496Smeem 				sizeof (abuf)), pii->pii_phyint->pi_name);
4520Sstevel@tonic-gate 			li->li_dupaddrmsg_printed = 0;
4530Sstevel@tonic-gate 		}
4540Sstevel@tonic-gate 	}
4550Sstevel@tonic-gate 
4560Sstevel@tonic-gate 	/*
4570Sstevel@tonic-gate 	 * Scan for phyints and logints that have disappeared from the
4580Sstevel@tonic-gate 	 * kernel, and delete them.
4590Sstevel@tonic-gate 	 */
4600Sstevel@tonic-gate 	pii = phyint_instances;
4610Sstevel@tonic-gate 
4620Sstevel@tonic-gate 	while (pii != NULL) {
4630Sstevel@tonic-gate 		next_pii = pii->pii_next;
4640Sstevel@tonic-gate 		check_if_removed(pii);
4650Sstevel@tonic-gate 		pii = next_pii;
4660Sstevel@tonic-gate 	}
4670Sstevel@tonic-gate 
4680Sstevel@tonic-gate 	/*
4690Sstevel@tonic-gate 	 * Select a test address for sending probes on each phyint instance
4700Sstevel@tonic-gate 	 */
4710Sstevel@tonic-gate 	select_test_ifs();
4720Sstevel@tonic-gate 
4730Sstevel@tonic-gate 	/*
4740Sstevel@tonic-gate 	 * Handle link up/down notifications from the NICs.
4750Sstevel@tonic-gate 	 */
4760Sstevel@tonic-gate 	process_link_state_changes();
4770Sstevel@tonic-gate 
4780Sstevel@tonic-gate 	for (pi = phyints; pi != NULL; pi = pi->pi_next) {
4790Sstevel@tonic-gate 		/*
4800Sstevel@tonic-gate 		 * If this is a case of group failure, we don't have much
4810Sstevel@tonic-gate 		 * to do until the group recovers again.
4820Sstevel@tonic-gate 		 */
4830Sstevel@tonic-gate 		if (GROUP_FAILED(pi->pi_group))
4840Sstevel@tonic-gate 			continue;
4850Sstevel@tonic-gate 
4860Sstevel@tonic-gate 		/*
4870Sstevel@tonic-gate 		 * Try/Retry any pending failovers / failbacks, that did not
4880Sstevel@tonic-gate 		 * not complete, or that could not be initiated previously.
4890Sstevel@tonic-gate 		 * This implements the 3 invariants described in the big block
4900Sstevel@tonic-gate 		 * comment at the beginning of probe.c
4910Sstevel@tonic-gate 		 */
4920Sstevel@tonic-gate 		if (pi->pi_flags & IFF_INACTIVE) {
493704Sethindra 			if (!pi->pi_empty && (pi->pi_flags & IFF_STANDBY))
4940Sstevel@tonic-gate 				(void) try_failover(pi, FAILOVER_TO_NONSTANDBY);
4950Sstevel@tonic-gate 		} else {
4960Sstevel@tonic-gate 			struct phyint_instance *pii;
4970Sstevel@tonic-gate 
4982302Sudpa 			/*
4992302Sudpa 			 * Skip interfaces which are not capable of probing,
5002302Sudpa 			 * and interfaces that have downed links (as we will
5012302Sudpa 			 * not get any response).
5022302Sudpa 			 */
5032302Sudpa 			if (LINK_DOWN(pi))
5042302Sudpa 				continue;
5052302Sudpa 
5060Sstevel@tonic-gate 			pii = pi->pi_v4;
5072302Sudpa 			if (!PROBE_CAPABLE(pii)) {
5080Sstevel@tonic-gate 				pii = pi->pi_v6;
5092302Sudpa 				if (!PROBE_CAPABLE(pii))
5102302Sudpa 					continue;
5112302Sudpa 			}
5122302Sudpa 
5130Sstevel@tonic-gate 			/*
5140Sstevel@tonic-gate 			 * It is possible that the phyint has started
5150Sstevel@tonic-gate 			 * receiving packets, after it has been marked
5160Sstevel@tonic-gate 			 * PI_FAILED. Don't initiate failover, if the
5170Sstevel@tonic-gate 			 * phyint has started recovering. failure_state()
5180Sstevel@tonic-gate 			 * captures this check. A similar logic is used
5190Sstevel@tonic-gate 			 * for failback/repair case.
5200Sstevel@tonic-gate 			 */
5210Sstevel@tonic-gate 			if (pi->pi_state == PI_FAILED && !pi->pi_empty &&
5220Sstevel@tonic-gate 			    (failure_state(pii) == PHYINT_FAILURE)) {
5230Sstevel@tonic-gate 				(void) try_failover(pi, FAILOVER_NORMAL);
5240Sstevel@tonic-gate 			} else if (pi->pi_state == PI_RUNNING && !pi->pi_full) {
525*2496Smeem 				if (try_failback(pi) != IPMP_FAILURE) {
5260Sstevel@tonic-gate 					(void) change_lif_flags(pi, IFF_FAILED,
5270Sstevel@tonic-gate 					    _B_FALSE);
5280Sstevel@tonic-gate 					/* Per state diagram */
5290Sstevel@tonic-gate 					pi->pi_empty = 0;
5300Sstevel@tonic-gate 				}
5310Sstevel@tonic-gate 			}
5320Sstevel@tonic-gate 		}
5330Sstevel@tonic-gate 	}
5340Sstevel@tonic-gate }
5350Sstevel@tonic-gate 
5360Sstevel@tonic-gate /*
537*2496Smeem  * Check that a given test address is unique across all of the interfaces in a
538*2496Smeem  * group.  (e.g., IPv6 link-locals may not be inherently unique, and binding
539*2496Smeem  * to such an (IFF_NOFAILOVER) address can produce unexpected results.)
540*2496Smeem  * Log an error and alert the user.
5410Sstevel@tonic-gate  */
5420Sstevel@tonic-gate static void
543*2496Smeem check_addr_unique(struct phyint_instance *ourpii, struct sockaddr_storage *ss)
5440Sstevel@tonic-gate {
545*2496Smeem 	struct phyint		*pi;
546*2496Smeem 	struct phyint_group	*pg;
547*2496Smeem 	struct in6_addr		addr;
5480Sstevel@tonic-gate 	struct phyint_instance	*pii;
5490Sstevel@tonic-gate 	struct sockaddr_in	*sin;
550*2496Smeem 	char			abuf[INET6_ADDRSTRLEN];
551*2496Smeem 
552*2496Smeem 	if (ss->ss_family == AF_INET) {
553*2496Smeem 		sin = (struct sockaddr_in *)ss;
5540Sstevel@tonic-gate 		IN6_INADDR_TO_V4MAPPED(&sin->sin_addr, &addr);
5550Sstevel@tonic-gate 	} else {
556*2496Smeem 		assert(ss->ss_family == AF_INET6);
557*2496Smeem 		addr = ((struct sockaddr_in6 *)ss)->sin6_addr;
5580Sstevel@tonic-gate 	}
5590Sstevel@tonic-gate 
5600Sstevel@tonic-gate 	/*
561*2496Smeem 	 * For anonymous groups, every interface is assumed to be on its own
562*2496Smeem 	 * link, so there is no chance of overlapping addresses.
5630Sstevel@tonic-gate 	 */
564*2496Smeem 	pg = ourpii->pii_phyint->pi_group;
565*2496Smeem 	if (pg == phyint_anongroup)
566*2496Smeem 		return;
567*2496Smeem 
568*2496Smeem 	/*
569*2496Smeem 	 * Walk the list of phyint instances in the group and check for test
570*2496Smeem 	 * addresses matching ours.  Of course, we skip ourself.
571*2496Smeem 	 */
572*2496Smeem 	for (pi = pg->pg_phyint; pi != NULL; pi = pi->pi_pgnext) {
573*2496Smeem 		pii = PHYINT_INSTANCE(pi, ss->ss_family);
574*2496Smeem 		if (pii == NULL || pii == ourpii ||
575*2496Smeem 		    pii->pii_probe_logint == NULL)
5760Sstevel@tonic-gate 			continue;
5770Sstevel@tonic-gate 
5780Sstevel@tonic-gate 		if (!IN6_ARE_ADDR_EQUAL(&addr,
5790Sstevel@tonic-gate 		    &pii->pii_probe_logint->li_addr)) {
5800Sstevel@tonic-gate 			continue;
5810Sstevel@tonic-gate 		}
5820Sstevel@tonic-gate 
5830Sstevel@tonic-gate 		/*
5840Sstevel@tonic-gate 		 * This test address is not unique. Set the dupaddr bit
585*2496Smeem 		 * and log an error message if not already logged.
5860Sstevel@tonic-gate 		 */
5870Sstevel@tonic-gate 		pii->pii_probe_logint->li_dupaddr = 1;
588*2496Smeem 		if (!pii->pii_probe_logint->li_dupaddrmsg_printed) {
589*2496Smeem 			logerr("Test address %s is not unique in group; "
590*2496Smeem 			    "disabling probe-based failure detection on %s\n",
591*2496Smeem 			    pr_addr(ss->ss_family, addr, abuf, sizeof (abuf)),
592*2496Smeem 			    pii->pii_phyint->pi_name);
593*2496Smeem 			pii->pii_probe_logint->li_dupaddrmsg_printed = 1;
594*2496Smeem 		}
5950Sstevel@tonic-gate 	}
5960Sstevel@tonic-gate }
5970Sstevel@tonic-gate 
5980Sstevel@tonic-gate /*
5990Sstevel@tonic-gate  * Stop probing an interface.  Called when an interface is offlined.
6000Sstevel@tonic-gate  * The probe socket is closed on each interface instance, and the
6010Sstevel@tonic-gate  * interface state set to PI_OFFLINE.
6020Sstevel@tonic-gate  */
6030Sstevel@tonic-gate static void
6040Sstevel@tonic-gate stop_probing(struct phyint *pi)
6050Sstevel@tonic-gate {
6060Sstevel@tonic-gate 	struct phyint_instance *pii;
6070Sstevel@tonic-gate 
6080Sstevel@tonic-gate 	pii = pi->pi_v4;
6090Sstevel@tonic-gate 	if (pii != NULL) {
6100Sstevel@tonic-gate 		if (pii->pii_probe_sock != -1)
6110Sstevel@tonic-gate 			close_probe_socket(pii, _B_TRUE);
6120Sstevel@tonic-gate 		pii->pii_probe_logint = NULL;
6130Sstevel@tonic-gate 	}
6140Sstevel@tonic-gate 
6150Sstevel@tonic-gate 	pii = pi->pi_v6;
6160Sstevel@tonic-gate 	if (pii != NULL) {
6170Sstevel@tonic-gate 		if (pii->pii_probe_sock != -1)
6180Sstevel@tonic-gate 			close_probe_socket(pii, _B_TRUE);
6190Sstevel@tonic-gate 		pii->pii_probe_logint = NULL;
6200Sstevel@tonic-gate 	}
6210Sstevel@tonic-gate 
6220Sstevel@tonic-gate 	phyint_chstate(pi, PI_OFFLINE);
6230Sstevel@tonic-gate }
6240Sstevel@tonic-gate 
6252074Smeem enum { BAD_TESTFLAGS, OK_TESTFLAGS, BEST_TESTFLAGS };
6262074Smeem 
6270Sstevel@tonic-gate /*
6282074Smeem  * Rate the provided test flags.  By definition, IFF_NOFAILOVER must be set.
6292074Smeem  * IFF_UP must also be set so that the associated address can be used as a
6302074Smeem  * source address.  Further, we must be able to exchange packets with local
6312074Smeem  * destinations, so IFF_NOXMIT and IFF_NOLOCAL must be clear.  For historical
6322074Smeem  * reasons, we have a proclivity for IFF_DEPRECATED IPv4 test addresses.
6332074Smeem  */
6342074Smeem static int
6352074Smeem rate_testflags(uint64_t flags)
6362074Smeem {
6372074Smeem 	if ((flags & (IFF_NOFAILOVER | IFF_UP)) != (IFF_NOFAILOVER | IFF_UP))
6382074Smeem 		return (BAD_TESTFLAGS);
6392074Smeem 
6402074Smeem 	if ((flags & (IFF_NOXMIT | IFF_NOLOCAL)) != 0)
6412074Smeem 		return (BAD_TESTFLAGS);
6422074Smeem 
6432074Smeem 	if ((flags & (IFF_IPV6 | IFF_DEPRECATED)) == IFF_DEPRECATED)
6442074Smeem 		return (BEST_TESTFLAGS);
6452074Smeem 
6462074Smeem 	if ((flags & (IFF_IPV6 | IFF_DEPRECATED)) == IFF_IPV6)
6472074Smeem 		return (BEST_TESTFLAGS);
6482074Smeem 
6492074Smeem 	return (OK_TESTFLAGS);
6502074Smeem }
6512074Smeem 
6522074Smeem /*
6532074Smeem  * Attempt to select a test address for each phyint instance.
6542074Smeem  * Call phyint_inst_sockinit() to complete the initializations.
6550Sstevel@tonic-gate  */
6560Sstevel@tonic-gate static void
6570Sstevel@tonic-gate select_test_ifs(void)
6580Sstevel@tonic-gate {
6590Sstevel@tonic-gate 	struct phyint		*pi;
6600Sstevel@tonic-gate 	struct phyint_instance	*pii;
6610Sstevel@tonic-gate 	struct phyint_instance	*next_pii;
6622074Smeem 	struct logint		*li;
6632074Smeem 	struct logint  		*probe_logint;
6642074Smeem 	boolean_t		target_scan_reqd = _B_FALSE;
6652074Smeem 	struct target		*tg;
6662074Smeem 	int			rating;
6670Sstevel@tonic-gate 
6680Sstevel@tonic-gate 	if (debug & D_PHYINT)
6690Sstevel@tonic-gate 		logdebug("select_test_ifs\n");
6700Sstevel@tonic-gate 
6710Sstevel@tonic-gate 	/*
6720Sstevel@tonic-gate 	 * For each phyint instance, do the test address selection
6730Sstevel@tonic-gate 	 */
6740Sstevel@tonic-gate 	for (pii = phyint_instances; pii != NULL; pii = next_pii) {
6750Sstevel@tonic-gate 		next_pii = pii->pii_next;
6762074Smeem 		probe_logint = NULL;
6772074Smeem 
6780Sstevel@tonic-gate 		/*
6790Sstevel@tonic-gate 		 * An interface that is offline, should not be probed.
6800Sstevel@tonic-gate 		 * Offline interfaces should always in PI_OFFLINE state,
6810Sstevel@tonic-gate 		 * unless some other entity has set the offline flag.
6820Sstevel@tonic-gate 		 */
6830Sstevel@tonic-gate 		if (pii->pii_phyint->pi_flags & IFF_OFFLINE) {
6840Sstevel@tonic-gate 			if (pii->pii_phyint->pi_state != PI_OFFLINE) {
6850Sstevel@tonic-gate 				logerr("shouldn't be probing offline"
6860Sstevel@tonic-gate 					" interface %s (state is: %u)."
6870Sstevel@tonic-gate 					" Stopping probes.\n",
6880Sstevel@tonic-gate 					pii->pii_phyint->pi_name,
6890Sstevel@tonic-gate 					pii->pii_phyint->pi_state);
6900Sstevel@tonic-gate 				stop_probing(pii->pii_phyint);
6910Sstevel@tonic-gate 			}
6920Sstevel@tonic-gate 			continue;
6930Sstevel@tonic-gate 		}
6940Sstevel@tonic-gate 
6952074Smeem 		li = pii->pii_probe_logint;
6962074Smeem 		if (li != NULL) {
6970Sstevel@tonic-gate 			/*
6982074Smeem 			 * We've already got a test address; only proceed
6992074Smeem 			 * if it's suboptimal.
7000Sstevel@tonic-gate 			 */
7012074Smeem 			if (rate_testflags(li->li_flags) == BEST_TESTFLAGS)
7022074Smeem 				continue;
7030Sstevel@tonic-gate 		}
7040Sstevel@tonic-gate 
7050Sstevel@tonic-gate 		/*
7060Sstevel@tonic-gate 		 * Walk the logints of this phyint instance, and select
7070Sstevel@tonic-gate 		 * the best available test address
7080Sstevel@tonic-gate 		 */
7090Sstevel@tonic-gate 		for (li = pii->pii_logint; li != NULL; li = li->li_next) {
7100Sstevel@tonic-gate 			/*
711*2496Smeem 			 * Skip 0.0.0.0 addresses, as those are never
712*2496Smeem 			 * actually usable.
713*2496Smeem 			 */
714*2496Smeem 			if (pii->pii_af == AF_INET &&
715*2496Smeem 			    IN6_IS_ADDR_V4MAPPED_ANY(&li->li_addr))
716*2496Smeem 				continue;
717*2496Smeem 
718*2496Smeem 			/*
7190Sstevel@tonic-gate 			 * Skip any IPv6 logints that are not link-local,
7200Sstevel@tonic-gate 			 * since we should always have a link-local address
7210Sstevel@tonic-gate 			 * anyway and in6_data() expects link-local replies.
7220Sstevel@tonic-gate 			 */
7230Sstevel@tonic-gate 			if (pii->pii_af == AF_INET6 &&
7240Sstevel@tonic-gate 			    !IN6_IS_ADDR_LINKLOCAL(&li->li_addr))
7250Sstevel@tonic-gate 				continue;
7260Sstevel@tonic-gate 
7272074Smeem 			/*
7282074Smeem 			 * Rate the testflags. If we've found an optimal
7292074Smeem 			 * match, then break out; otherwise, record the most
7302074Smeem 			 * recent OK one.
7312074Smeem 			 */
7322074Smeem 			rating = rate_testflags(li->li_flags);
7332074Smeem 			if (rating == BAD_TESTFLAGS)
7342074Smeem 				continue;
7352074Smeem 
7362074Smeem 			probe_logint = li;
7372074Smeem 			if (rating == BEST_TESTFLAGS)
7382074Smeem 				break;
7390Sstevel@tonic-gate 		}
7400Sstevel@tonic-gate 
7410Sstevel@tonic-gate 		/*
7422074Smeem 		 * If the probe logint has changed, ditch the old one.
7430Sstevel@tonic-gate 		 */
7442074Smeem 		if (pii->pii_probe_logint != NULL &&
7452074Smeem 		    pii->pii_probe_logint != probe_logint) {
7460Sstevel@tonic-gate 			if (pii->pii_probe_sock != -1)
7470Sstevel@tonic-gate 				close_probe_socket(pii, _B_TRUE);
7480Sstevel@tonic-gate 			pii->pii_probe_logint = NULL;
7490Sstevel@tonic-gate 		}
7500Sstevel@tonic-gate 
7512074Smeem 		if (probe_logint == NULL) {
7520Sstevel@tonic-gate 			/*
7530Sstevel@tonic-gate 			 * We don't have a test address. Don't print an
7540Sstevel@tonic-gate 			 * error message immediately. check_config() will
7550Sstevel@tonic-gate 			 * take care of it. Zero out the probe stats array
7560Sstevel@tonic-gate 			 * since it is no longer relevant. Optimize by
7570Sstevel@tonic-gate 			 * checking if it is already zeroed out.
7580Sstevel@tonic-gate 			 */
7590Sstevel@tonic-gate 			int pr_ndx;
7600Sstevel@tonic-gate 
7610Sstevel@tonic-gate 			pr_ndx = PROBE_INDEX_PREV(pii->pii_probe_next);
7620Sstevel@tonic-gate 			if (pii->pii_probes[pr_ndx].pr_status != PR_UNUSED) {
7630Sstevel@tonic-gate 				clear_pii_probe_stats(pii);
7640Sstevel@tonic-gate 				reset_crtt_all(pii->pii_phyint);
7650Sstevel@tonic-gate 			}
7660Sstevel@tonic-gate 			continue;
7672074Smeem 		} else if (probe_logint == pii->pii_probe_logint) {
7680Sstevel@tonic-gate 			/*
7690Sstevel@tonic-gate 			 * If we didn't find any new test addr, go to the
7700Sstevel@tonic-gate 			 * next phyint.
7710Sstevel@tonic-gate 			 */
7720Sstevel@tonic-gate 			continue;
7730Sstevel@tonic-gate 		}
7740Sstevel@tonic-gate 
7750Sstevel@tonic-gate 		/*
7760Sstevel@tonic-gate 		 * The phyint is either being assigned a new testaddr
7770Sstevel@tonic-gate 		 * or is being assigned a testaddr for the 1st time.
7780Sstevel@tonic-gate 		 * Need to initialize the phyint socket
7790Sstevel@tonic-gate 		 */
7802074Smeem 		pii->pii_probe_logint = probe_logint;
7810Sstevel@tonic-gate 		if (!phyint_inst_sockinit(pii)) {
7820Sstevel@tonic-gate 			if (debug & D_PHYINT) {
7830Sstevel@tonic-gate 				logdebug("select_test_ifs: "
7840Sstevel@tonic-gate 				    "phyint_sockinit failed\n");
7850Sstevel@tonic-gate 			}
7860Sstevel@tonic-gate 			phyint_inst_delete(pii);
7870Sstevel@tonic-gate 			continue;
7880Sstevel@tonic-gate 		}
7890Sstevel@tonic-gate 
7900Sstevel@tonic-gate 		/*
7910Sstevel@tonic-gate 		 * This phyint instance is now enabled for probes; this
7920Sstevel@tonic-gate 		 * impacts our state machine in two ways:
7930Sstevel@tonic-gate 		 *
7940Sstevel@tonic-gate 		 * 1. If we're probe *capable* as well (i.e., we have
7950Sstevel@tonic-gate 		 *    probe targets) and the interface is in PI_NOTARGETS,
7960Sstevel@tonic-gate 		 *    then transition to PI_RUNNING.
7970Sstevel@tonic-gate 		 *
7980Sstevel@tonic-gate 		 * 2. If we're not probe capable, and the other phyint
7990Sstevel@tonic-gate 		 *    instance is also not probe capable, and we were in
8000Sstevel@tonic-gate 		 *    PI_RUNNING, then transition to PI_NOTARGETS.
8010Sstevel@tonic-gate 		 *
8020Sstevel@tonic-gate 		 * Also see the state diagram in mpd_probe.c.
8030Sstevel@tonic-gate 		 */
8040Sstevel@tonic-gate 		if (PROBE_CAPABLE(pii)) {
8050Sstevel@tonic-gate 			if (pii->pii_phyint->pi_state == PI_NOTARGETS)
8060Sstevel@tonic-gate 				phyint_chstate(pii->pii_phyint, PI_RUNNING);
8070Sstevel@tonic-gate 		} else if (!PROBE_CAPABLE(phyint_inst_other(pii))) {
8080Sstevel@tonic-gate 			if (pii->pii_phyint->pi_state == PI_RUNNING)
8090Sstevel@tonic-gate 				phyint_chstate(pii->pii_phyint, PI_NOTARGETS);
8100Sstevel@tonic-gate 		}
8110Sstevel@tonic-gate 
8120Sstevel@tonic-gate 		if (pii->pii_phyint->pi_flags & IFF_POINTOPOINT) {
8130Sstevel@tonic-gate 			tg = pii->pii_targets;
8140Sstevel@tonic-gate 			if (tg != NULL)
8150Sstevel@tonic-gate 				target_delete(tg);
8160Sstevel@tonic-gate 			assert(pii->pii_targets == NULL);
8170Sstevel@tonic-gate 			assert(pii->pii_target_next == NULL);
8180Sstevel@tonic-gate 			assert(pii->pii_ntargets == 0);
8192074Smeem 			target_create(pii, probe_logint->li_dstaddr,
8200Sstevel@tonic-gate 			    _B_TRUE);
8210Sstevel@tonic-gate 		}
8220Sstevel@tonic-gate 
8230Sstevel@tonic-gate 		/*
8240Sstevel@tonic-gate 		 * If no targets are currently known for this phyint
8250Sstevel@tonic-gate 		 * we need to call init_router_targets. Since
8260Sstevel@tonic-gate 		 * init_router_targets() initializes the list of targets
8270Sstevel@tonic-gate 		 * for all phyints it is done below the loop.
8280Sstevel@tonic-gate 		 */
8290Sstevel@tonic-gate 		if (pii->pii_targets == NULL)
8300Sstevel@tonic-gate 			target_scan_reqd = _B_TRUE;
8310Sstevel@tonic-gate 
8320Sstevel@tonic-gate 		/*
8330Sstevel@tonic-gate 		 * Start the probe timer for this instance.
8340Sstevel@tonic-gate 		 */
835*2496Smeem 		if (!pii->pii_basetime_inited && PROBE_ENABLED(pii)) {
8360Sstevel@tonic-gate 			start_timer(pii);
8370Sstevel@tonic-gate 			pii->pii_basetime_inited = 1;
8380Sstevel@tonic-gate 		}
8390Sstevel@tonic-gate 	}
8400Sstevel@tonic-gate 
8410Sstevel@tonic-gate 	/*
8420Sstevel@tonic-gate 	 * Check the interface list for any interfaces that are marked
8430Sstevel@tonic-gate 	 * PI_FAILED but no longer enabled to send probes, and call
8440Sstevel@tonic-gate 	 * phyint_check_for_repair() to see if the link now indicates that the
8450Sstevel@tonic-gate 	 * interface should be repaired.  Also see the state diagram in
8460Sstevel@tonic-gate 	 * mpd_probe.c.
8470Sstevel@tonic-gate 	 */
8480Sstevel@tonic-gate 	for (pi = phyints; pi != NULL; pi = pi->pi_next) {
8490Sstevel@tonic-gate 		if (pi->pi_state == PI_FAILED &&
8500Sstevel@tonic-gate 		    !PROBE_ENABLED(pi->pi_v4) && !PROBE_ENABLED(pi->pi_v6)) {
8510Sstevel@tonic-gate 			phyint_check_for_repair(pi);
8520Sstevel@tonic-gate 		}
8530Sstevel@tonic-gate 	}
8540Sstevel@tonic-gate 
8550Sstevel@tonic-gate 	/*
8560Sstevel@tonic-gate 	 * Try to populate the target list. init_router_targets populates
8570Sstevel@tonic-gate 	 * the target list from the routing table. If our target list is
8580Sstevel@tonic-gate 	 * still empty, init_host_targets adds host targets based on the
8590Sstevel@tonic-gate 	 * host target list of other phyints in the group.
8600Sstevel@tonic-gate 	 */
8610Sstevel@tonic-gate 	if (target_scan_reqd) {
8620Sstevel@tonic-gate 		init_router_targets();
8630Sstevel@tonic-gate 		init_host_targets();
8640Sstevel@tonic-gate 	}
8650Sstevel@tonic-gate }
8660Sstevel@tonic-gate 
8670Sstevel@tonic-gate /*
8680Sstevel@tonic-gate  * Check phyint group configuration, to detect any inconsistencies,
8690Sstevel@tonic-gate  * and log an error message. This is called from runtimeouts every
8700Sstevel@tonic-gate  * 20 secs. But the error message is displayed once. If the
8710Sstevel@tonic-gate  * consistency is resolved by the admin, a recovery message is displayed
8720Sstevel@tonic-gate  * once.
8730Sstevel@tonic-gate  */
8740Sstevel@tonic-gate static void
8750Sstevel@tonic-gate check_config(void)
8760Sstevel@tonic-gate {
8770Sstevel@tonic-gate 	struct phyint_group *pg;
8780Sstevel@tonic-gate 	struct phyint *pi;
8790Sstevel@tonic-gate 	boolean_t v4_in_group;
8800Sstevel@tonic-gate 	boolean_t v6_in_group;
8810Sstevel@tonic-gate 
8820Sstevel@tonic-gate 	/*
8830Sstevel@tonic-gate 	 * All phyints of a group must be homogenous to ensure that
8840Sstevel@tonic-gate 	 * failover or failback can be done. If any phyint in a group
8850Sstevel@tonic-gate 	 * has IPv4 plumbed, check that all phyints have IPv4 plumbed.
8860Sstevel@tonic-gate 	 * Do a similar check for IPv6.
8870Sstevel@tonic-gate 	 */
8880Sstevel@tonic-gate 	for (pg = phyint_groups; pg != NULL; pg = pg->pg_next) {
8890Sstevel@tonic-gate 		if (pg == phyint_anongroup)
8900Sstevel@tonic-gate 			continue;
8910Sstevel@tonic-gate 
8920Sstevel@tonic-gate 		v4_in_group = _B_FALSE;
8930Sstevel@tonic-gate 		v6_in_group = _B_FALSE;
8940Sstevel@tonic-gate 		/*
8950Sstevel@tonic-gate 		 * 1st pass. Determine if at least 1 phyint in the group
8960Sstevel@tonic-gate 		 * has IPv4 plumbed and if so set v4_in_group to true.
8970Sstevel@tonic-gate 		 * Repeat similarly for IPv6.
8980Sstevel@tonic-gate 		 */
8990Sstevel@tonic-gate 		for (pi = pg->pg_phyint; pi != NULL; pi = pi->pi_pgnext) {
9000Sstevel@tonic-gate 			if (pi->pi_v4 != NULL)
9010Sstevel@tonic-gate 				v4_in_group = _B_TRUE;
9020Sstevel@tonic-gate 			if (pi->pi_v6 != NULL)
9030Sstevel@tonic-gate 				v6_in_group = _B_TRUE;
9040Sstevel@tonic-gate 		}
9050Sstevel@tonic-gate 
9060Sstevel@tonic-gate 		/*
9070Sstevel@tonic-gate 		 * 2nd pass. If v4_in_group is true, check that phyint
9080Sstevel@tonic-gate 		 * has IPv4 plumbed. Repeat similarly for IPv6. Print
9090Sstevel@tonic-gate 		 * out a message the 1st time only.
9100Sstevel@tonic-gate 		 */
9110Sstevel@tonic-gate 		for (pi = pg->pg_phyint; pi != NULL; pi = pi->pi_pgnext) {
9120Sstevel@tonic-gate 			if (pi->pi_flags & IFF_OFFLINE)
9130Sstevel@tonic-gate 				continue;
9140Sstevel@tonic-gate 
9150Sstevel@tonic-gate 			if (v4_in_group == _B_TRUE && pi->pi_v4 == NULL) {
9160Sstevel@tonic-gate 				if (!pi->pi_cfgmsg_printed) {
9170Sstevel@tonic-gate 					logerr("NIC %s of group %s is"
9180Sstevel@tonic-gate 					    " not plumbed for IPv4 and may"
9190Sstevel@tonic-gate 					    " affect failover capability\n",
9200Sstevel@tonic-gate 					    pi->pi_name,
9210Sstevel@tonic-gate 					    pi->pi_group->pg_name);
9220Sstevel@tonic-gate 					pi->pi_cfgmsg_printed = 1;
9230Sstevel@tonic-gate 				}
9240Sstevel@tonic-gate 			} else if (v6_in_group == _B_TRUE &&
9250Sstevel@tonic-gate 			    pi->pi_v6 == NULL) {
9260Sstevel@tonic-gate 				if (!pi->pi_cfgmsg_printed) {
9270Sstevel@tonic-gate 					logerr("NIC %s of group %s is"
9280Sstevel@tonic-gate 					    " not plumbed for IPv6 and may"
9290Sstevel@tonic-gate 					    " affect failover capability\n",
9300Sstevel@tonic-gate 					    pi->pi_name,
9310Sstevel@tonic-gate 					    pi->pi_group->pg_name);
9320Sstevel@tonic-gate 					pi->pi_cfgmsg_printed = 1;
9330Sstevel@tonic-gate 				}
9340Sstevel@tonic-gate 			} else {
9350Sstevel@tonic-gate 				/*
9360Sstevel@tonic-gate 				 * The phyint matches the group configuration,
9370Sstevel@tonic-gate 				 * if we have reached this point. If it was
9380Sstevel@tonic-gate 				 * improperly configured earlier, log an
9390Sstevel@tonic-gate 				 * error recovery message
9400Sstevel@tonic-gate 				 */
9410Sstevel@tonic-gate 				if (pi->pi_cfgmsg_printed) {
9420Sstevel@tonic-gate 					logerr("NIC %s is now consistent with "
9430Sstevel@tonic-gate 					    "group %s and failover capability "
9440Sstevel@tonic-gate 					    "is restored\n", pi->pi_name,
9450Sstevel@tonic-gate 					    pi->pi_group->pg_name);
9460Sstevel@tonic-gate 					pi->pi_cfgmsg_printed = 0;
9470Sstevel@tonic-gate 				}
9480Sstevel@tonic-gate 			}
9490Sstevel@tonic-gate 
9500Sstevel@tonic-gate 		}
9510Sstevel@tonic-gate 	}
9520Sstevel@tonic-gate 
9530Sstevel@tonic-gate 	/*
9540Sstevel@tonic-gate 	 * In order to perform probe-based failure detection, a phyint must
9550Sstevel@tonic-gate 	 * have at least 1 test/probe address for sending and receiving probes
9560Sstevel@tonic-gate 	 * (either on IPv4 or IPv6 instance or both).  If no test address has
9570Sstevel@tonic-gate 	 * been configured, notify the administrator, but continue on since we
9580Sstevel@tonic-gate 	 * can still perform load spreading, along with "link up/down" based
9590Sstevel@tonic-gate 	 * failure detection.
9600Sstevel@tonic-gate 	 */
9610Sstevel@tonic-gate 	for (pi = phyints; pi != NULL; pi = pi->pi_next) {
9620Sstevel@tonic-gate 		if (pi->pi_flags & IFF_OFFLINE)
9630Sstevel@tonic-gate 			continue;
9640Sstevel@tonic-gate 
9650Sstevel@tonic-gate 		if ((pi->pi_v4 == NULL ||
9660Sstevel@tonic-gate 		    pi->pi_v4->pii_probe_logint == NULL) &&
9670Sstevel@tonic-gate 		    (pi->pi_v6 == NULL ||
9680Sstevel@tonic-gate 		    pi->pi_v6->pii_probe_logint == NULL)) {
9690Sstevel@tonic-gate 			if (!pi->pi_taddrmsg_printed) {
9700Sstevel@tonic-gate 				logerr("No test address configured on "
9710Sstevel@tonic-gate 				    "interface %s; disabling probe-based "
9720Sstevel@tonic-gate 				    "failure detection on it\n", pi->pi_name);
9730Sstevel@tonic-gate 				pi->pi_taddrmsg_printed = 1;
9740Sstevel@tonic-gate 			}
9750Sstevel@tonic-gate 		} else if (pi->pi_taddrmsg_printed) {
9760Sstevel@tonic-gate 			logerr("Test address now configured on interface %s; "
9770Sstevel@tonic-gate 			    "enabling probe-based failure detection on it\n",
9780Sstevel@tonic-gate 			    pi->pi_name);
9790Sstevel@tonic-gate 			pi->pi_taddrmsg_printed = 0;
9800Sstevel@tonic-gate 		}
9810Sstevel@tonic-gate 
9820Sstevel@tonic-gate 	}
9830Sstevel@tonic-gate }
9840Sstevel@tonic-gate 
9850Sstevel@tonic-gate /*
9860Sstevel@tonic-gate  * Timer mechanism using relative time (in milliseconds) from the
9870Sstevel@tonic-gate  * previous timer event. Timers exceeding TIMER_INFINITY milliseconds
9880Sstevel@tonic-gate  * will fire after TIMER_INFINITY milliseconds.
9890Sstevel@tonic-gate  * Unsigned arithmetic note: We assume a 32-bit circular sequence space for
9900Sstevel@tonic-gate  * time values. Hence 2 consecutive timer events cannot be spaced farther
9910Sstevel@tonic-gate  * than 0x7fffffff. We call this TIMER_INFINITY, and it is the maximum value
9920Sstevel@tonic-gate  * that can be passed for the delay parameter of timer_schedule()
9930Sstevel@tonic-gate  */
9940Sstevel@tonic-gate static uint_t timer_next;	/* Currently scheduled timeout */
9950Sstevel@tonic-gate static boolean_t timer_active = _B_FALSE; /* SIGALRM has not yet occurred */
9960Sstevel@tonic-gate 
9970Sstevel@tonic-gate static void
9980Sstevel@tonic-gate timer_init(void)
9990Sstevel@tonic-gate {
10000Sstevel@tonic-gate 	timer_next = getcurrenttime() + TIMER_INFINITY;
10010Sstevel@tonic-gate 	/*
10020Sstevel@tonic-gate 	 * The call to run_timeouts() will get the timer started
10030Sstevel@tonic-gate 	 * Since there are no phyints at this point, the timer will
10040Sstevel@tonic-gate 	 * be set for IF_SCAN_INTERVAL ms.
10050Sstevel@tonic-gate 	 */
10060Sstevel@tonic-gate 	run_timeouts();
10070Sstevel@tonic-gate }
10080Sstevel@tonic-gate 
10090Sstevel@tonic-gate /*
10100Sstevel@tonic-gate  * Make sure the next SIGALRM occurs delay milliseconds from the current
10110Sstevel@tonic-gate  * time if not earlier. We are interested only in time differences.
10120Sstevel@tonic-gate  */
10130Sstevel@tonic-gate void
10140Sstevel@tonic-gate timer_schedule(uint_t delay)
10150Sstevel@tonic-gate {
10160Sstevel@tonic-gate 	uint_t now;
10170Sstevel@tonic-gate 	struct itimerval itimerval;
10180Sstevel@tonic-gate 
10190Sstevel@tonic-gate 	if (debug & D_TIMER)
10200Sstevel@tonic-gate 		logdebug("timer_schedule(%u)\n", delay);
10210Sstevel@tonic-gate 
10220Sstevel@tonic-gate 	assert(delay <= TIMER_INFINITY);
10230Sstevel@tonic-gate 
10240Sstevel@tonic-gate 	now = getcurrenttime();
10250Sstevel@tonic-gate 	if (delay == 0) {
10260Sstevel@tonic-gate 		/* Minimum allowed delay */
10270Sstevel@tonic-gate 		delay = 1;
10280Sstevel@tonic-gate 	}
10290Sstevel@tonic-gate 	/* Will this timer occur before the currently scheduled SIGALRM? */
10300Sstevel@tonic-gate 	if (timer_active && TIME_GE(now + delay, timer_next)) {
10310Sstevel@tonic-gate 		if (debug & D_TIMER) {
10320Sstevel@tonic-gate 			logdebug("timer_schedule(%u) - no action: "
10330Sstevel@tonic-gate 			    "now %u next %u\n", delay, now, timer_next);
10340Sstevel@tonic-gate 		}
10350Sstevel@tonic-gate 		return;
10360Sstevel@tonic-gate 	}
10370Sstevel@tonic-gate 	timer_next = now + delay;
10380Sstevel@tonic-gate 
10390Sstevel@tonic-gate 	itimerval.it_value.tv_sec = delay / 1000;
10400Sstevel@tonic-gate 	itimerval.it_value.tv_usec = (delay % 1000) * 1000;
10410Sstevel@tonic-gate 	itimerval.it_interval.tv_sec = 0;
10420Sstevel@tonic-gate 	itimerval.it_interval.tv_usec = 0;
10430Sstevel@tonic-gate 	if (debug & D_TIMER) {
10440Sstevel@tonic-gate 		logdebug("timer_schedule(%u): sec %ld usec %ld\n",
10450Sstevel@tonic-gate 		    delay, itimerval.it_value.tv_sec,
10460Sstevel@tonic-gate 		    itimerval.it_value.tv_usec);
10470Sstevel@tonic-gate 	}
10480Sstevel@tonic-gate 	timer_active = _B_TRUE;
10490Sstevel@tonic-gate 	if (setitimer(ITIMER_REAL, &itimerval, NULL) < 0) {
10500Sstevel@tonic-gate 		logperror("timer_schedule: setitimer");
10510Sstevel@tonic-gate 		exit(2);
10520Sstevel@tonic-gate 	}
10530Sstevel@tonic-gate }
10540Sstevel@tonic-gate 
10550Sstevel@tonic-gate /*
10560Sstevel@tonic-gate  * Timer has fired. Determine when the next timer event will occur by asking
10570Sstevel@tonic-gate  * all the timer routines. Should not be called from a timer routine.
10580Sstevel@tonic-gate  */
10590Sstevel@tonic-gate static void
10600Sstevel@tonic-gate run_timeouts(void)
10610Sstevel@tonic-gate {
10620Sstevel@tonic-gate 	uint_t next;
10630Sstevel@tonic-gate 	uint_t next_event_time;
10640Sstevel@tonic-gate 	struct phyint_instance *pii;
10650Sstevel@tonic-gate 	struct phyint_instance *next_pii;
10660Sstevel@tonic-gate 	static boolean_t timeout_running;
10670Sstevel@tonic-gate 
10680Sstevel@tonic-gate 	/* assert that recursive timeouts don't happen. */
10690Sstevel@tonic-gate 	assert(!timeout_running);
10700Sstevel@tonic-gate 
10710Sstevel@tonic-gate 	timeout_running = _B_TRUE;
10720Sstevel@tonic-gate 
10730Sstevel@tonic-gate 	if (debug & D_TIMER)
10740Sstevel@tonic-gate 		logdebug("run_timeouts()\n");
10750Sstevel@tonic-gate 
10760Sstevel@tonic-gate 	next = TIMER_INFINITY;
10770Sstevel@tonic-gate 
10780Sstevel@tonic-gate 	for (pii = phyint_instances; pii != NULL; pii = next_pii) {
10790Sstevel@tonic-gate 		next_pii = pii->pii_next;
10800Sstevel@tonic-gate 		next_event_time = phyint_inst_timer(pii);
10810Sstevel@tonic-gate 		if (next_event_time != TIMER_INFINITY && next_event_time < next)
10820Sstevel@tonic-gate 			next = next_event_time;
10830Sstevel@tonic-gate 
10840Sstevel@tonic-gate 		if (debug & D_TIMER) {
10850Sstevel@tonic-gate 			logdebug("run_timeouts(%s %s): next scheduled for"
10860Sstevel@tonic-gate 			    " this phyint inst %u, next scheduled global"
10870Sstevel@tonic-gate 			    " %u ms\n",
10880Sstevel@tonic-gate 			    AF_STR(pii->pii_af), pii->pii_phyint->pi_name,
10890Sstevel@tonic-gate 			    next_event_time, next);
10900Sstevel@tonic-gate 		}
10910Sstevel@tonic-gate 	}
10920Sstevel@tonic-gate 
10930Sstevel@tonic-gate 	/*
10940Sstevel@tonic-gate 	 * Make sure initifs() is called at least once every
10950Sstevel@tonic-gate 	 * IF_SCAN_INTERVAL, to make sure that we are in sync
10960Sstevel@tonic-gate 	 * with the kernel, in case we have missed any routing
10970Sstevel@tonic-gate 	 * socket messages.
10980Sstevel@tonic-gate 	 */
10990Sstevel@tonic-gate 	if (next > IF_SCAN_INTERVAL)
11000Sstevel@tonic-gate 		next = IF_SCAN_INTERVAL;
11010Sstevel@tonic-gate 
11020Sstevel@tonic-gate 	if ((getcurrenttime() - last_initifs_time) > IF_SCAN_INTERVAL) {
11030Sstevel@tonic-gate 		initifs();
11040Sstevel@tonic-gate 		check_config();
11050Sstevel@tonic-gate 	}
11060Sstevel@tonic-gate 
11070Sstevel@tonic-gate 	if (debug & D_TIMER)
11080Sstevel@tonic-gate 		logdebug("run_timeouts: %u ms\n", next);
11090Sstevel@tonic-gate 
11100Sstevel@tonic-gate 	timer_schedule(next);
11110Sstevel@tonic-gate 	timeout_running = _B_FALSE;
11120Sstevel@tonic-gate }
11130Sstevel@tonic-gate 
11140Sstevel@tonic-gate static int eventpipe_read = -1;	/* Used for synchronous signal delivery */
11150Sstevel@tonic-gate static int eventpipe_write = -1;
11160Sstevel@tonic-gate static boolean_t cleanup_started = _B_FALSE;
11170Sstevel@tonic-gate 				/* Don't write to eventpipe if in cleanup */
11180Sstevel@tonic-gate /*
11190Sstevel@tonic-gate  * Ensure that signals are processed synchronously with the rest of
11200Sstevel@tonic-gate  * the code by just writing a one character signal number on the pipe.
11210Sstevel@tonic-gate  * The poll loop will pick this up and process the signal event.
11220Sstevel@tonic-gate  */
11230Sstevel@tonic-gate static void
11240Sstevel@tonic-gate sig_handler(int signo)
11250Sstevel@tonic-gate {
11260Sstevel@tonic-gate 	uchar_t buf = (uchar_t)signo;
11270Sstevel@tonic-gate 
11280Sstevel@tonic-gate 	/*
11290Sstevel@tonic-gate 	 * Don't write to pipe if cleanup has already begun. cleanup()
11300Sstevel@tonic-gate 	 * might have closed the pipe already
11310Sstevel@tonic-gate 	 */
11320Sstevel@tonic-gate 	if (cleanup_started)
11330Sstevel@tonic-gate 		return;
11340Sstevel@tonic-gate 
11350Sstevel@tonic-gate 	if (eventpipe_write == -1) {
11360Sstevel@tonic-gate 		logerr("sig_handler: no pipe found\n");
11370Sstevel@tonic-gate 		return;
11380Sstevel@tonic-gate 	}
11390Sstevel@tonic-gate 	if (write(eventpipe_write, &buf, sizeof (buf)) < 0)
11400Sstevel@tonic-gate 		logperror("sig_handler: write");
11410Sstevel@tonic-gate }
11420Sstevel@tonic-gate 
11430Sstevel@tonic-gate extern struct probes_missed probes_missed;
11440Sstevel@tonic-gate 
11450Sstevel@tonic-gate /*
11460Sstevel@tonic-gate  * Pick up a signal "byte" from the pipe and process it.
11470Sstevel@tonic-gate  */
11480Sstevel@tonic-gate static void
11490Sstevel@tonic-gate in_signal(int fd)
11500Sstevel@tonic-gate {
11510Sstevel@tonic-gate 	uchar_t buf;
11520Sstevel@tonic-gate 	uint64_t  sent, acked, lost, unacked, unknown;
11530Sstevel@tonic-gate 	struct phyint_instance *pii;
11540Sstevel@tonic-gate 	int pr_ndx;
11550Sstevel@tonic-gate 
11560Sstevel@tonic-gate 	switch (read(fd, &buf, sizeof (buf))) {
11570Sstevel@tonic-gate 	case -1:
11580Sstevel@tonic-gate 		logperror("in_signal: read");
11590Sstevel@tonic-gate 		exit(1);
11600Sstevel@tonic-gate 		/* NOTREACHED */
11610Sstevel@tonic-gate 	case 1:
11620Sstevel@tonic-gate 		break;
11630Sstevel@tonic-gate 	case 0:
11640Sstevel@tonic-gate 		logerr("in_signal: read end of file\n");
11650Sstevel@tonic-gate 		exit(1);
11660Sstevel@tonic-gate 		/* NOTREACHED */
11670Sstevel@tonic-gate 	default:
11680Sstevel@tonic-gate 		logerr("in_signal: read > 1\n");
11690Sstevel@tonic-gate 		exit(1);
11700Sstevel@tonic-gate 	}
11710Sstevel@tonic-gate 
11720Sstevel@tonic-gate 	if (debug & D_TIMER)
11730Sstevel@tonic-gate 		logdebug("in_signal() got %d\n", buf);
11740Sstevel@tonic-gate 
11750Sstevel@tonic-gate 	switch (buf) {
11760Sstevel@tonic-gate 	case SIGALRM:
11770Sstevel@tonic-gate 		if (debug & D_TIMER) {
11780Sstevel@tonic-gate 			uint_t now = getcurrenttime();
11790Sstevel@tonic-gate 
11800Sstevel@tonic-gate 			logdebug("in_signal(SIGALRM) delta %u\n",
11810Sstevel@tonic-gate 			    now - timer_next);
11820Sstevel@tonic-gate 		}
11830Sstevel@tonic-gate 		timer_active = _B_FALSE;
11840Sstevel@tonic-gate 		run_timeouts();
11850Sstevel@tonic-gate 		break;
11860Sstevel@tonic-gate 	case SIGUSR1:
11870Sstevel@tonic-gate 		logdebug("Printing configuration:\n");
11880Sstevel@tonic-gate 		/* Print out the internal tables */
11890Sstevel@tonic-gate 		phyint_inst_print_all();
11900Sstevel@tonic-gate 
11910Sstevel@tonic-gate 		/*
11920Sstevel@tonic-gate 		 * Print out the accumulated statistics about missed
11930Sstevel@tonic-gate 		 * probes (happens due to scheduling delay).
11940Sstevel@tonic-gate 		 */
11950Sstevel@tonic-gate 		logerr("Missed sending total of %d probes spread over"
11960Sstevel@tonic-gate 		    " %d occurrences\n", probes_missed.pm_nprobes,
11970Sstevel@tonic-gate 		    probes_missed.pm_ntimes);
11980Sstevel@tonic-gate 
11990Sstevel@tonic-gate 		/*
12000Sstevel@tonic-gate 		 * Print out the accumulated statistics about probes
12010Sstevel@tonic-gate 		 * that were sent.
12020Sstevel@tonic-gate 		 */
12030Sstevel@tonic-gate 		for (pii = phyint_instances; pii != NULL;
12040Sstevel@tonic-gate 		    pii = pii->pii_next) {
12050Sstevel@tonic-gate 			unacked = 0;
12060Sstevel@tonic-gate 			acked = pii->pii_cum_stats.acked;
12070Sstevel@tonic-gate 			lost = pii->pii_cum_stats.lost;
12080Sstevel@tonic-gate 			sent = pii->pii_cum_stats.sent;
12090Sstevel@tonic-gate 			unknown = pii->pii_cum_stats.unknown;
12100Sstevel@tonic-gate 			for (pr_ndx = 0; pr_ndx < PROBE_STATS_COUNT; pr_ndx++) {
12110Sstevel@tonic-gate 				switch (pii->pii_probes[pr_ndx].pr_status) {
12120Sstevel@tonic-gate 				case PR_ACKED:
12130Sstevel@tonic-gate 					acked++;
12140Sstevel@tonic-gate 					break;
12150Sstevel@tonic-gate 				case PR_LOST:
12160Sstevel@tonic-gate 					lost++;
12170Sstevel@tonic-gate 					break;
12180Sstevel@tonic-gate 				case PR_UNACKED:
12190Sstevel@tonic-gate 					unacked++;
12200Sstevel@tonic-gate 					break;
12210Sstevel@tonic-gate 				}
12220Sstevel@tonic-gate 			}
12230Sstevel@tonic-gate 			logerr("\nProbe stats on (%s %s)\n"
12240Sstevel@tonic-gate 			    "Number of probes sent %lld\n"
12250Sstevel@tonic-gate 			    "Number of probe acks received %lld\n"
12260Sstevel@tonic-gate 			    "Number of probes/acks lost %lld\n"
12270Sstevel@tonic-gate 			    "Number of valid unacknowled probes %lld\n"
12280Sstevel@tonic-gate 			    "Number of ambiguous probe acks received %lld\n",
12290Sstevel@tonic-gate 			    AF_STR(pii->pii_af), pii->pii_name,
12300Sstevel@tonic-gate 			    sent, acked, lost, unacked, unknown);
12310Sstevel@tonic-gate 		}
12320Sstevel@tonic-gate 		break;
12330Sstevel@tonic-gate 	case SIGHUP:
12340Sstevel@tonic-gate 		logerr("SIGHUP: restart and reread config file\n");
12350Sstevel@tonic-gate 		cleanup();
12360Sstevel@tonic-gate 		(void) execv(argv0[0], argv0);
12370Sstevel@tonic-gate 		_exit(0177);
12380Sstevel@tonic-gate 		/* NOTREACHED */
12390Sstevel@tonic-gate 	case SIGINT:
12400Sstevel@tonic-gate 	case SIGTERM:
12410Sstevel@tonic-gate 	case SIGQUIT:
12420Sstevel@tonic-gate 		cleanup();
12430Sstevel@tonic-gate 		exit(0);
12440Sstevel@tonic-gate 		/* NOTREACHED */
12450Sstevel@tonic-gate 	default:
12460Sstevel@tonic-gate 		logerr("in_signal: unknown signal: %d\n", buf);
12470Sstevel@tonic-gate 	}
12480Sstevel@tonic-gate }
12490Sstevel@tonic-gate 
12500Sstevel@tonic-gate static void
12510Sstevel@tonic-gate cleanup(void)
12520Sstevel@tonic-gate {
12530Sstevel@tonic-gate 	struct phyint_instance *pii;
12540Sstevel@tonic-gate 	struct phyint_instance *next_pii;
12550Sstevel@tonic-gate 
12560Sstevel@tonic-gate 	/*
12570Sstevel@tonic-gate 	 * Make sure that we don't write to eventpipe in
12580Sstevel@tonic-gate 	 * sig_handler() if any signal notably SIGALRM,
12590Sstevel@tonic-gate 	 * occurs after we close the eventpipe descriptor below
12600Sstevel@tonic-gate 	 */
12610Sstevel@tonic-gate 	cleanup_started = _B_TRUE;
12620Sstevel@tonic-gate 
12630Sstevel@tonic-gate 	for (pii = phyint_instances; pii != NULL; pii = next_pii) {
12640Sstevel@tonic-gate 		next_pii = pii->pii_next;
12650Sstevel@tonic-gate 		phyint_inst_delete(pii);
12660Sstevel@tonic-gate 	}
12670Sstevel@tonic-gate 
12680Sstevel@tonic-gate 	(void) close(ifsock_v4);
12690Sstevel@tonic-gate 	(void) close(ifsock_v6);
12700Sstevel@tonic-gate 	(void) close(rtsock_v4);
12710Sstevel@tonic-gate 	(void) close(rtsock_v6);
12720Sstevel@tonic-gate 	(void) close(lsock_v4);
12730Sstevel@tonic-gate 	(void) close(lsock_v6);
12740Sstevel@tonic-gate 	(void) close(0);
12750Sstevel@tonic-gate 	(void) close(1);
12760Sstevel@tonic-gate 	(void) close(2);
12770Sstevel@tonic-gate 	(void) close(mibfd);
12780Sstevel@tonic-gate 	(void) close(eventpipe_read);
12790Sstevel@tonic-gate 	(void) close(eventpipe_write);
12800Sstevel@tonic-gate }
12810Sstevel@tonic-gate 
12820Sstevel@tonic-gate /*
12830Sstevel@tonic-gate  * Create pipe for signal delivery and set up signal handlers.
12840Sstevel@tonic-gate  */
12850Sstevel@tonic-gate static void
12860Sstevel@tonic-gate setup_eventpipe(void)
12870Sstevel@tonic-gate {
12880Sstevel@tonic-gate 	int fds[2];
12890Sstevel@tonic-gate 	struct sigaction act;
12900Sstevel@tonic-gate 
12910Sstevel@tonic-gate 	if ((pipe(fds)) < 0) {
12920Sstevel@tonic-gate 		logperror("setup_eventpipe: pipe");
12930Sstevel@tonic-gate 		exit(1);
12940Sstevel@tonic-gate 	}
12950Sstevel@tonic-gate 	eventpipe_read = fds[0];
12960Sstevel@tonic-gate 	eventpipe_write = fds[1];
12970Sstevel@tonic-gate 	if (poll_add(eventpipe_read) == -1) {
12980Sstevel@tonic-gate 		exit(1);
12990Sstevel@tonic-gate 	}
13000Sstevel@tonic-gate 
13010Sstevel@tonic-gate 	act.sa_handler = sig_handler;
13020Sstevel@tonic-gate 	act.sa_flags = SA_RESTART;
13030Sstevel@tonic-gate 	(void) sigaction(SIGALRM, &act, NULL);
13040Sstevel@tonic-gate 
13050Sstevel@tonic-gate 	(void) sigset(SIGHUP, sig_handler);
13060Sstevel@tonic-gate 	(void) sigset(SIGUSR1, sig_handler);
13070Sstevel@tonic-gate 	(void) sigset(SIGTERM, sig_handler);
13080Sstevel@tonic-gate 	(void) sigset(SIGINT, sig_handler);
13090Sstevel@tonic-gate 	(void) sigset(SIGQUIT, sig_handler);
13100Sstevel@tonic-gate }
13110Sstevel@tonic-gate 
/*
 * Open a routing socket for address family 'af', on which RTM_IFINFO
 * messages will be received.  The socket is put into non-blocking mode
 * and added to the poll set.
 *
 * Returns the socket descriptor.  Any failure is fatal: the socket (if
 * open) is closed and the process exits with status 1.
 */
static int
setup_rtsock(int af)
{
	int	rtfd;
	int	fl;

	rtfd = socket(PF_ROUTE, SOCK_RAW, af);
	if (rtfd == -1) {
		logperror("setup_rtsock: socket PF_ROUTE");
		exit(1);
	}

	/* Fetch the current file status flags, then add O_NONBLOCK. */
	fl = fcntl(rtfd, F_GETFL, 0);
	if (fl < 0) {
		logperror("setup_rtsock: fcntl F_GETFL");
		goto fail;
	}
	if (fcntl(rtfd, F_SETFL, fl | O_NONBLOCK) < 0) {
		logperror("setup_rtsock: fcntl F_SETFL");
		goto fail;
	}

	if (poll_add(rtfd) != -1)
		return (rtfd);

fail:
	(void) close(rtfd);
	exit(1);
	/* NOTREACHED */
}
13420Sstevel@tonic-gate 
13430Sstevel@tonic-gate /*
13440Sstevel@tonic-gate  * Process an RTM_IFINFO message received on a routing socket.
13450Sstevel@tonic-gate  * The return value indicates whether a full interface scan is required.
13460Sstevel@tonic-gate  * Link up/down notifications from the NICs are reflected in the
13470Sstevel@tonic-gate  * IFF_RUNNING flag.
13480Sstevel@tonic-gate  * If just the state of the IFF_RUNNING interface flag has changed, a
13490Sstevel@tonic-gate  * a full interface scan isn't required.
13500Sstevel@tonic-gate  */
13510Sstevel@tonic-gate static boolean_t
13520Sstevel@tonic-gate process_rtm_ifinfo(if_msghdr_t *ifm, int type)
13530Sstevel@tonic-gate {
13540Sstevel@tonic-gate 	struct sockaddr_dl *sdl;
13550Sstevel@tonic-gate 	struct phyint *pi;
13560Sstevel@tonic-gate 	uint64_t old_flags;
13570Sstevel@tonic-gate 	struct phyint_instance *pii;
13580Sstevel@tonic-gate 
13590Sstevel@tonic-gate 	assert(ifm->ifm_type == RTM_IFINFO && ifm->ifm_addrs == RTA_IFP);
13600Sstevel@tonic-gate 
13610Sstevel@tonic-gate 	/*
13620Sstevel@tonic-gate 	 * Although the sockaddr_dl structure is directly after the
13630Sstevel@tonic-gate 	 * if_msghdr_t structure. At the time of writing, the size of the
13640Sstevel@tonic-gate 	 * if_msghdr_t structure is different on 32 and 64 bit kernels, due
13650Sstevel@tonic-gate 	 * to the presence of a timeval structure, which contains longs,
13660Sstevel@tonic-gate 	 * in the if_data structure.  Anyway, we know where the message ends,
13670Sstevel@tonic-gate 	 * so we work backwards to get the start of the sockaddr_dl structure.
13680Sstevel@tonic-gate 	 */
13690Sstevel@tonic-gate 	/*LINTED*/
13700Sstevel@tonic-gate 	sdl = (struct sockaddr_dl *)((char *)ifm + ifm->ifm_msglen -
13710Sstevel@tonic-gate 		sizeof (struct sockaddr_dl));
13720Sstevel@tonic-gate 
13730Sstevel@tonic-gate 	assert(sdl->sdl_family == AF_LINK);
13740Sstevel@tonic-gate 
13750Sstevel@tonic-gate 	/*
13760Sstevel@tonic-gate 	 * The interface name is in sdl_data.
13770Sstevel@tonic-gate 	 * RTM_IFINFO messages are only generated for logical interface
13780Sstevel@tonic-gate 	 * zero, so there is no colon and logical interface number to
13790Sstevel@tonic-gate 	 * strip from the name.	 The name is not null terminated, but
13800Sstevel@tonic-gate 	 * there should be enough space in sdl_data to add the null.
13810Sstevel@tonic-gate 	 */
13820Sstevel@tonic-gate 	if (sdl->sdl_nlen >= sizeof (sdl->sdl_data)) {
13830Sstevel@tonic-gate 		if (debug & D_LINKNOTE)
13840Sstevel@tonic-gate 			logdebug("process_rtm_ifinfo: "
13850Sstevel@tonic-gate 				"phyint name too long\n");
13860Sstevel@tonic-gate 		return (_B_TRUE);
13870Sstevel@tonic-gate 	}
13880Sstevel@tonic-gate 	sdl->sdl_data[sdl->sdl_nlen] = 0;
13890Sstevel@tonic-gate 
13900Sstevel@tonic-gate 	pi = phyint_lookup(sdl->sdl_data);
13910Sstevel@tonic-gate 	if (pi == NULL) {
13920Sstevel@tonic-gate 		if (debug & D_LINKNOTE)
13930Sstevel@tonic-gate 			logdebug("process_rtm_ifinfo: phyint lookup failed"
13940Sstevel@tonic-gate 				" for %s\n", sdl->sdl_data);
13950Sstevel@tonic-gate 		return (_B_TRUE);
13960Sstevel@tonic-gate 	}
13970Sstevel@tonic-gate 
13980Sstevel@tonic-gate 	/*
13990Sstevel@tonic-gate 	 * We want to try and avoid doing a full interface scan for
14000Sstevel@tonic-gate 	 * link state notifications from the NICs, as indicated
14010Sstevel@tonic-gate 	 * by the state of the IFF_RUNNING flag.  If just the
14020Sstevel@tonic-gate 	 * IFF_RUNNING flag has changed state, the link state changes
14030Sstevel@tonic-gate 	 * are processed without a full scan.
14040Sstevel@tonic-gate 	 * If there is both an IPv4 and IPv6 instance associated with
14050Sstevel@tonic-gate 	 * the physical interface, we will get an RTM_IFINFO message
14060Sstevel@tonic-gate 	 * for each instance.  If we just maintained a single copy of
14070Sstevel@tonic-gate 	 * the physical interface flags, it would appear that no flags
14080Sstevel@tonic-gate 	 * had changed when the second message is processed, leading us
14090Sstevel@tonic-gate 	 * to believe that the message wasn't generated by a flags change,
14100Sstevel@tonic-gate 	 * and that a full interface scan is required.
14110Sstevel@tonic-gate 	 * To get around this problem, two additional copies of the flags
14120Sstevel@tonic-gate 	 * are kept, one copy for each instance.  These are only used in
14130Sstevel@tonic-gate 	 * this routine.  At any one time, all three copies of the flags
14140Sstevel@tonic-gate 	 * should be identical except for the IFF_RUNNING flag.	 The
14150Sstevel@tonic-gate 	 * copy of the flags in the "phyint" structure is always up to
14160Sstevel@tonic-gate 	 * date.
14170Sstevel@tonic-gate 	 */
14180Sstevel@tonic-gate 	pii = (type == AF_INET) ? pi->pi_v4 : pi->pi_v6;
14190Sstevel@tonic-gate 	if (pii == NULL) {
14200Sstevel@tonic-gate 		if (debug & D_LINKNOTE)
14210Sstevel@tonic-gate 			logdebug("process_rtm_ifinfo: no instance of address "
14220Sstevel@tonic-gate 			    "family %s for %s\n", AF_STR(type), pi->pi_name);
14230Sstevel@tonic-gate 		return (_B_TRUE);
14240Sstevel@tonic-gate 	}
14250Sstevel@tonic-gate 
14260Sstevel@tonic-gate 	old_flags = pii->pii_flags;
14270Sstevel@tonic-gate 	pii->pii_flags = PHYINT_FLAGS(ifm->ifm_flags);
14280Sstevel@tonic-gate 	pi->pi_flags = pii->pii_flags;
14290Sstevel@tonic-gate 
14300Sstevel@tonic-gate 	if (debug & D_LINKNOTE) {
14310Sstevel@tonic-gate 		logdebug("process_rtm_ifinfo: %s address family: %s, "
14320Sstevel@tonic-gate 		    "old flags: %llx, new flags: %llx\n", pi->pi_name,
14330Sstevel@tonic-gate 		    AF_STR(type), old_flags, pi->pi_flags);
14340Sstevel@tonic-gate 	}
14350Sstevel@tonic-gate 
14360Sstevel@tonic-gate 	/*
14370Sstevel@tonic-gate 	 * If IFF_STANDBY has changed, indicate that the interface has changed
14380Sstevel@tonic-gate 	 * types.
14390Sstevel@tonic-gate 	 */
14400Sstevel@tonic-gate 	if ((old_flags ^ pii->pii_flags) & IFF_STANDBY)
14410Sstevel@tonic-gate 		phyint_newtype(pi);
14420Sstevel@tonic-gate 
14430Sstevel@tonic-gate 	/*
14440Sstevel@tonic-gate 	 * If IFF_INACTIVE has been set, then no data addresses should be
14450Sstevel@tonic-gate 	 * hosted on the interface.  If IFF_INACTIVE has been cleared, then
14460Sstevel@tonic-gate 	 * move previously failed-over addresses back to it, provided it is
14470Sstevel@tonic-gate 	 * not failed.	For details, see the state diagram in mpd_probe.c.
14480Sstevel@tonic-gate 	 */
14490Sstevel@tonic-gate 	if ((old_flags ^ pii->pii_flags) & IFF_INACTIVE) {
14500Sstevel@tonic-gate 		if (pii->pii_flags & IFF_INACTIVE) {
1451704Sethindra 			if (!pi->pi_empty && (pi->pi_flags & IFF_STANDBY))
14520Sstevel@tonic-gate 				(void) try_failover(pi, FAILOVER_TO_NONSTANDBY);
14530Sstevel@tonic-gate 		} else {
14540Sstevel@tonic-gate 			if (pi->pi_state == PI_RUNNING && !pi->pi_full) {
14550Sstevel@tonic-gate 				pi->pi_empty = 0;
1456*2496Smeem 				(void) try_failback(pi);
14570Sstevel@tonic-gate 			}
14580Sstevel@tonic-gate 		}
14590Sstevel@tonic-gate 	}
14600Sstevel@tonic-gate 
14610Sstevel@tonic-gate 	/* Has just the IFF_RUNNING flag changed state ? */
14620Sstevel@tonic-gate 	if ((old_flags ^ pii->pii_flags) != IFF_RUNNING) {
14630Sstevel@tonic-gate 		struct phyint_instance *pii_other;
14640Sstevel@tonic-gate 		/*
14650Sstevel@tonic-gate 		 * It wasn't just a link state change.	Update
14660Sstevel@tonic-gate 		 * the other instance's copy of the flags.
14670Sstevel@tonic-gate 		 */
14680Sstevel@tonic-gate 		pii_other = phyint_inst_other(pii);
14690Sstevel@tonic-gate 		if (pii_other != NULL)
14700Sstevel@tonic-gate 			pii_other->pii_flags = pii->pii_flags;
14710Sstevel@tonic-gate 		return (_B_TRUE);
14720Sstevel@tonic-gate 	}
14730Sstevel@tonic-gate 
14740Sstevel@tonic-gate 	return (_B_FALSE);
14750Sstevel@tonic-gate }
14760Sstevel@tonic-gate 
14770Sstevel@tonic-gate /*
14780Sstevel@tonic-gate  * Retrieve as many routing socket messages as possible, and try to
14790Sstevel@tonic-gate  * empty the routing sockets. Initiate full scan of targets or interfaces
14800Sstevel@tonic-gate  * as needed.
14810Sstevel@tonic-gate  * We listen on separate IPv4 an IPv6 sockets so that we can accurately
14820Sstevel@tonic-gate  * detect changes in certain flags (see "process_rtm_ifinfo()" above).
14830Sstevel@tonic-gate  */
14840Sstevel@tonic-gate static void
14850Sstevel@tonic-gate process_rtsock(int rtsock_v4, int rtsock_v6)
14860Sstevel@tonic-gate {
14870Sstevel@tonic-gate 	int	nbytes;
14880Sstevel@tonic-gate 	int64_t msg[2048 / 8];
14890Sstevel@tonic-gate 	struct rt_msghdr *rtm;
14900Sstevel@tonic-gate 	boolean_t need_if_scan = _B_FALSE;
14910Sstevel@tonic-gate 	boolean_t need_rt_scan = _B_FALSE;
14920Sstevel@tonic-gate 	boolean_t rtm_ifinfo_seen = _B_FALSE;
14930Sstevel@tonic-gate 	int type;
14940Sstevel@tonic-gate 
14950Sstevel@tonic-gate 	/* Read as many messages as possible and try to empty the sockets */
14960Sstevel@tonic-gate 	for (type = AF_INET; ; type = AF_INET6) {
14970Sstevel@tonic-gate 		for (;;) {
14980Sstevel@tonic-gate 			nbytes = read((type == AF_INET) ? rtsock_v4 :
14990Sstevel@tonic-gate 				rtsock_v6, msg, sizeof (msg));
15000Sstevel@tonic-gate 			if (nbytes <= 0) {
15010Sstevel@tonic-gate 				/* No more messages */
15020Sstevel@tonic-gate 				break;
15030Sstevel@tonic-gate 			}
15040Sstevel@tonic-gate 			rtm = (struct rt_msghdr *)msg;
15050Sstevel@tonic-gate 			if (rtm->rtm_version != RTM_VERSION) {
15060Sstevel@tonic-gate 				logerr("process_rtsock: version %d "
15070Sstevel@tonic-gate 				    "not understood\n", rtm->rtm_version);
15080Sstevel@tonic-gate 				break;
15090Sstevel@tonic-gate 			}
15100Sstevel@tonic-gate 
15110Sstevel@tonic-gate 			if (debug & D_PHYINT) {
15120Sstevel@tonic-gate 				logdebug("process_rtsock: message %d\n",
15130Sstevel@tonic-gate 				    rtm->rtm_type);
15140Sstevel@tonic-gate 			}
15150Sstevel@tonic-gate 
15160Sstevel@tonic-gate 			switch (rtm->rtm_type) {
15170Sstevel@tonic-gate 			case RTM_NEWADDR:
15180Sstevel@tonic-gate 			case RTM_DELADDR:
15190Sstevel@tonic-gate 				/*
15200Sstevel@tonic-gate 				 * Some logical interface has changed,
15210Sstevel@tonic-gate 				 * have to scan everything to determine
15220Sstevel@tonic-gate 				 * what actually changed.
15230Sstevel@tonic-gate 				 */
15240Sstevel@tonic-gate 				need_if_scan = _B_TRUE;
15250Sstevel@tonic-gate 				break;
15260Sstevel@tonic-gate 
15270Sstevel@tonic-gate 			case RTM_IFINFO:
15280Sstevel@tonic-gate 				rtm_ifinfo_seen = _B_TRUE;
15290Sstevel@tonic-gate 				need_if_scan |=
15300Sstevel@tonic-gate 					process_rtm_ifinfo((if_msghdr_t *)rtm,
15310Sstevel@tonic-gate 					type);
15320Sstevel@tonic-gate 				break;
15330Sstevel@tonic-gate 
15340Sstevel@tonic-gate 			case RTM_ADD:
15350Sstevel@tonic-gate 			case RTM_DELETE:
15360Sstevel@tonic-gate 			case RTM_CHANGE:
15370Sstevel@tonic-gate 			case RTM_OLDADD:
15380Sstevel@tonic-gate 			case RTM_OLDDEL:
15390Sstevel@tonic-gate 				need_rt_scan = _B_TRUE;
15400Sstevel@tonic-gate 				break;
15410Sstevel@tonic-gate 
15420Sstevel@tonic-gate 			default:
15430Sstevel@tonic-gate 				/* Not interesting */
15440Sstevel@tonic-gate 				break;
15450Sstevel@tonic-gate 			}
15460Sstevel@tonic-gate 		}
15470Sstevel@tonic-gate 		if (type == AF_INET6)
15480Sstevel@tonic-gate 			break;
15490Sstevel@tonic-gate 	}
15500Sstevel@tonic-gate 
15510Sstevel@tonic-gate 	if (need_if_scan) {
15520Sstevel@tonic-gate 		if (debug & D_LINKNOTE && rtm_ifinfo_seen)
15530Sstevel@tonic-gate 			logdebug("process_rtsock: synchronizing with kernel\n");
15540Sstevel@tonic-gate 		initifs();
15550Sstevel@tonic-gate 	} else if (rtm_ifinfo_seen) {
15560Sstevel@tonic-gate 		if (debug & D_LINKNOTE)
15570Sstevel@tonic-gate 			logdebug("process_rtsock: "
15580Sstevel@tonic-gate 			    "link up/down notification(s) seen\n");
15590Sstevel@tonic-gate 		process_link_state_changes();
15600Sstevel@tonic-gate 	}
15610Sstevel@tonic-gate 
15620Sstevel@tonic-gate 	if (need_rt_scan)
15630Sstevel@tonic-gate 		init_router_targets();
15640Sstevel@tonic-gate }
15650Sstevel@tonic-gate 
15660Sstevel@tonic-gate /*
15670Sstevel@tonic-gate  * Look if the phyint instance or one of its logints have been removed from
15680Sstevel@tonic-gate  * the kernel and take appropriate action.
15690Sstevel@tonic-gate  * Uses {pii,li}_in_use.
15700Sstevel@tonic-gate  */
15710Sstevel@tonic-gate static void
15720Sstevel@tonic-gate check_if_removed(struct phyint_instance *pii)
15730Sstevel@tonic-gate {
15740Sstevel@tonic-gate 	struct logint *li;
15750Sstevel@tonic-gate 	struct logint *next_li;
15760Sstevel@tonic-gate 
15770Sstevel@tonic-gate 	/* Detect phyints that have been removed from the kernel. */
15780Sstevel@tonic-gate 	if (!pii->pii_in_use) {
15790Sstevel@tonic-gate 		logtrace("%s %s has been removed from kernel\n",
15800Sstevel@tonic-gate 		    AF_STR(pii->pii_af), pii->pii_phyint->pi_name);
15810Sstevel@tonic-gate 		phyint_inst_delete(pii);
15820Sstevel@tonic-gate 	} else {
15830Sstevel@tonic-gate 		/* Detect logints that have been removed. */
15840Sstevel@tonic-gate 		for (li = pii->pii_logint; li != NULL; li = next_li) {
15850Sstevel@tonic-gate 			next_li = li->li_next;
15860Sstevel@tonic-gate 			if (!li->li_in_use) {
15870Sstevel@tonic-gate 				logint_delete(li);
15880Sstevel@tonic-gate 			}
15890Sstevel@tonic-gate 		}
15900Sstevel@tonic-gate 	}
15910Sstevel@tonic-gate }
15920Sstevel@tonic-gate 
15930Sstevel@tonic-gate /*
15940Sstevel@tonic-gate  * Send down a T_OPTMGMT_REQ to ip asking for all data in the various
15950Sstevel@tonic-gate  * tables defined by mib2.h. Parse the returned data and extract
15960Sstevel@tonic-gate  * the 'routing' information table. Process the 'routing' table
15970Sstevel@tonic-gate  * to get the list of known onlink routers, and update our database.
15980Sstevel@tonic-gate  * These onlink routers will serve as our probe targets.
15990Sstevel@tonic-gate  * Returns false, if any system calls resulted in errors, true otherwise.
16000Sstevel@tonic-gate  */
16010Sstevel@tonic-gate static boolean_t
16020Sstevel@tonic-gate update_router_list(int fd)
16030Sstevel@tonic-gate {
16040Sstevel@tonic-gate 	union {
16050Sstevel@tonic-gate 		char	ubuf[1024];
16060Sstevel@tonic-gate 		union T_primitives uprim;
16070Sstevel@tonic-gate 	} buf;
16080Sstevel@tonic-gate 
16090Sstevel@tonic-gate 	int			flags;
16100Sstevel@tonic-gate 	struct strbuf		ctlbuf;
16110Sstevel@tonic-gate 	struct strbuf		databuf;
16120Sstevel@tonic-gate 	struct T_optmgmt_req	*tor;
16130Sstevel@tonic-gate 	struct T_optmgmt_ack	*toa;
16140Sstevel@tonic-gate 	struct T_error_ack	*tea;
16150Sstevel@tonic-gate 	struct opthdr		*optp;
16160Sstevel@tonic-gate 	struct opthdr		*req;
16170Sstevel@tonic-gate 	int			status;
16180Sstevel@tonic-gate 	t_scalar_t		prim;
16190Sstevel@tonic-gate 
16200Sstevel@tonic-gate 	tor = (struct T_optmgmt_req *)&buf;
16210Sstevel@tonic-gate 
16220Sstevel@tonic-gate 	tor->PRIM_type = T_SVR4_OPTMGMT_REQ;
16230Sstevel@tonic-gate 	tor->OPT_offset = sizeof (struct T_optmgmt_req);
16240Sstevel@tonic-gate 	tor->OPT_length = sizeof (struct opthdr);
16250Sstevel@tonic-gate 	tor->MGMT_flags = T_CURRENT;
16260Sstevel@tonic-gate 
16270Sstevel@tonic-gate 	req = (struct opthdr *)&tor[1];
16280Sstevel@tonic-gate 	req->level = MIB2_IP;	/* any MIB2_xxx value ok here */
16290Sstevel@tonic-gate 	req->name  = 0;
16300Sstevel@tonic-gate 	req->len   = 0;
16310Sstevel@tonic-gate 
16320Sstevel@tonic-gate 	ctlbuf.buf = (char *)&buf;
16330Sstevel@tonic-gate 	ctlbuf.len = tor->OPT_length + tor->OPT_offset;
16340Sstevel@tonic-gate 	ctlbuf.maxlen = sizeof (buf);
16350Sstevel@tonic-gate 	flags = 0;
16360Sstevel@tonic-gate 	if (putmsg(fd, &ctlbuf, NULL, flags) == -1) {
16370Sstevel@tonic-gate 		logperror("update_router_list: putmsg(ctl)");
16380Sstevel@tonic-gate 		return (_B_FALSE);
16390Sstevel@tonic-gate 	}
16400Sstevel@tonic-gate 
16410Sstevel@tonic-gate 	/*
16420Sstevel@tonic-gate 	 * The response consists of multiple T_OPTMGMT_ACK msgs, 1 msg for
16430Sstevel@tonic-gate 	 * each table defined in mib2.h.  Each T_OPTMGMT_ACK msg contains
16440Sstevel@tonic-gate 	 * a control and data part. The control part contains a struct
16450Sstevel@tonic-gate 	 * T_optmgmt_ack followed by a struct opthdr. The 'opthdr' identifies
16460Sstevel@tonic-gate 	 * the level, name and length of the data in the data part. The
16470Sstevel@tonic-gate 	 * data part contains the actual table data. The last message
16480Sstevel@tonic-gate 	 * is an end-of-data (EOD), consisting of a T_OPTMGMT_ACK and a
16490Sstevel@tonic-gate 	 * single option with zero optlen.
16500Sstevel@tonic-gate 	 */
16510Sstevel@tonic-gate 
16520Sstevel@tonic-gate 	for (;;) {
16530Sstevel@tonic-gate 		/*
16540Sstevel@tonic-gate 		 * Go around this loop once for each table. Ignore
16550Sstevel@tonic-gate 		 * all tables except the routing information table.
16560Sstevel@tonic-gate 		 */
16570Sstevel@tonic-gate 		flags = 0;
16580Sstevel@tonic-gate 		status = getmsg(fd, &ctlbuf, NULL, &flags);
16590Sstevel@tonic-gate 		if (status < 0) {
16600Sstevel@tonic-gate 			if (errno == EINTR)
16610Sstevel@tonic-gate 				continue;
16620Sstevel@tonic-gate 			logperror("update_router_list: getmsg(ctl)");
16630Sstevel@tonic-gate 			return (_B_FALSE);
16640Sstevel@tonic-gate 		}
16650Sstevel@tonic-gate 		if (ctlbuf.len < sizeof (t_scalar_t)) {
16660Sstevel@tonic-gate 			logerr("update_router_list: ctlbuf.len %d\n",
16670Sstevel@tonic-gate 			    ctlbuf.len);
16680Sstevel@tonic-gate 			return (_B_FALSE);
16690Sstevel@tonic-gate 		}
16700Sstevel@tonic-gate 
16710Sstevel@tonic-gate 		prim = buf.uprim.type;
16720Sstevel@tonic-gate 
16730Sstevel@tonic-gate 		switch (prim) {
16740Sstevel@tonic-gate 
16750Sstevel@tonic-gate 		case T_ERROR_ACK:
16760Sstevel@tonic-gate 			tea = &buf.uprim.error_ack;
16770Sstevel@tonic-gate 			if (ctlbuf.len < sizeof (struct T_error_ack)) {
16780Sstevel@tonic-gate 				logerr("update_router_list: T_ERROR_ACK"
16790Sstevel@tonic-gate 				    " ctlbuf.len %d\n", ctlbuf.len);
16800Sstevel@tonic-gate 				return (_B_FALSE);
16810Sstevel@tonic-gate 			}
16820Sstevel@tonic-gate 			logerr("update_router_list: T_ERROR_ACK:"
16830Sstevel@tonic-gate 			    " TLI_error = 0x%lx, UNIX_error = 0x%lx\n",
16840Sstevel@tonic-gate 			    tea->TLI_error, tea->UNIX_error);
16850Sstevel@tonic-gate 			return (_B_FALSE);
16860Sstevel@tonic-gate 
16870Sstevel@tonic-gate 		case T_OPTMGMT_ACK:
16880Sstevel@tonic-gate 			toa = &buf.uprim.optmgmt_ack;
16890Sstevel@tonic-gate 			optp = (struct opthdr *)&toa[1];
16900Sstevel@tonic-gate 			if (ctlbuf.len < sizeof (struct T_optmgmt_ack)) {
16910Sstevel@tonic-gate 				logerr("update_router_list: ctlbuf.len %d\n",
16920Sstevel@tonic-gate 				    ctlbuf.len);
16930Sstevel@tonic-gate 				return (_B_FALSE);
16940Sstevel@tonic-gate 			}
16950Sstevel@tonic-gate 			if (toa->MGMT_flags != T_SUCCESS) {
16960Sstevel@tonic-gate 				logerr("update_router_list: MGMT_flags 0x%lx\n",
16970Sstevel@tonic-gate 				    toa->MGMT_flags);
16980Sstevel@tonic-gate 				return (_B_FALSE);
16990Sstevel@tonic-gate 			}
17000Sstevel@tonic-gate 			break;
17010Sstevel@tonic-gate 
17020Sstevel@tonic-gate 		default:
17030Sstevel@tonic-gate 			logerr("update_router_list: unknown primitive %ld\n",
17040Sstevel@tonic-gate 			    prim);
17050Sstevel@tonic-gate 			return (_B_FALSE);
17060Sstevel@tonic-gate 		}
17070Sstevel@tonic-gate 
17080Sstevel@tonic-gate 		/* Process the T_OPGMGMT_ACK below */
17090Sstevel@tonic-gate 		assert(prim == T_OPTMGMT_ACK);
17100Sstevel@tonic-gate 
17110Sstevel@tonic-gate 		switch (status) {
17120Sstevel@tonic-gate 		case 0:
17130Sstevel@tonic-gate 			/*
17140Sstevel@tonic-gate 			 * We have reached the end of this T_OPTMGMT_ACK
17150Sstevel@tonic-gate 			 * message. If this is the last message i.e EOD,
17160Sstevel@tonic-gate 			 * return, else process the next T_OPTMGMT_ACK msg.
17170Sstevel@tonic-gate 			 */
17180Sstevel@tonic-gate 			if ((ctlbuf.len == sizeof (struct T_optmgmt_ack) +
17190Sstevel@tonic-gate 			    sizeof (struct opthdr)) && optp->len == 0 &&
17200Sstevel@tonic-gate 			    optp->name == 0 && optp->level == 0) {
17210Sstevel@tonic-gate 				/*
17220Sstevel@tonic-gate 				 * This is the EOD message. Return
17230Sstevel@tonic-gate 				 */
17240Sstevel@tonic-gate 				return (_B_TRUE);
17250Sstevel@tonic-gate 			}
17260Sstevel@tonic-gate 			continue;
17270Sstevel@tonic-gate 
17280Sstevel@tonic-gate 		case MORECTL:
17290Sstevel@tonic-gate 		case MORECTL | MOREDATA:
17300Sstevel@tonic-gate 			/*
17310Sstevel@tonic-gate 			 * This should not happen. We should be able to read
17320Sstevel@tonic-gate 			 * the control portion in a single getmsg.
17330Sstevel@tonic-gate 			 */
17340Sstevel@tonic-gate 			logerr("update_router_list: MORECTL\n");
17350Sstevel@tonic-gate 			return (_B_FALSE);
17360Sstevel@tonic-gate 
17370Sstevel@tonic-gate 		case MOREDATA:
17380Sstevel@tonic-gate 			databuf.maxlen = optp->len;
17390Sstevel@tonic-gate 			/* malloc of 0 bytes is ok */
17400Sstevel@tonic-gate 			databuf.buf = malloc((size_t)optp->len);
17410Sstevel@tonic-gate 			if (databuf.maxlen != 0 && databuf.buf == NULL) {
17420Sstevel@tonic-gate 				logperror("update_router_list: malloc");
17430Sstevel@tonic-gate 				return (_B_FALSE);
17440Sstevel@tonic-gate 			}
17450Sstevel@tonic-gate 			databuf.len = 0;
17460Sstevel@tonic-gate 			flags = 0;
17470Sstevel@tonic-gate 			for (;;) {
17480Sstevel@tonic-gate 				status = getmsg(fd, NULL, &databuf, &flags);
17490Sstevel@tonic-gate 				if (status >= 0) {
17500Sstevel@tonic-gate 					break;
17510Sstevel@tonic-gate 				} else if (errno == EINTR) {
17520Sstevel@tonic-gate 					continue;
17530Sstevel@tonic-gate 				} else {
17540Sstevel@tonic-gate 					logperror("update_router_list:"
17550Sstevel@tonic-gate 					    " getmsg(data)");
17560Sstevel@tonic-gate 					free(databuf.buf);
17570Sstevel@tonic-gate 					return (_B_FALSE);
17580Sstevel@tonic-gate 				}
17590Sstevel@tonic-gate 			}
17600Sstevel@tonic-gate 
17610Sstevel@tonic-gate 			if (optp->level == MIB2_IP &&
17620Sstevel@tonic-gate 			    optp->name == MIB2_IP_ROUTE) {
17630Sstevel@tonic-gate 				/* LINTED */
17640Sstevel@tonic-gate 				ire_process_v4((mib2_ipRouteEntry_t *)
17650Sstevel@tonic-gate 				    databuf.buf, databuf.len);
17660Sstevel@tonic-gate 			} else if (optp->level == MIB2_IP6 &&
17670Sstevel@tonic-gate 			    optp->name == MIB2_IP6_ROUTE) {
17680Sstevel@tonic-gate 				/* LINTED */
17690Sstevel@tonic-gate 				ire_process_v6((mib2_ipv6RouteEntry_t *)
17700Sstevel@tonic-gate 				    databuf.buf, databuf.len);
17710Sstevel@tonic-gate 			}
17720Sstevel@tonic-gate 			free(databuf.buf);
17730Sstevel@tonic-gate 		}
17740Sstevel@tonic-gate 	}
17750Sstevel@tonic-gate 	/* NOTREACHED */
17760Sstevel@tonic-gate }
17770Sstevel@tonic-gate 
17780Sstevel@tonic-gate /*
17790Sstevel@tonic-gate  * Examine the IPv4 routing table, for default routers. For each default
17800Sstevel@tonic-gate  * router, populate the list of targets of each phyint that is on the same
17810Sstevel@tonic-gate  * link as the default router
17820Sstevel@tonic-gate  */
17830Sstevel@tonic-gate static void
17840Sstevel@tonic-gate ire_process_v4(mib2_ipRouteEntry_t *buf, size_t len)
17850Sstevel@tonic-gate {
17860Sstevel@tonic-gate 	mib2_ipRouteEntry_t	*rp;
17870Sstevel@tonic-gate 	mib2_ipRouteEntry_t	*rp1;
17880Sstevel@tonic-gate 	struct	in_addr		nexthop_v4;
17890Sstevel@tonic-gate 	mib2_ipRouteEntry_t	*endp;
17900Sstevel@tonic-gate 
17910Sstevel@tonic-gate 	if (len == 0)
17920Sstevel@tonic-gate 		return;
17930Sstevel@tonic-gate 	assert((len % sizeof (mib2_ipRouteEntry_t)) == 0);
17940Sstevel@tonic-gate 
17950Sstevel@tonic-gate 	endp = buf + (len / sizeof (mib2_ipRouteEntry_t));
17960Sstevel@tonic-gate 
17970Sstevel@tonic-gate 	/*
17980Sstevel@tonic-gate 	 * Loop thru the routing table entries. Process any IRE_DEFAULT,
17990Sstevel@tonic-gate 	 * IRE_PREFIX, IRE_HOST, IRE_HOST_REDIRECT ire. Ignore the others.
18000Sstevel@tonic-gate 	 * For each such IRE_OFFSUBNET ire, get the nexthop gateway address.
18010Sstevel@tonic-gate 	 * This is a potential target for probing, which we try to add
18020Sstevel@tonic-gate 	 * to the list of probe targets.
18030Sstevel@tonic-gate 	 */
18040Sstevel@tonic-gate 	for (rp = buf; rp < endp; rp++) {
18050Sstevel@tonic-gate 		if (!(rp->ipRouteInfo.re_ire_type & IRE_OFFSUBNET))
18060Sstevel@tonic-gate 			continue;
18070Sstevel@tonic-gate 
18080Sstevel@tonic-gate 		/*  Get the nexthop address. */
18090Sstevel@tonic-gate 		nexthop_v4.s_addr = rp->ipRouteNextHop;
18100Sstevel@tonic-gate 
18110Sstevel@tonic-gate 		/*
18120Sstevel@tonic-gate 		 * Get the nexthop address. Then determine the outgoing
18130Sstevel@tonic-gate 		 * interface, by examining all interface IREs, and picking the
18140Sstevel@tonic-gate 		 * match. We don't look at the interface specified in the route
18150Sstevel@tonic-gate 		 * because we need to add the router target on all matching
18160Sstevel@tonic-gate 		 * interfaces anyway; the goal is to avoid falling back to
18170Sstevel@tonic-gate 		 * multicast when some interfaces are in the same subnet but
18180Sstevel@tonic-gate 		 * not in the same group.
18190Sstevel@tonic-gate 		 */
18200Sstevel@tonic-gate 		for (rp1 = buf; rp1 < endp; rp1++) {
18210Sstevel@tonic-gate 			if (!(rp1->ipRouteInfo.re_ire_type & IRE_INTERFACE)) {
18220Sstevel@tonic-gate 				continue;
18230Sstevel@tonic-gate 			}
18240Sstevel@tonic-gate 
18250Sstevel@tonic-gate 			/*
18260Sstevel@tonic-gate 			 * Determine the interface IRE that matches the nexthop.
18270Sstevel@tonic-gate 			 * i.e.	 (IRE addr & IRE mask) == (nexthop & IRE mask)
18280Sstevel@tonic-gate 			 */
18290Sstevel@tonic-gate 			if ((rp1->ipRouteDest & rp1->ipRouteMask) ==
18300Sstevel@tonic-gate 			    (nexthop_v4.s_addr & rp1->ipRouteMask)) {
18310Sstevel@tonic-gate 				/*
18320Sstevel@tonic-gate 				 * We found the interface ire
18330Sstevel@tonic-gate 				 */
18340Sstevel@tonic-gate 				router_add_v4(rp1, nexthop_v4);
18350Sstevel@tonic-gate 			}
18360Sstevel@tonic-gate 		}
18370Sstevel@tonic-gate 	}
18380Sstevel@tonic-gate }
18390Sstevel@tonic-gate 
18400Sstevel@tonic-gate void
18410Sstevel@tonic-gate router_add_v4(mib2_ipRouteEntry_t *rp1, struct in_addr nexthop_v4)
18420Sstevel@tonic-gate {
18430Sstevel@tonic-gate 	char *cp;
18440Sstevel@tonic-gate 	char ifname[LIFNAMSIZ + 1];
18450Sstevel@tonic-gate 	struct in6_addr	nexthop;
18460Sstevel@tonic-gate 	int len;
18470Sstevel@tonic-gate 
18480Sstevel@tonic-gate 	if (debug & D_TARGET)
18490Sstevel@tonic-gate 		logdebug("router_add_v4()\n");
18500Sstevel@tonic-gate 
18510Sstevel@tonic-gate 	len = MIN(rp1->ipRouteIfIndex.o_length, sizeof (ifname) - 1);
18520Sstevel@tonic-gate 	(void) memcpy(ifname, rp1->ipRouteIfIndex.o_bytes, len);
18530Sstevel@tonic-gate 	ifname[len] = '\0';
18540Sstevel@tonic-gate 
18550Sstevel@tonic-gate 	if (ifname[0] == '\0')
18560Sstevel@tonic-gate 		return;
18570Sstevel@tonic-gate 
18580Sstevel@tonic-gate 	cp = strchr(ifname, IF_SEPARATOR);
18590Sstevel@tonic-gate 	if (cp != NULL)
18600Sstevel@tonic-gate 		*cp = '\0';
18610Sstevel@tonic-gate 
18620Sstevel@tonic-gate 	IN6_INADDR_TO_V4MAPPED(&nexthop_v4, &nexthop);
18630Sstevel@tonic-gate 	router_add_common(AF_INET, ifname, nexthop);
18640Sstevel@tonic-gate }
18650Sstevel@tonic-gate 
18660Sstevel@tonic-gate void
18670Sstevel@tonic-gate router_add_common(int af, char *ifname, struct in6_addr nexthop)
18680Sstevel@tonic-gate {
18690Sstevel@tonic-gate 	struct phyint_instance *pii;
18700Sstevel@tonic-gate 	struct phyint *pi;
18710Sstevel@tonic-gate 
18720Sstevel@tonic-gate 	if (debug & D_TARGET)
18730Sstevel@tonic-gate 		logdebug("router_add_common(%s %s)\n", AF_STR(af), ifname);
18740Sstevel@tonic-gate 
18750Sstevel@tonic-gate 	/*
18760Sstevel@tonic-gate 	 * Retrieve the phyint instance; bail if it's not known to us yet.
18770Sstevel@tonic-gate 	 */
18780Sstevel@tonic-gate 	pii = phyint_inst_lookup(af, ifname);
18790Sstevel@tonic-gate 	if (pii == NULL)
18800Sstevel@tonic-gate 		return;
18810Sstevel@tonic-gate 
18820Sstevel@tonic-gate 	/*
18830Sstevel@tonic-gate 	 * Don't use our own addresses as targets.
18840Sstevel@tonic-gate 	 */
18852250Srk129064 	if (own_address(nexthop))
18860Sstevel@tonic-gate 		return;
18870Sstevel@tonic-gate 
18880Sstevel@tonic-gate 	/*
18890Sstevel@tonic-gate 	 * If the phyint is part a named group, then add the address to all
18900Sstevel@tonic-gate 	 * members of the group; note that this is suboptimal in the IPv4 case
18910Sstevel@tonic-gate 	 * as it has already been added to all matching interfaces in
18920Sstevel@tonic-gate 	 * ire_process_v4(). Otherwise, add the address only to the phyint
18930Sstevel@tonic-gate 	 * itself, since other phyints in the anongroup may not be on the same
18940Sstevel@tonic-gate 	 * subnet.
18950Sstevel@tonic-gate 	 */
18960Sstevel@tonic-gate 	pi = pii->pii_phyint;
18970Sstevel@tonic-gate 	if (pi->pi_group == phyint_anongroup) {
18980Sstevel@tonic-gate 		target_add(pii, nexthop, _B_TRUE);
18990Sstevel@tonic-gate 	} else {
19000Sstevel@tonic-gate 		pi = pi->pi_group->pg_phyint;
19010Sstevel@tonic-gate 		for (; pi != NULL; pi = pi->pi_pgnext)
19020Sstevel@tonic-gate 			target_add(PHYINT_INSTANCE(pi, af), nexthop, _B_TRUE);
19030Sstevel@tonic-gate 	}
19040Sstevel@tonic-gate }
19050Sstevel@tonic-gate 
19060Sstevel@tonic-gate /*
19070Sstevel@tonic-gate  * Examine the IPv6 routing table, for default routers. For each default
19080Sstevel@tonic-gate  * router, populate the list of targets of each phyint that is on the same
19090Sstevel@tonic-gate  * link as the default router
19100Sstevel@tonic-gate  */
19110Sstevel@tonic-gate static void
19120Sstevel@tonic-gate ire_process_v6(mib2_ipv6RouteEntry_t *buf, size_t len)
19130Sstevel@tonic-gate {
19140Sstevel@tonic-gate 	mib2_ipv6RouteEntry_t	*rp;
19150Sstevel@tonic-gate 	mib2_ipv6RouteEntry_t	*endp;
19160Sstevel@tonic-gate 	struct	in6_addr nexthop_v6;
19170Sstevel@tonic-gate 
19180Sstevel@tonic-gate 	if (debug & D_TARGET)
19190Sstevel@tonic-gate 		logdebug("ire_process_v6(len %d)\n", len);
19200Sstevel@tonic-gate 
19210Sstevel@tonic-gate 	if (len == 0)
19220Sstevel@tonic-gate 		return;
19230Sstevel@tonic-gate 
19240Sstevel@tonic-gate 	assert((len % sizeof (mib2_ipv6RouteEntry_t)) == 0);
19250Sstevel@tonic-gate 	endp = buf + (len / sizeof (mib2_ipv6RouteEntry_t));
19260Sstevel@tonic-gate 
19270Sstevel@tonic-gate 	/*
19280Sstevel@tonic-gate 	 * Loop thru the routing table entries. Process any IRE_DEFAULT,
19290Sstevel@tonic-gate 	 * IRE_PREFIX, IRE_HOST, IRE_HOST_REDIRECT ire. Ignore the others.
19300Sstevel@tonic-gate 	 * For each such IRE_OFFSUBNET ire, get the nexthop gateway address.
19310Sstevel@tonic-gate 	 * This is a potential target for probing, which we try to add
19320Sstevel@tonic-gate 	 * to the list of probe targets.
19330Sstevel@tonic-gate 	 */
19340Sstevel@tonic-gate 	for (rp = buf; rp < endp; rp++) {
19350Sstevel@tonic-gate 		if (!(rp->ipv6RouteInfo.re_ire_type & IRE_OFFSUBNET))
19360Sstevel@tonic-gate 			continue;
19370Sstevel@tonic-gate 
19380Sstevel@tonic-gate 		/*
19390Sstevel@tonic-gate 		 * We have the outgoing interface in ipv6RouteIfIndex
19400Sstevel@tonic-gate 		 * if ipv6RouteIfindex.o_length is non-zero. The outgoing
19410Sstevel@tonic-gate 		 * interface must be present for link-local addresses. Since
19420Sstevel@tonic-gate 		 * we use only link-local addreses for probing, we don't
19430Sstevel@tonic-gate 		 * consider the case when the outgoing interface is not
19440Sstevel@tonic-gate 		 * known and we need to scan interface ires
19450Sstevel@tonic-gate 		 */
19460Sstevel@tonic-gate 		nexthop_v6 = rp->ipv6RouteNextHop;
19470Sstevel@tonic-gate 		if (rp->ipv6RouteIfIndex.o_length != 0) {
19480Sstevel@tonic-gate 			/*
19490Sstevel@tonic-gate 			 * We already have the outgoing interface
19500Sstevel@tonic-gate 			 * in ipv6RouteIfIndex.
19510Sstevel@tonic-gate 			 */
19520Sstevel@tonic-gate 			router_add_v6(rp, nexthop_v6);
19530Sstevel@tonic-gate 		}
19540Sstevel@tonic-gate 	}
19550Sstevel@tonic-gate }
19560Sstevel@tonic-gate 
19570Sstevel@tonic-gate 
19580Sstevel@tonic-gate void
19590Sstevel@tonic-gate router_add_v6(mib2_ipv6RouteEntry_t *rp1, struct in6_addr nexthop_v6)
19600Sstevel@tonic-gate {
19610Sstevel@tonic-gate 	char ifname[LIFNAMSIZ + 1];
19620Sstevel@tonic-gate 	char *cp;
19630Sstevel@tonic-gate 	int  len;
19640Sstevel@tonic-gate 
19650Sstevel@tonic-gate 	if (debug & D_TARGET)
19660Sstevel@tonic-gate 		logdebug("router_add_v6()\n");
19670Sstevel@tonic-gate 
19680Sstevel@tonic-gate 	len = MIN(rp1->ipv6RouteIfIndex.o_length, sizeof (ifname) - 1);
19690Sstevel@tonic-gate 	(void) memcpy(ifname, rp1->ipv6RouteIfIndex.o_bytes, len);
19700Sstevel@tonic-gate 	ifname[len] = '\0';
19710Sstevel@tonic-gate 
19720Sstevel@tonic-gate 	if (ifname[0] == '\0')
19730Sstevel@tonic-gate 		return;
19740Sstevel@tonic-gate 
19750Sstevel@tonic-gate 	cp = strchr(ifname, IF_SEPARATOR);
19760Sstevel@tonic-gate 	if (cp != NULL)
19770Sstevel@tonic-gate 		*cp = '\0';
19780Sstevel@tonic-gate 
19790Sstevel@tonic-gate 	router_add_common(AF_INET6, ifname, nexthop_v6);
19800Sstevel@tonic-gate }
19810Sstevel@tonic-gate 
19820Sstevel@tonic-gate 
19830Sstevel@tonic-gate 
19840Sstevel@tonic-gate /*
19850Sstevel@tonic-gate  * Build a list of target routers, by scanning the routing tables.
19860Sstevel@tonic-gate  * It is assumed that interface routes exist, to reach the routers.
19870Sstevel@tonic-gate  */
19880Sstevel@tonic-gate static void
19890Sstevel@tonic-gate init_router_targets(void)
19900Sstevel@tonic-gate {
19910Sstevel@tonic-gate 	struct	target *tg;
19920Sstevel@tonic-gate 	struct	target *next_tg;
19930Sstevel@tonic-gate 	struct	phyint_instance *pii;
19940Sstevel@tonic-gate 	struct	phyint *pi;
19950Sstevel@tonic-gate 
19960Sstevel@tonic-gate 	if (force_mcast)
19970Sstevel@tonic-gate 		return;
19980Sstevel@tonic-gate 
19990Sstevel@tonic-gate 	for (pii = phyint_instances; pii != NULL; pii = pii->pii_next) {
20000Sstevel@tonic-gate 		pi = pii->pii_phyint;
20010Sstevel@tonic-gate 		/*
20020Sstevel@tonic-gate 		 * Exclude ptp and host targets. Set tg_in_use to false,
20030Sstevel@tonic-gate 		 * only for router targets.
20040Sstevel@tonic-gate 		 */
20050Sstevel@tonic-gate 		if (!pii->pii_targets_are_routers ||
20060Sstevel@tonic-gate 		    (pi->pi_flags & IFF_POINTOPOINT))
20070Sstevel@tonic-gate 			continue;
20080Sstevel@tonic-gate 
20090Sstevel@tonic-gate 		for (tg = pii->pii_targets; tg != NULL; tg = tg->tg_next)
20100Sstevel@tonic-gate 			tg->tg_in_use = 0;
20110Sstevel@tonic-gate 	}
20120Sstevel@tonic-gate 
20130Sstevel@tonic-gate 	if (mibfd < 0) {
20140Sstevel@tonic-gate 		mibfd = open("/dev/ip", O_RDWR);
20150Sstevel@tonic-gate 		if (mibfd < 0) {
20160Sstevel@tonic-gate 			logperror("mibopen: ip open");
20170Sstevel@tonic-gate 			exit(1);
20180Sstevel@tonic-gate 		}
20190Sstevel@tonic-gate 	}
20200Sstevel@tonic-gate 
20210Sstevel@tonic-gate 	if (!update_router_list(mibfd)) {
20220Sstevel@tonic-gate 		(void) close(mibfd);
20230Sstevel@tonic-gate 		mibfd = -1;
20240Sstevel@tonic-gate 	}
20250Sstevel@tonic-gate 
20260Sstevel@tonic-gate 	for (pii = phyint_instances; pii != NULL; pii = pii->pii_next) {
20270Sstevel@tonic-gate 		if (!pii->pii_targets_are_routers ||
20280Sstevel@tonic-gate 		    (pi->pi_flags & IFF_POINTOPOINT))
20290Sstevel@tonic-gate 			continue;
20300Sstevel@tonic-gate 
20310Sstevel@tonic-gate 		for (tg = pii->pii_targets; tg != NULL; tg = next_tg) {
20320Sstevel@tonic-gate 			next_tg = tg->tg_next;
20330Sstevel@tonic-gate 			if (!tg->tg_in_use) {
20340Sstevel@tonic-gate 				target_delete(tg);
20350Sstevel@tonic-gate 			}
20360Sstevel@tonic-gate 		}
20370Sstevel@tonic-gate 	}
20380Sstevel@tonic-gate }
20390Sstevel@tonic-gate 
20400Sstevel@tonic-gate /*
20410Sstevel@tonic-gate  * Attempt to assign host targets to any interfaces that do not currently
20420Sstevel@tonic-gate  * have probe targets by sharing targets with other interfaces in the group.
20430Sstevel@tonic-gate  */
20440Sstevel@tonic-gate static void
20450Sstevel@tonic-gate init_host_targets(void)
20460Sstevel@tonic-gate {
20470Sstevel@tonic-gate 	struct phyint_instance *pii;
20480Sstevel@tonic-gate 	struct phyint_group *pg;
20490Sstevel@tonic-gate 
20500Sstevel@tonic-gate 	for (pii = phyint_instances; pii != NULL; pii = pii->pii_next) {
20510Sstevel@tonic-gate 		pg = pii->pii_phyint->pi_group;
20520Sstevel@tonic-gate 		if (pg != phyint_anongroup && pii->pii_targets == NULL)
20530Sstevel@tonic-gate 			dup_host_targets(pii);
20540Sstevel@tonic-gate 	}
20550Sstevel@tonic-gate }
20560Sstevel@tonic-gate 
20570Sstevel@tonic-gate /*
20580Sstevel@tonic-gate  * Duplicate host targets from other phyints of the group to
20590Sstevel@tonic-gate  * the phyint instance 'desired_pii'.
20600Sstevel@tonic-gate  */
20610Sstevel@tonic-gate static void
20620Sstevel@tonic-gate dup_host_targets(struct phyint_instance	 *desired_pii)
20630Sstevel@tonic-gate {
20640Sstevel@tonic-gate 	int af;
20650Sstevel@tonic-gate 	struct phyint *pi;
20660Sstevel@tonic-gate 	struct phyint_instance *pii;
20670Sstevel@tonic-gate 	struct target *tg;
20680Sstevel@tonic-gate 
20690Sstevel@tonic-gate 	assert(desired_pii->pii_phyint->pi_group != phyint_anongroup);
20700Sstevel@tonic-gate 
20710Sstevel@tonic-gate 	af = desired_pii->pii_af;
20720Sstevel@tonic-gate 
20730Sstevel@tonic-gate 	/*
20740Sstevel@tonic-gate 	 * For every phyint in the same group as desired_pii, check if
20750Sstevel@tonic-gate 	 * it has any host targets. If so add them to desired_pii.
20760Sstevel@tonic-gate 	 */
20770Sstevel@tonic-gate 	for (pi = desired_pii->pii_phyint; pi != NULL; pi = pi->pi_pgnext) {
20780Sstevel@tonic-gate 		pii = PHYINT_INSTANCE(pi, af);
20790Sstevel@tonic-gate 		/*
20800Sstevel@tonic-gate 		 * We know that we don't have targets on this phyint instance
20810Sstevel@tonic-gate 		 * since we have been called. But we still check for
20820Sstevel@tonic-gate 		 * pii_targets_are_routers because another phyint instance
20830Sstevel@tonic-gate 		 * could have router targets, since IFF_NOFAILOVER addresses
20840Sstevel@tonic-gate 		 * on different phyint instances may belong to different
20850Sstevel@tonic-gate 		 * subnets.
20860Sstevel@tonic-gate 		 */
20870Sstevel@tonic-gate 		if ((pii == NULL) || (pii == desired_pii) ||
20880Sstevel@tonic-gate 		    pii->pii_targets_are_routers)
20890Sstevel@tonic-gate 			continue;
20900Sstevel@tonic-gate 		for (tg = pii->pii_targets; tg != NULL; tg = tg->tg_next) {
20910Sstevel@tonic-gate 			target_create(desired_pii, tg->tg_address, _B_FALSE);
20920Sstevel@tonic-gate 		}
20930Sstevel@tonic-gate 	}
20940Sstevel@tonic-gate }
20950Sstevel@tonic-gate 
/*
 * Write a one-line usage message for the program named `cmd' to stderr.
 */
static void
usage(char *cmd)
{
	FILE *out = stderr;

	(void) fprintf(out, "usage: %s\n", cmd);
}
21010Sstevel@tonic-gate 
21020Sstevel@tonic-gate 
21030Sstevel@tonic-gate #define	MPATHD_DEFAULT_FILE	"/etc/default/mpathd"
21040Sstevel@tonic-gate 
21050Sstevel@tonic-gate /* Get an option from the /etc/default/mpathd file */
21060Sstevel@tonic-gate static char *
21070Sstevel@tonic-gate getdefault(char *name)
21080Sstevel@tonic-gate {
21090Sstevel@tonic-gate 	char namebuf[BUFSIZ];
21100Sstevel@tonic-gate 	char *value = NULL;
21110Sstevel@tonic-gate 
21120Sstevel@tonic-gate 	if (defopen(MPATHD_DEFAULT_FILE) == 0) {
21130Sstevel@tonic-gate 		char	*cp;
21140Sstevel@tonic-gate 		int	flags;
21150Sstevel@tonic-gate 
21160Sstevel@tonic-gate 		/*
21170Sstevel@tonic-gate 		 * ignore case
21180Sstevel@tonic-gate 		 */
21190Sstevel@tonic-gate 		flags = defcntl(DC_GETFLAGS, 0);
21200Sstevel@tonic-gate 		TURNOFF(flags, DC_CASE);
21210Sstevel@tonic-gate 		(void) defcntl(DC_SETFLAGS, flags);
21220Sstevel@tonic-gate 
21230Sstevel@tonic-gate 		/* Add "=" to the name */
21240Sstevel@tonic-gate 		(void) strncpy(namebuf, name, sizeof (namebuf) - 2);
21250Sstevel@tonic-gate 		(void) strncat(namebuf, "=", 2);
21260Sstevel@tonic-gate 
21270Sstevel@tonic-gate 		if ((cp = defread(namebuf)) != NULL)
21280Sstevel@tonic-gate 			value = strdup(cp);
21290Sstevel@tonic-gate 
21300Sstevel@tonic-gate 		/* close */
21310Sstevel@tonic-gate 		(void) defopen((char *)NULL);
21320Sstevel@tonic-gate 	}
21330Sstevel@tonic-gate 	return (value);
21340Sstevel@tonic-gate }
21350Sstevel@tonic-gate 
21360Sstevel@tonic-gate 
21370Sstevel@tonic-gate /*
21380Sstevel@tonic-gate  * Command line options below
21390Sstevel@tonic-gate  */
21400Sstevel@tonic-gate boolean_t	failback_enabled = _B_TRUE;	/* failback enabled/disabled */
21410Sstevel@tonic-gate boolean_t	track_all_phyints = _B_FALSE;	/* option to track all NICs */
21420Sstevel@tonic-gate static boolean_t adopt = _B_FALSE;
21430Sstevel@tonic-gate static boolean_t foreground = _B_FALSE;
21440Sstevel@tonic-gate 
21450Sstevel@tonic-gate int
21460Sstevel@tonic-gate main(int argc, char *argv[])
21470Sstevel@tonic-gate {
21480Sstevel@tonic-gate 	int i;
21490Sstevel@tonic-gate 	int c;
21500Sstevel@tonic-gate 	struct phyint_instance *pii;
21510Sstevel@tonic-gate 	char *value;
21520Sstevel@tonic-gate 
21530Sstevel@tonic-gate 	argv0 = argv;		/* Saved for re-exec on SIGHUP */
21540Sstevel@tonic-gate 	srandom(gethostid());	/* Initialize the random number generator */
21550Sstevel@tonic-gate 
21560Sstevel@tonic-gate 	/*
21570Sstevel@tonic-gate 	 * NOTE: The messages output by in.mpathd are not suitable for
21580Sstevel@tonic-gate 	 * translation, so we do not call textdomain().
21590Sstevel@tonic-gate 	 */
21600Sstevel@tonic-gate 	(void) setlocale(LC_ALL, "");
21610Sstevel@tonic-gate 
21620Sstevel@tonic-gate 	/*
21630Sstevel@tonic-gate 	 * Get the user specified value of 'failure detection time'
21640Sstevel@tonic-gate 	 * from /etc/default/mpathd
21650Sstevel@tonic-gate 	 */
21660Sstevel@tonic-gate 	value = getdefault("FAILURE_DETECTION_TIME");
21670Sstevel@tonic-gate 	if (value != NULL) {
21680Sstevel@tonic-gate 		user_failure_detection_time =
21690Sstevel@tonic-gate 		    (int)strtol((char *)value, NULL, 0);
21700Sstevel@tonic-gate 
21710Sstevel@tonic-gate 		if (user_failure_detection_time <= 0) {
21720Sstevel@tonic-gate 			user_failure_detection_time = FAILURE_DETECTION_TIME;
21730Sstevel@tonic-gate 			logerr("Invalid failure detection time %s, assuming "
21740Sstevel@tonic-gate 			    "default %d\n", value, user_failure_detection_time);
21750Sstevel@tonic-gate 
21760Sstevel@tonic-gate 		} else if (user_failure_detection_time <
21770Sstevel@tonic-gate 		    MIN_FAILURE_DETECTION_TIME) {
21780Sstevel@tonic-gate 			user_failure_detection_time =
21790Sstevel@tonic-gate 			    MIN_FAILURE_DETECTION_TIME;
21800Sstevel@tonic-gate 			logerr("Too small failure detection time of %s, "
21810Sstevel@tonic-gate 			    "assuming minimum %d\n", value,
21820Sstevel@tonic-gate 			    user_failure_detection_time);
21830Sstevel@tonic-gate 		}
21840Sstevel@tonic-gate 		free(value);
21850Sstevel@tonic-gate 	} else {
21860Sstevel@tonic-gate 		/* User has not specified the parameter, Use default value */
21870Sstevel@tonic-gate 		user_failure_detection_time = FAILURE_DETECTION_TIME;
21880Sstevel@tonic-gate 	}
21890Sstevel@tonic-gate 
21900Sstevel@tonic-gate 	/*
21910Sstevel@tonic-gate 	 * This gives the frequency at which probes will be sent.
21920Sstevel@tonic-gate 	 * When fdt ms elapses, we should be able to determine
21930Sstevel@tonic-gate 	 * whether 5 consecutive probes have failed or not.
21940Sstevel@tonic-gate 	 * 1 probe will be sent in every user_probe_interval ms,
21950Sstevel@tonic-gate 	 * randomly anytime in the (0.5  - 1.0) 2nd half of every
21960Sstevel@tonic-gate 	 * user_probe_interval. Thus when we send out probe 'n' we
21970Sstevel@tonic-gate 	 * can be sure that probe 'n - 2' is lost, if we have not
21980Sstevel@tonic-gate 	 * got the ack. (since the probe interval is > crtt). But
21990Sstevel@tonic-gate 	 * probe 'n - 1' may be a valid unacked probe, since the
22000Sstevel@tonic-gate 	 * time between 2 successive probes could be as small as
22010Sstevel@tonic-gate 	 * 0.5 * user_probe_interval.  Hence the NUM_PROBE_FAILS + 2
22020Sstevel@tonic-gate 	 */
22030Sstevel@tonic-gate 	user_probe_interval = user_failure_detection_time /
22040Sstevel@tonic-gate 	    (NUM_PROBE_FAILS + 2);
22050Sstevel@tonic-gate 
22060Sstevel@tonic-gate 	/*
22070Sstevel@tonic-gate 	 * Get the user specified value of failback_enabled from
22080Sstevel@tonic-gate 	 * /etc/default/mpathd
22090Sstevel@tonic-gate 	 */
22100Sstevel@tonic-gate 	value = getdefault("FAILBACK");
22110Sstevel@tonic-gate 	if (value != NULL) {
22120Sstevel@tonic-gate 		if (strncasecmp(value, "yes", 3) == 0)
22130Sstevel@tonic-gate 			failback_enabled = _B_TRUE;
22140Sstevel@tonic-gate 		else if (strncasecmp(value, "no", 2) == 0)
22150Sstevel@tonic-gate 			failback_enabled = _B_FALSE;
22160Sstevel@tonic-gate 		else
22170Sstevel@tonic-gate 			logerr("Invalid value for FAILBACK %s\n", value);
22180Sstevel@tonic-gate 		free(value);
22190Sstevel@tonic-gate 	} else {
22200Sstevel@tonic-gate 		failback_enabled = _B_TRUE;
22210Sstevel@tonic-gate 	}
22220Sstevel@tonic-gate 
22230Sstevel@tonic-gate 	/*
22240Sstevel@tonic-gate 	 * Get the user specified value of track_all_phyints from
22250Sstevel@tonic-gate 	 * /etc/default/mpathd. The sense is reversed in
22260Sstevel@tonic-gate 	 * TRACK_INTERFACES_ONLY_WITH_GROUPS.
22270Sstevel@tonic-gate 	 */
22280Sstevel@tonic-gate 	value = getdefault("TRACK_INTERFACES_ONLY_WITH_GROUPS");
22290Sstevel@tonic-gate 	if (value != NULL) {
22300Sstevel@tonic-gate 		if (strncasecmp(value, "yes", 3) == 0)
22310Sstevel@tonic-gate 			track_all_phyints = _B_FALSE;
22320Sstevel@tonic-gate 		else if (strncasecmp(value, "no", 2) == 0)
22330Sstevel@tonic-gate 			track_all_phyints = _B_TRUE;
22340Sstevel@tonic-gate 		else
22350Sstevel@tonic-gate 			logerr("Invalid value for "
22360Sstevel@tonic-gate 			    "TRACK_INTERFACES_ONLY_WITH_GROUPS %s\n", value);
22370Sstevel@tonic-gate 		free(value);
22380Sstevel@tonic-gate 	} else {
22390Sstevel@tonic-gate 		track_all_phyints = _B_FALSE;
22400Sstevel@tonic-gate 	}
22410Sstevel@tonic-gate 
22420Sstevel@tonic-gate 	while ((c = getopt(argc, argv, "adD:ml")) != EOF) {
22430Sstevel@tonic-gate 		switch (c) {
22440Sstevel@tonic-gate 		case 'a':
22450Sstevel@tonic-gate 			adopt = _B_TRUE;
22460Sstevel@tonic-gate 			break;
22470Sstevel@tonic-gate 		case 'm':
22480Sstevel@tonic-gate 			force_mcast = _B_TRUE;
22490Sstevel@tonic-gate 			break;
22500Sstevel@tonic-gate 		case 'd':
22510Sstevel@tonic-gate 			debug = D_ALL;
22520Sstevel@tonic-gate 			foreground = _B_TRUE;
22530Sstevel@tonic-gate 			break;
22540Sstevel@tonic-gate 		case 'D':
22550Sstevel@tonic-gate 			i = (int)strtol(optarg, NULL, 0);
22560Sstevel@tonic-gate 			if (i == 0) {
22570Sstevel@tonic-gate 				(void) fprintf(stderr, "Bad debug flags: %s\n",
22580Sstevel@tonic-gate 				    optarg);
22590Sstevel@tonic-gate 				exit(1);
22600Sstevel@tonic-gate 			}
22610Sstevel@tonic-gate 			debug |= i;
22620Sstevel@tonic-gate 			foreground = _B_TRUE;
22630Sstevel@tonic-gate 			break;
22640Sstevel@tonic-gate 		case 'l':
22650Sstevel@tonic-gate 			/*
22660Sstevel@tonic-gate 			 * Turn off link state notification handling.
22670Sstevel@tonic-gate 			 * Undocumented command line flag, for debugging
22680Sstevel@tonic-gate 			 * purposes.
22690Sstevel@tonic-gate 			 */
22700Sstevel@tonic-gate 			handle_link_notifications = _B_FALSE;
22710Sstevel@tonic-gate 			break;
22720Sstevel@tonic-gate 		default:
22730Sstevel@tonic-gate 			usage(argv[0]);
22740Sstevel@tonic-gate 			exit(1);
22750Sstevel@tonic-gate 		}
22760Sstevel@tonic-gate 	}
22770Sstevel@tonic-gate 
22780Sstevel@tonic-gate 	/*
22790Sstevel@tonic-gate 	 * The sockets for the loopback command interface should be listening
22800Sstevel@tonic-gate 	 * before we fork and exit in daemonize(). This way, whoever started us
22810Sstevel@tonic-gate 	 * can use the loopback interface as soon as they get a zero exit
22820Sstevel@tonic-gate 	 * status.
22830Sstevel@tonic-gate 	 */
22840Sstevel@tonic-gate 	lsock_v4 = setup_listener(AF_INET);
22850Sstevel@tonic-gate 	lsock_v6 = setup_listener(AF_INET6);
22860Sstevel@tonic-gate 
22870Sstevel@tonic-gate 	if (lsock_v4 < 0 && lsock_v6 < 0) {
22880Sstevel@tonic-gate 		logerr("main: setup_listener failed for both IPv4 and IPv6\n");
22890Sstevel@tonic-gate 		exit(1);
22900Sstevel@tonic-gate 	}
22910Sstevel@tonic-gate 
22920Sstevel@tonic-gate 	if (!foreground) {
22930Sstevel@tonic-gate 		if (!daemonize()) {
22940Sstevel@tonic-gate 			logerr("cannot daemonize\n");
22950Sstevel@tonic-gate 			exit(EXIT_FAILURE);
22960Sstevel@tonic-gate 		}
22970Sstevel@tonic-gate 		initlog();
22980Sstevel@tonic-gate 	}
22990Sstevel@tonic-gate 
23000Sstevel@tonic-gate 	/*
23010Sstevel@tonic-gate 	 * Initializations:
23020Sstevel@tonic-gate 	 * 1. Create ifsock* sockets. These are used for performing SIOC*
23030Sstevel@tonic-gate 	 *    ioctls. We have 2 sockets 1 each for IPv4 and IPv6.
23040Sstevel@tonic-gate 	 * 2. Initialize a pipe for handling/recording signal events.
23050Sstevel@tonic-gate 	 * 3. Create the routing sockets,  used for listening
23060Sstevel@tonic-gate 	 *    to routing / interface changes.
23070Sstevel@tonic-gate 	 * 4. phyint_init() - Initialize physical interface state
23080Sstevel@tonic-gate 	 *    (in mpd_tables.c).  Must be done before creating interfaces,
23090Sstevel@tonic-gate 	 *    which timer_init() does indirectly.
23100Sstevel@tonic-gate 	 * 5. timer_init()  - Initialize timer related stuff
23110Sstevel@tonic-gate 	 * 6. initifs() - Initialize our database of all known interfaces
23120Sstevel@tonic-gate 	 * 7. init_router_targets() - Initialize our database of all known
23130Sstevel@tonic-gate 	 *    router targets.
23140Sstevel@tonic-gate 	 */
23150Sstevel@tonic-gate 	ifsock_v4 = socket(AF_INET, SOCK_DGRAM, 0);
23160Sstevel@tonic-gate 	if (ifsock_v4 < 0) {
23170Sstevel@tonic-gate 		logperror("main: IPv4 socket open");
23180Sstevel@tonic-gate 		exit(1);
23190Sstevel@tonic-gate 	}
23200Sstevel@tonic-gate 
23210Sstevel@tonic-gate 	ifsock_v6 = socket(AF_INET6, SOCK_DGRAM, 0);
23220Sstevel@tonic-gate 	if (ifsock_v6 < 0) {
23230Sstevel@tonic-gate 		logperror("main: IPv6 socket open");
23240Sstevel@tonic-gate 		exit(1);
23250Sstevel@tonic-gate 	}
23260Sstevel@tonic-gate 
23270Sstevel@tonic-gate 	setup_eventpipe();
23280Sstevel@tonic-gate 
23290Sstevel@tonic-gate 	rtsock_v4 = setup_rtsock(AF_INET);
23300Sstevel@tonic-gate 	rtsock_v6 = setup_rtsock(AF_INET6);
23310Sstevel@tonic-gate 
23320Sstevel@tonic-gate 	if (phyint_init() == -1) {
23330Sstevel@tonic-gate 		logerr("cannot initialize physical interface structures");
23340Sstevel@tonic-gate 		exit(1);
23350Sstevel@tonic-gate 	}
23360Sstevel@tonic-gate 
23370Sstevel@tonic-gate 	timer_init();
23380Sstevel@tonic-gate 
23390Sstevel@tonic-gate 	initifs();
23400Sstevel@tonic-gate 
2341704Sethindra 	/* Inform kernel whether failback is enabled or disabled */
2342704Sethindra 	if (ioctl(ifsock_v4, SIOCSIPMPFAILBACK, (int *)&failback_enabled) < 0) {
2343704Sethindra 		logperror("main: ioctl (SIOCSIPMPFAILBACK)");
2344704Sethindra 		exit(1);
2345704Sethindra 	}
2346704Sethindra 
23470Sstevel@tonic-gate 	/*
23480Sstevel@tonic-gate 	 * If we're operating in "adopt" mode and no interfaces need to be
23490Sstevel@tonic-gate 	 * tracked, shut down (ifconfig(1M) will restart us on demand if
23500Sstevel@tonic-gate 	 * interfaces are subsequently put into multipathing groups).
23510Sstevel@tonic-gate 	 */
23520Sstevel@tonic-gate 	if (adopt && phyint_instances == NULL)
23530Sstevel@tonic-gate 		exit(0);
23540Sstevel@tonic-gate 
23550Sstevel@tonic-gate 	/*
23560Sstevel@tonic-gate 	 * Main body. Keep listening for activity on any of the sockets
23570Sstevel@tonic-gate 	 * that we are monitoring and take appropriate action as necessary.
23580Sstevel@tonic-gate 	 * signals are also handled synchronously.
23590Sstevel@tonic-gate 	 */
23600Sstevel@tonic-gate 	for (;;) {
23610Sstevel@tonic-gate 		if (poll(pollfds, pollfd_num, -1) < 0) {
23620Sstevel@tonic-gate 			if (errno == EINTR)
23630Sstevel@tonic-gate 				continue;
23640Sstevel@tonic-gate 			logperror("main: poll");
23650Sstevel@tonic-gate 			exit(1);
23660Sstevel@tonic-gate 		}
23670Sstevel@tonic-gate 		for (i = 0; i < pollfd_num; i++) {
23680Sstevel@tonic-gate 			if ((pollfds[i].fd == -1) ||
23690Sstevel@tonic-gate 			    !(pollfds[i].revents & POLLIN))
23700Sstevel@tonic-gate 				continue;
23710Sstevel@tonic-gate 			if (pollfds[i].fd == eventpipe_read) {
23720Sstevel@tonic-gate 				in_signal(eventpipe_read);
23730Sstevel@tonic-gate 				break;
23740Sstevel@tonic-gate 			}
23750Sstevel@tonic-gate 			if (pollfds[i].fd == rtsock_v4 ||
2376*2496Smeem 			    pollfds[i].fd == rtsock_v6) {
23770Sstevel@tonic-gate 				process_rtsock(rtsock_v4, rtsock_v6);
23780Sstevel@tonic-gate 				break;
23790Sstevel@tonic-gate 			}
23800Sstevel@tonic-gate 			for (pii = phyint_instances; pii != NULL;
23810Sstevel@tonic-gate 			    pii = pii->pii_next) {
23820Sstevel@tonic-gate 				if (pollfds[i].fd == pii->pii_probe_sock) {
23830Sstevel@tonic-gate 					if (pii->pii_af == AF_INET)
23840Sstevel@tonic-gate 						in_data(pii);
23850Sstevel@tonic-gate 					else
23860Sstevel@tonic-gate 						in6_data(pii);
23870Sstevel@tonic-gate 					break;
23880Sstevel@tonic-gate 				}
23890Sstevel@tonic-gate 			}
23900Sstevel@tonic-gate 			if (pollfds[i].fd == lsock_v4)
23910Sstevel@tonic-gate 				loopback_cmd(lsock_v4, AF_INET);
23920Sstevel@tonic-gate 			else if (pollfds[i].fd == lsock_v6)
23930Sstevel@tonic-gate 				loopback_cmd(lsock_v6, AF_INET6);
23940Sstevel@tonic-gate 		}
23950Sstevel@tonic-gate 		if (full_scan_required) {
23960Sstevel@tonic-gate 			initifs();
23970Sstevel@tonic-gate 			full_scan_required = _B_FALSE;
23980Sstevel@tonic-gate 		}
23990Sstevel@tonic-gate 	}
24000Sstevel@tonic-gate 	/* NOTREACHED */
24010Sstevel@tonic-gate 	return (EXIT_SUCCESS);
24020Sstevel@tonic-gate }
24030Sstevel@tonic-gate 
24040Sstevel@tonic-gate static int
24050Sstevel@tonic-gate setup_listener(int af)
24060Sstevel@tonic-gate {
24070Sstevel@tonic-gate 	int sock;
24080Sstevel@tonic-gate 	int on;
24090Sstevel@tonic-gate 	int len;
24100Sstevel@tonic-gate 	int ret;
24110Sstevel@tonic-gate 	struct sockaddr_storage laddr;
24120Sstevel@tonic-gate 	struct sockaddr_in  *sin;
24130Sstevel@tonic-gate 	struct sockaddr_in6 *sin6;
24140Sstevel@tonic-gate 	struct in6_addr loopback_addr = IN6ADDR_LOOPBACK_INIT;
24150Sstevel@tonic-gate 
24160Sstevel@tonic-gate 	assert(af == AF_INET || af == AF_INET6);
24170Sstevel@tonic-gate 
24180Sstevel@tonic-gate 	sock = socket(af, SOCK_STREAM, 0);
24190Sstevel@tonic-gate 	if (sock < 0) {
24200Sstevel@tonic-gate 		logperror("setup_listener: socket");
24210Sstevel@tonic-gate 		exit(1);
24220Sstevel@tonic-gate 	}
24230Sstevel@tonic-gate 
24240Sstevel@tonic-gate 	on = 1;
24250Sstevel@tonic-gate 	if (setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, (char *)&on,
24260Sstevel@tonic-gate 	    sizeof (on)) < 0) {
24270Sstevel@tonic-gate 		logperror("setup_listener: setsockopt (SO_REUSEADDR)");
24280Sstevel@tonic-gate 		exit(1);
24290Sstevel@tonic-gate 	}
24300Sstevel@tonic-gate 
24310Sstevel@tonic-gate 	bzero(&laddr, sizeof (laddr));
24320Sstevel@tonic-gate 	laddr.ss_family = af;
24330Sstevel@tonic-gate 
24340Sstevel@tonic-gate 	if (af == AF_INET) {
24350Sstevel@tonic-gate 		sin = (struct sockaddr_in *)&laddr;
24360Sstevel@tonic-gate 		sin->sin_port = htons(MPATHD_PORT);
24370Sstevel@tonic-gate 		sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
24380Sstevel@tonic-gate 		len = sizeof (struct sockaddr_in);
24390Sstevel@tonic-gate 	} else {
24400Sstevel@tonic-gate 		sin6 = (struct sockaddr_in6 *)&laddr;
24410Sstevel@tonic-gate 		sin6->sin6_port = htons(MPATHD_PORT);
24420Sstevel@tonic-gate 		sin6->sin6_addr = loopback_addr;
24430Sstevel@tonic-gate 		len = sizeof (struct sockaddr_in6);
24440Sstevel@tonic-gate 	}
24450Sstevel@tonic-gate 
24460Sstevel@tonic-gate 	ret = bind(sock, (struct sockaddr *)&laddr, len);
24470Sstevel@tonic-gate 	if (ret < 0) {
24480Sstevel@tonic-gate 		if (errno == EADDRINUSE) {
24490Sstevel@tonic-gate 			/*
24500Sstevel@tonic-gate 			 * Another instance of mpathd may be already active.
24510Sstevel@tonic-gate 			 */
24520Sstevel@tonic-gate 			logerr("main: is another instance of in.mpathd "
24530Sstevel@tonic-gate 			    "already active?\n");
24540Sstevel@tonic-gate 			exit(1);
24550Sstevel@tonic-gate 		} else {
24560Sstevel@tonic-gate 			(void) close(sock);
24570Sstevel@tonic-gate 			return (-1);
24580Sstevel@tonic-gate 		}
24590Sstevel@tonic-gate 	}
24600Sstevel@tonic-gate 	if (listen(sock, 30) < 0) {
24610Sstevel@tonic-gate 		logperror("main: listen");
24620Sstevel@tonic-gate 		exit(1);
24630Sstevel@tonic-gate 	}
24640Sstevel@tonic-gate 	if (poll_add(sock) == -1) {
24650Sstevel@tonic-gate 		(void) close(sock);
24660Sstevel@tonic-gate 		exit(1);
24670Sstevel@tonic-gate 	}
24680Sstevel@tonic-gate 
24690Sstevel@tonic-gate 	return (sock);
24700Sstevel@tonic-gate }
24710Sstevel@tonic-gate 
24720Sstevel@tonic-gate /*
24730Sstevel@tonic-gate  * Table of commands and their expected size; used by loopback_cmd().
24740Sstevel@tonic-gate  */
24750Sstevel@tonic-gate static struct {
24760Sstevel@tonic-gate 	const char	*name;
24770Sstevel@tonic-gate 	unsigned int	size;
24780Sstevel@tonic-gate } commands[] = {
24790Sstevel@tonic-gate 	{ "MI_PING",		sizeof (uint32_t)	},
24800Sstevel@tonic-gate 	{ "MI_OFFLINE",		sizeof (mi_offline_t)	},
24810Sstevel@tonic-gate 	{ "MI_UNDO_OFFLINE",	sizeof (mi_undo_offline_t) },
24820Sstevel@tonic-gate 	{ "MI_SETOINDEX",	sizeof (mi_setoindex_t) },
24830Sstevel@tonic-gate 	{ "MI_QUERY",		sizeof (mi_query_t)	}
24840Sstevel@tonic-gate };
24850Sstevel@tonic-gate 
24860Sstevel@tonic-gate /*
24870Sstevel@tonic-gate  * Commands received over the loopback interface come here. Currently
24880Sstevel@tonic-gate  * the agents that send commands are ifconfig, if_mpadm and the RCM IPMP
24890Sstevel@tonic-gate  * module. ifconfig only makes a connection, and closes it to check if
24900Sstevel@tonic-gate  * in.mpathd is running.
24910Sstevel@tonic-gate  * if_mpadm sends commands in the format specified by the mpathd_interface
24920Sstevel@tonic-gate  * structure.
24930Sstevel@tonic-gate  */
24940Sstevel@tonic-gate static void
24950Sstevel@tonic-gate loopback_cmd(int sock, int family)
24960Sstevel@tonic-gate {
24970Sstevel@tonic-gate 	int newfd;
24980Sstevel@tonic-gate 	ssize_t len;
24990Sstevel@tonic-gate 	struct sockaddr_storage	peer;
25000Sstevel@tonic-gate 	struct sockaddr_in	*peer_sin;
25010Sstevel@tonic-gate 	struct sockaddr_in6	*peer_sin6;
25020Sstevel@tonic-gate 	socklen_t peerlen;
25030Sstevel@tonic-gate 	union mi_commands mpi;
25040Sstevel@tonic-gate 	struct in6_addr loopback_addr = IN6ADDR_LOOPBACK_INIT;
25050Sstevel@tonic-gate 	char abuf[INET6_ADDRSTRLEN];
25060Sstevel@tonic-gate 	uint_t cmd;
25070Sstevel@tonic-gate 	int retval;
25080Sstevel@tonic-gate 
25090Sstevel@tonic-gate 	peerlen = sizeof (peer);
25100Sstevel@tonic-gate 	newfd = accept(sock, (struct sockaddr *)&peer, &peerlen);
25110Sstevel@tonic-gate 	if (newfd < 0) {
25120Sstevel@tonic-gate 		logperror("loopback_cmd: accept");
25130Sstevel@tonic-gate 		return;
25140Sstevel@tonic-gate 	}
25150Sstevel@tonic-gate 
25160Sstevel@tonic-gate 	switch (family) {
25170Sstevel@tonic-gate 	case AF_INET:
25180Sstevel@tonic-gate 		/*
25190Sstevel@tonic-gate 		 * Validate the address and port to make sure that
25200Sstevel@tonic-gate 		 * non privileged processes don't connect and start
25210Sstevel@tonic-gate 		 * talking to us.
25220Sstevel@tonic-gate 		 */
25230Sstevel@tonic-gate 		if (peerlen != sizeof (struct sockaddr_in)) {
25240Sstevel@tonic-gate 			logerr("loopback_cmd: AF_INET peerlen %d\n", peerlen);
25250Sstevel@tonic-gate 			(void) close(newfd);
25260Sstevel@tonic-gate 			return;
25270Sstevel@tonic-gate 		}
25280Sstevel@tonic-gate 		peer_sin = (struct sockaddr_in *)&peer;
25290Sstevel@tonic-gate 		if ((ntohs(peer_sin->sin_port) >= IPPORT_RESERVED) ||
25300Sstevel@tonic-gate 		    (ntohl(peer_sin->sin_addr.s_addr) != INADDR_LOOPBACK)) {
25310Sstevel@tonic-gate 			(void) inet_ntop(AF_INET, &peer_sin->sin_addr.s_addr,
25320Sstevel@tonic-gate 			    abuf, sizeof (abuf));
25330Sstevel@tonic-gate 			logerr("Attempt to connect from addr %s port %d\n",
25340Sstevel@tonic-gate 			    abuf, ntohs(peer_sin->sin_port));
25350Sstevel@tonic-gate 			(void) close(newfd);
25360Sstevel@tonic-gate 			return;
25370Sstevel@tonic-gate 		}
25380Sstevel@tonic-gate 		break;
25390Sstevel@tonic-gate 
25400Sstevel@tonic-gate 	case AF_INET6:
25410Sstevel@tonic-gate 		if (peerlen != sizeof (struct sockaddr_in6)) {
25420Sstevel@tonic-gate 			logerr("loopback_cmd: AF_INET6 peerlen %d\n", peerlen);
25430Sstevel@tonic-gate 			(void) close(newfd);
25440Sstevel@tonic-gate 			return;
25450Sstevel@tonic-gate 		}
25460Sstevel@tonic-gate 		/*
25470Sstevel@tonic-gate 		 * Validate the address and port to make sure that
25480Sstevel@tonic-gate 		 * non privileged processes don't connect and start
25490Sstevel@tonic-gate 		 * talking to us.
25500Sstevel@tonic-gate 		 */
25510Sstevel@tonic-gate 		peer_sin6 = (struct sockaddr_in6 *)&peer;
25520Sstevel@tonic-gate 		if ((ntohs(peer_sin6->sin6_port) >= IPPORT_RESERVED) ||
25530Sstevel@tonic-gate 		    (!IN6_ARE_ADDR_EQUAL(&peer_sin6->sin6_addr,
25540Sstevel@tonic-gate 		    &loopback_addr))) {
25550Sstevel@tonic-gate 			(void) inet_ntop(AF_INET6, &peer_sin6->sin6_addr, abuf,
25560Sstevel@tonic-gate 			    sizeof (abuf));
25570Sstevel@tonic-gate 			logerr("Attempt to connect from addr %s port %d\n",
25580Sstevel@tonic-gate 			    abuf, ntohs(peer_sin6->sin6_port));
25590Sstevel@tonic-gate 			(void) close(newfd);
25600Sstevel@tonic-gate 			return;
25610Sstevel@tonic-gate 		}
25620Sstevel@tonic-gate 
25630Sstevel@tonic-gate 	default:
25640Sstevel@tonic-gate 		logdebug("loopback_cmd: family %d\n", family);
25650Sstevel@tonic-gate 		(void) close(newfd);
25660Sstevel@tonic-gate 		return;
25670Sstevel@tonic-gate 	}
25680Sstevel@tonic-gate 
25690Sstevel@tonic-gate 	/*
25700Sstevel@tonic-gate 	 * The sizeof the 'mpi' buffer corresponds to the maximum size of
25710Sstevel@tonic-gate 	 * all supported commands
25720Sstevel@tonic-gate 	 */
25730Sstevel@tonic-gate 	len = read(newfd, &mpi, sizeof (mpi));
25740Sstevel@tonic-gate 
25750Sstevel@tonic-gate 	/*
25760Sstevel@tonic-gate 	 * ifconfig does not send any data. Just tests to see if mpathd
25770Sstevel@tonic-gate 	 * is already running.
25780Sstevel@tonic-gate 	 */
25790Sstevel@tonic-gate 	if (len <= 0) {
25800Sstevel@tonic-gate 		(void) close(newfd);
25810Sstevel@tonic-gate 		return;
25820Sstevel@tonic-gate 	}
25830Sstevel@tonic-gate 
25840Sstevel@tonic-gate 	/*
25850Sstevel@tonic-gate 	 * In theory, we can receive any sized message for a stream socket,
25860Sstevel@tonic-gate 	 * but we don't expect that to happen for a small message over a
25870Sstevel@tonic-gate 	 * loopback connection.
25880Sstevel@tonic-gate 	 */
25890Sstevel@tonic-gate 	if (len < sizeof (uint32_t)) {
25900Sstevel@tonic-gate 		logerr("loopback_cmd: bad command format or read returns "
25910Sstevel@tonic-gate 		    "partial data %d\n", len);
25920Sstevel@tonic-gate 	}
25930Sstevel@tonic-gate 
25940Sstevel@tonic-gate 	cmd = mpi.mi_command;
25950Sstevel@tonic-gate 	if (cmd >= MI_NCMD) {
25960Sstevel@tonic-gate 		logerr("loopback_cmd: unknown command id `%d'\n", cmd);
25970Sstevel@tonic-gate 		(void) close(newfd);
25980Sstevel@tonic-gate 		return;
25990Sstevel@tonic-gate 	}
26000Sstevel@tonic-gate 
26010Sstevel@tonic-gate 	if (len < commands[cmd].size) {
26020Sstevel@tonic-gate 		logerr("loopback_cmd: short %s command (expected %d, got %d)\n",
26030Sstevel@tonic-gate 		    commands[cmd].name, commands[cmd].size, len);
26040Sstevel@tonic-gate 		(void) close(newfd);
26050Sstevel@tonic-gate 		return;
26060Sstevel@tonic-gate 	}
26070Sstevel@tonic-gate 
26080Sstevel@tonic-gate 	retval = process_cmd(newfd, &mpi);
26090Sstevel@tonic-gate 	if (retval != IPMP_SUCCESS) {
26100Sstevel@tonic-gate 		logerr("failed processing %s: %s\n", commands[cmd].name,
26110Sstevel@tonic-gate 		    ipmp_errmsg(retval));
26120Sstevel@tonic-gate 	}
26130Sstevel@tonic-gate 	(void) close(newfd);
26140Sstevel@tonic-gate }
26150Sstevel@tonic-gate 
26160Sstevel@tonic-gate extern int global_errno;	/* set by failover() or failback() */
26170Sstevel@tonic-gate 
26180Sstevel@tonic-gate /*
26190Sstevel@tonic-gate  * Process the offline, undo offline and set original index commands,
26200Sstevel@tonic-gate  * received from if_mpadm(1M)
26210Sstevel@tonic-gate  */
26220Sstevel@tonic-gate static unsigned int
26230Sstevel@tonic-gate process_cmd(int newfd, union mi_commands *mpi)
26240Sstevel@tonic-gate {
26250Sstevel@tonic-gate 	uint_t	nif = 0;
26260Sstevel@tonic-gate 	uint32_t cmd;
26270Sstevel@tonic-gate 	struct phyint *pi;
26280Sstevel@tonic-gate 	struct phyint *pi2;
26290Sstevel@tonic-gate 	struct phyint_group *pg;
26300Sstevel@tonic-gate 	boolean_t success;
26310Sstevel@tonic-gate 	int error;
26320Sstevel@tonic-gate 	struct mi_offline *mio;
26330Sstevel@tonic-gate 	struct mi_undo_offline *miu;
26340Sstevel@tonic-gate 	struct lifreq lifr;
26350Sstevel@tonic-gate 	int ifsock;
26360Sstevel@tonic-gate 	struct mi_setoindex *mis;
26370Sstevel@tonic-gate 
26380Sstevel@tonic-gate 	cmd = mpi->mi_command;
26390Sstevel@tonic-gate 
26400Sstevel@tonic-gate 	switch (cmd) {
26410Sstevel@tonic-gate 	case MI_OFFLINE:
26420Sstevel@tonic-gate 		mio = &mpi->mi_ocmd;
26430Sstevel@tonic-gate 		/*
26440Sstevel@tonic-gate 		 * Lookup the interface that needs to be offlined.
26450Sstevel@tonic-gate 		 * If it does not exist, return a suitable error.
26460Sstevel@tonic-gate 		 */
26470Sstevel@tonic-gate 		pi = phyint_lookup(mio->mio_ifname);
26480Sstevel@tonic-gate 		if (pi == NULL)
26490Sstevel@tonic-gate 			return (send_result(newfd, IPMP_FAILURE, EINVAL));
26500Sstevel@tonic-gate 
26510Sstevel@tonic-gate 		/*
26520Sstevel@tonic-gate 		 * Verify that the minimum redundancy requirements are met.
26530Sstevel@tonic-gate 		 * The multipathing group must have at least the specified
26540Sstevel@tonic-gate 		 * number of functional interfaces after offlining the
26550Sstevel@tonic-gate 		 * requested interface. Otherwise return a suitable error.
26560Sstevel@tonic-gate 		 */
26570Sstevel@tonic-gate 		pg = pi->pi_group;
26580Sstevel@tonic-gate 		nif = 0;
26590Sstevel@tonic-gate 		if (pg != phyint_anongroup) {
26600Sstevel@tonic-gate 			for (nif = 0, pi2 = pg->pg_phyint; pi2 != NULL;
26610Sstevel@tonic-gate 			    pi2 = pi2->pi_pgnext) {
26620Sstevel@tonic-gate 				if ((pi2->pi_state == PI_RUNNING) ||
26630Sstevel@tonic-gate 				    (pg->pg_groupfailed &&
26640Sstevel@tonic-gate 				    !(pi2->pi_flags & IFF_OFFLINE)))
26650Sstevel@tonic-gate 					nif++;
26660Sstevel@tonic-gate 			}
26670Sstevel@tonic-gate 		}
26680Sstevel@tonic-gate 		if (nif < mio->mio_min_redundancy)
26690Sstevel@tonic-gate 			return (send_result(newfd, IPMP_EMINRED, 0));
26700Sstevel@tonic-gate 
26710Sstevel@tonic-gate 		/*
26720Sstevel@tonic-gate 		 * The order of operation is to set IFF_OFFLINE, followed by
26730Sstevel@tonic-gate 		 * failover. Setting IFF_OFFLINE ensures that no new ipif's
26740Sstevel@tonic-gate 		 * can be created. Subsequent failover moves everything on
26750Sstevel@tonic-gate 		 * the OFFLINE interface to some other functional interface.
26760Sstevel@tonic-gate 		 */
26770Sstevel@tonic-gate 		success = change_lif_flags(pi, IFF_OFFLINE, _B_TRUE);
26780Sstevel@tonic-gate 		if (success) {
26790Sstevel@tonic-gate 			if (!pi->pi_empty) {
26800Sstevel@tonic-gate 				error = try_failover(pi, FAILOVER_NORMAL);
26810Sstevel@tonic-gate 				if (error != 0) {
26820Sstevel@tonic-gate 					if (!change_lif_flags(pi, IFF_OFFLINE,
26830Sstevel@tonic-gate 					    _B_FALSE)) {
26840Sstevel@tonic-gate 						logerr("process_cmd: couldn't"
26850Sstevel@tonic-gate 						    " clear OFFLINE flag on"
26860Sstevel@tonic-gate 						    " %s\n", pi->pi_name);
26870Sstevel@tonic-gate 						/*
26880Sstevel@tonic-gate 						 * Offline interfaces should
26890Sstevel@tonic-gate 						 * not be probed.
26900Sstevel@tonic-gate 						 */
26910Sstevel@tonic-gate 						stop_probing(pi);
26920Sstevel@tonic-gate 					}
26930Sstevel@tonic-gate 					return (send_result(newfd, error,
26940Sstevel@tonic-gate 					    global_errno));
26950Sstevel@tonic-gate 				}
26960Sstevel@tonic-gate 			}
26970Sstevel@tonic-gate 		} else {
26980Sstevel@tonic-gate 			return (send_result(newfd, IPMP_FAILURE, errno));
26990Sstevel@tonic-gate 		}
27000Sstevel@tonic-gate 
27010Sstevel@tonic-gate 		/*
27020Sstevel@tonic-gate 		 * The interface is now Offline, so stop probing it.
27030Sstevel@tonic-gate 		 * Note that if_mpadm(1M) will down the test addresses,
27040Sstevel@tonic-gate 		 * after receiving a success reply from us. The routing
27050Sstevel@tonic-gate 		 * socket message will then make us close the socket used
27060Sstevel@tonic-gate 		 * for sending probes. But it is more logical that an
27070Sstevel@tonic-gate 		 * offlined interface must not be probed, even if it has
27080Sstevel@tonic-gate 		 * test addresses.
27090Sstevel@tonic-gate 		 */
27100Sstevel@tonic-gate 		stop_probing(pi);
27110Sstevel@tonic-gate 		return (send_result(newfd, IPMP_SUCCESS, 0));
27120Sstevel@tonic-gate 
27130Sstevel@tonic-gate 	case MI_UNDO_OFFLINE:
27140Sstevel@tonic-gate 		miu = &mpi->mi_ucmd;
27150Sstevel@tonic-gate 		/*
27160Sstevel@tonic-gate 		 * Undo the offline command. As usual lookup the interface.
2717*2496Smeem 		 * Send an error if it does not exist or is not offline.
27180Sstevel@tonic-gate 		 */
27190Sstevel@tonic-gate 		pi = phyint_lookup(miu->miu_ifname);
2720*2496Smeem 		if (pi == NULL || pi->pi_state != PI_OFFLINE)
27210Sstevel@tonic-gate 			return (send_result(newfd, IPMP_FAILURE, EINVAL));
27220Sstevel@tonic-gate 
27230Sstevel@tonic-gate 		/*
2724*2496Smeem 		 * Reset the state of the interface based on the current link
2725*2496Smeem 		 * state; if this phyint subsequently acquires a test address,
2726*2496Smeem 		 * the state will be updated later as a result of the probes.
27270Sstevel@tonic-gate 		 */
2728*2496Smeem 		if (LINK_UP(pi))
2729*2496Smeem 			phyint_chstate(pi, PI_RUNNING);
2730*2496Smeem 		else
2731*2496Smeem 			phyint_chstate(pi, PI_FAILED);
2732*2496Smeem 
2733*2496Smeem 		if (pi->pi_state == PI_RUNNING) {
27340Sstevel@tonic-gate 			/*
2735*2496Smeem 			 * Note that the success of MI_UNDO_OFFLINE is not
2736*2496Smeem 			 * contingent on actually failing back; in the odd
2737*2496Smeem 			 * case where we cannot do it here, we will try again
2738*2496Smeem 			 * in initifs() since pi->pi_full will still be zero.
27390Sstevel@tonic-gate 			 */
2740*2496Smeem 			if (do_failback(pi) != IPMP_SUCCESS) {
2741*2496Smeem 				logdebug("process_cmd: cannot failback from "
2742*2496Smeem 				    "%s during MI_UNDO_OFFLINE\n", pi->pi_name);
2743*2496Smeem 			}
27440Sstevel@tonic-gate 		}
2745*2496Smeem 
2746*2496Smeem 		/*
2747*2496Smeem 		 * Clear the IFF_OFFLINE flag.  We have to do this last
2748*2496Smeem 		 * because do_failback() relies on it being set to decide
2749*2496Smeem 		 * when to display messages.
2750*2496Smeem 		 */
2751*2496Smeem 		(void) change_lif_flags(pi, IFF_OFFLINE, _B_FALSE);
2752*2496Smeem 
2753*2496Smeem 		return (send_result(newfd, IPMP_SUCCESS, 0));
27540Sstevel@tonic-gate 
27550Sstevel@tonic-gate 	case MI_SETOINDEX:
27560Sstevel@tonic-gate 		mis = &mpi->mi_scmd;
27570Sstevel@tonic-gate 
27580Sstevel@tonic-gate 		/* Get the socket for doing ioctls */
27590Sstevel@tonic-gate 		ifsock = (mis->mis_iftype == AF_INET) ? ifsock_v4 : ifsock_v6;
27600Sstevel@tonic-gate 
27610Sstevel@tonic-gate 		/*
27620Sstevel@tonic-gate 		 * Get index of new original interface.
27630Sstevel@tonic-gate 		 * The index is returned in lifr.lifr_index.
27640Sstevel@tonic-gate 		 */
27650Sstevel@tonic-gate 		(void) strlcpy(lifr.lifr_name, mis->mis_new_pifname,
27660Sstevel@tonic-gate 		    sizeof (lifr.lifr_name));
27670Sstevel@tonic-gate 
27680Sstevel@tonic-gate 		if (ioctl(ifsock, SIOCGLIFINDEX, (char *)&lifr) < 0)
27690Sstevel@tonic-gate 			return (send_result(newfd, IPMP_FAILURE, errno));
27700Sstevel@tonic-gate 
27710Sstevel@tonic-gate 		/*
27720Sstevel@tonic-gate 		 * Set new original interface index.
27730Sstevel@tonic-gate 		 * The new index was put into lifr.lifr_index by the
27740Sstevel@tonic-gate 		 * SIOCGLIFINDEX ioctl.
27750Sstevel@tonic-gate 		 */
27760Sstevel@tonic-gate 		(void) strlcpy(lifr.lifr_name, mis->mis_lifname,
27770Sstevel@tonic-gate 		    sizeof (lifr.lifr_name));
27780Sstevel@tonic-gate 
27790Sstevel@tonic-gate 		if (ioctl(ifsock, SIOCSLIFOINDEX, (char *)&lifr) < 0)
27800Sstevel@tonic-gate 			return (send_result(newfd, IPMP_FAILURE, errno));
27810Sstevel@tonic-gate 
27820Sstevel@tonic-gate 		return (send_result(newfd, IPMP_SUCCESS, 0));
27830Sstevel@tonic-gate 
27840Sstevel@tonic-gate 	case MI_QUERY:
27850Sstevel@tonic-gate 		return (process_query(newfd, &mpi->mi_qcmd));
27860Sstevel@tonic-gate 
27870Sstevel@tonic-gate 	default:
27880Sstevel@tonic-gate 		break;
27890Sstevel@tonic-gate 	}
27900Sstevel@tonic-gate 
27910Sstevel@tonic-gate 	return (send_result(newfd, IPMP_EPROTO, 0));
27920Sstevel@tonic-gate }
27930Sstevel@tonic-gate 
27940Sstevel@tonic-gate /*
27950Sstevel@tonic-gate  * Process the query request pointed to by `miq' and send a reply on file
27960Sstevel@tonic-gate  * descriptor `fd'.  Returns an IPMP error code.
27970Sstevel@tonic-gate  */
27980Sstevel@tonic-gate static unsigned int
27990Sstevel@tonic-gate process_query(int fd, mi_query_t *miq)
28000Sstevel@tonic-gate {
28010Sstevel@tonic-gate 	ipmp_groupinfo_t	*grinfop;
28020Sstevel@tonic-gate 	ipmp_groupinfolist_t	*grlp;
28030Sstevel@tonic-gate 	ipmp_grouplist_t	*grlistp;
28040Sstevel@tonic-gate 	ipmp_ifinfo_t		*ifinfop;
28050Sstevel@tonic-gate 	ipmp_ifinfolist_t	*iflp;
28060Sstevel@tonic-gate 	ipmp_snap_t		*snap;
28070Sstevel@tonic-gate 	unsigned int		retval;
28080Sstevel@tonic-gate 
28090Sstevel@tonic-gate 	switch (miq->miq_inforeq) {
28100Sstevel@tonic-gate 	case IPMP_GROUPLIST:
28110Sstevel@tonic-gate 		retval = getgrouplist(&grlistp);
28120Sstevel@tonic-gate 		if (retval != IPMP_SUCCESS)
28130Sstevel@tonic-gate 			return (send_result(fd, retval, errno));
28140Sstevel@tonic-gate 
28150Sstevel@tonic-gate 		retval = send_result(fd, IPMP_SUCCESS, 0);
28160Sstevel@tonic-gate 		if (retval == IPMP_SUCCESS)
28170Sstevel@tonic-gate 			retval = send_grouplist(fd, grlistp);
28180Sstevel@tonic-gate 
28190Sstevel@tonic-gate 		ipmp_freegrouplist(grlistp);
28200Sstevel@tonic-gate 		return (retval);
28210Sstevel@tonic-gate 
28220Sstevel@tonic-gate 	case IPMP_GROUPINFO:
28230Sstevel@tonic-gate 		miq->miq_grname[LIFGRNAMSIZ - 1] = '\0';
28240Sstevel@tonic-gate 		retval = getgroupinfo(miq->miq_ifname, &grinfop);
28250Sstevel@tonic-gate 		if (retval != IPMP_SUCCESS)
28260Sstevel@tonic-gate 			return (send_result(fd, retval, errno));
28270Sstevel@tonic-gate 
28280Sstevel@tonic-gate 		retval = send_result(fd, IPMP_SUCCESS, 0);
28290Sstevel@tonic-gate 		if (retval == IPMP_SUCCESS)
28300Sstevel@tonic-gate 			retval = send_groupinfo(fd, grinfop);
28310Sstevel@tonic-gate 
28320Sstevel@tonic-gate 		ipmp_freegroupinfo(grinfop);
28330Sstevel@tonic-gate 		return (retval);
28340Sstevel@tonic-gate 
28350Sstevel@tonic-gate 	case IPMP_IFINFO:
28360Sstevel@tonic-gate 		miq->miq_ifname[LIFNAMSIZ - 1] = '\0';
28370Sstevel@tonic-gate 		retval = getifinfo(miq->miq_ifname, &ifinfop);
28380Sstevel@tonic-gate 		if (retval != IPMP_SUCCESS)
28390Sstevel@tonic-gate 			return (send_result(fd, retval, errno));
28400Sstevel@tonic-gate 
28410Sstevel@tonic-gate 		retval = send_result(fd, IPMP_SUCCESS, 0);
28420Sstevel@tonic-gate 		if (retval == IPMP_SUCCESS)
28430Sstevel@tonic-gate 			retval = send_ifinfo(fd, ifinfop);
28440Sstevel@tonic-gate 
28450Sstevel@tonic-gate 		ipmp_freeifinfo(ifinfop);
28460Sstevel@tonic-gate 		return (retval);
28470Sstevel@tonic-gate 
28480Sstevel@tonic-gate 	case IPMP_SNAP:
28490Sstevel@tonic-gate 		retval = getsnap(&snap);
28500Sstevel@tonic-gate 		if (retval != IPMP_SUCCESS)
28510Sstevel@tonic-gate 			return (send_result(fd, retval, errno));
28520Sstevel@tonic-gate 
28530Sstevel@tonic-gate 		retval = send_result(fd, IPMP_SUCCESS, 0);
28540Sstevel@tonic-gate 		if (retval != IPMP_SUCCESS)
28550Sstevel@tonic-gate 			goto out;
28560Sstevel@tonic-gate 
28570Sstevel@tonic-gate 		retval = ipmp_writetlv(fd, IPMP_SNAP, sizeof (*snap), snap);
28580Sstevel@tonic-gate 		if (retval != IPMP_SUCCESS)
28590Sstevel@tonic-gate 			goto out;
28600Sstevel@tonic-gate 
28610Sstevel@tonic-gate 		retval = send_grouplist(fd, snap->sn_grlistp);
28620Sstevel@tonic-gate 		if (retval != IPMP_SUCCESS)
28630Sstevel@tonic-gate 			goto out;
28640Sstevel@tonic-gate 
28650Sstevel@tonic-gate 		iflp = snap->sn_ifinfolistp;
28660Sstevel@tonic-gate 		for (; iflp != NULL; iflp = iflp->ifl_next) {
28670Sstevel@tonic-gate 			retval = send_ifinfo(fd, iflp->ifl_ifinfop);
28680Sstevel@tonic-gate 			if (retval != IPMP_SUCCESS)
28690Sstevel@tonic-gate 				goto out;
28700Sstevel@tonic-gate 		}
28710Sstevel@tonic-gate 
28720Sstevel@tonic-gate 		grlp = snap->sn_grinfolistp;
28730Sstevel@tonic-gate 		for (; grlp != NULL; grlp = grlp->grl_next) {
28740Sstevel@tonic-gate 			retval = send_groupinfo(fd, grlp->grl_grinfop);
28750Sstevel@tonic-gate 			if (retval != IPMP_SUCCESS)
28760Sstevel@tonic-gate 				goto out;
28770Sstevel@tonic-gate 		}
28780Sstevel@tonic-gate 	out:
28790Sstevel@tonic-gate 		ipmp_snap_free(snap);
28800Sstevel@tonic-gate 		return (retval);
28810Sstevel@tonic-gate 
28820Sstevel@tonic-gate 	default:
28830Sstevel@tonic-gate 		break;
28840Sstevel@tonic-gate 
28850Sstevel@tonic-gate 	}
28860Sstevel@tonic-gate 	return (send_result(fd, IPMP_EPROTO, 0));
28870Sstevel@tonic-gate }
28880Sstevel@tonic-gate 
28890Sstevel@tonic-gate /*
28900Sstevel@tonic-gate  * Send the group information pointed to by `grinfop' on file descriptor `fd'.
28910Sstevel@tonic-gate  * Returns an IPMP error code.
28920Sstevel@tonic-gate  */
28930Sstevel@tonic-gate static unsigned int
28940Sstevel@tonic-gate send_groupinfo(int fd, ipmp_groupinfo_t *grinfop)
28950Sstevel@tonic-gate {
28960Sstevel@tonic-gate 	ipmp_iflist_t	*iflistp = grinfop->gr_iflistp;
28970Sstevel@tonic-gate 	unsigned int	retval;
28980Sstevel@tonic-gate 
28990Sstevel@tonic-gate 	retval = ipmp_writetlv(fd, IPMP_GROUPINFO, sizeof (*grinfop), grinfop);
29000Sstevel@tonic-gate 	if (retval != IPMP_SUCCESS)
29010Sstevel@tonic-gate 		return (retval);
29020Sstevel@tonic-gate 
29030Sstevel@tonic-gate 	return (ipmp_writetlv(fd, IPMP_IFLIST,
29040Sstevel@tonic-gate 	    IPMP_IFLIST_SIZE(iflistp->il_nif), iflistp));
29050Sstevel@tonic-gate }
29060Sstevel@tonic-gate 
29070Sstevel@tonic-gate /*
29080Sstevel@tonic-gate  * Send the interface information pointed to by `ifinfop' on file descriptor
29090Sstevel@tonic-gate  * `fd'.  Returns an IPMP error code.
29100Sstevel@tonic-gate  */
29110Sstevel@tonic-gate static unsigned int
29120Sstevel@tonic-gate send_ifinfo(int fd, ipmp_ifinfo_t *ifinfop)
29130Sstevel@tonic-gate {
29140Sstevel@tonic-gate 	return (ipmp_writetlv(fd, IPMP_IFINFO, sizeof (*ifinfop), ifinfop));
29150Sstevel@tonic-gate }
29160Sstevel@tonic-gate 
29170Sstevel@tonic-gate /*
29180Sstevel@tonic-gate  * Send the group list pointed to by `grlistp' on file descriptor `fd'.
29190Sstevel@tonic-gate  * Returns an IPMP error code.
29200Sstevel@tonic-gate  */
29210Sstevel@tonic-gate static unsigned int
29220Sstevel@tonic-gate send_grouplist(int fd, ipmp_grouplist_t *grlistp)
29230Sstevel@tonic-gate {
29240Sstevel@tonic-gate 	return (ipmp_writetlv(fd, IPMP_GROUPLIST,
29250Sstevel@tonic-gate 	    IPMP_GROUPLIST_SIZE(grlistp->gl_ngroup), grlistp));
29260Sstevel@tonic-gate }
29270Sstevel@tonic-gate 
29280Sstevel@tonic-gate /*
29290Sstevel@tonic-gate  * Initialize an mi_result_t structure using `error' and `syserror' and
29300Sstevel@tonic-gate  * send it on file descriptor `fd'.  Returns an IPMP error code.
29310Sstevel@tonic-gate  */
29320Sstevel@tonic-gate static unsigned int
29330Sstevel@tonic-gate send_result(int fd, unsigned int error, int syserror)
29340Sstevel@tonic-gate {
29350Sstevel@tonic-gate 	mi_result_t me;
29360Sstevel@tonic-gate 
29370Sstevel@tonic-gate 	me.me_mpathd_error = error;
29380Sstevel@tonic-gate 	if (error == IPMP_FAILURE)
29390Sstevel@tonic-gate 		me.me_sys_error = syserror;
29400Sstevel@tonic-gate 	else
29410Sstevel@tonic-gate 		me.me_sys_error = 0;
29420Sstevel@tonic-gate 
29430Sstevel@tonic-gate 	return (ipmp_write(fd, &me, sizeof (me)));
29440Sstevel@tonic-gate }
29450Sstevel@tonic-gate 
29460Sstevel@tonic-gate /*
29470Sstevel@tonic-gate  * Daemonize the process.
29480Sstevel@tonic-gate  */
29490Sstevel@tonic-gate static boolean_t
29500Sstevel@tonic-gate daemonize(void)
29510Sstevel@tonic-gate {
29520Sstevel@tonic-gate 	switch (fork()) {
29530Sstevel@tonic-gate 	case -1:
29540Sstevel@tonic-gate 		return (_B_FALSE);
29550Sstevel@tonic-gate 
29560Sstevel@tonic-gate 	case  0:
29570Sstevel@tonic-gate 		/*
29580Sstevel@tonic-gate 		 * Lose our controlling terminal, and become both a session
29590Sstevel@tonic-gate 		 * leader and a process group leader.
29600Sstevel@tonic-gate 		 */
29610Sstevel@tonic-gate 		if (setsid() == -1)
29620Sstevel@tonic-gate 			return (_B_FALSE);
29630Sstevel@tonic-gate 
29640Sstevel@tonic-gate 		/*
29650Sstevel@tonic-gate 		 * Under POSIX, a session leader can accidentally (through
29660Sstevel@tonic-gate 		 * open(2)) acquire a controlling terminal if it does not
29670Sstevel@tonic-gate 		 * have one.  Just to be safe, fork() again so we are not a
29680Sstevel@tonic-gate 		 * session leader.
29690Sstevel@tonic-gate 		 */
29700Sstevel@tonic-gate 		switch (fork()) {
29710Sstevel@tonic-gate 		case -1:
29720Sstevel@tonic-gate 			return (_B_FALSE);
29730Sstevel@tonic-gate 
29740Sstevel@tonic-gate 		case 0:
29750Sstevel@tonic-gate 			(void) chdir("/");
29760Sstevel@tonic-gate 			(void) umask(022);
29770Sstevel@tonic-gate 			(void) fdwalk(closefunc, NULL);
29780Sstevel@tonic-gate 			break;
29790Sstevel@tonic-gate 
29800Sstevel@tonic-gate 		default:
29810Sstevel@tonic-gate 			_exit(EXIT_SUCCESS);
29820Sstevel@tonic-gate 		}
29830Sstevel@tonic-gate 		break;
29840Sstevel@tonic-gate 
29850Sstevel@tonic-gate 	default:
29860Sstevel@tonic-gate 		_exit(EXIT_SUCCESS);
29870Sstevel@tonic-gate 	}
29880Sstevel@tonic-gate 
29890Sstevel@tonic-gate 	return (_B_TRUE);
29900Sstevel@tonic-gate }
29910Sstevel@tonic-gate 
29920Sstevel@tonic-gate /*
29930Sstevel@tonic-gate  * The parent has created some fds before forking on purpose, keep them open.
29940Sstevel@tonic-gate  */
29950Sstevel@tonic-gate static int
29960Sstevel@tonic-gate closefunc(void *not_used, int fd)
29970Sstevel@tonic-gate /* ARGSUSED */
29980Sstevel@tonic-gate {
29990Sstevel@tonic-gate 	if (fd != lsock_v4 && fd != lsock_v6)
30000Sstevel@tonic-gate 		(void) close(fd);
30010Sstevel@tonic-gate 	return (0);
30020Sstevel@tonic-gate }
30030Sstevel@tonic-gate 
30040Sstevel@tonic-gate /* LOGGER */
30050Sstevel@tonic-gate 
30060Sstevel@tonic-gate #include <syslog.h>
30070Sstevel@tonic-gate 
30080Sstevel@tonic-gate /*
30090Sstevel@tonic-gate  * Logging routines.  All routines log to syslog, unless the daemon is
30100Sstevel@tonic-gate  * running in the foreground, in which case the logging goes to stderr.
30110Sstevel@tonic-gate  *
30120Sstevel@tonic-gate  * The following routines are available:
30130Sstevel@tonic-gate  *
30140Sstevel@tonic-gate  *	logdebug(): A printf-like function for outputting debug messages
30150Sstevel@tonic-gate  *	(messages at LOG_DEBUG) that are only of use to developers.
30160Sstevel@tonic-gate  *
30170Sstevel@tonic-gate  *	logtrace(): A printf-like function for outputting tracing messages
30180Sstevel@tonic-gate  *	(messages at LOG_INFO) from the daemon.	 This is typically used
30190Sstevel@tonic-gate  *	to log the receipt of interesting network-related conditions.
30200Sstevel@tonic-gate  *
30210Sstevel@tonic-gate  *	logerr(): A printf-like function for outputting error messages
30220Sstevel@tonic-gate  *	(messages at LOG_ERR) from the daemon.
30230Sstevel@tonic-gate  *
30240Sstevel@tonic-gate  *	logperror*(): A set of functions used to output error messages
30250Sstevel@tonic-gate  *	(messages at LOG_ERR); these automatically append strerror(errno)
30260Sstevel@tonic-gate  *	and a newline to the message passed to them.
30270Sstevel@tonic-gate  *
30280Sstevel@tonic-gate  * NOTE: since the logging functions write to syslog, the messages passed
30290Sstevel@tonic-gate  *	 to them are not eligible for localization.  Thus, gettext() must
30300Sstevel@tonic-gate  *	 *not* be used.
30310Sstevel@tonic-gate  */
30320Sstevel@tonic-gate 
30330Sstevel@tonic-gate static int logging = 0;
30340Sstevel@tonic-gate 
30350Sstevel@tonic-gate static void
30360Sstevel@tonic-gate initlog(void)
30370Sstevel@tonic-gate {
30380Sstevel@tonic-gate 	logging++;
30390Sstevel@tonic-gate 	openlog("in.mpathd", LOG_PID | LOG_CONS, LOG_DAEMON);
30400Sstevel@tonic-gate }
30410Sstevel@tonic-gate 
30420Sstevel@tonic-gate /* PRINTFLIKE1 */
30430Sstevel@tonic-gate void
30440Sstevel@tonic-gate logerr(char *fmt, ...)
30450Sstevel@tonic-gate {
30460Sstevel@tonic-gate 	va_list ap;
30470Sstevel@tonic-gate 
30480Sstevel@tonic-gate 	va_start(ap, fmt);
30490Sstevel@tonic-gate 
30500Sstevel@tonic-gate 	if (logging)
30510Sstevel@tonic-gate 		vsyslog(LOG_ERR, fmt, ap);
30520Sstevel@tonic-gate 	else
30530Sstevel@tonic-gate 		(void) vfprintf(stderr, fmt, ap);
30540Sstevel@tonic-gate 	va_end(ap);
30550Sstevel@tonic-gate }
30560Sstevel@tonic-gate 
30570Sstevel@tonic-gate /* PRINTFLIKE1 */
30580Sstevel@tonic-gate void
30590Sstevel@tonic-gate logtrace(char *fmt, ...)
30600Sstevel@tonic-gate {
30610Sstevel@tonic-gate 	va_list ap;
30620Sstevel@tonic-gate 
30630Sstevel@tonic-gate 	va_start(ap, fmt);
30640Sstevel@tonic-gate 
30650Sstevel@tonic-gate 	if (logging)
30660Sstevel@tonic-gate 		vsyslog(LOG_INFO, fmt, ap);
30670Sstevel@tonic-gate 	else
30680Sstevel@tonic-gate 		(void) vfprintf(stderr, fmt, ap);
30690Sstevel@tonic-gate 	va_end(ap);
30700Sstevel@tonic-gate }
30710Sstevel@tonic-gate 
30720Sstevel@tonic-gate /* PRINTFLIKE1 */
30730Sstevel@tonic-gate void
30740Sstevel@tonic-gate logdebug(char *fmt, ...)
30750Sstevel@tonic-gate {
30760Sstevel@tonic-gate 	va_list ap;
30770Sstevel@tonic-gate 
30780Sstevel@tonic-gate 	va_start(ap, fmt);
30790Sstevel@tonic-gate 
30800Sstevel@tonic-gate 	if (logging)
30810Sstevel@tonic-gate 		vsyslog(LOG_DEBUG, fmt, ap);
30820Sstevel@tonic-gate 	else
30830Sstevel@tonic-gate 		(void) vfprintf(stderr, fmt, ap);
30840Sstevel@tonic-gate 	va_end(ap);
30850Sstevel@tonic-gate }
30860Sstevel@tonic-gate 
30870Sstevel@tonic-gate /* PRINTFLIKE1 */
30880Sstevel@tonic-gate void
30890Sstevel@tonic-gate logperror(char *str)
30900Sstevel@tonic-gate {
30910Sstevel@tonic-gate 	if (logging)
30920Sstevel@tonic-gate 		syslog(LOG_ERR, "%s: %m\n", str);
30930Sstevel@tonic-gate 	else
30940Sstevel@tonic-gate 		(void) fprintf(stderr, "%s: %s\n", str, strerror(errno));
30950Sstevel@tonic-gate }
30960Sstevel@tonic-gate 
30970Sstevel@tonic-gate void
30980Sstevel@tonic-gate logperror_pii(struct phyint_instance *pii, char *str)
30990Sstevel@tonic-gate {
31000Sstevel@tonic-gate 	if (logging) {
31010Sstevel@tonic-gate 		syslog(LOG_ERR, "%s (%s %s): %m\n",
31020Sstevel@tonic-gate 		    str, AF_STR(pii->pii_af), pii->pii_phyint->pi_name);
31030Sstevel@tonic-gate 	} else {
31040Sstevel@tonic-gate 		(void) fprintf(stderr, "%s (%s %s): %s\n",
31050Sstevel@tonic-gate 		    str, AF_STR(pii->pii_af), pii->pii_phyint->pi_name,
31060Sstevel@tonic-gate 		    strerror(errno));
31070Sstevel@tonic-gate 	}
31080Sstevel@tonic-gate }
31090Sstevel@tonic-gate 
31100Sstevel@tonic-gate void
31110Sstevel@tonic-gate logperror_li(struct logint *li, char *str)
31120Sstevel@tonic-gate {
31130Sstevel@tonic-gate 	struct	phyint_instance	*pii = li->li_phyint_inst;
31140Sstevel@tonic-gate 
31150Sstevel@tonic-gate 	if (logging) {
31160Sstevel@tonic-gate 		syslog(LOG_ERR, "%s (%s %s): %m\n",
31170Sstevel@tonic-gate 		    str, AF_STR(pii->pii_af), li->li_name);
31180Sstevel@tonic-gate 	} else {
31190Sstevel@tonic-gate 		(void) fprintf(stderr, "%s (%s %s): %s\n",
31200Sstevel@tonic-gate 		    str, AF_STR(pii->pii_af), li->li_name,
31210Sstevel@tonic-gate 		    strerror(errno));
31220Sstevel@tonic-gate 	}
31230Sstevel@tonic-gate }
31240Sstevel@tonic-gate 
31250Sstevel@tonic-gate void
31260Sstevel@tonic-gate close_probe_socket(struct phyint_instance *pii, boolean_t polled)
31270Sstevel@tonic-gate {
31280Sstevel@tonic-gate 	if (polled)
31290Sstevel@tonic-gate 		(void) poll_remove(pii->pii_probe_sock);
31300Sstevel@tonic-gate 	(void) close(pii->pii_probe_sock);
31310Sstevel@tonic-gate 	pii->pii_probe_sock = -1;
31320Sstevel@tonic-gate 	pii->pii_basetime_inited = 0;
31330Sstevel@tonic-gate }
3134