xref: /onnv-gate/usr/src/cmd/cmd-inet/usr.lib/in.mpathd/mpd_tables.c (revision 4262:6c2e7bc181f4)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
250Sstevel@tonic-gate 
260Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
270Sstevel@tonic-gate 
280Sstevel@tonic-gate #include "mpd_defs.h"
290Sstevel@tonic-gate #include "mpd_tables.h"
300Sstevel@tonic-gate 
310Sstevel@tonic-gate /*
320Sstevel@tonic-gate  * Global list of phyints, phyint instances, phyint groups and the anonymous
330Sstevel@tonic-gate  * group; the latter is initialized in phyint_init().
340Sstevel@tonic-gate  */
350Sstevel@tonic-gate struct phyint *phyints = NULL;
360Sstevel@tonic-gate struct phyint_instance	*phyint_instances = NULL;
370Sstevel@tonic-gate struct phyint_group *phyint_groups = NULL;
380Sstevel@tonic-gate struct phyint_group *phyint_anongroup;
390Sstevel@tonic-gate 
400Sstevel@tonic-gate /*
410Sstevel@tonic-gate  * Grouplist signature; initialized in phyint_init().
420Sstevel@tonic-gate  */
430Sstevel@tonic-gate static uint64_t phyint_grouplistsig;
440Sstevel@tonic-gate 
450Sstevel@tonic-gate static void phyint_inst_insert(struct phyint_instance *pii);
460Sstevel@tonic-gate static void phyint_inst_print(struct phyint_instance *pii);
470Sstevel@tonic-gate 
480Sstevel@tonic-gate static void phyint_insert(struct phyint *pi, struct phyint_group *pg);
490Sstevel@tonic-gate static void phyint_delete(struct phyint *pi);
500Sstevel@tonic-gate 
510Sstevel@tonic-gate static void phyint_group_insert(struct phyint_group *pg);
520Sstevel@tonic-gate static void phyint_group_delete(struct phyint_group *pg);
530Sstevel@tonic-gate static struct phyint_group *phyint_group_lookup(const char *pg_name);
540Sstevel@tonic-gate static struct phyint_group *phyint_group_create(const char *pg_name);
550Sstevel@tonic-gate 
560Sstevel@tonic-gate static void logint_print(struct logint *li);
570Sstevel@tonic-gate static void logint_insert(struct phyint_instance *pii, struct logint *li);
580Sstevel@tonic-gate static struct logint *logint_lookup(struct phyint_instance *pii, char *li_name);
590Sstevel@tonic-gate 
600Sstevel@tonic-gate static void target_print(struct target *tg);
610Sstevel@tonic-gate static void target_insert(struct phyint_instance *pii, struct target *tg);
620Sstevel@tonic-gate static struct target *target_first(struct phyint_instance *pii);
630Sstevel@tonic-gate static struct target *target_select_best(struct phyint_instance *pii);
640Sstevel@tonic-gate static void target_flush_hosts(struct phyint_group *pg);
650Sstevel@tonic-gate 
660Sstevel@tonic-gate static void reset_pii_probes(struct phyint_instance *pii, struct target *tg);
670Sstevel@tonic-gate 
680Sstevel@tonic-gate static boolean_t phyint_inst_v6_sockinit(struct phyint_instance *pii);
690Sstevel@tonic-gate static boolean_t phyint_inst_v4_sockinit(struct phyint_instance *pii);
700Sstevel@tonic-gate 
710Sstevel@tonic-gate static void ip_index_to_mask_v6(uint_t masklen, struct in6_addr *bitmask);
720Sstevel@tonic-gate static boolean_t prefix_equal(struct in6_addr p1, struct in6_addr p2,
730Sstevel@tonic-gate     int prefix_len);
740Sstevel@tonic-gate 
750Sstevel@tonic-gate static int phyint_state_event(struct phyint_group *pg, struct phyint *pi);
760Sstevel@tonic-gate static int phyint_group_state_event(struct phyint_group *pg);
770Sstevel@tonic-gate static int phyint_group_change_event(struct phyint_group *pg, ipmp_group_op_t);
780Sstevel@tonic-gate static int phyint_group_member_event(struct phyint_group *pg, struct phyint *pi,
790Sstevel@tonic-gate     ipmp_if_op_t op);
800Sstevel@tonic-gate 
810Sstevel@tonic-gate static uint64_t gensig(void);
820Sstevel@tonic-gate 
830Sstevel@tonic-gate /* Initialize any per-file global state.  Returns 0 on success, -1 on failure */
840Sstevel@tonic-gate int
850Sstevel@tonic-gate phyint_init(void)
860Sstevel@tonic-gate {
870Sstevel@tonic-gate 	phyint_grouplistsig = gensig();
880Sstevel@tonic-gate 	if (track_all_phyints) {
890Sstevel@tonic-gate 		phyint_anongroup = phyint_group_create("");
900Sstevel@tonic-gate 		if (phyint_anongroup == NULL)
910Sstevel@tonic-gate 			return (-1);
920Sstevel@tonic-gate 		phyint_group_insert(phyint_anongroup);
930Sstevel@tonic-gate 	}
940Sstevel@tonic-gate 	return (0);
950Sstevel@tonic-gate }
960Sstevel@tonic-gate 
970Sstevel@tonic-gate /* Return the phyint with the given name */
980Sstevel@tonic-gate struct phyint *
990Sstevel@tonic-gate phyint_lookup(const char *name)
1000Sstevel@tonic-gate {
1010Sstevel@tonic-gate 	struct phyint *pi;
1020Sstevel@tonic-gate 
1030Sstevel@tonic-gate 	if (debug & D_PHYINT)
1040Sstevel@tonic-gate 		logdebug("phyint_lookup(%s)\n", name);
1050Sstevel@tonic-gate 
1060Sstevel@tonic-gate 	for (pi = phyints; pi != NULL; pi = pi->pi_next) {
1070Sstevel@tonic-gate 		if (strncmp(pi->pi_name, name, sizeof (pi->pi_name)) == 0)
1080Sstevel@tonic-gate 			break;
1090Sstevel@tonic-gate 	}
1100Sstevel@tonic-gate 	return (pi);
1110Sstevel@tonic-gate }
1120Sstevel@tonic-gate 
1130Sstevel@tonic-gate /* Return the phyint instance with the given name and the given family */
1140Sstevel@tonic-gate struct phyint_instance *
1150Sstevel@tonic-gate phyint_inst_lookup(int af, char *name)
1160Sstevel@tonic-gate {
1170Sstevel@tonic-gate 	struct phyint *pi;
1180Sstevel@tonic-gate 
1190Sstevel@tonic-gate 	if (debug & D_PHYINT)
1200Sstevel@tonic-gate 		logdebug("phyint_inst_lookup(%s %s)\n", AF_STR(af), name);
1210Sstevel@tonic-gate 
1220Sstevel@tonic-gate 	assert(af == AF_INET || af == AF_INET6);
1230Sstevel@tonic-gate 
1240Sstevel@tonic-gate 	pi = phyint_lookup(name);
1250Sstevel@tonic-gate 	if (pi == NULL)
1260Sstevel@tonic-gate 		return (NULL);
1270Sstevel@tonic-gate 
1280Sstevel@tonic-gate 	return (PHYINT_INSTANCE(pi, af));
1290Sstevel@tonic-gate }
1300Sstevel@tonic-gate 
1310Sstevel@tonic-gate static struct phyint_group *
1320Sstevel@tonic-gate phyint_group_lookup(const char *pg_name)
1330Sstevel@tonic-gate {
1340Sstevel@tonic-gate 	struct phyint_group *pg;
1350Sstevel@tonic-gate 
1360Sstevel@tonic-gate 	if (debug & D_PHYINT)
1370Sstevel@tonic-gate 		logdebug("phyint_group_lookup(%s)\n", pg_name);
1380Sstevel@tonic-gate 
1390Sstevel@tonic-gate 	for (pg = phyint_groups; pg != NULL; pg = pg->pg_next) {
1400Sstevel@tonic-gate 		if (strncmp(pg->pg_name, pg_name, sizeof (pg->pg_name)) == 0)
1410Sstevel@tonic-gate 			break;
1420Sstevel@tonic-gate 	}
1430Sstevel@tonic-gate 	return (pg);
1440Sstevel@tonic-gate }
1450Sstevel@tonic-gate 
1460Sstevel@tonic-gate /*
1470Sstevel@tonic-gate  * Insert the phyint in the linked list of all phyints. If the phyint belongs
1480Sstevel@tonic-gate  * to some group, insert it in the phyint group list.
1490Sstevel@tonic-gate  */
1500Sstevel@tonic-gate static void
1510Sstevel@tonic-gate phyint_insert(struct phyint *pi, struct phyint_group *pg)
1520Sstevel@tonic-gate {
1530Sstevel@tonic-gate 	if (debug & D_PHYINT)
1540Sstevel@tonic-gate 		logdebug("phyint_insert(%s '%s')\n", pi->pi_name, pg->pg_name);
1550Sstevel@tonic-gate 
1560Sstevel@tonic-gate 	/* Insert the phyint at the head of the 'all phyints' list */
1570Sstevel@tonic-gate 	pi->pi_next = phyints;
1580Sstevel@tonic-gate 	pi->pi_prev = NULL;
1590Sstevel@tonic-gate 	if (phyints != NULL)
1600Sstevel@tonic-gate 		phyints->pi_prev = pi;
1610Sstevel@tonic-gate 	phyints = pi;
1620Sstevel@tonic-gate 
1630Sstevel@tonic-gate 	/*
1640Sstevel@tonic-gate 	 * Insert the phyint at the head of the 'phyint_group members' list
1650Sstevel@tonic-gate 	 * of the phyint group to which it belongs.
1660Sstevel@tonic-gate 	 */
1670Sstevel@tonic-gate 	pi->pi_pgnext = NULL;
1680Sstevel@tonic-gate 	pi->pi_pgprev = NULL;
1690Sstevel@tonic-gate 	pi->pi_group = pg;
1700Sstevel@tonic-gate 
1710Sstevel@tonic-gate 	pi->pi_pgnext = pg->pg_phyint;
1720Sstevel@tonic-gate 	if (pi->pi_pgnext != NULL)
1730Sstevel@tonic-gate 		pi->pi_pgnext->pi_pgprev = pi;
1740Sstevel@tonic-gate 	pg->pg_phyint = pi;
1750Sstevel@tonic-gate 
1760Sstevel@tonic-gate 	pg->pg_sig++;
1770Sstevel@tonic-gate 	(void) phyint_group_member_event(pg, pi, IPMP_IF_ADD);
1780Sstevel@tonic-gate }
1790Sstevel@tonic-gate 
1800Sstevel@tonic-gate /* Insert the phyint instance in the linked list of all phyint instances. */
1810Sstevel@tonic-gate static void
1820Sstevel@tonic-gate phyint_inst_insert(struct phyint_instance *pii)
1830Sstevel@tonic-gate {
1840Sstevel@tonic-gate 	if (debug & D_PHYINT) {
1850Sstevel@tonic-gate 		logdebug("phyint_inst_insert(%s %s)\n",
1860Sstevel@tonic-gate 		    AF_STR(pii->pii_af), pii->pii_name);
1870Sstevel@tonic-gate 	}
1880Sstevel@tonic-gate 
1890Sstevel@tonic-gate 	/*
1900Sstevel@tonic-gate 	 * Insert the phyint at the head of the 'all phyint instances' list.
1910Sstevel@tonic-gate 	 */
1920Sstevel@tonic-gate 	pii->pii_next = phyint_instances;
1930Sstevel@tonic-gate 	pii->pii_prev = NULL;
1940Sstevel@tonic-gate 	if (phyint_instances != NULL)
1950Sstevel@tonic-gate 		phyint_instances->pii_prev = pii;
1960Sstevel@tonic-gate 	phyint_instances = pii;
1970Sstevel@tonic-gate }
1980Sstevel@tonic-gate 
1990Sstevel@tonic-gate /*
2000Sstevel@tonic-gate  * Create a new phyint with the given parameters. Also insert it into
2010Sstevel@tonic-gate  * the list of all phyints and the list of phyint group members by calling
2020Sstevel@tonic-gate  * phyint_insert().
2030Sstevel@tonic-gate  */
2040Sstevel@tonic-gate static struct phyint *
2050Sstevel@tonic-gate phyint_create(char *pi_name, struct phyint_group *pg, uint_t ifindex,
2060Sstevel@tonic-gate     uint64_t flags)
2070Sstevel@tonic-gate {
2080Sstevel@tonic-gate 	struct phyint *pi;
2090Sstevel@tonic-gate 
2100Sstevel@tonic-gate 	pi = calloc(1, sizeof (struct phyint));
2110Sstevel@tonic-gate 	if (pi == NULL) {
2120Sstevel@tonic-gate 		logperror("phyint_create: calloc");
2130Sstevel@tonic-gate 		return (NULL);
2140Sstevel@tonic-gate 	}
2150Sstevel@tonic-gate 
2160Sstevel@tonic-gate 	/*
2170Sstevel@tonic-gate 	 * Record the phyint values. Also insert the phyint into the
2180Sstevel@tonic-gate 	 * phyint group by calling phyint_insert().
2190Sstevel@tonic-gate 	 */
2200Sstevel@tonic-gate 	(void) strncpy(pi->pi_name, pi_name, sizeof (pi->pi_name));
2210Sstevel@tonic-gate 	pi->pi_name[sizeof (pi->pi_name) - 1] = '\0';
2220Sstevel@tonic-gate 	pi->pi_ifindex = ifindex;
2230Sstevel@tonic-gate 	pi->pi_icmpid =
2240Sstevel@tonic-gate 	    htons(((getpid() & 0xFF) << 8) | (pi->pi_ifindex & 0xFF));
2250Sstevel@tonic-gate 	/*
2260Sstevel@tonic-gate 	 * We optimistically start in the PI_RUNNING state.  Later (in
2270Sstevel@tonic-gate 	 * process_link_state_changes()), we will readjust this to match the
2280Sstevel@tonic-gate 	 * current state of the link.  Further, if test addresses are
2290Sstevel@tonic-gate 	 * subsequently assigned, we will transition to PI_NOTARGETS and then
2300Sstevel@tonic-gate 	 * either PI_RUNNING or PI_FAILED, depending on the result of the test
2310Sstevel@tonic-gate 	 * probes.
2320Sstevel@tonic-gate 	 */
2330Sstevel@tonic-gate 	pi->pi_state = PI_RUNNING;
2340Sstevel@tonic-gate 	pi->pi_flags = PHYINT_FLAGS(flags);
2350Sstevel@tonic-gate 	/*
2360Sstevel@tonic-gate 	 * Initialise the link state.  The link state is initialised to
2370Sstevel@tonic-gate 	 * up, so that if the link is down when IPMP starts monitoring
2380Sstevel@tonic-gate 	 * the interface, it will appear as though there has been a
2390Sstevel@tonic-gate 	 * transition from the link up to link down.  This avoids
2400Sstevel@tonic-gate 	 * having to treat this situation as a special case.
2410Sstevel@tonic-gate 	 */
2420Sstevel@tonic-gate 	INIT_LINK_STATE(pi);
2430Sstevel@tonic-gate 
2440Sstevel@tonic-gate 	/*
2450Sstevel@tonic-gate 	 * Insert the phyint in the list of all phyints, and the
2460Sstevel@tonic-gate 	 * list of phyint group members
2470Sstevel@tonic-gate 	 */
2480Sstevel@tonic-gate 	phyint_insert(pi, pg);
2490Sstevel@tonic-gate 
2500Sstevel@tonic-gate 	/*
2510Sstevel@tonic-gate 	 * If we are joining a failed group, mark the interface as
2520Sstevel@tonic-gate 	 * failed.
2530Sstevel@tonic-gate 	 */
2540Sstevel@tonic-gate 	if (GROUP_FAILED(pg))
2550Sstevel@tonic-gate 		(void) change_lif_flags(pi, IFF_FAILED, _B_TRUE);
2560Sstevel@tonic-gate 
2570Sstevel@tonic-gate 	return (pi);
2580Sstevel@tonic-gate }
2590Sstevel@tonic-gate 
2600Sstevel@tonic-gate /*
2610Sstevel@tonic-gate  * Create a new phyint instance belonging to the phyint 'pi' and address
2620Sstevel@tonic-gate  * family 'af'. Also insert it into the list of all phyint instances by
2630Sstevel@tonic-gate  * calling phyint_inst_insert().
2640Sstevel@tonic-gate  */
2650Sstevel@tonic-gate static struct phyint_instance *
2660Sstevel@tonic-gate phyint_inst_create(struct phyint *pi, int af)
2670Sstevel@tonic-gate {
2680Sstevel@tonic-gate 	struct phyint_instance *pii;
2690Sstevel@tonic-gate 
2700Sstevel@tonic-gate 	pii = calloc(1, sizeof (struct phyint_instance));
2710Sstevel@tonic-gate 	if (pii == NULL) {
2720Sstevel@tonic-gate 		logperror("phyint_inst_create: calloc");
2730Sstevel@tonic-gate 		return (NULL);
2740Sstevel@tonic-gate 	}
2750Sstevel@tonic-gate 
2760Sstevel@tonic-gate 	/*
2770Sstevel@tonic-gate 	 * Attach the phyint instance to the phyint.
2780Sstevel@tonic-gate 	 * Set the back pointers as well
2790Sstevel@tonic-gate 	 */
2800Sstevel@tonic-gate 	pii->pii_phyint = pi;
2810Sstevel@tonic-gate 	if (af == AF_INET)
2820Sstevel@tonic-gate 		pi->pi_v4 = pii;
2830Sstevel@tonic-gate 	else
2840Sstevel@tonic-gate 		pi->pi_v6 = pii;
2850Sstevel@tonic-gate 
2860Sstevel@tonic-gate 	pii->pii_in_use = 1;
2870Sstevel@tonic-gate 	pii->pii_probe_sock = -1;
2880Sstevel@tonic-gate 	pii->pii_snxt = 1;
2890Sstevel@tonic-gate 	pii->pii_af = af;
2900Sstevel@tonic-gate 	pii->pii_fd_hrtime = gethrtime() +
2910Sstevel@tonic-gate 	    (FAILURE_DETECTION_QP * (hrtime_t)NANOSEC);
2920Sstevel@tonic-gate 	pii->pii_flags = pi->pi_flags;
2930Sstevel@tonic-gate 
2940Sstevel@tonic-gate 	/* Insert the phyint instance in the list of all phyint instances. */
2950Sstevel@tonic-gate 	phyint_inst_insert(pii);
2960Sstevel@tonic-gate 	return (pii);
2970Sstevel@tonic-gate }
2980Sstevel@tonic-gate 
2990Sstevel@tonic-gate /*
3000Sstevel@tonic-gate  * Change the state of phyint `pi' to state `state'.
3010Sstevel@tonic-gate  */
3020Sstevel@tonic-gate void
3030Sstevel@tonic-gate phyint_chstate(struct phyint *pi, enum pi_state state)
3040Sstevel@tonic-gate {
3050Sstevel@tonic-gate 	/*
3060Sstevel@tonic-gate 	 * To simplify things, some callers always set a given state
3070Sstevel@tonic-gate 	 * regardless of the previous state of the phyint (e.g., setting
3080Sstevel@tonic-gate 	 * PI_RUNNING when it's already set).  We shouldn't bother
3090Sstevel@tonic-gate 	 * generating an event or consuming a signature for these, since
3100Sstevel@tonic-gate 	 * the actual state of the interface is unchanged.
3110Sstevel@tonic-gate 	 */
3120Sstevel@tonic-gate 	if (pi->pi_state == state)
3130Sstevel@tonic-gate 		return;
3140Sstevel@tonic-gate 
3150Sstevel@tonic-gate 	pi->pi_state = state;
3160Sstevel@tonic-gate 	pi->pi_group->pg_sig++;
3170Sstevel@tonic-gate 	(void) phyint_state_event(pi->pi_group, pi);
3180Sstevel@tonic-gate }
3190Sstevel@tonic-gate 
3200Sstevel@tonic-gate /*
3210Sstevel@tonic-gate  * Note that the type of phyint `pi' has changed.
3220Sstevel@tonic-gate  */
3230Sstevel@tonic-gate void
3240Sstevel@tonic-gate phyint_newtype(struct phyint *pi)
3250Sstevel@tonic-gate {
3260Sstevel@tonic-gate 	pi->pi_group->pg_sig++;
3270Sstevel@tonic-gate 	(void) phyint_state_event(pi->pi_group, pi);
3280Sstevel@tonic-gate }
3290Sstevel@tonic-gate 
3300Sstevel@tonic-gate /*
3310Sstevel@tonic-gate  * Insert the phyint group in the linked list of all phyint groups
3320Sstevel@tonic-gate  * at the head of the list
3330Sstevel@tonic-gate  */
3340Sstevel@tonic-gate static void
3350Sstevel@tonic-gate phyint_group_insert(struct phyint_group *pg)
3360Sstevel@tonic-gate {
3370Sstevel@tonic-gate 	pg->pg_next = phyint_groups;
3380Sstevel@tonic-gate 	pg->pg_prev = NULL;
3390Sstevel@tonic-gate 	if (phyint_groups != NULL)
3400Sstevel@tonic-gate 		phyint_groups->pg_prev = pg;
3410Sstevel@tonic-gate 	phyint_groups = pg;
3420Sstevel@tonic-gate 
3430Sstevel@tonic-gate 	phyint_grouplistsig++;
3440Sstevel@tonic-gate 	(void) phyint_group_change_event(pg, IPMP_GROUP_ADD);
3450Sstevel@tonic-gate }
3460Sstevel@tonic-gate 
3470Sstevel@tonic-gate /*
3480Sstevel@tonic-gate  * Create a new phyint group called 'name'.
3490Sstevel@tonic-gate  */
3500Sstevel@tonic-gate static struct phyint_group *
3510Sstevel@tonic-gate phyint_group_create(const char *name)
3520Sstevel@tonic-gate {
3530Sstevel@tonic-gate 	struct	phyint_group *pg;
3540Sstevel@tonic-gate 
3550Sstevel@tonic-gate 	if (debug & D_PHYINT)
3560Sstevel@tonic-gate 		logdebug("phyint_group_create(%s)\n", name);
3570Sstevel@tonic-gate 
3580Sstevel@tonic-gate 	pg = calloc(1, sizeof (struct phyint_group));
3590Sstevel@tonic-gate 	if (pg == NULL) {
3600Sstevel@tonic-gate 		logperror("phyint_group_create: calloc");
3610Sstevel@tonic-gate 		return (NULL);
3620Sstevel@tonic-gate 	}
3630Sstevel@tonic-gate 
3640Sstevel@tonic-gate 	(void) strncpy(pg->pg_name, name, sizeof (pg->pg_name));
3650Sstevel@tonic-gate 	pg->pg_name[sizeof (pg->pg_name) - 1] = '\0';
3660Sstevel@tonic-gate 	pg->pg_sig = gensig();
3670Sstevel@tonic-gate 
3680Sstevel@tonic-gate 	pg->pg_fdt = user_failure_detection_time;
3690Sstevel@tonic-gate 	pg->pg_probeint = user_probe_interval;
3700Sstevel@tonic-gate 
3710Sstevel@tonic-gate 	return (pg);
3720Sstevel@tonic-gate }
3730Sstevel@tonic-gate 
3740Sstevel@tonic-gate /*
3750Sstevel@tonic-gate  * Change the state of the phyint group `pg' to state `state'.
3760Sstevel@tonic-gate  */
3770Sstevel@tonic-gate void
3780Sstevel@tonic-gate phyint_group_chstate(struct phyint_group *pg, enum pg_state state)
3790Sstevel@tonic-gate {
3800Sstevel@tonic-gate 	assert(pg != phyint_anongroup);
3810Sstevel@tonic-gate 
3820Sstevel@tonic-gate 	switch (state) {
3830Sstevel@tonic-gate 	case PG_FAILED:
3840Sstevel@tonic-gate 		pg->pg_groupfailed = 1;
3850Sstevel@tonic-gate 
3860Sstevel@tonic-gate 		/*
3870Sstevel@tonic-gate 		 * We can never know with certainty that a group has
3880Sstevel@tonic-gate 		 * failed.  It is possible that all known targets have
3890Sstevel@tonic-gate 		 * failed simultaneously, and new targets have come up
3900Sstevel@tonic-gate 		 * instead. If the targets are routers then router
3910Sstevel@tonic-gate 		 * discovery will kick in, and we will see the new routers
3920Sstevel@tonic-gate 		 * thru routing socket messages. But if the targets are
3930Sstevel@tonic-gate 		 * hosts, we have to discover it by multicast.	So flush
3940Sstevel@tonic-gate 		 * all the host targets. The next probe will send out a
3950Sstevel@tonic-gate 		 * multicast echo request. If this is a group failure, we
3960Sstevel@tonic-gate 		 * will still not see any response, otherwise we will
3970Sstevel@tonic-gate 		 * clear the pg_groupfailed flag after we get
3980Sstevel@tonic-gate 		 * NUM_PROBE_REPAIRS consecutive unicast replies on any
3990Sstevel@tonic-gate 		 * phyint.
4000Sstevel@tonic-gate 		 */
4010Sstevel@tonic-gate 		target_flush_hosts(pg);
4020Sstevel@tonic-gate 		break;
4030Sstevel@tonic-gate 
4040Sstevel@tonic-gate 	case PG_RUNNING:
4050Sstevel@tonic-gate 		pg->pg_groupfailed = 0;
4060Sstevel@tonic-gate 		break;
4070Sstevel@tonic-gate 
4080Sstevel@tonic-gate 	default:
4090Sstevel@tonic-gate 		logerr("phyint_group_chstate: invalid group state %d; "
4100Sstevel@tonic-gate 		    "aborting\n", state);
4110Sstevel@tonic-gate 		abort();
4120Sstevel@tonic-gate 	}
4130Sstevel@tonic-gate 
4140Sstevel@tonic-gate 	pg->pg_sig++;
4150Sstevel@tonic-gate 	(void) phyint_group_state_event(pg);
4160Sstevel@tonic-gate }
4170Sstevel@tonic-gate 
4180Sstevel@tonic-gate /*
4190Sstevel@tonic-gate  * Create a new phyint instance and initialize it from the values supplied by
4200Sstevel@tonic-gate  * the kernel. Always check for ENXIO before logging any error, because the
4210Sstevel@tonic-gate  * interface could have vanished after completion of SIOCGLIFCONF.
4220Sstevel@tonic-gate  * Return values:
4230Sstevel@tonic-gate  *	pointer to the phyint instance on success
4240Sstevel@tonic-gate  *	NULL on failure Eg. if the phyint instance is not found in the kernel
4250Sstevel@tonic-gate  */
4260Sstevel@tonic-gate struct phyint_instance *
4270Sstevel@tonic-gate phyint_inst_init_from_k(int af, char *pi_name)
4280Sstevel@tonic-gate {
4290Sstevel@tonic-gate 	char	pg_name[LIFNAMSIZ + 1];
4300Sstevel@tonic-gate 	int	ifsock;
4310Sstevel@tonic-gate 	uint_t	ifindex;
4320Sstevel@tonic-gate 	uint64_t	flags;
4330Sstevel@tonic-gate 	struct lifreq	lifr;
4340Sstevel@tonic-gate 	struct phyint	*pi;
4350Sstevel@tonic-gate 	struct phyint_instance	*pii;
4360Sstevel@tonic-gate 	boolean_t	pg_created;
4370Sstevel@tonic-gate 	boolean_t	pi_created;
4380Sstevel@tonic-gate 	struct phyint_group	*pg;
4390Sstevel@tonic-gate 
4400Sstevel@tonic-gate retry:
4410Sstevel@tonic-gate 	pii = NULL;
4420Sstevel@tonic-gate 	pi = NULL;
4430Sstevel@tonic-gate 	pg = NULL;
4440Sstevel@tonic-gate 	pi_created = _B_FALSE;
4450Sstevel@tonic-gate 	pg_created = _B_FALSE;
4460Sstevel@tonic-gate 
4470Sstevel@tonic-gate 	if (debug & D_PHYINT) {
4480Sstevel@tonic-gate 		logdebug("phyint_inst_init_from_k(%s %s)\n",
4490Sstevel@tonic-gate 		    AF_STR(af), pi_name);
4500Sstevel@tonic-gate 	}
4510Sstevel@tonic-gate 
4520Sstevel@tonic-gate 	assert(af == AF_INET || af == AF_INET6);
4530Sstevel@tonic-gate 
4540Sstevel@tonic-gate 	/* Get the socket for doing ioctls */
4550Sstevel@tonic-gate 	ifsock = (af == AF_INET) ? ifsock_v4 : ifsock_v6;
4560Sstevel@tonic-gate 
4570Sstevel@tonic-gate 	/*
4580Sstevel@tonic-gate 	 * Get the interface flags. Ignore loopback and multipoint
4590Sstevel@tonic-gate 	 * interfaces.
4600Sstevel@tonic-gate 	 */
4610Sstevel@tonic-gate 	(void) strncpy(lifr.lifr_name, pi_name, sizeof (lifr.lifr_name));
4620Sstevel@tonic-gate 	lifr.lifr_name[sizeof (lifr.lifr_name) - 1] = '\0';
4630Sstevel@tonic-gate 	if (ioctl(ifsock, SIOCGLIFFLAGS, (char *)&lifr) < 0) {
4640Sstevel@tonic-gate 		if (errno != ENXIO) {
4650Sstevel@tonic-gate 			logperror("phyint_inst_init_from_k:"
4660Sstevel@tonic-gate 			    " ioctl (get flags)");
4670Sstevel@tonic-gate 		}
4680Sstevel@tonic-gate 		return (NULL);
4690Sstevel@tonic-gate 	}
4700Sstevel@tonic-gate 	flags = lifr.lifr_flags;
4710Sstevel@tonic-gate 	if (!(flags & IFF_MULTICAST) || (flags & IFF_LOOPBACK))
4720Sstevel@tonic-gate 		return (NULL);
4730Sstevel@tonic-gate 
4740Sstevel@tonic-gate 	/*
4750Sstevel@tonic-gate 	 * Get the ifindex for recording later in our tables, in case we need
4760Sstevel@tonic-gate 	 * to create a new phyint.
4770Sstevel@tonic-gate 	 */
4780Sstevel@tonic-gate 	if (ioctl(ifsock, SIOCGLIFINDEX, (char *)&lifr) < 0) {
4790Sstevel@tonic-gate 		if (errno != ENXIO) {
4800Sstevel@tonic-gate 			logperror("phyint_inst_init_from_k: "
4810Sstevel@tonic-gate 			    " ioctl (get lifindex)");
4820Sstevel@tonic-gate 		}
4830Sstevel@tonic-gate 		return (NULL);
4840Sstevel@tonic-gate 	}
4850Sstevel@tonic-gate 	ifindex = lifr.lifr_index;
4860Sstevel@tonic-gate 
4870Sstevel@tonic-gate 	/*
4880Sstevel@tonic-gate 	 * Get the phyint group name of this phyint, from the kernel.
4890Sstevel@tonic-gate 	 */
4900Sstevel@tonic-gate 	if (ioctl(ifsock, SIOCGLIFGROUPNAME, (char *)&lifr) < 0) {
4910Sstevel@tonic-gate 		if (errno != ENXIO) {
4920Sstevel@tonic-gate 			logperror("phyint_inst_init_from_k: "
4930Sstevel@tonic-gate 			    "ioctl (get group name)");
4940Sstevel@tonic-gate 		}
4950Sstevel@tonic-gate 		return (NULL);
4960Sstevel@tonic-gate 	}
4970Sstevel@tonic-gate 	(void) strncpy(pg_name, lifr.lifr_groupname, sizeof (pg_name));
4980Sstevel@tonic-gate 	pg_name[sizeof (pg_name) - 1] = '\0';
4990Sstevel@tonic-gate 
5000Sstevel@tonic-gate 	/*
5010Sstevel@tonic-gate 	 * If the phyint is not part of any group, pg_name is the
5020Sstevel@tonic-gate 	 * null string. If 'track_all_phyints' is false, there is no
5030Sstevel@tonic-gate 	 * need to create a phyint.
5040Sstevel@tonic-gate 	 */
5050Sstevel@tonic-gate 	if (pg_name[0] == '\0' && !track_all_phyints) {
5060Sstevel@tonic-gate 		/*
5070Sstevel@tonic-gate 		 * If the IFF_FAILED or IFF_OFFLINE flags are set, reset
5080Sstevel@tonic-gate 		 * them. These flags shouldn't be set if IPMP isn't
5090Sstevel@tonic-gate 		 * tracking the interface.
5100Sstevel@tonic-gate 		 */
5110Sstevel@tonic-gate 		if ((flags & (IFF_FAILED | IFF_OFFLINE)) != 0) {
5120Sstevel@tonic-gate 			lifr.lifr_flags = flags & ~(IFF_FAILED | IFF_OFFLINE);
5130Sstevel@tonic-gate 			if (ioctl(ifsock, SIOCSLIFFLAGS, (char *)&lifr) < 0) {
5140Sstevel@tonic-gate 				if (errno != ENXIO) {
5150Sstevel@tonic-gate 					logperror("phyint_inst_init_from_k:"
5160Sstevel@tonic-gate 					    " ioctl (set flags)");
5170Sstevel@tonic-gate 				}
5180Sstevel@tonic-gate 			}
5190Sstevel@tonic-gate 		}
5200Sstevel@tonic-gate 		return (NULL);
5210Sstevel@tonic-gate 	}
5220Sstevel@tonic-gate 
5230Sstevel@tonic-gate 	/*
5240Sstevel@tonic-gate 	 * We need to create a new phyint instance. A phyint instance
5250Sstevel@tonic-gate 	 * belongs to a phyint, and the phyint belongs to a phyint group.
5260Sstevel@tonic-gate 	 * So we first lookup the 'parents' and if they don't exist then
5270Sstevel@tonic-gate 	 * we create them.
5280Sstevel@tonic-gate 	 */
5290Sstevel@tonic-gate 	pg = phyint_group_lookup(pg_name);
5300Sstevel@tonic-gate 	if (pg == NULL) {
5310Sstevel@tonic-gate 		pg = phyint_group_create(pg_name);
5320Sstevel@tonic-gate 		if (pg == NULL) {
5330Sstevel@tonic-gate 			logerr("phyint_inst_init_from_k:"
5340Sstevel@tonic-gate 			    " unable to create group %s\n", pg_name);
5350Sstevel@tonic-gate 			return (NULL);
5360Sstevel@tonic-gate 		}
5370Sstevel@tonic-gate 		phyint_group_insert(pg);
5380Sstevel@tonic-gate 		pg_created = _B_TRUE;
5390Sstevel@tonic-gate 	}
5400Sstevel@tonic-gate 
5410Sstevel@tonic-gate 	/*
5420Sstevel@tonic-gate 	 * Lookup the phyint. If the phyint does not exist create it.
5430Sstevel@tonic-gate 	 */
5440Sstevel@tonic-gate 	pi = phyint_lookup(pi_name);
5450Sstevel@tonic-gate 	if (pi == NULL) {
5460Sstevel@tonic-gate 		pi = phyint_create(pi_name, pg, ifindex, flags);
5470Sstevel@tonic-gate 		if (pi == NULL) {
5480Sstevel@tonic-gate 			logerr("phyint_inst_init_from_k:"
5490Sstevel@tonic-gate 			    " unable to create phyint %s\n", pi_name);
5500Sstevel@tonic-gate 			if (pg_created)
5510Sstevel@tonic-gate 				phyint_group_delete(pg);
5520Sstevel@tonic-gate 			return (NULL);
5530Sstevel@tonic-gate 		}
5540Sstevel@tonic-gate 		pi_created = _B_TRUE;
5550Sstevel@tonic-gate 	} else {
5560Sstevel@tonic-gate 		/* The phyint exists already. */
5570Sstevel@tonic-gate 		assert(pi_created == _B_FALSE);
5580Sstevel@tonic-gate 		/*
5590Sstevel@tonic-gate 		 * Normally we should see consistent values for the IPv4 and
5600Sstevel@tonic-gate 		 * IPv6 instances, for phyint properties. If we don't, it
5610Sstevel@tonic-gate 		 * means things have changed underneath us, and we should
5620Sstevel@tonic-gate 		 * resync our tables with the kernel. Check whether the
5630Sstevel@tonic-gate 		 * interface index has changed. If so, it is most likely
5640Sstevel@tonic-gate 		 * the interface has been unplumbed and replumbed,
5650Sstevel@tonic-gate 		 * while we are yet to update our tables. Do it now.
5660Sstevel@tonic-gate 		 */
5670Sstevel@tonic-gate 		if (pi->pi_ifindex != ifindex) {
5680Sstevel@tonic-gate 			if (pg_created)
5690Sstevel@tonic-gate 				phyint_group_delete(pg);
5700Sstevel@tonic-gate 			phyint_inst_delete(PHYINT_INSTANCE(pi, AF_OTHER(af)));
5710Sstevel@tonic-gate 			goto retry;
5720Sstevel@tonic-gate 		}
5730Sstevel@tonic-gate 		assert(PHYINT_INSTANCE(pi, af) == NULL);
5740Sstevel@tonic-gate 
5750Sstevel@tonic-gate 		/*
5760Sstevel@tonic-gate 		 * If the group name seen by the IPv4 and IPv6 instances
5770Sstevel@tonic-gate 		 * are different, it is most likely the groupname has
5780Sstevel@tonic-gate 		 * changed, while we are yet to update our tables. Do it now.
5790Sstevel@tonic-gate 		 */
5800Sstevel@tonic-gate 		if (strcmp(pi->pi_group->pg_name, pg_name) != 0) {
5810Sstevel@tonic-gate 			if (pg_created)
5820Sstevel@tonic-gate 				phyint_group_delete(pg);
5830Sstevel@tonic-gate 			restore_phyint(pi);
5840Sstevel@tonic-gate 			phyint_inst_delete(PHYINT_INSTANCE(pi,
5850Sstevel@tonic-gate 			    AF_OTHER(af)));
5860Sstevel@tonic-gate 			goto retry;
5870Sstevel@tonic-gate 		}
5880Sstevel@tonic-gate 	}
5890Sstevel@tonic-gate 
5900Sstevel@tonic-gate 	/*
5910Sstevel@tonic-gate 	 * Create a new phyint instance, corresponding to the 'af'
5920Sstevel@tonic-gate 	 * passed in.
5930Sstevel@tonic-gate 	 */
5940Sstevel@tonic-gate 	pii = phyint_inst_create(pi, af);
5950Sstevel@tonic-gate 	if (pii == NULL) {
5960Sstevel@tonic-gate 		logerr("phyint_inst_init_from_k: unable to create"
5970Sstevel@tonic-gate 		    "phyint inst %s\n", pi->pi_name);
5980Sstevel@tonic-gate 		if (pi_created) {
5990Sstevel@tonic-gate 			/*
6000Sstevel@tonic-gate 			 * Deleting the phyint will delete the phyint group
6010Sstevel@tonic-gate 			 * if this is the last phyint in the group.
6020Sstevel@tonic-gate 			 */
6030Sstevel@tonic-gate 			phyint_delete(pi);
6040Sstevel@tonic-gate 		}
6050Sstevel@tonic-gate 		return (NULL);
6060Sstevel@tonic-gate 	}
6070Sstevel@tonic-gate 
6080Sstevel@tonic-gate 	return (pii);
6090Sstevel@tonic-gate }
6100Sstevel@tonic-gate 
6110Sstevel@tonic-gate /*
6122074Smeem  * Bind pii_probe_sock to the address associated with pii_probe_logint.
6132074Smeem  * This socket will be used for sending and receiving ICMP/ICMPv6 probes to
6142074Smeem  * targets. Do the common part in this function, and complete the
6152074Smeem  * initializations by calling the protocol specific functions
6160Sstevel@tonic-gate  * phyint_inst_v{4,6}_sockinit() respectively.
6170Sstevel@tonic-gate  *
6180Sstevel@tonic-gate  * Return values: _B_TRUE/_B_FALSE for success or failure respectively.
6190Sstevel@tonic-gate  */
6200Sstevel@tonic-gate boolean_t
6210Sstevel@tonic-gate phyint_inst_sockinit(struct phyint_instance *pii)
6220Sstevel@tonic-gate {
6230Sstevel@tonic-gate 	boolean_t success;
6240Sstevel@tonic-gate 	struct phyint_group *pg;
6250Sstevel@tonic-gate 
6260Sstevel@tonic-gate 	if (debug & D_PHYINT) {
6270Sstevel@tonic-gate 		logdebug("phyint_inst_sockinit(%s %s)\n",
6280Sstevel@tonic-gate 		    AF_STR(pii->pii_af), pii->pii_name);
6290Sstevel@tonic-gate 	}
6300Sstevel@tonic-gate 
6310Sstevel@tonic-gate 	assert(pii->pii_probe_logint != NULL);
6320Sstevel@tonic-gate 	assert(pii->pii_probe_logint->li_flags & IFF_UP);
6332074Smeem 	assert(pii->pii_probe_logint->li_flags & IFF_NOFAILOVER);
6340Sstevel@tonic-gate 	assert(pii->pii_af == AF_INET || pii->pii_af == AF_INET6);
6350Sstevel@tonic-gate 
6360Sstevel@tonic-gate 	/*
6370Sstevel@tonic-gate 	 * If the socket is already bound, close pii_probe_sock
6380Sstevel@tonic-gate 	 */
6390Sstevel@tonic-gate 	if (pii->pii_probe_sock != -1)
6400Sstevel@tonic-gate 		close_probe_socket(pii, _B_TRUE);
6410Sstevel@tonic-gate 
6420Sstevel@tonic-gate 	/*
6430Sstevel@tonic-gate 	 * If the phyint is not part of a named group and track_all_phyints is
6440Sstevel@tonic-gate 	 * false, simply return.
6450Sstevel@tonic-gate 	 */
6460Sstevel@tonic-gate 	pg = pii->pii_phyint->pi_group;
6470Sstevel@tonic-gate 	if (pg == phyint_anongroup && !track_all_phyints) {
6480Sstevel@tonic-gate 		if (debug & D_PHYINT)
6490Sstevel@tonic-gate 			logdebug("phyint_inst_sockinit: no group\n");
6500Sstevel@tonic-gate 		return (_B_FALSE);
6510Sstevel@tonic-gate 	}
6520Sstevel@tonic-gate 
6530Sstevel@tonic-gate 	/*
6540Sstevel@tonic-gate 	 * Initialize the socket by calling the protocol specific function.
6550Sstevel@tonic-gate 	 * If it succeeds, add the socket to the poll list.
6560Sstevel@tonic-gate 	 */
6570Sstevel@tonic-gate 	if (pii->pii_af == AF_INET6)
6580Sstevel@tonic-gate 		success = phyint_inst_v6_sockinit(pii);
6590Sstevel@tonic-gate 	else
6600Sstevel@tonic-gate 		success = phyint_inst_v4_sockinit(pii);
6610Sstevel@tonic-gate 
6620Sstevel@tonic-gate 	if (success && (poll_add(pii->pii_probe_sock) == 0))
6630Sstevel@tonic-gate 		return (_B_TRUE);
6640Sstevel@tonic-gate 
6650Sstevel@tonic-gate 	/* Something failed, cleanup and return false */
6660Sstevel@tonic-gate 	if (pii->pii_probe_sock != -1)
6670Sstevel@tonic-gate 		close_probe_socket(pii, _B_FALSE);
6680Sstevel@tonic-gate 
6690Sstevel@tonic-gate 	return (_B_FALSE);
6700Sstevel@tonic-gate }
6710Sstevel@tonic-gate 
6720Sstevel@tonic-gate /*
6730Sstevel@tonic-gate  * IPv6 specific part in initializing the pii_probe_sock. This socket is
6740Sstevel@tonic-gate  * used to send/receive ICMPv6 probe packets.
6750Sstevel@tonic-gate  */
6760Sstevel@tonic-gate static boolean_t
6770Sstevel@tonic-gate phyint_inst_v6_sockinit(struct phyint_instance *pii)
6780Sstevel@tonic-gate {
6790Sstevel@tonic-gate 	icmp6_filter_t filter;
6800Sstevel@tonic-gate 	int hopcount = 1;
6810Sstevel@tonic-gate 	int int_op;
6820Sstevel@tonic-gate 	struct	sockaddr_in6	testaddr;
6830Sstevel@tonic-gate 
6840Sstevel@tonic-gate 	/*
6850Sstevel@tonic-gate 	 * Open a raw socket with ICMPv6 protocol.
6860Sstevel@tonic-gate 	 *
6870Sstevel@tonic-gate 	 * Use IPV6_DONTFAILOVER_IF to make sure that probes go out
6880Sstevel@tonic-gate 	 * on the specified phyint only, and are not subject to load
6890Sstevel@tonic-gate 	 * balancing. Bind to the src address chosen will ensure that
6900Sstevel@tonic-gate 	 * the responses are received only on the specified phyint.
6910Sstevel@tonic-gate 	 *
6920Sstevel@tonic-gate 	 * Set the hopcount to 1 so that probe packets are not routed.
6930Sstevel@tonic-gate 	 * Disable multicast loopback. Set the receive filter to
6940Sstevel@tonic-gate 	 * receive only ICMPv6 echo replies.
6950Sstevel@tonic-gate 	 */
6960Sstevel@tonic-gate 	pii->pii_probe_sock = socket(pii->pii_af, SOCK_RAW, IPPROTO_ICMPV6);
6970Sstevel@tonic-gate 	if (pii->pii_probe_sock < 0) {
6980Sstevel@tonic-gate 		logperror_pii(pii, "phyint_inst_v6_sockinit: socket");
6990Sstevel@tonic-gate 		return (_B_FALSE);
7000Sstevel@tonic-gate }
7010Sstevel@tonic-gate 
7020Sstevel@tonic-gate 	bzero(&testaddr, sizeof (testaddr));
7030Sstevel@tonic-gate 	testaddr.sin6_family = AF_INET6;
7040Sstevel@tonic-gate 	testaddr.sin6_port = 0;
7050Sstevel@tonic-gate 	testaddr.sin6_addr = pii->pii_probe_logint->li_addr;
7060Sstevel@tonic-gate 
7070Sstevel@tonic-gate 	if (bind(pii->pii_probe_sock, (struct sockaddr *)&testaddr,
7080Sstevel@tonic-gate 	    sizeof (testaddr)) < 0) {
7090Sstevel@tonic-gate 		logperror_pii(pii, "phyint_inst_v6_sockinit: IPv6 bind");
7100Sstevel@tonic-gate 		return (_B_FALSE);
7110Sstevel@tonic-gate 	}
7120Sstevel@tonic-gate 
7130Sstevel@tonic-gate 	/*
7140Sstevel@tonic-gate 	 * IPV6_DONTFAILOVER_IF option takes precedence over setting
7150Sstevel@tonic-gate 	 * IP_MULTICAST_IF. So we don't set IPV6_MULTICAST_IF again.
7160Sstevel@tonic-gate 	 */
7170Sstevel@tonic-gate 	if (setsockopt(pii->pii_probe_sock, IPPROTO_IPV6, IPV6_DONTFAILOVER_IF,
7180Sstevel@tonic-gate 	    (char *)&pii->pii_ifindex, sizeof (uint_t)) < 0) {
7190Sstevel@tonic-gate 		logperror_pii(pii, "phyint_inst_v6_sockinit: setsockopt"
7200Sstevel@tonic-gate 		    " IPV6_DONTFAILOVER_IF");
7210Sstevel@tonic-gate 		return (_B_FALSE);
7220Sstevel@tonic-gate 	}
7230Sstevel@tonic-gate 
7240Sstevel@tonic-gate 	if (setsockopt(pii->pii_probe_sock, IPPROTO_IPV6, IPV6_UNICAST_HOPS,
7250Sstevel@tonic-gate 	    (char *)&hopcount, sizeof (hopcount)) < 0) {
7260Sstevel@tonic-gate 		logperror_pii(pii, "phyint_inst_v6_sockinit: setsockopt"
7270Sstevel@tonic-gate 		    " IPV6_UNICAST_HOPS");
7280Sstevel@tonic-gate 		return (_B_FALSE);
7290Sstevel@tonic-gate 	}
7300Sstevel@tonic-gate 
7310Sstevel@tonic-gate 	if (setsockopt(pii->pii_probe_sock, IPPROTO_IPV6, IPV6_MULTICAST_HOPS,
7320Sstevel@tonic-gate 	    (char *)&hopcount, sizeof (hopcount)) < 0) {
7330Sstevel@tonic-gate 		logperror_pii(pii, "phyint_inst_v6_sockinit: setsockopt"
7340Sstevel@tonic-gate 		    " IPV6_MULTICAST_HOPS");
7350Sstevel@tonic-gate 		return (_B_FALSE);
7360Sstevel@tonic-gate 	}
7370Sstevel@tonic-gate 
7380Sstevel@tonic-gate 	int_op = 0;	/* used to turn off option */
7390Sstevel@tonic-gate 	if (setsockopt(pii->pii_probe_sock, IPPROTO_IPV6, IPV6_MULTICAST_LOOP,
7400Sstevel@tonic-gate 	    (char *)&int_op, sizeof (int_op)) < 0) {
7410Sstevel@tonic-gate 		logperror_pii(pii, "phyint_inst_v6_sockinit: setsockopt"
7420Sstevel@tonic-gate 		    " IPV6_MULTICAST_LOOP");
7430Sstevel@tonic-gate 		return (_B_FALSE);
7440Sstevel@tonic-gate 	}
7450Sstevel@tonic-gate 
7460Sstevel@tonic-gate 	/*
7470Sstevel@tonic-gate 	 * Filter out so that we only receive ICMP echo replies
7480Sstevel@tonic-gate 	 */
7490Sstevel@tonic-gate 	ICMP6_FILTER_SETBLOCKALL(&filter);
7500Sstevel@tonic-gate 	ICMP6_FILTER_SETPASS(ICMP6_ECHO_REPLY, &filter);
7510Sstevel@tonic-gate 
7520Sstevel@tonic-gate 	if (setsockopt(pii->pii_probe_sock, IPPROTO_ICMPV6, ICMP6_FILTER,
7530Sstevel@tonic-gate 	    (char *)&filter, sizeof (filter)) < 0) {
7540Sstevel@tonic-gate 		logperror_pii(pii, "phyint_inst_v6_sockinit: setsockopt"
7550Sstevel@tonic-gate 		    " ICMP6_FILTER");
7560Sstevel@tonic-gate 		return (_B_FALSE);
7570Sstevel@tonic-gate 	}
7580Sstevel@tonic-gate 
7590Sstevel@tonic-gate 	/* Enable receipt of ancillary data */
7600Sstevel@tonic-gate 	int_op = 1;
7610Sstevel@tonic-gate 	if (setsockopt(pii->pii_probe_sock, IPPROTO_IPV6, IPV6_RECVHOPLIMIT,
7620Sstevel@tonic-gate 	    (char *)&int_op, sizeof (int_op)) < 0) {
7630Sstevel@tonic-gate 		logperror_pii(pii, "phyint_inst_v6_sockinit: setsockopt"
7640Sstevel@tonic-gate 		    " IPV6_RECVHOPLIMIT");
7650Sstevel@tonic-gate 		return (_B_FALSE);
7660Sstevel@tonic-gate 	}
7670Sstevel@tonic-gate 
7680Sstevel@tonic-gate 	return (_B_TRUE);
7690Sstevel@tonic-gate }
7700Sstevel@tonic-gate 
7710Sstevel@tonic-gate /*
7720Sstevel@tonic-gate  * IPv4 specific part in initializing the pii_probe_sock. This socket is
7730Sstevel@tonic-gate  * used to send/receive ICMPv4 probe packets.
7740Sstevel@tonic-gate  */
7750Sstevel@tonic-gate static boolean_t
7760Sstevel@tonic-gate phyint_inst_v4_sockinit(struct phyint_instance *pii)
7770Sstevel@tonic-gate {
7780Sstevel@tonic-gate 	struct sockaddr_in  testaddr;
7790Sstevel@tonic-gate 	char	char_op;
7800Sstevel@tonic-gate 	int	ttl = 1;
7810Sstevel@tonic-gate 	char	char_ttl = 1;
7820Sstevel@tonic-gate 
7830Sstevel@tonic-gate 	/*
7840Sstevel@tonic-gate 	 * Open a raw socket with ICMPv4 protocol.
7850Sstevel@tonic-gate 	 *
7860Sstevel@tonic-gate 	 * Use IP_DONTFAILOVER_IF to make sure that probes go out
7870Sstevel@tonic-gate 	 * on the specified phyint only, and are not subject to load
7880Sstevel@tonic-gate 	 * balancing. Bind to the src address chosen will ensure that
7890Sstevel@tonic-gate 	 * the responses are received only on the specified phyint.
7900Sstevel@tonic-gate 	 *
7910Sstevel@tonic-gate 	 * Set the ttl to 1 so that probe packets are not routed.
7920Sstevel@tonic-gate 	 * Disable multicast loopback.
7930Sstevel@tonic-gate 	 */
7940Sstevel@tonic-gate 	pii->pii_probe_sock = socket(pii->pii_af, SOCK_RAW, IPPROTO_ICMP);
7950Sstevel@tonic-gate 	if (pii->pii_probe_sock < 0) {
7960Sstevel@tonic-gate 		logperror_pii(pii, "phyint_inst_v4_sockinit: socket");
7970Sstevel@tonic-gate 		return (_B_FALSE);
7980Sstevel@tonic-gate 	}
7990Sstevel@tonic-gate 
8000Sstevel@tonic-gate 	bzero(&testaddr, sizeof (testaddr));
8010Sstevel@tonic-gate 	testaddr.sin_family = AF_INET;
8020Sstevel@tonic-gate 	testaddr.sin_port = 0;
8030Sstevel@tonic-gate 	IN6_V4MAPPED_TO_INADDR(&pii->pii_probe_logint->li_addr,
8040Sstevel@tonic-gate 	    &testaddr.sin_addr);
8050Sstevel@tonic-gate 
8060Sstevel@tonic-gate 	if (bind(pii->pii_probe_sock, (struct sockaddr *)&testaddr,
8070Sstevel@tonic-gate 	    sizeof (testaddr)) < 0) {
8080Sstevel@tonic-gate 		logperror_pii(pii, "phyint_inst_v4_sockinit: IPv4 bind");
8090Sstevel@tonic-gate 		return (_B_FALSE);
8100Sstevel@tonic-gate 	}
8110Sstevel@tonic-gate 
8120Sstevel@tonic-gate 	/*
8130Sstevel@tonic-gate 	 * IP_DONTFAILOVER_IF option takes precedence over setting
8140Sstevel@tonic-gate 	 * IP_MULTICAST_IF. So we don't set IP_MULTICAST_IF again.
8150Sstevel@tonic-gate 	 */
8160Sstevel@tonic-gate 	if (setsockopt(pii->pii_probe_sock, IPPROTO_IP, IP_DONTFAILOVER_IF,
8170Sstevel@tonic-gate 	    (char *)&testaddr.sin_addr, sizeof (struct in_addr)) < 0) {
8180Sstevel@tonic-gate 		logperror_pii(pii, "phyint_inst_v4_sockinit: setsockopt"
8190Sstevel@tonic-gate 		    " IP_DONTFAILOVER");
8200Sstevel@tonic-gate 		return (_B_FALSE);
8210Sstevel@tonic-gate 	}
8220Sstevel@tonic-gate 
8230Sstevel@tonic-gate 	if (setsockopt(pii->pii_probe_sock, IPPROTO_IP, IP_TTL,
8240Sstevel@tonic-gate 	    (char *)&ttl, sizeof (ttl)) < 0) {
8250Sstevel@tonic-gate 		logperror_pii(pii, "phyint_inst_v4_sockinit: setsockopt"
8260Sstevel@tonic-gate 		    " IP_TTL");
8270Sstevel@tonic-gate 		return (_B_FALSE);
8280Sstevel@tonic-gate 	}
8290Sstevel@tonic-gate 
8300Sstevel@tonic-gate 	char_op = 0;	/* used to turn off option */
8310Sstevel@tonic-gate 	if (setsockopt(pii->pii_probe_sock, IPPROTO_IP, IP_MULTICAST_LOOP,
8320Sstevel@tonic-gate 	    (char *)&char_op, sizeof (char_op)) == -1) {
8330Sstevel@tonic-gate 		logperror_pii(pii, "phyint_inst_v4_sockinit: setsockopt"
8340Sstevel@tonic-gate 		    " IP_MULTICAST_LOOP");
8350Sstevel@tonic-gate 		return (_B_FALSE);
8360Sstevel@tonic-gate 	}
8370Sstevel@tonic-gate 
8380Sstevel@tonic-gate 	if (setsockopt(pii->pii_probe_sock, IPPROTO_IP, IP_MULTICAST_TTL,
8390Sstevel@tonic-gate 	    (char *)&char_ttl, sizeof (char_ttl)) == -1) {
8400Sstevel@tonic-gate 		logperror_pii(pii, "phyint_inst_v4_sockinit: setsockopt"
8410Sstevel@tonic-gate 		    " IP_MULTICAST_TTL");
8420Sstevel@tonic-gate 		return (_B_FALSE);
8430Sstevel@tonic-gate 	}
8440Sstevel@tonic-gate 
8450Sstevel@tonic-gate 	return (_B_TRUE);
8460Sstevel@tonic-gate }
8470Sstevel@tonic-gate 
8480Sstevel@tonic-gate /*
8490Sstevel@tonic-gate  * Remove the phyint group from the list of 'all phyint groups'
8500Sstevel@tonic-gate  * and free it.
8510Sstevel@tonic-gate  */
8520Sstevel@tonic-gate static void
8530Sstevel@tonic-gate phyint_group_delete(struct phyint_group *pg)
8540Sstevel@tonic-gate {
8550Sstevel@tonic-gate 	/*
8560Sstevel@tonic-gate 	 * The anonymous group always exists, even when empty.
8570Sstevel@tonic-gate 	 */
8580Sstevel@tonic-gate 	if (pg == phyint_anongroup)
8590Sstevel@tonic-gate 		return;
8600Sstevel@tonic-gate 
8610Sstevel@tonic-gate 	if (debug & D_PHYINT)
8620Sstevel@tonic-gate 		logdebug("phyint_group_delete('%s')\n", pg->pg_name);
8630Sstevel@tonic-gate 
8640Sstevel@tonic-gate 	/*
8650Sstevel@tonic-gate 	 * The phyint group must be empty, and must not have any phyints.
8660Sstevel@tonic-gate 	 * The phyint group must be in the list of all phyint groups
8670Sstevel@tonic-gate 	 */
8680Sstevel@tonic-gate 	assert(pg->pg_phyint == NULL);
8690Sstevel@tonic-gate 	assert(phyint_groups == pg || pg->pg_prev != NULL);
8700Sstevel@tonic-gate 
8710Sstevel@tonic-gate 	if (pg->pg_prev != NULL)
8720Sstevel@tonic-gate 		pg->pg_prev->pg_next = pg->pg_next;
8730Sstevel@tonic-gate 	else
8740Sstevel@tonic-gate 		phyint_groups = pg->pg_next;
8750Sstevel@tonic-gate 
8760Sstevel@tonic-gate 	if (pg->pg_next != NULL)
8770Sstevel@tonic-gate 		pg->pg_next->pg_prev = pg->pg_prev;
8780Sstevel@tonic-gate 
8790Sstevel@tonic-gate 	pg->pg_next = NULL;
8800Sstevel@tonic-gate 	pg->pg_prev = NULL;
8810Sstevel@tonic-gate 
8820Sstevel@tonic-gate 	phyint_grouplistsig++;
8830Sstevel@tonic-gate 	(void) phyint_group_change_event(pg, IPMP_GROUP_REMOVE);
8840Sstevel@tonic-gate 
8850Sstevel@tonic-gate 	free(pg);
8860Sstevel@tonic-gate }
8870Sstevel@tonic-gate 
8880Sstevel@tonic-gate /*
8890Sstevel@tonic-gate  * Extract information from the kernel about the desired phyint.
8900Sstevel@tonic-gate  * Look only for properties of the phyint and not properties of logints.
8910Sstevel@tonic-gate  * Take appropriate action on the changes.
8920Sstevel@tonic-gate  * Return codes:
8930Sstevel@tonic-gate  *	PI_OK
8940Sstevel@tonic-gate  *		The phyint exists in the kernel and matches our knowledge
8950Sstevel@tonic-gate  *		of the phyint.
8960Sstevel@tonic-gate  *	PI_DELETED
8970Sstevel@tonic-gate  *		The phyint has vanished in the kernel.
8980Sstevel@tonic-gate  *	PI_IFINDEX_CHANGED
8990Sstevel@tonic-gate  *		The phyint's interface index has changed.
9000Sstevel@tonic-gate  *		Ask the caller to delete and recreate the phyint.
9010Sstevel@tonic-gate  *	PI_IOCTL_ERROR
9020Sstevel@tonic-gate  *		Some ioctl error. Don't change anything.
9030Sstevel@tonic-gate  *	PI_GROUP_CHANGED
9040Sstevel@tonic-gate  *		The phyint has changed group.
9050Sstevel@tonic-gate  */
9060Sstevel@tonic-gate int
9070Sstevel@tonic-gate phyint_inst_update_from_k(struct phyint_instance *pii)
9080Sstevel@tonic-gate {
9090Sstevel@tonic-gate 	struct lifreq lifr;
9100Sstevel@tonic-gate 	int	ifsock;
9110Sstevel@tonic-gate 	struct phyint *pi;
9120Sstevel@tonic-gate 
9130Sstevel@tonic-gate 	pi = pii->pii_phyint;
9140Sstevel@tonic-gate 
9150Sstevel@tonic-gate 	if (debug & D_PHYINT) {
9160Sstevel@tonic-gate 		logdebug("phyint_inst_update_from_k(%s %s)\n",
9170Sstevel@tonic-gate 		    AF_STR(pii->pii_af), pi->pi_name);
9180Sstevel@tonic-gate 	}
9190Sstevel@tonic-gate 
9200Sstevel@tonic-gate 	/*
9210Sstevel@tonic-gate 	 * Get the ifindex from the kernel, for comparison with the
9220Sstevel@tonic-gate 	 * value in our tables.
9230Sstevel@tonic-gate 	 */
9240Sstevel@tonic-gate 	(void) strncpy(lifr.lifr_name, pi->pi_name, sizeof (lifr.lifr_name));
9250Sstevel@tonic-gate 	lifr.lifr_name[sizeof (lifr.lifr_name) - 1] = '\0';
9260Sstevel@tonic-gate 
9270Sstevel@tonic-gate 	ifsock = (pii->pii_af == AF_INET) ? ifsock_v4 : ifsock_v6;
9280Sstevel@tonic-gate 	if (ioctl(ifsock, SIOCGLIFINDEX, &lifr) < 0) {
9290Sstevel@tonic-gate 		if (errno == ENXIO) {
9300Sstevel@tonic-gate 			return (PI_DELETED);
9310Sstevel@tonic-gate 		} else {
9320Sstevel@tonic-gate 			logperror_pii(pii, "phyint_inst_update_from_k:"
9330Sstevel@tonic-gate 			    " ioctl (get lifindex)");
9340Sstevel@tonic-gate 			return (PI_IOCTL_ERROR);
9350Sstevel@tonic-gate 		}
9360Sstevel@tonic-gate 	}
9370Sstevel@tonic-gate 
9380Sstevel@tonic-gate 	if (lifr.lifr_index != pi->pi_ifindex) {
9390Sstevel@tonic-gate 		/*
9400Sstevel@tonic-gate 		 * The index has changed. Most likely the interface has
9410Sstevel@tonic-gate 		 * been unplumbed and replumbed. Ask the caller to take
9420Sstevel@tonic-gate 		 * appropriate action.
9430Sstevel@tonic-gate 		 */
9440Sstevel@tonic-gate 		if (debug & D_PHYINT) {
9450Sstevel@tonic-gate 			logdebug("phyint_inst_update_from_k:"
9460Sstevel@tonic-gate 			    " old index %d new index %d\n",
9470Sstevel@tonic-gate 			    pi->pi_ifindex, lifr.lifr_index);
9480Sstevel@tonic-gate 		}
9490Sstevel@tonic-gate 		return (PI_IFINDEX_CHANGED);
9500Sstevel@tonic-gate 	}
9510Sstevel@tonic-gate 
9520Sstevel@tonic-gate 	/*
9530Sstevel@tonic-gate 	 * Get the group name from the kernel, for comparison with
9540Sstevel@tonic-gate 	 * the value in our tables.
9550Sstevel@tonic-gate 	 */
9560Sstevel@tonic-gate 	if (ioctl(ifsock, SIOCGLIFGROUPNAME, &lifr) < 0) {
9570Sstevel@tonic-gate 		if (errno == ENXIO) {
9580Sstevel@tonic-gate 			return (PI_DELETED);
9590Sstevel@tonic-gate 		} else {
9600Sstevel@tonic-gate 			logperror_pii(pii, "phyint_inst_update_from_k:"
9610Sstevel@tonic-gate 			    " ioctl (get groupname)");
9620Sstevel@tonic-gate 			return (PI_IOCTL_ERROR);
9630Sstevel@tonic-gate 		}
9640Sstevel@tonic-gate 	}
9650Sstevel@tonic-gate 
9660Sstevel@tonic-gate 	/*
9670Sstevel@tonic-gate 	 * If the phyint has changed group i.e. if the phyint group name
9680Sstevel@tonic-gate 	 * returned by the kernel is different, ask the caller to delete
9690Sstevel@tonic-gate 	 * and recreate the phyint in the right group
9700Sstevel@tonic-gate 	 */
9710Sstevel@tonic-gate 	if (strcmp(lifr.lifr_groupname, pi->pi_group->pg_name) != 0) {
9720Sstevel@tonic-gate 		/* Groupname has changed */
9730Sstevel@tonic-gate 		if (debug & D_PHYINT) {
9740Sstevel@tonic-gate 			logdebug("phyint_inst_update_from_k:"
9750Sstevel@tonic-gate 			    " groupname change\n");
9760Sstevel@tonic-gate 		}
9770Sstevel@tonic-gate 		return (PI_GROUP_CHANGED);
9780Sstevel@tonic-gate 	}
9790Sstevel@tonic-gate 
9800Sstevel@tonic-gate 	/*
9810Sstevel@tonic-gate 	 * Get the current phyint flags from the kernel, and determine what
9820Sstevel@tonic-gate 	 * flags have changed by comparing against our tables.	Note that the
9830Sstevel@tonic-gate 	 * IFF_INACTIVE processing in initifs() relies on this call to ensure
9840Sstevel@tonic-gate 	 * that IFF_INACTIVE is really still set on the interface.
9850Sstevel@tonic-gate 	 */
9860Sstevel@tonic-gate 	if (ioctl(ifsock, SIOCGLIFFLAGS, &lifr) < 0) {
9870Sstevel@tonic-gate 		if (errno == ENXIO) {
9880Sstevel@tonic-gate 			return (PI_DELETED);
9890Sstevel@tonic-gate 		} else {
9900Sstevel@tonic-gate 			logperror_pii(pii, "phyint_inst_update_from_k: "
9910Sstevel@tonic-gate 			    " ioctl (get flags)");
9920Sstevel@tonic-gate 			return (PI_IOCTL_ERROR);
9930Sstevel@tonic-gate 		}
9940Sstevel@tonic-gate 	}
9950Sstevel@tonic-gate 
9960Sstevel@tonic-gate 	pi->pi_flags = PHYINT_FLAGS(lifr.lifr_flags);
9970Sstevel@tonic-gate 	if (pi->pi_v4 != NULL)
9980Sstevel@tonic-gate 		pi->pi_v4->pii_flags = pi->pi_flags;
9990Sstevel@tonic-gate 	if (pi->pi_v6 != NULL)
10000Sstevel@tonic-gate 		pi->pi_v6->pii_flags = pi->pi_flags;
10010Sstevel@tonic-gate 
10020Sstevel@tonic-gate 	if (pi->pi_flags & IFF_FAILED) {
10030Sstevel@tonic-gate 		/*
10040Sstevel@tonic-gate 		 * If we are in the running and full state, we have
10050Sstevel@tonic-gate 		 * completed failbacks successfully and we would have
10060Sstevel@tonic-gate 		 * expected IFF_FAILED to have been clear. That it is
10070Sstevel@tonic-gate 		 * set means there was a race condition. Some other
10080Sstevel@tonic-gate 		 * process turned on the IFF_FAILED flag. Since the
10090Sstevel@tonic-gate 		 * flag setting is not atomic, i.e. a get ioctl followed
10100Sstevel@tonic-gate 		 * by a set ioctl, and since there is no way to set an
10110Sstevel@tonic-gate 		 * individual flag bit, this could have occurred.
10120Sstevel@tonic-gate 		 */
10130Sstevel@tonic-gate 		if (pi->pi_state == PI_RUNNING && pi->pi_full)
10140Sstevel@tonic-gate 			(void) change_lif_flags(pi, IFF_FAILED, _B_FALSE);
10150Sstevel@tonic-gate 	} else {
10160Sstevel@tonic-gate 		/*
10170Sstevel@tonic-gate 		 * If we are in the failed state, there was a race.
10180Sstevel@tonic-gate 		 * we have completed failover successfully because our
10190Sstevel@tonic-gate 		 * state is failed and empty. Some other process turned
10200Sstevel@tonic-gate 		 * off the IFF_FAILED flag. Same comment as above
10210Sstevel@tonic-gate 		 */
10220Sstevel@tonic-gate 		if (pi->pi_state == PI_FAILED && pi->pi_empty)
10230Sstevel@tonic-gate 			(void) change_lif_flags(pi, IFF_FAILED, _B_TRUE);
10240Sstevel@tonic-gate 	}
10250Sstevel@tonic-gate 
10260Sstevel@tonic-gate 	/* No change in phyint status */
10270Sstevel@tonic-gate 	return (PI_OK);
10280Sstevel@tonic-gate }
10290Sstevel@tonic-gate 
10300Sstevel@tonic-gate /*
10310Sstevel@tonic-gate  * Delete the phyint. Remove it from the list of all phyints, and the
10320Sstevel@tonic-gate  * list of phyint group members. If the group becomes empty, delete the
10330Sstevel@tonic-gate  * group also.
10340Sstevel@tonic-gate  */
10350Sstevel@tonic-gate static void
10360Sstevel@tonic-gate phyint_delete(struct phyint *pi)
10370Sstevel@tonic-gate {
10380Sstevel@tonic-gate 	struct phyint_group *pg = pi->pi_group;
10390Sstevel@tonic-gate 
10400Sstevel@tonic-gate 	if (debug & D_PHYINT)
10410Sstevel@tonic-gate 		logdebug("phyint_delete(%s)\n", pi->pi_name);
10420Sstevel@tonic-gate 
10430Sstevel@tonic-gate 	/* Both IPv4 and IPv6 phyint instances must have been deleted. */
10440Sstevel@tonic-gate 	assert(pi->pi_v4 == NULL && pi->pi_v6 == NULL);
10450Sstevel@tonic-gate 
10460Sstevel@tonic-gate 	/*
10470Sstevel@tonic-gate 	 * The phyint must belong to a group.
10480Sstevel@tonic-gate 	 */
10490Sstevel@tonic-gate 	assert(pg->pg_phyint == pi || pi->pi_pgprev != NULL);
10500Sstevel@tonic-gate 
10510Sstevel@tonic-gate 	/* The phyint must be in the list of all phyints */
10520Sstevel@tonic-gate 	assert(phyints == pi || pi->pi_prev != NULL);
10530Sstevel@tonic-gate 
10540Sstevel@tonic-gate 	/* Remove the phyint from the phyint group list */
10550Sstevel@tonic-gate 	pg->pg_sig++;
10560Sstevel@tonic-gate 	(void) phyint_group_member_event(pg, pi, IPMP_IF_REMOVE);
10570Sstevel@tonic-gate 
10580Sstevel@tonic-gate 	if (pi->pi_pgprev == NULL) {
10590Sstevel@tonic-gate 		/* Phyint is the 1st in the phyint group list */
10600Sstevel@tonic-gate 		pg->pg_phyint = pi->pi_pgnext;
10610Sstevel@tonic-gate 	} else {
10620Sstevel@tonic-gate 		pi->pi_pgprev->pi_pgnext = pi->pi_pgnext;
10630Sstevel@tonic-gate 	}
10640Sstevel@tonic-gate 	if (pi->pi_pgnext != NULL)
10650Sstevel@tonic-gate 		pi->pi_pgnext->pi_pgprev = pi->pi_pgprev;
10660Sstevel@tonic-gate 	pi->pi_pgnext = NULL;
10670Sstevel@tonic-gate 	pi->pi_pgprev = NULL;
10680Sstevel@tonic-gate 
10690Sstevel@tonic-gate 	/* Remove the phyint from the global list of phyints */
10700Sstevel@tonic-gate 	if (pi->pi_prev == NULL) {
10710Sstevel@tonic-gate 		/* Phyint is the 1st in the list */
10720Sstevel@tonic-gate 		phyints = pi->pi_next;
10730Sstevel@tonic-gate 	} else {
10740Sstevel@tonic-gate 		pi->pi_prev->pi_next = pi->pi_next;
10750Sstevel@tonic-gate 	}
10760Sstevel@tonic-gate 	if (pi->pi_next != NULL)
10770Sstevel@tonic-gate 		pi->pi_next->pi_prev = pi->pi_prev;
10780Sstevel@tonic-gate 	pi->pi_next = NULL;
10790Sstevel@tonic-gate 	pi->pi_prev = NULL;
10800Sstevel@tonic-gate 
10810Sstevel@tonic-gate 	free(pi);
10820Sstevel@tonic-gate 
10830Sstevel@tonic-gate 	/* Delete the phyint_group if the last phyint has been deleted */
10840Sstevel@tonic-gate 	if (pg->pg_phyint == NULL)
10850Sstevel@tonic-gate 		phyint_group_delete(pg);
10860Sstevel@tonic-gate }
10870Sstevel@tonic-gate 
10880Sstevel@tonic-gate /*
10890Sstevel@tonic-gate  * Delete (unlink and free), the phyint instance.
10900Sstevel@tonic-gate  */
10910Sstevel@tonic-gate void
10920Sstevel@tonic-gate phyint_inst_delete(struct phyint_instance *pii)
10930Sstevel@tonic-gate {
10940Sstevel@tonic-gate 	struct phyint *pi = pii->pii_phyint;
10950Sstevel@tonic-gate 
10960Sstevel@tonic-gate 	assert(pi != NULL);
10970Sstevel@tonic-gate 
10980Sstevel@tonic-gate 	if (debug & D_PHYINT) {
10990Sstevel@tonic-gate 		logdebug("phyint_inst_delete(%s %s)\n",
11000Sstevel@tonic-gate 		    AF_STR(pii->pii_af), pi->pi_name);
11010Sstevel@tonic-gate 	}
11020Sstevel@tonic-gate 
11030Sstevel@tonic-gate 	/*
11040Sstevel@tonic-gate 	 * If the phyint instance has associated probe targets
11050Sstevel@tonic-gate 	 * delete all the targets
11060Sstevel@tonic-gate 	 */
11070Sstevel@tonic-gate 	while (pii->pii_targets != NULL)
11080Sstevel@tonic-gate 		target_delete(pii->pii_targets);
11090Sstevel@tonic-gate 
11100Sstevel@tonic-gate 	/*
11110Sstevel@tonic-gate 	 * Delete all the logints associated with this phyint
11120Sstevel@tonic-gate 	 * instance.
11130Sstevel@tonic-gate 	 */
11140Sstevel@tonic-gate 	while (pii->pii_logint != NULL)
11150Sstevel@tonic-gate 		logint_delete(pii->pii_logint);
11160Sstevel@tonic-gate 
11170Sstevel@tonic-gate 	/*
11182074Smeem 	 * Close the socket used to send probes to targets from this phyint.
11190Sstevel@tonic-gate 	 */
11200Sstevel@tonic-gate 	if (pii->pii_probe_sock != -1)
11210Sstevel@tonic-gate 		close_probe_socket(pii, _B_TRUE);
11220Sstevel@tonic-gate 
11230Sstevel@tonic-gate 	/*
11240Sstevel@tonic-gate 	 * Phyint instance must be in the list of all phyint instances.
11250Sstevel@tonic-gate 	 * Remove phyint instance from the global list of phyint instances.
11260Sstevel@tonic-gate 	 */
11270Sstevel@tonic-gate 	assert(phyint_instances == pii || pii->pii_prev != NULL);
11280Sstevel@tonic-gate 	if (pii->pii_prev == NULL) {
11290Sstevel@tonic-gate 		/* Phyint is the 1st in the list */
11300Sstevel@tonic-gate 		phyint_instances = pii->pii_next;
11310Sstevel@tonic-gate 	} else {
11320Sstevel@tonic-gate 		pii->pii_prev->pii_next = pii->pii_next;
11330Sstevel@tonic-gate 	}
11340Sstevel@tonic-gate 	if (pii->pii_next != NULL)
11350Sstevel@tonic-gate 		pii->pii_next->pii_prev = pii->pii_prev;
11360Sstevel@tonic-gate 	pii->pii_next = NULL;
11370Sstevel@tonic-gate 	pii->pii_prev = NULL;
11380Sstevel@tonic-gate 
11390Sstevel@tonic-gate 	/*
11400Sstevel@tonic-gate 	 * Reset the phyint instance pointer in the phyint.
11410Sstevel@tonic-gate 	 * If this is the last phyint instance (being deleted) on this
11420Sstevel@tonic-gate 	 * phyint, then delete the phyint.
11430Sstevel@tonic-gate 	 */
11440Sstevel@tonic-gate 	if (pii->pii_af == AF_INET)
11450Sstevel@tonic-gate 		pi->pi_v4 = NULL;
11460Sstevel@tonic-gate 	else
11470Sstevel@tonic-gate 		pi->pi_v6 = NULL;
11480Sstevel@tonic-gate 
11490Sstevel@tonic-gate 	if (pi->pi_v4 == NULL && pi->pi_v6 == NULL)
11500Sstevel@tonic-gate 		phyint_delete(pi);
11510Sstevel@tonic-gate 
11520Sstevel@tonic-gate 	free(pii);
11530Sstevel@tonic-gate }
11540Sstevel@tonic-gate 
11550Sstevel@tonic-gate static void
11560Sstevel@tonic-gate phyint_inst_print(struct phyint_instance *pii)
11570Sstevel@tonic-gate {
11580Sstevel@tonic-gate 	struct logint *li;
11590Sstevel@tonic-gate 	struct target *tg;
11600Sstevel@tonic-gate 	char abuf[INET6_ADDRSTRLEN];
11610Sstevel@tonic-gate 	int most_recent;
11620Sstevel@tonic-gate 	int i;
11630Sstevel@tonic-gate 
11640Sstevel@tonic-gate 	if (pii->pii_phyint == NULL) {
11650Sstevel@tonic-gate 		logdebug("pii->pi_phyint NULL can't print\n");
11660Sstevel@tonic-gate 		return;
11670Sstevel@tonic-gate 	}
11680Sstevel@tonic-gate 
11690Sstevel@tonic-gate 	logdebug("\nPhyint instance: %s %s index %u state %x flags %llx	 "
11700Sstevel@tonic-gate 	    "sock %x in_use %d empty %x full %x\n",
11710Sstevel@tonic-gate 	    AF_STR(pii->pii_af), pii->pii_name, pii->pii_ifindex,
11720Sstevel@tonic-gate 	    pii->pii_state, pii->pii_phyint->pi_flags, pii->pii_probe_sock,
11730Sstevel@tonic-gate 	    pii->pii_in_use, pii->pii_phyint->pi_empty,
11740Sstevel@tonic-gate 	    pii->pii_phyint->pi_full);
11750Sstevel@tonic-gate 
11760Sstevel@tonic-gate 	for (li = pii->pii_logint; li != NULL; li = li->li_next)
11770Sstevel@tonic-gate 		logint_print(li);
11780Sstevel@tonic-gate 
11790Sstevel@tonic-gate 	logdebug("\n");
11800Sstevel@tonic-gate 	for (tg = pii->pii_targets; tg != NULL; tg = tg->tg_next)
11810Sstevel@tonic-gate 		target_print(tg);
11820Sstevel@tonic-gate 
11830Sstevel@tonic-gate 	if (pii->pii_targets == NULL)
11840Sstevel@tonic-gate 		logdebug("pi_targets NULL\n");
11850Sstevel@tonic-gate 
11860Sstevel@tonic-gate 	if (pii->pii_target_next != NULL) {
11870Sstevel@tonic-gate 		logdebug("pi_target_next %s %s\n", AF_STR(pii->pii_af),
11880Sstevel@tonic-gate 		    pr_addr(pii->pii_af, pii->pii_target_next->tg_address,
11890Sstevel@tonic-gate 			abuf, sizeof (abuf)));
11900Sstevel@tonic-gate 	} else {
11910Sstevel@tonic-gate 		logdebug("pi_target_next NULL\n");
11920Sstevel@tonic-gate 	}
11930Sstevel@tonic-gate 
11940Sstevel@tonic-gate 	if (pii->pii_rtt_target_next != NULL) {
11950Sstevel@tonic-gate 		logdebug("pi_rtt_target_next %s %s\n", AF_STR(pii->pii_af),
11960Sstevel@tonic-gate 		    pr_addr(pii->pii_af, pii->pii_rtt_target_next->tg_address,
11970Sstevel@tonic-gate 			abuf, sizeof (abuf)));
11980Sstevel@tonic-gate 	} else {
11990Sstevel@tonic-gate 		logdebug("pi_rtt_target_next NULL\n");
12000Sstevel@tonic-gate 	}
12010Sstevel@tonic-gate 
12020Sstevel@tonic-gate 	if (pii->pii_targets != NULL) {
12030Sstevel@tonic-gate 		most_recent = PROBE_INDEX_PREV(pii->pii_probe_next);
12040Sstevel@tonic-gate 
12050Sstevel@tonic-gate 		i = most_recent;
12060Sstevel@tonic-gate 		do {
12070Sstevel@tonic-gate 			if (pii->pii_probes[i].pr_target != NULL) {
12080Sstevel@tonic-gate 				logdebug("#%d target %s ", i,
12090Sstevel@tonic-gate 				    pr_addr(pii->pii_af,
12100Sstevel@tonic-gate 				    pii->pii_probes[i].pr_target->tg_address,
12110Sstevel@tonic-gate 				    abuf, sizeof (abuf)));
12120Sstevel@tonic-gate 			} else {
12130Sstevel@tonic-gate 				logdebug("#%d target NULL ", i);
12140Sstevel@tonic-gate 			}
12150Sstevel@tonic-gate 			logdebug("time_sent %u status %d time_ack/lost %u\n",
12160Sstevel@tonic-gate 			    pii->pii_probes[i].pr_time_sent,
12170Sstevel@tonic-gate 			    pii->pii_probes[i].pr_status,
12180Sstevel@tonic-gate 			    pii->pii_probes[i].pr_time_lost);
12190Sstevel@tonic-gate 			i = PROBE_INDEX_PREV(i);
12200Sstevel@tonic-gate 		} while (i != most_recent);
12210Sstevel@tonic-gate 	}
12220Sstevel@tonic-gate }
12230Sstevel@tonic-gate 
12240Sstevel@tonic-gate /*
12250Sstevel@tonic-gate  * Lookup a logint based on the logical interface name, on the given
12260Sstevel@tonic-gate  * phyint instance.
12270Sstevel@tonic-gate  */
12280Sstevel@tonic-gate static struct logint *
12290Sstevel@tonic-gate logint_lookup(struct phyint_instance *pii, char *name)
12300Sstevel@tonic-gate {
12310Sstevel@tonic-gate 	struct logint *li;
12320Sstevel@tonic-gate 
12330Sstevel@tonic-gate 	if (debug & D_LOGINT) {
12340Sstevel@tonic-gate 		logdebug("logint_lookup(%s, %s)\n",
12350Sstevel@tonic-gate 		    AF_STR(pii->pii_af), name);
12360Sstevel@tonic-gate 	}
12370Sstevel@tonic-gate 
12380Sstevel@tonic-gate 	for (li = pii->pii_logint; li != NULL; li = li->li_next) {
12390Sstevel@tonic-gate 		if (strncmp(name, li->li_name, sizeof (li->li_name)) == 0)
12400Sstevel@tonic-gate 			break;
12410Sstevel@tonic-gate 	}
12420Sstevel@tonic-gate 	return (li);
12430Sstevel@tonic-gate }
12440Sstevel@tonic-gate 
12450Sstevel@tonic-gate /*
12460Sstevel@tonic-gate  * Insert a logint at the head of the list of logints of the given
12470Sstevel@tonic-gate  * phyint instance
12480Sstevel@tonic-gate  */
12490Sstevel@tonic-gate static void
12500Sstevel@tonic-gate logint_insert(struct phyint_instance *pii, struct logint *li)
12510Sstevel@tonic-gate {
12520Sstevel@tonic-gate 	li->li_next = pii->pii_logint;
12530Sstevel@tonic-gate 	li->li_prev = NULL;
12540Sstevel@tonic-gate 	if (pii->pii_logint != NULL)
12550Sstevel@tonic-gate 		pii->pii_logint->li_prev = li;
12560Sstevel@tonic-gate 	pii->pii_logint = li;
12570Sstevel@tonic-gate 	li->li_phyint_inst = pii;
12580Sstevel@tonic-gate }
12590Sstevel@tonic-gate 
12600Sstevel@tonic-gate /*
12610Sstevel@tonic-gate  * Create a new named logint, on the specified phyint instance.
12620Sstevel@tonic-gate  */
12630Sstevel@tonic-gate static struct logint *
12640Sstevel@tonic-gate logint_create(struct phyint_instance *pii, char *name)
12650Sstevel@tonic-gate {
12660Sstevel@tonic-gate 	struct logint *li;
12670Sstevel@tonic-gate 
12680Sstevel@tonic-gate 	if (debug & D_LOGINT) {
12690Sstevel@tonic-gate 		logdebug("logint_create(%s %s %s)\n",
12700Sstevel@tonic-gate 		    AF_STR(pii->pii_af), pii->pii_name, name);
12710Sstevel@tonic-gate 	}
12720Sstevel@tonic-gate 
12730Sstevel@tonic-gate 	li = calloc(1, sizeof (struct logint));
12740Sstevel@tonic-gate 	if (li == NULL) {
12750Sstevel@tonic-gate 		logperror("logint_create: calloc");
12760Sstevel@tonic-gate 		return (NULL);
12770Sstevel@tonic-gate 	}
12780Sstevel@tonic-gate 
12790Sstevel@tonic-gate 	(void) strncpy(li->li_name, name, sizeof (li->li_name));
12800Sstevel@tonic-gate 	li->li_name[sizeof (li->li_name) - 1] = '\0';
12810Sstevel@tonic-gate 	logint_insert(pii, li);
12820Sstevel@tonic-gate 	return (li);
12830Sstevel@tonic-gate }
12840Sstevel@tonic-gate 
12850Sstevel@tonic-gate /*
12860Sstevel@tonic-gate  * Initialize the logint based on the data returned by the kernel.
12870Sstevel@tonic-gate  */
12880Sstevel@tonic-gate void
12890Sstevel@tonic-gate logint_init_from_k(struct phyint_instance *pii, char *li_name)
12900Sstevel@tonic-gate {
12910Sstevel@tonic-gate 	int	ifsock;
12920Sstevel@tonic-gate 	uint64_t flags;
12930Sstevel@tonic-gate 	uint64_t saved_flags;
12940Sstevel@tonic-gate 	struct	logint	*li;
12950Sstevel@tonic-gate 	struct lifreq	lifr;
12960Sstevel@tonic-gate 	struct in6_addr	test_subnet;
12970Sstevel@tonic-gate 	struct in6_addr	test_subnet_mask;
12980Sstevel@tonic-gate 	struct in6_addr	testaddr;
12990Sstevel@tonic-gate 	int	test_subnet_len;
13000Sstevel@tonic-gate 	struct sockaddr_in6	*sin6;
13010Sstevel@tonic-gate 	struct sockaddr_in	*sin;
13020Sstevel@tonic-gate 	char abuf[INET6_ADDRSTRLEN];
13030Sstevel@tonic-gate 	boolean_t  ptp = _B_FALSE;
13040Sstevel@tonic-gate 	struct in6_addr tgaddr;
13050Sstevel@tonic-gate 
13060Sstevel@tonic-gate 	if (debug & D_LOGINT) {
13070Sstevel@tonic-gate 		logdebug("logint_init_from_k(%s %s)\n",
13080Sstevel@tonic-gate 		    AF_STR(pii->pii_af), li_name);
13090Sstevel@tonic-gate 	}
13100Sstevel@tonic-gate 
13110Sstevel@tonic-gate 	/* Get the socket for doing ioctls */
13120Sstevel@tonic-gate 	ifsock = (pii->pii_af == AF_INET) ? ifsock_v4 : ifsock_v6;
13130Sstevel@tonic-gate 
13140Sstevel@tonic-gate 	/*
13150Sstevel@tonic-gate 	 * Get the flags from the kernel. Also serves as a check whether
13160Sstevel@tonic-gate 	 * the logical still exists. If it doesn't exist, no need to proceed
13170Sstevel@tonic-gate 	 * any further. li_in_use will make the caller clean up the logint
13180Sstevel@tonic-gate 	 */
13190Sstevel@tonic-gate 	(void) strncpy(lifr.lifr_name, li_name, sizeof (lifr.lifr_name));
13200Sstevel@tonic-gate 	lifr.lifr_name[sizeof (lifr.lifr_name) - 1] = '\0';
13210Sstevel@tonic-gate 	if (ioctl(ifsock, SIOCGLIFFLAGS, (char *)&lifr) < 0) {
13220Sstevel@tonic-gate 		/* Interface may have vanished */
13230Sstevel@tonic-gate 		if (errno != ENXIO) {
13240Sstevel@tonic-gate 			logperror_pii(pii, "logint_init_from_k: "
13250Sstevel@tonic-gate 			    "ioctl (get flags)");
13260Sstevel@tonic-gate 		}
13270Sstevel@tonic-gate 		return;
13280Sstevel@tonic-gate 	}
13290Sstevel@tonic-gate 
13300Sstevel@tonic-gate 	flags = lifr.lifr_flags;
13310Sstevel@tonic-gate 
13320Sstevel@tonic-gate 	/*
13330Sstevel@tonic-gate 	 * Verified the logint exists. Now lookup the logint in our tables.
13340Sstevel@tonic-gate 	 * If it does not exist, create a new logint.
13350Sstevel@tonic-gate 	 */
13360Sstevel@tonic-gate 	li = logint_lookup(pii, li_name);
13370Sstevel@tonic-gate 	if (li == NULL) {
13380Sstevel@tonic-gate 		li = logint_create(pii, li_name);
13390Sstevel@tonic-gate 		if (li == NULL) {
13400Sstevel@tonic-gate 			/*
13410Sstevel@tonic-gate 			 * Pretend the interface does not exist
13420Sstevel@tonic-gate 			 * in the kernel
13430Sstevel@tonic-gate 			 */
13440Sstevel@tonic-gate 			return;
13450Sstevel@tonic-gate 		}
13460Sstevel@tonic-gate 	}
13470Sstevel@tonic-gate 
13480Sstevel@tonic-gate 	/*
13490Sstevel@tonic-gate 	 * Update li->li_flags with the new flags, after saving the old
13500Sstevel@tonic-gate 	 * value. This is used later to check what flags has changed and
13510Sstevel@tonic-gate 	 * take any action
13520Sstevel@tonic-gate 	 */
13530Sstevel@tonic-gate 	saved_flags = li->li_flags;
13540Sstevel@tonic-gate 	li->li_flags = flags;
13550Sstevel@tonic-gate 
13560Sstevel@tonic-gate 	/*
13570Sstevel@tonic-gate 	 * Get the address, prefix, prefixlength and update the logint.
13580Sstevel@tonic-gate 	 * Check if anything has changed. If the logint used for the
13590Sstevel@tonic-gate 	 * test address has changed, take suitable action.
13600Sstevel@tonic-gate 	 */
13610Sstevel@tonic-gate 	if (ioctl(ifsock, SIOCGLIFADDR, (char *)&lifr) < 0) {
13620Sstevel@tonic-gate 		/* Interface may have vanished */
13630Sstevel@tonic-gate 		if (errno != ENXIO) {
13640Sstevel@tonic-gate 			logperror_li(li, "logint_init_from_k: (get addr)");
13650Sstevel@tonic-gate 		}
13660Sstevel@tonic-gate 		goto error;
13670Sstevel@tonic-gate 	}
13680Sstevel@tonic-gate 
13690Sstevel@tonic-gate 	if (pii->pii_af == AF_INET) {
13700Sstevel@tonic-gate 		sin = (struct sockaddr_in *)&lifr.lifr_addr;
13710Sstevel@tonic-gate 		IN6_INADDR_TO_V4MAPPED(&sin->sin_addr, &testaddr);
13720Sstevel@tonic-gate 	} else {
13730Sstevel@tonic-gate 		sin6 = (struct sockaddr_in6 *)&lifr.lifr_addr;
13740Sstevel@tonic-gate 		testaddr = sin6->sin6_addr;
13750Sstevel@tonic-gate 	}
13760Sstevel@tonic-gate 
13770Sstevel@tonic-gate 	if (pii->pii_phyint->pi_flags & IFF_POINTOPOINT) {
13780Sstevel@tonic-gate 		ptp = _B_TRUE;
13790Sstevel@tonic-gate 		if (ioctl(ifsock, SIOCGLIFDSTADDR, (char *)&lifr) < 0) {
13800Sstevel@tonic-gate 			if (errno != ENXIO) {
13810Sstevel@tonic-gate 				logperror_li(li, "logint_init_from_k:"
13820Sstevel@tonic-gate 				    " (get dstaddr)");
13830Sstevel@tonic-gate 			}
13840Sstevel@tonic-gate 			goto error;
13850Sstevel@tonic-gate 		}
13860Sstevel@tonic-gate 		if (pii->pii_af == AF_INET) {
13870Sstevel@tonic-gate 			sin = (struct sockaddr_in *)&lifr.lifr_addr;
13880Sstevel@tonic-gate 			IN6_INADDR_TO_V4MAPPED(&sin->sin_addr, &tgaddr);
13890Sstevel@tonic-gate 		} else {
13900Sstevel@tonic-gate 			sin6 = (struct sockaddr_in6 *)&lifr.lifr_addr;
13910Sstevel@tonic-gate 			tgaddr = sin6->sin6_addr;
13920Sstevel@tonic-gate 		}
13930Sstevel@tonic-gate 	} else {
13940Sstevel@tonic-gate 		if (ioctl(ifsock, SIOCGLIFSUBNET, (char *)&lifr) < 0) {
13950Sstevel@tonic-gate 			/* Interface may have vanished */
13960Sstevel@tonic-gate 			if (errno != ENXIO) {
13970Sstevel@tonic-gate 				logperror_li(li, "logint_init_from_k:"
13980Sstevel@tonic-gate 				    " (get subnet)");
13990Sstevel@tonic-gate 			}
14000Sstevel@tonic-gate 			goto error;
14010Sstevel@tonic-gate 		}
14020Sstevel@tonic-gate 		if (lifr.lifr_subnet.ss_family == AF_INET6) {
14030Sstevel@tonic-gate 			sin6 = (struct sockaddr_in6 *)&lifr.lifr_subnet;
14040Sstevel@tonic-gate 			test_subnet = sin6->sin6_addr;
14050Sstevel@tonic-gate 			test_subnet_len = lifr.lifr_addrlen;
14060Sstevel@tonic-gate 		} else {
14070Sstevel@tonic-gate 			sin = (struct sockaddr_in *)&lifr.lifr_subnet;
14080Sstevel@tonic-gate 			IN6_INADDR_TO_V4MAPPED(&sin->sin_addr, &test_subnet);
14090Sstevel@tonic-gate 			test_subnet_len = lifr.lifr_addrlen +
14100Sstevel@tonic-gate 			    (IPV6_ABITS - IP_ABITS);
14110Sstevel@tonic-gate 		}
14120Sstevel@tonic-gate 		(void) ip_index_to_mask_v6(test_subnet_len, &test_subnet_mask);
14130Sstevel@tonic-gate 	}
14140Sstevel@tonic-gate 
14150Sstevel@tonic-gate 	/*
14160Sstevel@tonic-gate 	 * Also record the OINDEX for completeness. This information is
14170Sstevel@tonic-gate 	 * not used.
14180Sstevel@tonic-gate 	 */
14190Sstevel@tonic-gate 	if (ioctl(ifsock, SIOCGLIFOINDEX, (char *)&lifr) < 0) {
14200Sstevel@tonic-gate 		if (errno != ENXIO)  {
14210Sstevel@tonic-gate 			logperror_li(li, "logint_init_from_k:"
14220Sstevel@tonic-gate 			    " (get lifoindex)");
14230Sstevel@tonic-gate 		}
14240Sstevel@tonic-gate 		goto error;
14250Sstevel@tonic-gate 	}
14260Sstevel@tonic-gate 
14270Sstevel@tonic-gate 	/*
14280Sstevel@tonic-gate 	 * If this is the logint corresponding to the test address used for
14290Sstevel@tonic-gate 	 * sending probes, then if anything significant has changed we need to
14300Sstevel@tonic-gate 	 * determine the test address again.  We ignore changes to the
14310Sstevel@tonic-gate 	 * IFF_FAILED and IFF_RUNNING flags since those happen as a matter of
14320Sstevel@tonic-gate 	 * course.
14330Sstevel@tonic-gate 	 */
14340Sstevel@tonic-gate 	if (pii->pii_probe_logint == li) {
14350Sstevel@tonic-gate 		if (((li->li_flags ^ saved_flags) &
14360Sstevel@tonic-gate 		    ~(IFF_FAILED | IFF_RUNNING)) != 0 ||
14370Sstevel@tonic-gate 		    !IN6_ARE_ADDR_EQUAL(&testaddr, &li->li_addr) ||
14380Sstevel@tonic-gate 		    (!ptp && !IN6_ARE_ADDR_EQUAL(&test_subnet,
14390Sstevel@tonic-gate 			&li->li_subnet)) ||
14400Sstevel@tonic-gate 		    (!ptp && test_subnet_len != li->li_subnet_len) ||
14410Sstevel@tonic-gate 		    (ptp && !IN6_ARE_ADDR_EQUAL(&tgaddr, &li->li_dstaddr))) {
14420Sstevel@tonic-gate 			/*
14430Sstevel@tonic-gate 			 * Something significant that affects the testaddress
14440Sstevel@tonic-gate 			 * has changed. Redo the testaddress selection later on
14450Sstevel@tonic-gate 			 * in select_test_ifs(). For now do the cleanup and
14460Sstevel@tonic-gate 			 * set pii_probe_logint to NULL.
14470Sstevel@tonic-gate 			 */
14480Sstevel@tonic-gate 			if (pii->pii_probe_sock != -1)
14490Sstevel@tonic-gate 				close_probe_socket(pii, _B_TRUE);
14500Sstevel@tonic-gate 			pii->pii_probe_logint = NULL;
14510Sstevel@tonic-gate 		}
14520Sstevel@tonic-gate 	}
14530Sstevel@tonic-gate 
14540Sstevel@tonic-gate 
14550Sstevel@tonic-gate 	/* Update the logint with the values obtained from the kernel.	*/
14560Sstevel@tonic-gate 	li->li_addr = testaddr;
14570Sstevel@tonic-gate 	li->li_in_use = 1;
14580Sstevel@tonic-gate 	li->li_oifindex = lifr.lifr_index;
14590Sstevel@tonic-gate 	if (ptp) {
14600Sstevel@tonic-gate 		li->li_dstaddr = tgaddr;
14610Sstevel@tonic-gate 		li->li_subnet_len = (pii->pii_af == AF_INET) ?
14620Sstevel@tonic-gate 		    IP_ABITS : IPV6_ABITS;
14630Sstevel@tonic-gate 	} else {
14640Sstevel@tonic-gate 		li->li_subnet = test_subnet;
14650Sstevel@tonic-gate 		li->li_subnet_len = test_subnet_len;
14660Sstevel@tonic-gate 	}
14670Sstevel@tonic-gate 
14680Sstevel@tonic-gate 	if (debug & D_LOGINT)
14690Sstevel@tonic-gate 		logint_print(li);
14700Sstevel@tonic-gate 
14710Sstevel@tonic-gate 	return;
14720Sstevel@tonic-gate 
14730Sstevel@tonic-gate error:
14740Sstevel@tonic-gate 	logerr("logint_init_from_k: IGNORED %s %s %s addr %s\n",
14750Sstevel@tonic-gate 	    AF_STR(pii->pii_af), pii->pii_name, li->li_name,
14760Sstevel@tonic-gate 	    pr_addr(pii->pii_af, testaddr, abuf, sizeof (abuf)));
14770Sstevel@tonic-gate 	logint_delete(li);
14780Sstevel@tonic-gate }
14790Sstevel@tonic-gate 
14800Sstevel@tonic-gate /*
14810Sstevel@tonic-gate  * Delete (unlink and free) a logint.
14820Sstevel@tonic-gate  */
14830Sstevel@tonic-gate void
14840Sstevel@tonic-gate logint_delete(struct logint *li)
14850Sstevel@tonic-gate {
14860Sstevel@tonic-gate 	struct phyint_instance *pii;
14870Sstevel@tonic-gate 
14880Sstevel@tonic-gate 	pii = li->li_phyint_inst;
14890Sstevel@tonic-gate 	assert(pii != NULL);
14900Sstevel@tonic-gate 
14910Sstevel@tonic-gate 	if (debug & D_LOGINT) {
14920Sstevel@tonic-gate 		int af;
14930Sstevel@tonic-gate 		char abuf[INET6_ADDRSTRLEN];
14940Sstevel@tonic-gate 
14950Sstevel@tonic-gate 		af = pii->pii_af;
14960Sstevel@tonic-gate 		logdebug("logint_delete(%s %s %s/%u)\n",
14970Sstevel@tonic-gate 		    AF_STR(af), li->li_name,
14980Sstevel@tonic-gate 		    pr_addr(af, li->li_addr, abuf, sizeof (abuf)),
14990Sstevel@tonic-gate 		    li->li_subnet_len);
15000Sstevel@tonic-gate 	}
15010Sstevel@tonic-gate 
15020Sstevel@tonic-gate 	/* logint must be in the list of logints */
15030Sstevel@tonic-gate 	assert(pii->pii_logint == li || li->li_prev != NULL);
15040Sstevel@tonic-gate 
15050Sstevel@tonic-gate 	/* Remove the logint from the list of logints  */
15060Sstevel@tonic-gate 	if (li->li_prev == NULL) {
15070Sstevel@tonic-gate 		/* logint is the 1st in the list */
15080Sstevel@tonic-gate 		pii->pii_logint = li->li_next;
15090Sstevel@tonic-gate 	} else {
15100Sstevel@tonic-gate 		li->li_prev->li_next = li->li_next;
15110Sstevel@tonic-gate 	}
15120Sstevel@tonic-gate 	if (li->li_next != NULL)
15130Sstevel@tonic-gate 		li->li_next->li_prev = li->li_prev;
15140Sstevel@tonic-gate 	li->li_next = NULL;
15150Sstevel@tonic-gate 	li->li_prev = NULL;
15160Sstevel@tonic-gate 
15170Sstevel@tonic-gate 	/*
15182074Smeem 	 * If this logint is also being used for probing, then close the
15192074Smeem 	 * associated socket, if it exists.
15200Sstevel@tonic-gate 	 */
15210Sstevel@tonic-gate 	if (pii->pii_probe_logint == li) {
15220Sstevel@tonic-gate 		if (pii->pii_probe_sock != -1)
15230Sstevel@tonic-gate 			close_probe_socket(pii, _B_TRUE);
15240Sstevel@tonic-gate 		pii->pii_probe_logint = NULL;
15250Sstevel@tonic-gate 	}
15260Sstevel@tonic-gate 
15270Sstevel@tonic-gate 	free(li);
15280Sstevel@tonic-gate }
15290Sstevel@tonic-gate 
15300Sstevel@tonic-gate static void
15310Sstevel@tonic-gate logint_print(struct logint *li)
15320Sstevel@tonic-gate {
15330Sstevel@tonic-gate 	char abuf[INET6_ADDRSTRLEN];
15340Sstevel@tonic-gate 	int af;
15350Sstevel@tonic-gate 
15360Sstevel@tonic-gate 	af = li->li_phyint_inst->pii_af;
15370Sstevel@tonic-gate 
15380Sstevel@tonic-gate 	logdebug("logint: %s %s addr %s/%u", AF_STR(af), li->li_name,
15390Sstevel@tonic-gate 	    pr_addr(af, li->li_addr, abuf, sizeof (abuf)), li->li_subnet_len);
15400Sstevel@tonic-gate 
15410Sstevel@tonic-gate 	logdebug("\tFlags: %llx in_use %d oifindex %d\n",
15420Sstevel@tonic-gate 	    li->li_flags, li->li_in_use, li->li_oifindex);
15430Sstevel@tonic-gate }
15440Sstevel@tonic-gate 
15450Sstevel@tonic-gate char *
15460Sstevel@tonic-gate pr_addr(int af, struct in6_addr addr, char *abuf, int len)
15470Sstevel@tonic-gate {
15480Sstevel@tonic-gate 	struct in_addr	addr_v4;
15490Sstevel@tonic-gate 
15500Sstevel@tonic-gate 	if (af == AF_INET) {
15510Sstevel@tonic-gate 		IN6_V4MAPPED_TO_INADDR(&addr, &addr_v4);
15520Sstevel@tonic-gate 		(void) inet_ntop(AF_INET, (void *)&addr_v4, abuf, len);
15530Sstevel@tonic-gate 	} else {
15540Sstevel@tonic-gate 		(void) inet_ntop(AF_INET6, (void *)&addr, abuf, len);
15550Sstevel@tonic-gate 	}
15560Sstevel@tonic-gate 	return (abuf);
15570Sstevel@tonic-gate }
15580Sstevel@tonic-gate 
15590Sstevel@tonic-gate /* Lookup target on its address */
15600Sstevel@tonic-gate struct target *
15610Sstevel@tonic-gate target_lookup(struct phyint_instance *pii, struct in6_addr addr)
15620Sstevel@tonic-gate {
15630Sstevel@tonic-gate 	struct target *tg;
15640Sstevel@tonic-gate 
15650Sstevel@tonic-gate 	if (debug & D_TARGET) {
15660Sstevel@tonic-gate 		char abuf[INET6_ADDRSTRLEN];
15670Sstevel@tonic-gate 
15680Sstevel@tonic-gate 		logdebug("target_lookup(%s %s): addr %s\n",
15690Sstevel@tonic-gate 		    AF_STR(pii->pii_af), pii->pii_name,
15700Sstevel@tonic-gate 		    pr_addr(pii->pii_af, addr, abuf, sizeof (abuf)));
15710Sstevel@tonic-gate 	}
15720Sstevel@tonic-gate 
15730Sstevel@tonic-gate 	for (tg = pii->pii_targets; tg != NULL; tg = tg->tg_next) {
15740Sstevel@tonic-gate 		if (IN6_ARE_ADDR_EQUAL(&tg->tg_address, &addr))
15750Sstevel@tonic-gate 			break;
15760Sstevel@tonic-gate 	}
15770Sstevel@tonic-gate 	return (tg);
15780Sstevel@tonic-gate }
15790Sstevel@tonic-gate 
15800Sstevel@tonic-gate /*
15810Sstevel@tonic-gate  * Find and return the next active target, for the next probe.
15820Sstevel@tonic-gate  * If no active targets are available, return NULL.
15830Sstevel@tonic-gate  */
15840Sstevel@tonic-gate struct target *
15850Sstevel@tonic-gate target_next(struct target *tg)
15860Sstevel@tonic-gate {
15870Sstevel@tonic-gate 	struct	phyint_instance	*pii = tg->tg_phyint_inst;
15880Sstevel@tonic-gate 	struct	target	*marker = tg;
15890Sstevel@tonic-gate 	hrtime_t now;
15900Sstevel@tonic-gate 
15910Sstevel@tonic-gate 	now = gethrtime();
15920Sstevel@tonic-gate 
15930Sstevel@tonic-gate 	/*
15940Sstevel@tonic-gate 	 * Target must be in the list of targets for this phyint
15950Sstevel@tonic-gate 	 * instance.
15960Sstevel@tonic-gate 	 */
15970Sstevel@tonic-gate 	assert(pii->pii_targets == tg || tg->tg_prev != NULL);
15980Sstevel@tonic-gate 	assert(pii->pii_targets != NULL);
15990Sstevel@tonic-gate 
16000Sstevel@tonic-gate 	/* Return the next active target */
16010Sstevel@tonic-gate 	do {
16020Sstevel@tonic-gate 		/*
16030Sstevel@tonic-gate 		 * Go to the next target. If we hit the end,
16040Sstevel@tonic-gate 		 * reset the ptr to the head
16050Sstevel@tonic-gate 		 */
16060Sstevel@tonic-gate 		tg = tg->tg_next;
16070Sstevel@tonic-gate 		if (tg == NULL)
16080Sstevel@tonic-gate 			tg = pii->pii_targets;
16090Sstevel@tonic-gate 
16100Sstevel@tonic-gate 		assert(TG_STATUS_VALID(tg->tg_status));
16110Sstevel@tonic-gate 
16120Sstevel@tonic-gate 		switch (tg->tg_status) {
16130Sstevel@tonic-gate 		case TG_ACTIVE:
16140Sstevel@tonic-gate 			return (tg);
16150Sstevel@tonic-gate 
16160Sstevel@tonic-gate 		case TG_UNUSED:
16170Sstevel@tonic-gate 			assert(pii->pii_targets_are_routers);
16180Sstevel@tonic-gate 			if (pii->pii_ntargets < MAX_PROBE_TARGETS) {
16190Sstevel@tonic-gate 				/*
16200Sstevel@tonic-gate 				 * Bubble up the unused target to active
16210Sstevel@tonic-gate 				 */
16220Sstevel@tonic-gate 				tg->tg_status = TG_ACTIVE;
16230Sstevel@tonic-gate 				pii->pii_ntargets++;
16240Sstevel@tonic-gate 				return (tg);
16250Sstevel@tonic-gate 			}
16260Sstevel@tonic-gate 			break;
16270Sstevel@tonic-gate 
16280Sstevel@tonic-gate 		case TG_SLOW:
16290Sstevel@tonic-gate 			assert(pii->pii_targets_are_routers);
16300Sstevel@tonic-gate 			if (tg->tg_latime + MIN_RECOVERY_TIME < now) {
16310Sstevel@tonic-gate 				/*
16320Sstevel@tonic-gate 				 * Bubble up the slow target to unused
16330Sstevel@tonic-gate 				 */
16340Sstevel@tonic-gate 				tg->tg_status = TG_UNUSED;
16350Sstevel@tonic-gate 			}
16360Sstevel@tonic-gate 			break;
16370Sstevel@tonic-gate 
16380Sstevel@tonic-gate 		case TG_DEAD:
16390Sstevel@tonic-gate 			assert(pii->pii_targets_are_routers);
16400Sstevel@tonic-gate 			if (tg->tg_latime + MIN_RECOVERY_TIME < now) {
16410Sstevel@tonic-gate 				/*
16420Sstevel@tonic-gate 				 * Bubble up the dead target to slow
16430Sstevel@tonic-gate 				 */
16440Sstevel@tonic-gate 				tg->tg_status = TG_SLOW;
16450Sstevel@tonic-gate 				tg->tg_latime = now;
16460Sstevel@tonic-gate 			}
16470Sstevel@tonic-gate 			break;
16480Sstevel@tonic-gate 		}
16490Sstevel@tonic-gate 
16500Sstevel@tonic-gate 	} while (tg != marker);
16510Sstevel@tonic-gate 
16520Sstevel@tonic-gate 	return (NULL);
16530Sstevel@tonic-gate }
16540Sstevel@tonic-gate 
16550Sstevel@tonic-gate /*
16560Sstevel@tonic-gate  * Select the best available target, that is not already TG_ACTIVE,
16570Sstevel@tonic-gate  * for the caller. The caller will determine whether it wants to
16580Sstevel@tonic-gate  * make the returned target TG_ACTIVE.
16590Sstevel@tonic-gate  * The selection order is as follows.
16600Sstevel@tonic-gate  * 1. pick a TG_UNSED target, if it exists.
16610Sstevel@tonic-gate  * 2. else pick a TG_SLOW target that has recovered, if it exists
16620Sstevel@tonic-gate  * 3. else pick any TG_SLOW target, if it exists
16630Sstevel@tonic-gate  * 4. else pick a TG_DEAD target that has recovered, if it exists
16640Sstevel@tonic-gate  * 5. else pick any TG_DEAD target, if it exists
16650Sstevel@tonic-gate  * 6. else return null
16660Sstevel@tonic-gate  */
16670Sstevel@tonic-gate static struct target *
16680Sstevel@tonic-gate target_select_best(struct phyint_instance *pii)
16690Sstevel@tonic-gate {
16700Sstevel@tonic-gate 	struct target *tg;
16710Sstevel@tonic-gate 	struct target *slow = NULL;
16720Sstevel@tonic-gate 	struct target *dead = NULL;
16730Sstevel@tonic-gate 	struct target *slow_recovered = NULL;
16740Sstevel@tonic-gate 	struct target *dead_recovered = NULL;
16750Sstevel@tonic-gate 	hrtime_t now;
16760Sstevel@tonic-gate 
16770Sstevel@tonic-gate 	now = gethrtime();
16780Sstevel@tonic-gate 
16790Sstevel@tonic-gate 	for (tg = pii->pii_targets; tg != NULL; tg = tg->tg_next) {
16800Sstevel@tonic-gate 		assert(TG_STATUS_VALID(tg->tg_status));
16810Sstevel@tonic-gate 
16820Sstevel@tonic-gate 		switch (tg->tg_status) {
16830Sstevel@tonic-gate 		case TG_UNUSED:
16840Sstevel@tonic-gate 			return (tg);
16850Sstevel@tonic-gate 
16860Sstevel@tonic-gate 		case TG_SLOW:
16870Sstevel@tonic-gate 			if (tg->tg_latime + MIN_RECOVERY_TIME < now) {
16880Sstevel@tonic-gate 				slow_recovered = tg;
16890Sstevel@tonic-gate 				/*
16900Sstevel@tonic-gate 				 * Promote the slow_recoverd to unused
16910Sstevel@tonic-gate 				 */
16920Sstevel@tonic-gate 				tg->tg_status = TG_UNUSED;
16930Sstevel@tonic-gate 			} else {
16940Sstevel@tonic-gate 				slow = tg;
16950Sstevel@tonic-gate 			}
16960Sstevel@tonic-gate 			break;
16970Sstevel@tonic-gate 
16980Sstevel@tonic-gate 		case TG_DEAD:
16990Sstevel@tonic-gate 			if (tg->tg_latime + MIN_RECOVERY_TIME < now) {
17000Sstevel@tonic-gate 				dead_recovered = tg;
17010Sstevel@tonic-gate 				/*
17020Sstevel@tonic-gate 				 * Promote the dead_recoverd to slow
17030Sstevel@tonic-gate 				 */
17040Sstevel@tonic-gate 				tg->tg_status = TG_SLOW;
17050Sstevel@tonic-gate 				tg->tg_latime = now;
17060Sstevel@tonic-gate 			} else {
17070Sstevel@tonic-gate 				dead = tg;
17080Sstevel@tonic-gate 			}
17090Sstevel@tonic-gate 			break;
17100Sstevel@tonic-gate 
17110Sstevel@tonic-gate 		default:
17120Sstevel@tonic-gate 			break;
17130Sstevel@tonic-gate 		}
17140Sstevel@tonic-gate 	}
17150Sstevel@tonic-gate 
17160Sstevel@tonic-gate 	if (slow_recovered != NULL)
17170Sstevel@tonic-gate 		return (slow_recovered);
17180Sstevel@tonic-gate 	else if (slow != NULL)
17190Sstevel@tonic-gate 		return (slow);
17200Sstevel@tonic-gate 	else if (dead_recovered != NULL)
17210Sstevel@tonic-gate 		return (dead_recovered);
17220Sstevel@tonic-gate 	else
17230Sstevel@tonic-gate 		return (dead);
17240Sstevel@tonic-gate }
17250Sstevel@tonic-gate 
17260Sstevel@tonic-gate /*
17270Sstevel@tonic-gate  * Some target was deleted. If we don't have even MIN_PROBE_TARGETS
17280Sstevel@tonic-gate  * that are active, pick the next best below.
17290Sstevel@tonic-gate  */
17300Sstevel@tonic-gate static void
17310Sstevel@tonic-gate target_activate_all(struct phyint_instance *pii)
17320Sstevel@tonic-gate {
17330Sstevel@tonic-gate 	struct target *tg;
17340Sstevel@tonic-gate 
17350Sstevel@tonic-gate 	assert(pii->pii_ntargets == 0);
17360Sstevel@tonic-gate 	assert(pii->pii_target_next == NULL);
17370Sstevel@tonic-gate 	assert(pii->pii_rtt_target_next == NULL);
17380Sstevel@tonic-gate 	assert(pii->pii_targets_are_routers);
17390Sstevel@tonic-gate 
17400Sstevel@tonic-gate 	while (pii->pii_ntargets < MIN_PROBE_TARGETS) {
17410Sstevel@tonic-gate 		tg = target_select_best(pii);
17420Sstevel@tonic-gate 		if (tg == NULL) {
17430Sstevel@tonic-gate 			/* We are out of targets */
17440Sstevel@tonic-gate 			return;
17450Sstevel@tonic-gate 		}
17460Sstevel@tonic-gate 
17470Sstevel@tonic-gate 		assert(TG_STATUS_VALID(tg->tg_status));
17480Sstevel@tonic-gate 		assert(tg->tg_status != TG_ACTIVE);
17490Sstevel@tonic-gate 		tg->tg_status = TG_ACTIVE;
17500Sstevel@tonic-gate 		pii->pii_ntargets++;
17510Sstevel@tonic-gate 		if (pii->pii_target_next == NULL) {
17520Sstevel@tonic-gate 			pii->pii_target_next = tg;
17530Sstevel@tonic-gate 			pii->pii_rtt_target_next = tg;
17540Sstevel@tonic-gate 		}
17550Sstevel@tonic-gate 	}
17560Sstevel@tonic-gate }
17570Sstevel@tonic-gate 
17580Sstevel@tonic-gate static struct target *
17590Sstevel@tonic-gate target_first(struct phyint_instance *pii)
17600Sstevel@tonic-gate {
17610Sstevel@tonic-gate 	struct target *tg;
17620Sstevel@tonic-gate 
17630Sstevel@tonic-gate 	for (tg = pii->pii_targets; tg != NULL; tg = tg->tg_next) {
17640Sstevel@tonic-gate 		assert(TG_STATUS_VALID(tg->tg_status));
17650Sstevel@tonic-gate 		if (tg->tg_status == TG_ACTIVE)
17660Sstevel@tonic-gate 			break;
17670Sstevel@tonic-gate 	}
17680Sstevel@tonic-gate 
17690Sstevel@tonic-gate 	return (tg);
17700Sstevel@tonic-gate }
17710Sstevel@tonic-gate 
17720Sstevel@tonic-gate /*
17730Sstevel@tonic-gate  * Create a default target entry.
17740Sstevel@tonic-gate  */
17750Sstevel@tonic-gate void
17760Sstevel@tonic-gate target_create(struct phyint_instance *pii, struct in6_addr addr,
17770Sstevel@tonic-gate     boolean_t is_router)
17780Sstevel@tonic-gate {
17790Sstevel@tonic-gate 	struct target *tg;
17800Sstevel@tonic-gate 	struct phyint *pi;
17810Sstevel@tonic-gate 	struct logint *li;
17820Sstevel@tonic-gate 
17830Sstevel@tonic-gate 	if (debug & D_TARGET) {
17840Sstevel@tonic-gate 		char abuf[INET6_ADDRSTRLEN];
17850Sstevel@tonic-gate 
17860Sstevel@tonic-gate 		logdebug("target_create(%s %s, %s)\n",
17870Sstevel@tonic-gate 		    AF_STR(pii->pii_af), pii->pii_name,
17880Sstevel@tonic-gate 		    pr_addr(pii->pii_af, addr, abuf, sizeof (abuf)));
17890Sstevel@tonic-gate 	}
17900Sstevel@tonic-gate 
17910Sstevel@tonic-gate 	/*
17920Sstevel@tonic-gate 	 * If the test address is not yet initialized, do not add
17930Sstevel@tonic-gate 	 * any target, since we cannot determine whether the target
17940Sstevel@tonic-gate 	 * belongs to the same subnet as the test address.
17950Sstevel@tonic-gate 	 */
17960Sstevel@tonic-gate 	li = pii->pii_probe_logint;
17970Sstevel@tonic-gate 	if (li == NULL)
17980Sstevel@tonic-gate 		return;
17990Sstevel@tonic-gate 
18000Sstevel@tonic-gate 	/*
18010Sstevel@tonic-gate 	 * If there are multiple subnets associated with an interface, then
18020Sstevel@tonic-gate 	 * add the target to this phyint instance, only if it belongs to the
18030Sstevel@tonic-gate 	 * same subnet as the test address. The reason is that interface
18040Sstevel@tonic-gate 	 * routes derived from non-test-addresses i.e. non-IFF_NOFAILOVER
18050Sstevel@tonic-gate 	 * addresses, will disappear after failover, and the targets will not
18060Sstevel@tonic-gate 	 * be reachable from this interface.
18070Sstevel@tonic-gate 	 */
18080Sstevel@tonic-gate 	if (!prefix_equal(li->li_subnet, addr, li->li_subnet_len))
18090Sstevel@tonic-gate 		return;
18100Sstevel@tonic-gate 
18110Sstevel@tonic-gate 	if (pii->pii_targets != NULL) {
18120Sstevel@tonic-gate 		assert(pii->pii_ntargets <= MAX_PROBE_TARGETS);
18130Sstevel@tonic-gate 		if (is_router) {
18140Sstevel@tonic-gate 			if (!pii->pii_targets_are_routers) {
18150Sstevel@tonic-gate 				/*
18160Sstevel@tonic-gate 				 * Prefer router over hosts. Using hosts is a
18170Sstevel@tonic-gate 				 * fallback mechanism, hence delete all host
18180Sstevel@tonic-gate 				 * targets.
18190Sstevel@tonic-gate 				 */
18200Sstevel@tonic-gate 				while (pii->pii_targets != NULL)
18210Sstevel@tonic-gate 					target_delete(pii->pii_targets);
18220Sstevel@tonic-gate 			}
18230Sstevel@tonic-gate 		} else {
18240Sstevel@tonic-gate 			/*
18250Sstevel@tonic-gate 			 * Routers take precedence over hosts. If this
18260Sstevel@tonic-gate 			 * is a router list and we are trying to add a
18270Sstevel@tonic-gate 			 * host, just return. If this is a host list
18280Sstevel@tonic-gate 			 * and if we have sufficient targets, just return
18290Sstevel@tonic-gate 			 */
18300Sstevel@tonic-gate 			if (pii->pii_targets_are_routers ||
18310Sstevel@tonic-gate 			    pii->pii_ntargets == MAX_PROBE_TARGETS)
18320Sstevel@tonic-gate 				return;
18330Sstevel@tonic-gate 		}
18340Sstevel@tonic-gate 	}
18350Sstevel@tonic-gate 
18360Sstevel@tonic-gate 	tg = calloc(1, sizeof (struct target));
18370Sstevel@tonic-gate 	if (tg == NULL) {
18380Sstevel@tonic-gate 		logperror("target_create: calloc");
18390Sstevel@tonic-gate 		return;
18400Sstevel@tonic-gate 	}
18410Sstevel@tonic-gate 
18420Sstevel@tonic-gate 	tg->tg_phyint_inst = pii;
18430Sstevel@tonic-gate 	tg->tg_address = addr;
18440Sstevel@tonic-gate 	tg->tg_in_use = 1;
18450Sstevel@tonic-gate 	tg->tg_rtt_sa = -1;
18460Sstevel@tonic-gate 	tg->tg_num_deferred = 0;
18470Sstevel@tonic-gate 
18480Sstevel@tonic-gate 	/*
18490Sstevel@tonic-gate 	 * If this is the first target, set 'pii_targets_are_routers'
18500Sstevel@tonic-gate 	 * The list of targets is either a list of hosts or list or
18510Sstevel@tonic-gate 	 * routers, but not a mix.
18520Sstevel@tonic-gate 	 */
18530Sstevel@tonic-gate 	if (pii->pii_targets == NULL) {
18540Sstevel@tonic-gate 		assert(pii->pii_ntargets == 0);
18550Sstevel@tonic-gate 		assert(pii->pii_target_next == NULL);
18560Sstevel@tonic-gate 		assert(pii->pii_rtt_target_next == NULL);
18570Sstevel@tonic-gate 		pii->pii_targets_are_routers = is_router ? 1 : 0;
18580Sstevel@tonic-gate 	}
18590Sstevel@tonic-gate 
18600Sstevel@tonic-gate 	if (pii->pii_ntargets == MAX_PROBE_TARGETS) {
18610Sstevel@tonic-gate 		assert(pii->pii_targets_are_routers);
18620Sstevel@tonic-gate 		assert(pii->pii_target_next != NULL);
18630Sstevel@tonic-gate 		assert(pii->pii_rtt_target_next != NULL);
18640Sstevel@tonic-gate 		tg->tg_status = TG_UNUSED;
18650Sstevel@tonic-gate 	} else {
18660Sstevel@tonic-gate 		if (pii->pii_ntargets == 0) {
18670Sstevel@tonic-gate 			assert(pii->pii_target_next == NULL);
18680Sstevel@tonic-gate 			pii->pii_target_next = tg;
18690Sstevel@tonic-gate 			pii->pii_rtt_target_next = tg;
18700Sstevel@tonic-gate 		}
18710Sstevel@tonic-gate 		pii->pii_ntargets++;
18720Sstevel@tonic-gate 		tg->tg_status = TG_ACTIVE;
18730Sstevel@tonic-gate 	}
18740Sstevel@tonic-gate 
18750Sstevel@tonic-gate 	target_insert(pii, tg);
18760Sstevel@tonic-gate 
18770Sstevel@tonic-gate 	/*
18782074Smeem 	 * Change state to PI_RUNNING if this phyint instance is capable of
18792074Smeem 	 * sending and receiving probes -- that is, if we know of at least 1
18802074Smeem 	 * target, and this phyint instance is probe-capable.  For more
18812074Smeem 	 * details, see the phyint state diagram in mpd_probe.c.
18820Sstevel@tonic-gate 	 */
18830Sstevel@tonic-gate 	pi = pii->pii_phyint;
18840Sstevel@tonic-gate 	if (pi->pi_state == PI_NOTARGETS && PROBE_CAPABLE(pii)) {
18850Sstevel@tonic-gate 		if (pi->pi_flags & IFF_FAILED)
18860Sstevel@tonic-gate 			phyint_chstate(pi, PI_FAILED);
18870Sstevel@tonic-gate 		else
18880Sstevel@tonic-gate 			phyint_chstate(pi, PI_RUNNING);
18890Sstevel@tonic-gate 	}
18900Sstevel@tonic-gate }
18910Sstevel@tonic-gate 
18920Sstevel@tonic-gate /*
18930Sstevel@tonic-gate  * Add the target address named by `addr' to phyint instance `pii' if it does
18940Sstevel@tonic-gate  * not already exist.  If the target is a router, `is_router' should be set to
18950Sstevel@tonic-gate  * B_TRUE.
18960Sstevel@tonic-gate  */
18970Sstevel@tonic-gate void
18980Sstevel@tonic-gate target_add(struct phyint_instance *pii, struct in6_addr addr,
18990Sstevel@tonic-gate     boolean_t is_router)
19000Sstevel@tonic-gate {
19010Sstevel@tonic-gate 	struct target *tg;
19020Sstevel@tonic-gate 
19030Sstevel@tonic-gate 	if (pii == NULL)
19040Sstevel@tonic-gate 		return;
19050Sstevel@tonic-gate 
19060Sstevel@tonic-gate 	tg = target_lookup(pii, addr);
19070Sstevel@tonic-gate 
19080Sstevel@tonic-gate 	/*
19090Sstevel@tonic-gate 	 * If the target does not exist, create it; target_create() will set
19100Sstevel@tonic-gate 	 * tg_in_use to true.  If it exists already, and it is a router
19110Sstevel@tonic-gate 	 * target, set tg_in_use to to true, so that init_router_targets()
19120Sstevel@tonic-gate 	 * won't delete it
19130Sstevel@tonic-gate 	 */
19140Sstevel@tonic-gate 	if (tg == NULL)
19150Sstevel@tonic-gate 		target_create(pii, addr, is_router);
19160Sstevel@tonic-gate 	else if (is_router)
19170Sstevel@tonic-gate 		tg->tg_in_use = 1;
19180Sstevel@tonic-gate }
19190Sstevel@tonic-gate 
19200Sstevel@tonic-gate /*
19210Sstevel@tonic-gate  * Insert target at head of linked list of targets for the associated
19220Sstevel@tonic-gate  * phyint instance
19230Sstevel@tonic-gate  */
19240Sstevel@tonic-gate static void
19250Sstevel@tonic-gate target_insert(struct phyint_instance *pii, struct target *tg)
19260Sstevel@tonic-gate {
19270Sstevel@tonic-gate 	tg->tg_next = pii->pii_targets;
19280Sstevel@tonic-gate 	tg->tg_prev = NULL;
19290Sstevel@tonic-gate 	if (tg->tg_next != NULL)
19300Sstevel@tonic-gate 		tg->tg_next->tg_prev = tg;
19310Sstevel@tonic-gate 	pii->pii_targets = tg;
19320Sstevel@tonic-gate }
19330Sstevel@tonic-gate 
19340Sstevel@tonic-gate /*
19350Sstevel@tonic-gate  * Delete a target (unlink and free).
19360Sstevel@tonic-gate  */
19370Sstevel@tonic-gate void
19380Sstevel@tonic-gate target_delete(struct target *tg)
19390Sstevel@tonic-gate {
19400Sstevel@tonic-gate 	int af;
19410Sstevel@tonic-gate 	struct phyint_instance	*pii;
19420Sstevel@tonic-gate 	struct phyint_instance	*pii_other;
19430Sstevel@tonic-gate 
19440Sstevel@tonic-gate 	pii = tg->tg_phyint_inst;
19450Sstevel@tonic-gate 	af = pii->pii_af;
19460Sstevel@tonic-gate 
19470Sstevel@tonic-gate 	if (debug & D_TARGET) {
19480Sstevel@tonic-gate 		char abuf[INET6_ADDRSTRLEN];
19490Sstevel@tonic-gate 
19500Sstevel@tonic-gate 		logdebug("target_delete(%s %s, %s)\n",
19510Sstevel@tonic-gate 		    AF_STR(af), pii->pii_name,
19520Sstevel@tonic-gate 		    pr_addr(af, tg->tg_address, abuf, sizeof (abuf)));
19530Sstevel@tonic-gate 	}
19540Sstevel@tonic-gate 
19550Sstevel@tonic-gate 	/*
19560Sstevel@tonic-gate 	 * Target must be in the list of targets for this phyint
19570Sstevel@tonic-gate 	 * instance.
19580Sstevel@tonic-gate 	 */
19590Sstevel@tonic-gate 	assert(pii->pii_targets == tg || tg->tg_prev != NULL);
19600Sstevel@tonic-gate 
19610Sstevel@tonic-gate 	/*
19620Sstevel@tonic-gate 	 * Reset all references to 'tg' in the probe information
19630Sstevel@tonic-gate 	 * for this phyint.
19640Sstevel@tonic-gate 	 */
19650Sstevel@tonic-gate 	reset_pii_probes(pii, tg);
19660Sstevel@tonic-gate 
19670Sstevel@tonic-gate 	/*
19680Sstevel@tonic-gate 	 * Remove this target from the list of targets of this
19690Sstevel@tonic-gate 	 * phyint instance.
19700Sstevel@tonic-gate 	 */
19710Sstevel@tonic-gate 	if (tg->tg_prev == NULL) {
19720Sstevel@tonic-gate 		pii->pii_targets = tg->tg_next;
19730Sstevel@tonic-gate 	} else {
19740Sstevel@tonic-gate 		tg->tg_prev->tg_next = tg->tg_next;
19750Sstevel@tonic-gate 	}
19760Sstevel@tonic-gate 
19770Sstevel@tonic-gate 	if (tg->tg_next != NULL)
19780Sstevel@tonic-gate 		tg->tg_next->tg_prev = tg->tg_prev;
19790Sstevel@tonic-gate 
19800Sstevel@tonic-gate 	tg->tg_next = NULL;
19810Sstevel@tonic-gate 	tg->tg_prev = NULL;
19820Sstevel@tonic-gate 
19830Sstevel@tonic-gate 	if (tg->tg_status == TG_ACTIVE)
19840Sstevel@tonic-gate 		pii->pii_ntargets--;
19850Sstevel@tonic-gate 
19860Sstevel@tonic-gate 	/*
19870Sstevel@tonic-gate 	 * Adjust the next target to probe, if it points to
19880Sstevel@tonic-gate 	 * to the currently deleted target.
19890Sstevel@tonic-gate 	 */
19900Sstevel@tonic-gate 	if (pii->pii_target_next == tg)
19910Sstevel@tonic-gate 		pii->pii_target_next = target_first(pii);
19920Sstevel@tonic-gate 
19930Sstevel@tonic-gate 	if (pii->pii_rtt_target_next == tg)
19940Sstevel@tonic-gate 		pii->pii_rtt_target_next = target_first(pii);
19950Sstevel@tonic-gate 
19960Sstevel@tonic-gate 	free(tg);
19970Sstevel@tonic-gate 
19980Sstevel@tonic-gate 	/*
19990Sstevel@tonic-gate 	 * The number of active targets pii_ntargets == 0 iff
20000Sstevel@tonic-gate 	 * the next active target pii->pii_target_next == NULL
20010Sstevel@tonic-gate 	 */
20020Sstevel@tonic-gate 	if (pii->pii_ntargets != 0) {
20030Sstevel@tonic-gate 		assert(pii->pii_target_next != NULL);
20040Sstevel@tonic-gate 		assert(pii->pii_rtt_target_next != NULL);
20050Sstevel@tonic-gate 		assert(pii->pii_target_next->tg_status == TG_ACTIVE);
20060Sstevel@tonic-gate 		assert(pii->pii_rtt_target_next->tg_status == TG_ACTIVE);
20070Sstevel@tonic-gate 		return;
20080Sstevel@tonic-gate 	}
20090Sstevel@tonic-gate 
20100Sstevel@tonic-gate 	/* At this point, we don't have any active targets. */
20110Sstevel@tonic-gate 	assert(pii->pii_target_next == NULL);
20120Sstevel@tonic-gate 	assert(pii->pii_rtt_target_next == NULL);
20130Sstevel@tonic-gate 
20140Sstevel@tonic-gate 	if (pii->pii_targets_are_routers) {
20150Sstevel@tonic-gate 		/*
20160Sstevel@tonic-gate 		 * Activate any TG_SLOW or TG_DEAD router targets,
20170Sstevel@tonic-gate 		 * since we don't have any other targets
20180Sstevel@tonic-gate 		 */
20190Sstevel@tonic-gate 		target_activate_all(pii);
20200Sstevel@tonic-gate 
20210Sstevel@tonic-gate 		if (pii->pii_ntargets != 0) {
20220Sstevel@tonic-gate 			assert(pii->pii_target_next != NULL);
20230Sstevel@tonic-gate 			assert(pii->pii_rtt_target_next != NULL);
20240Sstevel@tonic-gate 			assert(pii->pii_target_next->tg_status == TG_ACTIVE);
20250Sstevel@tonic-gate 			assert(pii->pii_rtt_target_next->tg_status ==
20260Sstevel@tonic-gate 			    TG_ACTIVE);
20270Sstevel@tonic-gate 			return;
20280Sstevel@tonic-gate 		}
20290Sstevel@tonic-gate 	}
20300Sstevel@tonic-gate 
20310Sstevel@tonic-gate 	/*
20320Sstevel@tonic-gate 	 * If we still don't have any active targets, the list must
20330Sstevel@tonic-gate 	 * must be really empty. There aren't even TG_SLOW or TG_DEAD
20340Sstevel@tonic-gate 	 * targets. Zero out the probe stats since it will not be
20350Sstevel@tonic-gate 	 * relevant any longer.
20360Sstevel@tonic-gate 	 */
20370Sstevel@tonic-gate 	assert(pii->pii_targets == NULL);
20380Sstevel@tonic-gate 	clear_pii_probe_stats(pii);
20390Sstevel@tonic-gate 	pii_other = phyint_inst_other(pii);
20400Sstevel@tonic-gate 
20410Sstevel@tonic-gate 	/*
20420Sstevel@tonic-gate 	 * If there are no targets on both instances,
20430Sstevel@tonic-gate 	 * go back to PI_NOTARGETS state, since we cannot
20440Sstevel@tonic-gate 	 * probe this phyint any more. For more details,
20450Sstevel@tonic-gate 	 * please see phyint state diagram in mpd_probe.c.
20460Sstevel@tonic-gate 	 */
20470Sstevel@tonic-gate 	if (!PROBE_CAPABLE(pii_other))
20480Sstevel@tonic-gate 		phyint_chstate(pii->pii_phyint, PI_NOTARGETS);
20490Sstevel@tonic-gate }
20500Sstevel@tonic-gate 
20510Sstevel@tonic-gate /*
20520Sstevel@tonic-gate  * Flush the target list of every phyint in the group, if the list
20530Sstevel@tonic-gate  * is a host target list. This is called if group failure is suspected.
20540Sstevel@tonic-gate  * If all targets have failed, multicast will subsequently discover new
20550Sstevel@tonic-gate  * targets. Else it is a group failure.
20560Sstevel@tonic-gate  * Note: This function is a no-op if the list is a router target list.
20570Sstevel@tonic-gate  */
20580Sstevel@tonic-gate static void
20590Sstevel@tonic-gate target_flush_hosts(struct phyint_group *pg)
20600Sstevel@tonic-gate {
20610Sstevel@tonic-gate 	struct phyint *pi;
20620Sstevel@tonic-gate 	struct phyint_instance *pii;
20630Sstevel@tonic-gate 
20640Sstevel@tonic-gate 	if (debug & D_TARGET)
20650Sstevel@tonic-gate 		logdebug("target_flush_hosts(%s)\n", pg->pg_name);
20660Sstevel@tonic-gate 
20670Sstevel@tonic-gate 	for (pi = pg->pg_phyint; pi != NULL; pi = pi->pi_pgnext) {
20680Sstevel@tonic-gate 		pii = pi->pi_v4;
20690Sstevel@tonic-gate 		if (pii != NULL && !pii->pii_targets_are_routers) {
20700Sstevel@tonic-gate 			/*
20710Sstevel@tonic-gate 			 * Delete all the targets. When the list becomes
20720Sstevel@tonic-gate 			 * empty, target_delete() will set pii->pii_targets
20730Sstevel@tonic-gate 			 * to NULL.
20740Sstevel@tonic-gate 			 */
20750Sstevel@tonic-gate 			while (pii->pii_targets != NULL)
20760Sstevel@tonic-gate 				target_delete(pii->pii_targets);
20770Sstevel@tonic-gate 		}
20780Sstevel@tonic-gate 		pii = pi->pi_v6;
20790Sstevel@tonic-gate 		if (pii != NULL && !pii->pii_targets_are_routers) {
20800Sstevel@tonic-gate 			/*
20810Sstevel@tonic-gate 			 * Delete all the targets. When the list becomes
20820Sstevel@tonic-gate 			 * empty, target_delete() will set pii->pii_targets
20830Sstevel@tonic-gate 			 * to NULL.
20840Sstevel@tonic-gate 			 */
20850Sstevel@tonic-gate 			while (pii->pii_targets != NULL)
20860Sstevel@tonic-gate 				target_delete(pii->pii_targets);
20870Sstevel@tonic-gate 		}
20880Sstevel@tonic-gate 	}
20890Sstevel@tonic-gate }
20900Sstevel@tonic-gate 
20910Sstevel@tonic-gate /*
20920Sstevel@tonic-gate  * Reset all references to 'target' in the probe info, as this target is
20930Sstevel@tonic-gate  * being deleted. The pr_target field is guaranteed to be non-null if
20940Sstevel@tonic-gate  * pr_status is PR_UNACKED. So we change the pr_status to PR_LOST, so that
20950Sstevel@tonic-gate  * pr_target will not be accessed unconditionally.
20960Sstevel@tonic-gate  */
20970Sstevel@tonic-gate static void
20980Sstevel@tonic-gate reset_pii_probes(struct phyint_instance *pii, struct target *tg)
20990Sstevel@tonic-gate {
21000Sstevel@tonic-gate 	int i;
21010Sstevel@tonic-gate 
21020Sstevel@tonic-gate 	for (i = 0; i < PROBE_STATS_COUNT; i++) {
21030Sstevel@tonic-gate 		if (pii->pii_probes[i].pr_target == tg) {
21040Sstevel@tonic-gate 			pii->pii_probes[i].pr_target = NULL;
21050Sstevel@tonic-gate 			if (pii->pii_probes[i].pr_status == PR_UNACKED)
21060Sstevel@tonic-gate 				pii->pii_probes[i].pr_status = PR_LOST;
21070Sstevel@tonic-gate 		}
21080Sstevel@tonic-gate 	}
21090Sstevel@tonic-gate 
21100Sstevel@tonic-gate }
21110Sstevel@tonic-gate 
21120Sstevel@tonic-gate /*
21130Sstevel@tonic-gate  * Clear the probe statistics array.
21140Sstevel@tonic-gate  */
21150Sstevel@tonic-gate void
21160Sstevel@tonic-gate clear_pii_probe_stats(struct phyint_instance *pii)
21170Sstevel@tonic-gate {
21180Sstevel@tonic-gate 	bzero(pii->pii_probes, sizeof (struct probe_stats) * PROBE_STATS_COUNT);
21190Sstevel@tonic-gate 	/* Reset the next probe index in the probe stats array */
21200Sstevel@tonic-gate 	pii->pii_probe_next = 0;
21210Sstevel@tonic-gate }
21220Sstevel@tonic-gate 
21230Sstevel@tonic-gate static void
21240Sstevel@tonic-gate target_print(struct target *tg)
21250Sstevel@tonic-gate {
21260Sstevel@tonic-gate 	char	abuf[INET6_ADDRSTRLEN];
21270Sstevel@tonic-gate 	char	buf[128];
21280Sstevel@tonic-gate 	char	buf2[128];
21290Sstevel@tonic-gate 	int	af;
21300Sstevel@tonic-gate 	int	i;
21310Sstevel@tonic-gate 
21320Sstevel@tonic-gate 	af = tg->tg_phyint_inst->pii_af;
21330Sstevel@tonic-gate 
21340Sstevel@tonic-gate 	logdebug("Target on %s %s addr %s\n"
21350Sstevel@tonic-gate 	    "status %d rtt_sa %d rtt_sd %d crtt %d tg_in_use %d\n",
21360Sstevel@tonic-gate 	    AF_STR(af), tg->tg_phyint_inst->pii_name,
21370Sstevel@tonic-gate 	    pr_addr(af, tg->tg_address, abuf, sizeof (abuf)),
21380Sstevel@tonic-gate 	    tg->tg_status, tg->tg_rtt_sa, tg->tg_rtt_sd,
21390Sstevel@tonic-gate 	    tg->tg_crtt, tg->tg_in_use);
21400Sstevel@tonic-gate 
21410Sstevel@tonic-gate 	buf[0] = '\0';
21420Sstevel@tonic-gate 	for (i = 0; i < tg->tg_num_deferred; i++) {
21430Sstevel@tonic-gate 		(void) snprintf(buf2, sizeof (buf2), " %dms",
21440Sstevel@tonic-gate 		    tg->tg_deferred[i]);
21450Sstevel@tonic-gate 		(void) strlcat(buf, buf2, sizeof (buf));
21460Sstevel@tonic-gate 	}
21470Sstevel@tonic-gate 	logdebug("deferred rtts:%s\n", buf);
21480Sstevel@tonic-gate }
21490Sstevel@tonic-gate 
21500Sstevel@tonic-gate void
21510Sstevel@tonic-gate phyint_inst_print_all(void)
21520Sstevel@tonic-gate {
21530Sstevel@tonic-gate 	struct phyint_instance *pii;
21540Sstevel@tonic-gate 
21550Sstevel@tonic-gate 	for (pii = phyint_instances; pii != NULL; pii = pii->pii_next) {
21560Sstevel@tonic-gate 		phyint_inst_print(pii);
21570Sstevel@tonic-gate 	}
21580Sstevel@tonic-gate }
21590Sstevel@tonic-gate 
21600Sstevel@tonic-gate /*
21610Sstevel@tonic-gate  * Convert length for a mask to the mask.
21620Sstevel@tonic-gate  */
21630Sstevel@tonic-gate static void
21640Sstevel@tonic-gate ip_index_to_mask_v6(uint_t masklen, struct in6_addr *bitmask)
21650Sstevel@tonic-gate {
21660Sstevel@tonic-gate 	int	j;
21670Sstevel@tonic-gate 
21680Sstevel@tonic-gate 	assert(masklen <= IPV6_ABITS);
21690Sstevel@tonic-gate 	bzero((char *)bitmask, sizeof (*bitmask));
21700Sstevel@tonic-gate 
21710Sstevel@tonic-gate 	/* Make the 'masklen' leftmost bits one */
21720Sstevel@tonic-gate 	for (j = 0; masklen > 8; masklen -= 8, j++)
21730Sstevel@tonic-gate 		bitmask->s6_addr[j] = 0xff;
21740Sstevel@tonic-gate 
21750Sstevel@tonic-gate 	bitmask->s6_addr[j] = 0xff << (8 - masklen);
21760Sstevel@tonic-gate 
21770Sstevel@tonic-gate }
21780Sstevel@tonic-gate 
21790Sstevel@tonic-gate /*
21800Sstevel@tonic-gate  * Compare two prefixes that have the same prefix length.
21810Sstevel@tonic-gate  * Fails if the prefix length is unreasonable.
21820Sstevel@tonic-gate  */
21830Sstevel@tonic-gate static boolean_t
21840Sstevel@tonic-gate prefix_equal(struct in6_addr p1, struct in6_addr p2, int prefix_len)
21850Sstevel@tonic-gate {
21860Sstevel@tonic-gate 	uchar_t mask;
21870Sstevel@tonic-gate 	int j;
21880Sstevel@tonic-gate 
21890Sstevel@tonic-gate 	if (prefix_len < 0 || prefix_len > IPV6_ABITS)
21900Sstevel@tonic-gate 		return (_B_FALSE);
21910Sstevel@tonic-gate 
21920Sstevel@tonic-gate 	for (j = 0; prefix_len > 8; prefix_len -= 8, j++)
21930Sstevel@tonic-gate 		if (p1.s6_addr[j] != p2.s6_addr[j])
21940Sstevel@tonic-gate 			return (_B_FALSE);
21950Sstevel@tonic-gate 
21960Sstevel@tonic-gate 	/* Make the N leftmost bits one */
21970Sstevel@tonic-gate 	mask = 0xff << (8 - prefix_len);
21980Sstevel@tonic-gate 	if ((p1.s6_addr[j] & mask) != (p2.s6_addr[j] & mask))
21990Sstevel@tonic-gate 		return (_B_FALSE);
22000Sstevel@tonic-gate 
22010Sstevel@tonic-gate 	return (_B_TRUE);
22020Sstevel@tonic-gate }
22030Sstevel@tonic-gate 
22040Sstevel@tonic-gate /*
22050Sstevel@tonic-gate  * Get the number of UP logints (excluding IFF_NOFAILOVERs), on both
22060Sstevel@tonic-gate  * IPv4 and IPv6 put together. The phyint with the least such number
22070Sstevel@tonic-gate  * will be used as the failover destination, if no standby interface is
22080Sstevel@tonic-gate  * available
22090Sstevel@tonic-gate  */
22100Sstevel@tonic-gate int
22110Sstevel@tonic-gate logint_upcount(struct phyint *pi)
22120Sstevel@tonic-gate {
22130Sstevel@tonic-gate 	struct	logint	*li;
22140Sstevel@tonic-gate 	struct	phyint_instance *pii;
22150Sstevel@tonic-gate 	int count = 0;
22160Sstevel@tonic-gate 
22170Sstevel@tonic-gate 	pii = pi->pi_v4;
22180Sstevel@tonic-gate 	if (pii != NULL) {
22190Sstevel@tonic-gate 		for (li = pii->pii_logint; li != NULL; li = li->li_next) {
22200Sstevel@tonic-gate 			if ((li->li_flags &
22210Sstevel@tonic-gate 			    (IFF_UP | IFF_NOFAILOVER)) == IFF_UP) {
22220Sstevel@tonic-gate 				count++;
22230Sstevel@tonic-gate 			}
22240Sstevel@tonic-gate 		}
22250Sstevel@tonic-gate 	}
22260Sstevel@tonic-gate 
22270Sstevel@tonic-gate 	pii = pi->pi_v6;
22280Sstevel@tonic-gate 	if (pii != NULL) {
22290Sstevel@tonic-gate 		for (li = pii->pii_logint; li != NULL; li = li->li_next) {
22300Sstevel@tonic-gate 			if ((li->li_flags &
22310Sstevel@tonic-gate 			    (IFF_UP | IFF_NOFAILOVER)) == IFF_UP) {
22320Sstevel@tonic-gate 				count++;
22330Sstevel@tonic-gate 			}
22340Sstevel@tonic-gate 		}
22350Sstevel@tonic-gate 	}
22360Sstevel@tonic-gate 
22370Sstevel@tonic-gate 	return (count);
22380Sstevel@tonic-gate }
22390Sstevel@tonic-gate 
22400Sstevel@tonic-gate /*
22410Sstevel@tonic-gate  * Get the phyint instance with the other (IPv4 / IPv6) protocol
22420Sstevel@tonic-gate  */
22430Sstevel@tonic-gate struct phyint_instance *
22440Sstevel@tonic-gate phyint_inst_other(struct phyint_instance *pii)
22450Sstevel@tonic-gate {
22460Sstevel@tonic-gate 	if (pii->pii_af == AF_INET)
22470Sstevel@tonic-gate 		return (pii->pii_phyint->pi_v6);
22480Sstevel@tonic-gate 	else
22490Sstevel@tonic-gate 		return (pii->pii_phyint->pi_v4);
22500Sstevel@tonic-gate }
22510Sstevel@tonic-gate 
22520Sstevel@tonic-gate /*
22530Sstevel@tonic-gate  * Post an EC_IPMP sysevent of subclass `subclass' and attributes `nvl'.
22540Sstevel@tonic-gate  * Before sending the event, it prepends the current version of the IPMP
22550Sstevel@tonic-gate  * sysevent API.  Returns 0 on success, -1 on failure (in either case,
22560Sstevel@tonic-gate  * `nvl' is freed).
22570Sstevel@tonic-gate  */
22580Sstevel@tonic-gate static int
22590Sstevel@tonic-gate post_event(const char *subclass, nvlist_t *nvl)
22600Sstevel@tonic-gate {
22610Sstevel@tonic-gate 	sysevent_id_t eid;
22620Sstevel@tonic-gate 
2263*4262Smeem 	/*
2264*4262Smeem 	 * Since sysevents don't work yet in non-global zones, there cannot
2265*4262Smeem 	 * possibly be any consumers yet, so don't bother trying to generate
2266*4262Smeem 	 * them.  (Otherwise, we'll spew warnings.)
2267*4262Smeem 	 */
2268*4262Smeem 	if (getzoneid() != GLOBAL_ZONEID) {
2269*4262Smeem 		nvlist_free(nvl);
2270*4262Smeem 		return (0);
2271*4262Smeem 	}
2272*4262Smeem 
22730Sstevel@tonic-gate 	errno = nvlist_add_uint32(nvl, IPMP_EVENT_VERSION,
22740Sstevel@tonic-gate 	    IPMP_EVENT_CUR_VERSION);
22750Sstevel@tonic-gate 	if (errno != 0) {
22760Sstevel@tonic-gate 		logerr("cannot create `%s' event: %s", subclass,
22770Sstevel@tonic-gate 		    strerror(errno));
22780Sstevel@tonic-gate 		goto failed;
22790Sstevel@tonic-gate 	}
22800Sstevel@tonic-gate 
22810Sstevel@tonic-gate 	if (sysevent_post_event(EC_IPMP, (char *)subclass, SUNW_VENDOR,
22820Sstevel@tonic-gate 	    "in.mpathd", nvl, &eid) == -1) {
22830Sstevel@tonic-gate 		logerr("cannot send `%s' event: %s\n", subclass,
22840Sstevel@tonic-gate 		    strerror(errno));
22850Sstevel@tonic-gate 		goto failed;
22860Sstevel@tonic-gate 	}
22870Sstevel@tonic-gate 
22880Sstevel@tonic-gate 	nvlist_free(nvl);
22890Sstevel@tonic-gate 	return (0);
22900Sstevel@tonic-gate failed:
22910Sstevel@tonic-gate 	nvlist_free(nvl);
22920Sstevel@tonic-gate 	return (-1);
22930Sstevel@tonic-gate }
22940Sstevel@tonic-gate 
22950Sstevel@tonic-gate /*
22960Sstevel@tonic-gate  * Return the external IPMP state associated with phyint `pi'.
22970Sstevel@tonic-gate  */
22980Sstevel@tonic-gate static ipmp_if_state_t
22990Sstevel@tonic-gate ifstate(struct phyint *pi)
23000Sstevel@tonic-gate {
23010Sstevel@tonic-gate 	switch (pi->pi_state) {
23020Sstevel@tonic-gate 	case PI_NOTARGETS:
23030Sstevel@tonic-gate 		return (IPMP_IF_UNKNOWN);
23040Sstevel@tonic-gate 
23050Sstevel@tonic-gate 	case PI_OFFLINE:
23060Sstevel@tonic-gate 		return (IPMP_IF_OFFLINE);
23070Sstevel@tonic-gate 
23080Sstevel@tonic-gate 	case PI_FAILED:
23090Sstevel@tonic-gate 		return (IPMP_IF_FAILED);
23100Sstevel@tonic-gate 
23110Sstevel@tonic-gate 	case PI_RUNNING:
23120Sstevel@tonic-gate 		return (IPMP_IF_OK);
23130Sstevel@tonic-gate 	}
23140Sstevel@tonic-gate 
23150Sstevel@tonic-gate 	logerr("ifstate: unknown state %d; aborting\n", pi->pi_state);
23160Sstevel@tonic-gate 	abort();
23170Sstevel@tonic-gate 	/* NOTREACHED */
23180Sstevel@tonic-gate }
23190Sstevel@tonic-gate 
23200Sstevel@tonic-gate /*
23210Sstevel@tonic-gate  * Return the external IPMP interface type associated with phyint `pi'.
23220Sstevel@tonic-gate  */
23230Sstevel@tonic-gate static ipmp_if_type_t
23240Sstevel@tonic-gate iftype(struct phyint *pi)
23250Sstevel@tonic-gate {
23260Sstevel@tonic-gate 	if (pi->pi_flags & IFF_STANDBY)
23270Sstevel@tonic-gate 		return (IPMP_IF_STANDBY);
23280Sstevel@tonic-gate 	else
23290Sstevel@tonic-gate 		return (IPMP_IF_NORMAL);
23300Sstevel@tonic-gate }
23310Sstevel@tonic-gate 
23320Sstevel@tonic-gate /*
23330Sstevel@tonic-gate  * Return the external IPMP group state associated with phyint group `pg'.
23340Sstevel@tonic-gate  */
23350Sstevel@tonic-gate static ipmp_group_state_t
23360Sstevel@tonic-gate groupstate(struct phyint_group *pg)
23370Sstevel@tonic-gate {
23380Sstevel@tonic-gate 	return (GROUP_FAILED(pg) ? IPMP_GROUP_FAILED : IPMP_GROUP_OK);
23390Sstevel@tonic-gate }
23400Sstevel@tonic-gate 
23410Sstevel@tonic-gate /*
23420Sstevel@tonic-gate  * Generate an ESC_IPMP_GROUP_STATE sysevent for phyint group `pg'.
23430Sstevel@tonic-gate  * Returns 0 on success, -1 on failure.
23440Sstevel@tonic-gate  */
23450Sstevel@tonic-gate static int
23460Sstevel@tonic-gate phyint_group_state_event(struct phyint_group *pg)
23470Sstevel@tonic-gate {
23480Sstevel@tonic-gate 	nvlist_t	*nvl;
23490Sstevel@tonic-gate 
23500Sstevel@tonic-gate 	errno = nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0);
23510Sstevel@tonic-gate 	if (errno != 0) {
23520Sstevel@tonic-gate 		logperror("cannot create `group state change' event");
23530Sstevel@tonic-gate 		return (-1);
23540Sstevel@tonic-gate 	}
23550Sstevel@tonic-gate 
23560Sstevel@tonic-gate 	errno = nvlist_add_string(nvl, IPMP_GROUP_NAME, pg->pg_name);
23570Sstevel@tonic-gate 	if (errno != 0)
23580Sstevel@tonic-gate 		goto failed;
23590Sstevel@tonic-gate 
23600Sstevel@tonic-gate 	errno = nvlist_add_uint64(nvl, IPMP_GROUP_SIGNATURE, pg->pg_sig);
23610Sstevel@tonic-gate 	if (errno != 0)
23620Sstevel@tonic-gate 		goto failed;
23630Sstevel@tonic-gate 
23640Sstevel@tonic-gate 	errno = nvlist_add_uint32(nvl, IPMP_GROUP_STATE, groupstate(pg));
23650Sstevel@tonic-gate 	if (errno != 0)
23660Sstevel@tonic-gate 		goto failed;
23670Sstevel@tonic-gate 
23680Sstevel@tonic-gate 	return (post_event(ESC_IPMP_GROUP_STATE, nvl));
23690Sstevel@tonic-gate failed:
23700Sstevel@tonic-gate 	logperror("cannot create `group state change' event");
23710Sstevel@tonic-gate 	nvlist_free(nvl);
23720Sstevel@tonic-gate 	return (-1);
23730Sstevel@tonic-gate }
23740Sstevel@tonic-gate 
23750Sstevel@tonic-gate /*
23760Sstevel@tonic-gate  * Generate an ESC_IPMP_GROUP_CHANGE sysevent of type `op' for phyint group
23770Sstevel@tonic-gate  * `pg'.  Returns 0 on success, -1 on failure.
23780Sstevel@tonic-gate  */
23790Sstevel@tonic-gate static int
23800Sstevel@tonic-gate phyint_group_change_event(struct phyint_group *pg, ipmp_group_op_t op)
23810Sstevel@tonic-gate {
23820Sstevel@tonic-gate 	nvlist_t *nvl;
23830Sstevel@tonic-gate 
23840Sstevel@tonic-gate 	errno = nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0);
23850Sstevel@tonic-gate 	if (errno != 0) {
23860Sstevel@tonic-gate 		logperror("cannot create `group change' event");
23870Sstevel@tonic-gate 		return (-1);
23880Sstevel@tonic-gate 	}
23890Sstevel@tonic-gate 
23900Sstevel@tonic-gate 	errno = nvlist_add_string(nvl, IPMP_GROUP_NAME, pg->pg_name);
23910Sstevel@tonic-gate 	if (errno != 0)
23920Sstevel@tonic-gate 		goto failed;
23930Sstevel@tonic-gate 
23940Sstevel@tonic-gate 	errno = nvlist_add_uint64(nvl, IPMP_GROUP_SIGNATURE, pg->pg_sig);
23950Sstevel@tonic-gate 	if (errno != 0)
23960Sstevel@tonic-gate 		goto failed;
23970Sstevel@tonic-gate 
23980Sstevel@tonic-gate 	errno = nvlist_add_uint64(nvl, IPMP_GROUPLIST_SIGNATURE,
23990Sstevel@tonic-gate 	    phyint_grouplistsig);
24000Sstevel@tonic-gate 	if (errno != 0)
24010Sstevel@tonic-gate 		goto failed;
24020Sstevel@tonic-gate 
24030Sstevel@tonic-gate 	errno = nvlist_add_uint32(nvl, IPMP_GROUP_OPERATION, op);
24040Sstevel@tonic-gate 	if (errno != 0)
24050Sstevel@tonic-gate 		goto failed;
24060Sstevel@tonic-gate 
24070Sstevel@tonic-gate 	return (post_event(ESC_IPMP_GROUP_CHANGE, nvl));
24080Sstevel@tonic-gate failed:
24090Sstevel@tonic-gate 	logperror("cannot create `group change' event");
24100Sstevel@tonic-gate 	nvlist_free(nvl);
24110Sstevel@tonic-gate 	return (-1);
24120Sstevel@tonic-gate }
24130Sstevel@tonic-gate 
24140Sstevel@tonic-gate /*
24150Sstevel@tonic-gate  * Generate an ESC_IPMP_GROUP_MEMBER_CHANGE sysevent for phyint `pi' in
24160Sstevel@tonic-gate  * group `pg'.	Returns 0 on success, -1 on failure.
24170Sstevel@tonic-gate  */
24180Sstevel@tonic-gate static int
24190Sstevel@tonic-gate phyint_group_member_event(struct phyint_group *pg, struct phyint *pi,
24200Sstevel@tonic-gate     ipmp_if_op_t op)
24210Sstevel@tonic-gate {
24220Sstevel@tonic-gate 	nvlist_t *nvl;
24230Sstevel@tonic-gate 
24240Sstevel@tonic-gate 	errno = nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0);
24250Sstevel@tonic-gate 	if (errno != 0) {
24260Sstevel@tonic-gate 		logperror("cannot create `group member change' event");
24270Sstevel@tonic-gate 		return (-1);
24280Sstevel@tonic-gate 	}
24290Sstevel@tonic-gate 
24300Sstevel@tonic-gate 	errno = nvlist_add_string(nvl, IPMP_GROUP_NAME, pg->pg_name);
24310Sstevel@tonic-gate 	if (errno != 0)
24320Sstevel@tonic-gate 		goto failed;
24330Sstevel@tonic-gate 
24340Sstevel@tonic-gate 	errno = nvlist_add_uint64(nvl, IPMP_GROUP_SIGNATURE, pg->pg_sig);
24350Sstevel@tonic-gate 	if (errno != 0)
24360Sstevel@tonic-gate 		goto failed;
24370Sstevel@tonic-gate 
24380Sstevel@tonic-gate 	errno = nvlist_add_uint32(nvl, IPMP_IF_OPERATION, op);
24390Sstevel@tonic-gate 	if (errno != 0)
24400Sstevel@tonic-gate 		goto failed;
24410Sstevel@tonic-gate 
24420Sstevel@tonic-gate 	errno = nvlist_add_string(nvl, IPMP_IF_NAME, pi->pi_name);
24430Sstevel@tonic-gate 	if (errno != 0)
24440Sstevel@tonic-gate 		goto failed;
24450Sstevel@tonic-gate 
24460Sstevel@tonic-gate 	errno = nvlist_add_uint32(nvl, IPMP_IF_TYPE, iftype(pi));
24470Sstevel@tonic-gate 	if (errno != 0)
24480Sstevel@tonic-gate 		goto failed;
24490Sstevel@tonic-gate 
24500Sstevel@tonic-gate 	errno = nvlist_add_uint32(nvl, IPMP_IF_STATE, ifstate(pi));
24510Sstevel@tonic-gate 	if (errno != 0)
24520Sstevel@tonic-gate 		goto failed;
24530Sstevel@tonic-gate 
24540Sstevel@tonic-gate 	return (post_event(ESC_IPMP_GROUP_MEMBER_CHANGE, nvl));
24550Sstevel@tonic-gate failed:
24560Sstevel@tonic-gate 	logperror("cannot create `group member change' event");
24570Sstevel@tonic-gate 	nvlist_free(nvl);
24580Sstevel@tonic-gate 	return (-1);
24590Sstevel@tonic-gate 
24600Sstevel@tonic-gate }
24610Sstevel@tonic-gate 
24620Sstevel@tonic-gate /*
24630Sstevel@tonic-gate  * Generate an ESC_IPMP_IF_CHANGE sysevent for phyint `pi' in group `pg'.
24640Sstevel@tonic-gate  * Returns 0 on success, -1 on failure.
24650Sstevel@tonic-gate  */
24660Sstevel@tonic-gate static int
24670Sstevel@tonic-gate phyint_state_event(struct phyint_group *pg, struct phyint *pi)
24680Sstevel@tonic-gate {
24690Sstevel@tonic-gate 	nvlist_t *nvl;
24700Sstevel@tonic-gate 
24710Sstevel@tonic-gate 	errno = nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0);
24720Sstevel@tonic-gate 	if (errno != 0) {
24730Sstevel@tonic-gate 		logperror("cannot create `interface change' event");
24740Sstevel@tonic-gate 		return (-1);
24750Sstevel@tonic-gate 	}
24760Sstevel@tonic-gate 
24770Sstevel@tonic-gate 	errno = nvlist_add_string(nvl, IPMP_GROUP_NAME, pg->pg_name);
24780Sstevel@tonic-gate 	if (errno != 0)
24790Sstevel@tonic-gate 		goto failed;
24800Sstevel@tonic-gate 
24810Sstevel@tonic-gate 	errno = nvlist_add_uint64(nvl, IPMP_GROUP_SIGNATURE, pg->pg_sig);
24820Sstevel@tonic-gate 	if (errno != 0)
24830Sstevel@tonic-gate 		goto failed;
24840Sstevel@tonic-gate 
24850Sstevel@tonic-gate 	errno = nvlist_add_string(nvl, IPMP_IF_NAME, pi->pi_name);
24860Sstevel@tonic-gate 	if (errno != 0)
24870Sstevel@tonic-gate 		goto failed;
24880Sstevel@tonic-gate 
24890Sstevel@tonic-gate 	errno = nvlist_add_uint32(nvl, IPMP_IF_TYPE, iftype(pi));
24900Sstevel@tonic-gate 	if (errno != 0)
24910Sstevel@tonic-gate 		goto failed;
24920Sstevel@tonic-gate 
24930Sstevel@tonic-gate 	errno = nvlist_add_uint32(nvl, IPMP_IF_STATE, ifstate(pi));
24940Sstevel@tonic-gate 	if (errno != 0)
24950Sstevel@tonic-gate 		goto failed;
24960Sstevel@tonic-gate 
24970Sstevel@tonic-gate 	return (post_event(ESC_IPMP_IF_CHANGE, nvl));
24980Sstevel@tonic-gate failed:
24990Sstevel@tonic-gate 	logperror("cannot create `interface change' event");
25000Sstevel@tonic-gate 	nvlist_free(nvl);
25010Sstevel@tonic-gate 	return (-1);
25020Sstevel@tonic-gate 
25030Sstevel@tonic-gate }
25040Sstevel@tonic-gate 
/*
 * Generate a fresh signature.  The signature is conceptually divided into
 * two pieces: a random 16-bit "generation number" in the top bits and a
 * 48-bit monotonically increasing integer, which starts out at 1.  The
 * generation number protects against stale updates to entities (e.g., IPMP
 * groups) that have been deleted and since recreated.
 */
static uint64_t
gensig(void)
{
	static int seeded = 0;
	uint64_t generation;

	/* Seed the generator exactly once, on first use */
	if (seeded == 0) {
		srand48((long)gethrtime());
		seeded++;
	}

	/* The shift leaves the low 16 bits of lrand48() in bits 48-63 */
	generation = (uint64_t)lrand48() << 48;
	return (generation | 1);
}
25240Sstevel@tonic-gate 
25250Sstevel@tonic-gate /*
25260Sstevel@tonic-gate  * Store the information associated with group `grname' into a dynamically
25270Sstevel@tonic-gate  * allocated structure pointed to by `*grinfopp'.  Returns an IPMP error code.
25280Sstevel@tonic-gate  */
25290Sstevel@tonic-gate unsigned int
25300Sstevel@tonic-gate getgroupinfo(const char *grname, ipmp_groupinfo_t **grinfopp)
25310Sstevel@tonic-gate {
25320Sstevel@tonic-gate 	struct phyint_group	*pg;
25330Sstevel@tonic-gate 	struct phyint		*pi;
25340Sstevel@tonic-gate 	char			(*ifs)[LIFNAMSIZ];
25350Sstevel@tonic-gate 	unsigned int		nif, i;
25360Sstevel@tonic-gate 
25370Sstevel@tonic-gate 	pg = phyint_group_lookup(grname);
25380Sstevel@tonic-gate 	if (pg == NULL)
25390Sstevel@tonic-gate 		return (IPMP_EUNKGROUP);
25400Sstevel@tonic-gate 
25410Sstevel@tonic-gate 	/*
25420Sstevel@tonic-gate 	 * Tally up the number of interfaces, allocate an array to hold them,
25430Sstevel@tonic-gate 	 * and insert their names into the array.
25440Sstevel@tonic-gate 	 */
25450Sstevel@tonic-gate 	for (nif = 0, pi = pg->pg_phyint; pi != NULL; pi = pi->pi_pgnext)
25460Sstevel@tonic-gate 		nif++;
25470Sstevel@tonic-gate 
25480Sstevel@tonic-gate 	ifs = alloca(nif * sizeof (*ifs));
25490Sstevel@tonic-gate 	for (i = 0, pi = pg->pg_phyint; pi != NULL; pi = pi->pi_pgnext, i++) {
25500Sstevel@tonic-gate 		assert(i < nif);
25510Sstevel@tonic-gate 		(void) strlcpy(ifs[i], pi->pi_name, LIFNAMSIZ);
25520Sstevel@tonic-gate 	}
25530Sstevel@tonic-gate 	assert(i == nif);
25540Sstevel@tonic-gate 
25550Sstevel@tonic-gate 	*grinfopp = ipmp_groupinfo_create(pg->pg_name, pg->pg_sig,
25560Sstevel@tonic-gate 	    groupstate(pg), nif, ifs);
25570Sstevel@tonic-gate 	return (*grinfopp == NULL ? IPMP_ENOMEM : IPMP_SUCCESS);
25580Sstevel@tonic-gate }
25590Sstevel@tonic-gate 
25600Sstevel@tonic-gate /*
25610Sstevel@tonic-gate  * Store the information associated with interface `ifname' into a dynamically
25620Sstevel@tonic-gate  * allocated structure pointed to by `*ifinfopp'.  Returns an IPMP error code.
25630Sstevel@tonic-gate  */
25640Sstevel@tonic-gate unsigned int
25650Sstevel@tonic-gate getifinfo(const char *ifname, ipmp_ifinfo_t **ifinfopp)
25660Sstevel@tonic-gate {
25670Sstevel@tonic-gate 	struct phyint	*pi;
25680Sstevel@tonic-gate 
25690Sstevel@tonic-gate 	pi = phyint_lookup(ifname);
25700Sstevel@tonic-gate 	if (pi == NULL)
25710Sstevel@tonic-gate 		return (IPMP_EUNKIF);
25720Sstevel@tonic-gate 
25730Sstevel@tonic-gate 	*ifinfopp = ipmp_ifinfo_create(pi->pi_name, pi->pi_group->pg_name,
25740Sstevel@tonic-gate 	    ifstate(pi), iftype(pi));
25750Sstevel@tonic-gate 	return (*ifinfopp == NULL ? IPMP_ENOMEM : IPMP_SUCCESS);
25760Sstevel@tonic-gate }
25770Sstevel@tonic-gate 
25780Sstevel@tonic-gate /*
25790Sstevel@tonic-gate  * Store the current list of IPMP groups into a dynamically allocated
25800Sstevel@tonic-gate  * structure pointed to by `*grlistpp'.	 Returns an IPMP error code.
25810Sstevel@tonic-gate  */
25820Sstevel@tonic-gate unsigned int
25830Sstevel@tonic-gate getgrouplist(ipmp_grouplist_t **grlistpp)
25840Sstevel@tonic-gate {
25850Sstevel@tonic-gate 	struct phyint_group	*pg;
25860Sstevel@tonic-gate 	char			(*groups)[LIFGRNAMSIZ];
25870Sstevel@tonic-gate 	unsigned int		i, ngroup;
25880Sstevel@tonic-gate 
25890Sstevel@tonic-gate 	/*
25900Sstevel@tonic-gate 	 * Tally up the number of groups, allocate an array to hold them, and
25910Sstevel@tonic-gate 	 * insert their names into the array.
25920Sstevel@tonic-gate 	 */
25930Sstevel@tonic-gate 	for (ngroup = 0, pg = phyint_groups; pg != NULL; pg = pg->pg_next)
25940Sstevel@tonic-gate 		ngroup++;
25950Sstevel@tonic-gate 
25960Sstevel@tonic-gate 	groups = alloca(ngroup * sizeof (*groups));
25970Sstevel@tonic-gate 	for (i = 0, pg = phyint_groups; pg != NULL; pg = pg->pg_next, i++) {
25980Sstevel@tonic-gate 		assert(i < ngroup);
25990Sstevel@tonic-gate 		(void) strlcpy(groups[i], pg->pg_name, LIFGRNAMSIZ);
26000Sstevel@tonic-gate 	}
26010Sstevel@tonic-gate 	assert(i == ngroup);
26020Sstevel@tonic-gate 
26030Sstevel@tonic-gate 	*grlistpp = ipmp_grouplist_create(phyint_grouplistsig, ngroup, groups);
26040Sstevel@tonic-gate 	return (*grlistpp == NULL ? IPMP_ENOMEM : IPMP_SUCCESS);
26050Sstevel@tonic-gate }
26060Sstevel@tonic-gate 
26070Sstevel@tonic-gate /*
26080Sstevel@tonic-gate  * Store a snapshot of the IPMP subsystem into a dynamically allocated
26090Sstevel@tonic-gate  * structure pointed to by `*snapp'.  Returns an IPMP error code.
26100Sstevel@tonic-gate  */
26110Sstevel@tonic-gate unsigned int
26120Sstevel@tonic-gate getsnap(ipmp_snap_t **snapp)
26130Sstevel@tonic-gate {
26140Sstevel@tonic-gate 	ipmp_grouplist_t	*grlistp;
26150Sstevel@tonic-gate 	ipmp_groupinfo_t	*grinfop;
26160Sstevel@tonic-gate 	ipmp_ifinfo_t		*ifinfop;
26170Sstevel@tonic-gate 	ipmp_snap_t		*snap;
26180Sstevel@tonic-gate 	struct phyint		*pi;
26190Sstevel@tonic-gate 	unsigned int		i;
26200Sstevel@tonic-gate 	int			retval;
26210Sstevel@tonic-gate 
26220Sstevel@tonic-gate 	snap = ipmp_snap_create();
26230Sstevel@tonic-gate 	if (snap == NULL)
26240Sstevel@tonic-gate 		return (IPMP_ENOMEM);
26250Sstevel@tonic-gate 
26260Sstevel@tonic-gate 	/*
26270Sstevel@tonic-gate 	 * Add group list.
26280Sstevel@tonic-gate 	 */
26290Sstevel@tonic-gate 	retval = getgrouplist(&snap->sn_grlistp);
26300Sstevel@tonic-gate 	if (retval != IPMP_SUCCESS) {
26310Sstevel@tonic-gate 		ipmp_snap_free(snap);
26320Sstevel@tonic-gate 		return (retval);
26330Sstevel@tonic-gate 	}
26340Sstevel@tonic-gate 
26350Sstevel@tonic-gate 	/*
26360Sstevel@tonic-gate 	 * Add information for each group in the list.
26370Sstevel@tonic-gate 	 */
26380Sstevel@tonic-gate 	grlistp = snap->sn_grlistp;
26390Sstevel@tonic-gate 	for (i = 0; i < grlistp->gl_ngroup; i++) {
26400Sstevel@tonic-gate 		retval = getgroupinfo(grlistp->gl_groups[i], &grinfop);
26410Sstevel@tonic-gate 		if (retval != IPMP_SUCCESS) {
26420Sstevel@tonic-gate 			ipmp_snap_free(snap);
26430Sstevel@tonic-gate 			return (retval);
26440Sstevel@tonic-gate 		}
26450Sstevel@tonic-gate 		retval = ipmp_snap_addgroupinfo(snap, grinfop);
26460Sstevel@tonic-gate 		if (retval != IPMP_SUCCESS) {
26470Sstevel@tonic-gate 			ipmp_freegroupinfo(grinfop);
26480Sstevel@tonic-gate 			ipmp_snap_free(snap);
26490Sstevel@tonic-gate 			return (retval);
26500Sstevel@tonic-gate 		}
26510Sstevel@tonic-gate 	}
26520Sstevel@tonic-gate 
26530Sstevel@tonic-gate 	/*
26540Sstevel@tonic-gate 	 * Add information for each configured phyint.
26550Sstevel@tonic-gate 	 */
26560Sstevel@tonic-gate 	for (pi = phyints; pi != NULL; pi = pi->pi_next) {
26570Sstevel@tonic-gate 		retval = getifinfo(pi->pi_name, &ifinfop);
26580Sstevel@tonic-gate 		if (retval != IPMP_SUCCESS) {
26590Sstevel@tonic-gate 			ipmp_snap_free(snap);
26600Sstevel@tonic-gate 			return (retval);
26610Sstevel@tonic-gate 		}
26620Sstevel@tonic-gate 		retval = ipmp_snap_addifinfo(snap, ifinfop);
26630Sstevel@tonic-gate 		if (retval != IPMP_SUCCESS) {
26640Sstevel@tonic-gate 			ipmp_freeifinfo(ifinfop);
26650Sstevel@tonic-gate 			ipmp_snap_free(snap);
26660Sstevel@tonic-gate 			return (retval);
26670Sstevel@tonic-gate 		}
26680Sstevel@tonic-gate 	}
26690Sstevel@tonic-gate 
26700Sstevel@tonic-gate 	*snapp = snap;
26710Sstevel@tonic-gate 	return (IPMP_SUCCESS);
26720Sstevel@tonic-gate }
2673