1*0Sstevel@tonic-gate /*
2*0Sstevel@tonic-gate  * CDDL HEADER START
3*0Sstevel@tonic-gate  *
4*0Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5*0Sstevel@tonic-gate  * Common Development and Distribution License, Version 1.0 only
6*0Sstevel@tonic-gate  * (the "License").  You may not use this file except in compliance
7*0Sstevel@tonic-gate  * with the License.
8*0Sstevel@tonic-gate  *
9*0Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10*0Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
11*0Sstevel@tonic-gate  * See the License for the specific language governing permissions
12*0Sstevel@tonic-gate  * and limitations under the License.
13*0Sstevel@tonic-gate  *
14*0Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
15*0Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16*0Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
17*0Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
18*0Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
19*0Sstevel@tonic-gate  *
20*0Sstevel@tonic-gate  * CDDL HEADER END
21*0Sstevel@tonic-gate  */
22*0Sstevel@tonic-gate /*
23*0Sstevel@tonic-gate  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
24*0Sstevel@tonic-gate  * Use is subject to license terms.
25*0Sstevel@tonic-gate  */
26*0Sstevel@tonic-gate 
27*0Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
28*0Sstevel@tonic-gate 
29*0Sstevel@tonic-gate #include "mpd_defs.h"
30*0Sstevel@tonic-gate #include "mpd_tables.h"
31*0Sstevel@tonic-gate 
/*
 * Heads of the global lists of phyints, phyint instances and phyint groups.
 * The anonymous group pointer is assigned in phyint_init().
 */
struct phyint		*phyints = NULL;
struct phyint_instance	*phyint_instances = NULL;
struct phyint_group	*phyint_groups = NULL;
struct phyint_group	*phyint_anongroup;

/*
 * Signature of the group list; seeded in phyint_init().
 */
static uint64_t		phyint_grouplistsig;
45*0Sstevel@tonic-gate 
46*0Sstevel@tonic-gate static void phyint_inst_insert(struct phyint_instance *pii);
47*0Sstevel@tonic-gate static void phyint_inst_print(struct phyint_instance *pii);
48*0Sstevel@tonic-gate 
49*0Sstevel@tonic-gate static void phyint_insert(struct phyint *pi, struct phyint_group *pg);
50*0Sstevel@tonic-gate static void phyint_delete(struct phyint *pi);
51*0Sstevel@tonic-gate 
52*0Sstevel@tonic-gate static void phyint_group_insert(struct phyint_group *pg);
53*0Sstevel@tonic-gate static void phyint_group_delete(struct phyint_group *pg);
54*0Sstevel@tonic-gate static struct phyint_group *phyint_group_lookup(const char *pg_name);
55*0Sstevel@tonic-gate static struct phyint_group *phyint_group_create(const char *pg_name);
56*0Sstevel@tonic-gate 
57*0Sstevel@tonic-gate static void logint_print(struct logint *li);
58*0Sstevel@tonic-gate static void logint_insert(struct phyint_instance *pii, struct logint *li);
59*0Sstevel@tonic-gate static struct logint *logint_lookup(struct phyint_instance *pii, char *li_name);
60*0Sstevel@tonic-gate 
61*0Sstevel@tonic-gate static void target_print(struct target *tg);
62*0Sstevel@tonic-gate static void target_insert(struct phyint_instance *pii, struct target *tg);
63*0Sstevel@tonic-gate static struct target *target_first(struct phyint_instance *pii);
64*0Sstevel@tonic-gate static struct target *target_select_best(struct phyint_instance *pii);
65*0Sstevel@tonic-gate static void target_flush_hosts(struct phyint_group *pg);
66*0Sstevel@tonic-gate 
67*0Sstevel@tonic-gate static void reset_pii_probes(struct phyint_instance *pii, struct target *tg);
68*0Sstevel@tonic-gate 
69*0Sstevel@tonic-gate static boolean_t phyint_inst_v6_sockinit(struct phyint_instance *pii);
70*0Sstevel@tonic-gate static boolean_t phyint_inst_v4_sockinit(struct phyint_instance *pii);
71*0Sstevel@tonic-gate 
72*0Sstevel@tonic-gate static void ip_index_to_mask_v6(uint_t masklen, struct in6_addr *bitmask);
73*0Sstevel@tonic-gate static boolean_t prefix_equal(struct in6_addr p1, struct in6_addr p2,
74*0Sstevel@tonic-gate     int prefix_len);
75*0Sstevel@tonic-gate 
76*0Sstevel@tonic-gate static int phyint_state_event(struct phyint_group *pg, struct phyint *pi);
77*0Sstevel@tonic-gate static int phyint_group_state_event(struct phyint_group *pg);
78*0Sstevel@tonic-gate static int phyint_group_change_event(struct phyint_group *pg, ipmp_group_op_t);
79*0Sstevel@tonic-gate static int phyint_group_member_event(struct phyint_group *pg, struct phyint *pi,
80*0Sstevel@tonic-gate     ipmp_if_op_t op);
81*0Sstevel@tonic-gate 
82*0Sstevel@tonic-gate static uint64_t gensig(void);
83*0Sstevel@tonic-gate 
84*0Sstevel@tonic-gate /* Initialize any per-file global state.  Returns 0 on success, -1 on failure */
85*0Sstevel@tonic-gate int
86*0Sstevel@tonic-gate phyint_init(void)
87*0Sstevel@tonic-gate {
88*0Sstevel@tonic-gate 	phyint_grouplistsig = gensig();
89*0Sstevel@tonic-gate 	if (track_all_phyints) {
90*0Sstevel@tonic-gate 		phyint_anongroup = phyint_group_create("");
91*0Sstevel@tonic-gate 		if (phyint_anongroup == NULL)
92*0Sstevel@tonic-gate 			return (-1);
93*0Sstevel@tonic-gate 		phyint_group_insert(phyint_anongroup);
94*0Sstevel@tonic-gate 	}
95*0Sstevel@tonic-gate 	return (0);
96*0Sstevel@tonic-gate }
97*0Sstevel@tonic-gate 
98*0Sstevel@tonic-gate /* Return the phyint with the given name */
99*0Sstevel@tonic-gate struct phyint *
100*0Sstevel@tonic-gate phyint_lookup(const char *name)
101*0Sstevel@tonic-gate {
102*0Sstevel@tonic-gate 	struct phyint *pi;
103*0Sstevel@tonic-gate 
104*0Sstevel@tonic-gate 	if (debug & D_PHYINT)
105*0Sstevel@tonic-gate 		logdebug("phyint_lookup(%s)\n", name);
106*0Sstevel@tonic-gate 
107*0Sstevel@tonic-gate 	for (pi = phyints; pi != NULL; pi = pi->pi_next) {
108*0Sstevel@tonic-gate 		if (strncmp(pi->pi_name, name, sizeof (pi->pi_name)) == 0)
109*0Sstevel@tonic-gate 			break;
110*0Sstevel@tonic-gate 	}
111*0Sstevel@tonic-gate 	return (pi);
112*0Sstevel@tonic-gate }
113*0Sstevel@tonic-gate 
114*0Sstevel@tonic-gate /* Return the phyint instance with the given name and the given family */
115*0Sstevel@tonic-gate struct phyint_instance *
116*0Sstevel@tonic-gate phyint_inst_lookup(int af, char *name)
117*0Sstevel@tonic-gate {
118*0Sstevel@tonic-gate 	struct phyint *pi;
119*0Sstevel@tonic-gate 
120*0Sstevel@tonic-gate 	if (debug & D_PHYINT)
121*0Sstevel@tonic-gate 		logdebug("phyint_inst_lookup(%s %s)\n", AF_STR(af), name);
122*0Sstevel@tonic-gate 
123*0Sstevel@tonic-gate 	assert(af == AF_INET || af == AF_INET6);
124*0Sstevel@tonic-gate 
125*0Sstevel@tonic-gate 	pi = phyint_lookup(name);
126*0Sstevel@tonic-gate 	if (pi == NULL)
127*0Sstevel@tonic-gate 		return (NULL);
128*0Sstevel@tonic-gate 
129*0Sstevel@tonic-gate 	return (PHYINT_INSTANCE(pi, af));
130*0Sstevel@tonic-gate }
131*0Sstevel@tonic-gate 
132*0Sstevel@tonic-gate static struct phyint_group *
133*0Sstevel@tonic-gate phyint_group_lookup(const char *pg_name)
134*0Sstevel@tonic-gate {
135*0Sstevel@tonic-gate 	struct phyint_group *pg;
136*0Sstevel@tonic-gate 
137*0Sstevel@tonic-gate 	if (debug & D_PHYINT)
138*0Sstevel@tonic-gate 		logdebug("phyint_group_lookup(%s)\n", pg_name);
139*0Sstevel@tonic-gate 
140*0Sstevel@tonic-gate 	for (pg = phyint_groups; pg != NULL; pg = pg->pg_next) {
141*0Sstevel@tonic-gate 		if (strncmp(pg->pg_name, pg_name, sizeof (pg->pg_name)) == 0)
142*0Sstevel@tonic-gate 			break;
143*0Sstevel@tonic-gate 	}
144*0Sstevel@tonic-gate 	return (pg);
145*0Sstevel@tonic-gate }
146*0Sstevel@tonic-gate 
147*0Sstevel@tonic-gate /*
148*0Sstevel@tonic-gate  * Insert the phyint in the linked list of all phyints. If the phyint belongs
149*0Sstevel@tonic-gate  * to some group, insert it in the phyint group list.
150*0Sstevel@tonic-gate  */
151*0Sstevel@tonic-gate static void
152*0Sstevel@tonic-gate phyint_insert(struct phyint *pi, struct phyint_group *pg)
153*0Sstevel@tonic-gate {
154*0Sstevel@tonic-gate 	if (debug & D_PHYINT)
155*0Sstevel@tonic-gate 		logdebug("phyint_insert(%s '%s')\n", pi->pi_name, pg->pg_name);
156*0Sstevel@tonic-gate 
157*0Sstevel@tonic-gate 	/* Insert the phyint at the head of the 'all phyints' list */
158*0Sstevel@tonic-gate 	pi->pi_next = phyints;
159*0Sstevel@tonic-gate 	pi->pi_prev = NULL;
160*0Sstevel@tonic-gate 	if (phyints != NULL)
161*0Sstevel@tonic-gate 		phyints->pi_prev = pi;
162*0Sstevel@tonic-gate 	phyints = pi;
163*0Sstevel@tonic-gate 
164*0Sstevel@tonic-gate 	/*
165*0Sstevel@tonic-gate 	 * Insert the phyint at the head of the 'phyint_group members' list
166*0Sstevel@tonic-gate 	 * of the phyint group to which it belongs.
167*0Sstevel@tonic-gate 	 */
168*0Sstevel@tonic-gate 	pi->pi_pgnext = NULL;
169*0Sstevel@tonic-gate 	pi->pi_pgprev = NULL;
170*0Sstevel@tonic-gate 	pi->pi_group = pg;
171*0Sstevel@tonic-gate 
172*0Sstevel@tonic-gate 	pi->pi_pgnext = pg->pg_phyint;
173*0Sstevel@tonic-gate 	if (pi->pi_pgnext != NULL)
174*0Sstevel@tonic-gate 		pi->pi_pgnext->pi_pgprev = pi;
175*0Sstevel@tonic-gate 	pg->pg_phyint = pi;
176*0Sstevel@tonic-gate 
177*0Sstevel@tonic-gate 	pg->pg_sig++;
178*0Sstevel@tonic-gate 	(void) phyint_group_member_event(pg, pi, IPMP_IF_ADD);
179*0Sstevel@tonic-gate }
180*0Sstevel@tonic-gate 
181*0Sstevel@tonic-gate /* Insert the phyint instance in the linked list of all phyint instances. */
182*0Sstevel@tonic-gate static void
183*0Sstevel@tonic-gate phyint_inst_insert(struct phyint_instance *pii)
184*0Sstevel@tonic-gate {
185*0Sstevel@tonic-gate 	if (debug & D_PHYINT) {
186*0Sstevel@tonic-gate 		logdebug("phyint_inst_insert(%s %s)\n",
187*0Sstevel@tonic-gate 		    AF_STR(pii->pii_af), pii->pii_name);
188*0Sstevel@tonic-gate 	}
189*0Sstevel@tonic-gate 
190*0Sstevel@tonic-gate 	/*
191*0Sstevel@tonic-gate 	 * Insert the phyint at the head of the 'all phyint instances' list.
192*0Sstevel@tonic-gate 	 */
193*0Sstevel@tonic-gate 	pii->pii_next = phyint_instances;
194*0Sstevel@tonic-gate 	pii->pii_prev = NULL;
195*0Sstevel@tonic-gate 	if (phyint_instances != NULL)
196*0Sstevel@tonic-gate 		phyint_instances->pii_prev = pii;
197*0Sstevel@tonic-gate 	phyint_instances = pii;
198*0Sstevel@tonic-gate }
199*0Sstevel@tonic-gate 
200*0Sstevel@tonic-gate /*
201*0Sstevel@tonic-gate  * Create a new phyint with the given parameters. Also insert it into
202*0Sstevel@tonic-gate  * the list of all phyints and the list of phyint group members by calling
203*0Sstevel@tonic-gate  * phyint_insert().
204*0Sstevel@tonic-gate  */
205*0Sstevel@tonic-gate static struct phyint *
206*0Sstevel@tonic-gate phyint_create(char *pi_name, struct phyint_group *pg, uint_t ifindex,
207*0Sstevel@tonic-gate     uint64_t flags)
208*0Sstevel@tonic-gate {
209*0Sstevel@tonic-gate 	struct phyint *pi;
210*0Sstevel@tonic-gate 
211*0Sstevel@tonic-gate 	pi = calloc(1, sizeof (struct phyint));
212*0Sstevel@tonic-gate 	if (pi == NULL) {
213*0Sstevel@tonic-gate 		logperror("phyint_create: calloc");
214*0Sstevel@tonic-gate 		return (NULL);
215*0Sstevel@tonic-gate 	}
216*0Sstevel@tonic-gate 
217*0Sstevel@tonic-gate 	/*
218*0Sstevel@tonic-gate 	 * Record the phyint values. Also insert the phyint into the
219*0Sstevel@tonic-gate 	 * phyint group by calling phyint_insert().
220*0Sstevel@tonic-gate 	 */
221*0Sstevel@tonic-gate 	(void) strncpy(pi->pi_name, pi_name, sizeof (pi->pi_name));
222*0Sstevel@tonic-gate 	pi->pi_name[sizeof (pi->pi_name) - 1] = '\0';
223*0Sstevel@tonic-gate 	pi->pi_ifindex = ifindex;
224*0Sstevel@tonic-gate 	pi->pi_icmpid =
225*0Sstevel@tonic-gate 	    htons(((getpid() & 0xFF) << 8) | (pi->pi_ifindex & 0xFF));
226*0Sstevel@tonic-gate 	/*
227*0Sstevel@tonic-gate 	 * We optimistically start in the PI_RUNNING state.  Later (in
228*0Sstevel@tonic-gate 	 * process_link_state_changes()), we will readjust this to match the
229*0Sstevel@tonic-gate 	 * current state of the link.  Further, if test addresses are
230*0Sstevel@tonic-gate 	 * subsequently assigned, we will transition to PI_NOTARGETS and then
231*0Sstevel@tonic-gate 	 * either PI_RUNNING or PI_FAILED, depending on the result of the test
232*0Sstevel@tonic-gate 	 * probes.
233*0Sstevel@tonic-gate 	 */
234*0Sstevel@tonic-gate 	pi->pi_state = PI_RUNNING;
235*0Sstevel@tonic-gate 	pi->pi_flags = PHYINT_FLAGS(flags);
236*0Sstevel@tonic-gate 	/*
237*0Sstevel@tonic-gate 	 * Initialise the link state.  The link state is initialised to
238*0Sstevel@tonic-gate 	 * up, so that if the link is down when IPMP starts monitoring
239*0Sstevel@tonic-gate 	 * the interface, it will appear as though there has been a
240*0Sstevel@tonic-gate 	 * transition from the link up to link down.  This avoids
241*0Sstevel@tonic-gate 	 * having to treat this situation as a special case.
242*0Sstevel@tonic-gate 	 */
243*0Sstevel@tonic-gate 	INIT_LINK_STATE(pi);
244*0Sstevel@tonic-gate 
245*0Sstevel@tonic-gate 	/*
246*0Sstevel@tonic-gate 	 * Insert the phyint in the list of all phyints, and the
247*0Sstevel@tonic-gate 	 * list of phyint group members
248*0Sstevel@tonic-gate 	 */
249*0Sstevel@tonic-gate 	phyint_insert(pi, pg);
250*0Sstevel@tonic-gate 
251*0Sstevel@tonic-gate 	/*
252*0Sstevel@tonic-gate 	 * If we are joining a failed group, mark the interface as
253*0Sstevel@tonic-gate 	 * failed.
254*0Sstevel@tonic-gate 	 */
255*0Sstevel@tonic-gate 	if (GROUP_FAILED(pg))
256*0Sstevel@tonic-gate 		(void) change_lif_flags(pi, IFF_FAILED, _B_TRUE);
257*0Sstevel@tonic-gate 
258*0Sstevel@tonic-gate 	return (pi);
259*0Sstevel@tonic-gate }
260*0Sstevel@tonic-gate 
261*0Sstevel@tonic-gate /*
262*0Sstevel@tonic-gate  * Create a new phyint instance belonging to the phyint 'pi' and address
263*0Sstevel@tonic-gate  * family 'af'. Also insert it into the list of all phyint instances by
264*0Sstevel@tonic-gate  * calling phyint_inst_insert().
265*0Sstevel@tonic-gate  */
266*0Sstevel@tonic-gate static struct phyint_instance *
267*0Sstevel@tonic-gate phyint_inst_create(struct phyint *pi, int af)
268*0Sstevel@tonic-gate {
269*0Sstevel@tonic-gate 	struct phyint_instance *pii;
270*0Sstevel@tonic-gate 
271*0Sstevel@tonic-gate 	pii = calloc(1, sizeof (struct phyint_instance));
272*0Sstevel@tonic-gate 	if (pii == NULL) {
273*0Sstevel@tonic-gate 		logperror("phyint_inst_create: calloc");
274*0Sstevel@tonic-gate 		return (NULL);
275*0Sstevel@tonic-gate 	}
276*0Sstevel@tonic-gate 
277*0Sstevel@tonic-gate 	/*
278*0Sstevel@tonic-gate 	 * Attach the phyint instance to the phyint.
279*0Sstevel@tonic-gate 	 * Set the back pointers as well
280*0Sstevel@tonic-gate 	 */
281*0Sstevel@tonic-gate 	pii->pii_phyint = pi;
282*0Sstevel@tonic-gate 	if (af == AF_INET)
283*0Sstevel@tonic-gate 		pi->pi_v4 = pii;
284*0Sstevel@tonic-gate 	else
285*0Sstevel@tonic-gate 		pi->pi_v6 = pii;
286*0Sstevel@tonic-gate 
287*0Sstevel@tonic-gate 	pii->pii_in_use = 1;
288*0Sstevel@tonic-gate 	pii->pii_probe_sock = -1;
289*0Sstevel@tonic-gate 	pii->pii_snxt = 1;
290*0Sstevel@tonic-gate 	pii->pii_af = af;
291*0Sstevel@tonic-gate 	pii->pii_fd_hrtime = gethrtime() +
292*0Sstevel@tonic-gate 	    (FAILURE_DETECTION_QP * (hrtime_t)NANOSEC);
293*0Sstevel@tonic-gate 	pii->pii_flags = pi->pi_flags;
294*0Sstevel@tonic-gate 
295*0Sstevel@tonic-gate 	/* Insert the phyint instance in the list of all phyint instances. */
296*0Sstevel@tonic-gate 	phyint_inst_insert(pii);
297*0Sstevel@tonic-gate 	return (pii);
298*0Sstevel@tonic-gate }
299*0Sstevel@tonic-gate 
300*0Sstevel@tonic-gate /*
301*0Sstevel@tonic-gate  * Change the state of phyint `pi' to state `state'.
302*0Sstevel@tonic-gate  */
303*0Sstevel@tonic-gate void
304*0Sstevel@tonic-gate phyint_chstate(struct phyint *pi, enum pi_state state)
305*0Sstevel@tonic-gate {
306*0Sstevel@tonic-gate 	/*
307*0Sstevel@tonic-gate 	 * To simplify things, some callers always set a given state
308*0Sstevel@tonic-gate 	 * regardless of the previous state of the phyint (e.g., setting
309*0Sstevel@tonic-gate 	 * PI_RUNNING when it's already set).  We shouldn't bother
310*0Sstevel@tonic-gate 	 * generating an event or consuming a signature for these, since
311*0Sstevel@tonic-gate 	 * the actual state of the interface is unchanged.
312*0Sstevel@tonic-gate 	 */
313*0Sstevel@tonic-gate 	if (pi->pi_state == state)
314*0Sstevel@tonic-gate 		return;
315*0Sstevel@tonic-gate 
316*0Sstevel@tonic-gate 	pi->pi_state = state;
317*0Sstevel@tonic-gate 	pi->pi_group->pg_sig++;
318*0Sstevel@tonic-gate 	(void) phyint_state_event(pi->pi_group, pi);
319*0Sstevel@tonic-gate }
320*0Sstevel@tonic-gate 
321*0Sstevel@tonic-gate /*
322*0Sstevel@tonic-gate  * Note that the type of phyint `pi' has changed.
323*0Sstevel@tonic-gate  */
324*0Sstevel@tonic-gate void
325*0Sstevel@tonic-gate phyint_newtype(struct phyint *pi)
326*0Sstevel@tonic-gate {
327*0Sstevel@tonic-gate 	pi->pi_group->pg_sig++;
328*0Sstevel@tonic-gate 	(void) phyint_state_event(pi->pi_group, pi);
329*0Sstevel@tonic-gate }
330*0Sstevel@tonic-gate 
331*0Sstevel@tonic-gate /*
332*0Sstevel@tonic-gate  * Insert the phyint group in the linked list of all phyint groups
333*0Sstevel@tonic-gate  * at the head of the list
334*0Sstevel@tonic-gate  */
335*0Sstevel@tonic-gate static void
336*0Sstevel@tonic-gate phyint_group_insert(struct phyint_group *pg)
337*0Sstevel@tonic-gate {
338*0Sstevel@tonic-gate 	pg->pg_next = phyint_groups;
339*0Sstevel@tonic-gate 	pg->pg_prev = NULL;
340*0Sstevel@tonic-gate 	if (phyint_groups != NULL)
341*0Sstevel@tonic-gate 		phyint_groups->pg_prev = pg;
342*0Sstevel@tonic-gate 	phyint_groups = pg;
343*0Sstevel@tonic-gate 
344*0Sstevel@tonic-gate 	phyint_grouplistsig++;
345*0Sstevel@tonic-gate 	(void) phyint_group_change_event(pg, IPMP_GROUP_ADD);
346*0Sstevel@tonic-gate }
347*0Sstevel@tonic-gate 
348*0Sstevel@tonic-gate /*
349*0Sstevel@tonic-gate  * Create a new phyint group called 'name'.
350*0Sstevel@tonic-gate  */
351*0Sstevel@tonic-gate static struct phyint_group *
352*0Sstevel@tonic-gate phyint_group_create(const char *name)
353*0Sstevel@tonic-gate {
354*0Sstevel@tonic-gate 	struct	phyint_group *pg;
355*0Sstevel@tonic-gate 
356*0Sstevel@tonic-gate 	if (debug & D_PHYINT)
357*0Sstevel@tonic-gate 		logdebug("phyint_group_create(%s)\n", name);
358*0Sstevel@tonic-gate 
359*0Sstevel@tonic-gate 	pg = calloc(1, sizeof (struct phyint_group));
360*0Sstevel@tonic-gate 	if (pg == NULL) {
361*0Sstevel@tonic-gate 		logperror("phyint_group_create: calloc");
362*0Sstevel@tonic-gate 		return (NULL);
363*0Sstevel@tonic-gate 	}
364*0Sstevel@tonic-gate 
365*0Sstevel@tonic-gate 	(void) strncpy(pg->pg_name, name, sizeof (pg->pg_name));
366*0Sstevel@tonic-gate 	pg->pg_name[sizeof (pg->pg_name) - 1] = '\0';
367*0Sstevel@tonic-gate 	pg->pg_sig = gensig();
368*0Sstevel@tonic-gate 
369*0Sstevel@tonic-gate 	pg->pg_fdt = user_failure_detection_time;
370*0Sstevel@tonic-gate 	pg->pg_probeint = user_probe_interval;
371*0Sstevel@tonic-gate 
372*0Sstevel@tonic-gate 	return (pg);
373*0Sstevel@tonic-gate }
374*0Sstevel@tonic-gate 
375*0Sstevel@tonic-gate /*
376*0Sstevel@tonic-gate  * Change the state of the phyint group `pg' to state `state'.
377*0Sstevel@tonic-gate  */
378*0Sstevel@tonic-gate void
379*0Sstevel@tonic-gate phyint_group_chstate(struct phyint_group *pg, enum pg_state state)
380*0Sstevel@tonic-gate {
381*0Sstevel@tonic-gate 	assert(pg != phyint_anongroup);
382*0Sstevel@tonic-gate 
383*0Sstevel@tonic-gate 	switch (state) {
384*0Sstevel@tonic-gate 	case PG_FAILED:
385*0Sstevel@tonic-gate 		pg->pg_groupfailed = 1;
386*0Sstevel@tonic-gate 
387*0Sstevel@tonic-gate 		/*
388*0Sstevel@tonic-gate 		 * We can never know with certainty that a group has
389*0Sstevel@tonic-gate 		 * failed.  It is possible that all known targets have
390*0Sstevel@tonic-gate 		 * failed simultaneously, and new targets have come up
391*0Sstevel@tonic-gate 		 * instead. If the targets are routers then router
392*0Sstevel@tonic-gate 		 * discovery will kick in, and we will see the new routers
393*0Sstevel@tonic-gate 		 * thru routing socket messages. But if the targets are
394*0Sstevel@tonic-gate 		 * hosts, we have to discover it by multicast.	So flush
395*0Sstevel@tonic-gate 		 * all the host targets. The next probe will send out a
396*0Sstevel@tonic-gate 		 * multicast echo request. If this is a group failure, we
397*0Sstevel@tonic-gate 		 * will still not see any response, otherwise we will
398*0Sstevel@tonic-gate 		 * clear the pg_groupfailed flag after we get
399*0Sstevel@tonic-gate 		 * NUM_PROBE_REPAIRS consecutive unicast replies on any
400*0Sstevel@tonic-gate 		 * phyint.
401*0Sstevel@tonic-gate 		 */
402*0Sstevel@tonic-gate 		target_flush_hosts(pg);
403*0Sstevel@tonic-gate 		break;
404*0Sstevel@tonic-gate 
405*0Sstevel@tonic-gate 	case PG_RUNNING:
406*0Sstevel@tonic-gate 		pg->pg_groupfailed = 0;
407*0Sstevel@tonic-gate 		break;
408*0Sstevel@tonic-gate 
409*0Sstevel@tonic-gate 	default:
410*0Sstevel@tonic-gate 		logerr("phyint_group_chstate: invalid group state %d; "
411*0Sstevel@tonic-gate 		    "aborting\n", state);
412*0Sstevel@tonic-gate 		abort();
413*0Sstevel@tonic-gate 	}
414*0Sstevel@tonic-gate 
415*0Sstevel@tonic-gate 	pg->pg_sig++;
416*0Sstevel@tonic-gate 	(void) phyint_group_state_event(pg);
417*0Sstevel@tonic-gate }
418*0Sstevel@tonic-gate 
419*0Sstevel@tonic-gate /*
420*0Sstevel@tonic-gate  * Create a new phyint instance and initialize it from the values supplied by
421*0Sstevel@tonic-gate  * the kernel. Always check for ENXIO before logging any error, because the
422*0Sstevel@tonic-gate  * interface could have vanished after completion of SIOCGLIFCONF.
423*0Sstevel@tonic-gate  * Return values:
424*0Sstevel@tonic-gate  *	pointer to the phyint instance on success
425*0Sstevel@tonic-gate  *	NULL on failure Eg. if the phyint instance is not found in the kernel
426*0Sstevel@tonic-gate  */
427*0Sstevel@tonic-gate struct phyint_instance *
428*0Sstevel@tonic-gate phyint_inst_init_from_k(int af, char *pi_name)
429*0Sstevel@tonic-gate {
430*0Sstevel@tonic-gate 	char	pg_name[LIFNAMSIZ + 1];
431*0Sstevel@tonic-gate 	int	ifsock;
432*0Sstevel@tonic-gate 	uint_t	ifindex;
433*0Sstevel@tonic-gate 	uint64_t	flags;
434*0Sstevel@tonic-gate 	struct lifreq	lifr;
435*0Sstevel@tonic-gate 	struct phyint	*pi;
436*0Sstevel@tonic-gate 	struct phyint_instance	*pii;
437*0Sstevel@tonic-gate 	boolean_t	pg_created;
438*0Sstevel@tonic-gate 	boolean_t	pi_created;
439*0Sstevel@tonic-gate 	struct phyint_group	*pg;
440*0Sstevel@tonic-gate 
441*0Sstevel@tonic-gate retry:
442*0Sstevel@tonic-gate 	pii = NULL;
443*0Sstevel@tonic-gate 	pi = NULL;
444*0Sstevel@tonic-gate 	pg = NULL;
445*0Sstevel@tonic-gate 	pi_created = _B_FALSE;
446*0Sstevel@tonic-gate 	pg_created = _B_FALSE;
447*0Sstevel@tonic-gate 
448*0Sstevel@tonic-gate 	if (debug & D_PHYINT) {
449*0Sstevel@tonic-gate 		logdebug("phyint_inst_init_from_k(%s %s)\n",
450*0Sstevel@tonic-gate 		    AF_STR(af), pi_name);
451*0Sstevel@tonic-gate 	}
452*0Sstevel@tonic-gate 
453*0Sstevel@tonic-gate 	assert(af == AF_INET || af == AF_INET6);
454*0Sstevel@tonic-gate 
455*0Sstevel@tonic-gate 	/* Get the socket for doing ioctls */
456*0Sstevel@tonic-gate 	ifsock = (af == AF_INET) ? ifsock_v4 : ifsock_v6;
457*0Sstevel@tonic-gate 
458*0Sstevel@tonic-gate 	/*
459*0Sstevel@tonic-gate 	 * Get the interface flags. Ignore loopback and multipoint
460*0Sstevel@tonic-gate 	 * interfaces.
461*0Sstevel@tonic-gate 	 */
462*0Sstevel@tonic-gate 	(void) strncpy(lifr.lifr_name, pi_name, sizeof (lifr.lifr_name));
463*0Sstevel@tonic-gate 	lifr.lifr_name[sizeof (lifr.lifr_name) - 1] = '\0';
464*0Sstevel@tonic-gate 	if (ioctl(ifsock, SIOCGLIFFLAGS, (char *)&lifr) < 0) {
465*0Sstevel@tonic-gate 		if (errno != ENXIO) {
466*0Sstevel@tonic-gate 			logperror("phyint_inst_init_from_k:"
467*0Sstevel@tonic-gate 			    " ioctl (get flags)");
468*0Sstevel@tonic-gate 		}
469*0Sstevel@tonic-gate 		return (NULL);
470*0Sstevel@tonic-gate 	}
471*0Sstevel@tonic-gate 	flags = lifr.lifr_flags;
472*0Sstevel@tonic-gate 	if (!(flags & IFF_MULTICAST) || (flags & IFF_LOOPBACK))
473*0Sstevel@tonic-gate 		return (NULL);
474*0Sstevel@tonic-gate 
475*0Sstevel@tonic-gate 	/*
476*0Sstevel@tonic-gate 	 * Get the ifindex for recording later in our tables, in case we need
477*0Sstevel@tonic-gate 	 * to create a new phyint.
478*0Sstevel@tonic-gate 	 */
479*0Sstevel@tonic-gate 	if (ioctl(ifsock, SIOCGLIFINDEX, (char *)&lifr) < 0) {
480*0Sstevel@tonic-gate 		if (errno != ENXIO) {
481*0Sstevel@tonic-gate 			logperror("phyint_inst_init_from_k: "
482*0Sstevel@tonic-gate 			    " ioctl (get lifindex)");
483*0Sstevel@tonic-gate 		}
484*0Sstevel@tonic-gate 		return (NULL);
485*0Sstevel@tonic-gate 	}
486*0Sstevel@tonic-gate 	ifindex = lifr.lifr_index;
487*0Sstevel@tonic-gate 
488*0Sstevel@tonic-gate 	/*
489*0Sstevel@tonic-gate 	 * Get the phyint group name of this phyint, from the kernel.
490*0Sstevel@tonic-gate 	 */
491*0Sstevel@tonic-gate 	if (ioctl(ifsock, SIOCGLIFGROUPNAME, (char *)&lifr) < 0) {
492*0Sstevel@tonic-gate 		if (errno != ENXIO) {
493*0Sstevel@tonic-gate 			logperror("phyint_inst_init_from_k: "
494*0Sstevel@tonic-gate 			    "ioctl (get group name)");
495*0Sstevel@tonic-gate 		}
496*0Sstevel@tonic-gate 		return (NULL);
497*0Sstevel@tonic-gate 	}
498*0Sstevel@tonic-gate 	(void) strncpy(pg_name, lifr.lifr_groupname, sizeof (pg_name));
499*0Sstevel@tonic-gate 	pg_name[sizeof (pg_name) - 1] = '\0';
500*0Sstevel@tonic-gate 
501*0Sstevel@tonic-gate 	/*
502*0Sstevel@tonic-gate 	 * If the phyint is not part of any group, pg_name is the
503*0Sstevel@tonic-gate 	 * null string. If 'track_all_phyints' is false, there is no
504*0Sstevel@tonic-gate 	 * need to create a phyint.
505*0Sstevel@tonic-gate 	 */
506*0Sstevel@tonic-gate 	if (pg_name[0] == '\0' && !track_all_phyints) {
507*0Sstevel@tonic-gate 		/*
508*0Sstevel@tonic-gate 		 * If the IFF_FAILED or IFF_OFFLINE flags are set, reset
509*0Sstevel@tonic-gate 		 * them. These flags shouldn't be set if IPMP isn't
510*0Sstevel@tonic-gate 		 * tracking the interface.
511*0Sstevel@tonic-gate 		 */
512*0Sstevel@tonic-gate 		if ((flags & (IFF_FAILED | IFF_OFFLINE)) != 0) {
513*0Sstevel@tonic-gate 			lifr.lifr_flags = flags & ~(IFF_FAILED | IFF_OFFLINE);
514*0Sstevel@tonic-gate 			if (ioctl(ifsock, SIOCSLIFFLAGS, (char *)&lifr) < 0) {
515*0Sstevel@tonic-gate 				if (errno != ENXIO) {
516*0Sstevel@tonic-gate 					logperror("phyint_inst_init_from_k:"
517*0Sstevel@tonic-gate 					    " ioctl (set flags)");
518*0Sstevel@tonic-gate 				}
519*0Sstevel@tonic-gate 			}
520*0Sstevel@tonic-gate 		}
521*0Sstevel@tonic-gate 		return (NULL);
522*0Sstevel@tonic-gate 	}
523*0Sstevel@tonic-gate 
524*0Sstevel@tonic-gate 	/*
525*0Sstevel@tonic-gate 	 * We need to create a new phyint instance. A phyint instance
526*0Sstevel@tonic-gate 	 * belongs to a phyint, and the phyint belongs to a phyint group.
527*0Sstevel@tonic-gate 	 * So we first lookup the 'parents' and if they don't exist then
528*0Sstevel@tonic-gate 	 * we create them.
529*0Sstevel@tonic-gate 	 */
530*0Sstevel@tonic-gate 	pg = phyint_group_lookup(pg_name);
531*0Sstevel@tonic-gate 	if (pg == NULL) {
532*0Sstevel@tonic-gate 		pg = phyint_group_create(pg_name);
533*0Sstevel@tonic-gate 		if (pg == NULL) {
534*0Sstevel@tonic-gate 			logerr("phyint_inst_init_from_k:"
535*0Sstevel@tonic-gate 			    " unable to create group %s\n", pg_name);
536*0Sstevel@tonic-gate 			return (NULL);
537*0Sstevel@tonic-gate 		}
538*0Sstevel@tonic-gate 		phyint_group_insert(pg);
539*0Sstevel@tonic-gate 		pg_created = _B_TRUE;
540*0Sstevel@tonic-gate 	}
541*0Sstevel@tonic-gate 
542*0Sstevel@tonic-gate 	/*
543*0Sstevel@tonic-gate 	 * Lookup the phyint. If the phyint does not exist create it.
544*0Sstevel@tonic-gate 	 */
545*0Sstevel@tonic-gate 	pi = phyint_lookup(pi_name);
546*0Sstevel@tonic-gate 	if (pi == NULL) {
547*0Sstevel@tonic-gate 		pi = phyint_create(pi_name, pg, ifindex, flags);
548*0Sstevel@tonic-gate 		if (pi == NULL) {
549*0Sstevel@tonic-gate 			logerr("phyint_inst_init_from_k:"
550*0Sstevel@tonic-gate 			    " unable to create phyint %s\n", pi_name);
551*0Sstevel@tonic-gate 			if (pg_created)
552*0Sstevel@tonic-gate 				phyint_group_delete(pg);
553*0Sstevel@tonic-gate 			return (NULL);
554*0Sstevel@tonic-gate 		}
555*0Sstevel@tonic-gate 		pi_created = _B_TRUE;
556*0Sstevel@tonic-gate 	} else {
557*0Sstevel@tonic-gate 		/* The phyint exists already. */
558*0Sstevel@tonic-gate 		assert(pi_created == _B_FALSE);
559*0Sstevel@tonic-gate 		/*
560*0Sstevel@tonic-gate 		 * Normally we should see consistent values for the IPv4 and
561*0Sstevel@tonic-gate 		 * IPv6 instances, for phyint properties. If we don't, it
562*0Sstevel@tonic-gate 		 * means things have changed underneath us, and we should
563*0Sstevel@tonic-gate 		 * resync our tables with the kernel. Check whether the
564*0Sstevel@tonic-gate 		 * interface index has changed. If so, it is most likely
565*0Sstevel@tonic-gate 		 * the interface has been unplumbed and replumbed,
566*0Sstevel@tonic-gate 		 * while we are yet to update our tables. Do it now.
567*0Sstevel@tonic-gate 		 */
568*0Sstevel@tonic-gate 		if (pi->pi_ifindex != ifindex) {
569*0Sstevel@tonic-gate 			if (pg_created)
570*0Sstevel@tonic-gate 				phyint_group_delete(pg);
571*0Sstevel@tonic-gate 			phyint_inst_delete(PHYINT_INSTANCE(pi, AF_OTHER(af)));
572*0Sstevel@tonic-gate 			goto retry;
573*0Sstevel@tonic-gate 		}
574*0Sstevel@tonic-gate 		assert(PHYINT_INSTANCE(pi, af) == NULL);
575*0Sstevel@tonic-gate 
576*0Sstevel@tonic-gate 		/*
577*0Sstevel@tonic-gate 		 * If the group name seen by the IPv4 and IPv6 instances
578*0Sstevel@tonic-gate 		 * are different, it is most likely the groupname has
579*0Sstevel@tonic-gate 		 * changed, while we are yet to update our tables. Do it now.
580*0Sstevel@tonic-gate 		 */
581*0Sstevel@tonic-gate 		if (strcmp(pi->pi_group->pg_name, pg_name) != 0) {
582*0Sstevel@tonic-gate 			if (pg_created)
583*0Sstevel@tonic-gate 				phyint_group_delete(pg);
584*0Sstevel@tonic-gate 			restore_phyint(pi);
585*0Sstevel@tonic-gate 			phyint_inst_delete(PHYINT_INSTANCE(pi,
586*0Sstevel@tonic-gate 			    AF_OTHER(af)));
587*0Sstevel@tonic-gate 			goto retry;
588*0Sstevel@tonic-gate 		}
589*0Sstevel@tonic-gate 	}
590*0Sstevel@tonic-gate 
591*0Sstevel@tonic-gate 	/*
592*0Sstevel@tonic-gate 	 * Create a new phyint instance, corresponding to the 'af'
593*0Sstevel@tonic-gate 	 * passed in.
594*0Sstevel@tonic-gate 	 */
595*0Sstevel@tonic-gate 	pii = phyint_inst_create(pi, af);
596*0Sstevel@tonic-gate 	if (pii == NULL) {
597*0Sstevel@tonic-gate 		logerr("phyint_inst_init_from_k: unable to create"
598*0Sstevel@tonic-gate 		    "phyint inst %s\n", pi->pi_name);
599*0Sstevel@tonic-gate 		if (pi_created) {
600*0Sstevel@tonic-gate 			/*
601*0Sstevel@tonic-gate 			 * Deleting the phyint will delete the phyint group
602*0Sstevel@tonic-gate 			 * if this is the last phyint in the group.
603*0Sstevel@tonic-gate 			 */
604*0Sstevel@tonic-gate 			phyint_delete(pi);
605*0Sstevel@tonic-gate 		}
606*0Sstevel@tonic-gate 		return (NULL);
607*0Sstevel@tonic-gate 	}
608*0Sstevel@tonic-gate 
609*0Sstevel@tonic-gate 	return (pii);
610*0Sstevel@tonic-gate }
611*0Sstevel@tonic-gate 
612*0Sstevel@tonic-gate /*
613*0Sstevel@tonic-gate  * Bind the pii_probe_sock to the chosen IFF_NOFAILOVER address in
614*0Sstevel@tonic-gate  * pii_probe_logint. This socket will be used for sending and receiving
615*0Sstevel@tonic-gate  * ICMP/ICMPv6 probes to targets. Do the common part in this function, and
616*0Sstevel@tonic-gate  * complete the initializations by calling the protocol specific functions
617*0Sstevel@tonic-gate  * phyint_inst_v{4,6}_sockinit() respectively.
618*0Sstevel@tonic-gate  *
619*0Sstevel@tonic-gate  * Return values: _B_TRUE/_B_FALSE for success or failure respectively.
620*0Sstevel@tonic-gate  */
621*0Sstevel@tonic-gate boolean_t
622*0Sstevel@tonic-gate phyint_inst_sockinit(struct phyint_instance *pii)
623*0Sstevel@tonic-gate {
624*0Sstevel@tonic-gate 	boolean_t success;
625*0Sstevel@tonic-gate 	struct phyint_group *pg;
626*0Sstevel@tonic-gate 
627*0Sstevel@tonic-gate 	if (debug & D_PHYINT) {
628*0Sstevel@tonic-gate 		logdebug("phyint_inst_sockinit(%s %s)\n",
629*0Sstevel@tonic-gate 		    AF_STR(pii->pii_af), pii->pii_name);
630*0Sstevel@tonic-gate 	}
631*0Sstevel@tonic-gate 
632*0Sstevel@tonic-gate 	assert(pii->pii_probe_logint != NULL);
633*0Sstevel@tonic-gate 	assert(pii->pii_probe_logint->li_flags & IFF_UP);
634*0Sstevel@tonic-gate 	assert(SINGLETON_GROUP(pii->pii_phyint) ||
635*0Sstevel@tonic-gate 	    (pii->pii_probe_logint->li_flags & IFF_NOFAILOVER));
636*0Sstevel@tonic-gate 	assert(pii->pii_af == AF_INET || pii->pii_af == AF_INET6);
637*0Sstevel@tonic-gate 
638*0Sstevel@tonic-gate 	/*
639*0Sstevel@tonic-gate 	 * If the socket is already bound, close pii_probe_sock
640*0Sstevel@tonic-gate 	 */
641*0Sstevel@tonic-gate 	if (pii->pii_probe_sock != -1)
642*0Sstevel@tonic-gate 		close_probe_socket(pii, _B_TRUE);
643*0Sstevel@tonic-gate 
644*0Sstevel@tonic-gate 	/*
645*0Sstevel@tonic-gate 	 * If the phyint is not part of a named group and track_all_phyints is
646*0Sstevel@tonic-gate 	 * false, simply return.
647*0Sstevel@tonic-gate 	 */
648*0Sstevel@tonic-gate 	pg = pii->pii_phyint->pi_group;
649*0Sstevel@tonic-gate 	if (pg == phyint_anongroup && !track_all_phyints) {
650*0Sstevel@tonic-gate 		if (debug & D_PHYINT)
651*0Sstevel@tonic-gate 			logdebug("phyint_inst_sockinit: no group\n");
652*0Sstevel@tonic-gate 		return (_B_FALSE);
653*0Sstevel@tonic-gate 	}
654*0Sstevel@tonic-gate 
655*0Sstevel@tonic-gate 	/*
656*0Sstevel@tonic-gate 	 * Initialize the socket by calling the protocol specific function.
657*0Sstevel@tonic-gate 	 * If it succeeds, add the socket to the poll list.
658*0Sstevel@tonic-gate 	 */
659*0Sstevel@tonic-gate 	if (pii->pii_af == AF_INET6)
660*0Sstevel@tonic-gate 		success = phyint_inst_v6_sockinit(pii);
661*0Sstevel@tonic-gate 	else
662*0Sstevel@tonic-gate 		success = phyint_inst_v4_sockinit(pii);
663*0Sstevel@tonic-gate 
664*0Sstevel@tonic-gate 	if (success && (poll_add(pii->pii_probe_sock) == 0))
665*0Sstevel@tonic-gate 		return (_B_TRUE);
666*0Sstevel@tonic-gate 
667*0Sstevel@tonic-gate 	/* Something failed, cleanup and return false */
668*0Sstevel@tonic-gate 	if (pii->pii_probe_sock != -1)
669*0Sstevel@tonic-gate 		close_probe_socket(pii, _B_FALSE);
670*0Sstevel@tonic-gate 
671*0Sstevel@tonic-gate 	return (_B_FALSE);
672*0Sstevel@tonic-gate }
673*0Sstevel@tonic-gate 
674*0Sstevel@tonic-gate /*
675*0Sstevel@tonic-gate  * IPv6 specific part in initializing the pii_probe_sock. This socket is
676*0Sstevel@tonic-gate  * used to send/receive ICMPv6 probe packets.
677*0Sstevel@tonic-gate  */
678*0Sstevel@tonic-gate static boolean_t
679*0Sstevel@tonic-gate phyint_inst_v6_sockinit(struct phyint_instance *pii)
680*0Sstevel@tonic-gate {
681*0Sstevel@tonic-gate 	icmp6_filter_t filter;
682*0Sstevel@tonic-gate 	int hopcount = 1;
683*0Sstevel@tonic-gate 	int int_op;
684*0Sstevel@tonic-gate 	struct	sockaddr_in6	testaddr;
685*0Sstevel@tonic-gate 
686*0Sstevel@tonic-gate 	/*
687*0Sstevel@tonic-gate 	 * Open a raw socket with ICMPv6 protocol.
688*0Sstevel@tonic-gate 	 *
689*0Sstevel@tonic-gate 	 * Use IPV6_DONTFAILOVER_IF to make sure that probes go out
690*0Sstevel@tonic-gate 	 * on the specified phyint only, and are not subject to load
691*0Sstevel@tonic-gate 	 * balancing. Bind to the src address chosen will ensure that
692*0Sstevel@tonic-gate 	 * the responses are received only on the specified phyint.
693*0Sstevel@tonic-gate 	 *
694*0Sstevel@tonic-gate 	 * Set the hopcount to 1 so that probe packets are not routed.
695*0Sstevel@tonic-gate 	 * Disable multicast loopback. Set the receive filter to
696*0Sstevel@tonic-gate 	 * receive only ICMPv6 echo replies.
697*0Sstevel@tonic-gate 	 */
698*0Sstevel@tonic-gate 	pii->pii_probe_sock = socket(pii->pii_af, SOCK_RAW, IPPROTO_ICMPV6);
699*0Sstevel@tonic-gate 	if (pii->pii_probe_sock < 0) {
700*0Sstevel@tonic-gate 		logperror_pii(pii, "phyint_inst_v6_sockinit: socket");
701*0Sstevel@tonic-gate 		return (_B_FALSE);
702*0Sstevel@tonic-gate }
703*0Sstevel@tonic-gate 
704*0Sstevel@tonic-gate 	bzero(&testaddr, sizeof (testaddr));
705*0Sstevel@tonic-gate 	testaddr.sin6_family = AF_INET6;
706*0Sstevel@tonic-gate 	testaddr.sin6_port = 0;
707*0Sstevel@tonic-gate 	testaddr.sin6_addr = pii->pii_probe_logint->li_addr;
708*0Sstevel@tonic-gate 
709*0Sstevel@tonic-gate 	if (bind(pii->pii_probe_sock, (struct sockaddr *)&testaddr,
710*0Sstevel@tonic-gate 	    sizeof (testaddr)) < 0) {
711*0Sstevel@tonic-gate 		logperror_pii(pii, "phyint_inst_v6_sockinit: IPv6 bind");
712*0Sstevel@tonic-gate 		return (_B_FALSE);
713*0Sstevel@tonic-gate 	}
714*0Sstevel@tonic-gate 
715*0Sstevel@tonic-gate 	/*
716*0Sstevel@tonic-gate 	 * IPV6_DONTFAILOVER_IF option takes precedence over setting
717*0Sstevel@tonic-gate 	 * IP_MULTICAST_IF. So we don't set IPV6_MULTICAST_IF again.
718*0Sstevel@tonic-gate 	 */
719*0Sstevel@tonic-gate 	if (setsockopt(pii->pii_probe_sock, IPPROTO_IPV6, IPV6_DONTFAILOVER_IF,
720*0Sstevel@tonic-gate 	    (char *)&pii->pii_ifindex, sizeof (uint_t)) < 0) {
721*0Sstevel@tonic-gate 		logperror_pii(pii, "phyint_inst_v6_sockinit: setsockopt"
722*0Sstevel@tonic-gate 		    " IPV6_DONTFAILOVER_IF");
723*0Sstevel@tonic-gate 		return (_B_FALSE);
724*0Sstevel@tonic-gate 	}
725*0Sstevel@tonic-gate 
726*0Sstevel@tonic-gate 	if (setsockopt(pii->pii_probe_sock, IPPROTO_IPV6, IPV6_UNICAST_HOPS,
727*0Sstevel@tonic-gate 	    (char *)&hopcount, sizeof (hopcount)) < 0) {
728*0Sstevel@tonic-gate 		logperror_pii(pii, "phyint_inst_v6_sockinit: setsockopt"
729*0Sstevel@tonic-gate 		    " IPV6_UNICAST_HOPS");
730*0Sstevel@tonic-gate 		return (_B_FALSE);
731*0Sstevel@tonic-gate 	}
732*0Sstevel@tonic-gate 
733*0Sstevel@tonic-gate 	if (setsockopt(pii->pii_probe_sock, IPPROTO_IPV6, IPV6_MULTICAST_HOPS,
734*0Sstevel@tonic-gate 	    (char *)&hopcount, sizeof (hopcount)) < 0) {
735*0Sstevel@tonic-gate 		logperror_pii(pii, "phyint_inst_v6_sockinit: setsockopt"
736*0Sstevel@tonic-gate 		    " IPV6_MULTICAST_HOPS");
737*0Sstevel@tonic-gate 		return (_B_FALSE);
738*0Sstevel@tonic-gate 	}
739*0Sstevel@tonic-gate 
740*0Sstevel@tonic-gate 	int_op = 0;	/* used to turn off option */
741*0Sstevel@tonic-gate 	if (setsockopt(pii->pii_probe_sock, IPPROTO_IPV6, IPV6_MULTICAST_LOOP,
742*0Sstevel@tonic-gate 	    (char *)&int_op, sizeof (int_op)) < 0) {
743*0Sstevel@tonic-gate 		logperror_pii(pii, "phyint_inst_v6_sockinit: setsockopt"
744*0Sstevel@tonic-gate 		    " IPV6_MULTICAST_LOOP");
745*0Sstevel@tonic-gate 		return (_B_FALSE);
746*0Sstevel@tonic-gate 	}
747*0Sstevel@tonic-gate 
748*0Sstevel@tonic-gate 	/*
749*0Sstevel@tonic-gate 	 * Filter out so that we only receive ICMP echo replies
750*0Sstevel@tonic-gate 	 */
751*0Sstevel@tonic-gate 	ICMP6_FILTER_SETBLOCKALL(&filter);
752*0Sstevel@tonic-gate 	ICMP6_FILTER_SETPASS(ICMP6_ECHO_REPLY, &filter);
753*0Sstevel@tonic-gate 
754*0Sstevel@tonic-gate 	if (setsockopt(pii->pii_probe_sock, IPPROTO_ICMPV6, ICMP6_FILTER,
755*0Sstevel@tonic-gate 	    (char *)&filter, sizeof (filter)) < 0) {
756*0Sstevel@tonic-gate 		logperror_pii(pii, "phyint_inst_v6_sockinit: setsockopt"
757*0Sstevel@tonic-gate 		    " ICMP6_FILTER");
758*0Sstevel@tonic-gate 		return (_B_FALSE);
759*0Sstevel@tonic-gate 	}
760*0Sstevel@tonic-gate 
761*0Sstevel@tonic-gate 	/* Enable receipt of ancillary data */
762*0Sstevel@tonic-gate 	int_op = 1;
763*0Sstevel@tonic-gate 	if (setsockopt(pii->pii_probe_sock, IPPROTO_IPV6, IPV6_RECVHOPLIMIT,
764*0Sstevel@tonic-gate 	    (char *)&int_op, sizeof (int_op)) < 0) {
765*0Sstevel@tonic-gate 		logperror_pii(pii, "phyint_inst_v6_sockinit: setsockopt"
766*0Sstevel@tonic-gate 		    " IPV6_RECVHOPLIMIT");
767*0Sstevel@tonic-gate 		return (_B_FALSE);
768*0Sstevel@tonic-gate 	}
769*0Sstevel@tonic-gate 
770*0Sstevel@tonic-gate 	return (_B_TRUE);
771*0Sstevel@tonic-gate }
772*0Sstevel@tonic-gate 
773*0Sstevel@tonic-gate /*
774*0Sstevel@tonic-gate  * IPv4 specific part in initializing the pii_probe_sock. This socket is
775*0Sstevel@tonic-gate  * used to send/receive ICMPv4 probe packets.
776*0Sstevel@tonic-gate  */
777*0Sstevel@tonic-gate static boolean_t
778*0Sstevel@tonic-gate phyint_inst_v4_sockinit(struct phyint_instance *pii)
779*0Sstevel@tonic-gate {
780*0Sstevel@tonic-gate 	struct sockaddr_in  testaddr;
781*0Sstevel@tonic-gate 	char	char_op;
782*0Sstevel@tonic-gate 	int	ttl = 1;
783*0Sstevel@tonic-gate 	char	char_ttl = 1;
784*0Sstevel@tonic-gate 
785*0Sstevel@tonic-gate 	/*
786*0Sstevel@tonic-gate 	 * Open a raw socket with ICMPv4 protocol.
787*0Sstevel@tonic-gate 	 *
788*0Sstevel@tonic-gate 	 * Use IP_DONTFAILOVER_IF to make sure that probes go out
789*0Sstevel@tonic-gate 	 * on the specified phyint only, and are not subject to load
790*0Sstevel@tonic-gate 	 * balancing. Bind to the src address chosen will ensure that
791*0Sstevel@tonic-gate 	 * the responses are received only on the specified phyint.
792*0Sstevel@tonic-gate 	 *
793*0Sstevel@tonic-gate 	 * Set the ttl to 1 so that probe packets are not routed.
794*0Sstevel@tonic-gate 	 * Disable multicast loopback.
795*0Sstevel@tonic-gate 	 */
796*0Sstevel@tonic-gate 	pii->pii_probe_sock = socket(pii->pii_af, SOCK_RAW, IPPROTO_ICMP);
797*0Sstevel@tonic-gate 	if (pii->pii_probe_sock < 0) {
798*0Sstevel@tonic-gate 		logperror_pii(pii, "phyint_inst_v4_sockinit: socket");
799*0Sstevel@tonic-gate 		return (_B_FALSE);
800*0Sstevel@tonic-gate 	}
801*0Sstevel@tonic-gate 
802*0Sstevel@tonic-gate 	bzero(&testaddr, sizeof (testaddr));
803*0Sstevel@tonic-gate 	testaddr.sin_family = AF_INET;
804*0Sstevel@tonic-gate 	testaddr.sin_port = 0;
805*0Sstevel@tonic-gate 	IN6_V4MAPPED_TO_INADDR(&pii->pii_probe_logint->li_addr,
806*0Sstevel@tonic-gate 	    &testaddr.sin_addr);
807*0Sstevel@tonic-gate 
808*0Sstevel@tonic-gate 	if (bind(pii->pii_probe_sock, (struct sockaddr *)&testaddr,
809*0Sstevel@tonic-gate 	    sizeof (testaddr)) < 0) {
810*0Sstevel@tonic-gate 		logperror_pii(pii, "phyint_inst_v4_sockinit: IPv4 bind");
811*0Sstevel@tonic-gate 		return (_B_FALSE);
812*0Sstevel@tonic-gate 	}
813*0Sstevel@tonic-gate 
814*0Sstevel@tonic-gate 	/*
815*0Sstevel@tonic-gate 	 * IP_DONTFAILOVER_IF option takes precedence over setting
816*0Sstevel@tonic-gate 	 * IP_MULTICAST_IF. So we don't set IP_MULTICAST_IF again.
817*0Sstevel@tonic-gate 	 */
818*0Sstevel@tonic-gate 	if (setsockopt(pii->pii_probe_sock, IPPROTO_IP, IP_DONTFAILOVER_IF,
819*0Sstevel@tonic-gate 	    (char *)&testaddr.sin_addr, sizeof (struct in_addr)) < 0) {
820*0Sstevel@tonic-gate 		logperror_pii(pii, "phyint_inst_v4_sockinit: setsockopt"
821*0Sstevel@tonic-gate 		    " IP_DONTFAILOVER");
822*0Sstevel@tonic-gate 		return (_B_FALSE);
823*0Sstevel@tonic-gate 	}
824*0Sstevel@tonic-gate 
825*0Sstevel@tonic-gate 	if (setsockopt(pii->pii_probe_sock, IPPROTO_IP, IP_TTL,
826*0Sstevel@tonic-gate 	    (char *)&ttl, sizeof (ttl)) < 0) {
827*0Sstevel@tonic-gate 		logperror_pii(pii, "phyint_inst_v4_sockinit: setsockopt"
828*0Sstevel@tonic-gate 		    " IP_TTL");
829*0Sstevel@tonic-gate 		return (_B_FALSE);
830*0Sstevel@tonic-gate 	}
831*0Sstevel@tonic-gate 
832*0Sstevel@tonic-gate 	char_op = 0;	/* used to turn off option */
833*0Sstevel@tonic-gate 	if (setsockopt(pii->pii_probe_sock, IPPROTO_IP, IP_MULTICAST_LOOP,
834*0Sstevel@tonic-gate 	    (char *)&char_op, sizeof (char_op)) == -1) {
835*0Sstevel@tonic-gate 		logperror_pii(pii, "phyint_inst_v4_sockinit: setsockopt"
836*0Sstevel@tonic-gate 		    " IP_MULTICAST_LOOP");
837*0Sstevel@tonic-gate 		return (_B_FALSE);
838*0Sstevel@tonic-gate 	}
839*0Sstevel@tonic-gate 
840*0Sstevel@tonic-gate 	if (setsockopt(pii->pii_probe_sock, IPPROTO_IP, IP_MULTICAST_TTL,
841*0Sstevel@tonic-gate 	    (char *)&char_ttl, sizeof (char_ttl)) == -1) {
842*0Sstevel@tonic-gate 		logperror_pii(pii, "phyint_inst_v4_sockinit: setsockopt"
843*0Sstevel@tonic-gate 		    " IP_MULTICAST_TTL");
844*0Sstevel@tonic-gate 		return (_B_FALSE);
845*0Sstevel@tonic-gate 	}
846*0Sstevel@tonic-gate 
847*0Sstevel@tonic-gate 	return (_B_TRUE);
848*0Sstevel@tonic-gate }
849*0Sstevel@tonic-gate 
850*0Sstevel@tonic-gate /*
851*0Sstevel@tonic-gate  * Remove the phyint group from the list of 'all phyint groups'
852*0Sstevel@tonic-gate  * and free it.
853*0Sstevel@tonic-gate  */
854*0Sstevel@tonic-gate static void
855*0Sstevel@tonic-gate phyint_group_delete(struct phyint_group *pg)
856*0Sstevel@tonic-gate {
857*0Sstevel@tonic-gate 	/*
858*0Sstevel@tonic-gate 	 * The anonymous group always exists, even when empty.
859*0Sstevel@tonic-gate 	 */
860*0Sstevel@tonic-gate 	if (pg == phyint_anongroup)
861*0Sstevel@tonic-gate 		return;
862*0Sstevel@tonic-gate 
863*0Sstevel@tonic-gate 	if (debug & D_PHYINT)
864*0Sstevel@tonic-gate 		logdebug("phyint_group_delete('%s')\n", pg->pg_name);
865*0Sstevel@tonic-gate 
866*0Sstevel@tonic-gate 	/*
867*0Sstevel@tonic-gate 	 * The phyint group must be empty, and must not have any phyints.
868*0Sstevel@tonic-gate 	 * The phyint group must be in the list of all phyint groups
869*0Sstevel@tonic-gate 	 */
870*0Sstevel@tonic-gate 	assert(pg->pg_phyint == NULL);
871*0Sstevel@tonic-gate 	assert(phyint_groups == pg || pg->pg_prev != NULL);
872*0Sstevel@tonic-gate 
873*0Sstevel@tonic-gate 	if (pg->pg_prev != NULL)
874*0Sstevel@tonic-gate 		pg->pg_prev->pg_next = pg->pg_next;
875*0Sstevel@tonic-gate 	else
876*0Sstevel@tonic-gate 		phyint_groups = pg->pg_next;
877*0Sstevel@tonic-gate 
878*0Sstevel@tonic-gate 	if (pg->pg_next != NULL)
879*0Sstevel@tonic-gate 		pg->pg_next->pg_prev = pg->pg_prev;
880*0Sstevel@tonic-gate 
881*0Sstevel@tonic-gate 	pg->pg_next = NULL;
882*0Sstevel@tonic-gate 	pg->pg_prev = NULL;
883*0Sstevel@tonic-gate 
884*0Sstevel@tonic-gate 	phyint_grouplistsig++;
885*0Sstevel@tonic-gate 	(void) phyint_group_change_event(pg, IPMP_GROUP_REMOVE);
886*0Sstevel@tonic-gate 
887*0Sstevel@tonic-gate 	free(pg);
888*0Sstevel@tonic-gate }
889*0Sstevel@tonic-gate 
890*0Sstevel@tonic-gate /*
891*0Sstevel@tonic-gate  * Extract information from the kernel about the desired phyint.
892*0Sstevel@tonic-gate  * Look only for properties of the phyint and not properties of logints.
893*0Sstevel@tonic-gate  * Take appropriate action on the changes.
894*0Sstevel@tonic-gate  * Return codes:
895*0Sstevel@tonic-gate  *	PI_OK
896*0Sstevel@tonic-gate  *		The phyint exists in the kernel and matches our knowledge
897*0Sstevel@tonic-gate  *		of the phyint.
898*0Sstevel@tonic-gate  *	PI_DELETED
899*0Sstevel@tonic-gate  *		The phyint has vanished in the kernel.
900*0Sstevel@tonic-gate  *	PI_IFINDEX_CHANGED
901*0Sstevel@tonic-gate  *		The phyint's interface index has changed.
902*0Sstevel@tonic-gate  *		Ask the caller to delete and recreate the phyint.
903*0Sstevel@tonic-gate  *	PI_IOCTL_ERROR
904*0Sstevel@tonic-gate  *		Some ioctl error. Don't change anything.
905*0Sstevel@tonic-gate  *	PI_GROUP_CHANGED
906*0Sstevel@tonic-gate  *		The phyint has changed group.
907*0Sstevel@tonic-gate  */
908*0Sstevel@tonic-gate int
909*0Sstevel@tonic-gate phyint_inst_update_from_k(struct phyint_instance *pii)
910*0Sstevel@tonic-gate {
911*0Sstevel@tonic-gate 	struct lifreq lifr;
912*0Sstevel@tonic-gate 	int	ifsock;
913*0Sstevel@tonic-gate 	struct phyint *pi;
914*0Sstevel@tonic-gate 
915*0Sstevel@tonic-gate 	pi = pii->pii_phyint;
916*0Sstevel@tonic-gate 
917*0Sstevel@tonic-gate 	if (debug & D_PHYINT) {
918*0Sstevel@tonic-gate 		logdebug("phyint_inst_update_from_k(%s %s)\n",
919*0Sstevel@tonic-gate 		    AF_STR(pii->pii_af), pi->pi_name);
920*0Sstevel@tonic-gate 	}
921*0Sstevel@tonic-gate 
922*0Sstevel@tonic-gate 	/*
923*0Sstevel@tonic-gate 	 * Get the ifindex from the kernel, for comparison with the
924*0Sstevel@tonic-gate 	 * value in our tables.
925*0Sstevel@tonic-gate 	 */
926*0Sstevel@tonic-gate 	(void) strncpy(lifr.lifr_name, pi->pi_name, sizeof (lifr.lifr_name));
927*0Sstevel@tonic-gate 	lifr.lifr_name[sizeof (lifr.lifr_name) - 1] = '\0';
928*0Sstevel@tonic-gate 
929*0Sstevel@tonic-gate 	ifsock = (pii->pii_af == AF_INET) ? ifsock_v4 : ifsock_v6;
930*0Sstevel@tonic-gate 	if (ioctl(ifsock, SIOCGLIFINDEX, &lifr) < 0) {
931*0Sstevel@tonic-gate 		if (errno == ENXIO) {
932*0Sstevel@tonic-gate 			return (PI_DELETED);
933*0Sstevel@tonic-gate 		} else {
934*0Sstevel@tonic-gate 			logperror_pii(pii, "phyint_inst_update_from_k:"
935*0Sstevel@tonic-gate 			    " ioctl (get lifindex)");
936*0Sstevel@tonic-gate 			return (PI_IOCTL_ERROR);
937*0Sstevel@tonic-gate 		}
938*0Sstevel@tonic-gate 	}
939*0Sstevel@tonic-gate 
940*0Sstevel@tonic-gate 	if (lifr.lifr_index != pi->pi_ifindex) {
941*0Sstevel@tonic-gate 		/*
942*0Sstevel@tonic-gate 		 * The index has changed. Most likely the interface has
943*0Sstevel@tonic-gate 		 * been unplumbed and replumbed. Ask the caller to take
944*0Sstevel@tonic-gate 		 * appropriate action.
945*0Sstevel@tonic-gate 		 */
946*0Sstevel@tonic-gate 		if (debug & D_PHYINT) {
947*0Sstevel@tonic-gate 			logdebug("phyint_inst_update_from_k:"
948*0Sstevel@tonic-gate 			    " old index %d new index %d\n",
949*0Sstevel@tonic-gate 			    pi->pi_ifindex, lifr.lifr_index);
950*0Sstevel@tonic-gate 		}
951*0Sstevel@tonic-gate 		return (PI_IFINDEX_CHANGED);
952*0Sstevel@tonic-gate 	}
953*0Sstevel@tonic-gate 
954*0Sstevel@tonic-gate 	/*
955*0Sstevel@tonic-gate 	 * Get the group name from the kernel, for comparison with
956*0Sstevel@tonic-gate 	 * the value in our tables.
957*0Sstevel@tonic-gate 	 */
958*0Sstevel@tonic-gate 	if (ioctl(ifsock, SIOCGLIFGROUPNAME, &lifr) < 0) {
959*0Sstevel@tonic-gate 		if (errno == ENXIO) {
960*0Sstevel@tonic-gate 			return (PI_DELETED);
961*0Sstevel@tonic-gate 		} else {
962*0Sstevel@tonic-gate 			logperror_pii(pii, "phyint_inst_update_from_k:"
963*0Sstevel@tonic-gate 			    " ioctl (get groupname)");
964*0Sstevel@tonic-gate 			return (PI_IOCTL_ERROR);
965*0Sstevel@tonic-gate 		}
966*0Sstevel@tonic-gate 	}
967*0Sstevel@tonic-gate 
968*0Sstevel@tonic-gate 	/*
969*0Sstevel@tonic-gate 	 * If the phyint has changed group i.e. if the phyint group name
970*0Sstevel@tonic-gate 	 * returned by the kernel is different, ask the caller to delete
971*0Sstevel@tonic-gate 	 * and recreate the phyint in the right group
972*0Sstevel@tonic-gate 	 */
973*0Sstevel@tonic-gate 	if (strcmp(lifr.lifr_groupname, pi->pi_group->pg_name) != 0) {
974*0Sstevel@tonic-gate 		/* Groupname has changed */
975*0Sstevel@tonic-gate 		if (debug & D_PHYINT) {
976*0Sstevel@tonic-gate 			logdebug("phyint_inst_update_from_k:"
977*0Sstevel@tonic-gate 			    " groupname change\n");
978*0Sstevel@tonic-gate 		}
979*0Sstevel@tonic-gate 		return (PI_GROUP_CHANGED);
980*0Sstevel@tonic-gate 	}
981*0Sstevel@tonic-gate 
982*0Sstevel@tonic-gate 	/*
983*0Sstevel@tonic-gate 	 * Get the current phyint flags from the kernel, and determine what
984*0Sstevel@tonic-gate 	 * flags have changed by comparing against our tables.	Note that the
985*0Sstevel@tonic-gate 	 * IFF_INACTIVE processing in initifs() relies on this call to ensure
986*0Sstevel@tonic-gate 	 * that IFF_INACTIVE is really still set on the interface.
987*0Sstevel@tonic-gate 	 */
988*0Sstevel@tonic-gate 	if (ioctl(ifsock, SIOCGLIFFLAGS, &lifr) < 0) {
989*0Sstevel@tonic-gate 		if (errno == ENXIO) {
990*0Sstevel@tonic-gate 			return (PI_DELETED);
991*0Sstevel@tonic-gate 		} else {
992*0Sstevel@tonic-gate 			logperror_pii(pii, "phyint_inst_update_from_k: "
993*0Sstevel@tonic-gate 			    " ioctl (get flags)");
994*0Sstevel@tonic-gate 			return (PI_IOCTL_ERROR);
995*0Sstevel@tonic-gate 		}
996*0Sstevel@tonic-gate 	}
997*0Sstevel@tonic-gate 
998*0Sstevel@tonic-gate 	pi->pi_flags = PHYINT_FLAGS(lifr.lifr_flags);
999*0Sstevel@tonic-gate 	if (pi->pi_v4 != NULL)
1000*0Sstevel@tonic-gate 		pi->pi_v4->pii_flags = pi->pi_flags;
1001*0Sstevel@tonic-gate 	if (pi->pi_v6 != NULL)
1002*0Sstevel@tonic-gate 		pi->pi_v6->pii_flags = pi->pi_flags;
1003*0Sstevel@tonic-gate 
1004*0Sstevel@tonic-gate 	if (pi->pi_flags & IFF_FAILED) {
1005*0Sstevel@tonic-gate 		/*
1006*0Sstevel@tonic-gate 		 * If we are in the running and full state, we have
1007*0Sstevel@tonic-gate 		 * completed failbacks successfully and we would have
1008*0Sstevel@tonic-gate 		 * expected IFF_FAILED to have been clear. That it is
1009*0Sstevel@tonic-gate 		 * set means there was a race condition. Some other
1010*0Sstevel@tonic-gate 		 * process turned on the IFF_FAILED flag. Since the
1011*0Sstevel@tonic-gate 		 * flag setting is not atomic, i.e. a get ioctl followed
1012*0Sstevel@tonic-gate 		 * by a set ioctl, and since there is no way to set an
1013*0Sstevel@tonic-gate 		 * individual flag bit, this could have occurred.
1014*0Sstevel@tonic-gate 		 */
1015*0Sstevel@tonic-gate 		if (pi->pi_state == PI_RUNNING && pi->pi_full)
1016*0Sstevel@tonic-gate 			(void) change_lif_flags(pi, IFF_FAILED, _B_FALSE);
1017*0Sstevel@tonic-gate 	} else {
1018*0Sstevel@tonic-gate 		/*
1019*0Sstevel@tonic-gate 		 * If we are in the failed state, there was a race.
1020*0Sstevel@tonic-gate 		 * we have completed failover successfully because our
1021*0Sstevel@tonic-gate 		 * state is failed and empty. Some other process turned
1022*0Sstevel@tonic-gate 		 * off the IFF_FAILED flag. Same comment as above
1023*0Sstevel@tonic-gate 		 */
1024*0Sstevel@tonic-gate 		if (pi->pi_state == PI_FAILED && pi->pi_empty)
1025*0Sstevel@tonic-gate 			(void) change_lif_flags(pi, IFF_FAILED, _B_TRUE);
1026*0Sstevel@tonic-gate 	}
1027*0Sstevel@tonic-gate 
1028*0Sstevel@tonic-gate 	/* No change in phyint status */
1029*0Sstevel@tonic-gate 	return (PI_OK);
1030*0Sstevel@tonic-gate }
1031*0Sstevel@tonic-gate 
1032*0Sstevel@tonic-gate /*
1033*0Sstevel@tonic-gate  * Delete the phyint. Remove it from the list of all phyints, and the
1034*0Sstevel@tonic-gate  * list of phyint group members. If the group becomes empty, delete the
1035*0Sstevel@tonic-gate  * group also.
1036*0Sstevel@tonic-gate  */
1037*0Sstevel@tonic-gate static void
1038*0Sstevel@tonic-gate phyint_delete(struct phyint *pi)
1039*0Sstevel@tonic-gate {
1040*0Sstevel@tonic-gate 	struct phyint_group *pg = pi->pi_group;
1041*0Sstevel@tonic-gate 
1042*0Sstevel@tonic-gate 	if (debug & D_PHYINT)
1043*0Sstevel@tonic-gate 		logdebug("phyint_delete(%s)\n", pi->pi_name);
1044*0Sstevel@tonic-gate 
1045*0Sstevel@tonic-gate 	/* Both IPv4 and IPv6 phyint instances must have been deleted. */
1046*0Sstevel@tonic-gate 	assert(pi->pi_v4 == NULL && pi->pi_v6 == NULL);
1047*0Sstevel@tonic-gate 
1048*0Sstevel@tonic-gate 	/*
1049*0Sstevel@tonic-gate 	 * The phyint must belong to a group.
1050*0Sstevel@tonic-gate 	 */
1051*0Sstevel@tonic-gate 	assert(pg->pg_phyint == pi || pi->pi_pgprev != NULL);
1052*0Sstevel@tonic-gate 
1053*0Sstevel@tonic-gate 	/* The phyint must be in the list of all phyints */
1054*0Sstevel@tonic-gate 	assert(phyints == pi || pi->pi_prev != NULL);
1055*0Sstevel@tonic-gate 
1056*0Sstevel@tonic-gate 	/* Remove the phyint from the phyint group list */
1057*0Sstevel@tonic-gate 	pg->pg_sig++;
1058*0Sstevel@tonic-gate 	(void) phyint_group_member_event(pg, pi, IPMP_IF_REMOVE);
1059*0Sstevel@tonic-gate 
1060*0Sstevel@tonic-gate 	if (pi->pi_pgprev == NULL) {
1061*0Sstevel@tonic-gate 		/* Phyint is the 1st in the phyint group list */
1062*0Sstevel@tonic-gate 		pg->pg_phyint = pi->pi_pgnext;
1063*0Sstevel@tonic-gate 	} else {
1064*0Sstevel@tonic-gate 		pi->pi_pgprev->pi_pgnext = pi->pi_pgnext;
1065*0Sstevel@tonic-gate 	}
1066*0Sstevel@tonic-gate 	if (pi->pi_pgnext != NULL)
1067*0Sstevel@tonic-gate 		pi->pi_pgnext->pi_pgprev = pi->pi_pgprev;
1068*0Sstevel@tonic-gate 	pi->pi_pgnext = NULL;
1069*0Sstevel@tonic-gate 	pi->pi_pgprev = NULL;
1070*0Sstevel@tonic-gate 
1071*0Sstevel@tonic-gate 	/* Remove the phyint from the global list of phyints */
1072*0Sstevel@tonic-gate 	if (pi->pi_prev == NULL) {
1073*0Sstevel@tonic-gate 		/* Phyint is the 1st in the list */
1074*0Sstevel@tonic-gate 		phyints = pi->pi_next;
1075*0Sstevel@tonic-gate 	} else {
1076*0Sstevel@tonic-gate 		pi->pi_prev->pi_next = pi->pi_next;
1077*0Sstevel@tonic-gate 	}
1078*0Sstevel@tonic-gate 	if (pi->pi_next != NULL)
1079*0Sstevel@tonic-gate 		pi->pi_next->pi_prev = pi->pi_prev;
1080*0Sstevel@tonic-gate 	pi->pi_next = NULL;
1081*0Sstevel@tonic-gate 	pi->pi_prev = NULL;
1082*0Sstevel@tonic-gate 
1083*0Sstevel@tonic-gate 	free(pi);
1084*0Sstevel@tonic-gate 
1085*0Sstevel@tonic-gate 	/* Delete the phyint_group if the last phyint has been deleted */
1086*0Sstevel@tonic-gate 	if (pg->pg_phyint == NULL)
1087*0Sstevel@tonic-gate 		phyint_group_delete(pg);
1088*0Sstevel@tonic-gate }
1089*0Sstevel@tonic-gate 
1090*0Sstevel@tonic-gate /*
1091*0Sstevel@tonic-gate  * Delete (unlink and free), the phyint instance.
1092*0Sstevel@tonic-gate  */
1093*0Sstevel@tonic-gate void
1094*0Sstevel@tonic-gate phyint_inst_delete(struct phyint_instance *pii)
1095*0Sstevel@tonic-gate {
1096*0Sstevel@tonic-gate 	struct phyint *pi = pii->pii_phyint;
1097*0Sstevel@tonic-gate 
1098*0Sstevel@tonic-gate 	assert(pi != NULL);
1099*0Sstevel@tonic-gate 
1100*0Sstevel@tonic-gate 	if (debug & D_PHYINT) {
1101*0Sstevel@tonic-gate 		logdebug("phyint_inst_delete(%s %s)\n",
1102*0Sstevel@tonic-gate 		    AF_STR(pii->pii_af), pi->pi_name);
1103*0Sstevel@tonic-gate 	}
1104*0Sstevel@tonic-gate 
1105*0Sstevel@tonic-gate 	/*
1106*0Sstevel@tonic-gate 	 * If the phyint instance has associated probe targets
1107*0Sstevel@tonic-gate 	 * delete all the targets
1108*0Sstevel@tonic-gate 	 */
1109*0Sstevel@tonic-gate 	while (pii->pii_targets != NULL)
1110*0Sstevel@tonic-gate 		target_delete(pii->pii_targets);
1111*0Sstevel@tonic-gate 
1112*0Sstevel@tonic-gate 	/*
1113*0Sstevel@tonic-gate 	 * Delete all the logints associated with this phyint
1114*0Sstevel@tonic-gate 	 * instance.
1115*0Sstevel@tonic-gate 	 */
1116*0Sstevel@tonic-gate 	while (pii->pii_logint != NULL)
1117*0Sstevel@tonic-gate 		logint_delete(pii->pii_logint);
1118*0Sstevel@tonic-gate 
1119*0Sstevel@tonic-gate 	/*
1120*0Sstevel@tonic-gate 	 * Close the IFF_NOFAILOVER socket used to send probes to targets
1121*0Sstevel@tonic-gate 	 * from this phyint.
1122*0Sstevel@tonic-gate 	 */
1123*0Sstevel@tonic-gate 	if (pii->pii_probe_sock != -1)
1124*0Sstevel@tonic-gate 		close_probe_socket(pii, _B_TRUE);
1125*0Sstevel@tonic-gate 
1126*0Sstevel@tonic-gate 	/*
1127*0Sstevel@tonic-gate 	 * Phyint instance must be in the list of all phyint instances.
1128*0Sstevel@tonic-gate 	 * Remove phyint instance from the global list of phyint instances.
1129*0Sstevel@tonic-gate 	 */
1130*0Sstevel@tonic-gate 	assert(phyint_instances == pii || pii->pii_prev != NULL);
1131*0Sstevel@tonic-gate 	if (pii->pii_prev == NULL) {
1132*0Sstevel@tonic-gate 		/* Phyint is the 1st in the list */
1133*0Sstevel@tonic-gate 		phyint_instances = pii->pii_next;
1134*0Sstevel@tonic-gate 	} else {
1135*0Sstevel@tonic-gate 		pii->pii_prev->pii_next = pii->pii_next;
1136*0Sstevel@tonic-gate 	}
1137*0Sstevel@tonic-gate 	if (pii->pii_next != NULL)
1138*0Sstevel@tonic-gate 		pii->pii_next->pii_prev = pii->pii_prev;
1139*0Sstevel@tonic-gate 	pii->pii_next = NULL;
1140*0Sstevel@tonic-gate 	pii->pii_prev = NULL;
1141*0Sstevel@tonic-gate 
1142*0Sstevel@tonic-gate 	/*
1143*0Sstevel@tonic-gate 	 * Reset the phyint instance pointer in the phyint.
1144*0Sstevel@tonic-gate 	 * If this is the last phyint instance (being deleted) on this
1145*0Sstevel@tonic-gate 	 * phyint, then delete the phyint.
1146*0Sstevel@tonic-gate 	 */
1147*0Sstevel@tonic-gate 	if (pii->pii_af == AF_INET)
1148*0Sstevel@tonic-gate 		pi->pi_v4 = NULL;
1149*0Sstevel@tonic-gate 	else
1150*0Sstevel@tonic-gate 		pi->pi_v6 = NULL;
1151*0Sstevel@tonic-gate 
1152*0Sstevel@tonic-gate 	if (pi->pi_v4 == NULL && pi->pi_v6 == NULL)
1153*0Sstevel@tonic-gate 		phyint_delete(pi);
1154*0Sstevel@tonic-gate 
1155*0Sstevel@tonic-gate 	free(pii);
1156*0Sstevel@tonic-gate }
1157*0Sstevel@tonic-gate 
1158*0Sstevel@tonic-gate static void
1159*0Sstevel@tonic-gate phyint_inst_print(struct phyint_instance *pii)
1160*0Sstevel@tonic-gate {
1161*0Sstevel@tonic-gate 	struct logint *li;
1162*0Sstevel@tonic-gate 	struct target *tg;
1163*0Sstevel@tonic-gate 	char abuf[INET6_ADDRSTRLEN];
1164*0Sstevel@tonic-gate 	int most_recent;
1165*0Sstevel@tonic-gate 	int i;
1166*0Sstevel@tonic-gate 
1167*0Sstevel@tonic-gate 	if (pii->pii_phyint == NULL) {
1168*0Sstevel@tonic-gate 		logdebug("pii->pi_phyint NULL can't print\n");
1169*0Sstevel@tonic-gate 		return;
1170*0Sstevel@tonic-gate 	}
1171*0Sstevel@tonic-gate 
1172*0Sstevel@tonic-gate 	logdebug("\nPhyint instance: %s %s index %u state %x flags %llx	 "
1173*0Sstevel@tonic-gate 	    "sock %x in_use %d empty %x full %x\n",
1174*0Sstevel@tonic-gate 	    AF_STR(pii->pii_af), pii->pii_name, pii->pii_ifindex,
1175*0Sstevel@tonic-gate 	    pii->pii_state, pii->pii_phyint->pi_flags, pii->pii_probe_sock,
1176*0Sstevel@tonic-gate 	    pii->pii_in_use, pii->pii_phyint->pi_empty,
1177*0Sstevel@tonic-gate 	    pii->pii_phyint->pi_full);
1178*0Sstevel@tonic-gate 
1179*0Sstevel@tonic-gate 	for (li = pii->pii_logint; li != NULL; li = li->li_next)
1180*0Sstevel@tonic-gate 		logint_print(li);
1181*0Sstevel@tonic-gate 
1182*0Sstevel@tonic-gate 	logdebug("\n");
1183*0Sstevel@tonic-gate 	for (tg = pii->pii_targets; tg != NULL; tg = tg->tg_next)
1184*0Sstevel@tonic-gate 		target_print(tg);
1185*0Sstevel@tonic-gate 
1186*0Sstevel@tonic-gate 	if (pii->pii_targets == NULL)
1187*0Sstevel@tonic-gate 		logdebug("pi_targets NULL\n");
1188*0Sstevel@tonic-gate 
1189*0Sstevel@tonic-gate 	if (pii->pii_target_next != NULL) {
1190*0Sstevel@tonic-gate 		logdebug("pi_target_next %s %s\n", AF_STR(pii->pii_af),
1191*0Sstevel@tonic-gate 		    pr_addr(pii->pii_af, pii->pii_target_next->tg_address,
1192*0Sstevel@tonic-gate 			abuf, sizeof (abuf)));
1193*0Sstevel@tonic-gate 	} else {
1194*0Sstevel@tonic-gate 		logdebug("pi_target_next NULL\n");
1195*0Sstevel@tonic-gate 	}
1196*0Sstevel@tonic-gate 
1197*0Sstevel@tonic-gate 	if (pii->pii_rtt_target_next != NULL) {
1198*0Sstevel@tonic-gate 		logdebug("pi_rtt_target_next %s %s\n", AF_STR(pii->pii_af),
1199*0Sstevel@tonic-gate 		    pr_addr(pii->pii_af, pii->pii_rtt_target_next->tg_address,
1200*0Sstevel@tonic-gate 			abuf, sizeof (abuf)));
1201*0Sstevel@tonic-gate 	} else {
1202*0Sstevel@tonic-gate 		logdebug("pi_rtt_target_next NULL\n");
1203*0Sstevel@tonic-gate 	}
1204*0Sstevel@tonic-gate 
1205*0Sstevel@tonic-gate 	if (pii->pii_targets != NULL) {
1206*0Sstevel@tonic-gate 		most_recent = PROBE_INDEX_PREV(pii->pii_probe_next);
1207*0Sstevel@tonic-gate 
1208*0Sstevel@tonic-gate 		i = most_recent;
1209*0Sstevel@tonic-gate 		do {
1210*0Sstevel@tonic-gate 			if (pii->pii_probes[i].pr_target != NULL) {
1211*0Sstevel@tonic-gate 				logdebug("#%d target %s ", i,
1212*0Sstevel@tonic-gate 				    pr_addr(pii->pii_af,
1213*0Sstevel@tonic-gate 				    pii->pii_probes[i].pr_target->tg_address,
1214*0Sstevel@tonic-gate 				    abuf, sizeof (abuf)));
1215*0Sstevel@tonic-gate 			} else {
1216*0Sstevel@tonic-gate 				logdebug("#%d target NULL ", i);
1217*0Sstevel@tonic-gate 			}
1218*0Sstevel@tonic-gate 			logdebug("time_sent %u status %d time_ack/lost %u\n",
1219*0Sstevel@tonic-gate 			    pii->pii_probes[i].pr_time_sent,
1220*0Sstevel@tonic-gate 			    pii->pii_probes[i].pr_status,
1221*0Sstevel@tonic-gate 			    pii->pii_probes[i].pr_time_lost);
1222*0Sstevel@tonic-gate 			i = PROBE_INDEX_PREV(i);
1223*0Sstevel@tonic-gate 		} while (i != most_recent);
1224*0Sstevel@tonic-gate 	}
1225*0Sstevel@tonic-gate }
1226*0Sstevel@tonic-gate 
1227*0Sstevel@tonic-gate /*
1228*0Sstevel@tonic-gate  * Lookup a logint based on the logical interface name, on the given
1229*0Sstevel@tonic-gate  * phyint instance.
1230*0Sstevel@tonic-gate  */
1231*0Sstevel@tonic-gate static struct logint *
1232*0Sstevel@tonic-gate logint_lookup(struct phyint_instance *pii, char *name)
1233*0Sstevel@tonic-gate {
1234*0Sstevel@tonic-gate 	struct logint *li;
1235*0Sstevel@tonic-gate 
1236*0Sstevel@tonic-gate 	if (debug & D_LOGINT) {
1237*0Sstevel@tonic-gate 		logdebug("logint_lookup(%s, %s)\n",
1238*0Sstevel@tonic-gate 		    AF_STR(pii->pii_af), name);
1239*0Sstevel@tonic-gate 	}
1240*0Sstevel@tonic-gate 
1241*0Sstevel@tonic-gate 	for (li = pii->pii_logint; li != NULL; li = li->li_next) {
1242*0Sstevel@tonic-gate 		if (strncmp(name, li->li_name, sizeof (li->li_name)) == 0)
1243*0Sstevel@tonic-gate 			break;
1244*0Sstevel@tonic-gate 	}
1245*0Sstevel@tonic-gate 	return (li);
1246*0Sstevel@tonic-gate }
1247*0Sstevel@tonic-gate 
1248*0Sstevel@tonic-gate /*
1249*0Sstevel@tonic-gate  * Insert a logint at the head of the list of logints of the given
1250*0Sstevel@tonic-gate  * phyint instance
1251*0Sstevel@tonic-gate  */
1252*0Sstevel@tonic-gate static void
1253*0Sstevel@tonic-gate logint_insert(struct phyint_instance *pii, struct logint *li)
1254*0Sstevel@tonic-gate {
1255*0Sstevel@tonic-gate 	li->li_next = pii->pii_logint;
1256*0Sstevel@tonic-gate 	li->li_prev = NULL;
1257*0Sstevel@tonic-gate 	if (pii->pii_logint != NULL)
1258*0Sstevel@tonic-gate 		pii->pii_logint->li_prev = li;
1259*0Sstevel@tonic-gate 	pii->pii_logint = li;
1260*0Sstevel@tonic-gate 	li->li_phyint_inst = pii;
1261*0Sstevel@tonic-gate }
1262*0Sstevel@tonic-gate 
1263*0Sstevel@tonic-gate /*
1264*0Sstevel@tonic-gate  * Create a new named logint, on the specified phyint instance.
1265*0Sstevel@tonic-gate  */
1266*0Sstevel@tonic-gate static struct logint *
1267*0Sstevel@tonic-gate logint_create(struct phyint_instance *pii, char *name)
1268*0Sstevel@tonic-gate {
1269*0Sstevel@tonic-gate 	struct logint *li;
1270*0Sstevel@tonic-gate 
1271*0Sstevel@tonic-gate 	if (debug & D_LOGINT) {
1272*0Sstevel@tonic-gate 		logdebug("logint_create(%s %s %s)\n",
1273*0Sstevel@tonic-gate 		    AF_STR(pii->pii_af), pii->pii_name, name);
1274*0Sstevel@tonic-gate 	}
1275*0Sstevel@tonic-gate 
1276*0Sstevel@tonic-gate 	li = calloc(1, sizeof (struct logint));
1277*0Sstevel@tonic-gate 	if (li == NULL) {
1278*0Sstevel@tonic-gate 		logperror("logint_create: calloc");
1279*0Sstevel@tonic-gate 		return (NULL);
1280*0Sstevel@tonic-gate 	}
1281*0Sstevel@tonic-gate 
1282*0Sstevel@tonic-gate 	(void) strncpy(li->li_name, name, sizeof (li->li_name));
1283*0Sstevel@tonic-gate 	li->li_name[sizeof (li->li_name) - 1] = '\0';
1284*0Sstevel@tonic-gate 	logint_insert(pii, li);
1285*0Sstevel@tonic-gate 	return (li);
1286*0Sstevel@tonic-gate }
1287*0Sstevel@tonic-gate 
1288*0Sstevel@tonic-gate /*
1289*0Sstevel@tonic-gate  * Initialize the logint based on the data returned by the kernel.
1290*0Sstevel@tonic-gate  */
1291*0Sstevel@tonic-gate void
1292*0Sstevel@tonic-gate logint_init_from_k(struct phyint_instance *pii, char *li_name)
1293*0Sstevel@tonic-gate {
1294*0Sstevel@tonic-gate 	int	ifsock;
1295*0Sstevel@tonic-gate 	uint64_t flags;
1296*0Sstevel@tonic-gate 	uint64_t saved_flags;
1297*0Sstevel@tonic-gate 	struct	logint	*li;
1298*0Sstevel@tonic-gate 	struct lifreq	lifr;
1299*0Sstevel@tonic-gate 	struct in6_addr	test_subnet;
1300*0Sstevel@tonic-gate 	struct in6_addr	test_subnet_mask;
1301*0Sstevel@tonic-gate 	struct in6_addr	testaddr;
1302*0Sstevel@tonic-gate 	int	test_subnet_len;
1303*0Sstevel@tonic-gate 	struct sockaddr_in6	*sin6;
1304*0Sstevel@tonic-gate 	struct sockaddr_in	*sin;
1305*0Sstevel@tonic-gate 	char abuf[INET6_ADDRSTRLEN];
1306*0Sstevel@tonic-gate 	boolean_t  ptp = _B_FALSE;
1307*0Sstevel@tonic-gate 	struct in6_addr tgaddr;
1308*0Sstevel@tonic-gate 
1309*0Sstevel@tonic-gate 	if (debug & D_LOGINT) {
1310*0Sstevel@tonic-gate 		logdebug("logint_init_from_k(%s %s)\n",
1311*0Sstevel@tonic-gate 		    AF_STR(pii->pii_af), li_name);
1312*0Sstevel@tonic-gate 	}
1313*0Sstevel@tonic-gate 
1314*0Sstevel@tonic-gate 	/* Get the socket for doing ioctls */
1315*0Sstevel@tonic-gate 	ifsock = (pii->pii_af == AF_INET) ? ifsock_v4 : ifsock_v6;
1316*0Sstevel@tonic-gate 
1317*0Sstevel@tonic-gate 	/*
1318*0Sstevel@tonic-gate 	 * Get the flags from the kernel. Also serves as a check whether
1319*0Sstevel@tonic-gate 	 * the logical still exists. If it doesn't exist, no need to proceed
1320*0Sstevel@tonic-gate 	 * any further. li_in_use will make the caller clean up the logint
1321*0Sstevel@tonic-gate 	 */
1322*0Sstevel@tonic-gate 	(void) strncpy(lifr.lifr_name, li_name, sizeof (lifr.lifr_name));
1323*0Sstevel@tonic-gate 	lifr.lifr_name[sizeof (lifr.lifr_name) - 1] = '\0';
1324*0Sstevel@tonic-gate 	if (ioctl(ifsock, SIOCGLIFFLAGS, (char *)&lifr) < 0) {
1325*0Sstevel@tonic-gate 		/* Interface may have vanished */
1326*0Sstevel@tonic-gate 		if (errno != ENXIO) {
1327*0Sstevel@tonic-gate 			logperror_pii(pii, "logint_init_from_k: "
1328*0Sstevel@tonic-gate 			    "ioctl (get flags)");
1329*0Sstevel@tonic-gate 		}
1330*0Sstevel@tonic-gate 		return;
1331*0Sstevel@tonic-gate 	}
1332*0Sstevel@tonic-gate 
1333*0Sstevel@tonic-gate 	flags = lifr.lifr_flags;
1334*0Sstevel@tonic-gate 
1335*0Sstevel@tonic-gate 	/*
1336*0Sstevel@tonic-gate 	 * Verified the logint exists. Now lookup the logint in our tables.
1337*0Sstevel@tonic-gate 	 * If it does not exist, create a new logint.
1338*0Sstevel@tonic-gate 	 */
1339*0Sstevel@tonic-gate 	li = logint_lookup(pii, li_name);
1340*0Sstevel@tonic-gate 	if (li == NULL) {
1341*0Sstevel@tonic-gate 		li = logint_create(pii, li_name);
1342*0Sstevel@tonic-gate 		if (li == NULL) {
1343*0Sstevel@tonic-gate 			/*
1344*0Sstevel@tonic-gate 			 * Pretend the interface does not exist
1345*0Sstevel@tonic-gate 			 * in the kernel
1346*0Sstevel@tonic-gate 			 */
1347*0Sstevel@tonic-gate 			return;
1348*0Sstevel@tonic-gate 		}
1349*0Sstevel@tonic-gate 	}
1350*0Sstevel@tonic-gate 
1351*0Sstevel@tonic-gate 	/*
1352*0Sstevel@tonic-gate 	 * Update li->li_flags with the new flags, after saving the old
1353*0Sstevel@tonic-gate 	 * value. This is used later to check what flags has changed and
1354*0Sstevel@tonic-gate 	 * take any action
1355*0Sstevel@tonic-gate 	 */
1356*0Sstevel@tonic-gate 	saved_flags = li->li_flags;
1357*0Sstevel@tonic-gate 	li->li_flags = flags;
1358*0Sstevel@tonic-gate 
1359*0Sstevel@tonic-gate 	/*
1360*0Sstevel@tonic-gate 	 * Get the address, prefix, prefixlength and update the logint.
1361*0Sstevel@tonic-gate 	 * Check if anything has changed. If the logint used for the
1362*0Sstevel@tonic-gate 	 * test address has changed, take suitable action.
1363*0Sstevel@tonic-gate 	 */
1364*0Sstevel@tonic-gate 	if (ioctl(ifsock, SIOCGLIFADDR, (char *)&lifr) < 0) {
1365*0Sstevel@tonic-gate 		/* Interface may have vanished */
1366*0Sstevel@tonic-gate 		if (errno != ENXIO) {
1367*0Sstevel@tonic-gate 			logperror_li(li, "logint_init_from_k: (get addr)");
1368*0Sstevel@tonic-gate 		}
1369*0Sstevel@tonic-gate 		goto error;
1370*0Sstevel@tonic-gate 	}
1371*0Sstevel@tonic-gate 
1372*0Sstevel@tonic-gate 	if (pii->pii_af == AF_INET) {
1373*0Sstevel@tonic-gate 		sin = (struct sockaddr_in *)&lifr.lifr_addr;
1374*0Sstevel@tonic-gate 		IN6_INADDR_TO_V4MAPPED(&sin->sin_addr, &testaddr);
1375*0Sstevel@tonic-gate 	} else {
1376*0Sstevel@tonic-gate 		sin6 = (struct sockaddr_in6 *)&lifr.lifr_addr;
1377*0Sstevel@tonic-gate 		testaddr = sin6->sin6_addr;
1378*0Sstevel@tonic-gate 	}
1379*0Sstevel@tonic-gate 
1380*0Sstevel@tonic-gate 	if (pii->pii_phyint->pi_flags & IFF_POINTOPOINT) {
1381*0Sstevel@tonic-gate 		ptp = _B_TRUE;
1382*0Sstevel@tonic-gate 		if (ioctl(ifsock, SIOCGLIFDSTADDR, (char *)&lifr) < 0) {
1383*0Sstevel@tonic-gate 			if (errno != ENXIO) {
1384*0Sstevel@tonic-gate 				logperror_li(li, "logint_init_from_k:"
1385*0Sstevel@tonic-gate 				    " (get dstaddr)");
1386*0Sstevel@tonic-gate 			}
1387*0Sstevel@tonic-gate 			goto error;
1388*0Sstevel@tonic-gate 		}
1389*0Sstevel@tonic-gate 		if (pii->pii_af == AF_INET) {
1390*0Sstevel@tonic-gate 			sin = (struct sockaddr_in *)&lifr.lifr_addr;
1391*0Sstevel@tonic-gate 			IN6_INADDR_TO_V4MAPPED(&sin->sin_addr, &tgaddr);
1392*0Sstevel@tonic-gate 		} else {
1393*0Sstevel@tonic-gate 			sin6 = (struct sockaddr_in6 *)&lifr.lifr_addr;
1394*0Sstevel@tonic-gate 			tgaddr = sin6->sin6_addr;
1395*0Sstevel@tonic-gate 		}
1396*0Sstevel@tonic-gate 	} else {
1397*0Sstevel@tonic-gate 		if (ioctl(ifsock, SIOCGLIFSUBNET, (char *)&lifr) < 0) {
1398*0Sstevel@tonic-gate 			/* Interface may have vanished */
1399*0Sstevel@tonic-gate 			if (errno != ENXIO) {
1400*0Sstevel@tonic-gate 				logperror_li(li, "logint_init_from_k:"
1401*0Sstevel@tonic-gate 				    " (get subnet)");
1402*0Sstevel@tonic-gate 			}
1403*0Sstevel@tonic-gate 			goto error;
1404*0Sstevel@tonic-gate 		}
1405*0Sstevel@tonic-gate 		if (lifr.lifr_subnet.ss_family == AF_INET6) {
1406*0Sstevel@tonic-gate 			sin6 = (struct sockaddr_in6 *)&lifr.lifr_subnet;
1407*0Sstevel@tonic-gate 			test_subnet = sin6->sin6_addr;
1408*0Sstevel@tonic-gate 			test_subnet_len = lifr.lifr_addrlen;
1409*0Sstevel@tonic-gate 		} else {
1410*0Sstevel@tonic-gate 			sin = (struct sockaddr_in *)&lifr.lifr_subnet;
1411*0Sstevel@tonic-gate 			IN6_INADDR_TO_V4MAPPED(&sin->sin_addr, &test_subnet);
1412*0Sstevel@tonic-gate 			test_subnet_len = lifr.lifr_addrlen +
1413*0Sstevel@tonic-gate 			    (IPV6_ABITS - IP_ABITS);
1414*0Sstevel@tonic-gate 		}
1415*0Sstevel@tonic-gate 		(void) ip_index_to_mask_v6(test_subnet_len, &test_subnet_mask);
1416*0Sstevel@tonic-gate 	}
1417*0Sstevel@tonic-gate 
1418*0Sstevel@tonic-gate 	/*
1419*0Sstevel@tonic-gate 	 * Also record the OINDEX for completeness. This information is
1420*0Sstevel@tonic-gate 	 * not used.
1421*0Sstevel@tonic-gate 	 */
1422*0Sstevel@tonic-gate 	if (ioctl(ifsock, SIOCGLIFOINDEX, (char *)&lifr) < 0) {
1423*0Sstevel@tonic-gate 		if (errno != ENXIO)  {
1424*0Sstevel@tonic-gate 			logperror_li(li, "logint_init_from_k:"
1425*0Sstevel@tonic-gate 			    " (get lifoindex)");
1426*0Sstevel@tonic-gate 		}
1427*0Sstevel@tonic-gate 		goto error;
1428*0Sstevel@tonic-gate 	}
1429*0Sstevel@tonic-gate 
1430*0Sstevel@tonic-gate 	/*
1431*0Sstevel@tonic-gate 	 * If this is the logint corresponding to the test address used for
1432*0Sstevel@tonic-gate 	 * sending probes, then if anything significant has changed we need to
1433*0Sstevel@tonic-gate 	 * determine the test address again.  We ignore changes to the
1434*0Sstevel@tonic-gate 	 * IFF_FAILED and IFF_RUNNING flags since those happen as a matter of
1435*0Sstevel@tonic-gate 	 * course.
1436*0Sstevel@tonic-gate 	 */
1437*0Sstevel@tonic-gate 	if (pii->pii_probe_logint == li) {
1438*0Sstevel@tonic-gate 		if (((li->li_flags ^ saved_flags) &
1439*0Sstevel@tonic-gate 		    ~(IFF_FAILED | IFF_RUNNING)) != 0 ||
1440*0Sstevel@tonic-gate 		    !IN6_ARE_ADDR_EQUAL(&testaddr, &li->li_addr) ||
1441*0Sstevel@tonic-gate 		    (!ptp && !IN6_ARE_ADDR_EQUAL(&test_subnet,
1442*0Sstevel@tonic-gate 			&li->li_subnet)) ||
1443*0Sstevel@tonic-gate 		    (!ptp && test_subnet_len != li->li_subnet_len) ||
1444*0Sstevel@tonic-gate 		    (ptp && !IN6_ARE_ADDR_EQUAL(&tgaddr, &li->li_dstaddr))) {
1445*0Sstevel@tonic-gate 			/*
1446*0Sstevel@tonic-gate 			 * Something significant that affects the testaddress
1447*0Sstevel@tonic-gate 			 * has changed. Redo the testaddress selection later on
1448*0Sstevel@tonic-gate 			 * in select_test_ifs(). For now do the cleanup and
1449*0Sstevel@tonic-gate 			 * set pii_probe_logint to NULL.
1450*0Sstevel@tonic-gate 			 */
1451*0Sstevel@tonic-gate 			if (pii->pii_probe_sock != -1)
1452*0Sstevel@tonic-gate 				close_probe_socket(pii, _B_TRUE);
1453*0Sstevel@tonic-gate 			pii->pii_probe_logint = NULL;
1454*0Sstevel@tonic-gate 		}
1455*0Sstevel@tonic-gate 	}
1456*0Sstevel@tonic-gate 
1457*0Sstevel@tonic-gate 
1458*0Sstevel@tonic-gate 	/* Update the logint with the values obtained from the kernel.	*/
1459*0Sstevel@tonic-gate 	li->li_addr = testaddr;
1460*0Sstevel@tonic-gate 	li->li_in_use = 1;
1461*0Sstevel@tonic-gate 	li->li_oifindex = lifr.lifr_index;
1462*0Sstevel@tonic-gate 	if (ptp) {
1463*0Sstevel@tonic-gate 		li->li_dstaddr = tgaddr;
1464*0Sstevel@tonic-gate 		li->li_subnet_len = (pii->pii_af == AF_INET) ?
1465*0Sstevel@tonic-gate 		    IP_ABITS : IPV6_ABITS;
1466*0Sstevel@tonic-gate 	} else {
1467*0Sstevel@tonic-gate 		li->li_subnet = test_subnet;
1468*0Sstevel@tonic-gate 		li->li_subnet_len = test_subnet_len;
1469*0Sstevel@tonic-gate 	}
1470*0Sstevel@tonic-gate 
1471*0Sstevel@tonic-gate 	if (debug & D_LOGINT)
1472*0Sstevel@tonic-gate 		logint_print(li);
1473*0Sstevel@tonic-gate 
1474*0Sstevel@tonic-gate 	return;
1475*0Sstevel@tonic-gate 
1476*0Sstevel@tonic-gate error:
1477*0Sstevel@tonic-gate 	logerr("logint_init_from_k: IGNORED %s %s %s addr %s\n",
1478*0Sstevel@tonic-gate 	    AF_STR(pii->pii_af), pii->pii_name, li->li_name,
1479*0Sstevel@tonic-gate 	    pr_addr(pii->pii_af, testaddr, abuf, sizeof (abuf)));
1480*0Sstevel@tonic-gate 	logint_delete(li);
1481*0Sstevel@tonic-gate }
1482*0Sstevel@tonic-gate 
1483*0Sstevel@tonic-gate /*
1484*0Sstevel@tonic-gate  * Delete (unlink and free) a logint.
1485*0Sstevel@tonic-gate  */
1486*0Sstevel@tonic-gate void
1487*0Sstevel@tonic-gate logint_delete(struct logint *li)
1488*0Sstevel@tonic-gate {
1489*0Sstevel@tonic-gate 	struct phyint_instance *pii;
1490*0Sstevel@tonic-gate 
1491*0Sstevel@tonic-gate 	pii = li->li_phyint_inst;
1492*0Sstevel@tonic-gate 	assert(pii != NULL);
1493*0Sstevel@tonic-gate 
1494*0Sstevel@tonic-gate 	if (debug & D_LOGINT) {
1495*0Sstevel@tonic-gate 		int af;
1496*0Sstevel@tonic-gate 		char abuf[INET6_ADDRSTRLEN];
1497*0Sstevel@tonic-gate 
1498*0Sstevel@tonic-gate 		af = pii->pii_af;
1499*0Sstevel@tonic-gate 		logdebug("logint_delete(%s %s %s/%u)\n",
1500*0Sstevel@tonic-gate 		    AF_STR(af), li->li_name,
1501*0Sstevel@tonic-gate 		    pr_addr(af, li->li_addr, abuf, sizeof (abuf)),
1502*0Sstevel@tonic-gate 		    li->li_subnet_len);
1503*0Sstevel@tonic-gate 	}
1504*0Sstevel@tonic-gate 
1505*0Sstevel@tonic-gate 	/* logint must be in the list of logints */
1506*0Sstevel@tonic-gate 	assert(pii->pii_logint == li || li->li_prev != NULL);
1507*0Sstevel@tonic-gate 
1508*0Sstevel@tonic-gate 	/* Remove the logint from the list of logints  */
1509*0Sstevel@tonic-gate 	if (li->li_prev == NULL) {
1510*0Sstevel@tonic-gate 		/* logint is the 1st in the list */
1511*0Sstevel@tonic-gate 		pii->pii_logint = li->li_next;
1512*0Sstevel@tonic-gate 	} else {
1513*0Sstevel@tonic-gate 		li->li_prev->li_next = li->li_next;
1514*0Sstevel@tonic-gate 	}
1515*0Sstevel@tonic-gate 	if (li->li_next != NULL)
1516*0Sstevel@tonic-gate 		li->li_next->li_prev = li->li_prev;
1517*0Sstevel@tonic-gate 	li->li_next = NULL;
1518*0Sstevel@tonic-gate 	li->li_prev = NULL;
1519*0Sstevel@tonic-gate 
1520*0Sstevel@tonic-gate 	/*
1521*0Sstevel@tonic-gate 	 * If this logint corresponds to the IFF_NOFAILOVER testaddress of
1522*0Sstevel@tonic-gate 	 * this phyint, then close the associated socket, if it exists
1523*0Sstevel@tonic-gate 	 */
1524*0Sstevel@tonic-gate 	if (pii->pii_probe_logint == li) {
1525*0Sstevel@tonic-gate 		if (pii->pii_probe_sock != -1)
1526*0Sstevel@tonic-gate 			close_probe_socket(pii, _B_TRUE);
1527*0Sstevel@tonic-gate 		pii->pii_probe_logint = NULL;
1528*0Sstevel@tonic-gate 	}
1529*0Sstevel@tonic-gate 
1530*0Sstevel@tonic-gate 	free(li);
1531*0Sstevel@tonic-gate }
1532*0Sstevel@tonic-gate 
1533*0Sstevel@tonic-gate static void
1534*0Sstevel@tonic-gate logint_print(struct logint *li)
1535*0Sstevel@tonic-gate {
1536*0Sstevel@tonic-gate 	char abuf[INET6_ADDRSTRLEN];
1537*0Sstevel@tonic-gate 	int af;
1538*0Sstevel@tonic-gate 
1539*0Sstevel@tonic-gate 	af = li->li_phyint_inst->pii_af;
1540*0Sstevel@tonic-gate 
1541*0Sstevel@tonic-gate 	logdebug("logint: %s %s addr %s/%u", AF_STR(af), li->li_name,
1542*0Sstevel@tonic-gate 	    pr_addr(af, li->li_addr, abuf, sizeof (abuf)), li->li_subnet_len);
1543*0Sstevel@tonic-gate 
1544*0Sstevel@tonic-gate 	logdebug("\tFlags: %llx in_use %d oifindex %d\n",
1545*0Sstevel@tonic-gate 	    li->li_flags, li->li_in_use, li->li_oifindex);
1546*0Sstevel@tonic-gate }
1547*0Sstevel@tonic-gate 
1548*0Sstevel@tonic-gate char *
1549*0Sstevel@tonic-gate pr_addr(int af, struct in6_addr addr, char *abuf, int len)
1550*0Sstevel@tonic-gate {
1551*0Sstevel@tonic-gate 	struct in_addr	addr_v4;
1552*0Sstevel@tonic-gate 
1553*0Sstevel@tonic-gate 	if (af == AF_INET) {
1554*0Sstevel@tonic-gate 		IN6_V4MAPPED_TO_INADDR(&addr, &addr_v4);
1555*0Sstevel@tonic-gate 		(void) inet_ntop(AF_INET, (void *)&addr_v4, abuf, len);
1556*0Sstevel@tonic-gate 	} else {
1557*0Sstevel@tonic-gate 		(void) inet_ntop(AF_INET6, (void *)&addr, abuf, len);
1558*0Sstevel@tonic-gate 	}
1559*0Sstevel@tonic-gate 	return (abuf);
1560*0Sstevel@tonic-gate }
1561*0Sstevel@tonic-gate 
1562*0Sstevel@tonic-gate /* Lookup target on its address */
1563*0Sstevel@tonic-gate struct target *
1564*0Sstevel@tonic-gate target_lookup(struct phyint_instance *pii, struct in6_addr addr)
1565*0Sstevel@tonic-gate {
1566*0Sstevel@tonic-gate 	struct target *tg;
1567*0Sstevel@tonic-gate 
1568*0Sstevel@tonic-gate 	if (debug & D_TARGET) {
1569*0Sstevel@tonic-gate 		char abuf[INET6_ADDRSTRLEN];
1570*0Sstevel@tonic-gate 
1571*0Sstevel@tonic-gate 		logdebug("target_lookup(%s %s): addr %s\n",
1572*0Sstevel@tonic-gate 		    AF_STR(pii->pii_af), pii->pii_name,
1573*0Sstevel@tonic-gate 		    pr_addr(pii->pii_af, addr, abuf, sizeof (abuf)));
1574*0Sstevel@tonic-gate 	}
1575*0Sstevel@tonic-gate 
1576*0Sstevel@tonic-gate 	for (tg = pii->pii_targets; tg != NULL; tg = tg->tg_next) {
1577*0Sstevel@tonic-gate 		if (IN6_ARE_ADDR_EQUAL(&tg->tg_address, &addr))
1578*0Sstevel@tonic-gate 			break;
1579*0Sstevel@tonic-gate 	}
1580*0Sstevel@tonic-gate 	return (tg);
1581*0Sstevel@tonic-gate }
1582*0Sstevel@tonic-gate 
1583*0Sstevel@tonic-gate /*
1584*0Sstevel@tonic-gate  * Find and return the next active target, for the next probe.
1585*0Sstevel@tonic-gate  * If no active targets are available, return NULL.
1586*0Sstevel@tonic-gate  */
1587*0Sstevel@tonic-gate struct target *
1588*0Sstevel@tonic-gate target_next(struct target *tg)
1589*0Sstevel@tonic-gate {
1590*0Sstevel@tonic-gate 	struct	phyint_instance	*pii = tg->tg_phyint_inst;
1591*0Sstevel@tonic-gate 	struct	target	*marker = tg;
1592*0Sstevel@tonic-gate 	hrtime_t now;
1593*0Sstevel@tonic-gate 
1594*0Sstevel@tonic-gate 	now = gethrtime();
1595*0Sstevel@tonic-gate 
1596*0Sstevel@tonic-gate 	/*
1597*0Sstevel@tonic-gate 	 * Target must be in the list of targets for this phyint
1598*0Sstevel@tonic-gate 	 * instance.
1599*0Sstevel@tonic-gate 	 */
1600*0Sstevel@tonic-gate 	assert(pii->pii_targets == tg || tg->tg_prev != NULL);
1601*0Sstevel@tonic-gate 	assert(pii->pii_targets != NULL);
1602*0Sstevel@tonic-gate 
1603*0Sstevel@tonic-gate 	/* Return the next active target */
1604*0Sstevel@tonic-gate 	do {
1605*0Sstevel@tonic-gate 		/*
1606*0Sstevel@tonic-gate 		 * Go to the next target. If we hit the end,
1607*0Sstevel@tonic-gate 		 * reset the ptr to the head
1608*0Sstevel@tonic-gate 		 */
1609*0Sstevel@tonic-gate 		tg = tg->tg_next;
1610*0Sstevel@tonic-gate 		if (tg == NULL)
1611*0Sstevel@tonic-gate 			tg = pii->pii_targets;
1612*0Sstevel@tonic-gate 
1613*0Sstevel@tonic-gate 		assert(TG_STATUS_VALID(tg->tg_status));
1614*0Sstevel@tonic-gate 
1615*0Sstevel@tonic-gate 		switch (tg->tg_status) {
1616*0Sstevel@tonic-gate 		case TG_ACTIVE:
1617*0Sstevel@tonic-gate 			return (tg);
1618*0Sstevel@tonic-gate 
1619*0Sstevel@tonic-gate 		case TG_UNUSED:
1620*0Sstevel@tonic-gate 			assert(pii->pii_targets_are_routers);
1621*0Sstevel@tonic-gate 			if (pii->pii_ntargets < MAX_PROBE_TARGETS) {
1622*0Sstevel@tonic-gate 				/*
1623*0Sstevel@tonic-gate 				 * Bubble up the unused target to active
1624*0Sstevel@tonic-gate 				 */
1625*0Sstevel@tonic-gate 				tg->tg_status = TG_ACTIVE;
1626*0Sstevel@tonic-gate 				pii->pii_ntargets++;
1627*0Sstevel@tonic-gate 				return (tg);
1628*0Sstevel@tonic-gate 			}
1629*0Sstevel@tonic-gate 			break;
1630*0Sstevel@tonic-gate 
1631*0Sstevel@tonic-gate 		case TG_SLOW:
1632*0Sstevel@tonic-gate 			assert(pii->pii_targets_are_routers);
1633*0Sstevel@tonic-gate 			if (tg->tg_latime + MIN_RECOVERY_TIME < now) {
1634*0Sstevel@tonic-gate 				/*
1635*0Sstevel@tonic-gate 				 * Bubble up the slow target to unused
1636*0Sstevel@tonic-gate 				 */
1637*0Sstevel@tonic-gate 				tg->tg_status = TG_UNUSED;
1638*0Sstevel@tonic-gate 			}
1639*0Sstevel@tonic-gate 			break;
1640*0Sstevel@tonic-gate 
1641*0Sstevel@tonic-gate 		case TG_DEAD:
1642*0Sstevel@tonic-gate 			assert(pii->pii_targets_are_routers);
1643*0Sstevel@tonic-gate 			if (tg->tg_latime + MIN_RECOVERY_TIME < now) {
1644*0Sstevel@tonic-gate 				/*
1645*0Sstevel@tonic-gate 				 * Bubble up the dead target to slow
1646*0Sstevel@tonic-gate 				 */
1647*0Sstevel@tonic-gate 				tg->tg_status = TG_SLOW;
1648*0Sstevel@tonic-gate 				tg->tg_latime = now;
1649*0Sstevel@tonic-gate 			}
1650*0Sstevel@tonic-gate 			break;
1651*0Sstevel@tonic-gate 		}
1652*0Sstevel@tonic-gate 
1653*0Sstevel@tonic-gate 	} while (tg != marker);
1654*0Sstevel@tonic-gate 
1655*0Sstevel@tonic-gate 	return (NULL);
1656*0Sstevel@tonic-gate }
1657*0Sstevel@tonic-gate 
1658*0Sstevel@tonic-gate /*
1659*0Sstevel@tonic-gate  * Select the best available target, that is not already TG_ACTIVE,
1660*0Sstevel@tonic-gate  * for the caller. The caller will determine whether it wants to
1661*0Sstevel@tonic-gate  * make the returned target TG_ACTIVE.
1662*0Sstevel@tonic-gate  * The selection order is as follows.
1663*0Sstevel@tonic-gate  * 1. pick a TG_UNSED target, if it exists.
1664*0Sstevel@tonic-gate  * 2. else pick a TG_SLOW target that has recovered, if it exists
1665*0Sstevel@tonic-gate  * 3. else pick any TG_SLOW target, if it exists
1666*0Sstevel@tonic-gate  * 4. else pick a TG_DEAD target that has recovered, if it exists
1667*0Sstevel@tonic-gate  * 5. else pick any TG_DEAD target, if it exists
1668*0Sstevel@tonic-gate  * 6. else return null
1669*0Sstevel@tonic-gate  */
1670*0Sstevel@tonic-gate static struct target *
1671*0Sstevel@tonic-gate target_select_best(struct phyint_instance *pii)
1672*0Sstevel@tonic-gate {
1673*0Sstevel@tonic-gate 	struct target *tg;
1674*0Sstevel@tonic-gate 	struct target *slow = NULL;
1675*0Sstevel@tonic-gate 	struct target *dead = NULL;
1676*0Sstevel@tonic-gate 	struct target *slow_recovered = NULL;
1677*0Sstevel@tonic-gate 	struct target *dead_recovered = NULL;
1678*0Sstevel@tonic-gate 	hrtime_t now;
1679*0Sstevel@tonic-gate 
1680*0Sstevel@tonic-gate 	now = gethrtime();
1681*0Sstevel@tonic-gate 
1682*0Sstevel@tonic-gate 	for (tg = pii->pii_targets; tg != NULL; tg = tg->tg_next) {
1683*0Sstevel@tonic-gate 		assert(TG_STATUS_VALID(tg->tg_status));
1684*0Sstevel@tonic-gate 
1685*0Sstevel@tonic-gate 		switch (tg->tg_status) {
1686*0Sstevel@tonic-gate 		case TG_UNUSED:
1687*0Sstevel@tonic-gate 			return (tg);
1688*0Sstevel@tonic-gate 
1689*0Sstevel@tonic-gate 		case TG_SLOW:
1690*0Sstevel@tonic-gate 			if (tg->tg_latime + MIN_RECOVERY_TIME < now) {
1691*0Sstevel@tonic-gate 				slow_recovered = tg;
1692*0Sstevel@tonic-gate 				/*
1693*0Sstevel@tonic-gate 				 * Promote the slow_recoverd to unused
1694*0Sstevel@tonic-gate 				 */
1695*0Sstevel@tonic-gate 				tg->tg_status = TG_UNUSED;
1696*0Sstevel@tonic-gate 			} else {
1697*0Sstevel@tonic-gate 				slow = tg;
1698*0Sstevel@tonic-gate 			}
1699*0Sstevel@tonic-gate 			break;
1700*0Sstevel@tonic-gate 
1701*0Sstevel@tonic-gate 		case TG_DEAD:
1702*0Sstevel@tonic-gate 			if (tg->tg_latime + MIN_RECOVERY_TIME < now) {
1703*0Sstevel@tonic-gate 				dead_recovered = tg;
1704*0Sstevel@tonic-gate 				/*
1705*0Sstevel@tonic-gate 				 * Promote the dead_recoverd to slow
1706*0Sstevel@tonic-gate 				 */
1707*0Sstevel@tonic-gate 				tg->tg_status = TG_SLOW;
1708*0Sstevel@tonic-gate 				tg->tg_latime = now;
1709*0Sstevel@tonic-gate 			} else {
1710*0Sstevel@tonic-gate 				dead = tg;
1711*0Sstevel@tonic-gate 			}
1712*0Sstevel@tonic-gate 			break;
1713*0Sstevel@tonic-gate 
1714*0Sstevel@tonic-gate 		default:
1715*0Sstevel@tonic-gate 			break;
1716*0Sstevel@tonic-gate 		}
1717*0Sstevel@tonic-gate 	}
1718*0Sstevel@tonic-gate 
1719*0Sstevel@tonic-gate 	if (slow_recovered != NULL)
1720*0Sstevel@tonic-gate 		return (slow_recovered);
1721*0Sstevel@tonic-gate 	else if (slow != NULL)
1722*0Sstevel@tonic-gate 		return (slow);
1723*0Sstevel@tonic-gate 	else if (dead_recovered != NULL)
1724*0Sstevel@tonic-gate 		return (dead_recovered);
1725*0Sstevel@tonic-gate 	else
1726*0Sstevel@tonic-gate 		return (dead);
1727*0Sstevel@tonic-gate }
1728*0Sstevel@tonic-gate 
1729*0Sstevel@tonic-gate /*
1730*0Sstevel@tonic-gate  * Some target was deleted. If we don't have even MIN_PROBE_TARGETS
1731*0Sstevel@tonic-gate  * that are active, pick the next best below.
1732*0Sstevel@tonic-gate  */
1733*0Sstevel@tonic-gate static void
1734*0Sstevel@tonic-gate target_activate_all(struct phyint_instance *pii)
1735*0Sstevel@tonic-gate {
1736*0Sstevel@tonic-gate 	struct target *tg;
1737*0Sstevel@tonic-gate 
1738*0Sstevel@tonic-gate 	assert(pii->pii_ntargets == 0);
1739*0Sstevel@tonic-gate 	assert(pii->pii_target_next == NULL);
1740*0Sstevel@tonic-gate 	assert(pii->pii_rtt_target_next == NULL);
1741*0Sstevel@tonic-gate 	assert(pii->pii_targets_are_routers);
1742*0Sstevel@tonic-gate 
1743*0Sstevel@tonic-gate 	while (pii->pii_ntargets < MIN_PROBE_TARGETS) {
1744*0Sstevel@tonic-gate 		tg = target_select_best(pii);
1745*0Sstevel@tonic-gate 		if (tg == NULL) {
1746*0Sstevel@tonic-gate 			/* We are out of targets */
1747*0Sstevel@tonic-gate 			return;
1748*0Sstevel@tonic-gate 		}
1749*0Sstevel@tonic-gate 
1750*0Sstevel@tonic-gate 		assert(TG_STATUS_VALID(tg->tg_status));
1751*0Sstevel@tonic-gate 		assert(tg->tg_status != TG_ACTIVE);
1752*0Sstevel@tonic-gate 		tg->tg_status = TG_ACTIVE;
1753*0Sstevel@tonic-gate 		pii->pii_ntargets++;
1754*0Sstevel@tonic-gate 		if (pii->pii_target_next == NULL) {
1755*0Sstevel@tonic-gate 			pii->pii_target_next = tg;
1756*0Sstevel@tonic-gate 			pii->pii_rtt_target_next = tg;
1757*0Sstevel@tonic-gate 		}
1758*0Sstevel@tonic-gate 	}
1759*0Sstevel@tonic-gate }
1760*0Sstevel@tonic-gate 
1761*0Sstevel@tonic-gate static struct target *
1762*0Sstevel@tonic-gate target_first(struct phyint_instance *pii)
1763*0Sstevel@tonic-gate {
1764*0Sstevel@tonic-gate 	struct target *tg;
1765*0Sstevel@tonic-gate 
1766*0Sstevel@tonic-gate 	for (tg = pii->pii_targets; tg != NULL; tg = tg->tg_next) {
1767*0Sstevel@tonic-gate 		assert(TG_STATUS_VALID(tg->tg_status));
1768*0Sstevel@tonic-gate 		if (tg->tg_status == TG_ACTIVE)
1769*0Sstevel@tonic-gate 			break;
1770*0Sstevel@tonic-gate 	}
1771*0Sstevel@tonic-gate 
1772*0Sstevel@tonic-gate 	return (tg);
1773*0Sstevel@tonic-gate }
1774*0Sstevel@tonic-gate 
1775*0Sstevel@tonic-gate /*
1776*0Sstevel@tonic-gate  * Create a default target entry.
1777*0Sstevel@tonic-gate  */
1778*0Sstevel@tonic-gate void
1779*0Sstevel@tonic-gate target_create(struct phyint_instance *pii, struct in6_addr addr,
1780*0Sstevel@tonic-gate     boolean_t is_router)
1781*0Sstevel@tonic-gate {
1782*0Sstevel@tonic-gate 	struct target *tg;
1783*0Sstevel@tonic-gate 	struct phyint *pi;
1784*0Sstevel@tonic-gate 	struct logint *li;
1785*0Sstevel@tonic-gate 
1786*0Sstevel@tonic-gate 	if (debug & D_TARGET) {
1787*0Sstevel@tonic-gate 		char abuf[INET6_ADDRSTRLEN];
1788*0Sstevel@tonic-gate 
1789*0Sstevel@tonic-gate 		logdebug("target_create(%s %s, %s)\n",
1790*0Sstevel@tonic-gate 		    AF_STR(pii->pii_af), pii->pii_name,
1791*0Sstevel@tonic-gate 		    pr_addr(pii->pii_af, addr, abuf, sizeof (abuf)));
1792*0Sstevel@tonic-gate 	}
1793*0Sstevel@tonic-gate 
1794*0Sstevel@tonic-gate 	/*
1795*0Sstevel@tonic-gate 	 * If the test address is not yet initialized, do not add
1796*0Sstevel@tonic-gate 	 * any target, since we cannot determine whether the target
1797*0Sstevel@tonic-gate 	 * belongs to the same subnet as the test address.
1798*0Sstevel@tonic-gate 	 */
1799*0Sstevel@tonic-gate 	li = pii->pii_probe_logint;
1800*0Sstevel@tonic-gate 	if (li == NULL)
1801*0Sstevel@tonic-gate 		return;
1802*0Sstevel@tonic-gate 
1803*0Sstevel@tonic-gate 	/*
1804*0Sstevel@tonic-gate 	 * If there are multiple subnets associated with an interface, then
1805*0Sstevel@tonic-gate 	 * add the target to this phyint instance, only if it belongs to the
1806*0Sstevel@tonic-gate 	 * same subnet as the test address. The reason is that interface
1807*0Sstevel@tonic-gate 	 * routes derived from non-test-addresses i.e. non-IFF_NOFAILOVER
1808*0Sstevel@tonic-gate 	 * addresses, will disappear after failover, and the targets will not
1809*0Sstevel@tonic-gate 	 * be reachable from this interface.
1810*0Sstevel@tonic-gate 	 */
1811*0Sstevel@tonic-gate 	if (!prefix_equal(li->li_subnet, addr, li->li_subnet_len))
1812*0Sstevel@tonic-gate 		return;
1813*0Sstevel@tonic-gate 
1814*0Sstevel@tonic-gate 	if (pii->pii_targets != NULL) {
1815*0Sstevel@tonic-gate 		assert(pii->pii_ntargets <= MAX_PROBE_TARGETS);
1816*0Sstevel@tonic-gate 		if (is_router) {
1817*0Sstevel@tonic-gate 			if (!pii->pii_targets_are_routers) {
1818*0Sstevel@tonic-gate 				/*
1819*0Sstevel@tonic-gate 				 * Prefer router over hosts. Using hosts is a
1820*0Sstevel@tonic-gate 				 * fallback mechanism, hence delete all host
1821*0Sstevel@tonic-gate 				 * targets.
1822*0Sstevel@tonic-gate 				 */
1823*0Sstevel@tonic-gate 				while (pii->pii_targets != NULL)
1824*0Sstevel@tonic-gate 					target_delete(pii->pii_targets);
1825*0Sstevel@tonic-gate 			}
1826*0Sstevel@tonic-gate 		} else {
1827*0Sstevel@tonic-gate 			/*
1828*0Sstevel@tonic-gate 			 * Routers take precedence over hosts. If this
1829*0Sstevel@tonic-gate 			 * is a router list and we are trying to add a
1830*0Sstevel@tonic-gate 			 * host, just return. If this is a host list
1831*0Sstevel@tonic-gate 			 * and if we have sufficient targets, just return
1832*0Sstevel@tonic-gate 			 */
1833*0Sstevel@tonic-gate 			if (pii->pii_targets_are_routers ||
1834*0Sstevel@tonic-gate 			    pii->pii_ntargets == MAX_PROBE_TARGETS)
1835*0Sstevel@tonic-gate 				return;
1836*0Sstevel@tonic-gate 		}
1837*0Sstevel@tonic-gate 	}
1838*0Sstevel@tonic-gate 
1839*0Sstevel@tonic-gate 	tg = calloc(1, sizeof (struct target));
1840*0Sstevel@tonic-gate 	if (tg == NULL) {
1841*0Sstevel@tonic-gate 		logperror("target_create: calloc");
1842*0Sstevel@tonic-gate 		return;
1843*0Sstevel@tonic-gate 	}
1844*0Sstevel@tonic-gate 
1845*0Sstevel@tonic-gate 	tg->tg_phyint_inst = pii;
1846*0Sstevel@tonic-gate 	tg->tg_address = addr;
1847*0Sstevel@tonic-gate 	tg->tg_in_use = 1;
1848*0Sstevel@tonic-gate 	tg->tg_rtt_sa = -1;
1849*0Sstevel@tonic-gate 	tg->tg_num_deferred = 0;
1850*0Sstevel@tonic-gate 
1851*0Sstevel@tonic-gate 	/*
1852*0Sstevel@tonic-gate 	 * If this is the first target, set 'pii_targets_are_routers'
1853*0Sstevel@tonic-gate 	 * The list of targets is either a list of hosts or list or
1854*0Sstevel@tonic-gate 	 * routers, but not a mix.
1855*0Sstevel@tonic-gate 	 */
1856*0Sstevel@tonic-gate 	if (pii->pii_targets == NULL) {
1857*0Sstevel@tonic-gate 		assert(pii->pii_ntargets == 0);
1858*0Sstevel@tonic-gate 		assert(pii->pii_target_next == NULL);
1859*0Sstevel@tonic-gate 		assert(pii->pii_rtt_target_next == NULL);
1860*0Sstevel@tonic-gate 		pii->pii_targets_are_routers = is_router ? 1 : 0;
1861*0Sstevel@tonic-gate 	}
1862*0Sstevel@tonic-gate 
1863*0Sstevel@tonic-gate 	if (pii->pii_ntargets == MAX_PROBE_TARGETS) {
1864*0Sstevel@tonic-gate 		assert(pii->pii_targets_are_routers);
1865*0Sstevel@tonic-gate 		assert(pii->pii_target_next != NULL);
1866*0Sstevel@tonic-gate 		assert(pii->pii_rtt_target_next != NULL);
1867*0Sstevel@tonic-gate 		tg->tg_status = TG_UNUSED;
1868*0Sstevel@tonic-gate 	} else {
1869*0Sstevel@tonic-gate 		if (pii->pii_ntargets == 0) {
1870*0Sstevel@tonic-gate 			assert(pii->pii_target_next == NULL);
1871*0Sstevel@tonic-gate 			pii->pii_target_next = tg;
1872*0Sstevel@tonic-gate 			pii->pii_rtt_target_next = tg;
1873*0Sstevel@tonic-gate 		}
1874*0Sstevel@tonic-gate 		pii->pii_ntargets++;
1875*0Sstevel@tonic-gate 		tg->tg_status = TG_ACTIVE;
1876*0Sstevel@tonic-gate 	}
1877*0Sstevel@tonic-gate 
1878*0Sstevel@tonic-gate 	target_insert(pii, tg);
1879*0Sstevel@tonic-gate 
1880*0Sstevel@tonic-gate 	/*
1881*0Sstevel@tonic-gate 	 * Change to running state, if this phyint instance is capable of
1882*0Sstevel@tonic-gate 	 * sending and receiving probes. i.e if we know of at least 1 target,
1883*0Sstevel@tonic-gate 	 * and this phyint instance socket is bound to the IFF_NOFAILOVER
1884*0Sstevel@tonic-gate 	 * address. More details in phyint state diagram in probe.c.
1885*0Sstevel@tonic-gate 	 */
1886*0Sstevel@tonic-gate 	pi = pii->pii_phyint;
1887*0Sstevel@tonic-gate 	if (pi->pi_state == PI_NOTARGETS && PROBE_CAPABLE(pii)) {
1888*0Sstevel@tonic-gate 		if (pi->pi_flags & IFF_FAILED)
1889*0Sstevel@tonic-gate 			phyint_chstate(pi, PI_FAILED);
1890*0Sstevel@tonic-gate 		else
1891*0Sstevel@tonic-gate 			phyint_chstate(pi, PI_RUNNING);
1892*0Sstevel@tonic-gate 	}
1893*0Sstevel@tonic-gate }
1894*0Sstevel@tonic-gate 
1895*0Sstevel@tonic-gate /*
1896*0Sstevel@tonic-gate  * Add the target address named by `addr' to phyint instance `pii' if it does
1897*0Sstevel@tonic-gate  * not already exist.  If the target is a router, `is_router' should be set to
1898*0Sstevel@tonic-gate  * B_TRUE.
1899*0Sstevel@tonic-gate  */
1900*0Sstevel@tonic-gate void
1901*0Sstevel@tonic-gate target_add(struct phyint_instance *pii, struct in6_addr addr,
1902*0Sstevel@tonic-gate     boolean_t is_router)
1903*0Sstevel@tonic-gate {
1904*0Sstevel@tonic-gate 	struct target *tg;
1905*0Sstevel@tonic-gate 
1906*0Sstevel@tonic-gate 	if (pii == NULL)
1907*0Sstevel@tonic-gate 		return;
1908*0Sstevel@tonic-gate 
1909*0Sstevel@tonic-gate 	tg = target_lookup(pii, addr);
1910*0Sstevel@tonic-gate 
1911*0Sstevel@tonic-gate 	/*
1912*0Sstevel@tonic-gate 	 * If the target does not exist, create it; target_create() will set
1913*0Sstevel@tonic-gate 	 * tg_in_use to true.  If it exists already, and it is a router
1914*0Sstevel@tonic-gate 	 * target, set tg_in_use to to true, so that init_router_targets()
1915*0Sstevel@tonic-gate 	 * won't delete it
1916*0Sstevel@tonic-gate 	 */
1917*0Sstevel@tonic-gate 	if (tg == NULL)
1918*0Sstevel@tonic-gate 		target_create(pii, addr, is_router);
1919*0Sstevel@tonic-gate 	else if (is_router)
1920*0Sstevel@tonic-gate 		tg->tg_in_use = 1;
1921*0Sstevel@tonic-gate }
1922*0Sstevel@tonic-gate 
1923*0Sstevel@tonic-gate /*
1924*0Sstevel@tonic-gate  * Insert target at head of linked list of targets for the associated
1925*0Sstevel@tonic-gate  * phyint instance
1926*0Sstevel@tonic-gate  */
1927*0Sstevel@tonic-gate static void
1928*0Sstevel@tonic-gate target_insert(struct phyint_instance *pii, struct target *tg)
1929*0Sstevel@tonic-gate {
1930*0Sstevel@tonic-gate 	tg->tg_next = pii->pii_targets;
1931*0Sstevel@tonic-gate 	tg->tg_prev = NULL;
1932*0Sstevel@tonic-gate 	if (tg->tg_next != NULL)
1933*0Sstevel@tonic-gate 		tg->tg_next->tg_prev = tg;
1934*0Sstevel@tonic-gate 	pii->pii_targets = tg;
1935*0Sstevel@tonic-gate }
1936*0Sstevel@tonic-gate 
1937*0Sstevel@tonic-gate /*
1938*0Sstevel@tonic-gate  * Delete a target (unlink and free).
1939*0Sstevel@tonic-gate  */
1940*0Sstevel@tonic-gate void
1941*0Sstevel@tonic-gate target_delete(struct target *tg)
1942*0Sstevel@tonic-gate {
1943*0Sstevel@tonic-gate 	int af;
1944*0Sstevel@tonic-gate 	struct phyint_instance	*pii;
1945*0Sstevel@tonic-gate 	struct phyint_instance	*pii_other;
1946*0Sstevel@tonic-gate 
1947*0Sstevel@tonic-gate 	pii = tg->tg_phyint_inst;
1948*0Sstevel@tonic-gate 	af = pii->pii_af;
1949*0Sstevel@tonic-gate 
1950*0Sstevel@tonic-gate 	if (debug & D_TARGET) {
1951*0Sstevel@tonic-gate 		char abuf[INET6_ADDRSTRLEN];
1952*0Sstevel@tonic-gate 
1953*0Sstevel@tonic-gate 		logdebug("target_delete(%s %s, %s)\n",
1954*0Sstevel@tonic-gate 		    AF_STR(af), pii->pii_name,
1955*0Sstevel@tonic-gate 		    pr_addr(af, tg->tg_address, abuf, sizeof (abuf)));
1956*0Sstevel@tonic-gate 	}
1957*0Sstevel@tonic-gate 
1958*0Sstevel@tonic-gate 	/*
1959*0Sstevel@tonic-gate 	 * Target must be in the list of targets for this phyint
1960*0Sstevel@tonic-gate 	 * instance.
1961*0Sstevel@tonic-gate 	 */
1962*0Sstevel@tonic-gate 	assert(pii->pii_targets == tg || tg->tg_prev != NULL);
1963*0Sstevel@tonic-gate 
1964*0Sstevel@tonic-gate 	/*
1965*0Sstevel@tonic-gate 	 * Reset all references to 'tg' in the probe information
1966*0Sstevel@tonic-gate 	 * for this phyint.
1967*0Sstevel@tonic-gate 	 */
1968*0Sstevel@tonic-gate 	reset_pii_probes(pii, tg);
1969*0Sstevel@tonic-gate 
1970*0Sstevel@tonic-gate 	/*
1971*0Sstevel@tonic-gate 	 * Remove this target from the list of targets of this
1972*0Sstevel@tonic-gate 	 * phyint instance.
1973*0Sstevel@tonic-gate 	 */
1974*0Sstevel@tonic-gate 	if (tg->tg_prev == NULL) {
1975*0Sstevel@tonic-gate 		pii->pii_targets = tg->tg_next;
1976*0Sstevel@tonic-gate 	} else {
1977*0Sstevel@tonic-gate 		tg->tg_prev->tg_next = tg->tg_next;
1978*0Sstevel@tonic-gate 	}
1979*0Sstevel@tonic-gate 
1980*0Sstevel@tonic-gate 	if (tg->tg_next != NULL)
1981*0Sstevel@tonic-gate 		tg->tg_next->tg_prev = tg->tg_prev;
1982*0Sstevel@tonic-gate 
1983*0Sstevel@tonic-gate 	tg->tg_next = NULL;
1984*0Sstevel@tonic-gate 	tg->tg_prev = NULL;
1985*0Sstevel@tonic-gate 
1986*0Sstevel@tonic-gate 	if (tg->tg_status == TG_ACTIVE)
1987*0Sstevel@tonic-gate 		pii->pii_ntargets--;
1988*0Sstevel@tonic-gate 
1989*0Sstevel@tonic-gate 	/*
1990*0Sstevel@tonic-gate 	 * Adjust the next target to probe, if it points to
1991*0Sstevel@tonic-gate 	 * to the currently deleted target.
1992*0Sstevel@tonic-gate 	 */
1993*0Sstevel@tonic-gate 	if (pii->pii_target_next == tg)
1994*0Sstevel@tonic-gate 		pii->pii_target_next = target_first(pii);
1995*0Sstevel@tonic-gate 
1996*0Sstevel@tonic-gate 	if (pii->pii_rtt_target_next == tg)
1997*0Sstevel@tonic-gate 		pii->pii_rtt_target_next = target_first(pii);
1998*0Sstevel@tonic-gate 
1999*0Sstevel@tonic-gate 	free(tg);
2000*0Sstevel@tonic-gate 
2001*0Sstevel@tonic-gate 	/*
2002*0Sstevel@tonic-gate 	 * The number of active targets pii_ntargets == 0 iff
2003*0Sstevel@tonic-gate 	 * the next active target pii->pii_target_next == NULL
2004*0Sstevel@tonic-gate 	 */
2005*0Sstevel@tonic-gate 	if (pii->pii_ntargets != 0) {
2006*0Sstevel@tonic-gate 		assert(pii->pii_target_next != NULL);
2007*0Sstevel@tonic-gate 		assert(pii->pii_rtt_target_next != NULL);
2008*0Sstevel@tonic-gate 		assert(pii->pii_target_next->tg_status == TG_ACTIVE);
2009*0Sstevel@tonic-gate 		assert(pii->pii_rtt_target_next->tg_status == TG_ACTIVE);
2010*0Sstevel@tonic-gate 		return;
2011*0Sstevel@tonic-gate 	}
2012*0Sstevel@tonic-gate 
2013*0Sstevel@tonic-gate 	/* At this point, we don't have any active targets. */
2014*0Sstevel@tonic-gate 	assert(pii->pii_target_next == NULL);
2015*0Sstevel@tonic-gate 	assert(pii->pii_rtt_target_next == NULL);
2016*0Sstevel@tonic-gate 
2017*0Sstevel@tonic-gate 	if (pii->pii_targets_are_routers) {
2018*0Sstevel@tonic-gate 		/*
2019*0Sstevel@tonic-gate 		 * Activate any TG_SLOW or TG_DEAD router targets,
2020*0Sstevel@tonic-gate 		 * since we don't have any other targets
2021*0Sstevel@tonic-gate 		 */
2022*0Sstevel@tonic-gate 		target_activate_all(pii);
2023*0Sstevel@tonic-gate 
2024*0Sstevel@tonic-gate 		if (pii->pii_ntargets != 0) {
2025*0Sstevel@tonic-gate 			assert(pii->pii_target_next != NULL);
2026*0Sstevel@tonic-gate 			assert(pii->pii_rtt_target_next != NULL);
2027*0Sstevel@tonic-gate 			assert(pii->pii_target_next->tg_status == TG_ACTIVE);
2028*0Sstevel@tonic-gate 			assert(pii->pii_rtt_target_next->tg_status ==
2029*0Sstevel@tonic-gate 			    TG_ACTIVE);
2030*0Sstevel@tonic-gate 			return;
2031*0Sstevel@tonic-gate 		}
2032*0Sstevel@tonic-gate 	}
2033*0Sstevel@tonic-gate 
2034*0Sstevel@tonic-gate 	/*
2035*0Sstevel@tonic-gate 	 * If we still don't have any active targets, the list must
2036*0Sstevel@tonic-gate 	 * must be really empty. There aren't even TG_SLOW or TG_DEAD
2037*0Sstevel@tonic-gate 	 * targets. Zero out the probe stats since it will not be
2038*0Sstevel@tonic-gate 	 * relevant any longer.
2039*0Sstevel@tonic-gate 	 */
2040*0Sstevel@tonic-gate 	assert(pii->pii_targets == NULL);
2041*0Sstevel@tonic-gate 	clear_pii_probe_stats(pii);
2042*0Sstevel@tonic-gate 	pii_other = phyint_inst_other(pii);
2043*0Sstevel@tonic-gate 
2044*0Sstevel@tonic-gate 	/*
2045*0Sstevel@tonic-gate 	 * If there are no targets on both instances,
2046*0Sstevel@tonic-gate 	 * go back to PI_NOTARGETS state, since we cannot
2047*0Sstevel@tonic-gate 	 * probe this phyint any more. For more details,
2048*0Sstevel@tonic-gate 	 * please see phyint state diagram in mpd_probe.c.
2049*0Sstevel@tonic-gate 	 */
2050*0Sstevel@tonic-gate 	if (!PROBE_CAPABLE(pii_other))
2051*0Sstevel@tonic-gate 		phyint_chstate(pii->pii_phyint, PI_NOTARGETS);
2052*0Sstevel@tonic-gate }
2053*0Sstevel@tonic-gate 
2054*0Sstevel@tonic-gate /*
2055*0Sstevel@tonic-gate  * Flush the target list of every phyint in the group, if the list
2056*0Sstevel@tonic-gate  * is a host target list. This is called if group failure is suspected.
2057*0Sstevel@tonic-gate  * If all targets have failed, multicast will subsequently discover new
2058*0Sstevel@tonic-gate  * targets. Else it is a group failure.
2059*0Sstevel@tonic-gate  * Note: This function is a no-op if the list is a router target list.
2060*0Sstevel@tonic-gate  */
2061*0Sstevel@tonic-gate static void
2062*0Sstevel@tonic-gate target_flush_hosts(struct phyint_group *pg)
2063*0Sstevel@tonic-gate {
2064*0Sstevel@tonic-gate 	struct phyint *pi;
2065*0Sstevel@tonic-gate 	struct phyint_instance *pii;
2066*0Sstevel@tonic-gate 
2067*0Sstevel@tonic-gate 	if (debug & D_TARGET)
2068*0Sstevel@tonic-gate 		logdebug("target_flush_hosts(%s)\n", pg->pg_name);
2069*0Sstevel@tonic-gate 
2070*0Sstevel@tonic-gate 	for (pi = pg->pg_phyint; pi != NULL; pi = pi->pi_pgnext) {
2071*0Sstevel@tonic-gate 		pii = pi->pi_v4;
2072*0Sstevel@tonic-gate 		if (pii != NULL && !pii->pii_targets_are_routers) {
2073*0Sstevel@tonic-gate 			/*
2074*0Sstevel@tonic-gate 			 * Delete all the targets. When the list becomes
2075*0Sstevel@tonic-gate 			 * empty, target_delete() will set pii->pii_targets
2076*0Sstevel@tonic-gate 			 * to NULL.
2077*0Sstevel@tonic-gate 			 */
2078*0Sstevel@tonic-gate 			while (pii->pii_targets != NULL)
2079*0Sstevel@tonic-gate 				target_delete(pii->pii_targets);
2080*0Sstevel@tonic-gate 		}
2081*0Sstevel@tonic-gate 		pii = pi->pi_v6;
2082*0Sstevel@tonic-gate 		if (pii != NULL && !pii->pii_targets_are_routers) {
2083*0Sstevel@tonic-gate 			/*
2084*0Sstevel@tonic-gate 			 * Delete all the targets. When the list becomes
2085*0Sstevel@tonic-gate 			 * empty, target_delete() will set pii->pii_targets
2086*0Sstevel@tonic-gate 			 * to NULL.
2087*0Sstevel@tonic-gate 			 */
2088*0Sstevel@tonic-gate 			while (pii->pii_targets != NULL)
2089*0Sstevel@tonic-gate 				target_delete(pii->pii_targets);
2090*0Sstevel@tonic-gate 		}
2091*0Sstevel@tonic-gate 	}
2092*0Sstevel@tonic-gate }
2093*0Sstevel@tonic-gate 
2094*0Sstevel@tonic-gate /*
2095*0Sstevel@tonic-gate  * Reset all references to 'target' in the probe info, as this target is
2096*0Sstevel@tonic-gate  * being deleted. The pr_target field is guaranteed to be non-null if
2097*0Sstevel@tonic-gate  * pr_status is PR_UNACKED. So we change the pr_status to PR_LOST, so that
2098*0Sstevel@tonic-gate  * pr_target will not be accessed unconditionally.
2099*0Sstevel@tonic-gate  */
2100*0Sstevel@tonic-gate static void
2101*0Sstevel@tonic-gate reset_pii_probes(struct phyint_instance *pii, struct target *tg)
2102*0Sstevel@tonic-gate {
2103*0Sstevel@tonic-gate 	int i;
2104*0Sstevel@tonic-gate 
2105*0Sstevel@tonic-gate 	for (i = 0; i < PROBE_STATS_COUNT; i++) {
2106*0Sstevel@tonic-gate 		if (pii->pii_probes[i].pr_target == tg) {
2107*0Sstevel@tonic-gate 			pii->pii_probes[i].pr_target = NULL;
2108*0Sstevel@tonic-gate 			if (pii->pii_probes[i].pr_status == PR_UNACKED)
2109*0Sstevel@tonic-gate 				pii->pii_probes[i].pr_status = PR_LOST;
2110*0Sstevel@tonic-gate 		}
2111*0Sstevel@tonic-gate 	}
2112*0Sstevel@tonic-gate 
2113*0Sstevel@tonic-gate }
2114*0Sstevel@tonic-gate 
2115*0Sstevel@tonic-gate /*
2116*0Sstevel@tonic-gate  * Clear the probe statistics array.
2117*0Sstevel@tonic-gate  */
2118*0Sstevel@tonic-gate void
2119*0Sstevel@tonic-gate clear_pii_probe_stats(struct phyint_instance *pii)
2120*0Sstevel@tonic-gate {
2121*0Sstevel@tonic-gate 	bzero(pii->pii_probes, sizeof (struct probe_stats) * PROBE_STATS_COUNT);
2122*0Sstevel@tonic-gate 	/* Reset the next probe index in the probe stats array */
2123*0Sstevel@tonic-gate 	pii->pii_probe_next = 0;
2124*0Sstevel@tonic-gate }
2125*0Sstevel@tonic-gate 
2126*0Sstevel@tonic-gate static void
2127*0Sstevel@tonic-gate target_print(struct target *tg)
2128*0Sstevel@tonic-gate {
2129*0Sstevel@tonic-gate 	char	abuf[INET6_ADDRSTRLEN];
2130*0Sstevel@tonic-gate 	char	buf[128];
2131*0Sstevel@tonic-gate 	char	buf2[128];
2132*0Sstevel@tonic-gate 	int	af;
2133*0Sstevel@tonic-gate 	int	i;
2134*0Sstevel@tonic-gate 
2135*0Sstevel@tonic-gate 	af = tg->tg_phyint_inst->pii_af;
2136*0Sstevel@tonic-gate 
2137*0Sstevel@tonic-gate 	logdebug("Target on %s %s addr %s\n"
2138*0Sstevel@tonic-gate 	    "status %d rtt_sa %d rtt_sd %d crtt %d tg_in_use %d\n",
2139*0Sstevel@tonic-gate 	    AF_STR(af), tg->tg_phyint_inst->pii_name,
2140*0Sstevel@tonic-gate 	    pr_addr(af, tg->tg_address, abuf, sizeof (abuf)),
2141*0Sstevel@tonic-gate 	    tg->tg_status, tg->tg_rtt_sa, tg->tg_rtt_sd,
2142*0Sstevel@tonic-gate 	    tg->tg_crtt, tg->tg_in_use);
2143*0Sstevel@tonic-gate 
2144*0Sstevel@tonic-gate 	buf[0] = '\0';
2145*0Sstevel@tonic-gate 	for (i = 0; i < tg->tg_num_deferred; i++) {
2146*0Sstevel@tonic-gate 		(void) snprintf(buf2, sizeof (buf2), " %dms",
2147*0Sstevel@tonic-gate 		    tg->tg_deferred[i]);
2148*0Sstevel@tonic-gate 		(void) strlcat(buf, buf2, sizeof (buf));
2149*0Sstevel@tonic-gate 	}
2150*0Sstevel@tonic-gate 	logdebug("deferred rtts:%s\n", buf);
2151*0Sstevel@tonic-gate }
2152*0Sstevel@tonic-gate 
2153*0Sstevel@tonic-gate void
2154*0Sstevel@tonic-gate phyint_inst_print_all(void)
2155*0Sstevel@tonic-gate {
2156*0Sstevel@tonic-gate 	struct phyint_instance *pii;
2157*0Sstevel@tonic-gate 
2158*0Sstevel@tonic-gate 	for (pii = phyint_instances; pii != NULL; pii = pii->pii_next) {
2159*0Sstevel@tonic-gate 		phyint_inst_print(pii);
2160*0Sstevel@tonic-gate 	}
2161*0Sstevel@tonic-gate }
2162*0Sstevel@tonic-gate 
2163*0Sstevel@tonic-gate /*
2164*0Sstevel@tonic-gate  * Convert length for a mask to the mask.
2165*0Sstevel@tonic-gate  */
2166*0Sstevel@tonic-gate static void
2167*0Sstevel@tonic-gate ip_index_to_mask_v6(uint_t masklen, struct in6_addr *bitmask)
2168*0Sstevel@tonic-gate {
2169*0Sstevel@tonic-gate 	int	j;
2170*0Sstevel@tonic-gate 
2171*0Sstevel@tonic-gate 	assert(masklen <= IPV6_ABITS);
2172*0Sstevel@tonic-gate 	bzero((char *)bitmask, sizeof (*bitmask));
2173*0Sstevel@tonic-gate 
2174*0Sstevel@tonic-gate 	/* Make the 'masklen' leftmost bits one */
2175*0Sstevel@tonic-gate 	for (j = 0; masklen > 8; masklen -= 8, j++)
2176*0Sstevel@tonic-gate 		bitmask->s6_addr[j] = 0xff;
2177*0Sstevel@tonic-gate 
2178*0Sstevel@tonic-gate 	bitmask->s6_addr[j] = 0xff << (8 - masklen);
2179*0Sstevel@tonic-gate 
2180*0Sstevel@tonic-gate }
2181*0Sstevel@tonic-gate 
2182*0Sstevel@tonic-gate /*
2183*0Sstevel@tonic-gate  * Compare two prefixes that have the same prefix length.
2184*0Sstevel@tonic-gate  * Fails if the prefix length is unreasonable.
2185*0Sstevel@tonic-gate  */
2186*0Sstevel@tonic-gate static boolean_t
2187*0Sstevel@tonic-gate prefix_equal(struct in6_addr p1, struct in6_addr p2, int prefix_len)
2188*0Sstevel@tonic-gate {
2189*0Sstevel@tonic-gate 	uchar_t mask;
2190*0Sstevel@tonic-gate 	int j;
2191*0Sstevel@tonic-gate 
2192*0Sstevel@tonic-gate 	if (prefix_len < 0 || prefix_len > IPV6_ABITS)
2193*0Sstevel@tonic-gate 		return (_B_FALSE);
2194*0Sstevel@tonic-gate 
2195*0Sstevel@tonic-gate 	for (j = 0; prefix_len > 8; prefix_len -= 8, j++)
2196*0Sstevel@tonic-gate 		if (p1.s6_addr[j] != p2.s6_addr[j])
2197*0Sstevel@tonic-gate 			return (_B_FALSE);
2198*0Sstevel@tonic-gate 
2199*0Sstevel@tonic-gate 	/* Make the N leftmost bits one */
2200*0Sstevel@tonic-gate 	mask = 0xff << (8 - prefix_len);
2201*0Sstevel@tonic-gate 	if ((p1.s6_addr[j] & mask) != (p2.s6_addr[j] & mask))
2202*0Sstevel@tonic-gate 		return (_B_FALSE);
2203*0Sstevel@tonic-gate 
2204*0Sstevel@tonic-gate 	return (_B_TRUE);
2205*0Sstevel@tonic-gate }
2206*0Sstevel@tonic-gate 
2207*0Sstevel@tonic-gate /*
2208*0Sstevel@tonic-gate  * Get the number of UP logints (excluding IFF_NOFAILOVERs), on both
2209*0Sstevel@tonic-gate  * IPv4 and IPv6 put together. The phyint with the least such number
2210*0Sstevel@tonic-gate  * will be used as the failover destination, if no standby interface is
2211*0Sstevel@tonic-gate  * available
2212*0Sstevel@tonic-gate  */
2213*0Sstevel@tonic-gate int
2214*0Sstevel@tonic-gate logint_upcount(struct phyint *pi)
2215*0Sstevel@tonic-gate {
2216*0Sstevel@tonic-gate 	struct	logint	*li;
2217*0Sstevel@tonic-gate 	struct	phyint_instance *pii;
2218*0Sstevel@tonic-gate 	int count = 0;
2219*0Sstevel@tonic-gate 
2220*0Sstevel@tonic-gate 	pii = pi->pi_v4;
2221*0Sstevel@tonic-gate 	if (pii != NULL) {
2222*0Sstevel@tonic-gate 		for (li = pii->pii_logint; li != NULL; li = li->li_next) {
2223*0Sstevel@tonic-gate 			if ((li->li_flags &
2224*0Sstevel@tonic-gate 			    (IFF_UP | IFF_NOFAILOVER)) == IFF_UP) {
2225*0Sstevel@tonic-gate 				count++;
2226*0Sstevel@tonic-gate 			}
2227*0Sstevel@tonic-gate 		}
2228*0Sstevel@tonic-gate 	}
2229*0Sstevel@tonic-gate 
2230*0Sstevel@tonic-gate 	pii = pi->pi_v6;
2231*0Sstevel@tonic-gate 	if (pii != NULL) {
2232*0Sstevel@tonic-gate 		for (li = pii->pii_logint; li != NULL; li = li->li_next) {
2233*0Sstevel@tonic-gate 			if ((li->li_flags &
2234*0Sstevel@tonic-gate 			    (IFF_UP | IFF_NOFAILOVER)) == IFF_UP) {
2235*0Sstevel@tonic-gate 				count++;
2236*0Sstevel@tonic-gate 			}
2237*0Sstevel@tonic-gate 		}
2238*0Sstevel@tonic-gate 	}
2239*0Sstevel@tonic-gate 
2240*0Sstevel@tonic-gate 	return (count);
2241*0Sstevel@tonic-gate }
2242*0Sstevel@tonic-gate 
2243*0Sstevel@tonic-gate /*
2244*0Sstevel@tonic-gate  * Get the phyint instance with the other (IPv4 / IPv6) protocol
2245*0Sstevel@tonic-gate  */
2246*0Sstevel@tonic-gate struct phyint_instance *
2247*0Sstevel@tonic-gate phyint_inst_other(struct phyint_instance *pii)
2248*0Sstevel@tonic-gate {
2249*0Sstevel@tonic-gate 	if (pii->pii_af == AF_INET)
2250*0Sstevel@tonic-gate 		return (pii->pii_phyint->pi_v6);
2251*0Sstevel@tonic-gate 	else
2252*0Sstevel@tonic-gate 		return (pii->pii_phyint->pi_v4);
2253*0Sstevel@tonic-gate }
2254*0Sstevel@tonic-gate 
2255*0Sstevel@tonic-gate /*
2256*0Sstevel@tonic-gate  * Post an EC_IPMP sysevent of subclass `subclass' and attributes `nvl'.
2257*0Sstevel@tonic-gate  * Before sending the event, it prepends the current version of the IPMP
2258*0Sstevel@tonic-gate  * sysevent API.  Returns 0 on success, -1 on failure (in either case,
2259*0Sstevel@tonic-gate  * `nvl' is freed).
2260*0Sstevel@tonic-gate  */
2261*0Sstevel@tonic-gate static int
2262*0Sstevel@tonic-gate post_event(const char *subclass, nvlist_t *nvl)
2263*0Sstevel@tonic-gate {
2264*0Sstevel@tonic-gate 	sysevent_id_t eid;
2265*0Sstevel@tonic-gate 
2266*0Sstevel@tonic-gate 	errno = nvlist_add_uint32(nvl, IPMP_EVENT_VERSION,
2267*0Sstevel@tonic-gate 	    IPMP_EVENT_CUR_VERSION);
2268*0Sstevel@tonic-gate 	if (errno != 0) {
2269*0Sstevel@tonic-gate 		logerr("cannot create `%s' event: %s", subclass,
2270*0Sstevel@tonic-gate 		    strerror(errno));
2271*0Sstevel@tonic-gate 		goto failed;
2272*0Sstevel@tonic-gate 	}
2273*0Sstevel@tonic-gate 
2274*0Sstevel@tonic-gate 	if (sysevent_post_event(EC_IPMP, (char *)subclass, SUNW_VENDOR,
2275*0Sstevel@tonic-gate 	    "in.mpathd", nvl, &eid) == -1) {
2276*0Sstevel@tonic-gate 		logerr("cannot send `%s' event: %s\n", subclass,
2277*0Sstevel@tonic-gate 		    strerror(errno));
2278*0Sstevel@tonic-gate 		goto failed;
2279*0Sstevel@tonic-gate 	}
2280*0Sstevel@tonic-gate 
2281*0Sstevel@tonic-gate 	nvlist_free(nvl);
2282*0Sstevel@tonic-gate 	return (0);
2283*0Sstevel@tonic-gate failed:
2284*0Sstevel@tonic-gate 	nvlist_free(nvl);
2285*0Sstevel@tonic-gate 	return (-1);
2286*0Sstevel@tonic-gate }
2287*0Sstevel@tonic-gate 
2288*0Sstevel@tonic-gate /*
2289*0Sstevel@tonic-gate  * Return the external IPMP state associated with phyint `pi'.
2290*0Sstevel@tonic-gate  */
2291*0Sstevel@tonic-gate static ipmp_if_state_t
2292*0Sstevel@tonic-gate ifstate(struct phyint *pi)
2293*0Sstevel@tonic-gate {
2294*0Sstevel@tonic-gate 	switch (pi->pi_state) {
2295*0Sstevel@tonic-gate 	case PI_NOTARGETS:
2296*0Sstevel@tonic-gate 		return (IPMP_IF_UNKNOWN);
2297*0Sstevel@tonic-gate 
2298*0Sstevel@tonic-gate 	case PI_OFFLINE:
2299*0Sstevel@tonic-gate 		return (IPMP_IF_OFFLINE);
2300*0Sstevel@tonic-gate 
2301*0Sstevel@tonic-gate 	case PI_FAILED:
2302*0Sstevel@tonic-gate 		return (IPMP_IF_FAILED);
2303*0Sstevel@tonic-gate 
2304*0Sstevel@tonic-gate 	case PI_RUNNING:
2305*0Sstevel@tonic-gate 		return (IPMP_IF_OK);
2306*0Sstevel@tonic-gate 	}
2307*0Sstevel@tonic-gate 
2308*0Sstevel@tonic-gate 	logerr("ifstate: unknown state %d; aborting\n", pi->pi_state);
2309*0Sstevel@tonic-gate 	abort();
2310*0Sstevel@tonic-gate 	/* NOTREACHED */
2311*0Sstevel@tonic-gate }
2312*0Sstevel@tonic-gate 
2313*0Sstevel@tonic-gate /*
2314*0Sstevel@tonic-gate  * Return the external IPMP interface type associated with phyint `pi'.
2315*0Sstevel@tonic-gate  */
2316*0Sstevel@tonic-gate static ipmp_if_type_t
2317*0Sstevel@tonic-gate iftype(struct phyint *pi)
2318*0Sstevel@tonic-gate {
2319*0Sstevel@tonic-gate 	if (pi->pi_flags & IFF_STANDBY)
2320*0Sstevel@tonic-gate 		return (IPMP_IF_STANDBY);
2321*0Sstevel@tonic-gate 	else
2322*0Sstevel@tonic-gate 		return (IPMP_IF_NORMAL);
2323*0Sstevel@tonic-gate }
2324*0Sstevel@tonic-gate 
2325*0Sstevel@tonic-gate /*
2326*0Sstevel@tonic-gate  * Return the external IPMP group state associated with phyint group `pg'.
2327*0Sstevel@tonic-gate  */
2328*0Sstevel@tonic-gate static ipmp_group_state_t
2329*0Sstevel@tonic-gate groupstate(struct phyint_group *pg)
2330*0Sstevel@tonic-gate {
2331*0Sstevel@tonic-gate 	return (GROUP_FAILED(pg) ? IPMP_GROUP_FAILED : IPMP_GROUP_OK);
2332*0Sstevel@tonic-gate }
2333*0Sstevel@tonic-gate 
2334*0Sstevel@tonic-gate /*
2335*0Sstevel@tonic-gate  * Generate an ESC_IPMP_GROUP_STATE sysevent for phyint group `pg'.
2336*0Sstevel@tonic-gate  * Returns 0 on success, -1 on failure.
2337*0Sstevel@tonic-gate  */
2338*0Sstevel@tonic-gate static int
2339*0Sstevel@tonic-gate phyint_group_state_event(struct phyint_group *pg)
2340*0Sstevel@tonic-gate {
2341*0Sstevel@tonic-gate 	nvlist_t	*nvl;
2342*0Sstevel@tonic-gate 
2343*0Sstevel@tonic-gate 	errno = nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0);
2344*0Sstevel@tonic-gate 	if (errno != 0) {
2345*0Sstevel@tonic-gate 		logperror("cannot create `group state change' event");
2346*0Sstevel@tonic-gate 		return (-1);
2347*0Sstevel@tonic-gate 	}
2348*0Sstevel@tonic-gate 
2349*0Sstevel@tonic-gate 	errno = nvlist_add_string(nvl, IPMP_GROUP_NAME, pg->pg_name);
2350*0Sstevel@tonic-gate 	if (errno != 0)
2351*0Sstevel@tonic-gate 		goto failed;
2352*0Sstevel@tonic-gate 
2353*0Sstevel@tonic-gate 	errno = nvlist_add_uint64(nvl, IPMP_GROUP_SIGNATURE, pg->pg_sig);
2354*0Sstevel@tonic-gate 	if (errno != 0)
2355*0Sstevel@tonic-gate 		goto failed;
2356*0Sstevel@tonic-gate 
2357*0Sstevel@tonic-gate 	errno = nvlist_add_uint32(nvl, IPMP_GROUP_STATE, groupstate(pg));
2358*0Sstevel@tonic-gate 	if (errno != 0)
2359*0Sstevel@tonic-gate 		goto failed;
2360*0Sstevel@tonic-gate 
2361*0Sstevel@tonic-gate 	return (post_event(ESC_IPMP_GROUP_STATE, nvl));
2362*0Sstevel@tonic-gate failed:
2363*0Sstevel@tonic-gate 	logperror("cannot create `group state change' event");
2364*0Sstevel@tonic-gate 	nvlist_free(nvl);
2365*0Sstevel@tonic-gate 	return (-1);
2366*0Sstevel@tonic-gate }
2367*0Sstevel@tonic-gate 
2368*0Sstevel@tonic-gate /*
2369*0Sstevel@tonic-gate  * Generate an ESC_IPMP_GROUP_CHANGE sysevent of type `op' for phyint group
2370*0Sstevel@tonic-gate  * `pg'.  Returns 0 on success, -1 on failure.
2371*0Sstevel@tonic-gate  */
2372*0Sstevel@tonic-gate static int
2373*0Sstevel@tonic-gate phyint_group_change_event(struct phyint_group *pg, ipmp_group_op_t op)
2374*0Sstevel@tonic-gate {
2375*0Sstevel@tonic-gate 	nvlist_t *nvl;
2376*0Sstevel@tonic-gate 
2377*0Sstevel@tonic-gate 	errno = nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0);
2378*0Sstevel@tonic-gate 	if (errno != 0) {
2379*0Sstevel@tonic-gate 		logperror("cannot create `group change' event");
2380*0Sstevel@tonic-gate 		return (-1);
2381*0Sstevel@tonic-gate 	}
2382*0Sstevel@tonic-gate 
2383*0Sstevel@tonic-gate 	errno = nvlist_add_string(nvl, IPMP_GROUP_NAME, pg->pg_name);
2384*0Sstevel@tonic-gate 	if (errno != 0)
2385*0Sstevel@tonic-gate 		goto failed;
2386*0Sstevel@tonic-gate 
2387*0Sstevel@tonic-gate 	errno = nvlist_add_uint64(nvl, IPMP_GROUP_SIGNATURE, pg->pg_sig);
2388*0Sstevel@tonic-gate 	if (errno != 0)
2389*0Sstevel@tonic-gate 		goto failed;
2390*0Sstevel@tonic-gate 
2391*0Sstevel@tonic-gate 	errno = nvlist_add_uint64(nvl, IPMP_GROUPLIST_SIGNATURE,
2392*0Sstevel@tonic-gate 	    phyint_grouplistsig);
2393*0Sstevel@tonic-gate 	if (errno != 0)
2394*0Sstevel@tonic-gate 		goto failed;
2395*0Sstevel@tonic-gate 
2396*0Sstevel@tonic-gate 	errno = nvlist_add_uint32(nvl, IPMP_GROUP_OPERATION, op);
2397*0Sstevel@tonic-gate 	if (errno != 0)
2398*0Sstevel@tonic-gate 		goto failed;
2399*0Sstevel@tonic-gate 
2400*0Sstevel@tonic-gate 	return (post_event(ESC_IPMP_GROUP_CHANGE, nvl));
2401*0Sstevel@tonic-gate failed:
2402*0Sstevel@tonic-gate 	logperror("cannot create `group change' event");
2403*0Sstevel@tonic-gate 	nvlist_free(nvl);
2404*0Sstevel@tonic-gate 	return (-1);
2405*0Sstevel@tonic-gate }
2406*0Sstevel@tonic-gate 
2407*0Sstevel@tonic-gate /*
2408*0Sstevel@tonic-gate  * Generate an ESC_IPMP_GROUP_MEMBER_CHANGE sysevent for phyint `pi' in
2409*0Sstevel@tonic-gate  * group `pg'.	Returns 0 on success, -1 on failure.
2410*0Sstevel@tonic-gate  */
2411*0Sstevel@tonic-gate static int
2412*0Sstevel@tonic-gate phyint_group_member_event(struct phyint_group *pg, struct phyint *pi,
2413*0Sstevel@tonic-gate     ipmp_if_op_t op)
2414*0Sstevel@tonic-gate {
2415*0Sstevel@tonic-gate 	nvlist_t *nvl;
2416*0Sstevel@tonic-gate 
2417*0Sstevel@tonic-gate 	errno = nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0);
2418*0Sstevel@tonic-gate 	if (errno != 0) {
2419*0Sstevel@tonic-gate 		logperror("cannot create `group member change' event");
2420*0Sstevel@tonic-gate 		return (-1);
2421*0Sstevel@tonic-gate 	}
2422*0Sstevel@tonic-gate 
2423*0Sstevel@tonic-gate 	errno = nvlist_add_string(nvl, IPMP_GROUP_NAME, pg->pg_name);
2424*0Sstevel@tonic-gate 	if (errno != 0)
2425*0Sstevel@tonic-gate 		goto failed;
2426*0Sstevel@tonic-gate 
2427*0Sstevel@tonic-gate 	errno = nvlist_add_uint64(nvl, IPMP_GROUP_SIGNATURE, pg->pg_sig);
2428*0Sstevel@tonic-gate 	if (errno != 0)
2429*0Sstevel@tonic-gate 		goto failed;
2430*0Sstevel@tonic-gate 
2431*0Sstevel@tonic-gate 	errno = nvlist_add_uint32(nvl, IPMP_IF_OPERATION, op);
2432*0Sstevel@tonic-gate 	if (errno != 0)
2433*0Sstevel@tonic-gate 		goto failed;
2434*0Sstevel@tonic-gate 
2435*0Sstevel@tonic-gate 	errno = nvlist_add_string(nvl, IPMP_IF_NAME, pi->pi_name);
2436*0Sstevel@tonic-gate 	if (errno != 0)
2437*0Sstevel@tonic-gate 		goto failed;
2438*0Sstevel@tonic-gate 
2439*0Sstevel@tonic-gate 	errno = nvlist_add_uint32(nvl, IPMP_IF_TYPE, iftype(pi));
2440*0Sstevel@tonic-gate 	if (errno != 0)
2441*0Sstevel@tonic-gate 		goto failed;
2442*0Sstevel@tonic-gate 
2443*0Sstevel@tonic-gate 	errno = nvlist_add_uint32(nvl, IPMP_IF_STATE, ifstate(pi));
2444*0Sstevel@tonic-gate 	if (errno != 0)
2445*0Sstevel@tonic-gate 		goto failed;
2446*0Sstevel@tonic-gate 
2447*0Sstevel@tonic-gate 	return (post_event(ESC_IPMP_GROUP_MEMBER_CHANGE, nvl));
2448*0Sstevel@tonic-gate failed:
2449*0Sstevel@tonic-gate 	logperror("cannot create `group member change' event");
2450*0Sstevel@tonic-gate 	nvlist_free(nvl);
2451*0Sstevel@tonic-gate 	return (-1);
2452*0Sstevel@tonic-gate 
2453*0Sstevel@tonic-gate }
2454*0Sstevel@tonic-gate 
2455*0Sstevel@tonic-gate /*
2456*0Sstevel@tonic-gate  * Generate an ESC_IPMP_IF_CHANGE sysevent for phyint `pi' in group `pg'.
2457*0Sstevel@tonic-gate  * Returns 0 on success, -1 on failure.
2458*0Sstevel@tonic-gate  */
2459*0Sstevel@tonic-gate static int
2460*0Sstevel@tonic-gate phyint_state_event(struct phyint_group *pg, struct phyint *pi)
2461*0Sstevel@tonic-gate {
2462*0Sstevel@tonic-gate 	nvlist_t *nvl;
2463*0Sstevel@tonic-gate 
2464*0Sstevel@tonic-gate 	errno = nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0);
2465*0Sstevel@tonic-gate 	if (errno != 0) {
2466*0Sstevel@tonic-gate 		logperror("cannot create `interface change' event");
2467*0Sstevel@tonic-gate 		return (-1);
2468*0Sstevel@tonic-gate 	}
2469*0Sstevel@tonic-gate 
2470*0Sstevel@tonic-gate 	errno = nvlist_add_string(nvl, IPMP_GROUP_NAME, pg->pg_name);
2471*0Sstevel@tonic-gate 	if (errno != 0)
2472*0Sstevel@tonic-gate 		goto failed;
2473*0Sstevel@tonic-gate 
2474*0Sstevel@tonic-gate 	errno = nvlist_add_uint64(nvl, IPMP_GROUP_SIGNATURE, pg->pg_sig);
2475*0Sstevel@tonic-gate 	if (errno != 0)
2476*0Sstevel@tonic-gate 		goto failed;
2477*0Sstevel@tonic-gate 
2478*0Sstevel@tonic-gate 	errno = nvlist_add_string(nvl, IPMP_IF_NAME, pi->pi_name);
2479*0Sstevel@tonic-gate 	if (errno != 0)
2480*0Sstevel@tonic-gate 		goto failed;
2481*0Sstevel@tonic-gate 
2482*0Sstevel@tonic-gate 	errno = nvlist_add_uint32(nvl, IPMP_IF_TYPE, iftype(pi));
2483*0Sstevel@tonic-gate 	if (errno != 0)
2484*0Sstevel@tonic-gate 		goto failed;
2485*0Sstevel@tonic-gate 
2486*0Sstevel@tonic-gate 	errno = nvlist_add_uint32(nvl, IPMP_IF_STATE, ifstate(pi));
2487*0Sstevel@tonic-gate 	if (errno != 0)
2488*0Sstevel@tonic-gate 		goto failed;
2489*0Sstevel@tonic-gate 
2490*0Sstevel@tonic-gate 	return (post_event(ESC_IPMP_IF_CHANGE, nvl));
2491*0Sstevel@tonic-gate failed:
2492*0Sstevel@tonic-gate 	logperror("cannot create `interface change' event");
2493*0Sstevel@tonic-gate 	nvlist_free(nvl);
2494*0Sstevel@tonic-gate 	return (-1);
2495*0Sstevel@tonic-gate 
2496*0Sstevel@tonic-gate }
2497*0Sstevel@tonic-gate 
/*
 * Generate a fresh signature.  A signature is conceptually split into a
 * random 16-bit "generation number" (the top 16 bits) and a 48-bit
 * monotonically increasing integer (the low 48 bits, which start out
 * at 1 here).  The generation number protects against stale updates to
 * entities (e.g., IPMP groups) that have been deleted and since recreated.
 */
static uint64_t
gensig(void)
{
	static int seeded = 0;
	uint64_t generation;

	/* Seed the generator once, on first use. */
	if (!seeded) {
		srand48((long)gethrtime());
		seeded = 1;
	}

	/* Only the low 16 bits of the random value survive the shift. */
	generation = (uint64_t)lrand48();
	return ((generation << 48) | 1);
}
2517*0Sstevel@tonic-gate 
2518*0Sstevel@tonic-gate /*
2519*0Sstevel@tonic-gate  * Store the information associated with group `grname' into a dynamically
2520*0Sstevel@tonic-gate  * allocated structure pointed to by `*grinfopp'.  Returns an IPMP error code.
2521*0Sstevel@tonic-gate  */
2522*0Sstevel@tonic-gate unsigned int
2523*0Sstevel@tonic-gate getgroupinfo(const char *grname, ipmp_groupinfo_t **grinfopp)
2524*0Sstevel@tonic-gate {
2525*0Sstevel@tonic-gate 	struct phyint_group	*pg;
2526*0Sstevel@tonic-gate 	struct phyint		*pi;
2527*0Sstevel@tonic-gate 	char			(*ifs)[LIFNAMSIZ];
2528*0Sstevel@tonic-gate 	unsigned int		nif, i;
2529*0Sstevel@tonic-gate 
2530*0Sstevel@tonic-gate 	pg = phyint_group_lookup(grname);
2531*0Sstevel@tonic-gate 	if (pg == NULL)
2532*0Sstevel@tonic-gate 		return (IPMP_EUNKGROUP);
2533*0Sstevel@tonic-gate 
2534*0Sstevel@tonic-gate 	/*
2535*0Sstevel@tonic-gate 	 * Tally up the number of interfaces, allocate an array to hold them,
2536*0Sstevel@tonic-gate 	 * and insert their names into the array.
2537*0Sstevel@tonic-gate 	 */
2538*0Sstevel@tonic-gate 	for (nif = 0, pi = pg->pg_phyint; pi != NULL; pi = pi->pi_pgnext)
2539*0Sstevel@tonic-gate 		nif++;
2540*0Sstevel@tonic-gate 
2541*0Sstevel@tonic-gate 	ifs = alloca(nif * sizeof (*ifs));
2542*0Sstevel@tonic-gate 	for (i = 0, pi = pg->pg_phyint; pi != NULL; pi = pi->pi_pgnext, i++) {
2543*0Sstevel@tonic-gate 		assert(i < nif);
2544*0Sstevel@tonic-gate 		(void) strlcpy(ifs[i], pi->pi_name, LIFNAMSIZ);
2545*0Sstevel@tonic-gate 	}
2546*0Sstevel@tonic-gate 	assert(i == nif);
2547*0Sstevel@tonic-gate 
2548*0Sstevel@tonic-gate 	*grinfopp = ipmp_groupinfo_create(pg->pg_name, pg->pg_sig,
2549*0Sstevel@tonic-gate 	    groupstate(pg), nif, ifs);
2550*0Sstevel@tonic-gate 	return (*grinfopp == NULL ? IPMP_ENOMEM : IPMP_SUCCESS);
2551*0Sstevel@tonic-gate }
2552*0Sstevel@tonic-gate 
2553*0Sstevel@tonic-gate /*
2554*0Sstevel@tonic-gate  * Store the information associated with interface `ifname' into a dynamically
2555*0Sstevel@tonic-gate  * allocated structure pointed to by `*ifinfopp'.  Returns an IPMP error code.
2556*0Sstevel@tonic-gate  */
2557*0Sstevel@tonic-gate unsigned int
2558*0Sstevel@tonic-gate getifinfo(const char *ifname, ipmp_ifinfo_t **ifinfopp)
2559*0Sstevel@tonic-gate {
2560*0Sstevel@tonic-gate 	struct phyint	*pi;
2561*0Sstevel@tonic-gate 
2562*0Sstevel@tonic-gate 	pi = phyint_lookup(ifname);
2563*0Sstevel@tonic-gate 	if (pi == NULL)
2564*0Sstevel@tonic-gate 		return (IPMP_EUNKIF);
2565*0Sstevel@tonic-gate 
2566*0Sstevel@tonic-gate 	*ifinfopp = ipmp_ifinfo_create(pi->pi_name, pi->pi_group->pg_name,
2567*0Sstevel@tonic-gate 	    ifstate(pi), iftype(pi));
2568*0Sstevel@tonic-gate 	return (*ifinfopp == NULL ? IPMP_ENOMEM : IPMP_SUCCESS);
2569*0Sstevel@tonic-gate }
2570*0Sstevel@tonic-gate 
2571*0Sstevel@tonic-gate /*
2572*0Sstevel@tonic-gate  * Store the current list of IPMP groups into a dynamically allocated
2573*0Sstevel@tonic-gate  * structure pointed to by `*grlistpp'.	 Returns an IPMP error code.
2574*0Sstevel@tonic-gate  */
2575*0Sstevel@tonic-gate unsigned int
2576*0Sstevel@tonic-gate getgrouplist(ipmp_grouplist_t **grlistpp)
2577*0Sstevel@tonic-gate {
2578*0Sstevel@tonic-gate 	struct phyint_group	*pg;
2579*0Sstevel@tonic-gate 	char			(*groups)[LIFGRNAMSIZ];
2580*0Sstevel@tonic-gate 	unsigned int		i, ngroup;
2581*0Sstevel@tonic-gate 
2582*0Sstevel@tonic-gate 	/*
2583*0Sstevel@tonic-gate 	 * Tally up the number of groups, allocate an array to hold them, and
2584*0Sstevel@tonic-gate 	 * insert their names into the array.
2585*0Sstevel@tonic-gate 	 */
2586*0Sstevel@tonic-gate 	for (ngroup = 0, pg = phyint_groups; pg != NULL; pg = pg->pg_next)
2587*0Sstevel@tonic-gate 		ngroup++;
2588*0Sstevel@tonic-gate 
2589*0Sstevel@tonic-gate 	groups = alloca(ngroup * sizeof (*groups));
2590*0Sstevel@tonic-gate 	for (i = 0, pg = phyint_groups; pg != NULL; pg = pg->pg_next, i++) {
2591*0Sstevel@tonic-gate 		assert(i < ngroup);
2592*0Sstevel@tonic-gate 		(void) strlcpy(groups[i], pg->pg_name, LIFGRNAMSIZ);
2593*0Sstevel@tonic-gate 	}
2594*0Sstevel@tonic-gate 	assert(i == ngroup);
2595*0Sstevel@tonic-gate 
2596*0Sstevel@tonic-gate 	*grlistpp = ipmp_grouplist_create(phyint_grouplistsig, ngroup, groups);
2597*0Sstevel@tonic-gate 	return (*grlistpp == NULL ? IPMP_ENOMEM : IPMP_SUCCESS);
2598*0Sstevel@tonic-gate }
2599*0Sstevel@tonic-gate 
2600*0Sstevel@tonic-gate /*
2601*0Sstevel@tonic-gate  * Store a snapshot of the IPMP subsystem into a dynamically allocated
2602*0Sstevel@tonic-gate  * structure pointed to by `*snapp'.  Returns an IPMP error code.
2603*0Sstevel@tonic-gate  */
2604*0Sstevel@tonic-gate unsigned int
2605*0Sstevel@tonic-gate getsnap(ipmp_snap_t **snapp)
2606*0Sstevel@tonic-gate {
2607*0Sstevel@tonic-gate 	ipmp_grouplist_t	*grlistp;
2608*0Sstevel@tonic-gate 	ipmp_groupinfo_t	*grinfop;
2609*0Sstevel@tonic-gate 	ipmp_ifinfo_t		*ifinfop;
2610*0Sstevel@tonic-gate 	ipmp_snap_t		*snap;
2611*0Sstevel@tonic-gate 	struct phyint		*pi;
2612*0Sstevel@tonic-gate 	unsigned int		i;
2613*0Sstevel@tonic-gate 	int			retval;
2614*0Sstevel@tonic-gate 
2615*0Sstevel@tonic-gate 	snap = ipmp_snap_create();
2616*0Sstevel@tonic-gate 	if (snap == NULL)
2617*0Sstevel@tonic-gate 		return (IPMP_ENOMEM);
2618*0Sstevel@tonic-gate 
2619*0Sstevel@tonic-gate 	/*
2620*0Sstevel@tonic-gate 	 * Add group list.
2621*0Sstevel@tonic-gate 	 */
2622*0Sstevel@tonic-gate 	retval = getgrouplist(&snap->sn_grlistp);
2623*0Sstevel@tonic-gate 	if (retval != IPMP_SUCCESS) {
2624*0Sstevel@tonic-gate 		ipmp_snap_free(snap);
2625*0Sstevel@tonic-gate 		return (retval);
2626*0Sstevel@tonic-gate 	}
2627*0Sstevel@tonic-gate 
2628*0Sstevel@tonic-gate 	/*
2629*0Sstevel@tonic-gate 	 * Add information for each group in the list.
2630*0Sstevel@tonic-gate 	 */
2631*0Sstevel@tonic-gate 	grlistp = snap->sn_grlistp;
2632*0Sstevel@tonic-gate 	for (i = 0; i < grlistp->gl_ngroup; i++) {
2633*0Sstevel@tonic-gate 		retval = getgroupinfo(grlistp->gl_groups[i], &grinfop);
2634*0Sstevel@tonic-gate 		if (retval != IPMP_SUCCESS) {
2635*0Sstevel@tonic-gate 			ipmp_snap_free(snap);
2636*0Sstevel@tonic-gate 			return (retval);
2637*0Sstevel@tonic-gate 		}
2638*0Sstevel@tonic-gate 		retval = ipmp_snap_addgroupinfo(snap, grinfop);
2639*0Sstevel@tonic-gate 		if (retval != IPMP_SUCCESS) {
2640*0Sstevel@tonic-gate 			ipmp_freegroupinfo(grinfop);
2641*0Sstevel@tonic-gate 			ipmp_snap_free(snap);
2642*0Sstevel@tonic-gate 			return (retval);
2643*0Sstevel@tonic-gate 		}
2644*0Sstevel@tonic-gate 	}
2645*0Sstevel@tonic-gate 
2646*0Sstevel@tonic-gate 	/*
2647*0Sstevel@tonic-gate 	 * Add information for each configured phyint.
2648*0Sstevel@tonic-gate 	 */
2649*0Sstevel@tonic-gate 	for (pi = phyints; pi != NULL; pi = pi->pi_next) {
2650*0Sstevel@tonic-gate 		retval = getifinfo(pi->pi_name, &ifinfop);
2651*0Sstevel@tonic-gate 		if (retval != IPMP_SUCCESS) {
2652*0Sstevel@tonic-gate 			ipmp_snap_free(snap);
2653*0Sstevel@tonic-gate 			return (retval);
2654*0Sstevel@tonic-gate 		}
2655*0Sstevel@tonic-gate 		retval = ipmp_snap_addifinfo(snap, ifinfop);
2656*0Sstevel@tonic-gate 		if (retval != IPMP_SUCCESS) {
2657*0Sstevel@tonic-gate 			ipmp_freeifinfo(ifinfop);
2658*0Sstevel@tonic-gate 			ipmp_snap_free(snap);
2659*0Sstevel@tonic-gate 			return (retval);
2660*0Sstevel@tonic-gate 		}
2661*0Sstevel@tonic-gate 	}
2662*0Sstevel@tonic-gate 
2663*0Sstevel@tonic-gate 	*snapp = snap;
2664*0Sstevel@tonic-gate 	return (IPMP_SUCCESS);
2665*0Sstevel@tonic-gate }
2666