xref: /openbsd-src/sys/netinet/ip_carp.c (revision 99fd087599a8791921855f21bd7e36130f39aadc)
1 /*	$OpenBSD: ip_carp.c,v 1.342 2019/11/08 07:51:41 dlg Exp $	*/
2 
3 /*
4  * Copyright (c) 2002 Michael Shalayeff. All rights reserved.
5  * Copyright (c) 2003 Ryan McBride. All rights reserved.
6  * Copyright (c) 2006-2008 Marco Pfatschbacher. All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
21  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
23  * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
25  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
26  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
27  * THE POSSIBILITY OF SUCH DAMAGE.
28  */
29 
30 /*
31  * TODO:
32  *	- iface reconfigure
33  *	- support for hardware checksum calculations;
34  *
35  */
36 
37 #include "ether.h"
38 
39 #include <sys/param.h>
40 #include <sys/systm.h>
41 #include <sys/mbuf.h>
42 #include <sys/socket.h>
43 #include <sys/socketvar.h>
44 #include <sys/timeout.h>
45 #include <sys/ioctl.h>
46 #include <sys/errno.h>
47 #include <sys/device.h>
48 #include <sys/kernel.h>
49 #include <sys/sysctl.h>
50 #include <sys/syslog.h>
51 #include <sys/refcnt.h>
52 
53 #include <net/if.h>
54 #include <net/if_var.h>
55 #include <net/if_types.h>
56 #include <net/netisr.h>
57 
58 #include <crypto/sha1.h>
59 
60 #include <netinet/in.h>
61 #include <netinet/in_var.h>
62 #include <netinet/ip.h>
63 #include <netinet/ip_var.h>
64 #include <netinet/if_ether.h>
65 #include <netinet/ip_ipsp.h>
66 
67 #include <net/if_dl.h>
68 
69 #ifdef INET6
70 #include <netinet6/in6_var.h>
71 #include <netinet/icmp6.h>
72 #include <netinet/ip6.h>
73 #include <netinet6/ip6_var.h>
74 #include <netinet6/nd6.h>
75 #include <netinet6/in6_ifattach.h>
76 #endif
77 
78 #include "bpfilter.h"
79 #if NBPFILTER > 0
80 #include <net/bpf.h>
81 #endif
82 
83 #include "vlan.h"
84 #if NVLAN > 0
85 #include <net/if_vlan_var.h>
86 #endif
87 
88 #include <netinet/ip_carp.h>
89 
90 struct carp_mc_entry {
91 	LIST_ENTRY(carp_mc_entry)	mc_entries;
92 	union {
93 		struct ether_multi	*mcu_enm;
94 	} mc_u;
95 	struct sockaddr_storage		mc_addr;
96 };
97 #define	mc_enm	mc_u.mcu_enm
98 
/*
 * Indices into the vhe_sha1[] array of precomputed HMAC contexts.
 * See carp_hmac_prepare_ctx() for how the two contexts differ.
 */
enum {
	HMAC_ORIG = 0,		/* scope-embedded IPv6 addresses are hashed */
	HMAC_NOV6LL = 1,	/* scope-embedded IPv6 addresses are skipped */
	HMAC_MAX = 2		/* number of contexts */
};
100 
101 struct carp_vhost_entry {
102 	SRPL_ENTRY(carp_vhost_entry) vhost_entries;
103 	struct refcnt vhost_refcnt;
104 
105 	struct carp_softc *parent_sc;
106 	int vhe_leader;
107 	int vhid;
108 	int advskew;
109 	enum { INIT = 0, BACKUP, MASTER }	state;
110 	struct timeout ad_tmo;	/* advertisement timeout */
111 	struct timeout md_tmo;	/* master down timeout */
112 	struct timeout md6_tmo;	/* master down timeout */
113 
114 	u_int64_t vhe_replay_cookie;
115 
116 	/* authentication */
117 #define CARP_HMAC_PAD	64
118 	unsigned char vhe_pad[CARP_HMAC_PAD];
119 	SHA1_CTX vhe_sha1[HMAC_MAX];
120 
121 	u_int8_t vhe_enaddr[ETHER_ADDR_LEN];
122 };
123 
124 void	carp_vh_ref(void *, void *);
125 void	carp_vh_unref(void *, void *);
126 
127 struct srpl_rc carp_vh_rc =
128     SRPL_RC_INITIALIZER(carp_vh_ref, carp_vh_unref, NULL);
129 
130 struct carp_softc {
131 	struct arpcom sc_ac;
132 #define	sc_if		sc_ac.ac_if
133 #define	sc_carpdev	sc_ac.ac_if.if_carpdev
134 	struct task sc_atask;
135 	struct task sc_ltask;
136 	struct task sc_dtask;
137 	struct ip_moptions sc_imo;
138 #ifdef INET6
139 	struct ip6_moptions sc_im6o;
140 #endif /* INET6 */
141 
142 	SRPL_ENTRY(carp_softc) sc_list;
143 	struct refcnt sc_refcnt;
144 
145 	int sc_suppress;
146 	int sc_bow_out;
147 	int sc_demote_cnt;
148 
149 	int sc_sendad_errors;
150 #define CARP_SENDAD_MAX_ERRORS(sc) (3 * (sc)->sc_vhe_count)
151 	int sc_sendad_success;
152 #define CARP_SENDAD_MIN_SUCCESS(sc) (3 * (sc)->sc_vhe_count)
153 
154 	char sc_curlladdr[ETHER_ADDR_LEN];
155 
156 	SRPL_HEAD(, carp_vhost_entry) carp_vhosts;
157 	int sc_vhe_count;
158 	u_int8_t sc_vhids[CARP_MAXNODES];
159 	u_int8_t sc_advskews[CARP_MAXNODES];
160 	u_int8_t sc_balancing;
161 
162 	int sc_naddrs;
163 	int sc_naddrs6;
164 	int sc_advbase;		/* seconds */
165 
166 	/* authentication */
167 	unsigned char sc_key[CARP_KEY_LEN];
168 
169 	u_int32_t sc_hashkey[2];
170 	u_int32_t sc_lsmask;		/* load sharing mask */
171 	int sc_lscount;			/* # load sharing interfaces (max 32) */
172 	int sc_delayed_arp;		/* delayed ARP request countdown */
173 	int sc_realmac;			/* using real mac */
174 
175 	struct in_addr sc_peer;
176 
177 	LIST_HEAD(__carp_mchead, carp_mc_entry)	carp_mc_listhead;
178 	struct carp_vhost_entry *cur_vhe; /* current active vhe */
179 };
180 
181 void	carp_sc_ref(void *, void *);
182 void	carp_sc_unref(void *, void *);
183 
184 struct srpl_rc carp_sc_rc =
185     SRPL_RC_INITIALIZER(carp_sc_ref, carp_sc_unref, NULL);
186 
187 int carp_opts[CARPCTL_MAXID] = { 0, 1, 0, LOG_CRIT };	/* XXX for now */
188 struct cpumem *carpcounters;
189 
190 int	carp_send_all_recur = 0;
191 
/*
 * Log a message at priority `l' when carp_opts[CARPCTL_LOG] is at least
 * `l'.  The message is prefixed with the interface name of `sc', or
 * with "carp: " when `sc' is NULL, and terminated with a newline.
 *
 * `s' is a complete, parenthesized addlog() argument list, e.g.
 *	CARP_LOG(LOG_INFO, sc, ("bad value %d", v));
 *
 * `l' is parenthesized wherever it is expanded so that an expression
 * argument cannot change the meaning of the comparison.
 */
#define	CARP_LOG(l, sc, s)						\
	do {								\
		if (carp_opts[CARPCTL_LOG] >= (l)) {			\
			if (sc)						\
				log((l), "%s: ",			\
				    (sc)->sc_if.if_xname);		\
			else						\
				log((l), "carp: ");			\
			addlog s;					\
			addlog("\n");					\
		}							\
	} while (0)
204 
205 void	carp_hmac_prepare(struct carp_softc *);
206 void	carp_hmac_prepare_ctx(struct carp_vhost_entry *, u_int8_t);
207 void	carp_hmac_generate(struct carp_vhost_entry *, u_int32_t *,
208 	    unsigned char *, u_int8_t);
209 int	carp_hmac_verify(struct carp_vhost_entry *, u_int32_t *,
210 	    unsigned char *);
211 int	carp_input(struct ifnet *, struct mbuf *, void *);
212 void	carp_proto_input_c(struct ifnet *, struct mbuf *,
213 	    struct carp_header *, int, sa_family_t);
214 int	carp_proto_input_if(struct ifnet *, struct mbuf **, int *, int);
215 #ifdef INET6
216 int	carp6_proto_input_if(struct ifnet *, struct mbuf **, int *, int);
217 #endif
218 void	carpattach(int);
219 void	carpdetach(void *);
220 void	carp_prepare_ad(struct mbuf *, struct carp_vhost_entry *,
221 	    struct carp_header *);
222 void	carp_send_ad_all(void);
223 void	carp_vhe_send_ad_all(struct carp_softc *);
224 void	carp_timer_ad(void *);
225 void	carp_send_ad(struct carp_vhost_entry *);
226 void	carp_send_arp(struct carp_softc *);
227 void	carp_timer_down(void *);
228 void	carp_master_down(struct carp_vhost_entry *);
229 int	carp_ioctl(struct ifnet *, u_long, caddr_t);
230 int	carp_vhids_ioctl(struct carp_softc *, struct carpreq *);
231 int	carp_check_dup_vhids(struct carp_softc *, struct srpl *,
232 	    struct carpreq *);
233 void	carp_ifgroup_ioctl(struct ifnet *, u_long, caddr_t);
234 void	carp_ifgattr_ioctl(struct ifnet *, u_long, caddr_t);
235 void	carp_start(struct ifnet *);
236 void	carp_setrun_all(struct carp_softc *, sa_family_t);
237 void	carp_setrun(struct carp_vhost_entry *, sa_family_t);
238 void	carp_set_state_all(struct carp_softc *, int);
239 void	carp_set_state(struct carp_vhost_entry *, int);
240 void	carp_multicast_cleanup(struct carp_softc *);
241 int	carp_set_ifp(struct carp_softc *, struct ifnet *);
242 void	carp_set_enaddr(struct carp_softc *);
243 void	carp_set_vhe_enaddr(struct carp_vhost_entry *);
244 void	carp_addr_updated(void *);
245 int	carp_set_addr(struct carp_softc *, struct sockaddr_in *);
246 int	carp_join_multicast(struct carp_softc *);
247 #ifdef INET6
248 void	carp_send_na(struct carp_softc *);
249 int	carp_set_addr6(struct carp_softc *, struct sockaddr_in6 *);
250 int	carp_join_multicast6(struct carp_softc *);
251 #endif
252 int	carp_clone_create(struct if_clone *, int);
253 int	carp_clone_destroy(struct ifnet *);
254 int	carp_ether_addmulti(struct carp_softc *, struct ifreq *);
255 int	carp_ether_delmulti(struct carp_softc *, struct ifreq *);
256 void	carp_ether_purgemulti(struct carp_softc *);
257 int	carp_group_demote_count(struct carp_softc *);
258 void	carp_update_lsmask(struct carp_softc *);
259 int	carp_new_vhost(struct carp_softc *, int, int);
260 void	carp_destroy_vhosts(struct carp_softc *);
261 void	carp_del_all_timeouts(struct carp_softc *);
262 int	carp_vhe_match(struct carp_softc *, uint8_t *);
263 
264 struct if_clone carp_cloner =
265     IF_CLONE_INITIALIZER("carp", carp_clone_create, carp_clone_destroy);
266 
267 #define carp_cksum(_m, _l)	((u_int16_t)in_cksum((_m), (_l)))
268 #define CARP_IFQ_PRIO	6
269 
270 void
271 carp_hmac_prepare(struct carp_softc *sc)
272 {
273 	struct carp_vhost_entry *vhe;
274 	u_int8_t i;
275 
276 	KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
277 
278 	SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) {
279 		for (i = 0; i < HMAC_MAX; i++) {
280 			carp_hmac_prepare_ctx(vhe, i);
281 		}
282 	}
283 }
284 
285 void
286 carp_hmac_prepare_ctx(struct carp_vhost_entry *vhe, u_int8_t ctx)
287 {
288 	struct carp_softc *sc = vhe->parent_sc;
289 
290 	u_int8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT;
291 	u_int8_t vhid = vhe->vhid & 0xff;
292 	SHA1_CTX sha1ctx;
293 	u_int32_t kmd[5];
294 	struct ifaddr *ifa;
295 	int i, found;
296 	struct in_addr last, cur, in;
297 #ifdef INET6
298 	struct in6_addr last6, cur6, in6;
299 #endif /* INET6 */
300 
301 	/* compute ipad from key */
302 	memset(vhe->vhe_pad, 0, sizeof(vhe->vhe_pad));
303 	bcopy(sc->sc_key, vhe->vhe_pad, sizeof(sc->sc_key));
304 	for (i = 0; i < sizeof(vhe->vhe_pad); i++)
305 		vhe->vhe_pad[i] ^= 0x36;
306 
307 	/* precompute first part of inner hash */
308 	SHA1Init(&vhe->vhe_sha1[ctx]);
309 	SHA1Update(&vhe->vhe_sha1[ctx], vhe->vhe_pad, sizeof(vhe->vhe_pad));
310 	SHA1Update(&vhe->vhe_sha1[ctx], (void *)&version, sizeof(version));
311 	SHA1Update(&vhe->vhe_sha1[ctx], (void *)&type, sizeof(type));
312 
313 	/* generate a key for the arpbalance hash, before the vhid is hashed */
314 	if (vhe->vhe_leader) {
315 		bcopy(&vhe->vhe_sha1[ctx], &sha1ctx, sizeof(sha1ctx));
316 		SHA1Final((unsigned char *)kmd, &sha1ctx);
317 		sc->sc_hashkey[0] = kmd[0] ^ kmd[1];
318 		sc->sc_hashkey[1] = kmd[2] ^ kmd[3];
319 	}
320 
321 	/* the rest of the precomputation */
322 	if (!sc->sc_realmac && vhe->vhe_leader &&
323 	    memcmp(sc->sc_ac.ac_enaddr, vhe->vhe_enaddr, ETHER_ADDR_LEN) != 0)
324 		SHA1Update(&vhe->vhe_sha1[ctx], sc->sc_ac.ac_enaddr,
325 		    ETHER_ADDR_LEN);
326 
327 	SHA1Update(&vhe->vhe_sha1[ctx], (void *)&vhid, sizeof(vhid));
328 
329 	/* Hash the addresses from smallest to largest, not interface order */
330 	cur.s_addr = 0;
331 	do {
332 		found = 0;
333 		last = cur;
334 		cur.s_addr = 0xffffffff;
335 		TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) {
336 			if (ifa->ifa_addr->sa_family != AF_INET)
337 				continue;
338 			in.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr;
339 			if (ntohl(in.s_addr) > ntohl(last.s_addr) &&
340 			    ntohl(in.s_addr) < ntohl(cur.s_addr)) {
341 				cur.s_addr = in.s_addr;
342 				found++;
343 			}
344 		}
345 		if (found)
346 			SHA1Update(&vhe->vhe_sha1[ctx],
347 			    (void *)&cur, sizeof(cur));
348 	} while (found);
349 #ifdef INET6
350 	memset(&cur6, 0x00, sizeof(cur6));
351 	do {
352 		found = 0;
353 		last6 = cur6;
354 		memset(&cur6, 0xff, sizeof(cur6));
355 		TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) {
356 			if (ifa->ifa_addr->sa_family != AF_INET6)
357 				continue;
358 			in6 = ifatoia6(ifa)->ia_addr.sin6_addr;
359 			if (IN6_IS_SCOPE_EMBED(&in6)) {
360 				if (ctx == HMAC_NOV6LL)
361 					continue;
362 				in6.s6_addr16[1] = 0;
363 			}
364 			if (memcmp(&in6, &last6, sizeof(in6)) > 0 &&
365 			    memcmp(&in6, &cur6, sizeof(in6)) < 0) {
366 				cur6 = in6;
367 				found++;
368 			}
369 		}
370 		if (found)
371 			SHA1Update(&vhe->vhe_sha1[ctx],
372 			    (void *)&cur6, sizeof(cur6));
373 	} while (found);
374 #endif /* INET6 */
375 
376 	/* convert ipad to opad */
377 	for (i = 0; i < sizeof(vhe->vhe_pad); i++)
378 		vhe->vhe_pad[i] ^= 0x36 ^ 0x5c;
379 }
380 
381 void
382 carp_hmac_generate(struct carp_vhost_entry *vhe, u_int32_t counter[2],
383     unsigned char md[20], u_int8_t ctx)
384 {
385 	SHA1_CTX sha1ctx;
386 
387 	/* fetch first half of inner hash */
388 	bcopy(&vhe->vhe_sha1[ctx], &sha1ctx, sizeof(sha1ctx));
389 
390 	SHA1Update(&sha1ctx, (void *)counter, sizeof(vhe->vhe_replay_cookie));
391 	SHA1Final(md, &sha1ctx);
392 
393 	/* outer hash */
394 	SHA1Init(&sha1ctx);
395 	SHA1Update(&sha1ctx, vhe->vhe_pad, sizeof(vhe->vhe_pad));
396 	SHA1Update(&sha1ctx, md, 20);
397 	SHA1Final(md, &sha1ctx);
398 }
399 
400 int
401 carp_hmac_verify(struct carp_vhost_entry *vhe, u_int32_t counter[2],
402     unsigned char md[20])
403 {
404 	unsigned char md2[20];
405 	u_int8_t i;
406 
407 	for (i = 0; i < HMAC_MAX; i++) {
408 		carp_hmac_generate(vhe, counter, md2, i);
409 		if (!timingsafe_bcmp(md, md2, sizeof(md2)))
410 			return (0);
411 	}
412 	return (1);
413 }
414 
415 int
416 carp_proto_input(struct mbuf **mp, int *offp, int proto, int af)
417 {
418 	struct ifnet *ifp;
419 
420 	ifp = if_get((*mp)->m_pkthdr.ph_ifidx);
421 	if (ifp == NULL) {
422 		m_freemp(mp);
423 		return IPPROTO_DONE;
424 	}
425 
426 	proto = carp_proto_input_if(ifp, mp, offp, proto);
427 	if_put(ifp);
428 	return proto;
429 }
430 
431 /*
432  * process input packet.
433  * we have rearranged checks order compared to the rfc,
434  * but it seems more efficient this way or not possible otherwise.
435  */
436 int
437 carp_proto_input_if(struct ifnet *ifp, struct mbuf **mp, int *offp, int proto)
438 {
439 	struct mbuf *m = *mp;
440 	struct ip *ip = mtod(m, struct ip *);
441 	struct carp_softc *sc = NULL;
442 	struct carp_header *ch;
443 	int iplen, len, ismulti;
444 
445 	carpstat_inc(carps_ipackets);
446 
447 	if (!carp_opts[CARPCTL_ALLOW]) {
448 		m_freem(m);
449 		return IPPROTO_DONE;
450 	}
451 
452 	ismulti = IN_MULTICAST(ip->ip_dst.s_addr);
453 
454 	/* check if received on a valid carp interface */
455 	switch (ifp->if_type) {
456 	case IFT_CARP:
457 		break;
458 	case IFT_ETHER:
459 		if (ismulti || !SRPL_EMPTY_LOCKED(&ifp->if_carp))
460 			break;
461 		/* FALLTHROUGH */
462 	default:
463 		carpstat_inc(carps_badif);
464 		CARP_LOG(LOG_INFO, sc,
465 		    ("packet received on non-carp interface: %s",
466 		     ifp->if_xname));
467 		m_freem(m);
468 		return IPPROTO_DONE;
469 	}
470 
471 	/* verify that the IP TTL is 255.  */
472 	if (ip->ip_ttl != CARP_DFLTTL) {
473 		carpstat_inc(carps_badttl);
474 		CARP_LOG(LOG_NOTICE, sc, ("received ttl %d != %d on %s",
475 		    ip->ip_ttl, CARP_DFLTTL, ifp->if_xname));
476 		m_freem(m);
477 		return IPPROTO_DONE;
478 	}
479 
480 	/*
481 	 * verify that the received packet length is
482 	 * equal to the CARP header
483 	 */
484 	iplen = ip->ip_hl << 2;
485 	len = iplen + sizeof(*ch);
486 	if (len > m->m_pkthdr.len) {
487 		carpstat_inc(carps_badlen);
488 		CARP_LOG(LOG_INFO, sc, ("packet too short %d on %s",
489 		    m->m_pkthdr.len, ifp->if_xname));
490 		m_freem(m);
491 		return IPPROTO_DONE;
492 	}
493 
494 	if ((m = *mp = m_pullup(m, len)) == NULL) {
495 		carpstat_inc(carps_hdrops);
496 		return IPPROTO_DONE;
497 	}
498 	ip = mtod(m, struct ip *);
499 	ch = (struct carp_header *)(mtod(m, caddr_t) + iplen);
500 
501 	/* verify the CARP checksum */
502 	m->m_data += iplen;
503 	if (carp_cksum(m, len - iplen)) {
504 		carpstat_inc(carps_badsum);
505 		CARP_LOG(LOG_INFO, sc, ("checksum failed on %s",
506 		    ifp->if_xname));
507 		m_freem(m);
508 		return IPPROTO_DONE;
509 	}
510 	m->m_data -= iplen;
511 
512 	KERNEL_LOCK();
513 	carp_proto_input_c(ifp, m, ch, ismulti, AF_INET);
514 	KERNEL_UNLOCK();
515 	return IPPROTO_DONE;
516 }
517 
518 #ifdef INET6
519 int
520 carp6_proto_input(struct mbuf **mp, int *offp, int proto, int af)
521 {
522 	struct ifnet *ifp;
523 
524 	ifp = if_get((*mp)->m_pkthdr.ph_ifidx);
525 	if (ifp == NULL) {
526 		m_freemp(mp);
527 		return IPPROTO_DONE;
528 	}
529 
530 	proto = carp6_proto_input_if(ifp, mp, offp, proto);
531 	if_put(ifp);
532 	return proto;
533 }
534 
535 int
536 carp6_proto_input_if(struct ifnet *ifp, struct mbuf **mp, int *offp, int proto)
537 {
538 	struct mbuf *m = *mp;
539 	struct carp_softc *sc = NULL;
540 	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
541 	struct carp_header *ch;
542 	u_int len;
543 
544 	carpstat_inc(carps_ipackets6);
545 
546 	if (!carp_opts[CARPCTL_ALLOW]) {
547 		m_freem(m);
548 		return IPPROTO_DONE;
549 	}
550 
551 	/* check if received on a valid carp interface */
552 	if (ifp->if_type != IFT_CARP) {
553 		carpstat_inc(carps_badif);
554 		CARP_LOG(LOG_INFO, sc, ("packet received on non-carp interface: %s",
555 		    ifp->if_xname));
556 		m_freem(m);
557 		return IPPROTO_DONE;
558 	}
559 
560 	/* verify that the IP TTL is 255 */
561 	if (ip6->ip6_hlim != CARP_DFLTTL) {
562 		carpstat_inc(carps_badttl);
563 		CARP_LOG(LOG_NOTICE, sc, ("received ttl %d != %d on %s",
564 		    ip6->ip6_hlim, CARP_DFLTTL, ifp->if_xname));
565 		m_freem(m);
566 		return IPPROTO_DONE;
567 	}
568 
569 	/* verify that we have a complete carp packet */
570 	len = m->m_len;
571 	if ((m = *mp = m_pullup(m, *offp + sizeof(*ch))) == NULL) {
572 		carpstat_inc(carps_badlen);
573 		CARP_LOG(LOG_INFO, sc, ("packet size %u too small", len));
574 		return IPPROTO_DONE;
575 	}
576 	ch = (struct carp_header *)(mtod(m, caddr_t) + *offp);
577 
578 	/* verify the CARP checksum */
579 	m->m_data += *offp;
580 	if (carp_cksum(m, sizeof(*ch))) {
581 		carpstat_inc(carps_badsum);
582 		CARP_LOG(LOG_INFO, sc, ("checksum failed, on %s",
583 		    ifp->if_xname));
584 		m_freem(m);
585 		return IPPROTO_DONE;
586 	}
587 	m->m_data -= *offp;
588 
589 	KERNEL_LOCK();
590 	carp_proto_input_c(ifp, m, ch, 1, AF_INET6);
591 	KERNEL_UNLOCK();
592 	return IPPROTO_DONE;
593 }
594 #endif /* INET6 */
595 
596 void
597 carp_proto_input_c(struct ifnet *ifp, struct mbuf *m, struct carp_header *ch,
598     int ismulti, sa_family_t af)
599 {
600 	struct carp_softc *sc;
601 	struct carp_vhost_entry *vhe;
602 	struct timeval sc_tv, ch_tv;
603 	struct srpl *cif;
604 
605 	KERNEL_ASSERT_LOCKED(); /* touching if_carp + carp_vhosts */
606 
607 	if (ifp->if_type == IFT_CARP) {
608 		/*
609 		 * If the parent of this carp(4) got destroyed while
610 		 * `m' was being processed, silently drop it.
611 		 */
612 		if (ifp->if_carpdev == NULL) {
613 			m_freem(m);
614 			return;
615 		}
616 		cif = &ifp->if_carpdev->if_carp;
617 	} else
618 		cif = &ifp->if_carp;
619 
620 	SRPL_FOREACH_LOCKED(sc, cif, sc_list) {
621 		if (af == AF_INET &&
622 		    ismulti != IN_MULTICAST(sc->sc_peer.s_addr))
623 			continue;
624 		SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) {
625 			if (vhe->vhid == ch->carp_vhid)
626 				goto found;
627 		}
628 	}
629  found:
630 
631 	if (!sc || (sc->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) !=
632 	    (IFF_UP|IFF_RUNNING)) {
633 		carpstat_inc(carps_badvhid);
634 		m_freem(m);
635 		return;
636 	}
637 
638 	getmicrotime(&sc->sc_if.if_lastchange);
639 	sc->sc_if.if_ipackets++;
640 	sc->sc_if.if_ibytes += m->m_pkthdr.len;
641 
642 	/* verify the CARP version. */
643 	if (ch->carp_version != CARP_VERSION) {
644 		carpstat_inc(carps_badver);
645 		sc->sc_if.if_ierrors++;
646 		CARP_LOG(LOG_NOTICE, sc, ("invalid version %d != %d",
647 		    ch->carp_version, CARP_VERSION));
648 		m_freem(m);
649 		return;
650 	}
651 
652 	/* verify the hash */
653 	if (carp_hmac_verify(vhe, ch->carp_counter, ch->carp_md)) {
654 		carpstat_inc(carps_badauth);
655 		sc->sc_if.if_ierrors++;
656 		CARP_LOG(LOG_INFO, sc, ("incorrect hash"));
657 		m_freem(m);
658 		return;
659 	}
660 
661 	if (!memcmp(&vhe->vhe_replay_cookie, ch->carp_counter,
662 	    sizeof(ch->carp_counter))) {
663 		/* Do not log duplicates from non simplex interfaces */
664 		if (sc->sc_carpdev->if_flags & IFF_SIMPLEX) {
665 			carpstat_inc(carps_badauth);
666 			sc->sc_if.if_ierrors++;
667 			CARP_LOG(LOG_WARNING, sc,
668 			    ("replay or network loop detected"));
669 		}
670 		m_freem(m);
671 		return;
672 	}
673 
674 	sc_tv.tv_sec = sc->sc_advbase;
675 	sc_tv.tv_usec = vhe->advskew * 1000000 / 256;
676 	ch_tv.tv_sec = ch->carp_advbase;
677 	ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256;
678 
679 	switch (vhe->state) {
680 	case INIT:
681 		break;
682 	case MASTER:
683 		/*
684 		 * If we receive an advertisement from a master who's going to
685 		 * be more frequent than us, and whose demote count is not higher
686 		 * than ours, go into BACKUP state. If his demote count is lower,
687 		 * also go into BACKUP.
688 		 */
689 		if (((timercmp(&sc_tv, &ch_tv, >) ||
690 		    timercmp(&sc_tv, &ch_tv, ==)) &&
691 		    (ch->carp_demote <= carp_group_demote_count(sc))) ||
692 		    ch->carp_demote < carp_group_demote_count(sc)) {
693 			timeout_del(&vhe->ad_tmo);
694 			carp_set_state(vhe, BACKUP);
695 			carp_setrun(vhe, 0);
696 		}
697 		break;
698 	case BACKUP:
699 		/*
700 		 * If we're pre-empting masters who advertise slower than us,
701 		 * and do not have a better demote count, treat them as down.
702 		 *
703 		 */
704 		if (carp_opts[CARPCTL_PREEMPT] &&
705 		    timercmp(&sc_tv, &ch_tv, <) &&
706 		    ch->carp_demote >= carp_group_demote_count(sc)) {
707 			carp_master_down(vhe);
708 			break;
709 		}
710 
711 		/*
712 		 * Take over masters advertising with a higher demote count,
713 		 * regardless of CARPCTL_PREEMPT.
714 		 */
715 		if (ch->carp_demote > carp_group_demote_count(sc)) {
716 			carp_master_down(vhe);
717 			break;
718 		}
719 
720 		/*
721 		 *  If the master is going to advertise at such a low frequency
722 		 *  that he's guaranteed to time out, we'd might as well just
723 		 *  treat him as timed out now.
724 		 */
725 		sc_tv.tv_sec = sc->sc_advbase * 3;
726 		if (sc->sc_advbase && timercmp(&sc_tv, &ch_tv, <)) {
727 			carp_master_down(vhe);
728 			break;
729 		}
730 
731 		/*
732 		 * Otherwise, we reset the counter and wait for the next
733 		 * advertisement.
734 		 */
735 		carp_setrun(vhe, af);
736 		break;
737 	}
738 
739 	m_freem(m);
740 	return;
741 }
742 
743 int
744 carp_sysctl_carpstat(void *oldp, size_t *oldlenp, void *newp)
745 {
746 	struct carpstats carpstat;
747 
748 	CTASSERT(sizeof(carpstat) == (carps_ncounters * sizeof(uint64_t)));
749 	memset(&carpstat, 0, sizeof carpstat);
750 	counters_read(carpcounters, (uint64_t *)&carpstat, carps_ncounters);
751 	return (sysctl_rdstruct(oldp, oldlenp, newp,
752 	    &carpstat, sizeof(carpstat)));
753 }
754 
755 int
756 carp_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
757     size_t newlen)
758 {
759 	int error;
760 
761 	/* All sysctl names at this level are terminal. */
762 	if (namelen != 1)
763 		return (ENOTDIR);
764 
765 	switch (name[0]) {
766 	case CARPCTL_STATS:
767 		return (carp_sysctl_carpstat(oldp, oldlenp, newp));
768 	default:
769 		if (name[0] <= 0 || name[0] >= CARPCTL_MAXID)
770 			return (ENOPROTOOPT);
771 		NET_LOCK();
772 		error = sysctl_int(oldp, oldlenp, newp, newlen,
773 		    &carp_opts[name[0]]);
774 		NET_UNLOCK();
775 		return (error);
776 	}
777 }
778 
779 /*
780  * Interface side of the CARP implementation.
781  */
782 
783 /* ARGSUSED */
784 void
785 carpattach(int n)
786 {
787 	struct ifg_group	*ifg;
788 
789 	if ((ifg = if_creategroup("carp")) != NULL)
790 		ifg->ifg_refcnt++;	/* keep around even if empty */
791 	if_clone_attach(&carp_cloner);
792 	carpcounters = counters_alloc(carps_ncounters);
793 }
794 
795 int
796 carp_clone_create(struct if_clone *ifc, int unit)
797 {
798 	struct carp_softc *sc;
799 	struct ifnet *ifp;
800 
801 	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
802 	refcnt_init(&sc->sc_refcnt);
803 
804 	SRPL_INIT(&sc->carp_vhosts);
805 	sc->sc_vhe_count = 0;
806 	if (carp_new_vhost(sc, 0, 0)) {
807 		free(sc, M_DEVBUF, sizeof(*sc));
808 		return (ENOMEM);
809 	}
810 
811 	task_set(&sc->sc_atask, carp_addr_updated, sc);
812 	task_set(&sc->sc_ltask, carp_carpdev_state, sc);
813 	task_set(&sc->sc_dtask, carpdetach, sc);
814 
815 	sc->sc_suppress = 0;
816 	sc->sc_advbase = CARP_DFLTINTV;
817 	sc->sc_naddrs = sc->sc_naddrs6 = 0;
818 #ifdef INET6
819 	sc->sc_im6o.im6o_hlim = CARP_DFLTTL;
820 #endif /* INET6 */
821 	sc->sc_imo.imo_membership = mallocarray(IP_MIN_MEMBERSHIPS,
822 	    sizeof(struct in_multi *), M_IPMOPTS, M_WAITOK|M_ZERO);
823 	sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS;
824 
825 	LIST_INIT(&sc->carp_mc_listhead);
826 	ifp = &sc->sc_if;
827 	ifp->if_softc = sc;
828 	snprintf(ifp->if_xname, sizeof ifp->if_xname, "%s%d", ifc->ifc_name,
829 	    unit);
830 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
831 	ifp->if_ioctl = carp_ioctl;
832 	ifp->if_start = carp_start;
833 	ifp->if_xflags = IFXF_CLONED;
834 	IFQ_SET_MAXLEN(&ifp->if_snd, 1);
835 	if_counters_alloc(ifp);
836 	if_attach(ifp);
837 	ether_ifattach(ifp);
838 	ifp->if_type = IFT_CARP;
839 	ifp->if_sadl->sdl_type = IFT_CARP;
840 	ifp->if_output = carp_output;
841 	ifp->if_priority = IF_CARP_DEFAULT_PRIORITY;
842 	ifp->if_link_state = LINK_STATE_INVALID;
843 
844 	/* Hook carp_addr_updated to cope with address and route changes. */
845 	if_addrhook_add(&sc->sc_if, &sc->sc_atask);
846 
847 	return (0);
848 }
849 
850 int
851 carp_new_vhost(struct carp_softc *sc, int vhid, int advskew)
852 {
853 	struct carp_vhost_entry *vhe, *vhe0;
854 
855 	vhe = malloc(sizeof(*vhe), M_DEVBUF, M_NOWAIT | M_ZERO);
856 	if (vhe == NULL)
857 		return (ENOMEM);
858 
859 	refcnt_init(&vhe->vhost_refcnt);
860 	carp_sc_ref(NULL, sc); /* give a sc ref to the vhe */
861 	vhe->parent_sc = sc;
862 	vhe->vhid = vhid;
863 	vhe->advskew = advskew;
864 	vhe->state = INIT;
865 	timeout_set_proc(&vhe->ad_tmo, carp_timer_ad, vhe);
866 	timeout_set_proc(&vhe->md_tmo, carp_timer_down, vhe);
867 	timeout_set_proc(&vhe->md6_tmo, carp_timer_down, vhe);
868 
869 	KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
870 
871 	/* mark the first vhe as leader */
872 	if (SRPL_EMPTY_LOCKED(&sc->carp_vhosts)) {
873 		vhe->vhe_leader = 1;
874 		SRPL_INSERT_HEAD_LOCKED(&carp_vh_rc, &sc->carp_vhosts,
875 		    vhe, vhost_entries);
876 		sc->sc_vhe_count = 1;
877 		return (0);
878 	}
879 
880 	SRPL_FOREACH_LOCKED(vhe0, &sc->carp_vhosts, vhost_entries) {
881 		if (SRPL_NEXT_LOCKED(vhe0, vhost_entries) == NULL)
882 			break;
883 	}
884 
885 	SRPL_INSERT_AFTER_LOCKED(&carp_vh_rc, vhe0, vhe, vhost_entries);
886 	sc->sc_vhe_count++;
887 
888 	return (0);
889 }
890 
891 int
892 carp_clone_destroy(struct ifnet *ifp)
893 {
894 	struct carp_softc *sc = ifp->if_softc;
895 
896 	if_addrhook_del(&sc->sc_if, &sc->sc_atask);
897 
898 	NET_LOCK();
899 	carpdetach(sc);
900 	NET_UNLOCK();
901 
902 	ether_ifdetach(ifp);
903 	if_detach(ifp);
904 	carp_destroy_vhosts(ifp->if_softc);
905 	refcnt_finalize(&sc->sc_refcnt, "carpdtor");
906 	free(sc->sc_imo.imo_membership, M_IPMOPTS,
907 	    sc->sc_imo.imo_max_memberships * sizeof(struct in_multi *));
908 	free(sc, M_DEVBUF, sizeof(*sc));
909 	return (0);
910 }
911 
912 void
913 carp_del_all_timeouts(struct carp_softc *sc)
914 {
915 	struct carp_vhost_entry *vhe;
916 
917 	KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
918 	SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) {
919 		timeout_del(&vhe->ad_tmo);
920 		timeout_del(&vhe->md_tmo);
921 		timeout_del(&vhe->md6_tmo);
922 	}
923 }
924 
925 void
926 carpdetach(void *arg)
927 {
928 	struct carp_softc *sc = arg;
929 	struct ifnet *ifp0;
930 	struct srpl *cif;
931 
932 	carp_del_all_timeouts(sc);
933 
934 	if (sc->sc_demote_cnt)
935 		carp_group_demote_adj(&sc->sc_if, -sc->sc_demote_cnt, "detach");
936 	sc->sc_suppress = 0;
937 	sc->sc_sendad_errors = 0;
938 
939 	carp_set_state_all(sc, INIT);
940 	sc->sc_if.if_flags &= ~IFF_UP;
941 	carp_setrun_all(sc, 0);
942 	carp_multicast_cleanup(sc);
943 
944 	ifp0 = sc->sc_carpdev;
945 	if (ifp0 == NULL)
946 		return;
947 
948 	KERNEL_ASSERT_LOCKED(); /* touching if_carp */
949 
950 	cif = &ifp0->if_carp;
951 
952 	/* Restore previous input handler. */
953 	if_ih_remove(ifp0, carp_input, NULL);
954 
955 	SRPL_REMOVE_LOCKED(&carp_sc_rc, cif, sc, carp_softc, sc_list);
956 	sc->sc_carpdev = NULL;
957 
958 	if_linkstatehook_del(ifp0, &sc->sc_ltask);
959 	if_detachhook_del(ifp0, &sc->sc_dtask);
960 	ifpromisc(ifp0, 0);
961 }
962 
963 void
964 carp_destroy_vhosts(struct carp_softc *sc)
965 {
966 	/* XXX bow out? */
967 	struct carp_vhost_entry *vhe;
968 
969 	KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
970 
971 	while ((vhe = SRPL_FIRST_LOCKED(&sc->carp_vhosts)) != NULL) {
972 		SRPL_REMOVE_LOCKED(&carp_vh_rc, &sc->carp_vhosts, vhe,
973 		    carp_vhost_entry, vhost_entries);
974 		carp_vh_unref(NULL, vhe); /* drop last ref */
975 	}
976 	sc->sc_vhe_count = 0;
977 }
978 
979 void
980 carp_prepare_ad(struct mbuf *m, struct carp_vhost_entry *vhe,
981     struct carp_header *ch)
982 {
983 	if (!vhe->vhe_replay_cookie) {
984 		arc4random_buf(&vhe->vhe_replay_cookie,
985 		    sizeof(vhe->vhe_replay_cookie));
986 	}
987 
988 	bcopy(&vhe->vhe_replay_cookie, ch->carp_counter,
989 	    sizeof(ch->carp_counter));
990 
991 	/*
992 	 * For the time being, do not include the IPv6 linklayer addresses
993 	 * in the HMAC.
994 	 */
995 	carp_hmac_generate(vhe, ch->carp_counter, ch->carp_md, HMAC_NOV6LL);
996 }
997 
998 void
999 carp_send_ad_all(void)
1000 {
1001 	struct ifnet *ifp0;
1002 	struct srpl *cif;
1003 	struct carp_softc *vh;
1004 
1005 	KERNEL_ASSERT_LOCKED(); /* touching if_carp */
1006 
1007 	if (carp_send_all_recur > 0)
1008 		return;
1009 	++carp_send_all_recur;
1010 	TAILQ_FOREACH(ifp0, &ifnet, if_list) {
1011 		if (ifp0->if_type != IFT_ETHER)
1012 			continue;
1013 
1014 		cif = &ifp0->if_carp;
1015 		SRPL_FOREACH_LOCKED(vh, cif, sc_list) {
1016 			if ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) ==
1017 			    (IFF_UP|IFF_RUNNING)) {
1018 				carp_vhe_send_ad_all(vh);
1019 			}
1020 		}
1021 	}
1022 	--carp_send_all_recur;
1023 }
1024 
1025 void
1026 carp_vhe_send_ad_all(struct carp_softc *sc)
1027 {
1028 	struct carp_vhost_entry *vhe;
1029 
1030 	KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
1031 
1032 	SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) {
1033 		if (vhe->state == MASTER)
1034 			carp_send_ad(vhe);
1035 	}
1036 }
1037 
/*
 * Advertisement timeout handler (see ad_tmo in carp_new_vhost()).
 * `v' is the struct carp_vhost_entry * to advertise for; the
 * advertisement is sent with the net lock held.
 */
void
carp_timer_ad(void *v)
{
	struct carp_vhost_entry *vhe = v;

	NET_LOCK();
	carp_send_ad(vhe);
	NET_UNLOCK();
}
1045 
/*
 * Build and transmit one CARP advertisement for the vhost entry vhe.
 *
 * An IPv4 packet is sent when the parent softc has sc_naddrs set and,
 * under INET6, an IPv6 packet when sc_naddrs6 is set.  Interface and
 * carp statistics are updated, consecutive send failures/successes are
 * tracked to demote and later un-demote the interface, and the
 * advertisement timeout (vhe->ad_tmo) is re-armed unless both advbase
 * and advskew are 255.
 *
 * Must be called with the net lock held.
 */
1046 void
1047 carp_send_ad(struct carp_vhost_entry *vhe)
1048 {
1049 	struct carp_header ch;
1050 	struct timeval tv;
1051 	struct carp_softc *sc = vhe->parent_sc;
1052 	struct carp_header *ch_ptr;
1053 	struct mbuf *m;
1054 	int error, len, advbase, advskew;
1055 	struct ifaddr *ifa;
1056 	struct sockaddr sa;
1057 
1058 	NET_ASSERT_LOCKED();
1059 
	/* Without a parent device there is nothing to send on. */
1060 	if (sc->sc_carpdev == NULL) {
1061 		sc->sc_if.if_oerrors++;
1062 		return;
1063 	}
1064 
1065 	/* bow out if we've gone to backup (the carp interface is going down) */
1066 	if (sc->sc_bow_out) {
		/* tv is left unset here; it is not used when both are 255 */
1067 		advbase = 255;
1068 		advskew = 255;
1069 	} else {
1070 		advbase = sc->sc_advbase;
1071 		advskew = vhe->advskew;
		/* next advertisement in advbase seconds + advskew/256 seconds */
1072 		tv.tv_sec = advbase;
1073 		if (advbase == 0 && advskew == 0)
1074 			tv.tv_usec = 1 * 1000000 / 256;
1075 		else
1076 			tv.tv_usec = advskew * 1000000 / 256;
1077 	}
1078 
	/* Header template shared by the IPv4 and IPv6 packets below. */
1079 	ch.carp_version = CARP_VERSION;
1080 	ch.carp_type = CARP_ADVERTISEMENT;
1081 	ch.carp_vhid = vhe->vhid;
1082 	ch.carp_demote = carp_group_demote_count(sc) & 0xff;
1083 	ch.carp_advbase = advbase;
1084 	ch.carp_advskew = advskew;
1085 	ch.carp_authlen = 7;	/* XXX DEFINE */
1086 	ch.carp_cksum = 0;
1087 
1088 	sc->cur_vhe = vhe; /* we need the vhe later on the output path */
1089 
1090 	if (sc->sc_naddrs) {
1091 		struct ip *ip;
1092 
1093 		MGETHDR(m, M_DONTWAIT, MT_HEADER);
1094 		if (m == NULL) {
1095 			sc->sc_if.if_oerrors++;
1096 			carpstat_inc(carps_onomem);
1097 			/* XXX maybe less ? */
1098 			goto retry_later;
1099 		}
1100 		len = sizeof(*ip) + sizeof(ch);
1101 		m->m_pkthdr.pf.prio = CARP_IFQ_PRIO;
1102 		m->m_pkthdr.ph_rtableid = sc->sc_if.if_rdomain;
1103 		m->m_pkthdr.len = len;
1104 		m->m_len = len;
1105 		m_align(m, len);
1106 		ip = mtod(m, struct ip *);
1107 		ip->ip_v = IPVERSION;
1108 		ip->ip_hl = sizeof(*ip) >> 2;
1109 		ip->ip_tos = IPTOS_LOWDELAY;
1110 		ip->ip_len = htons(len);
1111 		ip->ip_id = htons(ip_randomid());
1112 		ip->ip_off = htons(IP_DF);
1113 		ip->ip_ttl = CARP_DFLTTL;
1114 		ip->ip_p = IPPROTO_CARP;
1115 		ip->ip_sum = 0;
1116 
1117 		memset(&sa, 0, sizeof(sa));
1118 		sa.sa_family = AF_INET;
1119 		/* Prefer addresses on the parent interface as source for AD. */
1120 		ifa = ifaof_ifpforaddr(&sa, sc->sc_carpdev);
1121 		if (ifa == NULL)
1122 			ifa = ifaof_ifpforaddr(&sa, &sc->sc_if);
1123 		KASSERT(ifa != NULL);
1124 		ip->ip_src.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr;
1125 		ip->ip_dst.s_addr = sc->sc_peer.s_addr;
1126 		if (IN_MULTICAST(ip->ip_dst.s_addr))
1127 			m->m_flags |= M_MCAST;
1128 
1129 		ch_ptr = (struct carp_header *)(ip + 1);
1130 		bcopy(&ch, ch_ptr, sizeof(ch));
1131 		carp_prepare_ad(m, vhe, ch_ptr);
1132 
		/*
		 * Temporarily step m_data past the IP header so that
		 * carp_cksum() is handed only the CARP header bytes.
		 */
1133 		m->m_data += sizeof(*ip);
1134 		ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip));
1135 		m->m_data -= sizeof(*ip);
1136 
1137 		getmicrotime(&sc->sc_if.if_lastchange);
1138 		sc->sc_if.if_opackets++;
1139 		sc->sc_if.if_obytes += len;
1140 		carpstat_inc(carps_opackets);
1141 
1142 		error = ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo,
1143 		    NULL, 0);
		/*
		 * Failure: count the error (saturating at INT_MAX) and demote
		 * once the counter reaches CARP_SENDAD_MAX_ERRORS exactly.
		 * Success: while at/above that threshold, count successes and
		 * undo the demotion after CARP_SENDAD_MIN_SUCCESS of them;
		 * below the threshold simply clear the error counter.
		 */
1144 		if (error) {
1145 			if (error == ENOBUFS)
1146 				carpstat_inc(carps_onomem);
1147 			else
1148 				CARP_LOG(LOG_WARNING, sc,
1149 				    ("ip_output failed: %d", error));
1150 			sc->sc_if.if_oerrors++;
1151 			if (sc->sc_sendad_errors < INT_MAX)
1152 				sc->sc_sendad_errors++;
1153 			if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS(sc))
1154 				carp_group_demote_adj(&sc->sc_if, 1,
1155 				    "> snderrors");
1156 			sc->sc_sendad_success = 0;
1157 		} else {
1158 			if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS(sc)) {
1159 				if (++sc->sc_sendad_success >=
1160 				    CARP_SENDAD_MIN_SUCCESS(sc)) {
1161 					carp_group_demote_adj(&sc->sc_if, -1,
1162 					    "< snderrors");
1163 					sc->sc_sendad_errors = 0;
1164 				}
1165 			} else
1166 				sc->sc_sendad_errors = 0;
1167 		}
		/*
		 * Delayed gratuitous ARP: the leader entry counts
		 * sc_delayed_arp down once per advertisement, sends the ARP
		 * when it reaches zero and then parks the counter at -1.
		 */
1168 		if (vhe->vhe_leader) {
1169 			if (sc->sc_delayed_arp > 0)
1170 				sc->sc_delayed_arp--;
1171 			if (sc->sc_delayed_arp == 0) {
1172 				carp_send_arp(sc);
1173 				sc->sc_delayed_arp = -1;
1174 			}
1175 		}
1176 	}
1177 #ifdef INET6
1178 	if (sc->sc_naddrs6) {
1179 		struct ip6_hdr *ip6;
1180 
1181 		MGETHDR(m, M_DONTWAIT, MT_HEADER);
1182 		if (m == NULL) {
1183 			sc->sc_if.if_oerrors++;
1184 			carpstat_inc(carps_onomem);
1185 			/* XXX maybe less ? */
1186 			goto retry_later;
1187 		}
1188 		len = sizeof(*ip6) + sizeof(ch);
1189 		m->m_pkthdr.pf.prio = CARP_IFQ_PRIO;
1190 		m->m_pkthdr.ph_rtableid = sc->sc_if.if_rdomain;
1191 		m->m_pkthdr.len = len;
1192 		m->m_len = len;
1193 		m_align(m, len);
1194 		m->m_flags |= M_MCAST;
1195 		ip6 = mtod(m, struct ip6_hdr *);
1196 		memset(ip6, 0, sizeof(*ip6));
1197 		ip6->ip6_vfc |= IPV6_VERSION;
1198 		ip6->ip6_hlim = CARP_DFLTTL;
1199 		ip6->ip6_nxt = IPPROTO_CARP;
1200 
1201 		/* set the source address */
1202 		memset(&sa, 0, sizeof(sa));
1203 		sa.sa_family = AF_INET6;
1204 		/* Prefer addresses on the parent interface as source for AD. */
1205 		ifa = ifaof_ifpforaddr(&sa, sc->sc_carpdev);
1206 		if (ifa == NULL)
1207 			ifa = ifaof_ifpforaddr(&sa, &sc->sc_if);
1208 		KASSERT(ifa != NULL);
1209 		bcopy(ifatoia6(ifa)->ia_addr.sin6_addr.s6_addr,
1210 		    &ip6->ip6_src, sizeof(struct in6_addr));
1211 		/* set the multicast destination */
1212 
		/* ff02:<parent if_index>::12 (remaining bytes zeroed above) */
1213 		ip6->ip6_dst.s6_addr16[0] = htons(0xff02);
1214 		ip6->ip6_dst.s6_addr16[1] = htons(sc->sc_carpdev->if_index);
1215 		ip6->ip6_dst.s6_addr8[15] = 0x12;
1216 
1217 		ch_ptr = (struct carp_header *)(ip6 + 1);
1218 		bcopy(&ch, ch_ptr, sizeof(ch));
1219 		carp_prepare_ad(m, vhe, ch_ptr);
1220 
		/* As for IPv4: checksum only the CARP header bytes. */
1221 		m->m_data += sizeof(*ip6);
1222 		ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip6));
1223 		m->m_data -= sizeof(*ip6);
1224 
1225 		getmicrotime(&sc->sc_if.if_lastchange);
1226 		sc->sc_if.if_opackets++;
1227 		sc->sc_if.if_obytes += len;
1228 		carpstat_inc(carps_opackets6);
1229 
1230 		error = ip6_output(m, NULL, NULL, 0, &sc->sc_im6o, NULL);
		/* Same error/success bookkeeping as the IPv4 path above. */
1231 		if (error) {
1232 			if (error == ENOBUFS)
1233 				carpstat_inc(carps_onomem);
1234 			else
1235 				CARP_LOG(LOG_WARNING, sc,
1236 				    ("ip6_output failed: %d", error));
1237 			sc->sc_if.if_oerrors++;
1238 			if (sc->sc_sendad_errors < INT_MAX)
1239 				sc->sc_sendad_errors++;
1240 			if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS(sc))
1241 				carp_group_demote_adj(&sc->sc_if, 1,
1242 					    "> snd6errors");
1243 			sc->sc_sendad_success = 0;
1244 		} else {
1245 			if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS(sc)) {
1246 				if (++sc->sc_sendad_success >=
1247 				    CARP_SENDAD_MIN_SUCCESS(sc)) {
1248 					carp_group_demote_adj(&sc->sc_if, -1,
1249 					    "< snd6errors");
1250 					sc->sc_sendad_errors = 0;
1251 				}
1252 			} else
1253 				sc->sc_sendad_errors = 0;
1254 		}
1255 	}
1256 #endif /* INET6 */
1257 
	/*
	 * Common exit, also reached on mbuf allocation failure: forget the
	 * current vhe and re-arm the advertisement timer, except when both
	 * advbase and advskew are 255 (the bow-out values set above).
	 */
1258 retry_later:
1259 	sc->cur_vhe = NULL;
1260 	if (advbase != 255 || advskew != 255)
1261 		timeout_add_tv(&vhe->ad_tmo, &tv);
1262 }
1263 
1264 /*
1265  * Broadcast a gratuitous ARP request containing
1266  * the virtual router MAC address for each IP address
1267  * associated with the virtual router.
1268  */
1269 void
1270 carp_send_arp(struct carp_softc *sc)
1271 {
1272 	struct ifaddr *ifa;
1273 	in_addr_t in;
1274 
1275 	TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) {
1276 
1277 		if (ifa->ifa_addr->sa_family != AF_INET)
1278 			continue;
1279 
1280 		in = ifatoia(ifa)->ia_addr.sin_addr.s_addr;
1281 		arprequest(&sc->sc_if, &in, &in, sc->sc_ac.ac_enaddr);
1282 	}
1283 }
1284 
1285 #ifdef INET6
1286 void
1287 carp_send_na(struct carp_softc *sc)
1288 {
1289 	struct ifaddr *ifa;
1290 	struct in6_addr *in6;
1291 	static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
1292 
1293 	TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) {
1294 
1295 		if (ifa->ifa_addr->sa_family != AF_INET6)
1296 			continue;
1297 
1298 		in6 = &ifatoia6(ifa)->ia_addr.sin6_addr;
1299 		nd6_na_output(&sc->sc_if, &mcast, in6,
1300 		    ND_NA_FLAG_OVERRIDE |
1301 		    (ip6_forwarding ? ND_NA_FLAG_ROUTER : 0), 1, NULL);
1302 	}
1303 }
1304 #endif /* INET6 */
1305 
1306 void
1307 carp_update_lsmask(struct carp_softc *sc)
1308 {
1309 	struct carp_vhost_entry *vhe;
1310 	int count;
1311 
1312 	if (sc->sc_balancing == CARP_BAL_NONE)
1313 		return;
1314 
1315 	sc->sc_lsmask = 0;
1316 	count = 0;
1317 
1318 	KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
1319 	SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) {
1320 		if (vhe->state == MASTER && count < sizeof(sc->sc_lsmask) * 8)
1321 			sc->sc_lsmask |= 1 << count;
1322 		count++;
1323 	}
1324 	sc->sc_lscount = count;
1325 	CARP_LOG(LOG_DEBUG, sc, ("carp_update_lsmask: %x", sc->sc_lsmask));
1326 }
1327 
1328 int
1329 carp_iamatch(struct ifnet *ifp)
1330 {
1331 	struct carp_softc *sc = ifp->if_softc;
1332 	struct carp_vhost_entry *vhe;
1333 	struct srp_ref sr;
1334 	int match = 0;
1335 
1336 	vhe = SRPL_FIRST(&sr, &sc->carp_vhosts);
1337 	if (vhe->state == MASTER)
1338 		match = 1;
1339 	SRPL_LEAVE(&sr);
1340 
1341 	return (match);
1342 }
1343 
1344 int
1345 carp_ourether(struct ifnet *ifp, uint8_t *ena)
1346 {
1347 	struct srpl *cif = &ifp->if_carp;
1348 	struct carp_softc *sc;
1349 	struct srp_ref sr;
1350 	int match = 0;
1351 
1352 	KASSERT(ifp->if_type == IFT_ETHER);
1353 
1354 	SRPL_FOREACH(sc, &sr, cif, sc_list) {
1355 		if ((sc->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) !=
1356 		    (IFF_UP|IFF_RUNNING))
1357 			continue;
1358 		if (carp_vhe_match(sc, ena)) {
1359 			match = 1;
1360 			break;
1361 		}
1362 	}
1363 	SRPL_LEAVE(&sr);
1364 
1365 	return (match);
1366 }
1367 
1368 int
1369 carp_vhe_match(struct carp_softc *sc, uint8_t *ena)
1370 {
1371 	struct carp_vhost_entry *vhe;
1372 	struct srp_ref sr;
1373 	int match = 0;
1374 
1375 	vhe = SRPL_FIRST(&sr, &sc->carp_vhosts);
1376 	match = (vhe->state == MASTER || sc->sc_balancing >= CARP_BAL_IP) &&
1377 	    !memcmp(ena, sc->sc_ac.ac_enaddr, ETHER_ADDR_LEN);
1378 	SRPL_LEAVE(&sr);
1379 
1380 	return (match);
1381 }
1382 
1383 int
1384 carp_input(struct ifnet *ifp0, struct mbuf *m, void *cookie)
1385 {
1386 	struct ether_header *eh;
1387 	struct srpl *cif;
1388 	struct carp_softc *sc;
1389 	struct srp_ref sr;
1390 
1391 #if NVLAN > 0
1392 	/*
1393 	 * If the underlying interface removed the VLAN header itself,
1394 	 * it's not for us.
1395 	 */
1396 	if (ISSET(m->m_flags, M_VLANTAG))
1397 		return (0);
1398 #endif
1399 
1400 	eh = mtod(m, struct ether_header *);
1401 	cif = &ifp0->if_carp;
1402 
1403 	SRPL_FOREACH(sc, &sr, cif, sc_list) {
1404 		if ((sc->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) !=
1405 		    (IFF_UP|IFF_RUNNING))
1406 			continue;
1407 
1408 		if (carp_vhe_match(sc, eh->ether_dhost)) {
1409 			/*
1410 			 * These packets look like layer 2 multicast but they
1411 			 * are unicast at layer 3. With help of the tag the
1412 			 * mbuf's M_MCAST flag can be removed by carp_lsdrop()
1413 			 * after we have passed layer 2.
1414 			 */
1415 			if (sc->sc_balancing == CARP_BAL_IP) {
1416 				struct m_tag *mtag;
1417 				mtag = m_tag_get(PACKET_TAG_CARP_BAL_IP, 0,
1418 				    M_NOWAIT);
1419 				if (mtag == NULL) {
1420 					m_freem(m);
1421 					goto out;
1422 				}
1423 				m_tag_prepend(m, mtag);
1424 			}
1425 			break;
1426 		}
1427 	}
1428 
1429 	if (sc == NULL) {
1430 		SRPL_LEAVE(&sr);
1431 
1432 		if (!ETHER_IS_MULTICAST(eh->ether_dhost))
1433 			return (0);
1434 
1435 		/*
1436 		 * XXX Should really check the list of multicast addresses
1437 		 * for each CARP interface _before_ copying.
1438 		 */
1439 		SRPL_FOREACH(sc, &sr, cif, sc_list) {
1440 			struct mbuf *m0;
1441 
1442 			if (!(sc->sc_if.if_flags & IFF_UP))
1443 				continue;
1444 
1445 			m0 = m_dup_pkt(m, ETHER_ALIGN, M_DONTWAIT);
1446 			if (m0 == NULL)
1447 				continue;
1448 
1449 			if_vinput(&sc->sc_if, m0);
1450 		}
1451 		SRPL_LEAVE(&sr);
1452 
1453 		return (0);
1454 	}
1455 
1456 	if_vinput(&sc->sc_if, m);
1457 out:
1458 	SRPL_LEAVE(&sr);
1459 
1460 	return (1);
1461 }
1462 
1463 int
1464 carp_lsdrop(struct ifnet *ifp, struct mbuf *m, sa_family_t af, u_int32_t *src,
1465     u_int32_t *dst, int drop)
1466 {
1467 	struct carp_softc *sc;
1468 	u_int32_t fold;
1469 	struct m_tag *mtag;
1470 
1471 	if (ifp->if_type != IFT_CARP)
1472 		return 0;
1473 	sc = ifp->if_softc;
1474 	if (sc->sc_balancing == CARP_BAL_NONE)
1475 		return 0;
1476 
1477 	/*
1478 	 * Remove M_MCAST flag from mbuf of balancing ip traffic, since the fact
1479 	 * that it is layer 2 multicast does not implicate that it is also layer
1480 	 * 3 multicast.
1481 	 */
1482 	if (m->m_flags & M_MCAST &&
1483 	    (mtag = m_tag_find(m, PACKET_TAG_CARP_BAL_IP, NULL))) {
1484 		m_tag_delete(m, mtag);
1485 		m->m_flags &= ~M_MCAST;
1486 	}
1487 
1488 	/*
1489 	 * Return without making a drop decision. This allows to clear the
1490 	 * M_MCAST flag and do nothing else.
1491 	 */
1492 	if (!drop)
1493 		return 0;
1494 
1495 	/*
1496 	 * Never drop carp advertisements.
1497 	 * XXX Bad idea to pass all broadcast / multicast traffic?
1498 	 */
1499 	if (m->m_flags & (M_BCAST|M_MCAST))
1500 		return 0;
1501 
1502 	fold = src[0] ^ dst[0];
1503 #ifdef INET6
1504 	if (af == AF_INET6) {
1505 		int i;
1506 		for (i = 1; i < 4; i++)
1507 			fold ^= src[i] ^ dst[i];
1508 	}
1509 #endif
1510 	if (sc->sc_lscount == 0) /* just to be safe */
1511 		return 1;
1512 
1513 	return ((1 << (ntohl(fold) % sc->sc_lscount)) & sc->sc_lsmask) == 0;
1514 }
1515 
/*
 * Timeout callback: v is the vhost entry whose master-down timer
 * fired.  Takes the net lock around carp_master_down().
 */
void
carp_timer_down(void *v)
{
	struct carp_vhost_entry *vhe = v;

	NET_LOCK();
	carp_master_down(vhe);
	NET_UNLOCK();
}
1523 
1524 void
1525 carp_master_down(struct carp_vhost_entry *vhe)
1526 {
1527 	struct carp_softc *sc = vhe->parent_sc;
1528 
1529 	NET_ASSERT_LOCKED();
1530 
1531 	switch (vhe->state) {
1532 	case INIT:
1533 		printf("%s: master_down event in INIT state\n",
1534 		    sc->sc_if.if_xname);
1535 		break;
1536 	case MASTER:
1537 		break;
1538 	case BACKUP:
1539 		carp_set_state(vhe, MASTER);
1540 		carp_send_ad(vhe);
1541 		if (sc->sc_balancing == CARP_BAL_NONE && vhe->vhe_leader) {
1542 			carp_send_arp(sc);
1543 			/* Schedule a delayed ARP to deal w/ some L3 switches */
1544 			sc->sc_delayed_arp = 2;
1545 #ifdef INET6
1546 			carp_send_na(sc);
1547 #endif /* INET6 */
1548 		}
1549 		carp_setrun(vhe, 0);
1550 		carpstat_inc(carps_preempt);
1551 		break;
1552 	}
1553 }
1554 
1555 void
1556 carp_setrun_all(struct carp_softc *sc, sa_family_t af)
1557 {
1558 	struct carp_vhost_entry *vhe;
1559 
1560 	KERNEL_ASSERT_LOCKED(); /* touching carp_vhost */
1561 	SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) {
1562 		carp_setrun(vhe, af);
1563 	}
1564 }
1565 
1566 /*
1567  * When in backup state, af indicates whether to reset the master down timer
1568  * for v4 or v6. If it's set to zero, reset the ones which are already pending.
1569  */
1570 void
1571 carp_setrun(struct carp_vhost_entry *vhe, sa_family_t af)
1572 {
1573 	struct timeval tv;
1574 	struct carp_softc *sc = vhe->parent_sc;
1575 
1576 	if (sc->sc_carpdev == NULL) {
1577 		sc->sc_if.if_flags &= ~IFF_RUNNING;
1578 		carp_set_state_all(sc, INIT);
1579 		return;
1580 	}
1581 
1582 	if (memcmp(((struct arpcom *)sc->sc_carpdev)->ac_enaddr,
1583 	    sc->sc_ac.ac_enaddr, ETHER_ADDR_LEN) == 0)
1584 		sc->sc_realmac = 1;
1585 	else
1586 		sc->sc_realmac = 0;
1587 
1588 	if (sc->sc_if.if_flags & IFF_UP && vhe->vhid > 0 &&
1589 	    (sc->sc_naddrs || sc->sc_naddrs6) && !sc->sc_suppress) {
1590 		sc->sc_if.if_flags |= IFF_RUNNING;
1591 	} else {
1592 		sc->sc_if.if_flags &= ~IFF_RUNNING;
1593 		return;
1594 	}
1595 
1596 	switch (vhe->state) {
1597 	case INIT:
1598 		carp_set_state(vhe, BACKUP);
1599 		carp_setrun(vhe, 0);
1600 		break;
1601 	case BACKUP:
1602 		timeout_del(&vhe->ad_tmo);
1603 		tv.tv_sec = 3 * sc->sc_advbase;
1604 		if (sc->sc_advbase == 0 && vhe->advskew == 0)
1605 			tv.tv_usec = 3 * 1000000 / 256;
1606 		else if (sc->sc_advbase == 0)
1607 			tv.tv_usec = 3 * vhe->advskew * 1000000 / 256;
1608 		else
1609 			tv.tv_usec = vhe->advskew * 1000000 / 256;
1610 		if (vhe->vhe_leader)
1611 			sc->sc_delayed_arp = -1;
1612 		switch (af) {
1613 		case AF_INET:
1614 			timeout_add_tv(&vhe->md_tmo, &tv);
1615 			break;
1616 #ifdef INET6
1617 		case AF_INET6:
1618 			timeout_add_tv(&vhe->md6_tmo, &tv);
1619 			break;
1620 #endif /* INET6 */
1621 		default:
1622 			if (sc->sc_naddrs)
1623 				timeout_add_tv(&vhe->md_tmo, &tv);
1624 			if (sc->sc_naddrs6)
1625 				timeout_add_tv(&vhe->md6_tmo, &tv);
1626 			break;
1627 		}
1628 		break;
1629 	case MASTER:
1630 		tv.tv_sec = sc->sc_advbase;
1631 		if (sc->sc_advbase == 0 && vhe->advskew == 0)
1632 			tv.tv_usec = 1 * 1000000 / 256;
1633 		else
1634 			tv.tv_usec = vhe->advskew * 1000000 / 256;
1635 		timeout_add_tv(&vhe->ad_tmo, &tv);
1636 		break;
1637 	}
1638 }
1639 
1640 void
1641 carp_multicast_cleanup(struct carp_softc *sc)
1642 {
1643 	struct ip_moptions *imo = &sc->sc_imo;
1644 #ifdef INET6
1645 	struct ip6_moptions *im6o = &sc->sc_im6o;
1646 #endif
1647 	u_int16_t n = imo->imo_num_memberships;
1648 
1649 	/* Clean up our own multicast memberships */
1650 	while (n-- > 0) {
1651 		if (imo->imo_membership[n] != NULL) {
1652 			in_delmulti(imo->imo_membership[n]);
1653 			imo->imo_membership[n] = NULL;
1654 		}
1655 	}
1656 	imo->imo_num_memberships = 0;
1657 	imo->imo_ifidx = 0;
1658 
1659 #ifdef INET6
1660 	while (!LIST_EMPTY(&im6o->im6o_memberships)) {
1661 		struct in6_multi_mship *imm =
1662 		    LIST_FIRST(&im6o->im6o_memberships);
1663 
1664 		LIST_REMOVE(imm, i6mm_chain);
1665 		in6_leavegroup(imm);
1666 	}
1667 	im6o->im6o_ifidx = 0;
1668 #endif
1669 
1670 	/* And any other multicast memberships */
1671 	carp_ether_purgemulti(sc);
1672 }
1673 
1674 int
1675 carp_set_ifp(struct carp_softc *sc, struct ifnet *ifp0)
1676 {
1677 	struct srpl *cif;
1678 	struct carp_softc *vr, *last = NULL, *after = NULL;
1679 	int myself = 0, error = 0;
1680 
1681 	KASSERT(ifp0 != sc->sc_carpdev);
1682 	KERNEL_ASSERT_LOCKED(); /* touching if_carp */
1683 
1684 	if ((ifp0->if_flags & IFF_MULTICAST) == 0)
1685 		return (EADDRNOTAVAIL);
1686 
1687 	if (ifp0->if_type != IFT_ETHER)
1688 		return (EINVAL);
1689 
1690 	cif = &ifp0->if_carp;
1691 	if (carp_check_dup_vhids(sc, cif, NULL))
1692 		return (EINVAL);
1693 
1694 	if ((error = ifpromisc(ifp0, 1)))
1695 		return (error);
1696 
1697 	/* detach from old interface */
1698 	if (sc->sc_carpdev != NULL)
1699 		carpdetach(sc);
1700 
1701 	/* attach carp interface to physical interface */
1702 	if_detachhook_add(ifp0, &sc->sc_dtask);
1703 	if_linkstatehook_add(ifp0, &sc->sc_ltask);
1704 
1705 	sc->sc_carpdev = ifp0;
1706 	sc->sc_if.if_capabilities = ifp0->if_capabilities &
1707 	    IFCAP_CSUM_MASK;
1708 
1709 	SRPL_FOREACH_LOCKED(vr, cif, sc_list) {
1710 		struct carp_vhost_entry *vrhead, *schead;
1711 		last = vr;
1712 
1713 		if (vr == sc)
1714 			myself = 1;
1715 
1716 		vrhead = SRPL_FIRST_LOCKED(&vr->carp_vhosts);
1717 		schead = SRPL_FIRST_LOCKED(&sc->carp_vhosts);
1718 		if (vrhead->vhid < schead->vhid)
1719 			after = vr;
1720 	}
1721 
1722 	if (!myself) {
1723 		/* We're trying to keep things in order */
1724 		if (last == NULL) {
1725 			SRPL_INSERT_HEAD_LOCKED(&carp_sc_rc, cif,
1726 			    sc, sc_list);
1727 		} else if (after == NULL) {
1728 			SRPL_INSERT_AFTER_LOCKED(&carp_sc_rc, last,
1729 			    sc, sc_list);
1730 		} else {
1731 			SRPL_INSERT_AFTER_LOCKED(&carp_sc_rc, after,
1732 			    sc, sc_list);
1733 		}
1734 	}
1735 	if (sc->sc_naddrs || sc->sc_naddrs6)
1736 		sc->sc_if.if_flags |= IFF_UP;
1737 	carp_set_enaddr(sc);
1738 
1739 	/* Change input handler of the physical interface. */
1740 	if_ih_insert(ifp0, carp_input, NULL);
1741 
1742 	carp_carpdev_state(sc);
1743 
1744 	return (0);
1745 }
1746 
1747 void
1748 carp_set_vhe_enaddr(struct carp_vhost_entry *vhe)
1749 {
1750 	struct carp_softc *sc = vhe->parent_sc;
1751 
1752 	if (vhe->vhid != 0 && sc->sc_carpdev) {
1753 		if (vhe->vhe_leader && sc->sc_balancing == CARP_BAL_IP)
1754 			vhe->vhe_enaddr[0] = 1;
1755 		else
1756 			vhe->vhe_enaddr[0] = 0;
1757 		vhe->vhe_enaddr[1] = 0;
1758 		vhe->vhe_enaddr[2] = 0x5e;
1759 		vhe->vhe_enaddr[3] = 0;
1760 		vhe->vhe_enaddr[4] = 1;
1761 		vhe->vhe_enaddr[5] = vhe->vhid;
1762 	} else
1763 		memset(vhe->vhe_enaddr, 0, ETHER_ADDR_LEN);
1764 }
1765 
1766 void
1767 carp_set_enaddr(struct carp_softc *sc)
1768 {
1769 	struct carp_vhost_entry *vhe;
1770 
1771 	KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
1772 	SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries)
1773 		carp_set_vhe_enaddr(vhe);
1774 
1775 	vhe = SRPL_FIRST_LOCKED(&sc->carp_vhosts);
1776 
1777 	/*
1778 	 * Use the carp lladdr if the running one isn't manually set.
1779 	 * Only compare static parts of the lladdr.
1780 	 */
1781 	if ((memcmp(sc->sc_ac.ac_enaddr + 1, vhe->vhe_enaddr + 1,
1782 	    ETHER_ADDR_LEN - 2) == 0) ||
1783 	    (!sc->sc_ac.ac_enaddr[0] && !sc->sc_ac.ac_enaddr[1] &&
1784 	    !sc->sc_ac.ac_enaddr[2] && !sc->sc_ac.ac_enaddr[3] &&
1785 	    !sc->sc_ac.ac_enaddr[4] && !sc->sc_ac.ac_enaddr[5]))
1786 		bcopy(vhe->vhe_enaddr, sc->sc_ac.ac_enaddr, ETHER_ADDR_LEN);
1787 
1788 	/* Make sure the enaddr has changed before further twiddling. */
1789 	if (memcmp(sc->sc_ac.ac_enaddr, sc->sc_curlladdr, ETHER_ADDR_LEN) != 0) {
1790 		bcopy(sc->sc_ac.ac_enaddr, LLADDR(sc->sc_if.if_sadl),
1791 		    ETHER_ADDR_LEN);
1792 		bcopy(sc->sc_ac.ac_enaddr, sc->sc_curlladdr, ETHER_ADDR_LEN);
1793 #ifdef INET6
1794 		/*
1795 		 * (re)attach a link-local address which matches
1796 		 * our new MAC address.
1797 		 */
1798 		if (sc->sc_naddrs6)
1799 			in6_ifattach_linklocal(&sc->sc_if, NULL);
1800 #endif
1801 		carp_set_state_all(sc, INIT);
1802 		carp_setrun_all(sc, 0);
1803 	}
1804 }
1805 
1806 void
1807 carp_addr_updated(void *v)
1808 {
1809 	struct carp_softc *sc = (struct carp_softc *) v;
1810 	struct ifaddr *ifa;
1811 	int new_naddrs = 0, new_naddrs6 = 0;
1812 
1813 	TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) {
1814 		if (ifa->ifa_addr->sa_family == AF_INET)
1815 			new_naddrs++;
1816 #ifdef INET6
1817 		else if (ifa->ifa_addr->sa_family == AF_INET6)
1818 			new_naddrs6++;
1819 #endif /* INET6 */
1820 	}
1821 
1822 	/* We received address changes from if_addrhooks callback */
1823 	if (new_naddrs != sc->sc_naddrs || new_naddrs6 != sc->sc_naddrs6) {
1824 
1825 		sc->sc_naddrs = new_naddrs;
1826 		sc->sc_naddrs6 = new_naddrs6;
1827 
1828 		/* Re-establish multicast membership removed by in_control */
1829 		if (IN_MULTICAST(sc->sc_peer.s_addr)) {
1830 			if (!in_hasmulti(&sc->sc_peer, &sc->sc_if)) {
1831 				struct in_multi **imm =
1832 				    sc->sc_imo.imo_membership;
1833 				u_int16_t maxmem =
1834 				    sc->sc_imo.imo_max_memberships;
1835 
1836 				memset(&sc->sc_imo, 0, sizeof(sc->sc_imo));
1837 				sc->sc_imo.imo_membership = imm;
1838 				sc->sc_imo.imo_max_memberships = maxmem;
1839 
1840 				if (sc->sc_carpdev != NULL && sc->sc_naddrs > 0)
1841 					carp_join_multicast(sc);
1842 			}
1843 		}
1844 
1845 		if (sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0) {
1846 			sc->sc_if.if_flags &= ~IFF_UP;
1847 			carp_set_state_all(sc, INIT);
1848 		} else
1849 			carp_hmac_prepare(sc);
1850 	}
1851 
1852 	carp_setrun_all(sc, 0);
1853 }
1854 
1855 int
1856 carp_set_addr(struct carp_softc *sc, struct sockaddr_in *sin)
1857 {
1858 	struct in_addr *in = &sin->sin_addr;
1859 	int error;
1860 
1861 	KASSERT(sc->sc_carpdev != NULL);
1862 
1863 	/* XXX is this necessary? */
1864 	if (in->s_addr == INADDR_ANY) {
1865 		carp_setrun_all(sc, 0);
1866 		return (0);
1867 	}
1868 
1869 	if (sc->sc_naddrs == 0 && (error = carp_join_multicast(sc)) != 0)
1870 		return (error);
1871 
1872 	carp_set_state_all(sc, INIT);
1873 
1874 	return (0);
1875 }
1876 
1877 int
1878 carp_join_multicast(struct carp_softc *sc)
1879 {
1880 	struct ip_moptions *imo = &sc->sc_imo;
1881 	struct in_multi *imm;
1882 	struct in_addr addr;
1883 
1884 	if (!IN_MULTICAST(sc->sc_peer.s_addr))
1885 		return (0);
1886 
1887 	addr.s_addr = sc->sc_peer.s_addr;
1888 	if ((imm = in_addmulti(&addr, &sc->sc_if)) == NULL)
1889 		return (ENOBUFS);
1890 
1891 	imo->imo_membership[0] = imm;
1892 	imo->imo_num_memberships = 1;
1893 	imo->imo_ifidx = sc->sc_if.if_index;
1894 	imo->imo_ttl = CARP_DFLTTL;
1895 	imo->imo_loop = 0;
1896 	return (0);
1897 }
1898 
1899 
1900 #ifdef INET6
1901 int
1902 carp_set_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6)
1903 {
1904 	int error;
1905 
1906 	KASSERT(sc->sc_carpdev != NULL);
1907 
1908 	if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
1909 		carp_setrun_all(sc, 0);
1910 		return (0);
1911 	}
1912 
1913 	if (sc->sc_naddrs6 == 0 && (error = carp_join_multicast6(sc)) != 0)
1914 		return (error);
1915 
1916 	carp_set_state_all(sc, INIT);
1917 
1918 	return (0);
1919 }
1920 
1921 int
1922 carp_join_multicast6(struct carp_softc *sc)
1923 {
1924 	struct in6_multi_mship *imm, *imm2;
1925 	struct ip6_moptions *im6o = &sc->sc_im6o;
1926 	struct sockaddr_in6 addr6;
1927 	int error;
1928 
1929 	/* Join IPv6 CARP multicast group */
1930 	memset(&addr6, 0, sizeof(addr6));
1931 	addr6.sin6_family = AF_INET6;
1932 	addr6.sin6_len = sizeof(addr6);
1933 	addr6.sin6_addr.s6_addr16[0] = htons(0xff02);
1934 	addr6.sin6_addr.s6_addr16[1] = htons(sc->sc_if.if_index);
1935 	addr6.sin6_addr.s6_addr8[15] = 0x12;
1936 	if ((imm = in6_joingroup(&sc->sc_if,
1937 	    &addr6.sin6_addr, &error)) == NULL) {
1938 		return (error);
1939 	}
1940 	/* join solicited multicast address */
1941 	memset(&addr6.sin6_addr, 0, sizeof(addr6.sin6_addr));
1942 	addr6.sin6_addr.s6_addr16[0] = htons(0xff02);
1943 	addr6.sin6_addr.s6_addr16[1] = htons(sc->sc_if.if_index);
1944 	addr6.sin6_addr.s6_addr32[1] = 0;
1945 	addr6.sin6_addr.s6_addr32[2] = htonl(1);
1946 	addr6.sin6_addr.s6_addr32[3] = 0;
1947 	addr6.sin6_addr.s6_addr8[12] = 0xff;
1948 	if ((imm2 = in6_joingroup(&sc->sc_if,
1949 	    &addr6.sin6_addr, &error)) == NULL) {
1950 		in6_leavegroup(imm);
1951 		return (error);
1952 	}
1953 
1954 	/* apply v6 multicast membership */
1955 	im6o->im6o_ifidx = sc->sc_if.if_index;
1956 	if (imm)
1957 		LIST_INSERT_HEAD(&im6o->im6o_memberships, imm,
1958 		    i6mm_chain);
1959 	if (imm2)
1960 		LIST_INSERT_HEAD(&im6o->im6o_memberships, imm2,
1961 		    i6mm_chain);
1962 
1963 	return (0);
1964 }
1965 
1966 #endif /* INET6 */
1967 
/*
 * ioctl handler of the carp interface.
 *
 * Dispatches on cmd (address assignment, flag changes, setting and
 * getting the carp configuration, multicast and interface-group
 * requests).  Paths that leave the switch via "break" fall through to
 * a final check that re-applies the Ethernet address when ac_enaddr
 * no longer equals sc_curlladdr; the direct "return" paths skip it.
 *
 * Returns 0 on success or an errno value; unknown commands yield
 * ENOTTY.
 */
1968 int
1969 carp_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr)
1970 {
1971 	struct proc *p = curproc;	/* XXX */
1972 	struct carp_softc *sc = ifp->if_softc;
1973 	struct carp_vhost_entry *vhe;
1974 	struct carpreq carpr;
	/* addr is interpreted as ifaddr or ifreq depending on cmd */
1975 	struct ifaddr *ifa = (struct ifaddr *)addr;
1976 	struct ifreq *ifr = (struct ifreq *)addr;
1977 	struct ifnet *ifp0 = sc->sc_carpdev;
1978 	int i, error = 0;
1979 
1980 	switch (cmd) {
1981 	case SIOCSIFADDR:
		/* An address can only be set once a parent device exists. */
1982 		if (ifp0 == NULL)
1983 			return (EINVAL);
1984 
1985 		switch (ifa->ifa_addr->sa_family) {
1986 		case AF_INET:
1987 			sc->sc_if.if_flags |= IFF_UP;
1988 			error = carp_set_addr(sc, satosin(ifa->ifa_addr));
1989 			break;
1990 #ifdef INET6
1991 		case AF_INET6:
1992 			sc->sc_if.if_flags |= IFF_UP;
1993 			error = carp_set_addr6(sc, satosin6(ifa->ifa_addr));
1994 			break;
1995 #endif /* INET6 */
1996 		default:
1997 			error = EAFNOSUPPORT;
1998 			break;
1999 		}
2000 		break;
2001 
2002 	case SIOCSIFFLAGS:
2003 		KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
		/* The state of the first vhost entry drives the decision. */
2004 		vhe = SRPL_FIRST_LOCKED(&sc->carp_vhosts);
2005 		if (vhe->state != INIT && !(ifr->ifr_flags & IFF_UP)) {
			/* Going down: stop timers, bow out, then reset. */
2006 			carp_del_all_timeouts(sc);
2007 
2008 			/* we need the interface up to bow out */
2009 			sc->sc_if.if_flags |= IFF_UP;
2010 			sc->sc_bow_out = 1;
2011 			carp_vhe_send_ad_all(sc);
2012 			sc->sc_bow_out = 0;
2013 
2014 			sc->sc_if.if_flags &= ~IFF_UP;
2015 			carp_set_state_all(sc, INIT);
2016 			carp_setrun_all(sc, 0);
2017 		} else if (vhe->state == INIT && (ifr->ifr_flags & IFF_UP)) {
			/* Coming up from INIT. */
2018 			sc->sc_if.if_flags |= IFF_UP;
2019 			carp_setrun_all(sc, 0);
2020 		}
2021 		break;
2022 
2023 	case SIOCSVH:
		/* Set the carp configuration from a user-supplied carpreq. */
2024 		KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
2025 		vhe = SRPL_FIRST_LOCKED(&sc->carp_vhosts);
2026 		if ((error = suser(p)) != 0)
2027 			break;
2028 		if ((error = copyin(ifr->ifr_data, &carpr, sizeof carpr)))
2029 			break;
		/*
		 * NOTE(review): error is reassigned by carp_vhids_ioctl()
		 * further down, so the "error > 0" test at the end of this
		 * case does not appear able to fire -- confirm.
		 */
2030 		error = 1;
		/* A named parent device must exist. */
2031 		if (carpr.carpr_carpdev[0] != '\0' &&
2032 		    (ifp0 = ifunit(carpr.carpr_carpdev)) == NULL)
2033 			return (EINVAL);
		/* A zero peer address selects the default CARP group. */
2034 		if (carpr.carpr_peer.s_addr == 0)
2035 			sc->sc_peer.s_addr = INADDR_CARP_GROUP;
2036 		else
2037 			sc->sc_peer.s_addr = carpr.carpr_peer.s_addr;
2038 		if (ifp0 != sc->sc_carpdev) {
2039 			if ((error = carp_set_ifp(sc, ifp0)))
2040 				return (error);
2041 		}
		/* Honour a requested state change unless still in INIT. */
2042 		if (vhe->state != INIT && carpr.carpr_state != vhe->state) {
2043 			switch (carpr.carpr_state) {
2044 			case BACKUP:
2045 				timeout_del(&vhe->ad_tmo);
2046 				carp_set_state_all(sc, BACKUP);
2047 				carp_setrun_all(sc, 0);
2048 				break;
2049 			case MASTER:
2050 				KERNEL_ASSERT_LOCKED();
2051 				/* touching carp_vhosts */
2052 				SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts,
2053 				    vhost_entries)
2054 					carp_master_down(vhe);
2055 				break;
2056 			default:
2057 				break;
2058 			}
2059 		}
2060 		if ((error = carp_vhids_ioctl(sc, &carpr)))
2061 			return (error);
		/* advbase: negative means "leave unchanged", max is 255 */
2062 		if (carpr.carpr_advbase >= 0) {
2063 			if (carpr.carpr_advbase > 255) {
2064 				error = EINVAL;
2065 				break;
2066 			}
2067 			sc->sc_advbase = carpr.carpr_advbase;
2068 			error--;
2069 		}
		/* Copy changed advskews to the vhost entries, in list order. */
2070 		if (memcmp(sc->sc_advskews, carpr.carpr_advskews,
2071 		    sizeof(sc->sc_advskews))) {
2072 			i = 0;
2073 			KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
2074 			SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts,
2075 			    vhost_entries)
2076 				vhe->advskew = carpr.carpr_advskews[i++];
2077 			bcopy(carpr.carpr_advskews, sc->sc_advskews,
2078 			    sizeof(sc->sc_advskews));
2079 		}
		/* A new balancing mode changes addresses and the ls mask. */
2080 		if (sc->sc_balancing != carpr.carpr_balancing) {
2081 			if (carpr.carpr_balancing > CARP_BAL_MAXID) {
2082 				error = EINVAL;
2083 				break;
2084 			}
2085 			sc->sc_balancing = carpr.carpr_balancing;
2086 			carp_set_enaddr(sc);
2087 			carp_update_lsmask(sc);
2088 		}
2089 		bcopy(carpr.carpr_key, sc->sc_key, sizeof(sc->sc_key));
2090 		if (error > 0)
2091 			error = EINVAL;
2092 		else {
2093 			error = 0;
2094 			carp_hmac_prepare(sc);
2095 			carp_setrun_all(sc, 0);
2096 		}
2097 		break;
2098 
2099 	case SIOCGVH:
		/* Report the current configuration in a zeroed carpreq. */
2100 		memset(&carpr, 0, sizeof(carpr));
2101 		if (ifp0 != NULL)
2102 			strlcpy(carpr.carpr_carpdev, ifp0->if_xname, IFNAMSIZ);
2103 		i = 0;
2104 		KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
2105 		SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) {
2106 			carpr.carpr_vhids[i] = vhe->vhid;
2107 			carpr.carpr_advskews[i] = vhe->advskew;
2108 			carpr.carpr_states[i] = vhe->state;
2109 			i++;
2110 		}
2111 		carpr.carpr_advbase = sc->sc_advbase;
2112 		carpr.carpr_balancing = sc->sc_balancing;
		/* The key is only copied out when suser() succeeds. */
2113 		if (suser(p) == 0)
2114 			bcopy(sc->sc_key, carpr.carpr_key,
2115 			    sizeof(carpr.carpr_key));
2116 		carpr.carpr_peer.s_addr = sc->sc_peer.s_addr;
2117 		error = copyout(&carpr, ifr->ifr_data, sizeof(carpr));
2118 		break;
2119 
2120 	case SIOCADDMULTI:
2121 		error = carp_ether_addmulti(sc, ifr);
2122 		break;
2123 
2124 	case SIOCDELMULTI:
2125 		error = carp_ether_delmulti(sc, ifr);
2126 		break;
2127 	case SIOCAIFGROUP:
2128 	case SIOCDIFGROUP:
		/* Only relevant while this interface has a demotion count. */
2129 		if (sc->sc_demote_cnt)
2130 			carp_ifgroup_ioctl(ifp, cmd, addr);
2131 		break;
2132 	case SIOCSIFGATTR:
2133 		carp_ifgattr_ioctl(ifp, cmd, addr);
2134 		break;
2135 	default:
2136 		error = ENOTTY;
2137 	}
2138 
	/* Re-apply the lladdr if it differs from the last one applied. */
2139 	if (memcmp(sc->sc_ac.ac_enaddr, sc->sc_curlladdr, ETHER_ADDR_LEN) != 0)
2140 		carp_set_enaddr(sc);
2141 	return (error);
2142 }
2143 
2144 int
2145 carp_check_dup_vhids(struct carp_softc *sc, struct srpl *cif,
2146     struct carpreq *carpr)
2147 {
2148 	struct carp_softc *vr;
2149 	struct carp_vhost_entry *vhe, *vhe0;
2150 	int i;
2151 
2152 	KERNEL_ASSERT_LOCKED(); /* touching if_carp + carp_vhosts */
2153 
2154 	SRPL_FOREACH_LOCKED(vr, cif, sc_list) {
2155 		if (vr == sc)
2156 			continue;
2157 		SRPL_FOREACH_LOCKED(vhe, &vr->carp_vhosts, vhost_entries) {
2158 			if (carpr) {
2159 				for (i = 0; carpr->carpr_vhids[i]; i++) {
2160 					if (vhe->vhid == carpr->carpr_vhids[i])
2161 						return (EINVAL);
2162 				}
2163 			}
2164 			SRPL_FOREACH_LOCKED(vhe0, &sc->carp_vhosts,
2165 			    vhost_entries) {
2166 				if (vhe->vhid == vhe0->vhid)
2167 					return (EINVAL);
2168 			}
2169 		}
2170 	}
2171 	return (0);
2172 }
2173 
2174 int
2175 carp_vhids_ioctl(struct carp_softc *sc, struct carpreq *carpr)
2176 {
2177 	int i, j;
2178 	u_int8_t taken_vhids[256];
2179 
2180 	if (carpr->carpr_vhids[0] == 0 ||
2181 	    !memcmp(sc->sc_vhids, carpr->carpr_vhids, sizeof(sc->sc_vhids)))
2182 		return (0);
2183 
2184 	memset(taken_vhids, 0, sizeof(taken_vhids));
2185 	for (i = 0; carpr->carpr_vhids[i]; i++) {
2186 		if (taken_vhids[carpr->carpr_vhids[i]])
2187 			return (EINVAL);
2188 		taken_vhids[carpr->carpr_vhids[i]] = 1;
2189 
2190 		if (sc->sc_carpdev) {
2191 			struct srpl *cif;
2192 			cif = &sc->sc_carpdev->if_carp;
2193 			if (carp_check_dup_vhids(sc, cif, carpr))
2194 				return (EINVAL);
2195 		}
2196 		if (carpr->carpr_advskews[i] >= 255)
2197 			return (EINVAL);
2198 	}
2199 	/* set sane balancing defaults */
2200 	if (i <= 1)
2201 		carpr->carpr_balancing = CARP_BAL_NONE;
2202 	else if (carpr->carpr_balancing == CARP_BAL_NONE &&
2203 	    sc->sc_balancing == CARP_BAL_NONE)
2204 		carpr->carpr_balancing = CARP_BAL_IP;
2205 
2206 	/* destroy all */
2207 	carp_del_all_timeouts(sc);
2208 	carp_destroy_vhosts(sc);
2209 	memset(sc->sc_vhids, 0, sizeof(sc->sc_vhids));
2210 
2211 	/* sort vhosts list by vhid */
2212 	for (j = 1; j <= 255; j++) {
2213 		for (i = 0; carpr->carpr_vhids[i]; i++) {
2214 			if (carpr->carpr_vhids[i] != j)
2215 				continue;
2216 			if (carp_new_vhost(sc, carpr->carpr_vhids[i],
2217 			    carpr->carpr_advskews[i]))
2218 				return (ENOMEM);
2219 			sc->sc_vhids[i] = carpr->carpr_vhids[i];
2220 			sc->sc_advskews[i] = carpr->carpr_advskews[i];
2221 		}
2222 	}
2223 	carp_set_enaddr(sc);
2224 	carp_set_state_all(sc, INIT);
2225 	return (0);
2226 }
2227 
2228 void
2229 carp_ifgroup_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr)
2230 {
2231 	struct ifgroupreq *ifgr = (struct ifgroupreq *)addr;
2232 	struct ifg_list	*ifgl;
2233 	int *dm, adj;
2234 
2235 	if (!strcmp(ifgr->ifgr_group, IFG_ALL))
2236 		return;
2237 	adj = ((struct carp_softc *)ifp->if_softc)->sc_demote_cnt;
2238 	if (cmd == SIOCDIFGROUP)
2239 		adj = adj * -1;
2240 
2241 	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
2242 		if (!strcmp(ifgl->ifgl_group->ifg_group, ifgr->ifgr_group)) {
2243 			dm = &ifgl->ifgl_group->ifg_carp_demoted;
2244 			if (*dm + adj >= 0)
2245 				*dm += adj;
2246 			else
2247 				*dm = 0;
2248 		}
2249 }
2250 
2251 void
2252 carp_ifgattr_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr)
2253 {
2254 	struct ifgroupreq *ifgr = (struct ifgroupreq *)addr;
2255 	struct carp_softc *sc = ifp->if_softc;
2256 
2257 	if (ifgr->ifgr_attrib.ifg_carp_demoted > 0 && (sc->sc_if.if_flags &
2258 	    (IFF_UP|IFF_RUNNING)) == (IFF_UP|IFF_RUNNING))
2259 		carp_vhe_send_ad_all(sc);
2260 }
2261 
2262 void
2263 carp_start(struct ifnet *ifp)
2264 {
2265 	struct carp_softc *sc = ifp->if_softc;
2266 	struct mbuf *m;
2267 
2268 	for (;;) {
2269 		IFQ_DEQUEUE(&ifp->if_snd, m);
2270 		if (m == NULL)
2271 			break;
2272 
2273 #if NBPFILTER > 0
2274 		if (ifp->if_bpf)
2275 			bpf_mtap_ether(ifp->if_bpf, m, BPF_DIRECTION_OUT);
2276 #endif /* NBPFILTER > 0 */
2277 
2278 		if ((ifp->if_carpdev->if_flags & (IFF_UP|IFF_RUNNING)) !=
2279 		    (IFF_UP|IFF_RUNNING)) {
2280 			ifp->if_oerrors++;
2281 			m_freem(m);
2282 			continue;
2283 		}
2284 
2285 		/*
2286 		 * Do not leak the multicast address when sending
2287 		 * advertisements in 'ip' and 'ip-stealth' balacing
2288 		 * modes.
2289 		 */
2290 		if (sc->sc_balancing == CARP_BAL_IP ||
2291 		    sc->sc_balancing == CARP_BAL_IPSTEALTH) {
2292 			struct ether_header *eh;
2293 			uint8_t *esrc;
2294 
2295 			eh = mtod(m, struct ether_header *);
2296 			esrc = ((struct arpcom*)ifp->if_carpdev)->ac_enaddr;
2297 			memcpy(eh->ether_shost, esrc, sizeof(eh->ether_shost));
2298 		}
2299 
2300 		if (if_enqueue(ifp->if_carpdev, m)) {
2301 			ifp->if_oerrors++;
2302 			continue;
2303 		}
2304 		ifp->if_opackets++;
2305 	}
2306 }
2307 
2308 int
2309 carp_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa,
2310     struct rtentry *rt)
2311 {
2312 	struct carp_softc *sc = ((struct carp_softc *)ifp->if_softc);
2313 	struct carp_vhost_entry *vhe;
2314 	struct srp_ref sr;
2315 	int ismaster;
2316 
2317 	/*
2318 	 * If the parent of this carp(4) got destroyed while
2319 	 * `m' was being processed, silently drop it.
2320 	 */
2321 	if (sc->sc_carpdev == NULL) {
2322 		m_freem(m);
2323 		return (0);
2324 	}
2325 
2326 	if (sc->cur_vhe == NULL) {
2327 		vhe = SRPL_FIRST(&sr, &sc->carp_vhosts);
2328 		ismaster = (vhe->state == MASTER);
2329 		SRPL_LEAVE(&sr);
2330 	} else {
2331 		ismaster = (sc->cur_vhe->state == MASTER);
2332 	}
2333 
2334 	if ((sc->sc_balancing == CARP_BAL_NONE && !ismaster)) {
2335 		m_freem(m);
2336 		return (ENETUNREACH);
2337 	}
2338 
2339 	return (ether_output(ifp, m, sa, rt));
2340 }
2341 
2342 void
2343 carp_set_state_all(struct carp_softc *sc, int state)
2344 {
2345 	struct carp_vhost_entry *vhe;
2346 
2347 	KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
2348 
2349 	SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) {
2350 		if (vhe->state == state)
2351 			continue;
2352 
2353 		carp_set_state(vhe, state);
2354 	}
2355 }
2356 
2357 void
2358 carp_set_state(struct carp_vhost_entry *vhe, int state)
2359 {
2360 	struct carp_softc *sc = vhe->parent_sc;
2361 	static const char *carp_states[] = { CARP_STATES };
2362 	int loglevel;
2363 	struct carp_vhost_entry *vhe0;
2364 
2365 	KASSERT(vhe->state != state);
2366 
2367 	if (vhe->state == INIT || state == INIT)
2368 		loglevel = LOG_WARNING;
2369 	else
2370 		loglevel = LOG_CRIT;
2371 
2372 	if (sc->sc_vhe_count > 1)
2373 		CARP_LOG(loglevel, sc,
2374 		    ("state transition (vhid %d): %s -> %s", vhe->vhid,
2375 		    carp_states[vhe->state], carp_states[state]));
2376 	else
2377 		CARP_LOG(loglevel, sc,
2378 		    ("state transition: %s -> %s",
2379 		    carp_states[vhe->state], carp_states[state]));
2380 
2381 	vhe->state = state;
2382 	carp_update_lsmask(sc);
2383 
2384 	KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
2385 
2386 	sc->sc_if.if_link_state = LINK_STATE_INVALID;
2387 	SRPL_FOREACH_LOCKED(vhe0, &sc->carp_vhosts, vhost_entries) {
2388 		/*
2389 		 * Link must be up if at least one vhe is in state MASTER to
2390 		 * bring or keep route up.
2391 		 */
2392 		if (vhe0->state == MASTER) {
2393 			sc->sc_if.if_link_state = LINK_STATE_UP;
2394 			break;
2395 		} else if (vhe0->state == BACKUP) {
2396 			sc->sc_if.if_link_state = LINK_STATE_DOWN;
2397 		}
2398 	}
2399 	if_link_state_change(&sc->sc_if);
2400 }
2401 
2402 void
2403 carp_group_demote_adj(struct ifnet *ifp, int adj, char *reason)
2404 {
2405 	struct ifg_list	*ifgl;
2406 	int *dm, need_ad;
2407 	struct carp_softc *nil = NULL;
2408 
2409 	if (ifp->if_type == IFT_CARP) {
2410 		dm = &((struct carp_softc *)ifp->if_softc)->sc_demote_cnt;
2411 		if (*dm + adj >= 0)
2412 			*dm += adj;
2413 		else
2414 			*dm = 0;
2415 	}
2416 
2417 	need_ad = 0;
2418 	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
2419 		if (!strcmp(ifgl->ifgl_group->ifg_group, IFG_ALL))
2420 			continue;
2421 		dm = &ifgl->ifgl_group->ifg_carp_demoted;
2422 
2423 		if (*dm + adj >= 0)
2424 			*dm += adj;
2425 		else
2426 			*dm = 0;
2427 
2428 		if (adj > 0 && *dm == 1)
2429 			need_ad = 1;
2430 		CARP_LOG(LOG_ERR, nil,
2431 		    ("%s demoted group %s by %d to %d (%s)",
2432 		    ifp->if_xname, ifgl->ifgl_group->ifg_group,
2433 		    adj, *dm, reason));
2434 	}
2435 	if (need_ad)
2436 		carp_send_ad_all();
2437 }
2438 
2439 int
2440 carp_group_demote_count(struct carp_softc *sc)
2441 {
2442 	struct ifg_list	*ifgl;
2443 	int count = 0;
2444 
2445 	TAILQ_FOREACH(ifgl, &sc->sc_if.if_groups, ifgl_next)
2446 		count += ifgl->ifgl_group->ifg_carp_demoted;
2447 
2448 	if (count == 0 && sc->sc_demote_cnt)
2449 		count = sc->sc_demote_cnt;
2450 
2451 	return (count > 255 ? 255 : count);
2452 }
2453 
2454 void
2455 carp_carpdev_state(void *v)
2456 {
2457 	struct carp_softc *sc = v;
2458 	struct ifnet *ifp0 = sc->sc_carpdev;
2459 	int suppressed = sc->sc_suppress;
2460 
2461 	if (ifp0->if_link_state == LINK_STATE_DOWN ||
2462 	    !(ifp0->if_flags & IFF_UP)) {
2463 		sc->sc_if.if_flags &= ~IFF_RUNNING;
2464 		carp_del_all_timeouts(sc);
2465 		carp_set_state_all(sc, INIT);
2466 		sc->sc_suppress = 1;
2467 		carp_setrun_all(sc, 0);
2468 		if (!suppressed)
2469 			carp_group_demote_adj(&sc->sc_if, 1, "carpdev");
2470 	} else if (suppressed) {
2471 		carp_set_state_all(sc, INIT);
2472 		sc->sc_suppress = 0;
2473 		carp_setrun_all(sc, 0);
2474 		carp_group_demote_adj(&sc->sc_if, -1, "carpdev");
2475 	}
2476 }
2477 
2478 int
2479 carp_ether_addmulti(struct carp_softc *sc, struct ifreq *ifr)
2480 {
2481 	struct ifnet *ifp0;
2482 	struct carp_mc_entry *mc;
2483 	u_int8_t addrlo[ETHER_ADDR_LEN], addrhi[ETHER_ADDR_LEN];
2484 	int error;
2485 
2486 	ifp0 = sc->sc_carpdev;
2487 	if (ifp0 == NULL)
2488 		return (EINVAL);
2489 
2490 	error = ether_addmulti(ifr, (struct arpcom *)&sc->sc_ac);
2491 	if (error != ENETRESET)
2492 		return (error);
2493 
2494 	/*
2495 	 * This is new multicast address.  We have to tell parent
2496 	 * about it.  Also, remember this multicast address so that
2497 	 * we can delete them on unconfigure.
2498 	 */
2499 	mc = malloc(sizeof(*mc), M_DEVBUF, M_NOWAIT);
2500 	if (mc == NULL) {
2501 		error = ENOMEM;
2502 		goto alloc_failed;
2503 	}
2504 
2505 	/*
2506 	 * As ether_addmulti() returns ENETRESET, following two
2507 	 * statement shouldn't fail.
2508 	 */
2509 	(void)ether_multiaddr(&ifr->ifr_addr, addrlo, addrhi);
2510 	ETHER_LOOKUP_MULTI(addrlo, addrhi, &sc->sc_ac, mc->mc_enm);
2511 	memcpy(&mc->mc_addr, &ifr->ifr_addr, ifr->ifr_addr.sa_len);
2512 	LIST_INSERT_HEAD(&sc->carp_mc_listhead, mc, mc_entries);
2513 
2514 	error = (*ifp0->if_ioctl)(ifp0, SIOCADDMULTI, (caddr_t)ifr);
2515 	if (error != 0)
2516 		goto ioctl_failed;
2517 
2518 	return (error);
2519 
2520  ioctl_failed:
2521 	LIST_REMOVE(mc, mc_entries);
2522 	free(mc, M_DEVBUF, sizeof(*mc));
2523  alloc_failed:
2524 	(void)ether_delmulti(ifr, (struct arpcom *)&sc->sc_ac);
2525 
2526 	return (error);
2527 }
2528 
2529 int
2530 carp_ether_delmulti(struct carp_softc *sc, struct ifreq *ifr)
2531 {
2532 	struct ifnet *ifp0;
2533 	struct ether_multi *enm;
2534 	struct carp_mc_entry *mc;
2535 	u_int8_t addrlo[ETHER_ADDR_LEN], addrhi[ETHER_ADDR_LEN];
2536 	int error;
2537 
2538 	ifp0 = sc->sc_carpdev;
2539 	if (ifp0 == NULL)
2540 		return (EINVAL);
2541 
2542 	/*
2543 	 * Find a key to lookup carp_mc_entry.  We have to do this
2544 	 * before calling ether_delmulti for obvious reason.
2545 	 */
2546 	if ((error = ether_multiaddr(&ifr->ifr_addr, addrlo, addrhi)) != 0)
2547 		return (error);
2548 	ETHER_LOOKUP_MULTI(addrlo, addrhi, &sc->sc_ac, enm);
2549 	if (enm == NULL)
2550 		return (EINVAL);
2551 
2552 	LIST_FOREACH(mc, &sc->carp_mc_listhead, mc_entries)
2553 		if (mc->mc_enm == enm)
2554 			break;
2555 
2556 	/* We won't delete entries we didn't add */
2557 	if (mc == NULL)
2558 		return (EINVAL);
2559 
2560 	error = ether_delmulti(ifr, (struct arpcom *)&sc->sc_ac);
2561 	if (error != ENETRESET)
2562 		return (error);
2563 
2564 	/* We no longer use this multicast address.  Tell parent so. */
2565 	error = (*ifp0->if_ioctl)(ifp0, SIOCDELMULTI, (caddr_t)ifr);
2566 	if (error == 0) {
2567 		/* And forget about this address. */
2568 		LIST_REMOVE(mc, mc_entries);
2569 		free(mc, M_DEVBUF, sizeof(*mc));
2570 	} else
2571 		(void)ether_addmulti(ifr, (struct arpcom *)&sc->sc_ac);
2572 	return (error);
2573 }
2574 
2575 /*
2576  * Delete any multicast address we have asked to add from parent
2577  * interface.  Called when the carp is being unconfigured.
2578  */
2579 void
2580 carp_ether_purgemulti(struct carp_softc *sc)
2581 {
2582 	struct ifnet *ifp0 = sc->sc_carpdev;		/* Parent. */
2583 	struct carp_mc_entry *mc;
2584 	union {
2585 		struct ifreq ifreq;
2586 		struct {
2587 			char ifr_name[IFNAMSIZ];
2588 			struct sockaddr_storage ifr_ss;
2589 		} ifreq_storage;
2590 	} u;
2591 	struct ifreq *ifr = &u.ifreq;
2592 
2593 	if (ifp0 == NULL)
2594 		return;
2595 
2596 	memcpy(ifr->ifr_name, ifp0->if_xname, IFNAMSIZ);
2597 	while ((mc = LIST_FIRST(&sc->carp_mc_listhead)) != NULL) {
2598 		memcpy(&ifr->ifr_addr, &mc->mc_addr, mc->mc_addr.ss_len);
2599 		(void)(*ifp0->if_ioctl)(ifp0, SIOCDELMULTI, (caddr_t)ifr);
2600 		LIST_REMOVE(mc, mc_entries);
2601 		free(mc, M_DEVBUF, sizeof(*mc));
2602 	}
2603 }
2604 
2605 void
2606 carp_vh_ref(void *null, void *v)
2607 {
2608 	struct carp_vhost_entry *vhe = v;
2609 
2610 	refcnt_take(&vhe->vhost_refcnt);
2611 }
2612 
2613 void
2614 carp_vh_unref(void *null, void *v)
2615 {
2616 	struct carp_vhost_entry *vhe = v;
2617 
2618 	if (refcnt_rele(&vhe->vhost_refcnt)) {
2619 		carp_sc_unref(NULL, vhe->parent_sc);
2620 		free(vhe, M_DEVBUF, sizeof(*vhe));
2621 	}
2622 }
2623 
2624 void
2625 carp_sc_ref(void *null, void *s)
2626 {
2627 	struct carp_softc *sc = s;
2628 
2629 	refcnt_take(&sc->sc_refcnt);
2630 }
2631 
2632 void
2633 carp_sc_unref(void *null, void *s)
2634 {
2635 	struct carp_softc *sc = s;
2636 
2637 	refcnt_rele_wake(&sc->sc_refcnt);
2638 }
2639