xref: /openbsd-src/sys/netinet/ip_carp.c (revision f2da64fbbbf1b03f09f390ab01267c93dfd77c4c)
1 /*	$OpenBSD: ip_carp.c,v 1.293 2016/07/25 16:44:04 benno Exp $	*/
2 
3 /*
4  * Copyright (c) 2002 Michael Shalayeff. All rights reserved.
5  * Copyright (c) 2003 Ryan McBride. All rights reserved.
6  * Copyright (c) 2006-2008 Marco Pfatschbacher. All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
21  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
23  * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
25  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
26  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
27  * THE POSSIBILITY OF SUCH DAMAGE.
28  */
29 
30 /*
31  * TODO:
32  *	- iface reconfigure
33  *	- support for hardware checksum calculations;
34  *
35  */
36 
37 #include "ether.h"
38 
39 #include <sys/param.h>
40 #include <sys/systm.h>
41 #include <sys/mbuf.h>
42 #include <sys/socket.h>
43 #include <sys/socketvar.h>
44 #include <sys/timeout.h>
45 #include <sys/ioctl.h>
46 #include <sys/errno.h>
47 #include <sys/device.h>
48 #include <sys/kernel.h>
49 #include <sys/sysctl.h>
50 #include <sys/syslog.h>
51 #include <sys/refcnt.h>
52 
53 #include <net/if.h>
54 #include <net/if_var.h>
55 #include <net/if_types.h>
56 #include <net/netisr.h>
57 
58 #include <crypto/sha1.h>
59 
60 #include <netinet/in.h>
61 #include <netinet/in_var.h>
62 #include <netinet/ip.h>
63 #include <netinet/ip_var.h>
64 #include <netinet/if_ether.h>
65 #include <netinet/ip_ipsp.h>
66 
67 #include <net/if_dl.h>
68 
69 #ifdef INET6
70 #include <netinet6/in6_var.h>
71 #include <netinet/icmp6.h>
72 #include <netinet/ip6.h>
73 #include <netinet6/ip6_var.h>
74 #include <netinet6/nd6.h>
75 #include <netinet6/in6_ifattach.h>
76 #endif
77 
78 #include "bpfilter.h"
79 #if NBPFILTER > 0
80 #include <net/bpf.h>
81 #endif
82 
83 #include "vlan.h"
84 #if NVLAN > 0
85 #include <net/if_vlan_var.h>
86 #endif
87 
88 #include <netinet/ip_carp.h>
89 
/*
 * Entry on a carp_softc's carp_mc_listhead.  It pairs an ether_multi
 * pointer with a socket address; presumably the address is the multicast
 * address that was registered -- confirm against carp_ether_addmulti().
 */
struct carp_mc_entry {
	LIST_ENTRY(carp_mc_entry)	mc_entries;	/* list linkage */
	union {
		struct ether_multi	*mcu_enm;
	} mc_u;
	struct sockaddr_storage		mc_addr;
};
/* Shorthand for the ether_multi pointer held in the union. */
#define	mc_enm	mc_u.mcu_enm
98 
/*
 * Indices into carp_vhost_entry.vhe_sha1[].  carp_hmac_prepare_ctx()
 * builds one precomputed hash state per index; for HMAC_NOV6LL it leaves
 * scope-embedded IPv6 addresses out of the hash.
 */
enum { HMAC_ORIG=0, HMAC_NOV6LL=1, HMAC_MAX=2 };

/*
 * One virtual host (vhid) configured on a carp interface.  Entries live
 * on the owning softc's carp_vhosts SRP list and are reference counted.
 */
struct carp_vhost_entry {
	SRPL_ENTRY(carp_vhost_entry) vhost_entries;	/* carp_vhosts linkage */
	struct refcnt vhost_refcnt;

	struct carp_softc *parent_sc;	/* softc this vhost belongs to */
	int vhe_leader;			/* set on the first vhost of a softc */
	int vhid;
	int advskew;
	enum { INIT = 0, BACKUP, MASTER }	state;
	struct timeout ad_tmo;	/* advertisement timeout */
	struct timeout md_tmo;	/* master down timeout */
	struct timeout md6_tmo;	/* master down timeout */

	/* copied into carp_counter of outgoing advertisements */
	u_int64_t vhe_replay_cookie;

	/* authentication */
#define CARP_HMAC_PAD	64
	/* key pad; holds the outer pad once carp_hmac_prepare_ctx() is done */
	unsigned char vhe_pad[CARP_HMAC_PAD];
	/* precomputed start of the inner hash, one per HMAC_* variant */
	SHA1_CTX vhe_sha1[HMAC_MAX];

	u_int8_t vhe_enaddr[ETHER_ADDR_LEN];
};
123 
/* Reference callbacks used by the SRP list code for carp_vhost_entry. */
void	carp_vh_ref(void *, void *);
void	carp_vh_unref(void *, void *);

/* SRP list refcount descriptor for every carp_vhosts list. */
struct srpl_rc carp_vh_rc =
    SRPL_RC_INITIALIZER(carp_vh_ref, carp_vh_unref, NULL);
129 
/*
 * Per-interface state of a carp(4) pseudo interface.  Allocated in
 * carp_clone_create() and released in carp_clone_destroy().
 */
struct carp_softc {
	struct arpcom sc_ac;
#define	sc_if		sc_ac.ac_if
#define	sc_carpdev	sc_ac.ac_if.if_carpdev
	void *ah_cookie;	/* hook on our own if_addrhooks */
	void *lh_cookie;	/* hook on the parent's if_linkstatehooks */
	struct ip_moptions sc_imo;	/* handed to ip_output() for adverts */
#ifdef INET6
	struct ip6_moptions sc_im6o;
#endif /* INET6 */

	SRPL_ENTRY(carp_softc) sc_list;	/* linkage on carp_if.vhif_vrs */
	struct refcnt sc_refcnt;

	int sc_suppress;
	int sc_bow_out;		/* when set, advertise advbase/advskew 255 */
	int sc_demote_cnt;	/* demotion undone again in carpdetach() */

	int sc_sendad_errors;
#define CARP_SENDAD_MAX_ERRORS(sc) (3 * (sc)->sc_vhe_count)
	int sc_sendad_success;
#define CARP_SENDAD_MIN_SUCCESS(sc) (3 * (sc)->sc_vhe_count)

	char sc_curlladdr[ETHER_ADDR_LEN];

	SRPL_HEAD(, carp_vhost_entry) carp_vhosts;
	int sc_vhe_count;	/* number of entries on carp_vhosts */
	u_int8_t sc_vhids[CARP_MAXNODES];
	u_int8_t sc_advskews[CARP_MAXNODES];
	u_int8_t sc_balancing;

	int sc_naddrs;
	int sc_naddrs6;
	int sc_advbase;		/* seconds */

	/* authentication */
	unsigned char sc_key[CARP_KEY_LEN];

	/* derived from sc_key by carp_hmac_prepare_ctx() */
	u_int32_t sc_hashkey[2];
	u_int32_t sc_lsmask;		/* load sharing mask */
	int sc_lscount;			/* # load sharing interfaces (max 32) */
	int sc_delayed_arp;		/* delayed ARP request countdown */
	int sc_realmac;			/* using real mac */

	struct in_addr sc_peer;		/* IPv4 destination of advertisements */

	LIST_HEAD(__carp_mchead, carp_mc_entry)	carp_mc_listhead;
	struct carp_vhost_entry *cur_vhe; /* current active vhe */
};
179 
/* Reference callbacks used by the SRP list code for carp_softc. */
void	carp_sc_ref(void *, void *);
void	carp_sc_unref(void *, void *);

/* SRP list refcount descriptor for every vhif_vrs list. */
struct srpl_rc carp_sc_rc =
    SRPL_RC_INITIALIZER(carp_sc_ref, carp_sc_unref, NULL);

/*
 * Tunables served by carp_sysctl() and indexed by CARPCTL_* id
 * (ids are declared outside this file, presumably netinet/ip_carp.h).
 */
int carp_opts[CARPCTL_MAXID] = { 0, 1, 0, LOG_CRIT };	/* XXX for now */
struct carpstats carpstats;

/* Non-zero while carp_send_ad_all() is running; blocks re-entry. */
int	carp_send_all_recur = 0;

/*
 * State hung off a parent interface's if_carp pointer: the list of
 * carp_softc attached to that parent.
 */
struct carp_if {
	struct srpl vhif_vrs;
};
194 
/*
 * Log a message when the configured log level (carp_opts[CARPCTL_LOG])
 * is at least `l'.
 *
 *	l	syslog priority; evaluated more than once, so it must not
 *		have side effects
 *	sc	carp_softc pointer used for the "<ifname>: " prefix, or a
 *		NULL pointer of that type for a generic "carp: " prefix
 *	s	complete, parenthesised addlog() argument list, e.g.
 *		CARP_LOG(LOG_INFO, sc, ("bad vhid %d", vhid));
 *
 * `l' is parenthesised in the comparison so that an expression argument
 * cannot be re-associated with the `>=' operator.
 */
#define	CARP_LOG(l, sc, s)						\
	do {								\
		if (carp_opts[CARPCTL_LOG] >= (l)) {			\
			if (sc)						\
				log((l), "%s: ",			\
				    (sc)->sc_if.if_xname);		\
			else						\
				log((l), "carp: ");			\
			addlog s;					\
			addlog("\n");					\
		}							\
	} while (0)
207 
/*
 * Forward declarations for the functions defined in this file.
 */

/* HMAC handling */
void	carp_hmac_prepare(struct carp_softc *);
void	carp_hmac_prepare_ctx(struct carp_vhost_entry *, u_int8_t);
void	carp_hmac_generate(struct carp_vhost_entry *, u_int32_t *,
	    unsigned char *, u_int8_t);
int	carp_hmac_verify(struct carp_vhost_entry *, u_int32_t *,
	    unsigned char *);
/* input path */
int	carp_input(struct ifnet *, struct mbuf *, void *);
void	carp_proto_input_c(struct ifnet *, struct mbuf *,
	    struct carp_header *, int, sa_family_t);
void	carp_proto_input_if(struct ifnet *, struct mbuf *, int);
/* attach/detach and advertisement handling */
void	carpattach(int);
void	carpdetach(struct carp_softc *);
int	carp_prepare_ad(struct mbuf *, struct carp_vhost_entry *,
	    struct carp_header *);
void	carp_send_ad_all(void);
void	carp_vhe_send_ad_all(struct carp_softc *);
void	carp_send_ad(void *);
void	carp_send_arp(struct carp_softc *);
void	carp_master_down(void *);
/* ioctl handling */
int	carp_ioctl(struct ifnet *, u_long, caddr_t);
int	carp_vhids_ioctl(struct carp_softc *, struct carpreq *);
int	carp_check_dup_vhids(struct carp_softc *, struct carp_if *,
	    struct carpreq *);
void	carp_ifgroup_ioctl(struct ifnet *, u_long, caddr_t);
void	carp_ifgattr_ioctl(struct ifnet *, u_long, caddr_t);
void	carp_start(struct ifnet *);
/* state machine */
void	carp_setrun_all(struct carp_softc *, sa_family_t);
void	carp_setrun(struct carp_vhost_entry *, sa_family_t);
void	carp_set_state_all(struct carp_softc *, int);
void	carp_set_state(struct carp_vhost_entry *, int);
/* address and multicast configuration */
void	carp_multicast_cleanup(struct carp_softc *);
int	carp_set_ifp(struct carp_softc *, struct ifnet *);
void	carp_set_enaddr(struct carp_softc *);
void	carp_set_vhe_enaddr(struct carp_vhost_entry *);
void	carp_addr_updated(void *);
int	carp_set_addr(struct carp_softc *, struct sockaddr_in *);
int	carp_join_multicast(struct carp_softc *);
#ifdef INET6
void	carp_send_na(struct carp_softc *);
int	carp_set_addr6(struct carp_softc *, struct sockaddr_in6 *);
int	carp_join_multicast6(struct carp_softc *);
#endif
/* interface cloning, link-layer multicast, virtual hosts */
int	carp_clone_create(struct if_clone *, int);
int	carp_clone_destroy(struct ifnet *);
int	carp_ether_addmulti(struct carp_softc *, struct ifreq *);
int	carp_ether_delmulti(struct carp_softc *, struct ifreq *);
void	carp_ether_purgemulti(struct carp_softc *);
int	carp_group_demote_count(struct carp_softc *);
void	carp_update_lsmask(struct carp_softc *);
int	carp_new_vhost(struct carp_softc *, int, int);
void	carp_destroy_vhosts(struct carp_softc *);
void	carp_del_all_timeouts(struct carp_softc *);

/* Cloner for "carp" interfaces; registered by carpattach(). */
struct if_clone carp_cloner =
    IF_CLONE_INITIALIZER("carp", carp_clone_create, carp_clone_destroy);

/* Internet checksum over the first _l bytes of mbuf chain _m. */
#define carp_cksum(_m, _l)	((u_int16_t)in_cksum((_m), (_l)))
/* Value stored in m_pkthdr.pf.prio of outgoing advertisements. */
#define CARP_IFQ_PRIO	6
266 
267 void
268 carp_hmac_prepare(struct carp_softc *sc)
269 {
270 	struct carp_vhost_entry *vhe;
271 	u_int8_t i;
272 
273 	KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
274 
275 	SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) {
276 		for (i = 0; i < HMAC_MAX; i++) {
277 			carp_hmac_prepare_ctx(vhe, i);
278 		}
279 	}
280 }
281 
282 void
283 carp_hmac_prepare_ctx(struct carp_vhost_entry *vhe, u_int8_t ctx)
284 {
285 	struct carp_softc *sc = vhe->parent_sc;
286 
287 	u_int8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT;
288 	u_int8_t vhid = vhe->vhid & 0xff;
289 	SHA1_CTX sha1ctx;
290 	u_int32_t kmd[5];
291 	struct ifaddr *ifa;
292 	int i, found;
293 	struct in_addr last, cur, in;
294 #ifdef INET6
295 	struct in6_addr last6, cur6, in6;
296 #endif /* INET6 */
297 
298 	/* compute ipad from key */
299 	memset(vhe->vhe_pad, 0, sizeof(vhe->vhe_pad));
300 	bcopy(sc->sc_key, vhe->vhe_pad, sizeof(sc->sc_key));
301 	for (i = 0; i < sizeof(vhe->vhe_pad); i++)
302 		vhe->vhe_pad[i] ^= 0x36;
303 
304 	/* precompute first part of inner hash */
305 	SHA1Init(&vhe->vhe_sha1[ctx]);
306 	SHA1Update(&vhe->vhe_sha1[ctx], vhe->vhe_pad, sizeof(vhe->vhe_pad));
307 	SHA1Update(&vhe->vhe_sha1[ctx], (void *)&version, sizeof(version));
308 	SHA1Update(&vhe->vhe_sha1[ctx], (void *)&type, sizeof(type));
309 
310 	/* generate a key for the arpbalance hash, before the vhid is hashed */
311 	if (vhe->vhe_leader) {
312 		bcopy(&vhe->vhe_sha1[ctx], &sha1ctx, sizeof(sha1ctx));
313 		SHA1Final((unsigned char *)kmd, &sha1ctx);
314 		sc->sc_hashkey[0] = kmd[0] ^ kmd[1];
315 		sc->sc_hashkey[1] = kmd[2] ^ kmd[3];
316 	}
317 
318 	/* the rest of the precomputation */
319 	if (!sc->sc_realmac && vhe->vhe_leader &&
320 	    memcmp(sc->sc_ac.ac_enaddr, vhe->vhe_enaddr, ETHER_ADDR_LEN) != 0)
321 		SHA1Update(&vhe->vhe_sha1[ctx], sc->sc_ac.ac_enaddr,
322 		    ETHER_ADDR_LEN);
323 
324 	SHA1Update(&vhe->vhe_sha1[ctx], (void *)&vhid, sizeof(vhid));
325 
326 	/* Hash the addresses from smallest to largest, not interface order */
327 	cur.s_addr = 0;
328 	do {
329 		found = 0;
330 		last = cur;
331 		cur.s_addr = 0xffffffff;
332 		TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) {
333 			if (ifa->ifa_addr->sa_family != AF_INET)
334 				continue;
335 			in.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr;
336 			if (ntohl(in.s_addr) > ntohl(last.s_addr) &&
337 			    ntohl(in.s_addr) < ntohl(cur.s_addr)) {
338 				cur.s_addr = in.s_addr;
339 				found++;
340 			}
341 		}
342 		if (found)
343 			SHA1Update(&vhe->vhe_sha1[ctx],
344 			    (void *)&cur, sizeof(cur));
345 	} while (found);
346 #ifdef INET6
347 	memset(&cur6, 0x00, sizeof(cur6));
348 	do {
349 		found = 0;
350 		last6 = cur6;
351 		memset(&cur6, 0xff, sizeof(cur6));
352 		TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) {
353 			if (ifa->ifa_addr->sa_family != AF_INET6)
354 				continue;
355 			in6 = ifatoia6(ifa)->ia_addr.sin6_addr;
356 			if (IN6_IS_SCOPE_EMBED(&in6)) {
357 				if (ctx == HMAC_NOV6LL)
358 					continue;
359 				in6.s6_addr16[1] = 0;
360 			}
361 			if (memcmp(&in6, &last6, sizeof(in6)) > 0 &&
362 			    memcmp(&in6, &cur6, sizeof(in6)) < 0) {
363 				cur6 = in6;
364 				found++;
365 			}
366 		}
367 		if (found)
368 			SHA1Update(&vhe->vhe_sha1[ctx],
369 			    (void *)&cur6, sizeof(cur6));
370 	} while (found);
371 #endif /* INET6 */
372 
373 	/* convert ipad to opad */
374 	for (i = 0; i < sizeof(vhe->vhe_pad); i++)
375 		vhe->vhe_pad[i] ^= 0x36 ^ 0x5c;
376 }
377 
378 void
379 carp_hmac_generate(struct carp_vhost_entry *vhe, u_int32_t counter[2],
380     unsigned char md[20], u_int8_t ctx)
381 {
382 	SHA1_CTX sha1ctx;
383 
384 	/* fetch first half of inner hash */
385 	bcopy(&vhe->vhe_sha1[ctx], &sha1ctx, sizeof(sha1ctx));
386 
387 	SHA1Update(&sha1ctx, (void *)counter, sizeof(vhe->vhe_replay_cookie));
388 	SHA1Final(md, &sha1ctx);
389 
390 	/* outer hash */
391 	SHA1Init(&sha1ctx);
392 	SHA1Update(&sha1ctx, vhe->vhe_pad, sizeof(vhe->vhe_pad));
393 	SHA1Update(&sha1ctx, md, 20);
394 	SHA1Final(md, &sha1ctx);
395 }
396 
397 int
398 carp_hmac_verify(struct carp_vhost_entry *vhe, u_int32_t counter[2],
399     unsigned char md[20])
400 {
401 	unsigned char md2[20];
402 	u_int8_t i;
403 
404 	for (i = 0; i < HMAC_MAX; i++) {
405 		carp_hmac_generate(vhe, counter, md2, i);
406 		if (!timingsafe_bcmp(md, md2, sizeof(md2)))
407 			return (0);
408 	}
409 	return (1);
410 }
411 
412 void
413 carp_proto_input(struct mbuf *m, ...)
414 {
415 	struct ifnet *ifp;
416 	int hlen;
417 	va_list ap;
418 
419 	va_start(ap, m);
420 	hlen = va_arg(ap, int);
421 	va_end(ap);
422 
423 	ifp = if_get(m->m_pkthdr.ph_ifidx);
424 	if (ifp == NULL) {
425 		m_freem(m);
426 		return;
427 	}
428 
429 	carp_proto_input_if(ifp, m, hlen);
430 	if_put(ifp);
431 }
432 
433 /*
434  * process input packet.
435  * we have rearranged checks order compared to the rfc,
436  * but it seems more efficient this way or not possible otherwise.
437  */
438 void
439 carp_proto_input_if(struct ifnet *ifp, struct mbuf *m, int hlen)
440 {
441 	struct ip *ip = mtod(m, struct ip *);
442 	struct carp_softc *sc = NULL;
443 	struct carp_header *ch;
444 	int iplen, len, ismulti;
445 
446 	carpstats.carps_ipackets++;
447 
448 	if (!carp_opts[CARPCTL_ALLOW]) {
449 		m_freem(m);
450 		return;
451 	}
452 
453 	ismulti = IN_MULTICAST(ip->ip_dst.s_addr);
454 
455 	/* check if received on a valid carp interface */
456 	if (!((ifp->if_type == IFT_CARP && ismulti) ||
457 	    (ifp->if_type != IFT_CARP && !ismulti && ifp->if_carp != NULL))) {
458 		carpstats.carps_badif++;
459 		CARP_LOG(LOG_INFO, sc,
460 		    ("packet received on non-carp interface: %s",
461 		     ifp->if_xname));
462 		m_freem(m);
463 		return;
464 	}
465 
466 	/* verify that the IP TTL is 255.  */
467 	if (ip->ip_ttl != CARP_DFLTTL) {
468 		carpstats.carps_badttl++;
469 		CARP_LOG(LOG_NOTICE, sc, ("received ttl %d != %d on %s",
470 		    ip->ip_ttl, CARP_DFLTTL, ifp->if_xname));
471 		m_freem(m);
472 		return;
473 	}
474 
475 	/*
476 	 * verify that the received packet length is
477 	 * equal to the CARP header
478 	 */
479 	iplen = ip->ip_hl << 2;
480 	len = iplen + sizeof(*ch);
481 	if (len > m->m_pkthdr.len) {
482 		carpstats.carps_badlen++;
483 		CARP_LOG(LOG_INFO, sc, ("packet too short %d on %s",
484 		    m->m_pkthdr.len, ifp->if_xname));
485 		m_freem(m);
486 		return;
487 	}
488 
489 	if ((m = m_pullup(m, len)) == NULL) {
490 		carpstats.carps_hdrops++;
491 		return;
492 	}
493 	ip = mtod(m, struct ip *);
494 	ch = (struct carp_header *)(mtod(m, caddr_t) + iplen);
495 
496 	/* verify the CARP checksum */
497 	m->m_data += iplen;
498 	if (carp_cksum(m, len - iplen)) {
499 		carpstats.carps_badsum++;
500 		CARP_LOG(LOG_INFO, sc, ("checksum failed on %s",
501 		    ifp->if_xname));
502 		m_freem(m);
503 		return;
504 	}
505 	m->m_data -= iplen;
506 
507 	carp_proto_input_c(ifp, m, ch, ismulti, AF_INET);
508 }
509 
510 #ifdef INET6
511 int	carp6_proto_input_if(struct ifnet *, struct mbuf *, int *);
512 
513 int
514 carp6_proto_input(struct mbuf **mp, int *offp, int proto)
515 {
516 	struct mbuf *m = *mp;
517 	struct ifnet *ifp;
518 	int rv;
519 
520 	ifp = if_get(m->m_pkthdr.ph_ifidx);
521 	if (ifp == NULL) {
522 		m_freem(m);
523 		return (IPPROTO_DONE);
524 	}
525 
526 	rv = carp6_proto_input_if(ifp, m, offp);
527 	if_put(ifp);
528 
529 	return (rv);
530 }
531 
532 int
533 carp6_proto_input_if(struct ifnet *ifp, struct mbuf *m, int *offp)
534 {
535 	struct carp_softc *sc = NULL;
536 	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
537 	struct carp_header *ch;
538 	u_int len;
539 
540 	carpstats.carps_ipackets6++;
541 
542 	if (!carp_opts[CARPCTL_ALLOW]) {
543 		m_freem(m);
544 		return (IPPROTO_DONE);
545 	}
546 
547 	/* check if received on a valid carp interface */
548 	if (ifp->if_type != IFT_CARP) {
549 		carpstats.carps_badif++;
550 		CARP_LOG(LOG_INFO, sc, ("packet received on non-carp interface: %s",
551 		    ifp->if_xname));
552 		m_freem(m);
553 		return (IPPROTO_DONE);
554 	}
555 
556 	/* verify that the IP TTL is 255 */
557 	if (ip6->ip6_hlim != CARP_DFLTTL) {
558 		carpstats.carps_badttl++;
559 		CARP_LOG(LOG_NOTICE, sc, ("received ttl %d != %d on %s",
560 		    ip6->ip6_hlim, CARP_DFLTTL, ifp->if_xname));
561 		m_freem(m);
562 		return (IPPROTO_DONE);
563 	}
564 
565 	/* verify that we have a complete carp packet */
566 	len = m->m_len;
567 	if ((m = m_pullup(m, *offp + sizeof(*ch))) == NULL) {
568 		carpstats.carps_badlen++;
569 		CARP_LOG(LOG_INFO, sc, ("packet size %u too small", len));
570 		return (IPPROTO_DONE);
571 	}
572 	ch = (struct carp_header *)(mtod(m, caddr_t) + *offp);
573 
574 	/* verify the CARP checksum */
575 	m->m_data += *offp;
576 	if (carp_cksum(m, sizeof(*ch))) {
577 		carpstats.carps_badsum++;
578 		CARP_LOG(LOG_INFO, sc, ("checksum failed, on %s",
579 		    ifp->if_xname));
580 		m_freem(m);
581 		return (IPPROTO_DONE);
582 	}
583 	m->m_data -= *offp;
584 
585 	carp_proto_input_c(ifp, m, ch, 1, AF_INET6);
586 	return (IPPROTO_DONE);
587 }
588 #endif /* INET6 */
589 
590 void
591 carp_proto_input_c(struct ifnet *ifp, struct mbuf *m, struct carp_header *ch,
592     int ismulti, sa_family_t af)
593 {
594 	struct carp_softc *sc;
595 	struct carp_vhost_entry *vhe;
596 	struct timeval sc_tv, ch_tv;
597 	struct carp_if *cif;
598 
599 	if (ifp->if_type == IFT_CARP)
600 		cif = (struct carp_if *)ifp->if_carpdev->if_carp;
601 	else
602 		cif = (struct carp_if *)ifp->if_carp;
603 
604 	KERNEL_ASSERT_LOCKED(); /* touching vhif_vrs + carp_vhosts */
605 	SRPL_FOREACH_LOCKED(sc, &cif->vhif_vrs, sc_list) {
606 		if (af == AF_INET &&
607 		    ismulti != IN_MULTICAST(sc->sc_peer.s_addr))
608 			continue;
609 		SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) {
610 			if (vhe->vhid == ch->carp_vhid)
611 				goto found;
612 		}
613 	}
614  found:
615 
616 	if (!sc || (sc->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) !=
617 	    (IFF_UP|IFF_RUNNING)) {
618 		carpstats.carps_badvhid++;
619 		m_freem(m);
620 		return;
621 	}
622 
623 	getmicrotime(&sc->sc_if.if_lastchange);
624 	sc->sc_if.if_ipackets++;
625 	sc->sc_if.if_ibytes += m->m_pkthdr.len;
626 
627 	/* verify the CARP version. */
628 	if (ch->carp_version != CARP_VERSION) {
629 		carpstats.carps_badver++;
630 		sc->sc_if.if_ierrors++;
631 		CARP_LOG(LOG_NOTICE, sc, ("invalid version %d != %d",
632 		    ch->carp_version, CARP_VERSION));
633 		m_freem(m);
634 		return;
635 	}
636 
637 	/* verify the hash */
638 	if (carp_hmac_verify(vhe, ch->carp_counter, ch->carp_md)) {
639 		carpstats.carps_badauth++;
640 		sc->sc_if.if_ierrors++;
641 		CARP_LOG(LOG_INFO, sc, ("incorrect hash"));
642 		m_freem(m);
643 		return;
644 	}
645 
646 	if (!memcmp(&vhe->vhe_replay_cookie, ch->carp_counter,
647 	    sizeof(ch->carp_counter))) {
648 		/* Do not log duplicates from non simplex interfaces */
649 		if (sc->sc_carpdev->if_flags & IFF_SIMPLEX) {
650 			carpstats.carps_badauth++;
651 			sc->sc_if.if_ierrors++;
652 			CARP_LOG(LOG_WARNING, sc,
653 			    ("replay or network loop detected"));
654 		}
655 		m_freem(m);
656 		return;
657 	}
658 
659 	sc_tv.tv_sec = sc->sc_advbase;
660 	sc_tv.tv_usec = vhe->advskew * 1000000 / 256;
661 	ch_tv.tv_sec = ch->carp_advbase;
662 	ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256;
663 
664 	switch (vhe->state) {
665 	case INIT:
666 		break;
667 	case MASTER:
668 		/*
669 		 * If we receive an advertisement from a master who's going to
670 		 * be more frequent than us, and whose demote count is not higher
671 		 * than ours, go into BACKUP state. If his demote count is lower,
672 		 * also go into BACKUP.
673 		 */
674 		if (((timercmp(&sc_tv, &ch_tv, >) ||
675 		    timercmp(&sc_tv, &ch_tv, ==)) &&
676 		    (ch->carp_demote <= carp_group_demote_count(sc))) ||
677 		    ch->carp_demote < carp_group_demote_count(sc)) {
678 			timeout_del(&vhe->ad_tmo);
679 			carp_set_state(vhe, BACKUP);
680 			carp_setrun(vhe, 0);
681 		}
682 		break;
683 	case BACKUP:
684 		/*
685 		 * If we're pre-empting masters who advertise slower than us,
686 		 * and do not have a better demote count, treat them as down.
687 		 *
688 		 */
689 		if (carp_opts[CARPCTL_PREEMPT] &&
690 		    timercmp(&sc_tv, &ch_tv, <) &&
691 		    ch->carp_demote >= carp_group_demote_count(sc)) {
692 			carp_master_down(vhe);
693 			break;
694 		}
695 
696 		/*
697 		 * Take over masters advertising with a higher demote count,
698 		 * regardless of CARPCTL_PREEMPT.
699 		 */
700 		if (ch->carp_demote > carp_group_demote_count(sc)) {
701 			carp_master_down(vhe);
702 			break;
703 		}
704 
705 		/*
706 		 *  If the master is going to advertise at such a low frequency
707 		 *  that he's guaranteed to time out, we'd might as well just
708 		 *  treat him as timed out now.
709 		 */
710 		sc_tv.tv_sec = sc->sc_advbase * 3;
711 		if (sc->sc_advbase && timercmp(&sc_tv, &ch_tv, <)) {
712 			carp_master_down(vhe);
713 			break;
714 		}
715 
716 		/*
717 		 * Otherwise, we reset the counter and wait for the next
718 		 * advertisement.
719 		 */
720 		carp_setrun(vhe, af);
721 		break;
722 	}
723 
724 	m_freem(m);
725 	return;
726 }
727 
728 int
729 carp_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
730     size_t newlen)
731 {
732 	/* All sysctl names at this level are terminal. */
733 	if (namelen != 1)
734 		return (ENOTDIR);
735 
736 	switch (name[0]) {
737 	case CARPCTL_STATS:
738 		if (newp != NULL)
739 			return (EPERM);
740 		return (sysctl_struct(oldp, oldlenp, newp, newlen,
741 		    &carpstats, sizeof(carpstats)));
742 	default:
743 		if (name[0] <= 0 || name[0] >= CARPCTL_MAXID)
744 			return (ENOPROTOOPT);
745 		return sysctl_int(oldp, oldlenp, newp, newlen,
746 		    &carp_opts[name[0]]);
747 	}
748 }
749 
750 /*
751  * Interface side of the CARP implementation.
752  */
753 
754 /* ARGSUSED */
755 void
756 carpattach(int n)
757 {
758 	struct ifg_group	*ifg;
759 
760 	if ((ifg = if_creategroup("carp")) != NULL)
761 		ifg->ifg_refcnt++;	/* keep around even if empty */
762 	if_clone_attach(&carp_cloner);
763 }
764 
765 int
766 carp_clone_create(struct if_clone *ifc, int unit)
767 {
768 	struct carp_softc *sc;
769 	struct ifnet *ifp;
770 
771 	sc = malloc(sizeof(*sc), M_DEVBUF, M_NOWAIT|M_ZERO);
772 	if (!sc)
773 		return (ENOMEM);
774 
775 	refcnt_init(&sc->sc_refcnt);
776 
777 	SRPL_INIT(&sc->carp_vhosts);
778 	sc->sc_vhe_count = 0;
779 	if (carp_new_vhost(sc, 0, 0)) {
780 		free(sc, M_DEVBUF, sizeof(*sc));
781 		return (ENOMEM);
782 	}
783 
784 	sc->sc_suppress = 0;
785 	sc->sc_advbase = CARP_DFLTINTV;
786 	sc->sc_naddrs = sc->sc_naddrs6 = 0;
787 #ifdef INET6
788 	sc->sc_im6o.im6o_hlim = CARP_DFLTTL;
789 #endif /* INET6 */
790 	sc->sc_imo.imo_membership = (struct in_multi **)malloc(
791 	    (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_IPMOPTS,
792 	    M_WAITOK|M_ZERO);
793 	sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS;
794 
795 	LIST_INIT(&sc->carp_mc_listhead);
796 	ifp = &sc->sc_if;
797 	ifp->if_softc = sc;
798 	snprintf(ifp->if_xname, sizeof ifp->if_xname, "%s%d", ifc->ifc_name,
799 	    unit);
800 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
801 	ifp->if_ioctl = carp_ioctl;
802 	ifp->if_start = carp_start;
803 	IFQ_SET_MAXLEN(&ifp->if_snd, 1);
804 	if_attach(ifp);
805 	ether_ifattach(ifp);
806 	ifp->if_type = IFT_CARP;
807 	ifp->if_sadl->sdl_type = IFT_CARP;
808 	ifp->if_output = carp_output;
809 	ifp->if_priority = IF_CARP_DEFAULT_PRIORITY;
810 	ifp->if_link_state = LINK_STATE_INVALID;
811 
812 	/* Hook carp_addr_updated to cope with address and route changes. */
813 	sc->ah_cookie = hook_establish(sc->sc_if.if_addrhooks, 0,
814 	    carp_addr_updated, sc);
815 
816 	return (0);
817 }
818 
819 int
820 carp_new_vhost(struct carp_softc *sc, int vhid, int advskew)
821 {
822 	struct carp_vhost_entry *vhe, *vhe0;
823 
824 	vhe = malloc(sizeof(*vhe), M_DEVBUF, M_NOWAIT | M_ZERO);
825 	if (vhe == NULL)
826 		return (ENOMEM);
827 
828 	refcnt_init(&vhe->vhost_refcnt);
829 	carp_sc_ref(NULL, sc); /* give a sc ref to the vhe */
830 	vhe->parent_sc = sc;
831 	vhe->vhid = vhid;
832 	vhe->advskew = advskew;
833 	vhe->state = INIT;
834 	timeout_set(&vhe->ad_tmo, carp_send_ad, vhe);
835 	timeout_set(&vhe->md_tmo, carp_master_down, vhe);
836 	timeout_set(&vhe->md6_tmo, carp_master_down, vhe);
837 
838 	KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
839 
840 	/* mark the first vhe as leader */
841 	if (SRPL_EMPTY_LOCKED(&sc->carp_vhosts)) {
842 		vhe->vhe_leader = 1;
843 		SRPL_INSERT_HEAD_LOCKED(&carp_vh_rc, &sc->carp_vhosts,
844 		    vhe, vhost_entries);
845 		sc->sc_vhe_count = 1;
846 		return (0);
847 	}
848 
849 	SRPL_FOREACH_LOCKED(vhe0, &sc->carp_vhosts, vhost_entries) {
850 		if (SRPL_NEXT_LOCKED(vhe0, vhost_entries) == NULL)
851 			break;
852 	}
853 
854 	SRPL_INSERT_AFTER_LOCKED(&carp_vh_rc, vhe0, vhe, vhost_entries);
855 	sc->sc_vhe_count++;
856 
857 	return (0);
858 }
859 
860 int
861 carp_clone_destroy(struct ifnet *ifp)
862 {
863 	struct carp_softc *sc = ifp->if_softc;
864 
865 	carpdetach(sc);
866 	ether_ifdetach(ifp);
867 	if_detach(ifp);
868 	carp_destroy_vhosts(ifp->if_softc);
869 	refcnt_finalize(&sc->sc_refcnt, "carpdtor");
870 	free(sc->sc_imo.imo_membership, M_IPMOPTS, 0);
871 	free(sc, M_DEVBUF, sizeof(*sc));
872 	return (0);
873 }
874 
875 void
876 carp_del_all_timeouts(struct carp_softc *sc)
877 {
878 	struct carp_vhost_entry *vhe;
879 
880 	KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
881 	SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) {
882 		timeout_del(&vhe->ad_tmo);
883 		timeout_del(&vhe->md_tmo);
884 		timeout_del(&vhe->md6_tmo);
885 	}
886 }
887 
888 void
889 carpdetach(struct carp_softc *sc)
890 {
891 	struct ifnet *ifp0;
892 	struct carp_if *cif;
893 	int s;
894 
895 	carp_del_all_timeouts(sc);
896 
897 	if (sc->sc_demote_cnt)
898 		carp_group_demote_adj(&sc->sc_if, -sc->sc_demote_cnt, "detach");
899 	sc->sc_suppress = 0;
900 	sc->sc_sendad_errors = 0;
901 
902 	carp_set_state_all(sc, INIT);
903 	sc->sc_if.if_flags &= ~IFF_UP;
904 	carp_setrun_all(sc, 0);
905 	carp_multicast_cleanup(sc);
906 
907 	if (sc->ah_cookie != NULL)
908 		hook_disestablish(sc->sc_if.if_addrhooks, sc->ah_cookie);
909 
910 	ifp0 = sc->sc_carpdev;
911 	if (ifp0 == NULL)
912 		return;
913 
914 	KERNEL_ASSERT_LOCKED(); /* touching vhif_vrs */
915 
916 	cif = (struct carp_if *)ifp0->if_carp;
917 
918 	/* Restore previous input handler. */
919 	if_ih_remove(ifp0, carp_input, cif);
920 
921 	s = splnet();
922 	if (sc->lh_cookie != NULL)
923 		hook_disestablish(ifp0->if_linkstatehooks, sc->lh_cookie);
924 
925 	SRPL_REMOVE_LOCKED(&carp_sc_rc, &cif->vhif_vrs, sc,
926 	    carp_softc, sc_list);
927 	if (SRPL_EMPTY_LOCKED(&cif->vhif_vrs)) {
928 		ifpromisc(ifp0, 0);
929 		ifp0->if_carp = NULL;
930 		free(cif, M_IFADDR, sizeof(*cif));
931 	}
932 	sc->sc_carpdev = NULL;
933 	splx(s);
934 }
935 
936 /* Detach an interface from the carp. */
937 void
938 carp_ifdetach(struct ifnet *ifp0)
939 {
940 	struct carp_softc *sc, *nextsc;
941 	struct carp_if *cif = (struct carp_if *)ifp0->if_carp;
942 
943 	KERNEL_ASSERT_LOCKED(); /* touching vhif_vrs */
944 
945 	for (sc = SRPL_FIRST_LOCKED(&cif->vhif_vrs); sc != NULL; sc = nextsc) {
946 		nextsc = SRPL_NEXT_LOCKED(sc, sc_list);
947 
948 		carpdetach(sc); /* this can free cif */
949 	}
950 }
951 
952 void
953 carp_destroy_vhosts(struct carp_softc *sc)
954 {
955 	/* XXX bow out? */
956 	struct carp_vhost_entry *vhe;
957 
958 	KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
959 
960 	while ((vhe = SRPL_FIRST_LOCKED(&sc->carp_vhosts)) != NULL) {
961 		SRPL_REMOVE_LOCKED(&carp_vh_rc, &sc->carp_vhosts, vhe,
962 		    carp_vhost_entry, vhost_entries);
963 		carp_vh_unref(NULL, vhe); /* drop last ref */
964 	}
965 	sc->sc_vhe_count = 0;
966 }
967 
968 int
969 carp_prepare_ad(struct mbuf *m, struct carp_vhost_entry *vhe,
970     struct carp_header *ch)
971 {
972 	if (!vhe->vhe_replay_cookie) {
973 		arc4random_buf(&vhe->vhe_replay_cookie,
974 		    sizeof(vhe->vhe_replay_cookie));
975 	}
976 
977 	bcopy(&vhe->vhe_replay_cookie, ch->carp_counter,
978 	    sizeof(ch->carp_counter));
979 
980 	/*
981 	 * For the time being, do not include the IPv6 linklayer addresses
982 	 * in the HMAC.
983 	 */
984 	carp_hmac_generate(vhe, ch->carp_counter, ch->carp_md, HMAC_NOV6LL);
985 
986 	return (0);
987 }
988 
989 void
990 carp_send_ad_all(void)
991 {
992 	struct ifnet *ifp0;
993 	struct carp_if *cif;
994 	struct carp_softc *vh;
995 
996 	KERNEL_ASSERT_LOCKED(); /* touching vhif_vrs */
997 
998 	if (carp_send_all_recur > 0)
999 		return;
1000 	++carp_send_all_recur;
1001 	TAILQ_FOREACH(ifp0, &ifnet, if_list) {
1002 		if (ifp0->if_carp == NULL || ifp0->if_type == IFT_CARP)
1003 			continue;
1004 
1005 		cif = (struct carp_if *)ifp0->if_carp;
1006 		SRPL_FOREACH_LOCKED(vh, &cif->vhif_vrs, sc_list) {
1007 			if ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) ==
1008 			    (IFF_UP|IFF_RUNNING)) {
1009 				carp_vhe_send_ad_all(vh);
1010 			}
1011 		}
1012 	}
1013 	--carp_send_all_recur;
1014 }
1015 
1016 void
1017 carp_vhe_send_ad_all(struct carp_softc *sc)
1018 {
1019 	struct carp_vhost_entry *vhe;
1020 
1021 	KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
1022 
1023 	SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) {
1024 		if (vhe->state == MASTER)
1025 			carp_send_ad(vhe);
1026 	}
1027 }
1028 
1029 void
1030 carp_send_ad(void *v)
1031 {
1032 	struct carp_header ch;
1033 	struct timeval tv;
1034 	struct carp_vhost_entry *vhe = v;
1035 	struct carp_softc *sc = vhe->parent_sc;
1036 	struct carp_header *ch_ptr;
1037 
1038 	struct mbuf *m;
1039 	int error, len, advbase, advskew, s;
1040 	struct ifaddr *ifa;
1041 	struct sockaddr sa;
1042 
1043 	if (sc->sc_carpdev == NULL) {
1044 		sc->sc_if.if_oerrors++;
1045 		return;
1046 	}
1047 
1048 	s = splsoftnet();
1049 
1050 	/* bow out if we've gone to backup (the carp interface is going down) */
1051 	if (sc->sc_bow_out) {
1052 		advbase = 255;
1053 		advskew = 255;
1054 	} else {
1055 		advbase = sc->sc_advbase;
1056 		advskew = vhe->advskew;
1057 		tv.tv_sec = advbase;
1058 		if (advbase == 0 && advskew == 0)
1059 			tv.tv_usec = 1 * 1000000 / 256;
1060 		else
1061 			tv.tv_usec = advskew * 1000000 / 256;
1062 	}
1063 
1064 	ch.carp_version = CARP_VERSION;
1065 	ch.carp_type = CARP_ADVERTISEMENT;
1066 	ch.carp_vhid = vhe->vhid;
1067 	ch.carp_demote = carp_group_demote_count(sc) & 0xff;
1068 	ch.carp_advbase = advbase;
1069 	ch.carp_advskew = advskew;
1070 	ch.carp_authlen = 7;	/* XXX DEFINE */
1071 	ch.carp_cksum = 0;
1072 
1073 	sc->cur_vhe = vhe; /* we need the vhe later on the output path */
1074 
1075 	if (sc->sc_naddrs) {
1076 		struct ip *ip;
1077 
1078 		MGETHDR(m, M_DONTWAIT, MT_HEADER);
1079 		if (m == NULL) {
1080 			sc->sc_if.if_oerrors++;
1081 			carpstats.carps_onomem++;
1082 			/* XXX maybe less ? */
1083 			goto retry_later;
1084 		}
1085 		len = sizeof(*ip) + sizeof(ch);
1086 		m->m_pkthdr.len = len;
1087 		m->m_pkthdr.ph_ifidx = 0;
1088 		m->m_pkthdr.ph_rtableid = sc->sc_if.if_rdomain;
1089 		m->m_pkthdr.pf.prio = CARP_IFQ_PRIO;
1090 		m->m_len = len;
1091 		MH_ALIGN(m, m->m_len);
1092 		ip = mtod(m, struct ip *);
1093 		ip->ip_v = IPVERSION;
1094 		ip->ip_hl = sizeof(*ip) >> 2;
1095 		ip->ip_tos = IPTOS_LOWDELAY;
1096 		ip->ip_len = htons(len);
1097 		ip->ip_id = htons(ip_randomid());
1098 		ip->ip_off = htons(IP_DF);
1099 		ip->ip_ttl = CARP_DFLTTL;
1100 		ip->ip_p = IPPROTO_CARP;
1101 		ip->ip_sum = 0;
1102 
1103 		memset(&sa, 0, sizeof(sa));
1104 		sa.sa_family = AF_INET;
1105 		/* Prefer addresses on the parent interface as source for AD. */
1106 		ifa = ifaof_ifpforaddr(&sa, sc->sc_carpdev);
1107 		if (ifa == NULL)
1108 			ifa = ifaof_ifpforaddr(&sa, &sc->sc_if);
1109 		KASSERT(ifa != NULL);
1110 		ip->ip_src.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr;
1111 		ip->ip_dst.s_addr = sc->sc_peer.s_addr;
1112 		if (IN_MULTICAST(ip->ip_dst.s_addr))
1113 			m->m_flags |= M_MCAST;
1114 
1115 		ch_ptr = (struct carp_header *)(ip + 1);
1116 		bcopy(&ch, ch_ptr, sizeof(ch));
1117 		if (carp_prepare_ad(m, vhe, ch_ptr))
1118 			goto retry_later;
1119 
1120 		m->m_data += sizeof(*ip);
1121 		ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip));
1122 		m->m_data -= sizeof(*ip);
1123 
1124 		getmicrotime(&sc->sc_if.if_lastchange);
1125 		sc->sc_if.if_opackets++;
1126 		sc->sc_if.if_obytes += len;
1127 		carpstats.carps_opackets++;
1128 
1129 		error = ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo,
1130 		    NULL, 0);
1131 		if (error) {
1132 			if (error == ENOBUFS)
1133 				carpstats.carps_onomem++;
1134 			else
1135 				CARP_LOG(LOG_WARNING, sc,
1136 				    ("ip_output failed: %d", error));
1137 			sc->sc_if.if_oerrors++;
1138 			if (sc->sc_sendad_errors < INT_MAX)
1139 				sc->sc_sendad_errors++;
1140 			if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS(sc))
1141 				carp_group_demote_adj(&sc->sc_if, 1,
1142 				    "> snderrors");
1143 			sc->sc_sendad_success = 0;
1144 		} else {
1145 			if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS(sc)) {
1146 				if (++sc->sc_sendad_success >=
1147 				    CARP_SENDAD_MIN_SUCCESS(sc)) {
1148 					carp_group_demote_adj(&sc->sc_if, -1,
1149 					    "< snderrors");
1150 					sc->sc_sendad_errors = 0;
1151 				}
1152 			} else
1153 				sc->sc_sendad_errors = 0;
1154 		}
1155 		if (vhe->vhe_leader) {
1156 			if (sc->sc_delayed_arp > 0)
1157 				sc->sc_delayed_arp--;
1158 			if (sc->sc_delayed_arp == 0) {
1159 				carp_send_arp(sc);
1160 				sc->sc_delayed_arp = -1;
1161 			}
1162 		}
1163 	}
1164 #ifdef INET6
1165 	if (sc->sc_naddrs6) {
1166 		struct ip6_hdr *ip6;
1167 
1168 		MGETHDR(m, M_DONTWAIT, MT_HEADER);
1169 		if (m == NULL) {
1170 			sc->sc_if.if_oerrors++;
1171 			carpstats.carps_onomem++;
1172 			/* XXX maybe less ? */
1173 			goto retry_later;
1174 		}
1175 		len = sizeof(*ip6) + sizeof(ch);
1176 		m->m_pkthdr.len = len;
1177 		m->m_pkthdr.ph_ifidx = 0;
1178 		m->m_pkthdr.pf.prio = CARP_IFQ_PRIO;
1179 		m->m_pkthdr.ph_rtableid = sc->sc_if.if_rdomain;
1180 		m->m_len = len;
1181 		MH_ALIGN(m, m->m_len);
1182 		m->m_flags |= M_MCAST;
1183 		ip6 = mtod(m, struct ip6_hdr *);
1184 		memset(ip6, 0, sizeof(*ip6));
1185 		ip6->ip6_vfc |= IPV6_VERSION;
1186 		ip6->ip6_hlim = CARP_DFLTTL;
1187 		ip6->ip6_nxt = IPPROTO_CARP;
1188 
1189 		/* set the source address */
1190 		memset(&sa, 0, sizeof(sa));
1191 		sa.sa_family = AF_INET6;
1192 		/* Prefer addresses on the parent interface as source for AD. */
1193 		ifa = ifaof_ifpforaddr(&sa, sc->sc_carpdev);
1194 		if (ifa == NULL)
1195 			ifa = ifaof_ifpforaddr(&sa, &sc->sc_if);
1196 		KASSERT(ifa != NULL);
1197 		bcopy(ifatoia6(ifa)->ia_addr.sin6_addr.s6_addr,
1198 		    &ip6->ip6_src, sizeof(struct in6_addr));
1199 		/* set the multicast destination */
1200 
1201 		ip6->ip6_dst.s6_addr16[0] = htons(0xff02);
1202 		ip6->ip6_dst.s6_addr16[1] = htons(sc->sc_carpdev->if_index);
1203 		ip6->ip6_dst.s6_addr8[15] = 0x12;
1204 
1205 		ch_ptr = (struct carp_header *)(ip6 + 1);
1206 		bcopy(&ch, ch_ptr, sizeof(ch));
1207 		if (carp_prepare_ad(m, vhe, ch_ptr))
1208 			goto retry_later;
1209 
1210 		m->m_data += sizeof(*ip6);
1211 		ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip6));
1212 		m->m_data -= sizeof(*ip6);
1213 
1214 		getmicrotime(&sc->sc_if.if_lastchange);
1215 		sc->sc_if.if_opackets++;
1216 		sc->sc_if.if_obytes += len;
1217 		carpstats.carps_opackets6++;
1218 
1219 		error = ip6_output(m, NULL, NULL, 0, &sc->sc_im6o, NULL);
1220 		if (error) {
1221 			if (error == ENOBUFS)
1222 				carpstats.carps_onomem++;
1223 			else
1224 				CARP_LOG(LOG_WARNING, sc,
1225 				    ("ip6_output failed: %d", error));
1226 			sc->sc_if.if_oerrors++;
1227 			if (sc->sc_sendad_errors < INT_MAX)
1228 				sc->sc_sendad_errors++;
1229 			if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS(sc))
1230 				carp_group_demote_adj(&sc->sc_if, 1,
1231 					    "> snd6errors");
1232 			sc->sc_sendad_success = 0;
1233 		} else {
1234 			if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS(sc)) {
1235 				if (++sc->sc_sendad_success >=
1236 				    CARP_SENDAD_MIN_SUCCESS(sc)) {
1237 					carp_group_demote_adj(&sc->sc_if, -1,
1238 					    "< snd6errors");
1239 					sc->sc_sendad_errors = 0;
1240 				}
1241 			} else
1242 				sc->sc_sendad_errors = 0;
1243 		}
1244 	}
1245 #endif /* INET6 */
1246 
1247 retry_later:
1248 	sc->cur_vhe = NULL;
1249 	splx(s);
1250 	if (advbase != 255 || advskew != 255)
1251 		timeout_add(&vhe->ad_tmo, tvtohz(&tv));
1252 }
1253 
1254 /*
1255  * Broadcast a gratuitous ARP request containing
1256  * the virtual router MAC address for each IP address
1257  * associated with the virtual router.
1258  */
1259 void
1260 carp_send_arp(struct carp_softc *sc)
1261 {
1262 	struct ifaddr *ifa;
1263 	in_addr_t in;
1264 	int s = splsoftnet();
1265 
1266 	TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) {
1267 
1268 		if (ifa->ifa_addr->sa_family != AF_INET)
1269 			continue;
1270 
1271 		in = ifatoia(ifa)->ia_addr.sin_addr.s_addr;
1272 		arprequest(&sc->sc_if, &in, &in, sc->sc_ac.ac_enaddr);
1273 		DELAY(1000);	/* XXX */
1274 	}
1275 	splx(s);
1276 }
1277 
1278 #ifdef INET6
1279 void
1280 carp_send_na(struct carp_softc *sc)
1281 {
1282 	struct ifaddr *ifa;
1283 	struct in6_addr *in6;
1284 	static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
1285 	int s = splsoftnet();
1286 
1287 	TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) {
1288 
1289 		if (ifa->ifa_addr->sa_family != AF_INET6)
1290 			continue;
1291 
1292 		in6 = &ifatoia6(ifa)->ia_addr.sin6_addr;
1293 		nd6_na_output(&sc->sc_if, &mcast, in6,
1294 		    ND_NA_FLAG_OVERRIDE |
1295 		    (ip6_forwarding ? ND_NA_FLAG_ROUTER : 0), 1, NULL);
1296 		DELAY(1000);	/* XXX */
1297 	}
1298 	splx(s);
1299 }
1300 #endif /* INET6 */
1301 
1302 void
1303 carp_update_lsmask(struct carp_softc *sc)
1304 {
1305 	struct carp_vhost_entry *vhe;
1306 	int count;
1307 
1308 	if (sc->sc_balancing == CARP_BAL_NONE)
1309 		return;
1310 
1311 	sc->sc_lsmask = 0;
1312 	count = 0;
1313 
1314 	KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
1315 	SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) {
1316 		if (vhe->state == MASTER && count < sizeof(sc->sc_lsmask) * 8)
1317 			sc->sc_lsmask |= 1 << count;
1318 		count++;
1319 	}
1320 	sc->sc_lscount = count;
1321 	CARP_LOG(LOG_DEBUG, sc, ("carp_update_lsmask: %x", sc->sc_lsmask));
1322 }
1323 
1324 int
1325 carp_iamatch(struct ifnet *ifp)
1326 {
1327 	struct carp_softc *sc = ifp->if_softc;
1328 	struct carp_vhost_entry *vhe;
1329 	struct srp_ref sr;
1330 	int match = 0;
1331 
1332 	vhe = SRPL_ENTER(&sr, &sc->carp_vhosts); /* head */
1333 	if (vhe->state == MASTER)
1334 		match = 1;
1335 	SRPL_LEAVE(&sr);
1336 
1337 	return (match);
1338 }
1339 
1340 #ifdef INET6
1341 int
1342 carp_iamatch6(struct ifnet *ifp)
1343 {
1344 	struct carp_softc *sc = ifp->if_softc;
1345 	struct carp_vhost_entry *vhe = SRPL_FIRST_LOCKED(&sc->carp_vhosts);
1346 
1347 	KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
1348 
1349 	if (vhe->state == MASTER)
1350 		return (1);
1351 
1352 	return (0);
1353 }
1354 #endif /* INET6 */
1355 
1356 struct ifnet *
1357 carp_ourether(void *v, u_int8_t *ena)
1358 {
1359 	struct carp_if *cif = (struct carp_if *)v;
1360 	struct carp_softc *vh;
1361 
1362 	KERNEL_ASSERT_LOCKED(); /* touching vhif_vrs + carp_vhosts */
1363 
1364 	SRPL_FOREACH_LOCKED(vh, &cif->vhif_vrs, sc_list) {
1365 		struct carp_vhost_entry *vhe;
1366 		if ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) !=
1367 		    (IFF_UP|IFF_RUNNING))
1368 			continue;
1369 		vhe = SRPL_FIRST_LOCKED(&vh->carp_vhosts);
1370 		if ((vhe->state == MASTER || vh->sc_balancing >= CARP_BAL_IP) &&
1371 		    !memcmp(ena, vh->sc_ac.ac_enaddr, ETHER_ADDR_LEN))
1372 			return (&vh->sc_if);
1373 	}
1374 	return (NULL);
1375 }
1376 
1377 int
1378 carp_vhe_match(struct carp_softc *sc, uint8_t *ena)
1379 {
1380 	struct carp_vhost_entry *vhe;
1381 	struct srp_ref sr;
1382 	int match = 0;
1383 
1384 	vhe = SRPL_ENTER(&sr, &sc->carp_vhosts); /* head */
1385 	match = (vhe->state == MASTER || sc->sc_balancing >= CARP_BAL_IP) &&
1386 	    !memcmp(ena, sc->sc_ac.ac_enaddr, ETHER_ADDR_LEN);
1387 	SRPL_LEAVE(&sr);
1388 
1389 	return (match);
1390 }
1391 
1392 int
1393 carp_input(struct ifnet *ifp0, struct mbuf *m, void *cookie)
1394 {
1395 	struct ether_header *eh;
1396 	struct mbuf_list ml = MBUF_LIST_INITIALIZER();
1397 	struct carp_if *cif;
1398 	struct carp_softc *sc;
1399 	struct srp_ref sr;
1400 
1401 #if NVLAN > 0
1402 	/*
1403 	 * If the underlying interface removed the VLAN header itself,
1404 	 * it's not for us.
1405 	 */
1406 	if (ISSET(m->m_flags, M_VLANTAG))
1407 		return (0);
1408 #endif
1409 
1410 	eh = mtod(m, struct ether_header *);
1411 	cif = (struct carp_if *)cookie;
1412 	KASSERT(cif == (struct carp_if *)ifp0->if_carp);
1413 
1414 	SRPL_FOREACH(sc, &sr, &cif->vhif_vrs, sc_list) {
1415 		if ((sc->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) !=
1416 		    (IFF_UP|IFF_RUNNING))
1417 			continue;
1418 
1419 		if (carp_vhe_match(sc, eh->ether_dhost))
1420 			break;
1421 	}
1422 
1423 	if (sc == NULL) {
1424 		SRPL_LEAVE(&sr);
1425 
1426 		if (!ETHER_IS_MULTICAST(eh->ether_dhost))
1427 			return (0);
1428 
1429 		/*
1430 		 * XXX Should really check the list of multicast addresses
1431 		 * for each CARP interface _before_ copying.
1432 		 */
1433 		SRPL_FOREACH(sc, &sr, &cif->vhif_vrs, sc_list) {
1434 			struct mbuf *m0;
1435 
1436 			if (!(sc->sc_if.if_flags & IFF_UP))
1437 				continue;
1438 
1439 			m0 = m_dup_pkt(m, ETHER_ALIGN, M_DONTWAIT);
1440 			if (m0 == NULL)
1441 				continue;
1442 
1443 			ml_init(&ml);
1444 			ml_enqueue(&ml, m0);
1445 
1446 			if_input(&sc->sc_if, &ml);
1447 		}
1448 		SRPL_LEAVE(&sr);
1449 
1450 		return (0);
1451 	}
1452 
1453 	/*
1454 	 * Clear mcast if received on a carp IP balanced address.
1455 	 */
1456 	if (sc->sc_balancing == CARP_BAL_IP &&
1457 	    ETHER_IS_MULTICAST(eh->ether_dhost))
1458 		*(eh->ether_dhost) &= ~0x01;
1459 
1460 	ml_enqueue(&ml, m);
1461 	if_input(&sc->sc_if, &ml);
1462 	SRPL_LEAVE(&sr);
1463 
1464 	return (1);
1465 }
1466 
1467 int
1468 carp_lsdrop(struct mbuf *m, sa_family_t af, u_int32_t *src, u_int32_t *dst)
1469 {
1470 	struct ifnet *ifp;
1471 	struct carp_softc *sc;
1472 	int match = 1;
1473 	u_int32_t fold;
1474 
1475 	ifp = if_get(m->m_pkthdr.ph_ifidx);
1476 	KASSERT(ifp != NULL);
1477 
1478 	sc = ifp->if_softc;
1479 	if (sc->sc_balancing == CARP_BAL_NONE)
1480 		goto done;
1481 	/*
1482 	 * Never drop carp advertisements.
1483 	 * XXX Bad idea to pass all broadcast / multicast traffic?
1484 	 */
1485 	if (m->m_flags & (M_BCAST|M_MCAST))
1486 		goto done;
1487 
1488 	fold = src[0] ^ dst[0];
1489 #ifdef INET6
1490 	if (af == AF_INET6) {
1491 		int i;
1492 		for (i = 1; i < 4; i++)
1493 			fold ^= src[i] ^ dst[i];
1494 	}
1495 #endif
1496 	if (sc->sc_lscount == 0) /* just to be safe */
1497 		match = 0;
1498 	else
1499 		match = (1 << (ntohl(fold) % sc->sc_lscount)) & sc->sc_lsmask;
1500 
1501 done:
1502 	if_put(ifp);
1503 	return (!match);
1504 }
1505 
1506 void
1507 carp_master_down(void *v)
1508 {
1509 	struct carp_vhost_entry *vhe = v;
1510 	struct carp_softc *sc = vhe->parent_sc;
1511 
1512 	switch (vhe->state) {
1513 	case INIT:
1514 		printf("%s: master_down event in INIT state\n",
1515 		    sc->sc_if.if_xname);
1516 		break;
1517 	case MASTER:
1518 		break;
1519 	case BACKUP:
1520 		carp_set_state(vhe, MASTER);
1521 		carp_send_ad(vhe);
1522 		if (sc->sc_balancing == CARP_BAL_NONE && vhe->vhe_leader) {
1523 			carp_send_arp(sc);
1524 			/* Schedule a delayed ARP to deal w/ some L3 switches */
1525 			sc->sc_delayed_arp = 2;
1526 #ifdef INET6
1527 			carp_send_na(sc);
1528 #endif /* INET6 */
1529 		}
1530 		carp_setrun(vhe, 0);
1531 		carpstats.carps_preempt++;
1532 		break;
1533 	}
1534 }
1535 
1536 void
1537 carp_setrun_all(struct carp_softc *sc, sa_family_t af)
1538 {
1539 	struct carp_vhost_entry *vhe;
1540 
1541 	KERNEL_ASSERT_LOCKED(); /* touching carp_vhost */
1542 	SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) {
1543 		carp_setrun(vhe, af);
1544 	}
1545 }
1546 
1547 /*
1548  * When in backup state, af indicates whether to reset the master down timer
1549  * for v4 or v6. If it's set to zero, reset the ones which are already pending.
1550  */
1551 void
1552 carp_setrun(struct carp_vhost_entry *vhe, sa_family_t af)
1553 {
1554 	struct timeval tv;
1555 	struct carp_softc *sc = vhe->parent_sc;
1556 
1557 	if (sc->sc_carpdev == NULL) {
1558 		sc->sc_if.if_flags &= ~IFF_RUNNING;
1559 		carp_set_state_all(sc, INIT);
1560 		return;
1561 	}
1562 
1563 	if (memcmp(((struct arpcom *)sc->sc_carpdev)->ac_enaddr,
1564 	    sc->sc_ac.ac_enaddr, ETHER_ADDR_LEN) == 0)
1565 		sc->sc_realmac = 1;
1566 	else
1567 		sc->sc_realmac = 0;
1568 
1569 	if (sc->sc_if.if_flags & IFF_UP && vhe->vhid > 0 &&
1570 	    (sc->sc_naddrs || sc->sc_naddrs6) && !sc->sc_suppress) {
1571 		sc->sc_if.if_flags |= IFF_RUNNING;
1572 	} else {
1573 		sc->sc_if.if_flags &= ~IFF_RUNNING;
1574 		return;
1575 	}
1576 
1577 	switch (vhe->state) {
1578 	case INIT:
1579 		carp_set_state(vhe, BACKUP);
1580 		carp_setrun(vhe, 0);
1581 		break;
1582 	case BACKUP:
1583 		timeout_del(&vhe->ad_tmo);
1584 		tv.tv_sec = 3 * sc->sc_advbase;
1585 		if (sc->sc_advbase == 0 && vhe->advskew == 0)
1586 			tv.tv_usec = 3 * 1000000 / 256;
1587 		else if (sc->sc_advbase == 0)
1588 			tv.tv_usec = 3 * vhe->advskew * 1000000 / 256;
1589 		else
1590 			tv.tv_usec = vhe->advskew * 1000000 / 256;
1591 		if (vhe->vhe_leader)
1592 			sc->sc_delayed_arp = -1;
1593 		switch (af) {
1594 		case AF_INET:
1595 			timeout_add(&vhe->md_tmo, tvtohz(&tv));
1596 			break;
1597 #ifdef INET6
1598 		case AF_INET6:
1599 			timeout_add(&vhe->md6_tmo, tvtohz(&tv));
1600 			break;
1601 #endif /* INET6 */
1602 		default:
1603 			if (sc->sc_naddrs)
1604 				timeout_add(&vhe->md_tmo, tvtohz(&tv));
1605 			if (sc->sc_naddrs6)
1606 				timeout_add(&vhe->md6_tmo, tvtohz(&tv));
1607 			break;
1608 		}
1609 		break;
1610 	case MASTER:
1611 		tv.tv_sec = sc->sc_advbase;
1612 		if (sc->sc_advbase == 0 && vhe->advskew == 0)
1613 			tv.tv_usec = 1 * 1000000 / 256;
1614 		else
1615 			tv.tv_usec = vhe->advskew * 1000000 / 256;
1616 		timeout_add(&vhe->ad_tmo, tvtohz(&tv));
1617 		break;
1618 	}
1619 }
1620 
1621 void
1622 carp_multicast_cleanup(struct carp_softc *sc)
1623 {
1624 	struct ip_moptions *imo = &sc->sc_imo;
1625 #ifdef INET6
1626 	struct ip6_moptions *im6o = &sc->sc_im6o;
1627 #endif
1628 	u_int16_t n = imo->imo_num_memberships;
1629 
1630 	/* Clean up our own multicast memberships */
1631 	while (n-- > 0) {
1632 		if (imo->imo_membership[n] != NULL) {
1633 			in_delmulti(imo->imo_membership[n]);
1634 			imo->imo_membership[n] = NULL;
1635 		}
1636 	}
1637 	imo->imo_num_memberships = 0;
1638 	imo->imo_ifidx = 0;
1639 
1640 #ifdef INET6
1641 	while (!LIST_EMPTY(&im6o->im6o_memberships)) {
1642 		struct in6_multi_mship *imm =
1643 		    LIST_FIRST(&im6o->im6o_memberships);
1644 
1645 		LIST_REMOVE(imm, i6mm_chain);
1646 		in6_leavegroup(imm);
1647 	}
1648 	im6o->im6o_ifidx = 0;
1649 #endif
1650 
1651 	/* And any other multicast memberships */
1652 	carp_ether_purgemulti(sc);
1653 }
1654 
1655 int
1656 carp_set_ifp(struct carp_softc *sc, struct ifnet *ifp0)
1657 {
1658 	struct carp_if *cif, *ncif = NULL;
1659 	struct carp_softc *vr, *last = NULL, *after = NULL;
1660 	int myself = 0, error = 0;
1661 	int s;
1662 
1663 	KASSERT(ifp0 != sc->sc_carpdev);
1664 	KERNEL_ASSERT_LOCKED(); /* touching vhif_vrs */
1665 
1666 	if ((ifp0->if_flags & IFF_MULTICAST) == 0)
1667 		return (EADDRNOTAVAIL);
1668 
1669 	if (ifp0->if_type == IFT_CARP)
1670 		return (EINVAL);
1671 
1672 	if (ifp0->if_carp == NULL) {
1673 		ncif = malloc(sizeof(*cif), M_IFADDR, M_NOWAIT|M_ZERO);
1674 		if (ncif == NULL)
1675 			return (ENOBUFS);
1676 		if ((error = ifpromisc(ifp0, 1))) {
1677 			free(ncif, M_IFADDR, sizeof(*ncif));
1678 			return (error);
1679 		}
1680 
1681 		SRPL_INIT(&ncif->vhif_vrs);
1682 	} else {
1683 		cif = (struct carp_if *)ifp0->if_carp;
1684 		if (carp_check_dup_vhids(sc, cif, NULL))
1685 			return (EINVAL);
1686 	}
1687 
1688 	/* detach from old interface */
1689 	if (sc->sc_carpdev != NULL)
1690 		carpdetach(sc);
1691 
1692 	/* attach carp interface to physical interface */
1693 	if (ncif != NULL)
1694 		ifp0->if_carp = (caddr_t)ncif;
1695 	sc->sc_carpdev = ifp0;
1696 	sc->sc_if.if_capabilities = ifp0->if_capabilities &
1697 	    IFCAP_CSUM_MASK;
1698 	cif = (struct carp_if *)ifp0->if_carp;
1699 	SRPL_FOREACH_LOCKED(vr, &cif->vhif_vrs, sc_list) {
1700 		struct carp_vhost_entry *vrhead, *schead;
1701 		last = vr;
1702 
1703 		if (vr == sc)
1704 			myself = 1;
1705 
1706 		vrhead = SRPL_FIRST_LOCKED(&vr->carp_vhosts);
1707 		schead = SRPL_FIRST_LOCKED(&sc->carp_vhosts);
1708 		if (vrhead->vhid < schead->vhid)
1709 			after = vr;
1710 	}
1711 
1712 	if (!myself) {
1713 		/* We're trying to keep things in order */
1714 		if (last == NULL) {
1715 			SRPL_INSERT_HEAD_LOCKED(&carp_sc_rc, &cif->vhif_vrs,
1716 			    sc, sc_list);
1717 		} else if (after == NULL) {
1718 			SRPL_INSERT_AFTER_LOCKED(&carp_sc_rc, last,
1719 			    sc, sc_list);
1720 		} else {
1721 			SRPL_INSERT_AFTER_LOCKED(&carp_sc_rc, after,
1722 			    sc, sc_list);
1723 		}
1724 	}
1725 	if (sc->sc_naddrs || sc->sc_naddrs6)
1726 		sc->sc_if.if_flags |= IFF_UP;
1727 	carp_set_enaddr(sc);
1728 
1729 	sc->lh_cookie = hook_establish(ifp0->if_linkstatehooks, 1,
1730 	    carp_carpdev_state, ifp0);
1731 
1732 	/* Change input handler of the physical interface. */
1733 	if_ih_insert(ifp0, carp_input, cif);
1734 
1735 	s = splnet();
1736 	carp_carpdev_state(ifp0);
1737 	splx(s);
1738 
1739 	return (0);
1740 }
1741 
1742 void
1743 carp_set_vhe_enaddr(struct carp_vhost_entry *vhe)
1744 {
1745 	struct carp_softc *sc = vhe->parent_sc;
1746 
1747 	if (vhe->vhid != 0 && sc->sc_carpdev) {
1748 		if (vhe->vhe_leader && sc->sc_balancing == CARP_BAL_IP)
1749 			vhe->vhe_enaddr[0] = 1;
1750 		else
1751 			vhe->vhe_enaddr[0] = 0;
1752 		vhe->vhe_enaddr[1] = 0;
1753 		vhe->vhe_enaddr[2] = 0x5e;
1754 		vhe->vhe_enaddr[3] = 0;
1755 		vhe->vhe_enaddr[4] = 1;
1756 		vhe->vhe_enaddr[5] = vhe->vhid;
1757 	} else
1758 		memset(vhe->vhe_enaddr, 0, ETHER_ADDR_LEN);
1759 }
1760 
1761 void
1762 carp_set_enaddr(struct carp_softc *sc)
1763 {
1764 	struct carp_vhost_entry *vhe;
1765 
1766 	KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
1767 	SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries)
1768 		carp_set_vhe_enaddr(vhe);
1769 
1770 	vhe = SRPL_FIRST_LOCKED(&sc->carp_vhosts);
1771 
1772 	/*
1773 	 * Use the carp lladdr if the running one isn't manually set.
1774 	 * Only compare static parts of the lladdr.
1775 	 */
1776 	if ((memcmp(sc->sc_ac.ac_enaddr + 1, vhe->vhe_enaddr + 1,
1777 	    ETHER_ADDR_LEN - 2) == 0) ||
1778 	    (!sc->sc_ac.ac_enaddr[0] && !sc->sc_ac.ac_enaddr[1] &&
1779 	    !sc->sc_ac.ac_enaddr[2] && !sc->sc_ac.ac_enaddr[3] &&
1780 	    !sc->sc_ac.ac_enaddr[4] && !sc->sc_ac.ac_enaddr[5]))
1781 		bcopy(vhe->vhe_enaddr, sc->sc_ac.ac_enaddr, ETHER_ADDR_LEN);
1782 
1783 	/* Make sure the enaddr has changed before further twiddling. */
1784 	if (memcmp(sc->sc_ac.ac_enaddr, sc->sc_curlladdr, ETHER_ADDR_LEN) != 0) {
1785 		bcopy(sc->sc_ac.ac_enaddr, LLADDR(sc->sc_if.if_sadl),
1786 		    ETHER_ADDR_LEN);
1787 		bcopy(sc->sc_ac.ac_enaddr, sc->sc_curlladdr, ETHER_ADDR_LEN);
1788 #ifdef INET6
1789 		/*
1790 		 * (re)attach a link-local address which matches
1791 		 * our new MAC address.
1792 		 */
1793 		if (sc->sc_naddrs6)
1794 			in6_ifattach_linklocal(&sc->sc_if, NULL);
1795 #endif
1796 		carp_set_state_all(sc, INIT);
1797 		carp_setrun_all(sc, 0);
1798 	}
1799 }
1800 
1801 void
1802 carp_addr_updated(void *v)
1803 {
1804 	struct carp_softc *sc = (struct carp_softc *) v;
1805 	struct ifaddr *ifa;
1806 	int new_naddrs = 0, new_naddrs6 = 0;
1807 
1808 	TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) {
1809 		if (ifa->ifa_addr->sa_family == AF_INET)
1810 			new_naddrs++;
1811 #ifdef INET6
1812 		else if (ifa->ifa_addr->sa_family == AF_INET6)
1813 			new_naddrs6++;
1814 #endif /* INET6 */
1815 	}
1816 
1817 	/* We received address changes from if_addrhooks callback */
1818 	if (new_naddrs != sc->sc_naddrs || new_naddrs6 != sc->sc_naddrs6) {
1819 
1820 		sc->sc_naddrs = new_naddrs;
1821 		sc->sc_naddrs6 = new_naddrs6;
1822 
1823 		/* Re-establish multicast membership removed by in_control */
1824 		if (IN_MULTICAST(sc->sc_peer.s_addr)) {
1825 			if (!in_hasmulti(&sc->sc_peer, &sc->sc_if)) {
1826 				struct in_multi **imm =
1827 				    sc->sc_imo.imo_membership;
1828 				u_int16_t maxmem =
1829 				    sc->sc_imo.imo_max_memberships;
1830 
1831 				memset(&sc->sc_imo, 0, sizeof(sc->sc_imo));
1832 				sc->sc_imo.imo_membership = imm;
1833 				sc->sc_imo.imo_max_memberships = maxmem;
1834 
1835 				if (sc->sc_carpdev != NULL && sc->sc_naddrs > 0)
1836 					carp_join_multicast(sc);
1837 			}
1838 		}
1839 
1840 		if (sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0) {
1841 			sc->sc_if.if_flags &= ~IFF_UP;
1842 			carp_set_state_all(sc, INIT);
1843 		} else
1844 			carp_hmac_prepare(sc);
1845 	}
1846 
1847 	carp_setrun_all(sc, 0);
1848 }
1849 
1850 int
1851 carp_set_addr(struct carp_softc *sc, struct sockaddr_in *sin)
1852 {
1853 	struct in_addr *in = &sin->sin_addr;
1854 	int error;
1855 
1856 	KASSERT(sc->sc_carpdev != NULL);
1857 
1858 	/* XXX is this necessary? */
1859 	if (in->s_addr == INADDR_ANY) {
1860 		carp_setrun_all(sc, 0);
1861 		return (0);
1862 	}
1863 
1864 	if (sc->sc_naddrs == 0 && (error = carp_join_multicast(sc)) != 0)
1865 		return (error);
1866 
1867 	carp_set_state_all(sc, INIT);
1868 
1869 	return (0);
1870 }
1871 
1872 int
1873 carp_join_multicast(struct carp_softc *sc)
1874 {
1875 	struct ip_moptions *imo = &sc->sc_imo;
1876 	struct in_multi *imm;
1877 	struct in_addr addr;
1878 
1879 	if (!IN_MULTICAST(sc->sc_peer.s_addr))
1880 		return (0);
1881 
1882 	addr.s_addr = sc->sc_peer.s_addr;
1883 	if ((imm = in_addmulti(&addr, &sc->sc_if)) == NULL)
1884 		return (ENOBUFS);
1885 
1886 	imo->imo_membership[0] = imm;
1887 	imo->imo_num_memberships = 1;
1888 	imo->imo_ifidx = sc->sc_if.if_index;
1889 	imo->imo_ttl = CARP_DFLTTL;
1890 	imo->imo_loop = 0;
1891 	return (0);
1892 }
1893 
1894 
1895 #ifdef INET6
1896 int
1897 carp_set_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6)
1898 {
1899 	int error;
1900 
1901 	KASSERT(sc->sc_carpdev != NULL);
1902 
1903 	if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
1904 		carp_setrun_all(sc, 0);
1905 		return (0);
1906 	}
1907 
1908 	if (sc->sc_naddrs6 == 0 && (error = carp_join_multicast6(sc)) != 0)
1909 		return (error);
1910 
1911 	carp_set_state_all(sc, INIT);
1912 
1913 	return (0);
1914 }
1915 
1916 int
1917 carp_join_multicast6(struct carp_softc *sc)
1918 {
1919 	struct in6_multi_mship *imm, *imm2;
1920 	struct ip6_moptions *im6o = &sc->sc_im6o;
1921 	struct sockaddr_in6 addr6;
1922 	int error;
1923 
1924 	/* Join IPv6 CARP multicast group */
1925 	memset(&addr6, 0, sizeof(addr6));
1926 	addr6.sin6_family = AF_INET6;
1927 	addr6.sin6_len = sizeof(addr6);
1928 	addr6.sin6_addr.s6_addr16[0] = htons(0xff02);
1929 	addr6.sin6_addr.s6_addr16[1] = htons(sc->sc_if.if_index);
1930 	addr6.sin6_addr.s6_addr8[15] = 0x12;
1931 	if ((imm = in6_joingroup(&sc->sc_if,
1932 	    &addr6.sin6_addr, &error)) == NULL) {
1933 		return (error);
1934 	}
1935 	/* join solicited multicast address */
1936 	memset(&addr6.sin6_addr, 0, sizeof(addr6.sin6_addr));
1937 	addr6.sin6_addr.s6_addr16[0] = htons(0xff02);
1938 	addr6.sin6_addr.s6_addr16[1] = htons(sc->sc_if.if_index);
1939 	addr6.sin6_addr.s6_addr32[1] = 0;
1940 	addr6.sin6_addr.s6_addr32[2] = htonl(1);
1941 	addr6.sin6_addr.s6_addr32[3] = 0;
1942 	addr6.sin6_addr.s6_addr8[12] = 0xff;
1943 	if ((imm2 = in6_joingroup(&sc->sc_if,
1944 	    &addr6.sin6_addr, &error)) == NULL) {
1945 		in6_leavegroup(imm);
1946 		return (error);
1947 	}
1948 
1949 	/* apply v6 multicast membership */
1950 	im6o->im6o_ifidx = sc->sc_if.if_index;
1951 	if (imm)
1952 		LIST_INSERT_HEAD(&im6o->im6o_memberships, imm,
1953 		    i6mm_chain);
1954 	if (imm2)
1955 		LIST_INSERT_HEAD(&im6o->im6o_memberships, imm2,
1956 		    i6mm_chain);
1957 
1958 	return (0);
1959 }
1960 
1961 #endif /* INET6 */
1962 
1963 int
1964 carp_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr)
1965 {
1966 	struct proc *p = curproc;	/* XXX */
1967 	struct carp_softc *sc = ifp->if_softc;
1968 	struct carp_vhost_entry *vhe;
1969 	struct carpreq carpr;
1970 	struct ifaddr *ifa = (struct ifaddr *)addr;
1971 	struct ifreq *ifr = (struct ifreq *)addr;
1972 	struct ifnet *ifp0 = sc->sc_carpdev;
1973 	int i, error = 0;
1974 
1975 	switch (cmd) {
1976 	case SIOCSIFADDR:
1977 		if (ifp0 == NULL)
1978 			return (EINVAL);
1979 
1980 		switch (ifa->ifa_addr->sa_family) {
1981 		case AF_INET:
1982 			sc->sc_if.if_flags |= IFF_UP;
1983 			error = carp_set_addr(sc, satosin(ifa->ifa_addr));
1984 			break;
1985 #ifdef INET6
1986 		case AF_INET6:
1987 			sc->sc_if.if_flags |= IFF_UP;
1988 			error = carp_set_addr6(sc, satosin6(ifa->ifa_addr));
1989 			break;
1990 #endif /* INET6 */
1991 		default:
1992 			error = EAFNOSUPPORT;
1993 			break;
1994 		}
1995 		break;
1996 
1997 	case SIOCSIFFLAGS:
1998 		KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
1999 		vhe = SRPL_FIRST_LOCKED(&sc->carp_vhosts);
2000 		if (vhe->state != INIT && !(ifr->ifr_flags & IFF_UP)) {
2001 			carp_del_all_timeouts(sc);
2002 
2003 			/* we need the interface up to bow out */
2004 			sc->sc_if.if_flags |= IFF_UP;
2005 			sc->sc_bow_out = 1;
2006 			carp_vhe_send_ad_all(sc);
2007 			sc->sc_bow_out = 0;
2008 
2009 			sc->sc_if.if_flags &= ~IFF_UP;
2010 			carp_set_state_all(sc, INIT);
2011 			carp_setrun_all(sc, 0);
2012 		} else if (vhe->state == INIT && (ifr->ifr_flags & IFF_UP)) {
2013 			sc->sc_if.if_flags |= IFF_UP;
2014 			carp_setrun_all(sc, 0);
2015 		}
2016 		break;
2017 
2018 	case SIOCSVH:
2019 		KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
2020 		vhe = SRPL_FIRST_LOCKED(&sc->carp_vhosts);
2021 		if ((error = suser(p, 0)) != 0)
2022 			break;
2023 		if ((error = copyin(ifr->ifr_data, &carpr, sizeof carpr)))
2024 			break;
2025 		error = 1;
2026 		if (carpr.carpr_carpdev[0] != '\0' &&
2027 		    (ifp0 = ifunit(carpr.carpr_carpdev)) == NULL)
2028 			return (EINVAL);
2029 		if (carpr.carpr_peer.s_addr == 0)
2030 			sc->sc_peer.s_addr = INADDR_CARP_GROUP;
2031 		else
2032 			sc->sc_peer.s_addr = carpr.carpr_peer.s_addr;
2033 		if (ifp0 != sc->sc_carpdev) {
2034 			if ((error = carp_set_ifp(sc, ifp0)))
2035 				return (error);
2036 		}
2037 		if (vhe->state != INIT && carpr.carpr_state != vhe->state) {
2038 			switch (carpr.carpr_state) {
2039 			case BACKUP:
2040 				timeout_del(&vhe->ad_tmo);
2041 				carp_set_state_all(sc, BACKUP);
2042 				carp_setrun_all(sc, 0);
2043 				break;
2044 			case MASTER:
2045 				KERNEL_ASSERT_LOCKED();
2046 				/* touching carp_vhosts */
2047 				SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts,
2048 				    vhost_entries)
2049 					carp_master_down(vhe);
2050 				break;
2051 			default:
2052 				break;
2053 			}
2054 		}
2055 		if ((error = carp_vhids_ioctl(sc, &carpr)))
2056 			return (error);
2057 		if (carpr.carpr_advbase >= 0) {
2058 			if (carpr.carpr_advbase > 255) {
2059 				error = EINVAL;
2060 				break;
2061 			}
2062 			sc->sc_advbase = carpr.carpr_advbase;
2063 			error--;
2064 		}
2065 		if (memcmp(sc->sc_advskews, carpr.carpr_advskews,
2066 		    sizeof(sc->sc_advskews))) {
2067 			i = 0;
2068 			KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
2069 			SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts,
2070 			    vhost_entries)
2071 				vhe->advskew = carpr.carpr_advskews[i++];
2072 			bcopy(carpr.carpr_advskews, sc->sc_advskews,
2073 			    sizeof(sc->sc_advskews));
2074 		}
2075 		if (sc->sc_balancing != carpr.carpr_balancing) {
2076 			if (carpr.carpr_balancing > CARP_BAL_MAXID) {
2077 				error = EINVAL;
2078 				break;
2079 			}
2080 			sc->sc_balancing = carpr.carpr_balancing;
2081 			carp_set_enaddr(sc);
2082 			carp_update_lsmask(sc);
2083 		}
2084 		bcopy(carpr.carpr_key, sc->sc_key, sizeof(sc->sc_key));
2085 		if (error > 0)
2086 			error = EINVAL;
2087 		else {
2088 			error = 0;
2089 			carp_hmac_prepare(sc);
2090 			carp_setrun_all(sc, 0);
2091 		}
2092 		break;
2093 
2094 	case SIOCGVH:
2095 		memset(&carpr, 0, sizeof(carpr));
2096 		if (ifp0 != NULL)
2097 			strlcpy(carpr.carpr_carpdev, ifp0->if_xname, IFNAMSIZ);
2098 		i = 0;
2099 		KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
2100 		SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) {
2101 			carpr.carpr_vhids[i] = vhe->vhid;
2102 			carpr.carpr_advskews[i] = vhe->advskew;
2103 			carpr.carpr_states[i] = vhe->state;
2104 			i++;
2105 		}
2106 		carpr.carpr_advbase = sc->sc_advbase;
2107 		carpr.carpr_balancing = sc->sc_balancing;
2108 		if (suser(p, 0) == 0)
2109 			bcopy(sc->sc_key, carpr.carpr_key,
2110 			    sizeof(carpr.carpr_key));
2111 		carpr.carpr_peer.s_addr = sc->sc_peer.s_addr;
2112 		error = copyout(&carpr, ifr->ifr_data, sizeof(carpr));
2113 		break;
2114 
2115 	case SIOCADDMULTI:
2116 		error = carp_ether_addmulti(sc, ifr);
2117 		break;
2118 
2119 	case SIOCDELMULTI:
2120 		error = carp_ether_delmulti(sc, ifr);
2121 		break;
2122 	case SIOCAIFGROUP:
2123 	case SIOCDIFGROUP:
2124 		if (sc->sc_demote_cnt)
2125 			carp_ifgroup_ioctl(ifp, cmd, addr);
2126 		break;
2127 	case SIOCSIFGATTR:
2128 		carp_ifgattr_ioctl(ifp, cmd, addr);
2129 		break;
2130 	default:
2131 		error = ENOTTY;
2132 	}
2133 
2134 	if (memcmp(sc->sc_ac.ac_enaddr, sc->sc_curlladdr, ETHER_ADDR_LEN) != 0)
2135 		carp_set_enaddr(sc);
2136 	return (error);
2137 }
2138 
2139 int
2140 carp_check_dup_vhids(struct carp_softc *sc, struct carp_if *cif,
2141     struct carpreq *carpr)
2142 {
2143 	struct carp_softc *vr;
2144 	struct carp_vhost_entry *vhe, *vhe0;
2145 	int i;
2146 
2147 	KERNEL_ASSERT_LOCKED(); /* touching vhif_vrs + carp_vhosts */
2148 
2149 	SRPL_FOREACH_LOCKED(vr, &cif->vhif_vrs, sc_list) {
2150 		if (vr == sc)
2151 			continue;
2152 		SRPL_FOREACH_LOCKED(vhe, &vr->carp_vhosts, vhost_entries) {
2153 			if (carpr) {
2154 				for (i = 0; carpr->carpr_vhids[i]; i++) {
2155 					if (vhe->vhid == carpr->carpr_vhids[i])
2156 						return (EINVAL);
2157 				}
2158 			}
2159 			SRPL_FOREACH_LOCKED(vhe0, &sc->carp_vhosts,
2160 			    vhost_entries) {
2161 				if (vhe->vhid == vhe0->vhid)
2162 					return (EINVAL);
2163 			}
2164 		}
2165 	}
2166 	return (0);
2167 }
2168 
2169 int
2170 carp_vhids_ioctl(struct carp_softc *sc, struct carpreq *carpr)
2171 {
2172 	int i, j;
2173 	u_int8_t taken_vhids[256];
2174 
2175 	if (carpr->carpr_vhids[0] == 0 ||
2176 	    !memcmp(sc->sc_vhids, carpr->carpr_vhids, sizeof(sc->sc_vhids)))
2177 		return (0);
2178 
2179 	memset(taken_vhids, 0, sizeof(taken_vhids));
2180 	for (i = 0; carpr->carpr_vhids[i]; i++) {
2181 		if (taken_vhids[carpr->carpr_vhids[i]])
2182 			return (EINVAL);
2183 		taken_vhids[carpr->carpr_vhids[i]] = 1;
2184 
2185 		if (sc->sc_carpdev) {
2186 			struct carp_if *cif;
2187 			cif = (struct carp_if *)sc->sc_carpdev->if_carp;
2188 			if (carp_check_dup_vhids(sc, cif, carpr))
2189 				return (EINVAL);
2190 		}
2191 		if (carpr->carpr_advskews[i] >= 255)
2192 			return (EINVAL);
2193 	}
2194 	/* set sane balancing defaults */
2195 	if (i <= 1)
2196 		carpr->carpr_balancing = CARP_BAL_NONE;
2197 	else if (carpr->carpr_balancing == CARP_BAL_NONE &&
2198 	    sc->sc_balancing == CARP_BAL_NONE)
2199 		carpr->carpr_balancing = CARP_BAL_IP;
2200 
2201 	/* destroy all */
2202 	carp_del_all_timeouts(sc);
2203 	carp_destroy_vhosts(sc);
2204 	memset(sc->sc_vhids, 0, sizeof(sc->sc_vhids));
2205 
2206 	/* sort vhosts list by vhid */
2207 	for (j = 1; j <= 255; j++) {
2208 		for (i = 0; carpr->carpr_vhids[i]; i++) {
2209 			if (carpr->carpr_vhids[i] != j)
2210 				continue;
2211 			if (carp_new_vhost(sc, carpr->carpr_vhids[i],
2212 			    carpr->carpr_advskews[i]))
2213 				return (ENOMEM);
2214 			sc->sc_vhids[i] = carpr->carpr_vhids[i];
2215 			sc->sc_advskews[i] = carpr->carpr_advskews[i];
2216 		}
2217 	}
2218 	carp_set_enaddr(sc);
2219 	carp_set_state_all(sc, INIT);
2220 	return (0);
2221 }
2222 
2223 void
2224 carp_ifgroup_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr)
2225 {
2226 	struct ifgroupreq *ifgr = (struct ifgroupreq *)addr;
2227 	struct ifg_list	*ifgl;
2228 	int *dm, adj;
2229 
2230 	if (!strcmp(ifgr->ifgr_group, IFG_ALL))
2231 		return;
2232 	adj = ((struct carp_softc *)ifp->if_softc)->sc_demote_cnt;
2233 	if (cmd == SIOCDIFGROUP)
2234 		adj = adj * -1;
2235 
2236 	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
2237 		if (!strcmp(ifgl->ifgl_group->ifg_group, ifgr->ifgr_group)) {
2238 			dm = &ifgl->ifgl_group->ifg_carp_demoted;
2239 			if (*dm + adj >= 0)
2240 				*dm += adj;
2241 			else
2242 				*dm = 0;
2243 		}
2244 }
2245 
2246 void
2247 carp_ifgattr_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr)
2248 {
2249 	struct ifgroupreq *ifgr = (struct ifgroupreq *)addr;
2250 	struct carp_softc *sc = ifp->if_softc;
2251 
2252 	if (ifgr->ifgr_attrib.ifg_carp_demoted > 0 && (sc->sc_if.if_flags &
2253 	    (IFF_UP|IFF_RUNNING)) == (IFF_UP|IFF_RUNNING))
2254 		carp_vhe_send_ad_all(sc);
2255 }
2256 
2257 void
2258 carp_start(struct ifnet *ifp)
2259 {
2260 	struct carp_softc *sc = ifp->if_softc;
2261 	struct mbuf *m;
2262 
2263 	for (;;) {
2264 		IFQ_DEQUEUE(&ifp->if_snd, m);
2265 		if (m == NULL)
2266 			break;
2267 
2268 #if NBPFILTER > 0
2269 		if (ifp->if_bpf)
2270 			bpf_mtap_ether(ifp->if_bpf, m, BPF_DIRECTION_OUT);
2271 #endif /* NBPFILTER > 0 */
2272 
2273 		if ((ifp->if_carpdev->if_flags & (IFF_UP|IFF_RUNNING)) !=
2274 		    (IFF_UP|IFF_RUNNING)) {
2275 			ifp->if_oerrors++;
2276 			m_freem(m);
2277 			continue;
2278 		}
2279 
2280 		/*
2281 		 * Do not leak the multicast address when sending
2282 		 * advertisements in 'ip' and 'ip-stealth' balacing
2283 		 * modes.
2284 		 */
2285 		if (sc->sc_balancing == CARP_BAL_IP ||
2286 		    sc->sc_balancing == CARP_BAL_IPSTEALTH) {
2287 			struct ether_header *eh;
2288 			uint8_t *esrc;
2289 
2290 			eh = mtod(m, struct ether_header *);
2291 			esrc = ((struct arpcom*)ifp->if_carpdev)->ac_enaddr;;
2292 			memcpy(eh->ether_shost, esrc, sizeof(eh->ether_shost));
2293 		}
2294 
2295 		if (if_enqueue(ifp->if_carpdev, m)) {
2296 			ifp->if_oerrors++;
2297 			continue;
2298 		}
2299 		ifp->if_opackets++;
2300 	}
2301 }
2302 
2303 int
2304 carp_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa,
2305     struct rtentry *rt)
2306 {
2307 	struct carp_softc *sc = ((struct carp_softc *)ifp->if_softc);
2308 	struct carp_vhost_entry *vhe;
2309 	struct srp_ref sr;
2310 	int ismaster;
2311 
2312 	KASSERT(sc->sc_carpdev != NULL);
2313 
2314 	if (sc->cur_vhe == NULL) {
2315 		vhe = SRPL_ENTER(&sr, &sc->carp_vhosts); /* head */
2316 		ismaster = (vhe->state == MASTER);
2317 		SRPL_LEAVE(&sr);
2318 	} else {
2319 		ismaster = (sc->cur_vhe->state == MASTER);
2320 	}
2321 
2322 	if ((sc->sc_balancing == CARP_BAL_NONE && !ismaster)) {
2323 		m_freem(m);
2324 		return (ENETUNREACH);
2325 	}
2326 
2327 	return (ether_output(ifp, m, sa, rt));
2328 }
2329 
2330 void
2331 carp_set_state_all(struct carp_softc *sc, int state)
2332 {
2333 	struct carp_vhost_entry *vhe;
2334 
2335 	KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
2336 
2337 	SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) {
2338 		if (vhe->state == state)
2339 			continue;
2340 
2341 		carp_set_state(vhe, state);
2342 	}
2343 }
2344 
2345 void
2346 carp_set_state(struct carp_vhost_entry *vhe, int state)
2347 {
2348 	struct carp_softc *sc = vhe->parent_sc;
2349 	static const char *carp_states[] = { CARP_STATES };
2350 	int loglevel;
2351 
2352 	KASSERT(vhe->state != state);
2353 
2354 	if (vhe->state == INIT || state == INIT)
2355 		loglevel = LOG_WARNING;
2356 	else
2357 		loglevel = LOG_CRIT;
2358 
2359 	if (sc->sc_vhe_count > 1)
2360 		CARP_LOG(loglevel, sc,
2361 		    ("state transition (vhid %d): %s -> %s", vhe->vhid,
2362 		    carp_states[vhe->state], carp_states[state]));
2363 	else
2364 		CARP_LOG(loglevel, sc,
2365 		    ("state transition: %s -> %s",
2366 		    carp_states[vhe->state], carp_states[state]));
2367 
2368 	vhe->state = state;
2369 	carp_update_lsmask(sc);
2370 
2371 	/* only the master vhe creates link state messages */
2372 	if (!vhe->vhe_leader)
2373 		return;
2374 
2375 	switch (state) {
2376 	case BACKUP:
2377 		sc->sc_if.if_link_state = LINK_STATE_DOWN;
2378 		break;
2379 	case MASTER:
2380 		sc->sc_if.if_link_state = LINK_STATE_UP;
2381 		break;
2382 	default:
2383 		sc->sc_if.if_link_state = LINK_STATE_INVALID;
2384 		break;
2385 	}
2386 	if_link_state_change(&sc->sc_if);
2387 }
2388 
2389 void
2390 carp_group_demote_adj(struct ifnet *ifp, int adj, char *reason)
2391 {
2392 	struct ifg_list	*ifgl;
2393 	int *dm, need_ad;
2394 	struct carp_softc *nil = NULL;
2395 
2396 	if (ifp->if_type == IFT_CARP) {
2397 		dm = &((struct carp_softc *)ifp->if_softc)->sc_demote_cnt;
2398 		if (*dm + adj >= 0)
2399 			*dm += adj;
2400 		else
2401 			*dm = 0;
2402 	}
2403 
2404 	need_ad = 0;
2405 	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
2406 		if (!strcmp(ifgl->ifgl_group->ifg_group, IFG_ALL))
2407 			continue;
2408 		dm = &ifgl->ifgl_group->ifg_carp_demoted;
2409 
2410 		if (*dm + adj >= 0)
2411 			*dm += adj;
2412 		else
2413 			*dm = 0;
2414 
2415 		if (adj > 0 && *dm == 1)
2416 			need_ad = 1;
2417 		CARP_LOG(LOG_ERR, nil,
2418 		    ("%s demoted group %s by %d to %d (%s)",
2419 		    ifp->if_xname, ifgl->ifgl_group->ifg_group,
2420 		    adj, *dm, reason));
2421 	}
2422 	if (need_ad)
2423 		carp_send_ad_all();
2424 }
2425 
2426 int
2427 carp_group_demote_count(struct carp_softc *sc)
2428 {
2429 	struct ifg_list	*ifgl;
2430 	int count = 0;
2431 
2432 	TAILQ_FOREACH(ifgl, &sc->sc_if.if_groups, ifgl_next)
2433 		count += ifgl->ifgl_group->ifg_carp_demoted;
2434 
2435 	if (count == 0 && sc->sc_demote_cnt)
2436 		count = sc->sc_demote_cnt;
2437 
2438 	return (count > 255 ? 255 : count);
2439 }
2440 
2441 void
2442 carp_carpdev_state(void *v)
2443 {
2444 	struct carp_if *cif;
2445 	struct carp_softc *sc;
2446 	struct ifnet *ifp0 = v;
2447 
2448 	if (ifp0->if_type == IFT_CARP)
2449 		return;
2450 
2451 	cif = (struct carp_if *)ifp0->if_carp;
2452 
2453 	KERNEL_ASSERT_LOCKED(); /* touching vhif_vrs */
2454 
2455 	SRPL_FOREACH_LOCKED(sc, &cif->vhif_vrs, sc_list) {
2456 		int suppressed = sc->sc_suppress;
2457 
2458 		if (sc->sc_carpdev->if_link_state == LINK_STATE_DOWN ||
2459 		    !(sc->sc_carpdev->if_flags & IFF_UP)) {
2460 			sc->sc_if.if_flags &= ~IFF_RUNNING;
2461 			carp_del_all_timeouts(sc);
2462 			carp_set_state_all(sc, INIT);
2463 			sc->sc_suppress = 1;
2464 			carp_setrun_all(sc, 0);
2465 			if (!suppressed)
2466 				carp_group_demote_adj(&sc->sc_if, 1, "carpdev");
2467 		} else if (suppressed) {
2468 			carp_set_state_all(sc, INIT);
2469 			sc->sc_suppress = 0;
2470 			carp_setrun_all(sc, 0);
2471 			carp_group_demote_adj(&sc->sc_if, -1, "carpdev");
2472 		}
2473 	}
2474 }
2475 
2476 int
2477 carp_ether_addmulti(struct carp_softc *sc, struct ifreq *ifr)
2478 {
2479 	struct ifnet *ifp0;
2480 	struct carp_mc_entry *mc;
2481 	u_int8_t addrlo[ETHER_ADDR_LEN], addrhi[ETHER_ADDR_LEN];
2482 	int error;
2483 
2484 	ifp0 = sc->sc_carpdev;
2485 	if (ifp0 == NULL)
2486 		return (EINVAL);
2487 
2488 	error = ether_addmulti(ifr, (struct arpcom *)&sc->sc_ac);
2489 	if (error != ENETRESET)
2490 		return (error);
2491 
2492 	/*
2493 	 * This is new multicast address.  We have to tell parent
2494 	 * about it.  Also, remember this multicast address so that
2495 	 * we can delete them on unconfigure.
2496 	 */
2497 	mc = malloc(sizeof(*mc), M_DEVBUF, M_NOWAIT);
2498 	if (mc == NULL) {
2499 		error = ENOMEM;
2500 		goto alloc_failed;
2501 	}
2502 
2503 	/*
2504 	 * As ether_addmulti() returns ENETRESET, following two
2505 	 * statement shouldn't fail.
2506 	 */
2507 	(void)ether_multiaddr(&ifr->ifr_addr, addrlo, addrhi);
2508 	ETHER_LOOKUP_MULTI(addrlo, addrhi, &sc->sc_ac, mc->mc_enm);
2509 	memcpy(&mc->mc_addr, &ifr->ifr_addr, ifr->ifr_addr.sa_len);
2510 	LIST_INSERT_HEAD(&sc->carp_mc_listhead, mc, mc_entries);
2511 
2512 	error = (*ifp0->if_ioctl)(ifp0, SIOCADDMULTI, (caddr_t)ifr);
2513 	if (error != 0)
2514 		goto ioctl_failed;
2515 
2516 	return (error);
2517 
2518  ioctl_failed:
2519 	LIST_REMOVE(mc, mc_entries);
2520 	free(mc, M_DEVBUF, sizeof(*mc));
2521  alloc_failed:
2522 	(void)ether_delmulti(ifr, (struct arpcom *)&sc->sc_ac);
2523 
2524 	return (error);
2525 }
2526 
2527 int
2528 carp_ether_delmulti(struct carp_softc *sc, struct ifreq *ifr)
2529 {
2530 	struct ifnet *ifp0;
2531 	struct ether_multi *enm;
2532 	struct carp_mc_entry *mc;
2533 	u_int8_t addrlo[ETHER_ADDR_LEN], addrhi[ETHER_ADDR_LEN];
2534 	int error;
2535 
2536 	ifp0 = sc->sc_carpdev;
2537 	if (ifp0 == NULL)
2538 		return (EINVAL);
2539 
2540 	/*
2541 	 * Find a key to lookup carp_mc_entry.  We have to do this
2542 	 * before calling ether_delmulti for obvious reason.
2543 	 */
2544 	if ((error = ether_multiaddr(&ifr->ifr_addr, addrlo, addrhi)) != 0)
2545 		return (error);
2546 	ETHER_LOOKUP_MULTI(addrlo, addrhi, &sc->sc_ac, enm);
2547 	if (enm == NULL)
2548 		return (EINVAL);
2549 
2550 	LIST_FOREACH(mc, &sc->carp_mc_listhead, mc_entries)
2551 		if (mc->mc_enm == enm)
2552 			break;
2553 
2554 	/* We won't delete entries we didn't add */
2555 	if (mc == NULL)
2556 		return (EINVAL);
2557 
2558 	error = ether_delmulti(ifr, (struct arpcom *)&sc->sc_ac);
2559 	if (error != ENETRESET)
2560 		return (error);
2561 
2562 	/* We no longer use this multicast address.  Tell parent so. */
2563 	error = (*ifp0->if_ioctl)(ifp0, SIOCDELMULTI, (caddr_t)ifr);
2564 	if (error == 0) {
2565 		/* And forget about this address. */
2566 		LIST_REMOVE(mc, mc_entries);
2567 		free(mc, M_DEVBUF, sizeof(*mc));
2568 	} else
2569 		(void)ether_addmulti(ifr, (struct arpcom *)&sc->sc_ac);
2570 	return (error);
2571 }
2572 
2573 /*
2574  * Delete any multicast address we have asked to add from parent
2575  * interface.  Called when the carp is being unconfigured.
2576  */
2577 void
2578 carp_ether_purgemulti(struct carp_softc *sc)
2579 {
2580 	struct ifnet *ifp0 = sc->sc_carpdev;		/* Parent. */
2581 	struct carp_mc_entry *mc;
2582 	union {
2583 		struct ifreq ifreq;
2584 		struct {
2585 			char ifr_name[IFNAMSIZ];
2586 			struct sockaddr_storage ifr_ss;
2587 		} ifreq_storage;
2588 	} u;
2589 	struct ifreq *ifr = &u.ifreq;
2590 
2591 	if (ifp0 == NULL)
2592 		return;
2593 
2594 	memcpy(ifr->ifr_name, ifp0->if_xname, IFNAMSIZ);
2595 	while ((mc = LIST_FIRST(&sc->carp_mc_listhead)) != NULL) {
2596 		memcpy(&ifr->ifr_addr, &mc->mc_addr, mc->mc_addr.ss_len);
2597 		(void)(*ifp0->if_ioctl)(ifp0, SIOCDELMULTI, (caddr_t)ifr);
2598 		LIST_REMOVE(mc, mc_entries);
2599 		free(mc, M_DEVBUF, sizeof(*mc));
2600 	}
2601 }
2602 
2603 void
2604 carp_vh_ref(void *null, void *v)
2605 {
2606 	struct carp_vhost_entry *vhe = v;
2607 
2608 	refcnt_take(&vhe->vhost_refcnt);
2609 }
2610 
2611 void
2612 carp_vh_unref(void *null, void *v)
2613 {
2614 	struct carp_vhost_entry *vhe = v;
2615 
2616 	if (refcnt_rele(&vhe->vhost_refcnt)) {
2617 		carp_sc_unref(NULL, vhe->parent_sc);
2618 		free(vhe, M_DEVBUF, sizeof(*vhe));
2619 	}
2620 }
2621 
2622 void
2623 carp_sc_ref(void *null, void *s)
2624 {
2625 	struct carp_softc *sc = s;
2626 
2627 	refcnt_take(&sc->sc_refcnt);
2628 }
2629 
2630 void
2631 carp_sc_unref(void *null, void *s)
2632 {
2633 	struct carp_softc *sc = s;
2634 
2635 	refcnt_rele_wake(&sc->sc_refcnt);
2636 }
2637