xref: /openbsd-src/sys/netinet/ip_carp.c (revision d13be5d47e4149db2549a9828e244d59dbc43f15)
1 /*	$OpenBSD: ip_carp.c,v 1.190 2011/09/06 16:00:22 mpf Exp $	*/
2 
3 /*
4  * Copyright (c) 2002 Michael Shalayeff. All rights reserved.
5  * Copyright (c) 2003 Ryan McBride. All rights reserved.
6  * Copyright (c) 2006-2008 Marco Pfatschbacher. All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
21  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
23  * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
25  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
26  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
27  * THE POSSIBILITY OF SUCH DAMAGE.
28  */
29 
30 /*
31  * TODO:
32  *	- iface reconfigure
33  *	- support for hardware checksum calculations;
34  *
35  */
36 
37 #include "ether.h"
38 
39 #include <sys/param.h>
40 #include <sys/proc.h>
41 #include <sys/systm.h>
42 #include <sys/mbuf.h>
43 #include <sys/socket.h>
44 #include <sys/socketvar.h>
45 #include <sys/ioctl.h>
46 #include <sys/errno.h>
47 #include <sys/device.h>
48 #include <sys/kernel.h>
49 #include <sys/sysctl.h>
50 #include <sys/syslog.h>
51 
52 #include <machine/cpu.h>
53 
54 #include <net/if.h>
55 #include <net/if_types.h>
56 #include <net/if_llc.h>
57 #include <net/route.h>
58 #include <net/netisr.h>
59 
60 /* for arc4random() */
61 #include <dev/rndvar.h>
62 
63 #if NFDDI > 0
64 #include <net/if_fddi.h>
65 #endif
66 
67 #include <crypto/sha1.h>
68 
69 #ifdef INET
70 #include <netinet/in.h>
71 #include <netinet/in_systm.h>
72 #include <netinet/in_var.h>
73 #include <netinet/ip.h>
74 #include <netinet/ip_var.h>
75 #include <netinet/if_ether.h>
76 #include <netinet/ip_ipsp.h>
77 
78 #include <net/if_enc.h>
79 #include <net/if_dl.h>
80 #endif
81 
82 #ifdef INET6
83 #include <netinet/icmp6.h>
84 #include <netinet/ip6.h>
85 #include <netinet6/ip6_var.h>
86 #include <netinet6/nd6.h>
87 #include <netinet6/in6_ifattach.h>
88 #endif
89 
90 #include "bpfilter.h"
91 #if NBPFILTER > 0
92 #include <net/bpf.h>
93 #endif
94 
95 #include <netinet/ip_carp.h>
96 
/*
 * Element of the per-softc carp_mc_listhead list (see struct carp_softc).
 */
struct carp_mc_entry {
	LIST_ENTRY(carp_mc_entry)	mc_entries;	/* list linkage */
	union {
		struct ether_multi	*mcu_enm;
	} mc_u;
	struct sockaddr_storage		mc_addr;
};
/* shorthand for the only member of the mc_u union */
#define	mc_enm	mc_u.mcu_enm
105 
/*
 * Indices into carp_vhost_entry.vhe_sha1[].  For the HMAC_NOV6LL context
 * carp_hmac_prepare_ctx() leaves scope-embedded IPv6 addresses out of the
 * hash; HMAC_MAX is the number of contexts.
 */
enum { HMAC_ORIG=0, HMAC_NOV6LL=1, HMAC_MAX=2 };
107 
/*
 * Per-vhid state of a carp interface.  Entries are linked into the
 * carp_vhosts list of their parent softc.
 */
struct carp_vhost_entry {
	LIST_ENTRY(carp_vhost_entry)	vhost_entries;
	struct carp_softc *parent_sc;	/* softc this entry belongs to */
	int vhe_leader;		/* set on the first entry added to a softc */
	int vhid;
	int advskew;
	enum { INIT = 0, BACKUP, MASTER }	state;
	struct timeout ad_tmo;	/* advertisement timeout */
	struct timeout md_tmo;	/* master down timeout */
	/* second master down timeout; presumably the IPv6 one - TODO confirm */
	struct timeout md6_tmo;	/* master down timeout */

	/* copied into carp_counter of outgoing advertisements */
	u_int64_t vhe_replay_cookie;

	/* authentication */
#define CARP_HMAC_PAD	64
	/* key-derived HMAC pad, left as opad by carp_hmac_prepare_ctx() */
	unsigned char vhe_pad[CARP_HMAC_PAD];
	/* precomputed first part of the inner hash, one per HMAC_* index */
	SHA1_CTX vhe_sha1[HMAC_MAX];

	u_int8_t vhe_enaddr[ETHER_ADDR_LEN];
	struct sockaddr_dl vhe_sdl;	/* for IPv6 ndp balancing */
};
129 
/*
 * Software state of one carp interface.
 */
struct carp_softc {
	struct arpcom sc_ac;
#define	sc_if		sc_ac.ac_if
#define	sc_carpdev	sc_ac.ac_if.if_carpdev
	void *ah_cookie;	/* hook on sc_if.if_addrhooks */
	void *lh_cookie;	/* hook on sc_carpdev->if_linkstatehooks */
	struct ip_moptions sc_imo;
#ifdef INET6
	struct ip6_moptions sc_im6o;
#endif /* INET6 */
	TAILQ_ENTRY(carp_softc) sc_list;	/* linkage in carp_if.vhif_vrs */

	int sc_suppress;
	int sc_bow_out;
	int sc_demote_cnt;

	int sc_sendad_errors;
#define CARP_SENDAD_MAX_ERRORS(sc) (3 * (sc)->sc_vhe_count)
	int sc_sendad_success;
#define CARP_SENDAD_MIN_SUCCESS(sc) (3 * (sc)->sc_vhe_count)

	char sc_curlladdr[ETHER_ADDR_LEN];

	/* virtual hosts of this interface and their number */
	LIST_HEAD(__carp_vhosthead, carp_vhost_entry)	carp_vhosts;
	int sc_vhe_count;
	u_int8_t sc_vhids[CARP_MAXNODES];
	u_int8_t sc_advskews[CARP_MAXNODES];
	u_int8_t sc_balancing;

	int sc_naddrs;
	int sc_naddrs6;
	int sc_advbase;		/* seconds */

	/* authentication */
	unsigned char sc_key[CARP_KEY_LEN];

	/* derived from sc_key by carp_hmac_prepare_ctx() on the leader vhe */
	u_int32_t sc_hashkey[2];
	u_int32_t sc_lsmask;		/* load sharing mask */
	int sc_lscount;			/* # load sharing interfaces (max 32) */
	int sc_delayed_arp;		/* delayed ARP request countdown */

	struct in_addr sc_peer;

	LIST_HEAD(__carp_mchead, carp_mc_entry)	carp_mc_listhead;
	struct carp_vhost_entry *cur_vhe; /* current active vhe */
};
176 
/*
 * Tunables indexed by the CARPCTL_* ids and exposed through carp_sysctl(),
 * and the global packet statistics.
 */
int carp_opts[CARPCTL_MAXID] = { 0, 1, 0, LOG_CRIT };	/* XXX for now */
struct carpstats carpstats;
179 
/*
 * CARP state attached to a parent interface through its if_carp pointer.
 */
struct carp_if {
	TAILQ_HEAD(, carp_softc) vhif_vrs;	/* softcs, linked by sc_list */
	int vhif_nvrs;				/* number of entries in vhif_vrs */

	struct ifnet *vhif_ifp;
};
186 
/*
 * Log a message at level l if carp_opts[CARPCTL_LOG] is at least l.
 * The message is prefixed with the interface name of sc, or with "carp: "
 * when sc is NULL.  s is a complete, parenthesised addlog() argument list,
 * e.g. CARP_LOG(LOG_INFO, sc, ("bad vhid %d", vhid)).
 */
#define	CARP_LOG(l, sc, s)						\
	do {								\
		if (carp_opts[CARPCTL_LOG] >= l) {			\
			if (sc)						\
				log(l, "%s: ",				\
				    (sc)->sc_if.if_xname);		\
			else						\
				log(l, "carp: ");			\
			addlog s;					\
			addlog("\n");					\
		}							\
	} while (0)
199 
/* Prototypes of the CARP routines referenced in this file. */
void	carp_hmac_prepare(struct carp_softc *);
void	carp_hmac_prepare_ctx(struct carp_vhost_entry *, u_int8_t);
void	carp_hmac_generate(struct carp_vhost_entry *, u_int32_t *,
	    unsigned char *, u_int8_t);
int	carp_hmac_verify(struct carp_vhost_entry *, u_int32_t *,
	    unsigned char *);
void	carp_setroute(struct carp_softc *, int);
void	carp_proto_input_c(struct mbuf *, struct carp_header *, int,
	    sa_family_t);
void	carpattach(int);
void	carpdetach(struct carp_softc *);
int	carp_prepare_ad(struct mbuf *, struct carp_vhost_entry *,
	    struct carp_header *);
void	carp_send_ad_all(void);
void	carp_vhe_send_ad_all(struct carp_softc *);
void	carp_send_ad(void *);
void	carp_send_arp(struct carp_softc *);
void	carp_master_down(void *);
int	carp_ioctl(struct ifnet *, u_long, caddr_t);
int	carp_vhids_ioctl(struct carp_softc *, struct carpreq *);
int	carp_check_dup_vhids(struct carp_softc *, struct carp_if *,
	    struct carpreq *);
void	carp_ifgroup_ioctl(struct ifnet *, u_long, caddr_t);
void	carp_ifgattr_ioctl(struct ifnet *, u_long, caddr_t);
void	carp_start(struct ifnet *);
void	carp_setrun_all(struct carp_softc *, sa_family_t);
void	carp_setrun(struct carp_vhost_entry *, sa_family_t);
void	carp_set_state_all(struct carp_softc *, int);
void	carp_set_state(struct carp_vhost_entry *, int);
void	carp_multicast_cleanup(struct carp_softc *);
int	carp_set_ifp(struct carp_softc *, struct ifnet *);
void	carp_set_enaddr(struct carp_softc *);
void	carp_set_vhe_enaddr(struct carp_vhost_entry *);
void	carp_addr_updated(void *);
u_int32_t	carp_hash(struct carp_softc *, u_char *);
int	carp_set_addr(struct carp_softc *, struct sockaddr_in *);
int	carp_join_multicast(struct carp_softc *);
#ifdef INET6
void	carp_send_na(struct carp_softc *);
int	carp_set_addr6(struct carp_softc *, struct sockaddr_in6 *);
int	carp_join_multicast6(struct carp_softc *);
#endif
int	carp_clone_create(struct if_clone *, int);
int	carp_clone_destroy(struct ifnet *);
int	carp_ether_addmulti(struct carp_softc *, struct ifreq *);
int	carp_ether_delmulti(struct carp_softc *, struct ifreq *);
void	carp_ether_purgemulti(struct carp_softc *);
int	carp_group_demote_count(struct carp_softc *);
void	carp_update_lsmask(struct carp_softc *);
int	carp_new_vhost(struct carp_softc *, int, int);
void	carp_destroy_vhosts(struct carp_softc *);
void	carp_del_all_timeouts(struct carp_softc *);
252 
/* Cloner for "carp" interfaces; registered by carpattach(). */
struct if_clone carp_cloner =
    IF_CLONE_INITIALIZER("carp", carp_clone_create, carp_clone_destroy);

/* in_cksum() over the first _l bytes of mbuf _m, truncated to 16 bits */
#define carp_cksum(_m, _l)	((u_int16_t)in_cksum((_m), (_l)))
#define CARP_IFQ_PRIO	6
258 
259 void
260 carp_hmac_prepare(struct carp_softc *sc)
261 {
262 	struct carp_vhost_entry *vhe;
263 	u_int8_t i;
264 
265 	LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) {
266 		for (i = 0; i < HMAC_MAX; i++) {
267 			carp_hmac_prepare_ctx(vhe, i);
268 		}
269 	}
270 }
271 
272 void
273 carp_hmac_prepare_ctx(struct carp_vhost_entry *vhe, u_int8_t ctx)
274 {
275 	struct carp_softc *sc = vhe->parent_sc;
276 
277 	u_int8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT;
278 	u_int8_t vhid = vhe->vhid & 0xff;
279 	SHA1_CTX sha1ctx;
280 	u_int32_t kmd[5];
281 	struct ifaddr *ifa;
282 	int i, found;
283 	struct in_addr last, cur, in;
284 #ifdef INET6
285 	struct in6_addr last6, cur6, in6;
286 #endif /* INET6 */
287 
288 	/* compute ipad from key */
289 	bzero(vhe->vhe_pad, sizeof(vhe->vhe_pad));
290 	bcopy(sc->sc_key, vhe->vhe_pad, sizeof(sc->sc_key));
291 	for (i = 0; i < sizeof(vhe->vhe_pad); i++)
292 		vhe->vhe_pad[i] ^= 0x36;
293 
294 	/* precompute first part of inner hash */
295 	SHA1Init(&vhe->vhe_sha1[ctx]);
296 	SHA1Update(&vhe->vhe_sha1[ctx], vhe->vhe_pad, sizeof(vhe->vhe_pad));
297 	SHA1Update(&vhe->vhe_sha1[ctx], (void *)&version, sizeof(version));
298 	SHA1Update(&vhe->vhe_sha1[ctx], (void *)&type, sizeof(type));
299 
300 	/* generate a key for the arpbalance hash, before the vhid is hashed */
301 	if (vhe->vhe_leader) {
302 		bcopy(&vhe->vhe_sha1[ctx], &sha1ctx, sizeof(sha1ctx));
303 		SHA1Final((unsigned char *)kmd, &sha1ctx);
304 		sc->sc_hashkey[0] = kmd[0] ^ kmd[1];
305 		sc->sc_hashkey[1] = kmd[2] ^ kmd[3];
306 	}
307 
308 	/* the rest of the precomputation */
309 	if (vhe->vhe_leader && bcmp(sc->sc_ac.ac_enaddr, vhe->vhe_enaddr,
310 	    ETHER_ADDR_LEN) != 0)
311 		SHA1Update(&vhe->vhe_sha1[ctx], sc->sc_ac.ac_enaddr,
312 		    ETHER_ADDR_LEN);
313 
314 	SHA1Update(&vhe->vhe_sha1[ctx], (void *)&vhid, sizeof(vhid));
315 
316 	/* Hash the addresses from smallest to largest, not interface order */
317 #ifdef INET
318 	cur.s_addr = 0;
319 	do {
320 		found = 0;
321 		last = cur;
322 		cur.s_addr = 0xffffffff;
323 		TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) {
324 			if (ifa->ifa_addr->sa_family != AF_INET)
325 				continue;
326 			in.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr;
327 			if (ntohl(in.s_addr) > ntohl(last.s_addr) &&
328 			    ntohl(in.s_addr) < ntohl(cur.s_addr)) {
329 				cur.s_addr = in.s_addr;
330 				found++;
331 			}
332 		}
333 		if (found)
334 			SHA1Update(&vhe->vhe_sha1[ctx],
335 			    (void *)&cur, sizeof(cur));
336 	} while (found);
337 #endif /* INET */
338 #ifdef INET6
339 	memset(&cur6, 0x00, sizeof(cur6));
340 	do {
341 		found = 0;
342 		last6 = cur6;
343 		memset(&cur6, 0xff, sizeof(cur6));
344 		TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) {
345 			if (ifa->ifa_addr->sa_family != AF_INET6)
346 				continue;
347 			in6 = ifatoia6(ifa)->ia_addr.sin6_addr;
348 			if (IN6_IS_SCOPE_EMBED(&in6)) {
349 				if (ctx == HMAC_NOV6LL)
350 					continue;
351 				in6.s6_addr16[1] = 0;
352 			}
353 			if (memcmp(&in6, &last6, sizeof(in6)) > 0 &&
354 			    memcmp(&in6, &cur6, sizeof(in6)) < 0) {
355 				cur6 = in6;
356 				found++;
357 			}
358 		}
359 		if (found)
360 			SHA1Update(&vhe->vhe_sha1[ctx],
361 			    (void *)&cur6, sizeof(cur6));
362 	} while (found);
363 #endif /* INET6 */
364 
365 	/* convert ipad to opad */
366 	for (i = 0; i < sizeof(vhe->vhe_pad); i++)
367 		vhe->vhe_pad[i] ^= 0x36 ^ 0x5c;
368 }
369 
370 void
371 carp_hmac_generate(struct carp_vhost_entry *vhe, u_int32_t counter[2],
372     unsigned char md[20], u_int8_t ctx)
373 {
374 	SHA1_CTX sha1ctx;
375 
376 	/* fetch first half of inner hash */
377 	bcopy(&vhe->vhe_sha1[ctx], &sha1ctx, sizeof(sha1ctx));
378 
379 	SHA1Update(&sha1ctx, (void *)counter, sizeof(vhe->vhe_replay_cookie));
380 	SHA1Final(md, &sha1ctx);
381 
382 	/* outer hash */
383 	SHA1Init(&sha1ctx);
384 	SHA1Update(&sha1ctx, vhe->vhe_pad, sizeof(vhe->vhe_pad));
385 	SHA1Update(&sha1ctx, md, 20);
386 	SHA1Final(md, &sha1ctx);
387 }
388 
389 int
390 carp_hmac_verify(struct carp_vhost_entry *vhe, u_int32_t counter[2],
391     unsigned char md[20])
392 {
393 	unsigned char md2[20];
394 	u_int8_t i;
395 
396 	for (i = 0; i < HMAC_MAX; i++) {
397 		carp_hmac_generate(vhe, counter, md2, i);
398 		if (!timingsafe_bcmp(md, md2, sizeof(md2)))
399 			return (0);
400 	}
401 	return (1);
402 }
403 
404 void
405 carp_setroute(struct carp_softc *sc, int cmd)
406 {
407 	struct ifaddr *ifa;
408 	int s;
409 
410 	/* XXX this mess needs fixing */
411 
412 	s = splsoftnet();
413 	TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) {
414 		switch (ifa->ifa_addr->sa_family) {
415 		case AF_INET: {
416 			int error;
417 			struct sockaddr sa;
418 			struct rtentry *rt;
419 			struct radix_node_head *rnh;
420 			struct radix_node *rn;
421 			struct rt_addrinfo info;
422 			int hr_otherif, nr_ourif;
423 			struct sockaddr_rtlabel	sa_rl;
424 			const char *label;
425 
426 			/* Remove the existing host route, if any */
427 			bzero(&info, sizeof(info));
428 			info.rti_info[RTAX_DST] = ifa->ifa_addr;
429 			info.rti_flags = RTF_HOST;
430 			error = rtrequest1(RTM_DELETE, &info, RTP_CONNECTED,
431 			    NULL, sc->sc_if.if_rdomain);
432 			rt_missmsg(RTM_DELETE, &info, info.rti_flags, NULL,
433 			    error, sc->sc_if.if_rdomain);
434 
435 			/* Check for our address on another interface */
436 			/* XXX cries for proper API */
437 			rnh = rt_gettable(ifa->ifa_addr->sa_family, 0);
438 			rn = rnh->rnh_matchaddr(ifa->ifa_addr, rnh);
439 			rt = (struct rtentry *)rn;
440 			hr_otherif = (rt && rt->rt_ifp != &sc->sc_if &&
441 			    rt->rt_flags & (RTF_CLONING|RTF_CLONED));
442 
443 			/* Check for a network route on our interface */
444 			bcopy(ifa->ifa_addr, &sa, sizeof(sa));
445 			satosin(&sa)->sin_addr.s_addr = satosin(ifa->ifa_netmask
446 			    )->sin_addr.s_addr & satosin(&sa)->sin_addr.s_addr;
447 			rt = (struct rtentry *)rt_lookup(&sa,
448 			    ifa->ifa_netmask, sc->sc_if.if_rdomain);
449 			nr_ourif = (rt && rt->rt_ifp == &sc->sc_if);
450 
451 			/* Restore the route label */
452 			bzero(&sa_rl, sizeof(sa_rl));
453 			if (rt && rt->rt_labelid) {
454 				sa_rl.sr_len = sizeof(sa_rl);
455 				sa_rl.sr_family = AF_UNSPEC;
456 				label = rtlabel_id2name(rt->rt_labelid);
457 				if (label != NULL)
458 					strlcpy(sa_rl.sr_label, label,
459 					    sizeof(sa_rl.sr_label));
460 			}
461 
462 			switch (cmd) {
463 			case RTM_ADD:
464 				if (hr_otherif) {
465 					ifa->ifa_rtrequest = NULL;
466 					ifa->ifa_flags &= ~RTF_CLONING;
467 					bzero(&info, sizeof(info));
468 					info.rti_info[RTAX_DST] = ifa->ifa_addr;
469 					info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr;
470 					info.rti_flags = RTF_UP | RTF_HOST;
471 					error = rtrequest1(RTM_ADD, &info,
472 					    RTP_CONNECTED, NULL,
473 					    sc->sc_if.if_rdomain);
474 					rt_missmsg(RTM_ADD, &info,
475 					    info.rti_flags, &sc->sc_if,
476 					    error, sc->sc_if.if_rdomain);
477 				}
478 				if (!hr_otherif || nr_ourif || !rt) {
479 					if (nr_ourif && !(rt->rt_flags &
480 					    RTF_CLONING)) {
481 						bzero(&info, sizeof(info));
482 						info.rti_info[RTAX_DST] = &sa;
483 						info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
484 						error = rtrequest1(RTM_DELETE,
485 						    &info, RTP_CONNECTED, NULL,
486 						    sc->sc_if.if_rdomain);
487 						rt_missmsg(RTM_DELETE, &info, info.rti_flags, NULL,
488 						    error, sc->sc_if.if_rdomain);
489 					}
490 
491 					ifa->ifa_rtrequest = arp_rtrequest;
492 					ifa->ifa_flags |= RTF_CLONING;
493 
494 					bzero(&info, sizeof(info));
495 					info.rti_info[RTAX_DST] = &sa;
496 					info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr;
497 					info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
498 					info.rti_info[RTAX_LABEL] =
499 					    (struct sockaddr *)&sa_rl;
500 					error = rtrequest1(RTM_ADD, &info,
501 					    RTP_CONNECTED, NULL,
502 					    sc->sc_if.if_rdomain);
503 					if (error == 0)
504 						ifa->ifa_flags |= IFA_ROUTE;
505 					rt_missmsg(RTM_ADD, &info, info.rti_flags,
506 					    &sc->sc_if, error, sc->sc_if.if_rdomain);
507 				}
508 				break;
509 			case RTM_DELETE:
510 				break;
511 			default:
512 				break;
513 			}
514 			break;
515 		}
516 
517 #ifdef INET6
518 		case AF_INET6:
519 			if (sc->sc_balancing >= CARP_BAL_IP)
520 				continue;
521 			if (cmd == RTM_ADD)
522 				in6_ifaddloop(ifa);
523 			else
524 				in6_ifremloop(ifa);
525 			break;
526 #endif /* INET6 */
527 		default:
528 			break;
529 		}
530 	}
531 	splx(s);
532 }
533 
534 /*
535  * process input packet.
536  * we have rearranged checks order compared to the rfc,
537  * but it seems more efficient this way or not possible otherwise.
538  */
539 void
540 carp_proto_input(struct mbuf *m, ...)
541 {
542 	struct ip *ip = mtod(m, struct ip *);
543 	struct ifnet *ifp = m->m_pkthdr.rcvif;
544 	struct carp_softc *sc = NULL;
545 	struct carp_header *ch;
546 	int iplen, len, hlen, ismulti;
547 	va_list ap;
548 
549 	va_start(ap, m);
550 	hlen = va_arg(ap, int);
551 	va_end(ap);
552 
553 	carpstats.carps_ipackets++;
554 
555 	if (!carp_opts[CARPCTL_ALLOW]) {
556 		m_freem(m);
557 		return;
558 	}
559 
560 	ismulti = IN_MULTICAST(ip->ip_dst.s_addr);
561 
562 	/* check if received on a valid carp interface */
563 	if (!((ifp->if_type == IFT_CARP && ismulti) ||
564 	    (ifp->if_type != IFT_CARP && !ismulti && ifp->if_carp != NULL))) {
565 		carpstats.carps_badif++;
566 		CARP_LOG(LOG_INFO, sc, ("packet received on non-carp interface: %s",
567 		    m->m_pkthdr.rcvif->if_xname));
568 		m_freem(m);
569 		return;
570 	}
571 
572 	/* verify that the IP TTL is 255.  */
573 	if (ip->ip_ttl != CARP_DFLTTL) {
574 		carpstats.carps_badttl++;
575 		CARP_LOG(LOG_NOTICE, sc, ("received ttl %d != %d on %s", ip->ip_ttl,
576 		    CARP_DFLTTL, m->m_pkthdr.rcvif->if_xname));
577 		m_freem(m);
578 		return;
579 	}
580 
581 	/*
582 	 * verify that the received packet length is
583 	 * equal to the CARP header
584 	 */
585 	iplen = ip->ip_hl << 2;
586 	len = iplen + sizeof(*ch);
587 	if (len > m->m_pkthdr.len) {
588 		carpstats.carps_badlen++;
589 		CARP_LOG(LOG_INFO, sc, ("packet too short %d on %s", m->m_pkthdr.len,
590 		    m->m_pkthdr.rcvif->if_xname));
591 		m_freem(m);
592 		return;
593 	}
594 
595 	if ((m = m_pullup(m, len)) == NULL) {
596 		carpstats.carps_hdrops++;
597 		return;
598 	}
599 	ip = mtod(m, struct ip *);
600 	ch = (struct carp_header *)(mtod(m, caddr_t) + iplen);
601 
602 	/* verify the CARP checksum */
603 	m->m_data += iplen;
604 	if (carp_cksum(m, len - iplen)) {
605 		carpstats.carps_badsum++;
606 		CARP_LOG(LOG_INFO, sc, ("checksum failed on %s",
607 		    m->m_pkthdr.rcvif->if_xname));
608 		m_freem(m);
609 		return;
610 	}
611 	m->m_data -= iplen;
612 
613 	carp_proto_input_c(m, ch, ismulti, AF_INET);
614 }
615 
#ifdef INET6
/*
 * IPv6 input routine for CARP packets; *offp is the offset of the CARP
 * header within the packet.  Always returns IPPROTO_DONE.
 *
 * Packets that fail a check are counted in carpstats and freed here,
 * except when IP6_EXTHDR_GET() yields no header, in which case the
 * function just returns.  Accepted packets are handed to
 * carp_proto_input_c() as multicast.
 */
int
carp6_proto_input(struct mbuf **mp, int *offp, int proto)
{
	struct mbuf *m = *mp;
	struct carp_softc *sc = NULL;	/* stays NULL, only feeds CARP_LOG() */
	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
	struct carp_header *ch;
	u_int len;

	carpstats.carps_ipackets6++;

	if (!carp_opts[CARPCTL_ALLOW])
		goto drop;

	/* only packets received on a carp interface are valid */
	if (m->m_pkthdr.rcvif->if_type != IFT_CARP) {
		carpstats.carps_badif++;
		CARP_LOG(LOG_INFO, sc, ("packet received on non-carp interface: %s",
		    m->m_pkthdr.rcvif->if_xname));
		goto drop;
	}

	/* the hop limit has to be 255 */
	if (ip6->ip6_hlim != CARP_DFLTTL) {
		carpstats.carps_badttl++;
		CARP_LOG(LOG_NOTICE, sc, ("received ttl %d != %d on %s", ip6->ip6_hlim,
		    CARP_DFLTTL, m->m_pkthdr.rcvif->if_xname));
		goto drop;
	}

	/* a complete CARP header has to be present */
	len = m->m_len;
	IP6_EXTHDR_GET(ch, struct carp_header *, m, *offp, sizeof(*ch));
	if (ch == NULL) {
		carpstats.carps_badlen++;
		CARP_LOG(LOG_INFO, sc, ("packet size %u too small", len));
		return (IPPROTO_DONE);
	}

	/* checksum the CARP header with everything before it skipped */
	m->m_data += *offp;
	if (carp_cksum(m, sizeof(*ch))) {
		carpstats.carps_badsum++;
		CARP_LOG(LOG_INFO, sc, ("checksum failed, on %s",
		    m->m_pkthdr.rcvif->if_xname));
		goto drop;
	}
	m->m_data -= *offp;

	carp_proto_input_c(m, ch, 1, AF_INET6);
	return (IPPROTO_DONE);

drop:
	m_freem(m);
	return (IPPROTO_DONE);
}
#endif /* INET6 */
676 
677 void
678 carp_proto_input_c(struct mbuf *m, struct carp_header *ch, int ismulti,
679     sa_family_t af)
680 {
681 	struct ifnet *ifp = m->m_pkthdr.rcvif;
682 	struct carp_softc *sc;
683 	struct carp_vhost_entry *vhe;
684 	struct timeval sc_tv, ch_tv;
685 	struct carp_if *cif;
686 
687 	if (ifp->if_type == IFT_CARP)
688 		cif = (struct carp_if *)ifp->if_carpdev->if_carp;
689 	else
690 		cif = (struct carp_if *)ifp->if_carp;
691 
692 	TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) {
693 		if (af == AF_INET &&
694 		    ismulti != IN_MULTICAST(sc->sc_peer.s_addr))
695 			continue;
696 		LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) {
697 			if (vhe->vhid == ch->carp_vhid)
698 				goto found;
699 		}
700 	}
701  found:
702 
703 	if (!sc || (sc->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) !=
704 	    (IFF_UP|IFF_RUNNING)) {
705 		carpstats.carps_badvhid++;
706 		m_freem(m);
707 		return;
708 	}
709 
710 	getmicrotime(&sc->sc_if.if_lastchange);
711 	sc->sc_if.if_ipackets++;
712 	sc->sc_if.if_ibytes += m->m_pkthdr.len;
713 
714 	/* verify the CARP version. */
715 	if (ch->carp_version != CARP_VERSION) {
716 		carpstats.carps_badver++;
717 		sc->sc_if.if_ierrors++;
718 		CARP_LOG(LOG_NOTICE, sc, ("invalid version %d != %d",
719 		    ch->carp_version, CARP_VERSION));
720 		m_freem(m);
721 		return;
722 	}
723 
724 	/* verify the hash */
725 	if (carp_hmac_verify(vhe, ch->carp_counter, ch->carp_md)) {
726 		carpstats.carps_badauth++;
727 		sc->sc_if.if_ierrors++;
728 		CARP_LOG(LOG_INFO, sc, ("incorrect hash"));
729 		m_freem(m);
730 		return;
731 	}
732 
733 	if (!bcmp(&vhe->vhe_replay_cookie, ch->carp_counter,
734 	    sizeof(ch->carp_counter))) {
735 		/* Do not log duplicates from non simplex interfaces */
736 		if (sc->sc_carpdev->if_flags & IFF_SIMPLEX) {
737 			carpstats.carps_badauth++;
738 			sc->sc_if.if_ierrors++;
739 			CARP_LOG(LOG_WARNING, sc,
740 			    ("replay or network loop detected"));
741 		}
742 		m_freem(m);
743 		return;
744 	}
745 
746 	sc_tv.tv_sec = sc->sc_advbase;
747 	sc_tv.tv_usec = vhe->advskew * 1000000 / 256;
748 	ch_tv.tv_sec = ch->carp_advbase;
749 	ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256;
750 
751 	switch (vhe->state) {
752 	case INIT:
753 		break;
754 	case MASTER:
755 		/*
756 		 * If we receive an advertisement from a master who's going to
757 		 * be more frequent than us, and whose demote count is not higher
758 		 * than ours, go into BACKUP state. If his demote count is lower,
759 		 * also go into BACKUP.
760 		 */
761 		if (((timercmp(&sc_tv, &ch_tv, >) ||
762 		    timercmp(&sc_tv, &ch_tv, ==)) &&
763 		    (ch->carp_demote <= carp_group_demote_count(sc))) ||
764 		    ch->carp_demote < carp_group_demote_count(sc)) {
765 			timeout_del(&vhe->ad_tmo);
766 			carp_set_state(vhe, BACKUP);
767 			carp_setrun(vhe, 0);
768 			if (vhe->vhe_leader)
769 				carp_setroute(sc, RTM_DELETE);
770 		}
771 		break;
772 	case BACKUP:
773 		/*
774 		 * If we're pre-empting masters who advertise slower than us,
775 		 * and do not have a better demote count, treat them as down.
776 		 *
777 		 */
778 		if (carp_opts[CARPCTL_PREEMPT] &&
779 		    timercmp(&sc_tv, &ch_tv, <) &&
780 		    ch->carp_demote >= carp_group_demote_count(sc)) {
781 			carp_master_down(vhe);
782 			break;
783 		}
784 
785 		/*
786 		 * Take over masters advertising with a higher demote count,
787 		 * regardless of CARPCTL_PREEMPT.
788 		 */
789 		if (ch->carp_demote > carp_group_demote_count(sc)) {
790 			carp_master_down(vhe);
791 			break;
792 		}
793 
794 		/*
795 		 *  If the master is going to advertise at such a low frequency
796 		 *  that he's guaranteed to time out, we'd might as well just
797 		 *  treat him as timed out now.
798 		 */
799 		sc_tv.tv_sec = sc->sc_advbase * 3;
800 		if (sc->sc_advbase && timercmp(&sc_tv, &ch_tv, <)) {
801 			carp_master_down(vhe);
802 			break;
803 		}
804 
805 		/*
806 		 * Otherwise, we reset the counter and wait for the next
807 		 * advertisement.
808 		 */
809 		carp_setrun(vhe, af);
810 		break;
811 	}
812 
813 	m_freem(m);
814 	return;
815 }
816 
817 int
818 carp_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
819     size_t newlen)
820 {
821 	/* All sysctl names at this level are terminal. */
822 	if (namelen != 1)
823 		return (ENOTDIR);
824 
825 	switch (name[0]) {
826 	case CARPCTL_STATS:
827 		if (newp != NULL)
828 			return (EPERM);
829 		return (sysctl_struct(oldp, oldlenp, newp, newlen,
830 		    &carpstats, sizeof(carpstats)));
831 	default:
832 		if (name[0] <= 0 || name[0] >= CARPCTL_MAXID)
833 			return (ENOPROTOOPT);
834 		return sysctl_int(oldp, oldlenp, newp, newlen,
835 		    &carp_opts[name[0]]);
836 	}
837 }
838 
839 /*
840  * Interface side of the CARP implementation.
841  */
842 
843 /* ARGSUSED */
844 void
845 carpattach(int n)
846 {
847 	struct ifg_group	*ifg;
848 
849 	if ((ifg = if_creategroup("carp")) != NULL)
850 		ifg->ifg_refcnt++;	/* keep around even if empty */
851 	if_clone_attach(&carp_cloner);
852 }
853 
854 int
855 carp_clone_create(ifc, unit)
856 	struct if_clone *ifc;
857 	int unit;
858 {
859 	struct carp_softc *sc;
860 	struct ifnet *ifp;
861 
862 	sc = malloc(sizeof(*sc), M_DEVBUF, M_NOWAIT|M_ZERO);
863 	if (!sc)
864 		return (ENOMEM);
865 
866 	LIST_INIT(&sc->carp_vhosts);
867 	sc->sc_vhe_count = 0;
868 	if (carp_new_vhost(sc, 0, 0)) {
869 		free(sc, M_DEVBUF);
870 		return (ENOMEM);
871 	}
872 
873 	sc->sc_suppress = 0;
874 	sc->sc_advbase = CARP_DFLTINTV;
875 	sc->sc_naddrs = sc->sc_naddrs6 = 0;
876 #ifdef INET6
877 	sc->sc_im6o.im6o_multicast_hlim = CARP_DFLTTL;
878 #endif /* INET6 */
879 	sc->sc_imo.imo_membership = (struct in_multi **)malloc(
880 	    (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_IPMOPTS,
881 	    M_WAITOK|M_ZERO);
882 	sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS;
883 
884 	LIST_INIT(&sc->carp_mc_listhead);
885 	ifp = &sc->sc_if;
886 	ifp->if_softc = sc;
887 	snprintf(ifp->if_xname, sizeof ifp->if_xname, "%s%d", ifc->ifc_name,
888 	    unit);
889 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
890 	ifp->if_ioctl = carp_ioctl;
891 	ifp->if_start = carp_start;
892 	ifp->if_output = carp_output;
893 	ifp->if_type = IFT_CARP;
894 	ifp->if_addrlen = ETHER_ADDR_LEN;
895 	ifp->if_hdrlen = ETHER_HDR_LEN;
896 	ifp->if_mtu = ETHERMTU;
897 	IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen);
898 	IFQ_SET_READY(&ifp->if_snd);
899 	if_attach(ifp);
900 
901 	if_alloc_sadl(ifp);
902 	LIST_INIT(&sc->sc_ac.ac_multiaddrs);
903 #if NBPFILTER > 0
904 	bpfattach(&ifp->if_bpf, ifp, DLT_EN10MB, ETHER_HDR_LEN);
905 #endif
906 
907 	/* Hook carp_addr_updated to cope with address and route changes. */
908 	sc->ah_cookie = hook_establish(sc->sc_if.if_addrhooks, 0,
909 	    carp_addr_updated, sc);
910 	carp_set_state_all(sc, INIT);
911 
912 	return (0);
913 }
914 
915 int
916 carp_new_vhost(struct carp_softc *sc, int vhid, int advskew)
917 {
918 	struct carp_vhost_entry *vhe, *vhe0;
919 
920 	vhe = malloc(sizeof(*vhe), M_DEVBUF, M_NOWAIT | M_ZERO);
921 	if (vhe == NULL)
922 		return (ENOMEM);
923 
924 	vhe->parent_sc = sc;
925 	vhe->vhid = vhid;
926 	vhe->advskew = advskew;
927 	timeout_set(&vhe->ad_tmo, carp_send_ad, vhe);
928 	timeout_set(&vhe->md_tmo, carp_master_down, vhe);
929 	timeout_set(&vhe->md6_tmo, carp_master_down, vhe);
930 
931 	/* mark the first vhe as leader */
932 	if (LIST_EMPTY(&sc->carp_vhosts)) {
933 		vhe->vhe_leader = 1;
934 		LIST_INSERT_HEAD(&sc->carp_vhosts, vhe, vhost_entries);
935 		sc->sc_vhe_count = 1;
936 		return (0);
937 	}
938 
939 	LIST_FOREACH(vhe0, &sc->carp_vhosts, vhost_entries)
940 		if (LIST_NEXT(vhe0, vhost_entries) == NULL)
941 			break;
942 	LIST_INSERT_AFTER(vhe0, vhe, vhost_entries);
943 	sc->sc_vhe_count++;
944 
945 	return (0);
946 }
947 
948 int
949 carp_clone_destroy(struct ifnet *ifp)
950 {
951 	struct carp_softc *sc = ifp->if_softc;
952 
953 	carpdetach(sc);
954 	ether_ifdetach(ifp);
955 	if_detach(ifp);
956 	carp_destroy_vhosts(ifp->if_softc);
957 	free(sc->sc_imo.imo_membership, M_IPMOPTS);
958 	free(sc, M_DEVBUF);
959 
960 	return (0);
961 }
962 
963 void
964 carp_del_all_timeouts(struct carp_softc *sc)
965 {
966 	struct carp_vhost_entry *vhe;
967 
968 	LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) {
969 		timeout_del(&vhe->ad_tmo);
970 		timeout_del(&vhe->md_tmo);
971 		timeout_del(&vhe->md6_tmo);
972 	}
973 }
974 
975 void
976 carpdetach(struct carp_softc *sc)
977 {
978 	struct carp_if *cif;
979 	int s;
980 
981 	carp_del_all_timeouts(sc);
982 
983 	if (sc->sc_demote_cnt)
984 		carp_group_demote_adj(&sc->sc_if, sc->sc_demote_cnt, "detach");
985 	sc->sc_suppress = 0;
986 	sc->sc_sendad_errors = 0;
987 
988 	carp_set_state_all(sc, INIT);
989 	sc->sc_if.if_flags &= ~IFF_UP;
990 	carp_setrun_all(sc, 0);
991 	carp_multicast_cleanup(sc);
992 
993 	s = splnet();
994 	if (sc->ah_cookie != NULL)
995 		hook_disestablish(sc->sc_if.if_addrhooks, sc->ah_cookie);
996 	if (sc->sc_carpdev != NULL) {
997 		if (sc->lh_cookie != NULL)
998 			hook_disestablish(sc->sc_carpdev->if_linkstatehooks,
999 			    sc->lh_cookie);
1000 		cif = (struct carp_if *)sc->sc_carpdev->if_carp;
1001 		TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list);
1002 		if (!--cif->vhif_nvrs) {
1003 			ifpromisc(sc->sc_carpdev, 0);
1004 			sc->sc_carpdev->if_carp = NULL;
1005 			free(cif, M_IFADDR);
1006 		}
1007 	}
1008 	sc->sc_carpdev = NULL;
1009 	splx(s);
1010 }
1011 
1012 /* Detach an interface from the carp. */
1013 void
1014 carp_ifdetach(struct ifnet *ifp)
1015 {
1016 	struct carp_softc *sc, *nextsc;
1017 	struct carp_if *cif = (struct carp_if *)ifp->if_carp;
1018 
1019 	for (sc = TAILQ_FIRST(&cif->vhif_vrs); sc; sc = nextsc) {
1020 		nextsc = TAILQ_NEXT(sc, sc_list);
1021 		carpdetach(sc);
1022 	}
1023 }
1024 
1025 void
1026 carp_destroy_vhosts(struct carp_softc *sc)
1027 {
1028 	/* XXX bow out? */
1029 	struct carp_vhost_entry *vhe, *nvhe;
1030 
1031 	for (vhe = LIST_FIRST(&sc->carp_vhosts);
1032 	     vhe != LIST_END(&sc->carp_vhosts); vhe = nvhe) {
1033 		nvhe = LIST_NEXT(vhe, vhost_entries);
1034 		free(vhe, M_DEVBUF);
1035 	}
1036 	LIST_INIT(&sc->carp_vhosts);
1037 	sc->sc_vhe_count = 0;
1038 }
1039 
1040 int
1041 carp_prepare_ad(struct mbuf *m, struct carp_vhost_entry *vhe,
1042     struct carp_header *ch)
1043 {
1044 	if (!vhe->vhe_replay_cookie) {
1045 		arc4random_buf(&vhe->vhe_replay_cookie,
1046 		    sizeof(vhe->vhe_replay_cookie));
1047 	}
1048 
1049 	bcopy(&vhe->vhe_replay_cookie, ch->carp_counter,
1050 	    sizeof(ch->carp_counter));
1051 
1052 	/*
1053 	 * For the time being, do not include the IPv6 linklayer addresses
1054 	 * in the HMAC.
1055 	 */
1056 	carp_hmac_generate(vhe, ch->carp_counter, ch->carp_md, HMAC_NOV6LL);
1057 
1058 	return (0);
1059 }
1060 
1061 void
1062 carp_send_ad_all(void)
1063 {
1064 	struct ifnet *ifp;
1065 	struct carp_if *cif;
1066 	struct carp_softc *vh;
1067 
1068 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
1069 		if (ifp->if_carp == NULL || ifp->if_type == IFT_CARP)
1070 			continue;
1071 
1072 		cif = (struct carp_if *)ifp->if_carp;
1073 		TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
1074 			if ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) ==
1075 			    (IFF_UP|IFF_RUNNING)) {
1076 				carp_vhe_send_ad_all(vh);
1077 			}
1078 		}
1079 	}
1080 }
1081 
1082 void
1083 carp_vhe_send_ad_all(struct carp_softc *sc)
1084 {
1085 	struct carp_vhost_entry *vhe;
1086 
1087 	LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) {
1088 		if (vhe->state == MASTER)
1089 			carp_send_ad(vhe);
1090 	}
1091 }
1092 
1093 void
1094 carp_send_ad(void *v)
1095 {
1096 	struct carp_header ch;
1097 	struct timeval tv;
1098 	struct carp_vhost_entry *vhe = v;
1099 	struct carp_softc *sc = vhe->parent_sc;
1100 	struct carp_header *ch_ptr;
1101 
1102 	struct mbuf *m;
1103 	int error, len, advbase, advskew, s;
1104 	struct ifaddr *ifa;
1105 	struct sockaddr sa;
1106 
1107 	if (sc->sc_carpdev == NULL) {
1108 		sc->sc_if.if_oerrors++;
1109 		return;
1110 	}
1111 
1112 	s = splsoftnet();
1113 
1114 	/* bow out if we've gone to backup (the carp interface is going down) */
1115 	if (sc->sc_bow_out) {
1116 		advbase = 255;
1117 		advskew = 255;
1118 	} else {
1119 		advbase = sc->sc_advbase;
1120 		advskew = vhe->advskew;
1121 		tv.tv_sec = advbase;
1122 		if (advbase == 0 && advskew == 0)
1123 			tv.tv_usec = 1 * 1000000 / 256;
1124 		else
1125 			tv.tv_usec = advskew * 1000000 / 256;
1126 	}
1127 
1128 	ch.carp_version = CARP_VERSION;
1129 	ch.carp_type = CARP_ADVERTISEMENT;
1130 	ch.carp_vhid = vhe->vhid;
1131 	ch.carp_demote = carp_group_demote_count(sc) & 0xff;
1132 	ch.carp_advbase = advbase;
1133 	ch.carp_advskew = advskew;
1134 	ch.carp_authlen = 7;	/* XXX DEFINE */
1135 	ch.carp_cksum = 0;
1136 
1137 	sc->cur_vhe = vhe; /* we need the vhe later on the output path */
1138 
1139 #ifdef INET
1140 	if (sc->sc_naddrs) {
1141 		struct ip *ip;
1142 
1143 		MGETHDR(m, M_DONTWAIT, MT_HEADER);
1144 		if (m == NULL) {
1145 			sc->sc_if.if_oerrors++;
1146 			carpstats.carps_onomem++;
1147 			/* XXX maybe less ? */
1148 			goto retry_later;
1149 		}
1150 		len = sizeof(*ip) + sizeof(ch);
1151 		m->m_pkthdr.len = len;
1152 		m->m_pkthdr.rcvif = NULL;
1153 		m->m_pkthdr.rdomain = sc->sc_if.if_rdomain;
1154 		m->m_pkthdr.pf.prio = CARP_IFQ_PRIO;
1155 		m->m_len = len;
1156 		MH_ALIGN(m, m->m_len);
1157 		ip = mtod(m, struct ip *);
1158 		ip->ip_v = IPVERSION;
1159 		ip->ip_hl = sizeof(*ip) >> 2;
1160 		ip->ip_tos = IPTOS_LOWDELAY;
1161 		ip->ip_len = htons(len);
1162 		ip->ip_id = htons(ip_randomid());
1163 		ip->ip_off = htons(IP_DF);
1164 		ip->ip_ttl = CARP_DFLTTL;
1165 		ip->ip_p = IPPROTO_CARP;
1166 		ip->ip_sum = 0;
1167 
1168 		bzero(&sa, sizeof(sa));
1169 		sa.sa_family = AF_INET;
1170 		ifa = ifaof_ifpforaddr(&sa, sc->sc_carpdev);
1171 		if (ifa == NULL)
1172 			ip->ip_src.s_addr = 0;
1173 		else
1174 			ip->ip_src.s_addr =
1175 			    ifatoia(ifa)->ia_addr.sin_addr.s_addr;
1176 		ip->ip_dst.s_addr = sc->sc_peer.s_addr;
1177 		if (IN_MULTICAST(ip->ip_dst.s_addr))
1178 			m->m_flags |= M_MCAST;
1179 
1180 		ch_ptr = (struct carp_header *)(ip + 1);
1181 		bcopy(&ch, ch_ptr, sizeof(ch));
1182 		if (carp_prepare_ad(m, vhe, ch_ptr))
1183 			goto retry_later;
1184 
1185 		m->m_data += sizeof(*ip);
1186 		ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip));
1187 		m->m_data -= sizeof(*ip);
1188 
1189 		getmicrotime(&sc->sc_if.if_lastchange);
1190 		sc->sc_if.if_opackets++;
1191 		sc->sc_if.if_obytes += len;
1192 		carpstats.carps_opackets++;
1193 
1194 		error = ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo,
1195 		    NULL);
1196 		if (error) {
1197 			if (error == ENOBUFS)
1198 				carpstats.carps_onomem++;
1199 			else
1200 				CARP_LOG(LOG_WARNING, sc,
1201 				    ("ip_output failed: %d", error));
1202 			sc->sc_if.if_oerrors++;
1203 			if (sc->sc_sendad_errors < INT_MAX)
1204 				sc->sc_sendad_errors++;
1205 			if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS(sc))
1206 				carp_group_demote_adj(&sc->sc_if, 1,
1207 				    "> snderrors");
1208 			sc->sc_sendad_success = 0;
1209 		} else {
1210 			if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS(sc)) {
1211 				if (++sc->sc_sendad_success >=
1212 				    CARP_SENDAD_MIN_SUCCESS(sc)) {
1213 					carp_group_demote_adj(&sc->sc_if, -1,
1214 					    "< snderrors");
1215 					sc->sc_sendad_errors = 0;
1216 				}
1217 			} else
1218 				sc->sc_sendad_errors = 0;
1219 		}
1220 		if (vhe->vhe_leader) {
1221 			if (sc->sc_delayed_arp > 0)
1222 				sc->sc_delayed_arp--;
1223 			if (sc->sc_delayed_arp == 0) {
1224 				carp_send_arp(sc);
1225 				sc->sc_delayed_arp = -1;
1226 			}
1227 		}
1228 	}
1229 #endif /* INET */
1230 #ifdef INET6
1231 	if (sc->sc_naddrs6) {
1232 		struct ip6_hdr *ip6;
1233 
1234 		MGETHDR(m, M_DONTWAIT, MT_HEADER);
1235 		if (m == NULL) {
1236 			sc->sc_if.if_oerrors++;
1237 			carpstats.carps_onomem++;
1238 			/* XXX maybe less ? */
1239 			goto retry_later;
1240 		}
1241 		len = sizeof(*ip6) + sizeof(ch);
1242 		m->m_pkthdr.len = len;
1243 		m->m_pkthdr.rcvif = NULL;
1244 		m->m_pkthdr.pf.prio = CARP_IFQ_PRIO;
1245 		/* XXX m->m_pkthdr.rdomain = sc->sc_if.if_rdomain; */
1246 		m->m_len = len;
1247 		MH_ALIGN(m, m->m_len);
1248 		m->m_flags |= M_MCAST;
1249 		ip6 = mtod(m, struct ip6_hdr *);
1250 		bzero(ip6, sizeof(*ip6));
1251 		ip6->ip6_vfc |= IPV6_VERSION;
1252 		ip6->ip6_hlim = CARP_DFLTTL;
1253 		ip6->ip6_nxt = IPPROTO_CARP;
1254 
1255 		/* set the source address */
1256 		bzero(&sa, sizeof(sa));
1257 		sa.sa_family = AF_INET6;
1258 		ifa = ifaof_ifpforaddr(&sa, sc->sc_carpdev);
1259 		if (ifa == NULL)	/* This should never happen with IPv6 */
1260 			bzero(&ip6->ip6_src, sizeof(struct in6_addr));
1261 		else
1262 			bcopy(ifatoia6(ifa)->ia_addr.sin6_addr.s6_addr,
1263 			    &ip6->ip6_src, sizeof(struct in6_addr));
1264 		/* set the multicast destination */
1265 
1266 		ip6->ip6_dst.s6_addr16[0] = htons(0xff02);
1267 		ip6->ip6_dst.s6_addr16[1] = htons(sc->sc_carpdev->if_index);
1268 		ip6->ip6_dst.s6_addr8[15] = 0x12;
1269 
1270 		ch_ptr = (struct carp_header *)(ip6 + 1);
1271 		bcopy(&ch, ch_ptr, sizeof(ch));
1272 		if (carp_prepare_ad(m, vhe, ch_ptr))
1273 			goto retry_later;
1274 
1275 		m->m_data += sizeof(*ip6);
1276 		ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip6));
1277 		m->m_data -= sizeof(*ip6);
1278 
1279 		getmicrotime(&sc->sc_if.if_lastchange);
1280 		sc->sc_if.if_opackets++;
1281 		sc->sc_if.if_obytes += len;
1282 		carpstats.carps_opackets6++;
1283 
1284 		error = ip6_output(m, NULL, NULL, 0, &sc->sc_im6o, NULL, NULL);
1285 		if (error) {
1286 			if (error == ENOBUFS)
1287 				carpstats.carps_onomem++;
1288 			else
1289 				CARP_LOG(LOG_WARNING, sc,
1290 				    ("ip6_output failed: %d", error));
1291 			sc->sc_if.if_oerrors++;
1292 			if (sc->sc_sendad_errors < INT_MAX)
1293 				sc->sc_sendad_errors++;
1294 			if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS(sc))
1295 				carp_group_demote_adj(&sc->sc_if, 1,
1296 					    "> snd6errors");
1297 			sc->sc_sendad_success = 0;
1298 		} else {
1299 			if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS(sc)) {
1300 				if (++sc->sc_sendad_success >=
1301 				    CARP_SENDAD_MIN_SUCCESS(sc)) {
1302 					carp_group_demote_adj(&sc->sc_if, -1,
1303 					    "< snd6errors");
1304 					sc->sc_sendad_errors = 0;
1305 				}
1306 			} else
1307 				sc->sc_sendad_errors = 0;
1308 		}
1309 	}
1310 #endif /* INET6 */
1311 
1312 retry_later:
1313 	sc->cur_vhe = NULL;
1314 	splx(s);
1315 	if (advbase != 255 || advskew != 255)
1316 		timeout_add(&vhe->ad_tmo, tvtohz(&tv));
1317 }
1318 
1319 /*
1320  * Broadcast a gratuitous ARP request containing
1321  * the virtual router MAC address for each IP address
1322  * associated with the virtual router.
1323  */
1324 void
1325 carp_send_arp(struct carp_softc *sc)
1326 {
1327 	struct ifaddr *ifa;
1328 	in_addr_t in;
1329 	int s = splsoftnet();
1330 
1331 	TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) {
1332 
1333 		if (ifa->ifa_addr->sa_family != AF_INET)
1334 			continue;
1335 
1336 		in = ifatoia(ifa)->ia_addr.sin_addr.s_addr;
1337 		arprequest(sc->sc_carpdev, &in, &in, sc->sc_ac.ac_enaddr);
1338 		DELAY(1000);	/* XXX */
1339 	}
1340 	splx(s);
1341 }
1342 
#ifdef INET6
/*
 * IPv6 counterpart of carp_send_arp(): for every IPv6 address configured
 * on the carp interface, send a neighbor advertisement with the override
 * flag to the link-local all-nodes group via the parent device.
 */
void
carp_send_na(struct carp_softc *sc)
{
	static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
	struct ifaddr *ifa;
	struct in6_addr *in6;
	int s;

	s = splsoftnet();

	TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) {
		if (ifa->ifa_addr->sa_family == AF_INET6) {
			in6 = &ifatoia6(ifa)->ia_addr.sin6_addr;
			nd6_na_output(sc->sc_carpdev, &mcast, in6,
			    ND_NA_FLAG_OVERRIDE, 1, NULL);
			DELAY(1000);	/* XXX */
		}
	}

	splx(s);
}
#endif /* INET6 */
1365 
1366 /*
1367  * Based on bridge_hash() in if_bridge.c
1368  */
1369 #define	mix(a,b,c) \
1370 	do {						\
1371 		a -= b; a -= c; a ^= (c >> 13);		\
1372 		b -= c; b -= a; b ^= (a << 8);		\
1373 		c -= a; c -= b; c ^= (b >> 13);		\
1374 		a -= b; a -= c; a ^= (c >> 12);		\
1375 		b -= c; b -= a; b ^= (a << 16);		\
1376 		c -= a; c -= b; c ^= (b >> 5);		\
1377 		a -= b; a -= c; a ^= (c >> 3);		\
1378 		b -= c; b -= a; b ^= (a << 10);		\
1379 		c -= a; c -= b; c ^= (b >> 15);		\
1380 	} while (0)
1381 
1382 u_int32_t
1383 carp_hash(struct carp_softc *sc, u_char *src)
1384 {
1385 	u_int32_t a = 0x9e3779b9, b = sc->sc_hashkey[0], c = sc->sc_hashkey[1];
1386 
1387 	c += sc->sc_key[3] << 24;
1388 	c += sc->sc_key[2] << 16;
1389 	c += sc->sc_key[1] << 8;
1390 	c += sc->sc_key[0];
1391 	b += src[5] << 8;
1392 	b += src[4];
1393 	a += src[3] << 24;
1394 	a += src[2] << 16;
1395 	a += src[1] << 8;
1396 	a += src[0];
1397 
1398 	mix(a, b, c);
1399 	return (c);
1400 }
1401 
1402 void
1403 carp_update_lsmask(struct carp_softc *sc)
1404 {
1405 	struct carp_vhost_entry *vhe;
1406 	int count;
1407 
1408 	if (!sc->sc_balancing)
1409 		return;
1410 
1411 	sc->sc_lsmask = 0;
1412 	count = 0;
1413 
1414 	LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) {
1415 		if (vhe->state == MASTER && count < sizeof(sc->sc_lsmask) * 8)
1416 			sc->sc_lsmask |= 1 << count;
1417 		count++;
1418 	}
1419 	sc->sc_lscount = count;
1420 	CARP_LOG(LOG_DEBUG, sc, ("carp_update_lsmask: %x", sc->sc_lsmask));
1421 }
1422 
1423 int
1424 carp_iamatch(struct in_ifaddr *ia, u_char *src, u_int8_t **sha,
1425     u_int8_t **ether_shost)
1426 {
1427 	struct carp_softc *sc = ia->ia_ifp->if_softc;
1428 	struct carp_vhost_entry *vhe = LIST_FIRST(&sc->carp_vhosts);
1429 
1430 	if (sc->sc_balancing == CARP_BAL_ARP) {
1431 		int lshash;
1432 		/*
1433 		 * We use the source MAC address to decide which virtual host
1434 		 * should handle the request. If we're master of that virtual
1435 		 * host, then we respond, otherwise, just drop the arp packet
1436 		 * on the floor.
1437 		 */
1438 
1439 		if (sc->sc_lscount == 0) /* just to be safe */
1440 			return (0);
1441 		lshash = carp_hash(sc, src) % sc->sc_lscount;
1442 		if ((1 << lshash) & sc->sc_lsmask) {
1443 			int i = 0;
1444 			LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) {
1445 				if (i++ == lshash)
1446 					break;
1447 			}
1448 			if (vhe == NULL)
1449 				return (0);
1450 			*sha = vhe->vhe_enaddr;
1451 			return (1);
1452 		}
1453 	} else if (sc->sc_balancing == CARP_BAL_IPSTEALTH ||
1454 	    sc->sc_balancing == CARP_BAL_IP) {
1455 		if (vhe->state == MASTER) {
1456 			*ether_shost = ((struct arpcom *)sc->sc_carpdev)->
1457 			    ac_enaddr;
1458 			return (1);
1459 		}
1460 	} else {
1461 		if (vhe->state == MASTER)
1462 			return (1);
1463 	}
1464 
1465 	return (0);
1466 }
1467 
#ifdef INET6
/*
 * IPv6 counterpart of carp_iamatch(): decide whether the carp interface
 * ifp should answer a neighbor solicitation sent from link-layer address
 * src.  Returns 1 to answer, 0 to stay silent.  In ARP balancing mode,
 * *sdl is pointed at the selected virtual host's link-layer sockaddr.
 */
int
carp_iamatch6(struct ifnet *ifp, u_char *src, struct sockaddr_dl **sdl)
{
	struct carp_softc *sc = ifp->if_softc;
	struct carp_vhost_entry *vhe = LIST_FIRST(&sc->carp_vhosts);
	int lshash, idx;

	if (sc->sc_balancing != CARP_BAL_ARP) {
		if (vhe->state == MASTER)
			return (1);
		return (0);
	}

	/*
	 * The source MAC address selects the virtual host that is
	 * responsible for the request.  Answer only if we are master of
	 * that virtual host; otherwise drop the ndp packet on the floor.
	 */

	/* can happen if optional src lladdr is not provided */
	if (src == NULL)
		return (0);
	if (sc->sc_lscount == 0) /* just to be safe */
		return (0);

	lshash = carp_hash(sc, src) % sc->sc_lscount;
	if (((1 << lshash) & sc->sc_lsmask) == 0)
		return (0);

	/* walk to the lshash-th entry of the vhost list */
	idx = 0;
	LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) {
		if (idx++ == lshash)
			break;
	}
	if (vhe == NULL)
		return (0);

	*sdl = &vhe->vhe_sdl;
	return (1);
}
#endif /* INET6 */
1509 
1510 struct ifnet *
1511 carp_ourether(void *v, struct ether_header *eh, int src)
1512 {
1513 	struct carp_if *cif = (struct carp_if *)v;
1514 	struct carp_softc *vh;
1515 	u_int8_t *ena;
1516 
1517 	if (src)
1518 		ena = (u_int8_t *)&eh->ether_shost;
1519 	else
1520 		ena = (u_int8_t *)&eh->ether_dhost;
1521 
1522 	TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
1523 		struct carp_vhost_entry *vhe;
1524 		if ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) !=
1525 		    (IFF_UP|IFF_RUNNING))
1526 			continue;
1527 		if (vh->sc_balancing == CARP_BAL_ARP) {
1528 			LIST_FOREACH(vhe, &vh->carp_vhosts, vhost_entries)
1529 				if (vhe->state == MASTER &&
1530 				    !bcmp(ena, vhe->vhe_enaddr, ETHER_ADDR_LEN))
1531 					return (&vh->sc_if);
1532 		} else {
1533 			vhe = LIST_FIRST(&vh->carp_vhosts);
1534 			if ((vhe->state == MASTER ||
1535 			    vh->sc_balancing >= CARP_BAL_IP) &&
1536 			    !bcmp(ena, vh->sc_ac.ac_enaddr, ETHER_ADDR_LEN))
1537 				return (&vh->sc_if);
1538 		}
1539 	}
1540 	return (NULL);
1541 }
1542 
1543 void
1544 carp_rewrite_lladdr(struct ifnet *ifp, u_int8_t *s_enaddr)
1545 {
1546 	struct carp_softc *sc = ifp->if_softc;
1547 
1548 	if (sc->sc_balancing != CARP_BAL_IPSTEALTH &&
1549 	    sc->sc_balancing != CARP_BAL_IP && sc->cur_vhe) {
1550 		if (sc->cur_vhe->vhe_leader)
1551 			bcopy((caddr_t)sc->sc_ac.ac_enaddr,
1552 			    (caddr_t)s_enaddr, ETHER_ADDR_LEN);
1553 		else
1554 			bcopy((caddr_t)sc->cur_vhe->vhe_enaddr,
1555 			    (caddr_t)s_enaddr, ETHER_ADDR_LEN);
1556 	}
1557 }
1558 
1559 int
1560 carp_our_mcastaddr(struct ifnet *ifp, u_int8_t *d_enaddr)
1561 {
1562 	struct carp_softc *sc = ifp->if_softc;
1563 
1564 	if (sc->sc_balancing != CARP_BAL_IP)
1565 		return (0);
1566 
1567 	return(!bcmp(sc->sc_ac.ac_enaddr, d_enaddr, ETHER_ADDR_LEN));
1568 }
1569 
1570 
1571 int
1572 carp_input(struct mbuf *m, u_int8_t *shost, u_int8_t *dhost, u_int16_t etype)
1573 {
1574 	struct ether_header eh;
1575 	struct carp_if *cif = (struct carp_if *)m->m_pkthdr.rcvif->if_carp;
1576 	struct ifnet *ifp;
1577 
1578 	bcopy(shost, &eh.ether_shost, sizeof(eh.ether_shost));
1579 	bcopy(dhost, &eh.ether_dhost, sizeof(eh.ether_dhost));
1580 	eh.ether_type = etype;
1581 
1582 	if ((ifp = carp_ourether(cif, &eh, 0)))
1583 		;
1584 	else if (m->m_flags & (M_BCAST|M_MCAST)) {
1585 		struct carp_softc *vh;
1586 		struct mbuf *m0;
1587 
1588 		/*
1589 		 * XXX Should really check the list of multicast addresses
1590 		 * for each CARP interface _before_ copying.
1591 		 */
1592 		TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
1593 			if (!(vh->sc_if.if_flags & IFF_UP))
1594 				continue;
1595 			m0 = m_copym2(m, 0, M_COPYALL, M_DONTWAIT);
1596 			if (m0 == NULL)
1597 				continue;
1598 			m0->m_pkthdr.rcvif = &vh->sc_if;
1599 #if NBPFILTER > 0
1600 			if (vh->sc_if.if_bpf)
1601 				bpf_mtap_hdr(vh->sc_if.if_bpf, (char *)&eh,
1602 				    ETHER_HDR_LEN, m0, BPF_DIRECTION_IN);
1603 #endif
1604 			vh->sc_if.if_ipackets++;
1605 			ether_input(&vh->sc_if, &eh, m0);
1606 		}
1607 		return (1);
1608 	}
1609 
1610 	if (ifp == NULL)
1611 		return (1);
1612 
1613 	m->m_pkthdr.rcvif = ifp;
1614 
1615 #if NBPFILTER > 0
1616 	if (ifp->if_bpf)
1617 		bpf_mtap_hdr(ifp->if_bpf, (char *)&eh, ETHER_HDR_LEN, m,
1618 		    BPF_DIRECTION_IN);
1619 #endif
1620 	ifp->if_ipackets++;
1621 	ether_input(ifp, &eh, m);
1622 
1623 	return (0);
1624 }
1625 
1626 int
1627 carp_lsdrop(struct mbuf *m, sa_family_t af, u_int32_t *src, u_int32_t *dst)
1628 {
1629 	struct carp_softc *sc = m->m_pkthdr.rcvif->if_softc;
1630 	int match;
1631 	u_int32_t fold;
1632 
1633 	if (sc->sc_balancing < CARP_BAL_IP)
1634 		return (0);
1635 	/*
1636 	 * Never drop carp advertisements.
1637 	 * XXX Bad idea to pass all broadcast / multicast traffic?
1638 	 */
1639 	if (m->m_flags & (M_BCAST|M_MCAST))
1640 		return (0);
1641 
1642 	fold = src[0] ^ dst[0];
1643 #ifdef INET6
1644 	if (af == AF_INET6) {
1645 		int i;
1646 		for (i = 1; i < 4; i++)
1647 			fold ^= src[i] ^ dst[i];
1648 	}
1649 #endif
1650 	if (sc->sc_lscount == 0) /* just to be safe */
1651 		return (1);
1652 	match = (1 << (ntohl(fold) % sc->sc_lscount)) & sc->sc_lsmask;
1653 
1654 	return (!match);
1655 }
1656 
1657 void
1658 carp_master_down(void *v)
1659 {
1660 	struct carp_vhost_entry *vhe = v;
1661 	struct carp_softc *sc = vhe->parent_sc;
1662 
1663 	switch (vhe->state) {
1664 	case INIT:
1665 		printf("%s: master_down event in INIT state\n",
1666 		    sc->sc_if.if_xname);
1667 		break;
1668 	case MASTER:
1669 		break;
1670 	case BACKUP:
1671 		carp_set_state(vhe, MASTER);
1672 		carp_send_ad(vhe);
1673 		if (sc->sc_balancing == CARP_BAL_NONE && vhe->vhe_leader) {
1674 			carp_send_arp(sc);
1675 			/* Schedule a delayed ARP to deal w/ some L3 switches */
1676 			sc->sc_delayed_arp = 2;
1677 #ifdef INET6
1678 			carp_send_na(sc);
1679 #endif /* INET6 */
1680 		}
1681 		carp_setrun(vhe, 0);
1682 		if (vhe->vhe_leader)
1683 			carp_setroute(sc, RTM_ADD);
1684 		carpstats.carps_preempt++;
1685 		break;
1686 	}
1687 }
1688 
1689 void
1690 carp_setrun_all(struct carp_softc *sc, sa_family_t af)
1691 {
1692 	struct carp_vhost_entry *vhe;
1693 	LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) {
1694 		carp_setrun(vhe, af);
1695 	}
1696 }
1697 
1698 /*
1699  * When in backup state, af indicates whether to reset the master down timer
1700  * for v4 or v6. If it's set to zero, reset the ones which are already pending.
1701  */
1702 void
1703 carp_setrun(struct carp_vhost_entry *vhe, sa_family_t af)
1704 {
1705 	struct timeval tv;
1706 	struct carp_softc *sc = vhe->parent_sc;
1707 
1708 	if (sc->sc_carpdev == NULL) {
1709 		sc->sc_if.if_flags &= ~IFF_RUNNING;
1710 		carp_set_state_all(sc, INIT);
1711 		return;
1712 	}
1713 
1714 	if (sc->sc_if.if_flags & IFF_UP && vhe->vhid > 0 &&
1715 	    (sc->sc_naddrs || sc->sc_naddrs6) && !sc->sc_suppress) {
1716 		sc->sc_if.if_flags |= IFF_RUNNING;
1717 	} else {
1718 		sc->sc_if.if_flags &= ~IFF_RUNNING;
1719 		if (vhe->vhe_leader)
1720 			carp_setroute(sc, RTM_DELETE);
1721 		return;
1722 	}
1723 
1724 	switch (vhe->state) {
1725 	case INIT:
1726 		carp_set_state(vhe, BACKUP);
1727 		if (vhe->vhe_leader)
1728 			carp_setroute(sc, RTM_DELETE);
1729 		carp_setrun(vhe, 0);
1730 		break;
1731 	case BACKUP:
1732 		timeout_del(&vhe->ad_tmo);
1733 		tv.tv_sec = 3 * sc->sc_advbase;
1734 		if (sc->sc_advbase == 0 && vhe->advskew == 0)
1735 			tv.tv_usec = 3 * 1000000 / 256;
1736 		else
1737 			tv.tv_usec = vhe->advskew * 1000000 / 256;
1738 		if (vhe->vhe_leader)
1739 			sc->sc_delayed_arp = -1;
1740 		switch (af) {
1741 #ifdef INET
1742 		case AF_INET:
1743 			timeout_add(&vhe->md_tmo, tvtohz(&tv));
1744 			break;
1745 #endif /* INET */
1746 #ifdef INET6
1747 		case AF_INET6:
1748 			timeout_add(&vhe->md6_tmo, tvtohz(&tv));
1749 			break;
1750 #endif /* INET6 */
1751 		default:
1752 			if (sc->sc_naddrs)
1753 				timeout_add(&vhe->md_tmo, tvtohz(&tv));
1754 			if (sc->sc_naddrs6)
1755 				timeout_add(&vhe->md6_tmo, tvtohz(&tv));
1756 			break;
1757 		}
1758 		break;
1759 	case MASTER:
1760 		tv.tv_sec = sc->sc_advbase;
1761 		if (sc->sc_advbase == 0 && vhe->advskew == 0)
1762 			tv.tv_usec = 1 * 1000000 / 256;
1763 		else
1764 			tv.tv_usec = vhe->advskew * 1000000 / 256;
1765 		timeout_add(&vhe->ad_tmo, tvtohz(&tv));
1766 		break;
1767 	}
1768 }
1769 
1770 void
1771 carp_multicast_cleanup(struct carp_softc *sc)
1772 {
1773 	struct ip_moptions *imo = &sc->sc_imo;
1774 #ifdef INET6
1775 	struct ip6_moptions *im6o = &sc->sc_im6o;
1776 #endif
1777 	u_int16_t n = imo->imo_num_memberships;
1778 
1779 	/* Clean up our own multicast memberships */
1780 	while (n-- > 0) {
1781 		if (imo->imo_membership[n] != NULL) {
1782 			in_delmulti(imo->imo_membership[n]);
1783 			imo->imo_membership[n] = NULL;
1784 		}
1785 	}
1786 	imo->imo_num_memberships = 0;
1787 	imo->imo_multicast_ifp = NULL;
1788 
1789 #ifdef INET6
1790 	while (!LIST_EMPTY(&im6o->im6o_memberships)) {
1791 		struct in6_multi_mship *imm =
1792 		    LIST_FIRST(&im6o->im6o_memberships);
1793 
1794 		LIST_REMOVE(imm, i6mm_chain);
1795 		in6_leavegroup(imm);
1796 	}
1797 	im6o->im6o_multicast_ifp = NULL;
1798 #endif
1799 
1800 	/* And any other multicast memberships */
1801 	carp_ether_purgemulti(sc);
1802 }
1803 
1804 int
1805 carp_set_ifp(struct carp_softc *sc, struct ifnet *ifp)
1806 {
1807 	struct carp_if *cif, *ncif = NULL;
1808 	struct carp_softc *vr, *after = NULL;
1809 	int myself = 0, error = 0;
1810 	int s;
1811 
1812 	if (ifp == sc->sc_carpdev)
1813 		return (0);
1814 
1815 	if (ifp != NULL) {
1816 		if ((ifp->if_flags & IFF_MULTICAST) == 0)
1817 			return (EADDRNOTAVAIL);
1818 
1819 		if (ifp->if_type == IFT_CARP)
1820 			return (EINVAL);
1821 
1822 		if (ifp->if_carp == NULL) {
1823 			ncif = malloc(sizeof(*cif), M_IFADDR, M_NOWAIT|M_ZERO);
1824 			if (ncif == NULL)
1825 				return (ENOBUFS);
1826 			if ((error = ifpromisc(ifp, 1))) {
1827 				free(ncif, M_IFADDR);
1828 				return (error);
1829 			}
1830 
1831 			ncif->vhif_ifp = ifp;
1832 			TAILQ_INIT(&ncif->vhif_vrs);
1833 		} else {
1834 			cif = (struct carp_if *)ifp->if_carp;
1835 			if (carp_check_dup_vhids(sc, cif, NULL))
1836 				return (EINVAL);
1837 		}
1838 
1839 		/* detach from old interface */
1840 		if (sc->sc_carpdev != NULL)
1841 			carpdetach(sc);
1842 
1843 		/* join multicast groups */
1844 		if (sc->sc_naddrs < 0 &&
1845 		    (error = carp_join_multicast(sc)) != 0) {
1846 			if (ncif != NULL)
1847 				free(ncif, M_IFADDR);
1848 			return (error);
1849 		}
1850 
1851 #ifdef INET6
1852 		if (sc->sc_naddrs6 < 0 &&
1853 		    (error = carp_join_multicast6(sc)) != 0) {
1854 			if (ncif != NULL)
1855 				free(ncif, M_IFADDR);
1856 			carp_multicast_cleanup(sc);
1857 			return (error);
1858 		}
1859 #endif
1860 
1861 		/* attach carp interface to physical interface */
1862 		if (ncif != NULL)
1863 			ifp->if_carp = (caddr_t)ncif;
1864 		sc->sc_carpdev = ifp;
1865 		cif = (struct carp_if *)ifp->if_carp;
1866 		TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) {
1867 			if (vr == sc)
1868 				myself = 1;
1869 			if (LIST_FIRST(&vr->carp_vhosts)->vhid <
1870 			    LIST_FIRST(&sc->carp_vhosts)->vhid)
1871 				after = vr;
1872 		}
1873 
1874 		if (!myself) {
1875 			/* We're trying to keep things in order */
1876 			if (after == NULL) {
1877 				TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list);
1878 			} else {
1879 				TAILQ_INSERT_AFTER(&cif->vhif_vrs, after,
1880 				    sc, sc_list);
1881 			}
1882 			cif->vhif_nvrs++;
1883 		}
1884 		if (sc->sc_naddrs || sc->sc_naddrs6)
1885 			sc->sc_if.if_flags |= IFF_UP;
1886 		carp_set_enaddr(sc);
1887 		s = splnet();
1888 		sc->lh_cookie = hook_establish(ifp->if_linkstatehooks, 1,
1889 		    carp_carpdev_state, ifp);
1890 		carp_carpdev_state(ifp);
1891 		splx(s);
1892 	} else {
1893 		carpdetach(sc);
1894 		sc->sc_if.if_flags &= ~(IFF_UP|IFF_RUNNING);
1895 	}
1896 	return (0);
1897 }
1898 
1899 void
1900 carp_set_vhe_enaddr(struct carp_vhost_entry *vhe)
1901 {
1902 	struct carp_softc *sc = vhe->parent_sc;
1903 
1904 	if (vhe->vhid != 0 && sc->sc_carpdev) {
1905 		if (vhe->vhe_leader && sc->sc_balancing == CARP_BAL_IP)
1906 			vhe->vhe_enaddr[0] = 1;
1907 		else
1908 			vhe->vhe_enaddr[0] = 0;
1909 		vhe->vhe_enaddr[1] = 0;
1910 		vhe->vhe_enaddr[2] = 0x5e;
1911 		vhe->vhe_enaddr[3] = 0;
1912 		vhe->vhe_enaddr[4] = 1;
1913 		vhe->vhe_enaddr[5] = vhe->vhid;
1914 
1915 		vhe->vhe_sdl.sdl_family = AF_LINK;
1916 		vhe->vhe_sdl.sdl_alen = ETHER_ADDR_LEN;
1917 		bcopy(vhe->vhe_enaddr, vhe->vhe_sdl.sdl_data, ETHER_ADDR_LEN);
1918 	} else
1919 		bzero(vhe->vhe_enaddr, ETHER_ADDR_LEN);
1920 }
1921 
1922 void
1923 carp_set_enaddr(struct carp_softc *sc)
1924 {
1925 	struct carp_vhost_entry *vhe;
1926 
1927 	LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries)
1928 		carp_set_vhe_enaddr(vhe);
1929 
1930 	vhe = LIST_FIRST(&sc->carp_vhosts);
1931 
1932 	/*
1933 	 * Use the carp lladdr if the running one isn't manually set.
1934 	 * Only compare static parts of the lladdr.
1935 	 */
1936 	if ((bcmp(sc->sc_ac.ac_enaddr + 1, vhe->vhe_enaddr + 1,
1937 	    ETHER_ADDR_LEN - 2) == 0) ||
1938 	    (!sc->sc_ac.ac_enaddr[0] && !sc->sc_ac.ac_enaddr[1] &&
1939 	    !sc->sc_ac.ac_enaddr[2] && !sc->sc_ac.ac_enaddr[3] &&
1940 	    !sc->sc_ac.ac_enaddr[4] && !sc->sc_ac.ac_enaddr[5]))
1941 		bcopy(vhe->vhe_enaddr, sc->sc_ac.ac_enaddr, ETHER_ADDR_LEN);
1942 
1943 	/* Make sure the enaddr has changed before further twiddling. */
1944 	if (bcmp(sc->sc_ac.ac_enaddr, sc->sc_curlladdr, ETHER_ADDR_LEN) != 0) {
1945 		bcopy(sc->sc_ac.ac_enaddr, LLADDR(sc->sc_if.if_sadl),
1946 		    ETHER_ADDR_LEN);
1947 		bcopy(sc->sc_ac.ac_enaddr, sc->sc_curlladdr, ETHER_ADDR_LEN);
1948 #ifdef INET6
1949 		/*
1950 		 * (re)attach a link-local address which matches
1951 		 * our new MAC address.
1952 		 */
1953 		in6_ifattach_linklocal(&sc->sc_if, NULL);
1954 #endif
1955 		carp_set_state_all(sc, INIT);
1956 		carp_setrun_all(sc, 0);
1957 	}
1958 }
1959 
1960 void
1961 carp_addr_updated(void *v)
1962 {
1963 	struct carp_softc *sc = (struct carp_softc *) v;
1964 	struct ifaddr *ifa;
1965 	int new_naddrs = 0, new_naddrs6 = 0;
1966 
1967 	TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) {
1968 		if (ifa->ifa_addr->sa_family == AF_INET)
1969 			new_naddrs++;
1970 		else if (ifa->ifa_addr->sa_family == AF_INET6 &&
1971 		    !IN6_IS_ADDR_LINKLOCAL(&ifatoia6(ifa)->ia_addr.sin6_addr))
1972 			new_naddrs6++;
1973 	}
1974 
1975 	/* We received address changes from if_addrhooks callback */
1976 	if (new_naddrs != sc->sc_naddrs || new_naddrs6 != sc->sc_naddrs6) {
1977 		struct in_addr mc_addr;
1978 		struct in_multi *inm;
1979 
1980 		sc->sc_naddrs = new_naddrs;
1981 		sc->sc_naddrs6 = new_naddrs6;
1982 
1983 		/* Re-establish multicast membership removed by in_control */
1984 		if (IN_MULTICAST(sc->sc_peer.s_addr)) {
1985 			mc_addr.s_addr = sc->sc_peer.s_addr;
1986 			IN_LOOKUP_MULTI(mc_addr, &sc->sc_if, inm);
1987 			if (inm == NULL) {
1988 				struct in_multi **imm =
1989 				    sc->sc_imo.imo_membership;
1990 				u_int16_t maxmem =
1991 				    sc->sc_imo.imo_max_memberships;
1992 
1993 				bzero(&sc->sc_imo, sizeof(sc->sc_imo));
1994 				sc->sc_imo.imo_membership = imm;
1995 				sc->sc_imo.imo_max_memberships = maxmem;
1996 
1997 				if (sc->sc_carpdev != NULL && sc->sc_naddrs > 0)
1998 					carp_join_multicast(sc);
1999 			}
2000 		}
2001 
2002 		if (sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0) {
2003 			sc->sc_if.if_flags &= ~IFF_UP;
2004 			carp_set_state_all(sc, INIT);
2005 		} else
2006 			carp_hmac_prepare(sc);
2007 	}
2008 
2009 	carp_setrun_all(sc, 0);
2010 }
2011 
2012 int
2013 carp_set_addr(struct carp_softc *sc, struct sockaddr_in *sin)
2014 {
2015 	struct ifnet *ifp = sc->sc_carpdev;
2016 	struct in_ifaddr *ia, *ia_if;
2017 	int error = 0;
2018 
2019 	/* XXX is this necessary? */
2020 	if (sin->sin_addr.s_addr == 0) {
2021 		if (!(sc->sc_if.if_flags & IFF_UP))
2022 			carp_set_state_all(sc, INIT);
2023 		if (sc->sc_naddrs)
2024 			sc->sc_if.if_flags |= IFF_UP;
2025 		carp_setrun_all(sc, 0);
2026 		return (0);
2027 	}
2028 
2029 	/* we have to do this by hand to ensure we don't match on ourselves */
2030 	ia_if = NULL;
2031 	for (ia = TAILQ_FIRST(&in_ifaddr); ia;
2032 	    ia = TAILQ_NEXT(ia, ia_list)) {
2033 
2034 		/* and, yeah, we need a multicast-capable iface too */
2035 		if (ia->ia_ifp != &sc->sc_if &&
2036 		    ia->ia_ifp->if_type != IFT_CARP &&
2037 		    (ia->ia_ifp->if_flags & IFF_MULTICAST) &&
2038 		    ia->ia_ifp->if_rdomain == sc->sc_if.if_rdomain &&
2039 		    (sin->sin_addr.s_addr & ia->ia_netmask) ==
2040 		    ia->ia_net) {
2041 			if (!ia_if)
2042 				ia_if = ia;
2043 		}
2044 	}
2045 
2046 	if (ia_if) {
2047 		ia = ia_if;
2048 		if (ifp) {
2049 			if (ifp != ia->ia_ifp)
2050 				return (EADDRNOTAVAIL);
2051 		} else {
2052 			ifp = ia->ia_ifp;
2053 		}
2054 	}
2055 
2056 	if ((error = carp_set_ifp(sc, ifp)))
2057 		return (error);
2058 
2059 	if (sc->sc_carpdev == NULL)
2060 		return (EADDRNOTAVAIL);
2061 
2062 	if (sc->sc_naddrs == 0 && (error = carp_join_multicast(sc)) != 0)
2063 		return (error);
2064 
2065 	if (sc->sc_carpdev != NULL)
2066 		sc->sc_if.if_flags |= IFF_UP;
2067 
2068 	carp_set_state_all(sc, INIT);
2069 
2070 	return (0);
2071 }
2072 
2073 int
2074 carp_join_multicast(struct carp_softc *sc)
2075 {
2076 	struct ip_moptions *imo = &sc->sc_imo;
2077 	struct in_multi *imm;
2078 	struct in_addr addr;
2079 
2080 	if (!IN_MULTICAST(sc->sc_peer.s_addr))
2081 		return (0);
2082 
2083 	addr.s_addr = sc->sc_peer.s_addr;
2084 	if ((imm = in_addmulti(&addr, &sc->sc_if)) == NULL)
2085 		return (ENOBUFS);
2086 
2087 	imo->imo_membership[0] = imm;
2088 	imo->imo_num_memberships = 1;
2089 	imo->imo_multicast_ifp = &sc->sc_if;
2090 	imo->imo_multicast_ttl = CARP_DFLTTL;
2091 	imo->imo_multicast_loop = 0;
2092 	return (0);
2093 }
2094 
2095 
2096 #ifdef INET6
2097 int
2098 carp_set_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6)
2099 {
2100 	struct ifnet *ifp = sc->sc_carpdev;
2101 	struct in6_ifaddr *ia, *ia_if;
2102 	int error = 0;
2103 
2104 	if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
2105 		if (!(sc->sc_if.if_flags & IFF_UP))
2106 			carp_set_state_all(sc, INIT);
2107 		if (sc->sc_naddrs6)
2108 			sc->sc_if.if_flags |= IFF_UP;
2109 		carp_setrun_all(sc, 0);
2110 		return (0);
2111 	}
2112 
2113 	/* we have to do this by hand to ensure we don't match on ourselves */
2114 	ia_if = NULL;
2115 	for (ia = in6_ifaddr; ia; ia = ia->ia_next) {
2116 		int i;
2117 
2118 		for (i = 0; i < 4; i++) {
2119 			if ((sin6->sin6_addr.s6_addr32[i] &
2120 			    ia->ia_prefixmask.sin6_addr.s6_addr32[i]) !=
2121 			    (ia->ia_addr.sin6_addr.s6_addr32[i] &
2122 			    ia->ia_prefixmask.sin6_addr.s6_addr32[i]))
2123 				break;
2124 		}
2125 		/* and, yeah, we need a multicast-capable iface too */
2126 		if (ia->ia_ifp != &sc->sc_if &&
2127 		    ia->ia_ifp->if_type != IFT_CARP &&
2128 		    (ia->ia_ifp->if_flags & IFF_MULTICAST) &&
2129 		    (i == 4)) {
2130 			if (!ia_if)
2131 				ia_if = ia;
2132 		}
2133 	}
2134 
2135 	if (ia_if) {
2136 		ia = ia_if;
2137 		if (sc->sc_carpdev) {
2138 			if (sc->sc_carpdev != ia->ia_ifp)
2139 				return (EADDRNOTAVAIL);
2140 		} else {
2141 			ifp = ia->ia_ifp;
2142 		}
2143 	}
2144 
2145 	if ((error = carp_set_ifp(sc, ifp)))
2146 		return (error);
2147 
2148 	if (sc->sc_carpdev == NULL)
2149 		return (EADDRNOTAVAIL);
2150 
2151 	if (sc->sc_naddrs6 == 0 && (error = carp_join_multicast6(sc)) != 0)
2152 		return (error);
2153 
2154 	if (sc->sc_carpdev != NULL)
2155 		sc->sc_if.if_flags |= IFF_UP;
2156 
2157 	carp_set_state_all(sc, INIT);
2158 
2159 	return (0);
2160 }
2161 
2162 int
2163 carp_join_multicast6(struct carp_softc *sc)
2164 {
2165 	struct in6_multi_mship *imm, *imm2;
2166 	struct ip6_moptions *im6o = &sc->sc_im6o;
2167 	struct sockaddr_in6 addr6;
2168 	int error;
2169 
2170 	/* Join IPv6 CARP multicast group */
2171 	bzero(&addr6, sizeof(addr6));
2172 	addr6.sin6_family = AF_INET6;
2173 	addr6.sin6_len = sizeof(addr6);
2174 	addr6.sin6_addr.s6_addr16[0] = htons(0xff02);
2175 	addr6.sin6_addr.s6_addr16[1] = htons(sc->sc_if.if_index);
2176 	addr6.sin6_addr.s6_addr8[15] = 0x12;
2177 	if ((imm = in6_joingroup(&sc->sc_if,
2178 	    &addr6.sin6_addr, &error)) == NULL) {
2179 		return (error);
2180 	}
2181 	/* join solicited multicast address */
2182 	bzero(&addr6.sin6_addr, sizeof(addr6.sin6_addr));
2183 	addr6.sin6_addr.s6_addr16[0] = htons(0xff02);
2184 	addr6.sin6_addr.s6_addr16[1] = htons(sc->sc_if.if_index);
2185 	addr6.sin6_addr.s6_addr32[1] = 0;
2186 	addr6.sin6_addr.s6_addr32[2] = htonl(1);
2187 	addr6.sin6_addr.s6_addr32[3] = 0;
2188 	addr6.sin6_addr.s6_addr8[12] = 0xff;
2189 	if ((imm2 = in6_joingroup(&sc->sc_if,
2190 	    &addr6.sin6_addr, &error)) == NULL) {
2191 		in6_leavegroup(imm);
2192 		return (error);
2193 	}
2194 
2195 	/* apply v6 multicast membership */
2196 	im6o->im6o_multicast_ifp = &sc->sc_if;
2197 	if (imm)
2198 		LIST_INSERT_HEAD(&im6o->im6o_memberships, imm,
2199 		    i6mm_chain);
2200 	if (imm2)
2201 		LIST_INSERT_HEAD(&im6o->im6o_memberships, imm2,
2202 		    i6mm_chain);
2203 
2204 	return (0);
2205 }
2206 
2207 #endif /* INET6 */
2208 
2209 int
2210 carp_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr)
2211 {
2212 	struct proc *p = curproc;	/* XXX */
2213 	struct carp_softc *sc = ifp->if_softc;
2214 	struct carp_vhost_entry *vhe;
2215 	struct carpreq carpr;
2216 	struct ifaddr *ifa = (struct ifaddr *)addr;
2217 	struct ifreq *ifr = (struct ifreq *)addr;
2218 	struct ifnet *cdev = NULL;
2219 	int i, error = 0;
2220 
2221 	switch (cmd) {
2222 	case SIOCSIFADDR:
2223 		switch (ifa->ifa_addr->sa_family) {
2224 #ifdef INET
2225 		case AF_INET:
2226 			sc->sc_if.if_flags |= IFF_UP;
2227 			/*
2228 			 * emulate arp_ifinit() without doing a gratious arp
2229 			 * request so that the routes are setup correctly.
2230 			 */
2231 			ifa->ifa_rtrequest = arp_rtrequest;
2232 			ifa->ifa_flags |= RTF_CLONING;
2233 
2234 			error = carp_set_addr(sc, satosin(ifa->ifa_addr));
2235 			break;
2236 #endif /* INET */
2237 #ifdef INET6
2238 		case AF_INET6:
2239 			sc->sc_if.if_flags |= IFF_UP;
2240 			error = carp_set_addr6(sc, satosin6(ifa->ifa_addr));
2241 			break;
2242 #endif /* INET6 */
2243 		default:
2244 			error = EAFNOSUPPORT;
2245 			break;
2246 		}
2247 		break;
2248 
2249 	case SIOCSIFFLAGS:
2250 		vhe = LIST_FIRST(&sc->carp_vhosts);
2251 		if (vhe->state != INIT && !(ifr->ifr_flags & IFF_UP)) {
2252 			carp_del_all_timeouts(sc);
2253 
2254 			/* we need the interface up to bow out */
2255 			sc->sc_if.if_flags |= IFF_UP;
2256 			sc->sc_bow_out = 1;
2257 			carp_vhe_send_ad_all(sc);
2258 			sc->sc_bow_out = 0;
2259 
2260 			sc->sc_if.if_flags &= ~IFF_UP;
2261 			carp_set_state_all(sc, INIT);
2262 			carp_setrun_all(sc, 0);
2263 		} else if (vhe->state == INIT && (ifr->ifr_flags & IFF_UP)) {
2264 			sc->sc_if.if_flags |= IFF_UP;
2265 			carp_setrun_all(sc, 0);
2266 		}
2267 		break;
2268 
2269 	case SIOCSVH:
2270 		vhe = LIST_FIRST(&sc->carp_vhosts);
2271 		if ((error = suser(p, p->p_acflag)) != 0)
2272 			break;
2273 		if ((error = copyin(ifr->ifr_data, &carpr, sizeof carpr)))
2274 			break;
2275 		error = 1;
2276 		if (carpr.carpr_carpdev[0] != '\0' &&
2277 		    (cdev = ifunit(carpr.carpr_carpdev)) == NULL)
2278 			return (EINVAL);
2279 		if (carpr.carpr_peer.s_addr == 0)
2280 			sc->sc_peer.s_addr = INADDR_CARP_GROUP;
2281 		else
2282 			sc->sc_peer.s_addr = carpr.carpr_peer.s_addr;
2283 		if ((error = carp_set_ifp(sc, cdev)))
2284 			return (error);
2285 		if (vhe->state != INIT && carpr.carpr_state != vhe->state) {
2286 			switch (carpr.carpr_state) {
2287 			case BACKUP:
2288 				timeout_del(&vhe->ad_tmo);
2289 				carp_set_state_all(sc, BACKUP);
2290 				carp_setrun_all(sc, 0);
2291 				carp_setroute(sc, RTM_DELETE);
2292 				break;
2293 			case MASTER:
2294 				LIST_FOREACH(vhe, &sc->carp_vhosts,
2295 				    vhost_entries)
2296 					carp_master_down(vhe);
2297 				break;
2298 			default:
2299 				break;
2300 			}
2301 		}
2302 		if ((error = carp_vhids_ioctl(sc, &carpr)))
2303 			return (error);
2304 		if (carpr.carpr_advbase >= 0) {
2305 			if (carpr.carpr_advbase > 255) {
2306 				error = EINVAL;
2307 				break;
2308 			}
2309 			sc->sc_advbase = carpr.carpr_advbase;
2310 			error--;
2311 		}
2312 		if (bcmp(sc->sc_advskews, carpr.carpr_advskews,
2313 		    sizeof(sc->sc_advskews))) {
2314 			i = 0;
2315 			LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries)
2316 				vhe->advskew = carpr.carpr_advskews[i++];
2317 			bcopy(carpr.carpr_advskews, sc->sc_advskews,
2318 			    sizeof(sc->sc_advskews));
2319 		}
2320 		if (sc->sc_balancing != carpr.carpr_balancing) {
2321 			if (carpr.carpr_balancing > CARP_BAL_MAXID) {
2322 				error = EINVAL;
2323 				break;
2324 			}
2325 			sc->sc_balancing = carpr.carpr_balancing;
2326 			carp_set_enaddr(sc);
2327 			carp_update_lsmask(sc);
2328 		}
2329 		bcopy(carpr.carpr_key, sc->sc_key, sizeof(sc->sc_key));
2330 		if (error > 0)
2331 			error = EINVAL;
2332 		else {
2333 			error = 0;
2334 			carp_hmac_prepare(sc);
2335 			carp_setrun_all(sc, 0);
2336 		}
2337 		break;
2338 
2339 	case SIOCGVH:
2340 		bzero(&carpr, sizeof(carpr));
2341 		if (sc->sc_carpdev != NULL)
2342 			strlcpy(carpr.carpr_carpdev, sc->sc_carpdev->if_xname,
2343 			    IFNAMSIZ);
2344 		i = 0;
2345 		LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) {
2346 			carpr.carpr_vhids[i] = vhe->vhid;
2347 			carpr.carpr_advskews[i] = vhe->advskew;
2348 			carpr.carpr_states[i] = vhe->state;
2349 			i++;
2350 		}
2351 		carpr.carpr_advbase = sc->sc_advbase;
2352 		carpr.carpr_balancing = sc->sc_balancing;
2353 		if (suser(p, p->p_acflag) == 0)
2354 			bcopy(sc->sc_key, carpr.carpr_key,
2355 			    sizeof(carpr.carpr_key));
2356 		carpr.carpr_peer.s_addr = sc->sc_peer.s_addr;
2357 		error = copyout(&carpr, ifr->ifr_data, sizeof(carpr));
2358 		break;
2359 
2360 	case SIOCADDMULTI:
2361 		error = carp_ether_addmulti(sc, ifr);
2362 		break;
2363 
2364 	case SIOCDELMULTI:
2365 		error = carp_ether_delmulti(sc, ifr);
2366 		break;
2367 	case SIOCAIFGROUP:
2368 	case SIOCDIFGROUP:
2369 		if (sc->sc_demote_cnt)
2370 			carp_ifgroup_ioctl(ifp, cmd, addr);
2371 		break;
2372 	case SIOCSIFGATTR:
2373 		carp_ifgattr_ioctl(ifp, cmd, addr);
2374 		break;
2375 	default:
2376 		error = ENOTTY;
2377 	}
2378 
2379 	if (bcmp(sc->sc_ac.ac_enaddr, sc->sc_curlladdr, ETHER_ADDR_LEN) != 0)
2380 		carp_set_enaddr(sc);
2381 	return (error);
2382 }
2383 
2384 int
2385 carp_check_dup_vhids(struct carp_softc *sc, struct carp_if *cif,
2386     struct carpreq *carpr)
2387 {
2388 	struct carp_softc *vr;
2389 	struct carp_vhost_entry *vhe, *vhe0;
2390 	int i;
2391 
2392 	TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) {
2393 		if (vr == sc)
2394 			continue;
2395 		LIST_FOREACH(vhe, &vr->carp_vhosts, vhost_entries) {
2396 			if (carpr) {
2397 				for (i = 0; carpr->carpr_vhids[i]; i++) {
2398 					if (vhe->vhid == carpr->carpr_vhids[i])
2399 						return (EINVAL);
2400 				}
2401 			}
2402 			LIST_FOREACH(vhe0, &sc->carp_vhosts, vhost_entries) {
2403 				if (vhe->vhid == vhe0->vhid)
2404 					return (EINVAL);
2405 			}
2406 		}
2407 	}
2408 	return (0);
2409 }
2410 
2411 int
2412 carp_vhids_ioctl(struct carp_softc *sc, struct carpreq *carpr)
2413 {
2414 	int i, j;
2415 	u_int8_t taken_vhids[256];
2416 
2417 	if (carpr->carpr_vhids[0] == 0 ||
2418 	    !bcmp(sc->sc_vhids, carpr->carpr_vhids, sizeof(sc->sc_vhids)))
2419 		return (0);
2420 
2421 	bzero(taken_vhids, sizeof(taken_vhids));
2422 	for (i = 0; carpr->carpr_vhids[i]; i++) {
2423 		if (taken_vhids[carpr->carpr_vhids[i]])
2424 			return (EINVAL);
2425 		taken_vhids[carpr->carpr_vhids[i]] = 1;
2426 
2427 		if (sc->sc_carpdev) {
2428 			struct carp_if *cif;
2429 			cif = (struct carp_if *)sc->sc_carpdev->if_carp;
2430 			if (carp_check_dup_vhids(sc, cif, carpr))
2431 				return (EINVAL);
2432 		}
2433 		if (carpr->carpr_advskews[i] >= 255)
2434 			return (EINVAL);
2435 	}
2436 	/* set sane balancing defaults */
2437 	if (i <= 1)
2438 		carpr->carpr_balancing = CARP_BAL_NONE;
2439 	else if (carpr->carpr_balancing == CARP_BAL_NONE &&
2440 	    sc->sc_balancing == CARP_BAL_NONE)
2441 		carpr->carpr_balancing = CARP_BAL_IP;
2442 
2443 	/* destroy all */
2444 	carp_del_all_timeouts(sc);
2445 	carp_destroy_vhosts(sc);
2446 	bzero(sc->sc_vhids, sizeof(sc->sc_vhids));
2447 
2448 	/* sort vhosts list by vhid */
2449 	for (j = 1; j <= 255; j++) {
2450 		for (i = 0; carpr->carpr_vhids[i]; i++) {
2451 			if (carpr->carpr_vhids[i] != j)
2452 				continue;
2453 			if (carp_new_vhost(sc, carpr->carpr_vhids[i],
2454 			    carpr->carpr_advskews[i]))
2455 				return (ENOMEM);
2456 			sc->sc_vhids[i] = carpr->carpr_vhids[i];
2457 			sc->sc_advskews[i] = carpr->carpr_advskews[i];
2458 		}
2459 	}
2460 	carp_set_enaddr(sc);
2461 	carp_set_state_all(sc, INIT);
2462 	return (0);
2463 }
2464 
2465 void
2466 carp_ifgroup_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr)
2467 {
2468 	struct ifgroupreq *ifgr = (struct ifgroupreq *)addr;
2469 	struct ifg_list	*ifgl;
2470 	int *dm, adj;
2471 
2472 	if (!strcmp(ifgr->ifgr_group, IFG_ALL))
2473 		return;
2474 	adj = ((struct carp_softc *)ifp->if_softc)->sc_demote_cnt;
2475 	if (cmd == SIOCDIFGROUP)
2476 		adj = adj * -1;
2477 
2478 	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
2479 		if (!strcmp(ifgl->ifgl_group->ifg_group, ifgr->ifgr_group)) {
2480 			dm = &ifgl->ifgl_group->ifg_carp_demoted;
2481 			if (*dm + adj >= 0)
2482 				*dm += adj;
2483 			else
2484 				*dm = 0;
2485 		}
2486 }
2487 
2488 void
2489 carp_ifgattr_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr)
2490 {
2491 	struct ifgroupreq *ifgr = (struct ifgroupreq *)addr;
2492 	struct carp_softc *sc = ifp->if_softc;
2493 
2494 	if (ifgr->ifgr_attrib.ifg_carp_demoted > 0 && (sc->sc_if.if_flags &
2495 	    (IFF_UP|IFF_RUNNING)) == (IFF_UP|IFF_RUNNING))
2496 		carp_vhe_send_ad_all(sc);
2497 }
2498 
/*
 * Output start routine of a carp interface.  It is not expected to be
 * called at all; the body is empty except for a message printed in
 * DEBUG kernels so that an unexpected call gets noticed.
 */
void
carp_start(struct ifnet *ifp)
{
#ifdef DEBUG
	printf("%s: start called\n", ifp->if_xname);
#endif
}
2509 
2510 int
2511 carp_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa,
2512     struct rtentry *rt)
2513 {
2514 	struct carp_softc *sc = ((struct carp_softc *)ifp->if_softc);
2515 	struct carp_vhost_entry *vhe;
2516 
2517 	vhe = sc->cur_vhe ? sc->cur_vhe : LIST_FIRST(&sc->carp_vhosts);
2518 
2519 	if (sc->sc_carpdev != NULL &&
2520 	    (sc->sc_balancing || vhe->state == MASTER))
2521 		return (sc->sc_carpdev->if_output(ifp, m, sa, rt));
2522 	else {
2523 		m_freem(m);
2524 		return (ENETUNREACH);
2525 	}
2526 }
2527 
2528 void
2529 carp_set_state_all(struct carp_softc *sc, int state)
2530 {
2531 	struct carp_vhost_entry *vhe;
2532 
2533 	LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries)
2534 		carp_set_state(vhe, state);
2535 }
2536 
2537 void
2538 carp_set_state(struct carp_vhost_entry *vhe, int state)
2539 {
2540 	struct carp_softc *sc = vhe->parent_sc;
2541 	static const char *carp_states[] = { CARP_STATES };
2542 	int loglevel;
2543 
2544 	if (vhe->state == state)
2545 		return;
2546 	if (vhe->state == INIT || state == INIT)
2547 		loglevel = LOG_WARNING;
2548 	else
2549 		loglevel = LOG_CRIT;
2550 
2551 	if (sc->sc_vhe_count > 1)
2552 		CARP_LOG(loglevel, sc,
2553 		    ("state transition (vhid %d): %s -> %s", vhe->vhid,
2554 		    carp_states[vhe->state], carp_states[state]));
2555 	else
2556 		CARP_LOG(loglevel, sc,
2557 		    ("state transition: %s -> %s",
2558 		    carp_states[vhe->state], carp_states[state]));
2559 
2560 	vhe->state = state;
2561 	carp_update_lsmask(sc);
2562 
2563 	/* only the master vhe creates link state messages */
2564 	if (!vhe->vhe_leader)
2565 		return;
2566 
2567 	switch (state) {
2568 	case BACKUP:
2569 		sc->sc_if.if_link_state = LINK_STATE_DOWN;
2570 		break;
2571 	case MASTER:
2572 		sc->sc_if.if_link_state = LINK_STATE_UP;
2573 		break;
2574 	default:
2575 		sc->sc_if.if_link_state = LINK_STATE_INVALID;
2576 		break;
2577 	}
2578 	if_link_state_change(&sc->sc_if);
2579 }
2580 
2581 void
2582 carp_group_demote_adj(struct ifnet *ifp, int adj, char *reason)
2583 {
2584 	struct ifg_list	*ifgl;
2585 	int *dm;
2586 	struct carp_softc *nil = NULL;
2587 
2588 	if (ifp->if_type == IFT_CARP) {
2589 		dm = &((struct carp_softc *)ifp->if_softc)->sc_demote_cnt;
2590 		if (*dm + adj >= 0)
2591 			*dm += adj;
2592 		else
2593 			*dm = 0;
2594 	}
2595 
2596 	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
2597 		if (!strcmp(ifgl->ifgl_group->ifg_group, IFG_ALL))
2598 			continue;
2599 		dm = &ifgl->ifgl_group->ifg_carp_demoted;
2600 
2601 		if (*dm + adj >= 0)
2602 			*dm += adj;
2603 		else
2604 			*dm = 0;
2605 
2606 		if (adj > 0 && *dm == 1)
2607 			carp_send_ad_all();
2608 		CARP_LOG(LOG_ERR, nil,
2609 		    ("%s demoted group %s by %d to %d (%s)",
2610 		    ifp->if_xname, ifgl->ifgl_group->ifg_group,
2611 		    adj, *dm, reason));
2612 	}
2613 }
2614 
2615 int
2616 carp_group_demote_count(struct carp_softc *sc)
2617 {
2618 	struct ifg_list	*ifgl;
2619 	int count = 0;
2620 
2621 	TAILQ_FOREACH(ifgl, &sc->sc_if.if_groups, ifgl_next)
2622 		count += ifgl->ifgl_group->ifg_carp_demoted;
2623 
2624 	if (count == 0 && sc->sc_demote_cnt)
2625 		count = sc->sc_demote_cnt;
2626 
2627 	return (count > 255 ? 255 : count);
2628 }
2629 
2630 void
2631 carp_carpdev_state(void *v)
2632 {
2633 	struct carp_if *cif;
2634 	struct carp_softc *sc;
2635 	struct ifnet *ifp = v;
2636 
2637 	if (ifp->if_type == IFT_CARP)
2638 		return;
2639 
2640 	cif = (struct carp_if *)ifp->if_carp;
2641 
2642 	TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) {
2643 		int suppressed = sc->sc_suppress;
2644 
2645 		if (sc->sc_carpdev->if_link_state == LINK_STATE_DOWN ||
2646 		    !(sc->sc_carpdev->if_flags & IFF_UP)) {
2647 			sc->sc_if.if_flags &= ~IFF_RUNNING;
2648 			carp_del_all_timeouts(sc);
2649 			carp_set_state_all(sc, INIT);
2650 			sc->sc_suppress = 1;
2651 			carp_setrun_all(sc, 0);
2652 			if (!suppressed)
2653 				carp_group_demote_adj(&sc->sc_if, 1, "carpdev");
2654 		} else if (suppressed) {
2655 			carp_set_state_all(sc, INIT);
2656 			sc->sc_suppress = 0;
2657 			carp_setrun_all(sc, 0);
2658 			carp_group_demote_adj(&sc->sc_if, -1, "carpdev");
2659 		}
2660 	}
2661 }
2662 
2663 int
2664 carp_ether_addmulti(struct carp_softc *sc, struct ifreq *ifr)
2665 {
2666 	struct ifnet *ifp;
2667 	struct carp_mc_entry *mc;
2668 	u_int8_t addrlo[ETHER_ADDR_LEN], addrhi[ETHER_ADDR_LEN];
2669 	int error;
2670 
2671 	ifp = sc->sc_carpdev;
2672 	if (ifp == NULL)
2673 		return (EINVAL);
2674 
2675 	error = ether_addmulti(ifr, (struct arpcom *)&sc->sc_ac);
2676 	if (error != ENETRESET)
2677 		return (error);
2678 
2679 	/*
2680 	 * This is new multicast address.  We have to tell parent
2681 	 * about it.  Also, remember this multicast address so that
2682 	 * we can delete them on unconfigure.
2683 	 */
2684 	mc = malloc(sizeof(struct carp_mc_entry), M_DEVBUF, M_NOWAIT);
2685 	if (mc == NULL) {
2686 		error = ENOMEM;
2687 		goto alloc_failed;
2688 	}
2689 
2690 	/*
2691 	 * As ether_addmulti() returns ENETRESET, following two
2692 	 * statement shouldn't fail.
2693 	 */
2694 	(void)ether_multiaddr(&ifr->ifr_addr, addrlo, addrhi);
2695 	ETHER_LOOKUP_MULTI(addrlo, addrhi, &sc->sc_ac, mc->mc_enm);
2696 	memcpy(&mc->mc_addr, &ifr->ifr_addr, ifr->ifr_addr.sa_len);
2697 	LIST_INSERT_HEAD(&sc->carp_mc_listhead, mc, mc_entries);
2698 
2699 	error = (*ifp->if_ioctl)(ifp, SIOCADDMULTI, (caddr_t)ifr);
2700 	if (error != 0)
2701 		goto ioctl_failed;
2702 
2703 	return (error);
2704 
2705  ioctl_failed:
2706 	LIST_REMOVE(mc, mc_entries);
2707 	free(mc, M_DEVBUF);
2708  alloc_failed:
2709 	(void)ether_delmulti(ifr, (struct arpcom *)&sc->sc_ac);
2710 
2711 	return (error);
2712 }
2713 
2714 int
2715 carp_ether_delmulti(struct carp_softc *sc, struct ifreq *ifr)
2716 {
2717 	struct ifnet *ifp;
2718 	struct ether_multi *enm;
2719 	struct carp_mc_entry *mc;
2720 	u_int8_t addrlo[ETHER_ADDR_LEN], addrhi[ETHER_ADDR_LEN];
2721 	int error;
2722 
2723 	ifp = sc->sc_carpdev;
2724 	if (ifp == NULL)
2725 		return (EINVAL);
2726 
2727 	/*
2728 	 * Find a key to lookup carp_mc_entry.  We have to do this
2729 	 * before calling ether_delmulti for obvious reason.
2730 	 */
2731 	if ((error = ether_multiaddr(&ifr->ifr_addr, addrlo, addrhi)) != 0)
2732 		return (error);
2733 	ETHER_LOOKUP_MULTI(addrlo, addrhi, &sc->sc_ac, enm);
2734 	if (enm == NULL)
2735 		return (EINVAL);
2736 
2737 	LIST_FOREACH(mc, &sc->carp_mc_listhead, mc_entries)
2738 		if (mc->mc_enm == enm)
2739 			break;
2740 
2741 	/* We won't delete entries we didn't add */
2742 	if (mc == NULL)
2743 		return (EINVAL);
2744 
2745 	error = ether_delmulti(ifr, (struct arpcom *)&sc->sc_ac);
2746 	if (error != ENETRESET)
2747 		return (error);
2748 
2749 	/* We no longer use this multicast address.  Tell parent so. */
2750 	error = (*ifp->if_ioctl)(ifp, SIOCDELMULTI, (caddr_t)ifr);
2751 	if (error == 0) {
2752 		/* And forget about this address. */
2753 		LIST_REMOVE(mc, mc_entries);
2754 		free(mc, M_DEVBUF);
2755 	} else
2756 		(void)ether_addmulti(ifr, (struct arpcom *)&sc->sc_ac);
2757 	return (error);
2758 }
2759 
2760 /*
2761  * Delete any multicast address we have asked to add from parent
2762  * interface.  Called when the carp is being unconfigured.
2763  */
2764 void
2765 carp_ether_purgemulti(struct carp_softc *sc)
2766 {
2767 	struct ifnet *ifp = sc->sc_carpdev;		/* Parent. */
2768 	struct carp_mc_entry *mc;
2769 	union {
2770 		struct ifreq ifreq;
2771 		struct {
2772 			char ifr_name[IFNAMSIZ];
2773 			struct sockaddr_storage ifr_ss;
2774 		} ifreq_storage;
2775 	} u;
2776 	struct ifreq *ifr = &u.ifreq;
2777 
2778 	if (ifp == NULL)
2779 		return;
2780 
2781 	memcpy(ifr->ifr_name, ifp->if_xname, IFNAMSIZ);
2782 	while ((mc = LIST_FIRST(&sc->carp_mc_listhead)) != NULL) {
2783 		memcpy(&ifr->ifr_addr, &mc->mc_addr, mc->mc_addr.ss_len);
2784 		(void)(*ifp->if_ioctl)(ifp, SIOCDELMULTI, (caddr_t)ifr);
2785 		LIST_REMOVE(mc, mc_entries);
2786 		free(mc, M_DEVBUF);
2787 	}
2788 }
2789