xref: /openbsd-src/sys/netinet/ip_carp.c (revision 4c1e55dc91edd6e69ccc60ce855900fbc12cf34f)
1 /*	$OpenBSD: ip_carp.c,v 1.195 2012/04/11 17:42:53 mikeb Exp $	*/
2 
3 /*
4  * Copyright (c) 2002 Michael Shalayeff. All rights reserved.
5  * Copyright (c) 2003 Ryan McBride. All rights reserved.
6  * Copyright (c) 2006-2008 Marco Pfatschbacher. All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
21  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
23  * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
25  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
26  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
27  * THE POSSIBILITY OF SUCH DAMAGE.
28  */
29 
30 /*
31  * TODO:
32  *	- iface reconfigure
33  *	- support for hardware checksum calculations;
34  *
35  */
36 
37 #include "ether.h"
38 
39 #include <sys/param.h>
40 #include <sys/proc.h>
41 #include <sys/systm.h>
42 #include <sys/mbuf.h>
43 #include <sys/socket.h>
44 #include <sys/socketvar.h>
45 #include <sys/ioctl.h>
46 #include <sys/errno.h>
47 #include <sys/device.h>
48 #include <sys/kernel.h>
49 #include <sys/sysctl.h>
50 #include <sys/syslog.h>
51 
52 #include <machine/cpu.h>
53 
54 #include <net/if.h>
55 #include <net/if_types.h>
56 #include <net/if_llc.h>
57 #include <net/route.h>
58 #include <net/netisr.h>
59 
60 /* for arc4random() */
61 #include <dev/rndvar.h>
62 
63 #if NFDDI > 0
64 #include <net/if_fddi.h>
65 #endif
66 
67 #include <crypto/sha1.h>
68 
69 #ifdef INET
70 #include <netinet/in.h>
71 #include <netinet/in_systm.h>
72 #include <netinet/in_var.h>
73 #include <netinet/ip.h>
74 #include <netinet/ip_var.h>
75 #include <netinet/if_ether.h>
76 #include <netinet/ip_ipsp.h>
77 
78 #include <net/if_enc.h>
79 #include <net/if_dl.h>
80 #endif
81 
82 #ifdef INET6
83 #include <netinet/icmp6.h>
84 #include <netinet/ip6.h>
85 #include <netinet6/ip6_var.h>
86 #include <netinet6/nd6.h>
87 #include <netinet6/in6_ifattach.h>
88 #endif
89 
90 #include "bpfilter.h"
91 #if NBPFILTER > 0
92 #include <net/bpf.h>
93 #endif
94 
95 #include <netinet/ip_carp.h>
96 
97 struct carp_mc_entry {
98 	LIST_ENTRY(carp_mc_entry)	mc_entries;
99 	union {
100 		struct ether_multi	*mcu_enm;
101 	} mc_u;
102 	struct sockaddr_storage		mc_addr;
103 };
104 #define	mc_enm	mc_u.mcu_enm
105 
/*
 * Indices into carp_vhost_entry.vhe_sha1[]: one precomputed HMAC context
 * per variant.
 */
enum {
	HMAC_ORIG = 0,		/* scope-embedded IPv6 addresses are hashed */
	HMAC_NOV6LL = 1,	/* scope-embedded IPv6 addresses are left out */
	HMAC_MAX = 2		/* number of contexts */
};
107 
108 struct carp_vhost_entry {
109 	LIST_ENTRY(carp_vhost_entry)	vhost_entries;
110 	struct carp_softc *parent_sc;
111 	int vhe_leader;
112 	int vhid;
113 	int advskew;
114 	enum { INIT = 0, BACKUP, MASTER }	state;
115 	struct timeout ad_tmo;	/* advertisement timeout */
116 	struct timeout md_tmo;	/* master down timeout */
117 	struct timeout md6_tmo;	/* master down timeout */
118 
119 	u_int64_t vhe_replay_cookie;
120 
121 	/* authentication */
122 #define CARP_HMAC_PAD	64
123 	unsigned char vhe_pad[CARP_HMAC_PAD];
124 	SHA1_CTX vhe_sha1[HMAC_MAX];
125 
126 	u_int8_t vhe_enaddr[ETHER_ADDR_LEN];
127 	struct sockaddr_dl vhe_sdl;	/* for IPv6 ndp balancing */
128 };
129 
130 struct carp_softc {
131 	struct arpcom sc_ac;
132 #define	sc_if		sc_ac.ac_if
133 #define	sc_carpdev	sc_ac.ac_if.if_carpdev
134 	void *ah_cookie;
135 	void *lh_cookie;
136 	struct ip_moptions sc_imo;
137 #ifdef INET6
138 	struct ip6_moptions sc_im6o;
139 #endif /* INET6 */
140 	TAILQ_ENTRY(carp_softc) sc_list;
141 
142 	int sc_suppress;
143 	int sc_bow_out;
144 	int sc_demote_cnt;
145 
146 	int sc_sendad_errors;
147 #define CARP_SENDAD_MAX_ERRORS(sc) (3 * (sc)->sc_vhe_count)
148 	int sc_sendad_success;
149 #define CARP_SENDAD_MIN_SUCCESS(sc) (3 * (sc)->sc_vhe_count)
150 
151 	char sc_curlladdr[ETHER_ADDR_LEN];
152 
153 	LIST_HEAD(__carp_vhosthead, carp_vhost_entry)	carp_vhosts;
154 	int sc_vhe_count;
155 	u_int8_t sc_vhids[CARP_MAXNODES];
156 	u_int8_t sc_advskews[CARP_MAXNODES];
157 	u_int8_t sc_balancing;
158 
159 	int sc_naddrs;
160 	int sc_naddrs6;
161 	int sc_advbase;		/* seconds */
162 
163 	/* authentication */
164 	unsigned char sc_key[CARP_KEY_LEN];
165 
166 	u_int32_t sc_hashkey[2];
167 	u_int32_t sc_lsmask;		/* load sharing mask */
168 	int sc_lscount;			/* # load sharing interfaces (max 32) */
169 	int sc_delayed_arp;		/* delayed ARP request countdown */
170 
171 	struct in_addr sc_peer;
172 
173 	LIST_HEAD(__carp_mchead, carp_mc_entry)	carp_mc_listhead;
174 	struct carp_vhost_entry *cur_vhe; /* current active vhe */
175 };
176 
177 int carp_opts[CARPCTL_MAXID] = { 0, 1, 0, LOG_CRIT };	/* XXX for now */
178 struct carpstats carpstats;
179 
180 struct carp_if {
181 	TAILQ_HEAD(, carp_softc) vhif_vrs;
182 	int vhif_nvrs;
183 
184 	struct ifnet *vhif_ifp;
185 };
186 
/*
 * Log a message if the configured carp log level permits it.
 *
 *	l	syslog priority of the message; emitted only when
 *		carp_opts[CARPCTL_LOG] >= l
 *	sc	softc used for the "<ifname>: " prefix, or NULL for "carp: "
 *	s	parenthesized printf-style argument list handed to addlog()
 *
 * Both l and sc are parenthesized on every use so that arbitrary
 * expressions (e.g. a conditional) keep their meaning after expansion.
 * Note that l and sc are still evaluated more than once.
 */
#define	CARP_LOG(l, sc, s)						\
	do {								\
		if (carp_opts[CARPCTL_LOG] >= (l)) {			\
			if ((sc) != NULL)				\
				log((l), "%s: ",			\
				    (sc)->sc_if.if_xname);		\
			else						\
				log((l), "carp: ");			\
			addlog s;					\
			addlog("\n");					\
		}							\
	} while (0)
199 
/*
 * Prototypes for the functions defined in this file that are not declared
 * in a header.
 */
void	carp_hmac_prepare(struct carp_softc *);
void	carp_hmac_prepare_ctx(struct carp_vhost_entry *, u_int8_t);
void	carp_hmac_generate(struct carp_vhost_entry *, u_int32_t *,
	    unsigned char *, u_int8_t);
int	carp_hmac_verify(struct carp_vhost_entry *, u_int32_t *,
	    unsigned char *);
void	carp_setroute(struct carp_softc *, int);
void	carp_proto_input_c(struct mbuf *, struct carp_header *, int,
	    sa_family_t);
void	carpattach(int);
void	carpdetach(struct carp_softc *);
int	carp_prepare_ad(struct mbuf *, struct carp_vhost_entry *,
	    struct carp_header *);
void	carp_send_ad_all(void);
void	carp_vhe_send_ad_all(struct carp_softc *);
void	carp_send_ad(void *);
void	carp_send_arp(struct carp_softc *);
void	carp_master_down(void *);
int	carp_ioctl(struct ifnet *, u_long, caddr_t);
int	carp_vhids_ioctl(struct carp_softc *, struct carpreq *);
int	carp_check_dup_vhids(struct carp_softc *, struct carp_if *,
	    struct carpreq *);
void	carp_ifgroup_ioctl(struct ifnet *, u_long, caddr_t);
void	carp_ifgattr_ioctl(struct ifnet *, u_long, caddr_t);
void	carp_start(struct ifnet *);
void	carp_setrun_all(struct carp_softc *, sa_family_t);
void	carp_setrun(struct carp_vhost_entry *, sa_family_t);
void	carp_set_state_all(struct carp_softc *, int);
void	carp_set_state(struct carp_vhost_entry *, int);
void	carp_multicast_cleanup(struct carp_softc *);
int	carp_set_ifp(struct carp_softc *, struct ifnet *);
void	carp_set_enaddr(struct carp_softc *);
void	carp_set_vhe_enaddr(struct carp_vhost_entry *);
void	carp_addr_updated(void *);
u_int32_t	carp_hash(struct carp_softc *, u_char *);
int	carp_set_addr(struct carp_softc *, struct sockaddr_in *);
int	carp_join_multicast(struct carp_softc *);
#ifdef INET6
/* IPv6-only helpers */
void	carp_send_na(struct carp_softc *);
int	carp_set_addr6(struct carp_softc *, struct sockaddr_in6 *);
int	carp_join_multicast6(struct carp_softc *);
#endif
int	carp_clone_create(struct if_clone *, int);
int	carp_clone_destroy(struct ifnet *);
int	carp_ether_addmulti(struct carp_softc *, struct ifreq *);
int	carp_ether_delmulti(struct carp_softc *, struct ifreq *);
void	carp_ether_purgemulti(struct carp_softc *);
int	carp_group_demote_count(struct carp_softc *);
void	carp_update_lsmask(struct carp_softc *);
int	carp_new_vhost(struct carp_softc *, int, int);
void	carp_destroy_vhosts(struct carp_softc *);
void	carp_del_all_timeouts(struct carp_softc *);

253 struct if_clone carp_cloner =
254     IF_CLONE_INITIALIZER("carp", carp_clone_create, carp_clone_destroy);
255 
256 #define carp_cksum(_m, _l)	((u_int16_t)in_cksum((_m), (_l)))
257 #define CARP_IFQ_PRIO	6
258 
259 void
260 carp_hmac_prepare(struct carp_softc *sc)
261 {
262 	struct carp_vhost_entry *vhe;
263 	u_int8_t i;
264 
265 	LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) {
266 		for (i = 0; i < HMAC_MAX; i++) {
267 			carp_hmac_prepare_ctx(vhe, i);
268 		}
269 	}
270 }
271 
272 void
273 carp_hmac_prepare_ctx(struct carp_vhost_entry *vhe, u_int8_t ctx)
274 {
275 	struct carp_softc *sc = vhe->parent_sc;
276 
277 	u_int8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT;
278 	u_int8_t vhid = vhe->vhid & 0xff;
279 	SHA1_CTX sha1ctx;
280 	u_int32_t kmd[5];
281 	struct ifaddr *ifa;
282 	int i, found;
283 	struct in_addr last, cur, in;
284 #ifdef INET6
285 	struct in6_addr last6, cur6, in6;
286 #endif /* INET6 */
287 
288 	/* compute ipad from key */
289 	bzero(vhe->vhe_pad, sizeof(vhe->vhe_pad));
290 	bcopy(sc->sc_key, vhe->vhe_pad, sizeof(sc->sc_key));
291 	for (i = 0; i < sizeof(vhe->vhe_pad); i++)
292 		vhe->vhe_pad[i] ^= 0x36;
293 
294 	/* precompute first part of inner hash */
295 	SHA1Init(&vhe->vhe_sha1[ctx]);
296 	SHA1Update(&vhe->vhe_sha1[ctx], vhe->vhe_pad, sizeof(vhe->vhe_pad));
297 	SHA1Update(&vhe->vhe_sha1[ctx], (void *)&version, sizeof(version));
298 	SHA1Update(&vhe->vhe_sha1[ctx], (void *)&type, sizeof(type));
299 
300 	/* generate a key for the arpbalance hash, before the vhid is hashed */
301 	if (vhe->vhe_leader) {
302 		bcopy(&vhe->vhe_sha1[ctx], &sha1ctx, sizeof(sha1ctx));
303 		SHA1Final((unsigned char *)kmd, &sha1ctx);
304 		sc->sc_hashkey[0] = kmd[0] ^ kmd[1];
305 		sc->sc_hashkey[1] = kmd[2] ^ kmd[3];
306 	}
307 
308 	/* the rest of the precomputation */
309 	if (vhe->vhe_leader && bcmp(sc->sc_ac.ac_enaddr, vhe->vhe_enaddr,
310 	    ETHER_ADDR_LEN) != 0)
311 		SHA1Update(&vhe->vhe_sha1[ctx], sc->sc_ac.ac_enaddr,
312 		    ETHER_ADDR_LEN);
313 
314 	SHA1Update(&vhe->vhe_sha1[ctx], (void *)&vhid, sizeof(vhid));
315 
316 	/* Hash the addresses from smallest to largest, not interface order */
317 #ifdef INET
318 	cur.s_addr = 0;
319 	do {
320 		found = 0;
321 		last = cur;
322 		cur.s_addr = 0xffffffff;
323 		TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) {
324 			if (ifa->ifa_addr->sa_family != AF_INET)
325 				continue;
326 			in.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr;
327 			if (ntohl(in.s_addr) > ntohl(last.s_addr) &&
328 			    ntohl(in.s_addr) < ntohl(cur.s_addr)) {
329 				cur.s_addr = in.s_addr;
330 				found++;
331 			}
332 		}
333 		if (found)
334 			SHA1Update(&vhe->vhe_sha1[ctx],
335 			    (void *)&cur, sizeof(cur));
336 	} while (found);
337 #endif /* INET */
338 #ifdef INET6
339 	memset(&cur6, 0x00, sizeof(cur6));
340 	do {
341 		found = 0;
342 		last6 = cur6;
343 		memset(&cur6, 0xff, sizeof(cur6));
344 		TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) {
345 			if (ifa->ifa_addr->sa_family != AF_INET6)
346 				continue;
347 			in6 = ifatoia6(ifa)->ia_addr.sin6_addr;
348 			if (IN6_IS_SCOPE_EMBED(&in6)) {
349 				if (ctx == HMAC_NOV6LL)
350 					continue;
351 				in6.s6_addr16[1] = 0;
352 			}
353 			if (memcmp(&in6, &last6, sizeof(in6)) > 0 &&
354 			    memcmp(&in6, &cur6, sizeof(in6)) < 0) {
355 				cur6 = in6;
356 				found++;
357 			}
358 		}
359 		if (found)
360 			SHA1Update(&vhe->vhe_sha1[ctx],
361 			    (void *)&cur6, sizeof(cur6));
362 	} while (found);
363 #endif /* INET6 */
364 
365 	/* convert ipad to opad */
366 	for (i = 0; i < sizeof(vhe->vhe_pad); i++)
367 		vhe->vhe_pad[i] ^= 0x36 ^ 0x5c;
368 }
369 
370 void
371 carp_hmac_generate(struct carp_vhost_entry *vhe, u_int32_t counter[2],
372     unsigned char md[20], u_int8_t ctx)
373 {
374 	SHA1_CTX sha1ctx;
375 
376 	/* fetch first half of inner hash */
377 	bcopy(&vhe->vhe_sha1[ctx], &sha1ctx, sizeof(sha1ctx));
378 
379 	SHA1Update(&sha1ctx, (void *)counter, sizeof(vhe->vhe_replay_cookie));
380 	SHA1Final(md, &sha1ctx);
381 
382 	/* outer hash */
383 	SHA1Init(&sha1ctx);
384 	SHA1Update(&sha1ctx, vhe->vhe_pad, sizeof(vhe->vhe_pad));
385 	SHA1Update(&sha1ctx, md, 20);
386 	SHA1Final(md, &sha1ctx);
387 }
388 
389 int
390 carp_hmac_verify(struct carp_vhost_entry *vhe, u_int32_t counter[2],
391     unsigned char md[20])
392 {
393 	unsigned char md2[20];
394 	u_int8_t i;
395 
396 	for (i = 0; i < HMAC_MAX; i++) {
397 		carp_hmac_generate(vhe, counter, md2, i);
398 		if (!timingsafe_bcmp(md, md2, sizeof(md2)))
399 			return (0);
400 	}
401 	return (1);
402 }
403 
/*
 * Adjust the routing table entries belonging to the addresses configured
 * on a carp interface.  The whole address list is walked at splsoftnet.
 *
 * AF_INET addresses: the host route for the address is always deleted
 * first.  For RTM_ADD a host route and/or a cloning network route is then
 * installed, depending on what the routing table currently holds for the
 * address and its network.  Nothing further is done for RTM_DELETE.
 *
 * AF_INET6 addresses: skipped when balancing is CARP_BAL_IP or above,
 * otherwise in6_ifaddloop() is called for RTM_ADD and in6_ifremloop() for
 * any other command.
 *
 *	sc	carp interface whose addresses are processed
 *	cmd	RTM_ADD or RTM_DELETE
 */
void
carp_setroute(struct carp_softc *sc, int cmd)
{
	struct ifaddr *ifa;
	int s;

	/* XXX this mess needs fixing */

	s = splsoftnet();
	TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) {
		switch (ifa->ifa_addr->sa_family) {
		case AF_INET: {
			int error;
			struct sockaddr sa;
			struct rtentry *rt;
			struct radix_node_head *rnh;
			struct radix_node *rn;
			struct rt_addrinfo info;
			/*
			 * hr_otherif: the best match for our address is a
			 *     cloning/cloned route on a different interface.
			 * nr_ourif: the network route for our prefix points
			 *     at this carp interface.
			 */
			int hr_otherif, nr_ourif;
			struct sockaddr_rtlabel	sa_rl;
			const char *label;

			/* Remove the existing host route, if any */
			bzero(&info, sizeof(info));
			info.rti_info[RTAX_DST] = ifa->ifa_addr;
			info.rti_flags = RTF_HOST;
			error = rtrequest1(RTM_DELETE, &info, RTP_CONNECTED,
			    NULL, sc->sc_if.if_rdomain);
			rt_missmsg(RTM_DELETE, &info, info.rti_flags, NULL,
			    error, sc->sc_if.if_rdomain);

			/* Check for our address on another interface */
			/* XXX cries for proper API */
			rnh = rt_gettable(ifa->ifa_addr->sa_family,
			    sc->sc_if.if_rdomain);
			rn = rnh->rnh_matchaddr(ifa->ifa_addr, rnh);
			rt = (struct rtentry *)rn;
			hr_otherif = (rt && rt->rt_ifp != &sc->sc_if &&
			    rt->rt_flags & (RTF_CLONING|RTF_CLONED));

			/* Check for a network route on our interface */
			/* sa becomes the network address: addr & netmask */
			bcopy(ifa->ifa_addr, &sa, sizeof(sa));
			satosin(&sa)->sin_addr.s_addr = satosin(ifa->ifa_netmask
			    )->sin_addr.s_addr & satosin(&sa)->sin_addr.s_addr;
			rt = (struct rtentry *)rt_lookup(&sa,
			    ifa->ifa_netmask, sc->sc_if.if_rdomain);
			nr_ourif = (rt && rt->rt_ifp == &sc->sc_if);

			/* Restore the route label */
			bzero(&sa_rl, sizeof(sa_rl));
			if (rt && rt->rt_labelid) {
				sa_rl.sr_len = sizeof(sa_rl);
				sa_rl.sr_family = AF_UNSPEC;
				label = rtlabel_id2name(rt->rt_labelid);
				if (label != NULL)
					strlcpy(sa_rl.sr_label, label,
					    sizeof(sa_rl.sr_label));
			}

			switch (cmd) {
			case RTM_ADD:
				if (hr_otherif) {
					/*
					 * The network is reached via another
					 * interface: only add a host route
					 * for our own address.
					 */
					ifa->ifa_rtrequest = NULL;
					ifa->ifa_flags &= ~RTF_CLONING;
					bzero(&info, sizeof(info));
					info.rti_info[RTAX_DST] = ifa->ifa_addr;
					info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr;
					info.rti_flags = RTF_UP | RTF_HOST;
					error = rtrequest1(RTM_ADD, &info,
					    RTP_CONNECTED, NULL,
					    sc->sc_if.if_rdomain);
					rt_missmsg(RTM_ADD, &info,
					    info.rti_flags, &sc->sc_if,
					    error, sc->sc_if.if_rdomain);
				}
				if (!hr_otherif || nr_ourif || !rt) {
					/*
					 * Replace a non-cloning network route
					 * on our interface before adding the
					 * cloning one below.
					 */
					if (nr_ourif && !(rt->rt_flags &
					    RTF_CLONING)) {
						bzero(&info, sizeof(info));
						info.rti_info[RTAX_DST] = &sa;
						info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
						error = rtrequest1(RTM_DELETE,
						    &info, RTP_CONNECTED, NULL,
						    sc->sc_if.if_rdomain);
						rt_missmsg(RTM_DELETE, &info, info.rti_flags, NULL,
						    error, sc->sc_if.if_rdomain);
					}

					ifa->ifa_rtrequest = arp_rtrequest;
					ifa->ifa_flags |= RTF_CLONING;

					bzero(&info, sizeof(info));
					info.rti_info[RTAX_DST] = &sa;
					info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr;
					info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
					info.rti_info[RTAX_LABEL] =
					    (struct sockaddr *)&sa_rl;
					error = rtrequest1(RTM_ADD, &info,
					    RTP_CONNECTED, NULL,
					    sc->sc_if.if_rdomain);
					if (error == 0)
						ifa->ifa_flags |= IFA_ROUTE;
					rt_missmsg(RTM_ADD, &info, info.rti_flags,
					    &sc->sc_if, error, sc->sc_if.if_rdomain);
				}
				break;
			case RTM_DELETE:
				/* the host route was already removed above */
				break;
			default:
				break;
			}
			break;
		}

#ifdef INET6
		case AF_INET6:
			/* this continue advances the TAILQ_FOREACH loop */
			if (sc->sc_balancing >= CARP_BAL_IP)
				continue;
			if (cmd == RTM_ADD)
				in6_ifaddloop(ifa);
			else
				in6_ifremloop(ifa);
			break;
#endif /* INET6 */
		default:
			break;
		}
	}
	splx(s);
}
534 
535 /*
536  * process input packet.
537  * we have rearranged checks order compared to the rfc,
538  * but it seems more efficient this way or not possible otherwise.
539  */
540 void
541 carp_proto_input(struct mbuf *m, ...)
542 {
543 	struct ip *ip = mtod(m, struct ip *);
544 	struct ifnet *ifp = m->m_pkthdr.rcvif;
545 	struct carp_softc *sc = NULL;
546 	struct carp_header *ch;
547 	int iplen, len, hlen, ismulti;
548 	va_list ap;
549 
550 	va_start(ap, m);
551 	hlen = va_arg(ap, int);
552 	va_end(ap);
553 
554 	carpstats.carps_ipackets++;
555 
556 	if (!carp_opts[CARPCTL_ALLOW]) {
557 		m_freem(m);
558 		return;
559 	}
560 
561 	ismulti = IN_MULTICAST(ip->ip_dst.s_addr);
562 
563 	/* check if received on a valid carp interface */
564 	if (!((ifp->if_type == IFT_CARP && ismulti) ||
565 	    (ifp->if_type != IFT_CARP && !ismulti && ifp->if_carp != NULL))) {
566 		carpstats.carps_badif++;
567 		CARP_LOG(LOG_INFO, sc, ("packet received on non-carp interface: %s",
568 		    m->m_pkthdr.rcvif->if_xname));
569 		m_freem(m);
570 		return;
571 	}
572 
573 	/* verify that the IP TTL is 255.  */
574 	if (ip->ip_ttl != CARP_DFLTTL) {
575 		carpstats.carps_badttl++;
576 		CARP_LOG(LOG_NOTICE, sc, ("received ttl %d != %d on %s", ip->ip_ttl,
577 		    CARP_DFLTTL, m->m_pkthdr.rcvif->if_xname));
578 		m_freem(m);
579 		return;
580 	}
581 
582 	/*
583 	 * verify that the received packet length is
584 	 * equal to the CARP header
585 	 */
586 	iplen = ip->ip_hl << 2;
587 	len = iplen + sizeof(*ch);
588 	if (len > m->m_pkthdr.len) {
589 		carpstats.carps_badlen++;
590 		CARP_LOG(LOG_INFO, sc, ("packet too short %d on %s", m->m_pkthdr.len,
591 		    m->m_pkthdr.rcvif->if_xname));
592 		m_freem(m);
593 		return;
594 	}
595 
596 	if ((m = m_pullup(m, len)) == NULL) {
597 		carpstats.carps_hdrops++;
598 		return;
599 	}
600 	ip = mtod(m, struct ip *);
601 	ch = (struct carp_header *)(mtod(m, caddr_t) + iplen);
602 
603 	/* verify the CARP checksum */
604 	m->m_data += iplen;
605 	if (carp_cksum(m, len - iplen)) {
606 		carpstats.carps_badsum++;
607 		CARP_LOG(LOG_INFO, sc, ("checksum failed on %s",
608 		    m->m_pkthdr.rcvif->if_xname));
609 		m_freem(m);
610 		return;
611 	}
612 	m->m_data -= iplen;
613 
614 	carp_proto_input_c(m, ch, ismulti, AF_INET);
615 }
616 
#ifdef INET6
/*
 * IPv6 input routine for CARP packets.
 *
 *	mp	the packet
 *	offp	offset of the CARP header within the packet
 *	proto	unused
 *
 * The mbuf is always consumed and IPPROTO_DONE is always returned.
 */
int
carp6_proto_input(struct mbuf **mp, int *offp, int proto)
{
	struct carp_softc *sc = NULL;	/* always NULL here, used by CARP_LOG */
	struct mbuf *m = *mp;
	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
	struct carp_header *ch;
	u_int len;

	carpstats.carps_ipackets6++;

	if (!carp_opts[CARPCTL_ALLOW])
		goto drop;

	/* only packets received on a carp interface are accepted */
	if (m->m_pkthdr.rcvif->if_type != IFT_CARP) {
		carpstats.carps_badif++;
		CARP_LOG(LOG_INFO, sc, ("packet received on non-carp interface: %s",
		    m->m_pkthdr.rcvif->if_xname));
		goto drop;
	}

	/* the hop limit has to be 255 */
	if (ip6->ip6_hlim != CARP_DFLTTL) {
		carpstats.carps_badttl++;
		CARP_LOG(LOG_NOTICE, sc, ("received ttl %d != %d on %s", ip6->ip6_hlim,
		    CARP_DFLTTL, m->m_pkthdr.rcvif->if_xname));
		goto drop;
	}

	/*
	 * Make the complete CARP header contiguous; on failure m_pullup()
	 * returns NULL and there is nothing to free.
	 */
	len = m->m_len;
	m = m_pullup(m, *offp + sizeof(*ch));
	if (m == NULL) {
		carpstats.carps_badlen++;
		CARP_LOG(LOG_INFO, sc, ("packet size %u too small", len));
		return (IPPROTO_DONE);
	}
	ch = (struct carp_header *)(mtod(m, caddr_t) + *offp);

	/* checksum the CARP header: step over what precedes it meanwhile */
	m->m_data += *offp;
	if (carp_cksum(m, sizeof(*ch))) {
		carpstats.carps_badsum++;
		CARP_LOG(LOG_INFO, sc, ("checksum failed, on %s",
		    m->m_pkthdr.rcvif->if_xname));
		goto drop;
	}
	m->m_data -= *offp;

	carp_proto_input_c(m, ch, 1, AF_INET6);
	return (IPPROTO_DONE);

drop:
	m_freem(m);
	return (IPPROTO_DONE);
}
#endif /* INET6 */
676 
/*
 * Address family independent part of the CARP input path.
 *
 * Finds the carp interface and virtual host entry addressed by the
 * advertisement, validates version, HMAC and replay counter and then runs
 * the master/backup election for that entry.  The mbuf is always freed.
 *
 *	m	the received packet
 *	ch	CARP header inside m
 *	ismulti	non-zero if the packet was sent to a multicast address
 *	af	AF_INET or AF_INET6
 */
void
carp_proto_input_c(struct mbuf *m, struct carp_header *ch, int ismulti,
    sa_family_t af)
{
	struct ifnet *ifp = m->m_pkthdr.rcvif;
	struct carp_softc *sc;
	struct carp_vhost_entry *vhe;
	struct timeval sc_tv, ch_tv;
	struct carp_if *cif;

	/* the carp_if lives on the parent (non-carp) interface */
	if (ifp->if_type == IFT_CARP)
		cif = (struct carp_if *)ifp->if_carpdev->if_carp;
	else
		cif = (struct carp_if *)ifp->if_carp;

	/*
	 * Look for the vhid among all carp interfaces of the parent.  For
	 * IPv4 only interfaces whose peer address has the same multicast /
	 * unicast nature as the packet are considered.  If nothing matches
	 * the outer loop runs to completion and leaves sc == NULL.
	 */
	TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) {
		if (af == AF_INET &&
		    ismulti != IN_MULTICAST(sc->sc_peer.s_addr))
			continue;
		LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) {
			if (vhe->vhid == ch->carp_vhid)
				goto found;
		}
	}
 found:

	if (!sc || (sc->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) !=
	    (IFF_UP|IFF_RUNNING)) {
		carpstats.carps_badvhid++;
		m_freem(m);
		return;
	}

	getmicrotime(&sc->sc_if.if_lastchange);
	sc->sc_if.if_ipackets++;
	sc->sc_if.if_ibytes += m->m_pkthdr.len;

	/* verify the CARP version. */
	if (ch->carp_version != CARP_VERSION) {
		carpstats.carps_badver++;
		sc->sc_if.if_ierrors++;
		CARP_LOG(LOG_NOTICE, sc, ("invalid version %d != %d",
		    ch->carp_version, CARP_VERSION));
		m_freem(m);
		return;
	}

	/* verify the hash */
	/* carp_hmac_verify() returns non-zero when no variant matches */
	if (carp_hmac_verify(vhe, ch->carp_counter, ch->carp_md)) {
		carpstats.carps_badauth++;
		sc->sc_if.if_ierrors++;
		CARP_LOG(LOG_INFO, sc, ("incorrect hash"));
		m_freem(m);
		return;
	}

	/* a packet carrying our own replay cookie is dropped */
	if (!bcmp(&vhe->vhe_replay_cookie, ch->carp_counter,
	    sizeof(ch->carp_counter))) {
		/* Do not log duplicates from non simplex interfaces */
		if (sc->sc_carpdev->if_flags & IFF_SIMPLEX) {
			carpstats.carps_badauth++;
			sc->sc_if.if_ierrors++;
			CARP_LOG(LOG_WARNING, sc,
			    ("replay or network loop detected"));
		}
		m_freem(m);
		return;
	}

	/*
	 * Express our and the sender's advertisement interval as timevals:
	 * advbase in seconds plus advskew in 1/256ths of a second.
	 */
	sc_tv.tv_sec = sc->sc_advbase;
	sc_tv.tv_usec = vhe->advskew * 1000000 / 256;
	ch_tv.tv_sec = ch->carp_advbase;
	ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256;

	switch (vhe->state) {
	case INIT:
		break;
	case MASTER:
		/*
		 * If we receive an advertisement from a master who's going to
		 * be more frequent than us, and whose demote count is not higher
		 * than ours, go into BACKUP state. If his demote count is lower,
		 * also go into BACKUP.
		 */
		if (((timercmp(&sc_tv, &ch_tv, >) ||
		    timercmp(&sc_tv, &ch_tv, ==)) &&
		    (ch->carp_demote <= carp_group_demote_count(sc))) ||
		    ch->carp_demote < carp_group_demote_count(sc)) {
			timeout_del(&vhe->ad_tmo);
			carp_set_state(vhe, BACKUP);
			carp_setrun(vhe, 0);
			if (vhe->vhe_leader)
				carp_setroute(sc, RTM_DELETE);
		}
		break;
	case BACKUP:
		/*
		 * If we're pre-empting masters who advertise slower than us,
		 * and do not have a better demote count, treat them as down.
		 *
		 */
		if (carp_opts[CARPCTL_PREEMPT] &&
		    timercmp(&sc_tv, &ch_tv, <) &&
		    ch->carp_demote >= carp_group_demote_count(sc)) {
			carp_master_down(vhe);
			break;
		}

		/*
		 * Take over masters advertising with a higher demote count,
		 * regardless of CARPCTL_PREEMPT.
		 */
		if (ch->carp_demote > carp_group_demote_count(sc)) {
			carp_master_down(vhe);
			break;
		}

		/*
		 *  If the master is going to advertise at such a low frequency
		 *  that he's guaranteed to time out, we'd might as well just
		 *  treat him as timed out now.
		 */
		sc_tv.tv_sec = sc->sc_advbase * 3;
		if (sc->sc_advbase && timercmp(&sc_tv, &ch_tv, <)) {
			carp_master_down(vhe);
			break;
		}

		/*
		 * Otherwise, we reset the counter and wait for the next
		 * advertisement.
		 */
		carp_setrun(vhe, af);
		break;
	}

	m_freem(m);
	return;
}
816 
817 int
818 carp_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
819     size_t newlen)
820 {
821 	/* All sysctl names at this level are terminal. */
822 	if (namelen != 1)
823 		return (ENOTDIR);
824 
825 	switch (name[0]) {
826 	case CARPCTL_STATS:
827 		if (newp != NULL)
828 			return (EPERM);
829 		return (sysctl_struct(oldp, oldlenp, newp, newlen,
830 		    &carpstats, sizeof(carpstats)));
831 	default:
832 		if (name[0] <= 0 || name[0] >= CARPCTL_MAXID)
833 			return (ENOPROTOOPT);
834 		return sysctl_int(oldp, oldlenp, newp, newlen,
835 		    &carp_opts[name[0]]);
836 	}
837 }
838 
839 /*
840  * Interface side of the CARP implementation.
841  */
842 
843 /* ARGSUSED */
844 void
845 carpattach(int n)
846 {
847 	struct ifg_group	*ifg;
848 
849 	if ((ifg = if_creategroup("carp")) != NULL)
850 		ifg->ifg_refcnt++;	/* keep around even if empty */
851 	if_clone_attach(&carp_cloner);
852 }
853 
854 int
855 carp_clone_create(ifc, unit)
856 	struct if_clone *ifc;
857 	int unit;
858 {
859 	struct carp_softc *sc;
860 	struct ifnet *ifp;
861 
862 	sc = malloc(sizeof(*sc), M_DEVBUF, M_NOWAIT|M_ZERO);
863 	if (!sc)
864 		return (ENOMEM);
865 
866 	LIST_INIT(&sc->carp_vhosts);
867 	sc->sc_vhe_count = 0;
868 	if (carp_new_vhost(sc, 0, 0)) {
869 		free(sc, M_DEVBUF);
870 		return (ENOMEM);
871 	}
872 
873 	sc->sc_suppress = 0;
874 	sc->sc_advbase = CARP_DFLTINTV;
875 	sc->sc_naddrs = sc->sc_naddrs6 = 0;
876 #ifdef INET6
877 	sc->sc_im6o.im6o_multicast_hlim = CARP_DFLTTL;
878 #endif /* INET6 */
879 	sc->sc_imo.imo_membership = (struct in_multi **)malloc(
880 	    (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_IPMOPTS,
881 	    M_WAITOK|M_ZERO);
882 	sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS;
883 
884 	LIST_INIT(&sc->carp_mc_listhead);
885 	ifp = &sc->sc_if;
886 	ifp->if_softc = sc;
887 	snprintf(ifp->if_xname, sizeof ifp->if_xname, "%s%d", ifc->ifc_name,
888 	    unit);
889 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
890 	ifp->if_ioctl = carp_ioctl;
891 	ifp->if_start = carp_start;
892 	ifp->if_output = carp_output;
893 	ifp->if_type = IFT_CARP;
894 	ifp->if_addrlen = ETHER_ADDR_LEN;
895 	ifp->if_hdrlen = ETHER_HDR_LEN;
896 	ifp->if_mtu = ETHERMTU;
897 	IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen);
898 	IFQ_SET_READY(&ifp->if_snd);
899 	if_attach(ifp);
900 
901 	if_alloc_sadl(ifp);
902 	LIST_INIT(&sc->sc_ac.ac_multiaddrs);
903 #if NBPFILTER > 0
904 	bpfattach(&ifp->if_bpf, ifp, DLT_EN10MB, ETHER_HDR_LEN);
905 #endif
906 
907 	/* Hook carp_addr_updated to cope with address and route changes. */
908 	sc->ah_cookie = hook_establish(sc->sc_if.if_addrhooks, 0,
909 	    carp_addr_updated, sc);
910 	carp_set_state_all(sc, INIT);
911 
912 	return (0);
913 }
914 
915 int
916 carp_new_vhost(struct carp_softc *sc, int vhid, int advskew)
917 {
918 	struct carp_vhost_entry *vhe, *vhe0;
919 
920 	vhe = malloc(sizeof(*vhe), M_DEVBUF, M_NOWAIT | M_ZERO);
921 	if (vhe == NULL)
922 		return (ENOMEM);
923 
924 	vhe->parent_sc = sc;
925 	vhe->vhid = vhid;
926 	vhe->advskew = advskew;
927 	timeout_set(&vhe->ad_tmo, carp_send_ad, vhe);
928 	timeout_set(&vhe->md_tmo, carp_master_down, vhe);
929 	timeout_set(&vhe->md6_tmo, carp_master_down, vhe);
930 
931 	/* mark the first vhe as leader */
932 	if (LIST_EMPTY(&sc->carp_vhosts)) {
933 		vhe->vhe_leader = 1;
934 		LIST_INSERT_HEAD(&sc->carp_vhosts, vhe, vhost_entries);
935 		sc->sc_vhe_count = 1;
936 		return (0);
937 	}
938 
939 	LIST_FOREACH(vhe0, &sc->carp_vhosts, vhost_entries)
940 		if (LIST_NEXT(vhe0, vhost_entries) == NULL)
941 			break;
942 	LIST_INSERT_AFTER(vhe0, vhe, vhost_entries);
943 	sc->sc_vhe_count++;
944 
945 	return (0);
946 }
947 
948 int
949 carp_clone_destroy(struct ifnet *ifp)
950 {
951 	struct carp_softc *sc = ifp->if_softc;
952 
953 	carpdetach(sc);
954 	ether_ifdetach(ifp);
955 	if_detach(ifp);
956 	carp_destroy_vhosts(ifp->if_softc);
957 	free(sc->sc_imo.imo_membership, M_IPMOPTS);
958 	free(sc, M_DEVBUF);
959 
960 	return (0);
961 }
962 
963 void
964 carp_del_all_timeouts(struct carp_softc *sc)
965 {
966 	struct carp_vhost_entry *vhe;
967 
968 	LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) {
969 		timeout_del(&vhe->ad_tmo);
970 		timeout_del(&vhe->md_tmo);
971 		timeout_del(&vhe->md6_tmo);
972 	}
973 }
974 
975 void
976 carpdetach(struct carp_softc *sc)
977 {
978 	struct carp_if *cif;
979 	int s;
980 
981 	carp_del_all_timeouts(sc);
982 
983 	if (sc->sc_demote_cnt)
984 		carp_group_demote_adj(&sc->sc_if, -sc->sc_demote_cnt, "detach");
985 	sc->sc_suppress = 0;
986 	sc->sc_sendad_errors = 0;
987 
988 	carp_set_state_all(sc, INIT);
989 	sc->sc_if.if_flags &= ~IFF_UP;
990 	carp_setrun_all(sc, 0);
991 	carp_multicast_cleanup(sc);
992 
993 	s = splnet();
994 	if (sc->ah_cookie != NULL)
995 		hook_disestablish(sc->sc_if.if_addrhooks, sc->ah_cookie);
996 	if (sc->sc_carpdev != NULL) {
997 		if (sc->lh_cookie != NULL)
998 			hook_disestablish(sc->sc_carpdev->if_linkstatehooks,
999 			    sc->lh_cookie);
1000 		cif = (struct carp_if *)sc->sc_carpdev->if_carp;
1001 		TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list);
1002 		if (!--cif->vhif_nvrs) {
1003 			ifpromisc(sc->sc_carpdev, 0);
1004 			sc->sc_carpdev->if_carp = NULL;
1005 			free(cif, M_IFADDR);
1006 		}
1007 	}
1008 	sc->sc_carpdev = NULL;
1009 	splx(s);
1010 }
1011 
1012 /* Detach an interface from the carp. */
1013 void
1014 carp_ifdetach(struct ifnet *ifp)
1015 {
1016 	struct carp_softc *sc, *nextsc;
1017 	struct carp_if *cif = (struct carp_if *)ifp->if_carp;
1018 
1019 	for (sc = TAILQ_FIRST(&cif->vhif_vrs); sc; sc = nextsc) {
1020 		nextsc = TAILQ_NEXT(sc, sc_list);
1021 		carpdetach(sc);
1022 	}
1023 }
1024 
1025 void
1026 carp_destroy_vhosts(struct carp_softc *sc)
1027 {
1028 	/* XXX bow out? */
1029 	struct carp_vhost_entry *vhe, *nvhe;
1030 
1031 	for (vhe = LIST_FIRST(&sc->carp_vhosts);
1032 	     vhe != LIST_END(&sc->carp_vhosts); vhe = nvhe) {
1033 		nvhe = LIST_NEXT(vhe, vhost_entries);
1034 		free(vhe, M_DEVBUF);
1035 	}
1036 	LIST_INIT(&sc->carp_vhosts);
1037 	sc->sc_vhe_count = 0;
1038 }
1039 
1040 int
1041 carp_prepare_ad(struct mbuf *m, struct carp_vhost_entry *vhe,
1042     struct carp_header *ch)
1043 {
1044 	if (!vhe->vhe_replay_cookie) {
1045 		arc4random_buf(&vhe->vhe_replay_cookie,
1046 		    sizeof(vhe->vhe_replay_cookie));
1047 	}
1048 
1049 	bcopy(&vhe->vhe_replay_cookie, ch->carp_counter,
1050 	    sizeof(ch->carp_counter));
1051 
1052 	/*
1053 	 * For the time being, do not include the IPv6 linklayer addresses
1054 	 * in the HMAC.
1055 	 */
1056 	carp_hmac_generate(vhe, ch->carp_counter, ch->carp_md, HMAC_NOV6LL);
1057 
1058 	return (0);
1059 }
1060 
1061 void
1062 carp_send_ad_all(void)
1063 {
1064 	struct ifnet *ifp;
1065 	struct carp_if *cif;
1066 	struct carp_softc *vh;
1067 
1068 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
1069 		if (ifp->if_carp == NULL || ifp->if_type == IFT_CARP)
1070 			continue;
1071 
1072 		cif = (struct carp_if *)ifp->if_carp;
1073 		TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
1074 			if ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) ==
1075 			    (IFF_UP|IFF_RUNNING)) {
1076 				carp_vhe_send_ad_all(vh);
1077 			}
1078 		}
1079 	}
1080 }
1081 
1082 void
1083 carp_vhe_send_ad_all(struct carp_softc *sc)
1084 {
1085 	struct carp_vhost_entry *vhe;
1086 
1087 	LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) {
1088 		if (vhe->state == MASTER)
1089 			carp_send_ad(vhe);
1090 	}
1091 }
1092 
1093 void
1094 carp_send_ad(void *v)
1095 {
1096 	struct carp_header ch;
1097 	struct timeval tv;
1098 	struct carp_vhost_entry *vhe = v;
1099 	struct carp_softc *sc = vhe->parent_sc;
1100 	struct carp_header *ch_ptr;
1101 
1102 	struct mbuf *m;
1103 	int error, len, advbase, advskew, s;
1104 	struct ifaddr *ifa;
1105 	struct sockaddr sa;
1106 
1107 	if (sc->sc_carpdev == NULL) {
1108 		sc->sc_if.if_oerrors++;
1109 		return;
1110 	}
1111 
1112 	s = splsoftnet();
1113 
1114 	/* bow out if we've gone to backup (the carp interface is going down) */
1115 	if (sc->sc_bow_out) {
1116 		advbase = 255;
1117 		advskew = 255;
1118 	} else {
1119 		advbase = sc->sc_advbase;
1120 		advskew = vhe->advskew;
1121 		tv.tv_sec = advbase;
1122 		if (advbase == 0 && advskew == 0)
1123 			tv.tv_usec = 1 * 1000000 / 256;
1124 		else
1125 			tv.tv_usec = advskew * 1000000 / 256;
1126 	}
1127 
1128 	ch.carp_version = CARP_VERSION;
1129 	ch.carp_type = CARP_ADVERTISEMENT;
1130 	ch.carp_vhid = vhe->vhid;
1131 	ch.carp_demote = carp_group_demote_count(sc) & 0xff;
1132 	ch.carp_advbase = advbase;
1133 	ch.carp_advskew = advskew;
1134 	ch.carp_authlen = 7;	/* XXX DEFINE */
1135 	ch.carp_cksum = 0;
1136 
1137 	sc->cur_vhe = vhe; /* we need the vhe later on the output path */
1138 
1139 #ifdef INET
1140 	if (sc->sc_naddrs) {
1141 		struct ip *ip;
1142 
1143 		MGETHDR(m, M_DONTWAIT, MT_HEADER);
1144 		if (m == NULL) {
1145 			sc->sc_if.if_oerrors++;
1146 			carpstats.carps_onomem++;
1147 			/* XXX maybe less ? */
1148 			goto retry_later;
1149 		}
1150 		len = sizeof(*ip) + sizeof(ch);
1151 		m->m_pkthdr.len = len;
1152 		m->m_pkthdr.rcvif = NULL;
1153 		m->m_pkthdr.rdomain = sc->sc_if.if_rdomain;
1154 		m->m_pkthdr.pf.prio = CARP_IFQ_PRIO;
1155 		m->m_len = len;
1156 		MH_ALIGN(m, m->m_len);
1157 		ip = mtod(m, struct ip *);
1158 		ip->ip_v = IPVERSION;
1159 		ip->ip_hl = sizeof(*ip) >> 2;
1160 		ip->ip_tos = IPTOS_LOWDELAY;
1161 		ip->ip_len = htons(len);
1162 		ip->ip_id = htons(ip_randomid());
1163 		ip->ip_off = htons(IP_DF);
1164 		ip->ip_ttl = CARP_DFLTTL;
1165 		ip->ip_p = IPPROTO_CARP;
1166 		ip->ip_sum = 0;
1167 
1168 		bzero(&sa, sizeof(sa));
1169 		sa.sa_family = AF_INET;
1170 		ifa = ifaof_ifpforaddr(&sa, sc->sc_carpdev);
1171 		if (ifa == NULL)
1172 			ip->ip_src.s_addr = 0;
1173 		else
1174 			ip->ip_src.s_addr =
1175 			    ifatoia(ifa)->ia_addr.sin_addr.s_addr;
1176 		ip->ip_dst.s_addr = sc->sc_peer.s_addr;
1177 		if (IN_MULTICAST(ip->ip_dst.s_addr))
1178 			m->m_flags |= M_MCAST;
1179 
1180 		ch_ptr = (struct carp_header *)(ip + 1);
1181 		bcopy(&ch, ch_ptr, sizeof(ch));
1182 		if (carp_prepare_ad(m, vhe, ch_ptr))
1183 			goto retry_later;
1184 
1185 		m->m_data += sizeof(*ip);
1186 		ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip));
1187 		m->m_data -= sizeof(*ip);
1188 
1189 		getmicrotime(&sc->sc_if.if_lastchange);
1190 		sc->sc_if.if_opackets++;
1191 		sc->sc_if.if_obytes += len;
1192 		carpstats.carps_opackets++;
1193 
1194 		error = ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo,
1195 		    NULL);
1196 		if (error) {
1197 			if (error == ENOBUFS)
1198 				carpstats.carps_onomem++;
1199 			else
1200 				CARP_LOG(LOG_WARNING, sc,
1201 				    ("ip_output failed: %d", error));
1202 			sc->sc_if.if_oerrors++;
1203 			if (sc->sc_sendad_errors < INT_MAX)
1204 				sc->sc_sendad_errors++;
1205 			if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS(sc))
1206 				carp_group_demote_adj(&sc->sc_if, 1,
1207 				    "> snderrors");
1208 			sc->sc_sendad_success = 0;
1209 		} else {
1210 			if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS(sc)) {
1211 				if (++sc->sc_sendad_success >=
1212 				    CARP_SENDAD_MIN_SUCCESS(sc)) {
1213 					carp_group_demote_adj(&sc->sc_if, -1,
1214 					    "< snderrors");
1215 					sc->sc_sendad_errors = 0;
1216 				}
1217 			} else
1218 				sc->sc_sendad_errors = 0;
1219 		}
1220 		if (vhe->vhe_leader) {
1221 			if (sc->sc_delayed_arp > 0)
1222 				sc->sc_delayed_arp--;
1223 			if (sc->sc_delayed_arp == 0) {
1224 				carp_send_arp(sc);
1225 				sc->sc_delayed_arp = -1;
1226 			}
1227 		}
1228 	}
1229 #endif /* INET */
1230 #ifdef INET6
1231 	if (sc->sc_naddrs6) {
1232 		struct ip6_hdr *ip6;
1233 
1234 		MGETHDR(m, M_DONTWAIT, MT_HEADER);
1235 		if (m == NULL) {
1236 			sc->sc_if.if_oerrors++;
1237 			carpstats.carps_onomem++;
1238 			/* XXX maybe less ? */
1239 			goto retry_later;
1240 		}
1241 		len = sizeof(*ip6) + sizeof(ch);
1242 		m->m_pkthdr.len = len;
1243 		m->m_pkthdr.rcvif = NULL;
1244 		m->m_pkthdr.pf.prio = CARP_IFQ_PRIO;
1245 		/* XXX m->m_pkthdr.rdomain = sc->sc_if.if_rdomain; */
1246 		m->m_len = len;
1247 		MH_ALIGN(m, m->m_len);
1248 		m->m_flags |= M_MCAST;
1249 		ip6 = mtod(m, struct ip6_hdr *);
1250 		bzero(ip6, sizeof(*ip6));
1251 		ip6->ip6_vfc |= IPV6_VERSION;
1252 		ip6->ip6_hlim = CARP_DFLTTL;
1253 		ip6->ip6_nxt = IPPROTO_CARP;
1254 
1255 		/* set the source address */
1256 		bzero(&sa, sizeof(sa));
1257 		sa.sa_family = AF_INET6;
1258 		ifa = ifaof_ifpforaddr(&sa, sc->sc_carpdev);
1259 		if (ifa == NULL)	/* This should never happen with IPv6 */
1260 			bzero(&ip6->ip6_src, sizeof(struct in6_addr));
1261 		else
1262 			bcopy(ifatoia6(ifa)->ia_addr.sin6_addr.s6_addr,
1263 			    &ip6->ip6_src, sizeof(struct in6_addr));
1264 		/* set the multicast destination */
1265 
1266 		ip6->ip6_dst.s6_addr16[0] = htons(0xff02);
1267 		ip6->ip6_dst.s6_addr16[1] = htons(sc->sc_carpdev->if_index);
1268 		ip6->ip6_dst.s6_addr8[15] = 0x12;
1269 
1270 		ch_ptr = (struct carp_header *)(ip6 + 1);
1271 		bcopy(&ch, ch_ptr, sizeof(ch));
1272 		if (carp_prepare_ad(m, vhe, ch_ptr))
1273 			goto retry_later;
1274 
1275 		m->m_data += sizeof(*ip6);
1276 		ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip6));
1277 		m->m_data -= sizeof(*ip6);
1278 
1279 		getmicrotime(&sc->sc_if.if_lastchange);
1280 		sc->sc_if.if_opackets++;
1281 		sc->sc_if.if_obytes += len;
1282 		carpstats.carps_opackets6++;
1283 
1284 		error = ip6_output(m, NULL, NULL, 0, &sc->sc_im6o, NULL, NULL);
1285 		if (error) {
1286 			if (error == ENOBUFS)
1287 				carpstats.carps_onomem++;
1288 			else
1289 				CARP_LOG(LOG_WARNING, sc,
1290 				    ("ip6_output failed: %d", error));
1291 			sc->sc_if.if_oerrors++;
1292 			if (sc->sc_sendad_errors < INT_MAX)
1293 				sc->sc_sendad_errors++;
1294 			if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS(sc))
1295 				carp_group_demote_adj(&sc->sc_if, 1,
1296 					    "> snd6errors");
1297 			sc->sc_sendad_success = 0;
1298 		} else {
1299 			if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS(sc)) {
1300 				if (++sc->sc_sendad_success >=
1301 				    CARP_SENDAD_MIN_SUCCESS(sc)) {
1302 					carp_group_demote_adj(&sc->sc_if, -1,
1303 					    "< snd6errors");
1304 					sc->sc_sendad_errors = 0;
1305 				}
1306 			} else
1307 				sc->sc_sendad_errors = 0;
1308 		}
1309 	}
1310 #endif /* INET6 */
1311 
1312 retry_later:
1313 	sc->cur_vhe = NULL;
1314 	splx(s);
1315 	if (advbase != 255 || advskew != 255)
1316 		timeout_add(&vhe->ad_tmo, tvtohz(&tv));
1317 }
1318 
1319 /*
1320  * Broadcast a gratuitous ARP request containing
1321  * the virtual router MAC address for each IP address
1322  * associated with the virtual router.
1323  */
1324 void
1325 carp_send_arp(struct carp_softc *sc)
1326 {
1327 	struct ifaddr *ifa;
1328 	in_addr_t in;
1329 	int s = splsoftnet();
1330 
1331 	TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) {
1332 
1333 		if (ifa->ifa_addr->sa_family != AF_INET)
1334 			continue;
1335 
1336 		in = ifatoia(ifa)->ia_addr.sin_addr.s_addr;
1337 		arprequest(sc->sc_carpdev, &in, &in, sc->sc_ac.ac_enaddr);
1338 		DELAY(1000);	/* XXX */
1339 	}
1340 	splx(s);
1341 }
1342 
#ifdef INET6
/*
 * For each IPv6 address configured on the carp interface, send a
 * neighbor advertisement with the override flag set to the link-local
 * all-nodes multicast address via the parent interface.
 */
void
carp_send_na(struct carp_softc *sc)
{
	static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
	struct ifaddr *addr;
	struct in6_addr *target;
	int spl;

	spl = splsoftnet();

	TAILQ_FOREACH(addr, &sc->sc_if.if_addrlist, ifa_list) {
		if (addr->ifa_addr->sa_family == AF_INET6) {
			target = &ifatoia6(addr)->ia_addr.sin6_addr;
			nd6_na_output(sc->sc_carpdev, &mcast, target,
			    ND_NA_FLAG_OVERRIDE, 1, NULL);
			DELAY(1000);	/* XXX */
		}
	}

	splx(spl);
}
#endif /* INET6 */
1365 
/*
 * Based on bridge_hash() in if_bridge.c
 *
 * Mixes the three values a, b and c in place.  All three arguments
 * must be modifiable lvalues; each one is evaluated several times, so
 * do not pass expressions with side effects.  The arguments are
 * parenthesized so that any lvalue expression can be passed safely.
 */
#define	mix(a,b,c) \
	do {							\
		(a) -= (b); (a) -= (c); (a) ^= ((c) >> 13);	\
		(b) -= (c); (b) -= (a); (b) ^= ((a) << 8);	\
		(c) -= (a); (c) -= (b); (c) ^= ((b) >> 13);	\
		(a) -= (b); (a) -= (c); (a) ^= ((c) >> 12);	\
		(b) -= (c); (b) -= (a); (b) ^= ((a) << 16);	\
		(c) -= (a); (c) -= (b); (c) ^= ((b) >> 5);	\
		(a) -= (b); (a) -= (c); (a) ^= ((c) >> 3);	\
		(b) -= (c); (b) -= (a); (b) ^= ((a) << 10);	\
		(c) -= (a); (c) -= (b); (c) ^= ((b) >> 15);	\
	} while (0)
1381 
1382 u_int32_t
1383 carp_hash(struct carp_softc *sc, u_char *src)
1384 {
1385 	u_int32_t a = 0x9e3779b9, b = sc->sc_hashkey[0], c = sc->sc_hashkey[1];
1386 
1387 	c += sc->sc_key[3] << 24;
1388 	c += sc->sc_key[2] << 16;
1389 	c += sc->sc_key[1] << 8;
1390 	c += sc->sc_key[0];
1391 	b += src[5] << 8;
1392 	b += src[4];
1393 	a += src[3] << 24;
1394 	a += src[2] << 16;
1395 	a += src[1] << 8;
1396 	a += src[0];
1397 
1398 	mix(a, b, c);
1399 	return (c);
1400 }
1401 
1402 void
1403 carp_update_lsmask(struct carp_softc *sc)
1404 {
1405 	struct carp_vhost_entry *vhe;
1406 	int count;
1407 
1408 	if (!sc->sc_balancing)
1409 		return;
1410 
1411 	sc->sc_lsmask = 0;
1412 	count = 0;
1413 
1414 	LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) {
1415 		if (vhe->state == MASTER && count < sizeof(sc->sc_lsmask) * 8)
1416 			sc->sc_lsmask |= 1 << count;
1417 		count++;
1418 	}
1419 	sc->sc_lscount = count;
1420 	CARP_LOG(LOG_DEBUG, sc, ("carp_update_lsmask: %x", sc->sc_lsmask));
1421 }
1422 
1423 int
1424 carp_iamatch(struct in_ifaddr *ia, u_char *src, u_int8_t **sha,
1425     u_int8_t **ether_shost)
1426 {
1427 	struct carp_softc *sc = ia->ia_ifp->if_softc;
1428 	struct carp_vhost_entry *vhe = LIST_FIRST(&sc->carp_vhosts);
1429 
1430 	if (sc->sc_balancing == CARP_BAL_ARP) {
1431 		int lshash;
1432 		/*
1433 		 * We use the source MAC address to decide which virtual host
1434 		 * should handle the request. If we're master of that virtual
1435 		 * host, then we respond, otherwise, just drop the arp packet
1436 		 * on the floor.
1437 		 */
1438 
1439 		if (sc->sc_lscount == 0) /* just to be safe */
1440 			return (0);
1441 		lshash = carp_hash(sc, src) % sc->sc_lscount;
1442 		if ((1 << lshash) & sc->sc_lsmask) {
1443 			int i = 0;
1444 			LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) {
1445 				if (i++ == lshash)
1446 					break;
1447 			}
1448 			if (vhe == NULL)
1449 				return (0);
1450 			*sha = vhe->vhe_enaddr;
1451 			return (1);
1452 		}
1453 	} else if (sc->sc_balancing == CARP_BAL_IPSTEALTH ||
1454 	    sc->sc_balancing == CARP_BAL_IP) {
1455 		if (vhe->state == MASTER) {
1456 			*ether_shost = ((struct arpcom *)sc->sc_carpdev)->
1457 			    ac_enaddr;
1458 			return (1);
1459 		}
1460 	} else {
1461 		if (vhe->state == MASTER)
1462 			return (1);
1463 	}
1464 
1465 	return (0);
1466 }
1467 
#ifdef INET6
/*
 * IPv6 counterpart of carp_iamatch(): decide whether the carp interface
 * ifp should answer a neighbor discovery request coming from the
 * link-layer address src.
 *
 * Returns 1 if we should respond, 0 otherwise.  With ARP balancing, src
 * is hashed onto one vhost entry and *sdl is pointed at that entry's
 * link-layer sockaddr when its bit is set in sc_lsmask.  In every other
 * mode we respond if the first vhost entry is MASTER and *sdl is left
 * untouched.
 *
 * The mask test uses an unsigned constant because "1 << 31" on a
 * signed int is undefined behaviour.
 */
int
carp_iamatch6(struct ifnet *ifp, u_char *src, struct sockaddr_dl **sdl)
{
	struct carp_softc *sc = ifp->if_softc;
	struct carp_vhost_entry *vhe = LIST_FIRST(&sc->carp_vhosts);

	if (sc->sc_balancing == CARP_BAL_ARP) {
		int lshash;
		/*
		 * We use the source MAC address to decide which virtual host
		 * should handle the request. If we're master of that virtual
		 * host, then we respond, otherwise, just drop the ndp packet
		 * on the floor.
		 */

		/* can happen if optional src lladdr is not provided */
		if (src == NULL)
			return (0);
		if (sc->sc_lscount == 0) /* just to be safe */
			return (0);
		lshash = carp_hash(sc, src) % sc->sc_lscount;
		if ((1U << lshash) & sc->sc_lsmask) {
			int i = 0;
			/* find the lshash-th vhost entry in list order */
			LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) {
				if (i++ == lshash)
					break;
			}
			if (vhe == NULL)
				return (0);
			*sdl = &vhe->vhe_sdl;
			return (1);
		}
	} else {
		if (vhe->state == MASTER)
			return (1);
	}

	return (0);
}
#endif /* INET6 */
1509 
1510 struct ifnet *
1511 carp_ourether(void *v, struct ether_header *eh, int src)
1512 {
1513 	struct carp_if *cif = (struct carp_if *)v;
1514 	struct carp_softc *vh;
1515 	u_int8_t *ena;
1516 
1517 	if (src)
1518 		ena = (u_int8_t *)&eh->ether_shost;
1519 	else
1520 		ena = (u_int8_t *)&eh->ether_dhost;
1521 
1522 	TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
1523 		struct carp_vhost_entry *vhe;
1524 		if ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) !=
1525 		    (IFF_UP|IFF_RUNNING))
1526 			continue;
1527 		if (vh->sc_balancing == CARP_BAL_ARP) {
1528 			LIST_FOREACH(vhe, &vh->carp_vhosts, vhost_entries)
1529 				if (vhe->state == MASTER &&
1530 				    !bcmp(ena, vhe->vhe_enaddr, ETHER_ADDR_LEN))
1531 					return (&vh->sc_if);
1532 		} else {
1533 			vhe = LIST_FIRST(&vh->carp_vhosts);
1534 			if ((vhe->state == MASTER ||
1535 			    vh->sc_balancing >= CARP_BAL_IP) &&
1536 			    !bcmp(ena, vh->sc_ac.ac_enaddr, ETHER_ADDR_LEN))
1537 				return (&vh->sc_if);
1538 		}
1539 	}
1540 	return (NULL);
1541 }
1542 
1543 void
1544 carp_rewrite_lladdr(struct ifnet *ifp, u_int8_t *s_enaddr)
1545 {
1546 	struct carp_softc *sc = ifp->if_softc;
1547 
1548 	if (sc->sc_balancing != CARP_BAL_IPSTEALTH &&
1549 	    sc->sc_balancing != CARP_BAL_IP && sc->cur_vhe) {
1550 		if (sc->cur_vhe->vhe_leader)
1551 			bcopy((caddr_t)sc->sc_ac.ac_enaddr,
1552 			    (caddr_t)s_enaddr, ETHER_ADDR_LEN);
1553 		else
1554 			bcopy((caddr_t)sc->cur_vhe->vhe_enaddr,
1555 			    (caddr_t)s_enaddr, ETHER_ADDR_LEN);
1556 	}
1557 }
1558 
1559 int
1560 carp_our_mcastaddr(struct ifnet *ifp, u_int8_t *d_enaddr)
1561 {
1562 	struct carp_softc *sc = ifp->if_softc;
1563 
1564 	if (sc->sc_balancing != CARP_BAL_IP)
1565 		return (0);
1566 
1567 	return(!bcmp(sc->sc_ac.ac_enaddr, d_enaddr, ETHER_ADDR_LEN));
1568 }
1569 
1570 
1571 int
1572 carp_input(struct mbuf *m, u_int8_t *shost, u_int8_t *dhost, u_int16_t etype)
1573 {
1574 	struct ether_header eh;
1575 	struct carp_if *cif = (struct carp_if *)m->m_pkthdr.rcvif->if_carp;
1576 	struct ifnet *ifp;
1577 
1578 	bcopy(shost, &eh.ether_shost, sizeof(eh.ether_shost));
1579 	bcopy(dhost, &eh.ether_dhost, sizeof(eh.ether_dhost));
1580 	eh.ether_type = etype;
1581 
1582 	if ((ifp = carp_ourether(cif, &eh, 0)))
1583 		;
1584 	else if (m->m_flags & (M_BCAST|M_MCAST)) {
1585 		struct carp_softc *vh;
1586 		struct mbuf *m0;
1587 
1588 		/*
1589 		 * XXX Should really check the list of multicast addresses
1590 		 * for each CARP interface _before_ copying.
1591 		 */
1592 		TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
1593 			if (!(vh->sc_if.if_flags & IFF_UP))
1594 				continue;
1595 			m0 = m_copym2(m, 0, M_COPYALL, M_DONTWAIT);
1596 			if (m0 == NULL)
1597 				continue;
1598 			m0->m_pkthdr.rcvif = &vh->sc_if;
1599 #if NBPFILTER > 0
1600 			if (vh->sc_if.if_bpf)
1601 				bpf_mtap_hdr(vh->sc_if.if_bpf, (char *)&eh,
1602 				    ETHER_HDR_LEN, m0, BPF_DIRECTION_IN);
1603 #endif
1604 			vh->sc_if.if_ipackets++;
1605 			ether_input(&vh->sc_if, &eh, m0);
1606 		}
1607 		return (1);
1608 	}
1609 
1610 	if (ifp == NULL)
1611 		return (1);
1612 
1613 	m->m_pkthdr.rcvif = ifp;
1614 
1615 #if NBPFILTER > 0
1616 	if (ifp->if_bpf)
1617 		bpf_mtap_hdr(ifp->if_bpf, (char *)&eh, ETHER_HDR_LEN, m,
1618 		    BPF_DIRECTION_IN);
1619 #endif
1620 	ifp->if_ipackets++;
1621 	ether_input(ifp, &eh, m);
1622 
1623 	return (0);
1624 }
1625 
1626 int
1627 carp_lsdrop(struct mbuf *m, sa_family_t af, u_int32_t *src, u_int32_t *dst)
1628 {
1629 	struct carp_softc *sc = m->m_pkthdr.rcvif->if_softc;
1630 	int match;
1631 	u_int32_t fold;
1632 
1633 	if (sc->sc_balancing < CARP_BAL_IP)
1634 		return (0);
1635 	/*
1636 	 * Never drop carp advertisements.
1637 	 * XXX Bad idea to pass all broadcast / multicast traffic?
1638 	 */
1639 	if (m->m_flags & (M_BCAST|M_MCAST))
1640 		return (0);
1641 
1642 	fold = src[0] ^ dst[0];
1643 #ifdef INET6
1644 	if (af == AF_INET6) {
1645 		int i;
1646 		for (i = 1; i < 4; i++)
1647 			fold ^= src[i] ^ dst[i];
1648 	}
1649 #endif
1650 	if (sc->sc_lscount == 0) /* just to be safe */
1651 		return (1);
1652 	match = (1 << (ntohl(fold) % sc->sc_lscount)) & sc->sc_lsmask;
1653 
1654 	return (!match);
1655 }
1656 
1657 void
1658 carp_master_down(void *v)
1659 {
1660 	struct carp_vhost_entry *vhe = v;
1661 	struct carp_softc *sc = vhe->parent_sc;
1662 
1663 	switch (vhe->state) {
1664 	case INIT:
1665 		printf("%s: master_down event in INIT state\n",
1666 		    sc->sc_if.if_xname);
1667 		break;
1668 	case MASTER:
1669 		break;
1670 	case BACKUP:
1671 		carp_set_state(vhe, MASTER);
1672 		carp_send_ad(vhe);
1673 		if (sc->sc_balancing == CARP_BAL_NONE && vhe->vhe_leader) {
1674 			carp_send_arp(sc);
1675 			/* Schedule a delayed ARP to deal w/ some L3 switches */
1676 			sc->sc_delayed_arp = 2;
1677 #ifdef INET6
1678 			carp_send_na(sc);
1679 #endif /* INET6 */
1680 		}
1681 		carp_setrun(vhe, 0);
1682 		if (vhe->vhe_leader)
1683 			carp_setroute(sc, RTM_ADD);
1684 		carpstats.carps_preempt++;
1685 		break;
1686 	}
1687 }
1688 
1689 void
1690 carp_setrun_all(struct carp_softc *sc, sa_family_t af)
1691 {
1692 	struct carp_vhost_entry *vhe;
1693 	LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) {
1694 		carp_setrun(vhe, af);
1695 	}
1696 }
1697 
1698 /*
1699  * When in backup state, af indicates whether to reset the master down timer
1700  * for v4 or v6. If it's set to zero, reset the ones which are already pending.
1701  */
1702 void
1703 carp_setrun(struct carp_vhost_entry *vhe, sa_family_t af)
1704 {
1705 	struct timeval tv;
1706 	struct carp_softc *sc = vhe->parent_sc;
1707 
1708 	if (sc->sc_carpdev == NULL) {
1709 		sc->sc_if.if_flags &= ~IFF_RUNNING;
1710 		carp_set_state_all(sc, INIT);
1711 		return;
1712 	}
1713 
1714 	if (sc->sc_if.if_flags & IFF_UP && vhe->vhid > 0 &&
1715 	    (sc->sc_naddrs || sc->sc_naddrs6) && !sc->sc_suppress) {
1716 		sc->sc_if.if_flags |= IFF_RUNNING;
1717 	} else {
1718 		sc->sc_if.if_flags &= ~IFF_RUNNING;
1719 		if (vhe->vhe_leader)
1720 			carp_setroute(sc, RTM_DELETE);
1721 		return;
1722 	}
1723 
1724 	switch (vhe->state) {
1725 	case INIT:
1726 		carp_set_state(vhe, BACKUP);
1727 		if (vhe->vhe_leader)
1728 			carp_setroute(sc, RTM_DELETE);
1729 		carp_setrun(vhe, 0);
1730 		break;
1731 	case BACKUP:
1732 		timeout_del(&vhe->ad_tmo);
1733 		tv.tv_sec = 3 * sc->sc_advbase;
1734 		if (sc->sc_advbase == 0 && vhe->advskew == 0)
1735 			tv.tv_usec = 3 * 1000000 / 256;
1736 		else if (sc->sc_advbase == 0)
1737 			tv.tv_usec = 3 * vhe->advskew * 1000000 / 256;
1738 		else
1739 			tv.tv_usec = vhe->advskew * 1000000 / 256;
1740 		if (vhe->vhe_leader)
1741 			sc->sc_delayed_arp = -1;
1742 		switch (af) {
1743 #ifdef INET
1744 		case AF_INET:
1745 			timeout_add(&vhe->md_tmo, tvtohz(&tv));
1746 			break;
1747 #endif /* INET */
1748 #ifdef INET6
1749 		case AF_INET6:
1750 			timeout_add(&vhe->md6_tmo, tvtohz(&tv));
1751 			break;
1752 #endif /* INET6 */
1753 		default:
1754 			if (sc->sc_naddrs)
1755 				timeout_add(&vhe->md_tmo, tvtohz(&tv));
1756 			if (sc->sc_naddrs6)
1757 				timeout_add(&vhe->md6_tmo, tvtohz(&tv));
1758 			break;
1759 		}
1760 		break;
1761 	case MASTER:
1762 		tv.tv_sec = sc->sc_advbase;
1763 		if (sc->sc_advbase == 0 && vhe->advskew == 0)
1764 			tv.tv_usec = 1 * 1000000 / 256;
1765 		else
1766 			tv.tv_usec = vhe->advskew * 1000000 / 256;
1767 		timeout_add(&vhe->ad_tmo, tvtohz(&tv));
1768 		break;
1769 	}
1770 }
1771 
1772 void
1773 carp_multicast_cleanup(struct carp_softc *sc)
1774 {
1775 	struct ip_moptions *imo = &sc->sc_imo;
1776 #ifdef INET6
1777 	struct ip6_moptions *im6o = &sc->sc_im6o;
1778 #endif
1779 	u_int16_t n = imo->imo_num_memberships;
1780 
1781 	/* Clean up our own multicast memberships */
1782 	while (n-- > 0) {
1783 		if (imo->imo_membership[n] != NULL) {
1784 			in_delmulti(imo->imo_membership[n]);
1785 			imo->imo_membership[n] = NULL;
1786 		}
1787 	}
1788 	imo->imo_num_memberships = 0;
1789 	imo->imo_multicast_ifp = NULL;
1790 
1791 #ifdef INET6
1792 	while (!LIST_EMPTY(&im6o->im6o_memberships)) {
1793 		struct in6_multi_mship *imm =
1794 		    LIST_FIRST(&im6o->im6o_memberships);
1795 
1796 		LIST_REMOVE(imm, i6mm_chain);
1797 		in6_leavegroup(imm);
1798 	}
1799 	im6o->im6o_multicast_ifp = NULL;
1800 #endif
1801 
1802 	/* And any other multicast memberships */
1803 	carp_ether_purgemulti(sc);
1804 }
1805 
1806 int
1807 carp_set_ifp(struct carp_softc *sc, struct ifnet *ifp)
1808 {
1809 	struct carp_if *cif, *ncif = NULL;
1810 	struct carp_softc *vr, *after = NULL;
1811 	int myself = 0, error = 0;
1812 	int s;
1813 
1814 	if (ifp == sc->sc_carpdev)
1815 		return (0);
1816 
1817 	if (ifp != NULL) {
1818 		if ((ifp->if_flags & IFF_MULTICAST) == 0)
1819 			return (EADDRNOTAVAIL);
1820 
1821 		if (ifp->if_type == IFT_CARP)
1822 			return (EINVAL);
1823 
1824 		if (ifp->if_carp == NULL) {
1825 			ncif = malloc(sizeof(*cif), M_IFADDR, M_NOWAIT|M_ZERO);
1826 			if (ncif == NULL)
1827 				return (ENOBUFS);
1828 			if ((error = ifpromisc(ifp, 1))) {
1829 				free(ncif, M_IFADDR);
1830 				return (error);
1831 			}
1832 
1833 			ncif->vhif_ifp = ifp;
1834 			TAILQ_INIT(&ncif->vhif_vrs);
1835 		} else {
1836 			cif = (struct carp_if *)ifp->if_carp;
1837 			if (carp_check_dup_vhids(sc, cif, NULL))
1838 				return (EINVAL);
1839 		}
1840 
1841 		/* detach from old interface */
1842 		if (sc->sc_carpdev != NULL)
1843 			carpdetach(sc);
1844 
1845 		/* join multicast groups */
1846 		if (sc->sc_naddrs < 0 &&
1847 		    (error = carp_join_multicast(sc)) != 0) {
1848 			if (ncif != NULL)
1849 				free(ncif, M_IFADDR);
1850 			return (error);
1851 		}
1852 
1853 #ifdef INET6
1854 		if (sc->sc_naddrs6 < 0 &&
1855 		    (error = carp_join_multicast6(sc)) != 0) {
1856 			if (ncif != NULL)
1857 				free(ncif, M_IFADDR);
1858 			carp_multicast_cleanup(sc);
1859 			return (error);
1860 		}
1861 #endif
1862 
1863 		/* attach carp interface to physical interface */
1864 		if (ncif != NULL)
1865 			ifp->if_carp = (caddr_t)ncif;
1866 		sc->sc_carpdev = ifp;
1867 		cif = (struct carp_if *)ifp->if_carp;
1868 		TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) {
1869 			if (vr == sc)
1870 				myself = 1;
1871 			if (LIST_FIRST(&vr->carp_vhosts)->vhid <
1872 			    LIST_FIRST(&sc->carp_vhosts)->vhid)
1873 				after = vr;
1874 		}
1875 
1876 		if (!myself) {
1877 			/* We're trying to keep things in order */
1878 			if (after == NULL) {
1879 				TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list);
1880 			} else {
1881 				TAILQ_INSERT_AFTER(&cif->vhif_vrs, after,
1882 				    sc, sc_list);
1883 			}
1884 			cif->vhif_nvrs++;
1885 		}
1886 		if (sc->sc_naddrs || sc->sc_naddrs6)
1887 			sc->sc_if.if_flags |= IFF_UP;
1888 		carp_set_enaddr(sc);
1889 		s = splnet();
1890 		sc->lh_cookie = hook_establish(ifp->if_linkstatehooks, 1,
1891 		    carp_carpdev_state, ifp);
1892 		carp_carpdev_state(ifp);
1893 		splx(s);
1894 	} else {
1895 		carpdetach(sc);
1896 		sc->sc_if.if_flags &= ~(IFF_UP|IFF_RUNNING);
1897 	}
1898 	return (0);
1899 }
1900 
1901 void
1902 carp_set_vhe_enaddr(struct carp_vhost_entry *vhe)
1903 {
1904 	struct carp_softc *sc = vhe->parent_sc;
1905 
1906 	if (vhe->vhid != 0 && sc->sc_carpdev) {
1907 		if (vhe->vhe_leader && sc->sc_balancing == CARP_BAL_IP)
1908 			vhe->vhe_enaddr[0] = 1;
1909 		else
1910 			vhe->vhe_enaddr[0] = 0;
1911 		vhe->vhe_enaddr[1] = 0;
1912 		vhe->vhe_enaddr[2] = 0x5e;
1913 		vhe->vhe_enaddr[3] = 0;
1914 		vhe->vhe_enaddr[4] = 1;
1915 		vhe->vhe_enaddr[5] = vhe->vhid;
1916 
1917 		vhe->vhe_sdl.sdl_family = AF_LINK;
1918 		vhe->vhe_sdl.sdl_alen = ETHER_ADDR_LEN;
1919 		bcopy(vhe->vhe_enaddr, vhe->vhe_sdl.sdl_data, ETHER_ADDR_LEN);
1920 	} else
1921 		bzero(vhe->vhe_enaddr, ETHER_ADDR_LEN);
1922 }
1923 
1924 void
1925 carp_set_enaddr(struct carp_softc *sc)
1926 {
1927 	struct carp_vhost_entry *vhe;
1928 
1929 	LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries)
1930 		carp_set_vhe_enaddr(vhe);
1931 
1932 	vhe = LIST_FIRST(&sc->carp_vhosts);
1933 
1934 	/*
1935 	 * Use the carp lladdr if the running one isn't manually set.
1936 	 * Only compare static parts of the lladdr.
1937 	 */
1938 	if ((bcmp(sc->sc_ac.ac_enaddr + 1, vhe->vhe_enaddr + 1,
1939 	    ETHER_ADDR_LEN - 2) == 0) ||
1940 	    (!sc->sc_ac.ac_enaddr[0] && !sc->sc_ac.ac_enaddr[1] &&
1941 	    !sc->sc_ac.ac_enaddr[2] && !sc->sc_ac.ac_enaddr[3] &&
1942 	    !sc->sc_ac.ac_enaddr[4] && !sc->sc_ac.ac_enaddr[5]))
1943 		bcopy(vhe->vhe_enaddr, sc->sc_ac.ac_enaddr, ETHER_ADDR_LEN);
1944 
1945 	/* Make sure the enaddr has changed before further twiddling. */
1946 	if (bcmp(sc->sc_ac.ac_enaddr, sc->sc_curlladdr, ETHER_ADDR_LEN) != 0) {
1947 		bcopy(sc->sc_ac.ac_enaddr, LLADDR(sc->sc_if.if_sadl),
1948 		    ETHER_ADDR_LEN);
1949 		bcopy(sc->sc_ac.ac_enaddr, sc->sc_curlladdr, ETHER_ADDR_LEN);
1950 #ifdef INET6
1951 		/*
1952 		 * (re)attach a link-local address which matches
1953 		 * our new MAC address.
1954 		 */
1955 		in6_ifattach_linklocal(&sc->sc_if, NULL);
1956 #endif
1957 		carp_set_state_all(sc, INIT);
1958 		carp_setrun_all(sc, 0);
1959 	}
1960 }
1961 
1962 void
1963 carp_addr_updated(void *v)
1964 {
1965 	struct carp_softc *sc = (struct carp_softc *) v;
1966 	struct ifaddr *ifa;
1967 	int new_naddrs = 0, new_naddrs6 = 0;
1968 
1969 	TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) {
1970 		if (ifa->ifa_addr->sa_family == AF_INET)
1971 			new_naddrs++;
1972 		else if (ifa->ifa_addr->sa_family == AF_INET6 &&
1973 		    !IN6_IS_ADDR_LINKLOCAL(&ifatoia6(ifa)->ia_addr.sin6_addr))
1974 			new_naddrs6++;
1975 	}
1976 
1977 	/* We received address changes from if_addrhooks callback */
1978 	if (new_naddrs != sc->sc_naddrs || new_naddrs6 != sc->sc_naddrs6) {
1979 		struct in_addr mc_addr;
1980 		struct in_multi *inm;
1981 
1982 		sc->sc_naddrs = new_naddrs;
1983 		sc->sc_naddrs6 = new_naddrs6;
1984 
1985 		/* Re-establish multicast membership removed by in_control */
1986 		if (IN_MULTICAST(sc->sc_peer.s_addr)) {
1987 			mc_addr.s_addr = sc->sc_peer.s_addr;
1988 			IN_LOOKUP_MULTI(mc_addr, &sc->sc_if, inm);
1989 			if (inm == NULL) {
1990 				struct in_multi **imm =
1991 				    sc->sc_imo.imo_membership;
1992 				u_int16_t maxmem =
1993 				    sc->sc_imo.imo_max_memberships;
1994 
1995 				bzero(&sc->sc_imo, sizeof(sc->sc_imo));
1996 				sc->sc_imo.imo_membership = imm;
1997 				sc->sc_imo.imo_max_memberships = maxmem;
1998 
1999 				if (sc->sc_carpdev != NULL && sc->sc_naddrs > 0)
2000 					carp_join_multicast(sc);
2001 			}
2002 		}
2003 
2004 		if (sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0) {
2005 			sc->sc_if.if_flags &= ~IFF_UP;
2006 			carp_set_state_all(sc, INIT);
2007 		} else
2008 			carp_hmac_prepare(sc);
2009 	}
2010 
2011 	carp_setrun_all(sc, 0);
2012 }
2013 
2014 int
2015 carp_set_addr(struct carp_softc *sc, struct sockaddr_in *sin)
2016 {
2017 	struct ifnet *ifp = sc->sc_carpdev;
2018 	struct in_ifaddr *ia, *ia_if;
2019 	int error = 0;
2020 
2021 	/* XXX is this necessary? */
2022 	if (sin->sin_addr.s_addr == 0) {
2023 		if (!(sc->sc_if.if_flags & IFF_UP))
2024 			carp_set_state_all(sc, INIT);
2025 		if (sc->sc_naddrs)
2026 			sc->sc_if.if_flags |= IFF_UP;
2027 		carp_setrun_all(sc, 0);
2028 		return (0);
2029 	}
2030 
2031 	/* we have to do this by hand to ensure we don't match on ourselves */
2032 	ia_if = NULL;
2033 	for (ia = TAILQ_FIRST(&in_ifaddr); ia;
2034 	    ia = TAILQ_NEXT(ia, ia_list)) {
2035 
2036 		/* and, yeah, we need a multicast-capable iface too */
2037 		if (ia->ia_ifp != &sc->sc_if &&
2038 		    ia->ia_ifp->if_type != IFT_CARP &&
2039 		    (ia->ia_ifp->if_flags & IFF_MULTICAST) &&
2040 		    ia->ia_ifp->if_rdomain == sc->sc_if.if_rdomain &&
2041 		    (sin->sin_addr.s_addr & ia->ia_netmask) ==
2042 		    ia->ia_net) {
2043 			if (!ia_if)
2044 				ia_if = ia;
2045 		}
2046 	}
2047 
2048 	if (ia_if) {
2049 		ia = ia_if;
2050 		if (ifp) {
2051 			if (ifp != ia->ia_ifp)
2052 				return (EADDRNOTAVAIL);
2053 		} else {
2054 			ifp = ia->ia_ifp;
2055 		}
2056 	}
2057 
2058 	if ((error = carp_set_ifp(sc, ifp)))
2059 		return (error);
2060 
2061 	if (sc->sc_carpdev == NULL)
2062 		return (EADDRNOTAVAIL);
2063 
2064 	if (sc->sc_naddrs == 0 && (error = carp_join_multicast(sc)) != 0)
2065 		return (error);
2066 
2067 	if (sc->sc_carpdev != NULL)
2068 		sc->sc_if.if_flags |= IFF_UP;
2069 
2070 	carp_set_state_all(sc, INIT);
2071 
2072 	return (0);
2073 }
2074 
2075 int
2076 carp_join_multicast(struct carp_softc *sc)
2077 {
2078 	struct ip_moptions *imo = &sc->sc_imo;
2079 	struct in_multi *imm;
2080 	struct in_addr addr;
2081 
2082 	if (!IN_MULTICAST(sc->sc_peer.s_addr))
2083 		return (0);
2084 
2085 	addr.s_addr = sc->sc_peer.s_addr;
2086 	if ((imm = in_addmulti(&addr, &sc->sc_if)) == NULL)
2087 		return (ENOBUFS);
2088 
2089 	imo->imo_membership[0] = imm;
2090 	imo->imo_num_memberships = 1;
2091 	imo->imo_multicast_ifp = &sc->sc_if;
2092 	imo->imo_multicast_ttl = CARP_DFLTTL;
2093 	imo->imo_multicast_loop = 0;
2094 	return (0);
2095 }
2096 
2097 
2098 #ifdef INET6
2099 int
2100 carp_set_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6)
2101 {
2102 	struct ifnet *ifp = sc->sc_carpdev;
2103 	struct in6_ifaddr *ia, *ia_if;
2104 	int error = 0;
2105 
2106 	if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
2107 		if (!(sc->sc_if.if_flags & IFF_UP))
2108 			carp_set_state_all(sc, INIT);
2109 		if (sc->sc_naddrs6)
2110 			sc->sc_if.if_flags |= IFF_UP;
2111 		carp_setrun_all(sc, 0);
2112 		return (0);
2113 	}
2114 
2115 	/* we have to do this by hand to ensure we don't match on ourselves */
2116 	ia_if = NULL;
2117 	for (ia = in6_ifaddr; ia; ia = ia->ia_next) {
2118 		int i;
2119 
2120 		for (i = 0; i < 4; i++) {
2121 			if ((sin6->sin6_addr.s6_addr32[i] &
2122 			    ia->ia_prefixmask.sin6_addr.s6_addr32[i]) !=
2123 			    (ia->ia_addr.sin6_addr.s6_addr32[i] &
2124 			    ia->ia_prefixmask.sin6_addr.s6_addr32[i]))
2125 				break;
2126 		}
2127 		/* and, yeah, we need a multicast-capable iface too */
2128 		if (ia->ia_ifp != &sc->sc_if &&
2129 		    ia->ia_ifp->if_type != IFT_CARP &&
2130 		    (ia->ia_ifp->if_flags & IFF_MULTICAST) &&
2131 		    (i == 4)) {
2132 			if (!ia_if)
2133 				ia_if = ia;
2134 		}
2135 	}
2136 
2137 	if (ia_if) {
2138 		ia = ia_if;
2139 		if (sc->sc_carpdev) {
2140 			if (sc->sc_carpdev != ia->ia_ifp)
2141 				return (EADDRNOTAVAIL);
2142 		} else {
2143 			ifp = ia->ia_ifp;
2144 		}
2145 	}
2146 
2147 	if ((error = carp_set_ifp(sc, ifp)))
2148 		return (error);
2149 
2150 	if (sc->sc_carpdev == NULL)
2151 		return (EADDRNOTAVAIL);
2152 
2153 	if (sc->sc_naddrs6 == 0 && (error = carp_join_multicast6(sc)) != 0)
2154 		return (error);
2155 
2156 	if (sc->sc_carpdev != NULL)
2157 		sc->sc_if.if_flags |= IFF_UP;
2158 
2159 	carp_set_state_all(sc, INIT);
2160 
2161 	return (0);
2162 }
2163 
2164 int
2165 carp_join_multicast6(struct carp_softc *sc)
2166 {
2167 	struct in6_multi_mship *imm, *imm2;
2168 	struct ip6_moptions *im6o = &sc->sc_im6o;
2169 	struct sockaddr_in6 addr6;
2170 	int error;
2171 
2172 	/* Join IPv6 CARP multicast group */
2173 	bzero(&addr6, sizeof(addr6));
2174 	addr6.sin6_family = AF_INET6;
2175 	addr6.sin6_len = sizeof(addr6);
2176 	addr6.sin6_addr.s6_addr16[0] = htons(0xff02);
2177 	addr6.sin6_addr.s6_addr16[1] = htons(sc->sc_if.if_index);
2178 	addr6.sin6_addr.s6_addr8[15] = 0x12;
2179 	if ((imm = in6_joingroup(&sc->sc_if,
2180 	    &addr6.sin6_addr, &error)) == NULL) {
2181 		return (error);
2182 	}
2183 	/* join solicited multicast address */
2184 	bzero(&addr6.sin6_addr, sizeof(addr6.sin6_addr));
2185 	addr6.sin6_addr.s6_addr16[0] = htons(0xff02);
2186 	addr6.sin6_addr.s6_addr16[1] = htons(sc->sc_if.if_index);
2187 	addr6.sin6_addr.s6_addr32[1] = 0;
2188 	addr6.sin6_addr.s6_addr32[2] = htonl(1);
2189 	addr6.sin6_addr.s6_addr32[3] = 0;
2190 	addr6.sin6_addr.s6_addr8[12] = 0xff;
2191 	if ((imm2 = in6_joingroup(&sc->sc_if,
2192 	    &addr6.sin6_addr, &error)) == NULL) {
2193 		in6_leavegroup(imm);
2194 		return (error);
2195 	}
2196 
2197 	/* apply v6 multicast membership */
2198 	im6o->im6o_multicast_ifp = &sc->sc_if;
2199 	if (imm)
2200 		LIST_INSERT_HEAD(&im6o->im6o_memberships, imm,
2201 		    i6mm_chain);
2202 	if (imm2)
2203 		LIST_INSERT_HEAD(&im6o->im6o_memberships, imm2,
2204 		    i6mm_chain);
2205 
2206 	return (0);
2207 }
2208 
2209 #endif /* INET6 */
2210 
2211 int
2212 carp_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr)
2213 {
2214 	struct proc *p = curproc;	/* XXX */
2215 	struct carp_softc *sc = ifp->if_softc;
2216 	struct carp_vhost_entry *vhe;
2217 	struct carpreq carpr;
2218 	struct ifaddr *ifa = (struct ifaddr *)addr;
2219 	struct ifreq *ifr = (struct ifreq *)addr;
2220 	struct ifnet *cdev = NULL;
2221 	int i, error = 0;
2222 
2223 	switch (cmd) {
2224 	case SIOCSIFADDR:
2225 		switch (ifa->ifa_addr->sa_family) {
2226 #ifdef INET
2227 		case AF_INET:
2228 			sc->sc_if.if_flags |= IFF_UP;
2229 			/*
2230 			 * emulate arp_ifinit() without doing a gratious arp
2231 			 * request so that the routes are setup correctly.
2232 			 */
2233 			ifa->ifa_rtrequest = arp_rtrequest;
2234 			ifa->ifa_flags |= RTF_CLONING;
2235 
2236 			error = carp_set_addr(sc, satosin(ifa->ifa_addr));
2237 			break;
2238 #endif /* INET */
2239 #ifdef INET6
2240 		case AF_INET6:
2241 			sc->sc_if.if_flags |= IFF_UP;
2242 			error = carp_set_addr6(sc, satosin6(ifa->ifa_addr));
2243 			break;
2244 #endif /* INET6 */
2245 		default:
2246 			error = EAFNOSUPPORT;
2247 			break;
2248 		}
2249 		break;
2250 
2251 	case SIOCSIFFLAGS:
2252 		vhe = LIST_FIRST(&sc->carp_vhosts);
2253 		if (vhe->state != INIT && !(ifr->ifr_flags & IFF_UP)) {
2254 			carp_del_all_timeouts(sc);
2255 
2256 			/* we need the interface up to bow out */
2257 			sc->sc_if.if_flags |= IFF_UP;
2258 			sc->sc_bow_out = 1;
2259 			carp_vhe_send_ad_all(sc);
2260 			sc->sc_bow_out = 0;
2261 
2262 			sc->sc_if.if_flags &= ~IFF_UP;
2263 			carp_set_state_all(sc, INIT);
2264 			carp_setrun_all(sc, 0);
2265 		} else if (vhe->state == INIT && (ifr->ifr_flags & IFF_UP)) {
2266 			sc->sc_if.if_flags |= IFF_UP;
2267 			carp_setrun_all(sc, 0);
2268 		}
2269 		break;
2270 
2271 	case SIOCSVH:
2272 		vhe = LIST_FIRST(&sc->carp_vhosts);
2273 		if ((error = suser(p, 0)) != 0)
2274 			break;
2275 		if ((error = copyin(ifr->ifr_data, &carpr, sizeof carpr)))
2276 			break;
2277 		error = 1;
2278 		if (carpr.carpr_carpdev[0] != '\0' &&
2279 		    (cdev = ifunit(carpr.carpr_carpdev)) == NULL)
2280 			return (EINVAL);
2281 		if (carpr.carpr_peer.s_addr == 0)
2282 			sc->sc_peer.s_addr = INADDR_CARP_GROUP;
2283 		else
2284 			sc->sc_peer.s_addr = carpr.carpr_peer.s_addr;
2285 		if ((error = carp_set_ifp(sc, cdev)))
2286 			return (error);
2287 		if (vhe->state != INIT && carpr.carpr_state != vhe->state) {
2288 			switch (carpr.carpr_state) {
2289 			case BACKUP:
2290 				timeout_del(&vhe->ad_tmo);
2291 				carp_set_state_all(sc, BACKUP);
2292 				carp_setrun_all(sc, 0);
2293 				carp_setroute(sc, RTM_DELETE);
2294 				break;
2295 			case MASTER:
2296 				LIST_FOREACH(vhe, &sc->carp_vhosts,
2297 				    vhost_entries)
2298 					carp_master_down(vhe);
2299 				break;
2300 			default:
2301 				break;
2302 			}
2303 		}
2304 		if ((error = carp_vhids_ioctl(sc, &carpr)))
2305 			return (error);
2306 		if (carpr.carpr_advbase >= 0) {
2307 			if (carpr.carpr_advbase > 255) {
2308 				error = EINVAL;
2309 				break;
2310 			}
2311 			sc->sc_advbase = carpr.carpr_advbase;
2312 			error--;
2313 		}
2314 		if (bcmp(sc->sc_advskews, carpr.carpr_advskews,
2315 		    sizeof(sc->sc_advskews))) {
2316 			i = 0;
2317 			LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries)
2318 				vhe->advskew = carpr.carpr_advskews[i++];
2319 			bcopy(carpr.carpr_advskews, sc->sc_advskews,
2320 			    sizeof(sc->sc_advskews));
2321 		}
2322 		if (sc->sc_balancing != carpr.carpr_balancing) {
2323 			if (carpr.carpr_balancing > CARP_BAL_MAXID) {
2324 				error = EINVAL;
2325 				break;
2326 			}
2327 			sc->sc_balancing = carpr.carpr_balancing;
2328 			carp_set_enaddr(sc);
2329 			carp_update_lsmask(sc);
2330 		}
2331 		bcopy(carpr.carpr_key, sc->sc_key, sizeof(sc->sc_key));
2332 		if (error > 0)
2333 			error = EINVAL;
2334 		else {
2335 			error = 0;
2336 			carp_hmac_prepare(sc);
2337 			carp_setrun_all(sc, 0);
2338 		}
2339 		break;
2340 
2341 	case SIOCGVH:
2342 		bzero(&carpr, sizeof(carpr));
2343 		if (sc->sc_carpdev != NULL)
2344 			strlcpy(carpr.carpr_carpdev, sc->sc_carpdev->if_xname,
2345 			    IFNAMSIZ);
2346 		i = 0;
2347 		LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) {
2348 			carpr.carpr_vhids[i] = vhe->vhid;
2349 			carpr.carpr_advskews[i] = vhe->advskew;
2350 			carpr.carpr_states[i] = vhe->state;
2351 			i++;
2352 		}
2353 		carpr.carpr_advbase = sc->sc_advbase;
2354 		carpr.carpr_balancing = sc->sc_balancing;
2355 		if (suser(p, 0) == 0)
2356 			bcopy(sc->sc_key, carpr.carpr_key,
2357 			    sizeof(carpr.carpr_key));
2358 		carpr.carpr_peer.s_addr = sc->sc_peer.s_addr;
2359 		error = copyout(&carpr, ifr->ifr_data, sizeof(carpr));
2360 		break;
2361 
2362 	case SIOCADDMULTI:
2363 		error = carp_ether_addmulti(sc, ifr);
2364 		break;
2365 
2366 	case SIOCDELMULTI:
2367 		error = carp_ether_delmulti(sc, ifr);
2368 		break;
2369 	case SIOCAIFGROUP:
2370 	case SIOCDIFGROUP:
2371 		if (sc->sc_demote_cnt)
2372 			carp_ifgroup_ioctl(ifp, cmd, addr);
2373 		break;
2374 	case SIOCSIFGATTR:
2375 		carp_ifgattr_ioctl(ifp, cmd, addr);
2376 		break;
2377 	default:
2378 		error = ENOTTY;
2379 	}
2380 
2381 	if (bcmp(sc->sc_ac.ac_enaddr, sc->sc_curlladdr, ETHER_ADDR_LEN) != 0)
2382 		carp_set_enaddr(sc);
2383 	return (error);
2384 }
2385 
2386 int
2387 carp_check_dup_vhids(struct carp_softc *sc, struct carp_if *cif,
2388     struct carpreq *carpr)
2389 {
2390 	struct carp_softc *vr;
2391 	struct carp_vhost_entry *vhe, *vhe0;
2392 	int i;
2393 
2394 	TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) {
2395 		if (vr == sc)
2396 			continue;
2397 		LIST_FOREACH(vhe, &vr->carp_vhosts, vhost_entries) {
2398 			if (carpr) {
2399 				for (i = 0; carpr->carpr_vhids[i]; i++) {
2400 					if (vhe->vhid == carpr->carpr_vhids[i])
2401 						return (EINVAL);
2402 				}
2403 			}
2404 			LIST_FOREACH(vhe0, &sc->carp_vhosts, vhost_entries) {
2405 				if (vhe->vhid == vhe0->vhid)
2406 					return (EINVAL);
2407 			}
2408 		}
2409 	}
2410 	return (0);
2411 }
2412 
2413 int
2414 carp_vhids_ioctl(struct carp_softc *sc, struct carpreq *carpr)
2415 {
2416 	int i, j;
2417 	u_int8_t taken_vhids[256];
2418 
2419 	if (carpr->carpr_vhids[0] == 0 ||
2420 	    !bcmp(sc->sc_vhids, carpr->carpr_vhids, sizeof(sc->sc_vhids)))
2421 		return (0);
2422 
2423 	bzero(taken_vhids, sizeof(taken_vhids));
2424 	for (i = 0; carpr->carpr_vhids[i]; i++) {
2425 		if (taken_vhids[carpr->carpr_vhids[i]])
2426 			return (EINVAL);
2427 		taken_vhids[carpr->carpr_vhids[i]] = 1;
2428 
2429 		if (sc->sc_carpdev) {
2430 			struct carp_if *cif;
2431 			cif = (struct carp_if *)sc->sc_carpdev->if_carp;
2432 			if (carp_check_dup_vhids(sc, cif, carpr))
2433 				return (EINVAL);
2434 		}
2435 		if (carpr->carpr_advskews[i] >= 255)
2436 			return (EINVAL);
2437 	}
2438 	/* set sane balancing defaults */
2439 	if (i <= 1)
2440 		carpr->carpr_balancing = CARP_BAL_NONE;
2441 	else if (carpr->carpr_balancing == CARP_BAL_NONE &&
2442 	    sc->sc_balancing == CARP_BAL_NONE)
2443 		carpr->carpr_balancing = CARP_BAL_IP;
2444 
2445 	/* destroy all */
2446 	carp_del_all_timeouts(sc);
2447 	carp_destroy_vhosts(sc);
2448 	bzero(sc->sc_vhids, sizeof(sc->sc_vhids));
2449 
2450 	/* sort vhosts list by vhid */
2451 	for (j = 1; j <= 255; j++) {
2452 		for (i = 0; carpr->carpr_vhids[i]; i++) {
2453 			if (carpr->carpr_vhids[i] != j)
2454 				continue;
2455 			if (carp_new_vhost(sc, carpr->carpr_vhids[i],
2456 			    carpr->carpr_advskews[i]))
2457 				return (ENOMEM);
2458 			sc->sc_vhids[i] = carpr->carpr_vhids[i];
2459 			sc->sc_advskews[i] = carpr->carpr_advskews[i];
2460 		}
2461 	}
2462 	carp_set_enaddr(sc);
2463 	carp_set_state_all(sc, INIT);
2464 	return (0);
2465 }
2466 
2467 void
2468 carp_ifgroup_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr)
2469 {
2470 	struct ifgroupreq *ifgr = (struct ifgroupreq *)addr;
2471 	struct ifg_list	*ifgl;
2472 	int *dm, adj;
2473 
2474 	if (!strcmp(ifgr->ifgr_group, IFG_ALL))
2475 		return;
2476 	adj = ((struct carp_softc *)ifp->if_softc)->sc_demote_cnt;
2477 	if (cmd == SIOCDIFGROUP)
2478 		adj = adj * -1;
2479 
2480 	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
2481 		if (!strcmp(ifgl->ifgl_group->ifg_group, ifgr->ifgr_group)) {
2482 			dm = &ifgl->ifgl_group->ifg_carp_demoted;
2483 			if (*dm + adj >= 0)
2484 				*dm += adj;
2485 			else
2486 				*dm = 0;
2487 		}
2488 }
2489 
2490 void
2491 carp_ifgattr_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr)
2492 {
2493 	struct ifgroupreq *ifgr = (struct ifgroupreq *)addr;
2494 	struct carp_softc *sc = ifp->if_softc;
2495 
2496 	if (ifgr->ifgr_attrib.ifg_carp_demoted > 0 && (sc->sc_if.if_flags &
2497 	    (IFF_UP|IFF_RUNNING)) == (IFF_UP|IFF_RUNNING))
2498 		carp_vhe_send_ad_all(sc);
2499 }
2500 
/*
 * Start routine of the carp interface.  It is not expected to be called;
 * DEBUG kernels print a note naming the interface if it is, otherwise
 * the function does nothing.
 */
void
carp_start(struct ifnet *ifp)
{
#ifdef DEBUG
	const char *name = ifp->if_xname;

	printf("%s: start called\n", name);
#endif
}
2511 
2512 int
2513 carp_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa,
2514     struct rtentry *rt)
2515 {
2516 	struct carp_softc *sc = ((struct carp_softc *)ifp->if_softc);
2517 	struct carp_vhost_entry *vhe;
2518 
2519 	vhe = sc->cur_vhe ? sc->cur_vhe : LIST_FIRST(&sc->carp_vhosts);
2520 
2521 	if (sc->sc_carpdev != NULL &&
2522 	    (sc->sc_balancing || vhe->state == MASTER))
2523 		return (sc->sc_carpdev->if_output(ifp, m, sa, rt));
2524 	else {
2525 		m_freem(m);
2526 		return (ENETUNREACH);
2527 	}
2528 }
2529 
2530 void
2531 carp_set_state_all(struct carp_softc *sc, int state)
2532 {
2533 	struct carp_vhost_entry *vhe;
2534 
2535 	LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries)
2536 		carp_set_state(vhe, state);
2537 }
2538 
2539 void
2540 carp_set_state(struct carp_vhost_entry *vhe, int state)
2541 {
2542 	struct carp_softc *sc = vhe->parent_sc;
2543 	static const char *carp_states[] = { CARP_STATES };
2544 	int loglevel;
2545 
2546 	if (vhe->state == state)
2547 		return;
2548 	if (vhe->state == INIT || state == INIT)
2549 		loglevel = LOG_WARNING;
2550 	else
2551 		loglevel = LOG_CRIT;
2552 
2553 	if (sc->sc_vhe_count > 1)
2554 		CARP_LOG(loglevel, sc,
2555 		    ("state transition (vhid %d): %s -> %s", vhe->vhid,
2556 		    carp_states[vhe->state], carp_states[state]));
2557 	else
2558 		CARP_LOG(loglevel, sc,
2559 		    ("state transition: %s -> %s",
2560 		    carp_states[vhe->state], carp_states[state]));
2561 
2562 	vhe->state = state;
2563 	carp_update_lsmask(sc);
2564 
2565 	/* only the master vhe creates link state messages */
2566 	if (!vhe->vhe_leader)
2567 		return;
2568 
2569 	switch (state) {
2570 	case BACKUP:
2571 		sc->sc_if.if_link_state = LINK_STATE_DOWN;
2572 		break;
2573 	case MASTER:
2574 		sc->sc_if.if_link_state = LINK_STATE_UP;
2575 		break;
2576 	default:
2577 		sc->sc_if.if_link_state = LINK_STATE_INVALID;
2578 		break;
2579 	}
2580 	if_link_state_change(&sc->sc_if);
2581 }
2582 
2583 void
2584 carp_group_demote_adj(struct ifnet *ifp, int adj, char *reason)
2585 {
2586 	struct ifg_list	*ifgl;
2587 	int *dm;
2588 	struct carp_softc *nil = NULL;
2589 
2590 	if (ifp->if_type == IFT_CARP) {
2591 		dm = &((struct carp_softc *)ifp->if_softc)->sc_demote_cnt;
2592 		if (*dm + adj >= 0)
2593 			*dm += adj;
2594 		else
2595 			*dm = 0;
2596 	}
2597 
2598 	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
2599 		if (!strcmp(ifgl->ifgl_group->ifg_group, IFG_ALL))
2600 			continue;
2601 		dm = &ifgl->ifgl_group->ifg_carp_demoted;
2602 
2603 		if (*dm + adj >= 0)
2604 			*dm += adj;
2605 		else
2606 			*dm = 0;
2607 
2608 		if (adj > 0 && *dm == 1)
2609 			carp_send_ad_all();
2610 		CARP_LOG(LOG_ERR, nil,
2611 		    ("%s demoted group %s by %d to %d (%s)",
2612 		    ifp->if_xname, ifgl->ifgl_group->ifg_group,
2613 		    adj, *dm, reason));
2614 	}
2615 }
2616 
2617 int
2618 carp_group_demote_count(struct carp_softc *sc)
2619 {
2620 	struct ifg_list	*ifgl;
2621 	int count = 0;
2622 
2623 	TAILQ_FOREACH(ifgl, &sc->sc_if.if_groups, ifgl_next)
2624 		count += ifgl->ifgl_group->ifg_carp_demoted;
2625 
2626 	if (count == 0 && sc->sc_demote_cnt)
2627 		count = sc->sc_demote_cnt;
2628 
2629 	return (count > 255 ? 255 : count);
2630 }
2631 
2632 void
2633 carp_carpdev_state(void *v)
2634 {
2635 	struct carp_if *cif;
2636 	struct carp_softc *sc;
2637 	struct ifnet *ifp = v;
2638 
2639 	if (ifp->if_type == IFT_CARP)
2640 		return;
2641 
2642 	cif = (struct carp_if *)ifp->if_carp;
2643 
2644 	TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) {
2645 		int suppressed = sc->sc_suppress;
2646 
2647 		if (sc->sc_carpdev->if_link_state == LINK_STATE_DOWN ||
2648 		    !(sc->sc_carpdev->if_flags & IFF_UP)) {
2649 			sc->sc_if.if_flags &= ~IFF_RUNNING;
2650 			carp_del_all_timeouts(sc);
2651 			carp_set_state_all(sc, INIT);
2652 			sc->sc_suppress = 1;
2653 			carp_setrun_all(sc, 0);
2654 			if (!suppressed)
2655 				carp_group_demote_adj(&sc->sc_if, 1, "carpdev");
2656 		} else if (suppressed) {
2657 			carp_set_state_all(sc, INIT);
2658 			sc->sc_suppress = 0;
2659 			carp_setrun_all(sc, 0);
2660 			carp_group_demote_adj(&sc->sc_if, -1, "carpdev");
2661 		}
2662 	}
2663 }
2664 
2665 int
2666 carp_ether_addmulti(struct carp_softc *sc, struct ifreq *ifr)
2667 {
2668 	struct ifnet *ifp;
2669 	struct carp_mc_entry *mc;
2670 	u_int8_t addrlo[ETHER_ADDR_LEN], addrhi[ETHER_ADDR_LEN];
2671 	int error;
2672 
2673 	ifp = sc->sc_carpdev;
2674 	if (ifp == NULL)
2675 		return (EINVAL);
2676 
2677 	error = ether_addmulti(ifr, (struct arpcom *)&sc->sc_ac);
2678 	if (error != ENETRESET)
2679 		return (error);
2680 
2681 	/*
2682 	 * This is new multicast address.  We have to tell parent
2683 	 * about it.  Also, remember this multicast address so that
2684 	 * we can delete them on unconfigure.
2685 	 */
2686 	mc = malloc(sizeof(struct carp_mc_entry), M_DEVBUF, M_NOWAIT);
2687 	if (mc == NULL) {
2688 		error = ENOMEM;
2689 		goto alloc_failed;
2690 	}
2691 
2692 	/*
2693 	 * As ether_addmulti() returns ENETRESET, following two
2694 	 * statement shouldn't fail.
2695 	 */
2696 	(void)ether_multiaddr(&ifr->ifr_addr, addrlo, addrhi);
2697 	ETHER_LOOKUP_MULTI(addrlo, addrhi, &sc->sc_ac, mc->mc_enm);
2698 	memcpy(&mc->mc_addr, &ifr->ifr_addr, ifr->ifr_addr.sa_len);
2699 	LIST_INSERT_HEAD(&sc->carp_mc_listhead, mc, mc_entries);
2700 
2701 	error = (*ifp->if_ioctl)(ifp, SIOCADDMULTI, (caddr_t)ifr);
2702 	if (error != 0)
2703 		goto ioctl_failed;
2704 
2705 	return (error);
2706 
2707  ioctl_failed:
2708 	LIST_REMOVE(mc, mc_entries);
2709 	free(mc, M_DEVBUF);
2710  alloc_failed:
2711 	(void)ether_delmulti(ifr, (struct arpcom *)&sc->sc_ac);
2712 
2713 	return (error);
2714 }
2715 
2716 int
2717 carp_ether_delmulti(struct carp_softc *sc, struct ifreq *ifr)
2718 {
2719 	struct ifnet *ifp;
2720 	struct ether_multi *enm;
2721 	struct carp_mc_entry *mc;
2722 	u_int8_t addrlo[ETHER_ADDR_LEN], addrhi[ETHER_ADDR_LEN];
2723 	int error;
2724 
2725 	ifp = sc->sc_carpdev;
2726 	if (ifp == NULL)
2727 		return (EINVAL);
2728 
2729 	/*
2730 	 * Find a key to lookup carp_mc_entry.  We have to do this
2731 	 * before calling ether_delmulti for obvious reason.
2732 	 */
2733 	if ((error = ether_multiaddr(&ifr->ifr_addr, addrlo, addrhi)) != 0)
2734 		return (error);
2735 	ETHER_LOOKUP_MULTI(addrlo, addrhi, &sc->sc_ac, enm);
2736 	if (enm == NULL)
2737 		return (EINVAL);
2738 
2739 	LIST_FOREACH(mc, &sc->carp_mc_listhead, mc_entries)
2740 		if (mc->mc_enm == enm)
2741 			break;
2742 
2743 	/* We won't delete entries we didn't add */
2744 	if (mc == NULL)
2745 		return (EINVAL);
2746 
2747 	error = ether_delmulti(ifr, (struct arpcom *)&sc->sc_ac);
2748 	if (error != ENETRESET)
2749 		return (error);
2750 
2751 	/* We no longer use this multicast address.  Tell parent so. */
2752 	error = (*ifp->if_ioctl)(ifp, SIOCDELMULTI, (caddr_t)ifr);
2753 	if (error == 0) {
2754 		/* And forget about this address. */
2755 		LIST_REMOVE(mc, mc_entries);
2756 		free(mc, M_DEVBUF);
2757 	} else
2758 		(void)ether_addmulti(ifr, (struct arpcom *)&sc->sc_ac);
2759 	return (error);
2760 }
2761 
2762 /*
2763  * Delete any multicast address we have asked to add from parent
2764  * interface.  Called when the carp is being unconfigured.
2765  */
2766 void
2767 carp_ether_purgemulti(struct carp_softc *sc)
2768 {
2769 	struct ifnet *ifp = sc->sc_carpdev;		/* Parent. */
2770 	struct carp_mc_entry *mc;
2771 	union {
2772 		struct ifreq ifreq;
2773 		struct {
2774 			char ifr_name[IFNAMSIZ];
2775 			struct sockaddr_storage ifr_ss;
2776 		} ifreq_storage;
2777 	} u;
2778 	struct ifreq *ifr = &u.ifreq;
2779 
2780 	if (ifp == NULL)
2781 		return;
2782 
2783 	memcpy(ifr->ifr_name, ifp->if_xname, IFNAMSIZ);
2784 	while ((mc = LIST_FIRST(&sc->carp_mc_listhead)) != NULL) {
2785 		memcpy(&ifr->ifr_addr, &mc->mc_addr, mc->mc_addr.ss_len);
2786 		(void)(*ifp->if_ioctl)(ifp, SIOCDELMULTI, (caddr_t)ifr);
2787 		LIST_REMOVE(mc, mc_entries);
2788 		free(mc, M_DEVBUF);
2789 	}
2790 }
2791