xref: /openbsd-src/sys/netinet/ip_carp.c (revision a28daedfc357b214be5c701aa8ba8adb29a7f1c2)
1 /*	$OpenBSD: ip_carp.c,v 1.169 2008/10/28 23:07:12 mpf Exp $	*/
2 
3 /*
4  * Copyright (c) 2002 Michael Shalayeff. All rights reserved.
5  * Copyright (c) 2003 Ryan McBride. All rights reserved.
6  * Copyright (c) 2006-2008 Marco Pfatschbacher. All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
21  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
23  * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
25  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
26  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
27  * THE POSSIBILITY OF SUCH DAMAGE.
28  */
29 
30 /*
31  * TODO:
32  *	- iface reconfigure
33  *	- support for hardware checksum calculations;
34  *
35  */
36 
37 #include "ether.h"
38 
39 #include <sys/param.h>
40 #include <sys/proc.h>
41 #include <sys/systm.h>
42 #include <sys/mbuf.h>
43 #include <sys/socket.h>
44 #include <sys/socketvar.h>
45 #include <sys/ioctl.h>
46 #include <sys/errno.h>
47 #include <sys/device.h>
48 #include <sys/kernel.h>
49 #include <sys/sysctl.h>
50 #include <sys/syslog.h>
51 
52 #include <machine/cpu.h>
53 
54 #include <net/if.h>
55 #include <net/if_types.h>
56 #include <net/if_llc.h>
57 #include <net/route.h>
58 #include <net/netisr.h>
59 
60 /* for arc4random() */
61 #include <dev/rndvar.h>
62 
63 #if NFDDI > 0
64 #include <net/if_fddi.h>
65 #endif
66 
67 #include <crypto/sha1.h>
68 
69 #ifdef INET
70 #include <netinet/in.h>
71 #include <netinet/in_systm.h>
72 #include <netinet/in_var.h>
73 #include <netinet/ip.h>
74 #include <netinet/ip_var.h>
75 #include <netinet/if_ether.h>
76 #include <netinet/ip_ipsp.h>
77 
78 #include <net/if_enc.h>
79 #include <net/if_dl.h>
80 #endif
81 
82 #ifdef INET6
83 #include <netinet/icmp6.h>
84 #include <netinet/ip6.h>
85 #include <netinet6/ip6_var.h>
86 #include <netinet6/nd6.h>
87 #include <netinet6/in6_ifattach.h>
88 #endif
89 
90 #include "bpfilter.h"
91 #if NBPFILTER > 0
92 #include <net/bpf.h>
93 #endif
94 
95 #include <netinet/ip_carp.h>
96 
/*
 * One multicast entry kept by a carp interface; entries are linked on
 * carp_softc.carp_mc_listhead.
 */
struct carp_mc_entry {
	LIST_ENTRY(carp_mc_entry)	mc_entries;	/* list linkage */
	union {
		struct ether_multi	*mcu_enm;
	} mc_u;
	struct sockaddr_storage		mc_addr;
};
/* Shorthand for the only member of the mc_u union. */
#define	mc_enm	mc_u.mcu_enm
105 
/*
 * Indices into carp_vhost_entry.vhe_sha1[], one precomputed HMAC context
 * per variant.  HMAC_NOV6LL is the variant that leaves scope-embedded IPv6
 * addresses out of the hash (see carp_hmac_prepare_ctx()); HMAC_MAX is the
 * number of variants.
 */
enum { HMAC_ORIG=0, HMAC_NOV6LL=1, HMAC_MAX=2 };
107 
/*
 * Per-virtual-host state.  Each carp_softc owns a list of these
 * (carp_softc.carp_vhosts); the first entry added to an empty list is
 * flagged as the leader by carp_new_vhost().
 */
struct carp_vhost_entry {
	LIST_ENTRY(carp_vhost_entry)	vhost_entries;
	struct carp_softc *parent_sc;	/* owning carp interface */
	int vhe_leader;			/* non-zero on the first vhost of parent_sc */
	int vhid;			/* matched against carp_vhid on input */
	int advskew;
	enum { INIT = 0, BACKUP, MASTER }	state;
	struct timeout ad_tmo;	/* advertisement timeout */
	struct timeout md_tmo;	/* master down timeout */
	struct timeout md6_tmo;	/* master down timeout (presumably for IPv6) */

	u_int64_t vhe_replay_cookie;	/* counter copied into our advertisements */

	/* authentication */
#define CARP_HMAC_PAD	64
	unsigned char vhe_pad[CARP_HMAC_PAD];	/* HMAC key pad */
	SHA1_CTX vhe_sha1[HMAC_MAX];	/* precomputed inner hash per variant */

	u_int8_t vhe_enaddr[ETHER_ADDR_LEN];
	struct sockaddr_dl vhe_sdl;	/* for IPv6 ndp balancing */
};
129 
/*
 * Per-interface state of one carp(4) pseudo-interface.  The embedded
 * arpcom provides the ifnet (sc_if); sc_carpdev is the interface the carp
 * interface is attached to and is NULL while detached.
 */
struct carp_softc {
	struct arpcom sc_ac;
#define	sc_if		sc_ac.ac_if
#define	sc_carpdev	sc_ac.ac_if.if_carpdev
	void *ah_cookie;
	void *lh_cookie;	/* link state hook, removed in carpdetach() */
	struct ip_moptions sc_imo;
#ifdef INET6
	struct ip6_moptions sc_im6o;
#endif /* INET6 */
	TAILQ_ENTRY(carp_softc) sc_list;	/* linkage on carp_if.vhif_vrs */

	int sc_suppress;
	int sc_bow_out;		/* when set, carp_send_ad() advertises 255/255 */

	int sc_sendad_errors;
#define CARP_SENDAD_MAX_ERRORS(sc) (3 * (sc)->sc_vhe_count)
	int sc_sendad_success;
#define CARP_SENDAD_MIN_SUCCESS(sc) (3 * (sc)->sc_vhe_count)

	char sc_curlladdr[ETHER_ADDR_LEN];

	LIST_HEAD(__carp_vhosthead, carp_vhost_entry)	carp_vhosts;
	int sc_vhe_count;	/* number of entries on carp_vhosts */
	u_int8_t sc_vhids[CARP_MAXNODES];
	u_int8_t sc_advskews[CARP_MAXNODES];
	u_int8_t sc_balancing;

	int sc_naddrs;
	int sc_naddrs6;
	int sc_advbase;		/* seconds */

	/* authentication */
	unsigned char sc_key[CARP_KEY_LEN];

	u_int32_t sc_hashkey[2];	/* derived in carp_hmac_prepare_ctx() */
	u_int32_t sc_lsmask;		/* load sharing mask */
	int sc_lscount;			/* # load sharing interfaces (max 32) */
	int sc_delayed_arp;		/* delayed ARP request countdown */

	struct in_addr sc_peer;

	LIST_HEAD(__carp_mchead, carp_mc_entry)	carp_mc_listhead;
	struct carp_vhost_entry *cur_vhe; /* current active vhe */
};
175 
/*
 * Tunables indexed by CARPCTL_* id; read and written via carp_sysctl().
 * This file consults the CARPCTL_ALLOW, CARPCTL_PREEMPT and CARPCTL_LOG
 * slots.
 */
int carp_opts[CARPCTL_MAXID] = { 0, 1, 0, LOG_CRIT };	/* XXX for now */
/* Global counters, exported read-only through CARPCTL_STATS. */
struct carpstats carpstats;
178 
/*
 * State hung off a parent interface's if_carp pointer: the list of carp
 * interfaces attached to it.  Freed in carpdetach() when the last carp
 * interface is removed.
 */
struct carp_if {
	TAILQ_HEAD(, carp_softc) vhif_vrs;	/* attached carp interfaces */
	int vhif_nvrs;				/* number of entries on vhif_vrs */

	struct ifnet *vhif_ifp;
};
185 
/*
 * Log a message at priority l when the CARPCTL_LOG tunable is at least l.
 * The message is prefixed with the interface name of sc, or with "carp: "
 * when sc is a null pointer.  s must be a parenthesized addlog() argument
 * list, e.g. CARP_LOG(LOG_INFO, sc, ("bad ttl %d", ttl)).
 *
 * l is parenthesized so that an expression argument keeps its meaning in
 * the comparison; note that l and sc are still evaluated more than once.
 */
#define	CARP_LOG(l, sc, s)						\
	do {								\
		if (carp_opts[CARPCTL_LOG] >= (l)) {			\
			if (sc)						\
				log((l), "%s: ",			\
				    (sc)->sc_if.if_xname);		\
			else						\
				log((l), "carp: ");			\
			addlog s;					\
			addlog("\n");					\
		}							\
	} while (0)
198 
/*
 * Prototypes for the functions of this file that are not declared in
 * <netinet/ip_carp.h>.
 */
void	carp_hmac_prepare(struct carp_softc *);
void	carp_hmac_prepare_ctx(struct carp_vhost_entry *, u_int8_t);
void	carp_hmac_generate(struct carp_vhost_entry *, u_int32_t *,
	    unsigned char *, u_int8_t);
int	carp_hmac_verify(struct carp_vhost_entry *, u_int32_t *,
	    unsigned char *);
void	carp_setroute(struct carp_softc *, int);
void	carp_proto_input_c(struct mbuf *, struct carp_header *, int,
	    sa_family_t);
void	carpattach(int);
void	carpdetach(struct carp_softc *);
int	carp_prepare_ad(struct mbuf *, struct carp_vhost_entry *,
	    struct carp_header *);
void	carp_send_ad_all(void);
void	carp_vhe_send_ad_all(struct carp_softc *);
void	carp_send_ad(void *);
void	carp_send_arp(struct carp_softc *);
void	carp_master_down(void *);
int	carp_ioctl(struct ifnet *, u_long, caddr_t);
int	carp_vhids_ioctl(struct carp_softc *, struct carpreq *);
int	carp_check_dup_vhids(struct carp_softc *, struct carp_if *,
	    struct carpreq *);
void	carp_ifgroup_ioctl(struct ifnet *, u_long, caddr_t);
void	carp_ifgattr_ioctl(struct ifnet *, u_long, caddr_t);
void	carp_start(struct ifnet *);
void	carp_setrun_all(struct carp_softc *, sa_family_t);
void	carp_setrun(struct carp_vhost_entry *, sa_family_t);
void	carp_set_state_all(struct carp_softc *, int);
void	carp_set_state(struct carp_vhost_entry *, int);
void	carp_multicast_cleanup(struct carp_softc *);
int	carp_set_ifp(struct carp_softc *, struct ifnet *);
void	carp_set_enaddr(struct carp_softc *);
void	carp_set_vhe_enaddr(struct carp_vhost_entry *);
void	carp_addr_updated(void *);
u_int32_t	carp_hash(struct carp_softc *, u_char *);
int	carp_set_addr(struct carp_softc *, struct sockaddr_in *);
int	carp_join_multicast(struct carp_softc *);
#ifdef INET6
void	carp_send_na(struct carp_softc *);
int	carp_set_addr6(struct carp_softc *, struct sockaddr_in6 *);
int	carp_join_multicast6(struct carp_softc *);
#endif
int	carp_clone_create(struct if_clone *, int);
int	carp_clone_destroy(struct ifnet *);
int	carp_ether_addmulti(struct carp_softc *, struct ifreq *);
int	carp_ether_delmulti(struct carp_softc *, struct ifreq *);
void	carp_ether_purgemulti(struct carp_softc *);
int	carp_group_demote_count(struct carp_softc *);
void	carp_update_lsmask(struct carp_softc *);
int	carp_new_vhost(struct carp_softc *, int, int);
void	carp_destroy_vhosts(struct carp_softc *);
void	carp_del_all_timeouts(struct carp_softc *);

/* Cloner for "carp" interfaces; registered by carpattach(). */
struct if_clone carp_cloner =
    IF_CLONE_INITIALIZER("carp", carp_clone_create, carp_clone_destroy);

/* Internet checksum over the first _l bytes of mbuf chain _m, as 16 bits. */
#define carp_cksum(_m, _l)	((u_int16_t)in_cksum((_m), (_l)))
256 
257 void
258 carp_hmac_prepare(struct carp_softc *sc)
259 {
260 	struct carp_vhost_entry *vhe;
261 	u_int8_t i;
262 
263 	LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) {
264 		for (i = 0; i < HMAC_MAX; i++) {
265 			carp_hmac_prepare_ctx(vhe, i);
266 		}
267 	}
268 }
269 
270 void
271 carp_hmac_prepare_ctx(struct carp_vhost_entry *vhe, u_int8_t ctx)
272 {
273 	struct carp_softc *sc = vhe->parent_sc;
274 
275 	u_int8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT;
276 	u_int8_t vhid = vhe->vhid & 0xff;
277 	SHA1_CTX sha1ctx;
278 	u_int32_t kmd[5];
279 	struct ifaddr *ifa;
280 	int i, found;
281 	struct in_addr last, cur, in;
282 #ifdef INET6
283 	struct in6_addr last6, cur6, in6;
284 #endif /* INET6 */
285 
286 	/* compute ipad from key */
287 	bzero(vhe->vhe_pad, sizeof(vhe->vhe_pad));
288 	bcopy(sc->sc_key, vhe->vhe_pad, sizeof(sc->sc_key));
289 	for (i = 0; i < sizeof(vhe->vhe_pad); i++)
290 		vhe->vhe_pad[i] ^= 0x36;
291 
292 	/* precompute first part of inner hash */
293 	SHA1Init(&vhe->vhe_sha1[ctx]);
294 	SHA1Update(&vhe->vhe_sha1[ctx], vhe->vhe_pad, sizeof(vhe->vhe_pad));
295 	SHA1Update(&vhe->vhe_sha1[ctx], (void *)&version, sizeof(version));
296 	SHA1Update(&vhe->vhe_sha1[ctx], (void *)&type, sizeof(type));
297 
298 	/* generate a key for the arpbalance hash, before the vhid is hashed */
299 	if (vhe->vhe_leader) {
300 		bcopy(&vhe->vhe_sha1[ctx], &sha1ctx, sizeof(sha1ctx));
301 		SHA1Final((unsigned char *)kmd, &sha1ctx);
302 		sc->sc_hashkey[0] = kmd[0] ^ kmd[1];
303 		sc->sc_hashkey[1] = kmd[2] ^ kmd[3];
304 	}
305 
306 	/* the rest of the precomputation */
307 	if (vhe->vhe_leader && bcmp(sc->sc_ac.ac_enaddr, vhe->vhe_enaddr,
308 	    ETHER_ADDR_LEN) != 0)
309 		SHA1Update(&vhe->vhe_sha1[ctx], sc->sc_ac.ac_enaddr,
310 		    ETHER_ADDR_LEN);
311 
312 	SHA1Update(&vhe->vhe_sha1[ctx], (void *)&vhid, sizeof(vhid));
313 
314 	/* Hash the addresses from smallest to largest, not interface order */
315 #ifdef INET
316 	cur.s_addr = 0;
317 	do {
318 		found = 0;
319 		last = cur;
320 		cur.s_addr = 0xffffffff;
321 		TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) {
322 			in.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr;
323 			if (ifa->ifa_addr->sa_family == AF_INET &&
324 			    ntohl(in.s_addr) > ntohl(last.s_addr) &&
325 			    ntohl(in.s_addr) < ntohl(cur.s_addr)) {
326 				cur.s_addr = in.s_addr;
327 				found++;
328 			}
329 		}
330 		if (found)
331 			SHA1Update(&vhe->vhe_sha1[ctx],
332 			    (void *)&cur, sizeof(cur));
333 	} while (found);
334 #endif /* INET */
335 #ifdef INET6
336 	memset(&cur6, 0x00, sizeof(cur6));
337 	do {
338 		found = 0;
339 		last6 = cur6;
340 		memset(&cur6, 0xff, sizeof(cur6));
341 		TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) {
342 			in6 = ifatoia6(ifa)->ia_addr.sin6_addr;
343 			if (IN6_IS_SCOPE_EMBED(&in6)) {
344 				if (ctx == HMAC_NOV6LL)
345 					continue;
346 				in6.s6_addr16[1] = 0;
347 			}
348 			if (ifa->ifa_addr->sa_family == AF_INET6 &&
349 			    memcmp(&in6, &last6, sizeof(in6)) > 0 &&
350 			    memcmp(&in6, &cur6, sizeof(in6)) < 0) {
351 				cur6 = in6;
352 				found++;
353 			}
354 		}
355 		if (found)
356 			SHA1Update(&vhe->vhe_sha1[ctx],
357 			    (void *)&cur6, sizeof(cur6));
358 	} while (found);
359 #endif /* INET6 */
360 
361 	/* convert ipad to opad */
362 	for (i = 0; i < sizeof(vhe->vhe_pad); i++)
363 		vhe->vhe_pad[i] ^= 0x36 ^ 0x5c;
364 }
365 
366 void
367 carp_hmac_generate(struct carp_vhost_entry *vhe, u_int32_t counter[2],
368     unsigned char md[20], u_int8_t ctx)
369 {
370 	SHA1_CTX sha1ctx;
371 
372 	/* fetch first half of inner hash */
373 	bcopy(&vhe->vhe_sha1[ctx], &sha1ctx, sizeof(sha1ctx));
374 
375 	SHA1Update(&sha1ctx, (void *)counter, sizeof(vhe->vhe_replay_cookie));
376 	SHA1Final(md, &sha1ctx);
377 
378 	/* outer hash */
379 	SHA1Init(&sha1ctx);
380 	SHA1Update(&sha1ctx, vhe->vhe_pad, sizeof(vhe->vhe_pad));
381 	SHA1Update(&sha1ctx, md, 20);
382 	SHA1Final(md, &sha1ctx);
383 }
384 
385 int
386 carp_hmac_verify(struct carp_vhost_entry *vhe, u_int32_t counter[2],
387     unsigned char md[20])
388 {
389 	unsigned char md2[20];
390 	u_int8_t i;
391 
392 	for (i = 0; i < HMAC_MAX; i++) {
393 		carp_hmac_generate(vhe, counter, md2, i);
394 		if (!bcmp(md, md2, sizeof(md2)))
395 			return (0);
396 	}
397 	return (1);
398 }
399 
/*
 * Adjust the routes for every address configured on the carp interface sc.
 * cmd is RTM_ADD or RTM_DELETE.
 *
 * IPv4: the existing host route for the address is always deleted first.
 * For RTM_ADD a host route and/or a cloning network route is then
 * installed, depending on what the routing table already holds for the
 * address; for RTM_DELETE nothing further is done.
 * IPv6: skipped when sc_balancing >= CARP_BAL_IP; otherwise the loopback
 * route is added for RTM_ADD and removed for any other cmd.
 *
 * The whole walk runs at splsoftnet().
 */
void
carp_setroute(struct carp_softc *sc, int cmd)
{
	struct ifaddr *ifa;
	int s;

	/* XXX this mess needs fixing */

	s = splsoftnet();
	TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) {
		switch (ifa->ifa_addr->sa_family) {
		case AF_INET: {
			int error;
			struct sockaddr sa;
			struct rtentry *rt;
			struct radix_node_head *rnh;
			struct radix_node *rn;
			struct rt_addrinfo info;
			int hr_otherif, nr_ourif;
			struct sockaddr_rtlabel	sa_rl;
			const char *label;

			/* Remove the existing host route, if any */
			bzero(&info, sizeof(info));
			info.rti_info[RTAX_DST] = ifa->ifa_addr;
			info.rti_flags = RTF_HOST;
			error = rtrequest1(RTM_DELETE, &info, RTP_CONNECTED,
			    NULL, 0);
			rt_missmsg(RTM_DELETE, &info, info.rti_flags, NULL,
			    error, 0);

			/* Check for our address on another interface */
			/* XXX cries for proper API */
			rnh = rt_gettable(ifa->ifa_addr->sa_family, 0);
			rn = rnh->rnh_matchaddr(ifa->ifa_addr, rnh);
			rt = (struct rtentry *)rn;
			hr_otherif = (rt && rt->rt_ifp != &sc->sc_if &&
			    rt->rt_flags & (RTF_CLONING|RTF_CLONED));

			/* Check for a network route on our interface */
			/* sa becomes the network address: address & netmask */
			bcopy(ifa->ifa_addr, &sa, sizeof(sa));
			satosin(&sa)->sin_addr.s_addr = satosin(ifa->ifa_netmask
			    )->sin_addr.s_addr & satosin(&sa)->sin_addr.s_addr;
			rt = (struct rtentry *)rt_lookup(&sa,
			    ifa->ifa_netmask, 0);
			nr_ourif = (rt && rt->rt_ifp == &sc->sc_if);

			/* Restore the route label */
			/* (taken from the network route found above, if any) */
			bzero(&sa_rl, sizeof(sa_rl));
			if (rt && rt->rt_labelid) {
				sa_rl.sr_len = sizeof(sa_rl);
				sa_rl.sr_family = AF_UNSPEC;
				label = rtlabel_id2name(rt->rt_labelid);
				if (label != NULL)
					strlcpy(sa_rl.sr_label, label,
					    sizeof(sa_rl.sr_label));
			}

			switch (cmd) {
			case RTM_ADD:
				if (hr_otherif) {
					/* install a plain host route */
					ifa->ifa_rtrequest = NULL;
					ifa->ifa_flags &= ~RTF_CLONING;
					bzero(&info, sizeof(info));
					info.rti_info[RTAX_DST] = ifa->ifa_addr;
					info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr;
					info.rti_flags = RTF_UP | RTF_HOST;
					error = rtrequest1(RTM_ADD, &info,
					    RTP_CONNECTED, NULL, 0);
					rt_missmsg(RTM_ADD, &info, info.rti_flags,
					    &sc->sc_if, error, 0);
				}
				if (!hr_otherif || nr_ourif || !rt) {
					/*
					 * Replace a non-cloning network route
					 * on our interface before adding the
					 * cloning one below.
					 */
					if (nr_ourif && !(rt->rt_flags &
					    RTF_CLONING)) {
						bzero(&info, sizeof(info));
						info.rti_info[RTAX_DST] = &sa;
						info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
						error = rtrequest1(RTM_DELETE, &info, RTP_CONNECTED, NULL, 0);
						rt_missmsg(RTM_DELETE, &info, info.rti_flags, NULL,
						    error, 0);
					}

					ifa->ifa_rtrequest = arp_rtrequest;
					ifa->ifa_flags |= RTF_CLONING;

					bzero(&info, sizeof(info));
					info.rti_info[RTAX_DST] = &sa;
					info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr;
					info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
					info.rti_info[RTAX_LABEL] =
					    (struct sockaddr *)&sa_rl;
					error = rtrequest1(RTM_ADD, &info, RTP_CONNECTED, NULL, 0);
					if (error == 0)
						ifa->ifa_flags |= IFA_ROUTE;
					rt_missmsg(RTM_ADD, &info, info.rti_flags,
					    &sc->sc_if, error, 0);
				}
				break;
			case RTM_DELETE:
				/* the host route was already removed above */
				break;
			default:
				break;
			}
			break;
		}

#ifdef INET6
		case AF_INET6:
			if (sc->sc_balancing >= CARP_BAL_IP)
				continue;
			if (cmd == RTM_ADD)
				in6_ifaddloop(ifa);
			else
				in6_ifremloop(ifa);
			break;
#endif /* INET6 */
		default:
			break;
		}
	}
	splx(s);
}
523 
524 /*
525  * process input packet.
526  * we have rearranged checks order compared to the rfc,
527  * but it seems more efficient this way or not possible otherwise.
528  */
529 void
530 carp_proto_input(struct mbuf *m, ...)
531 {
532 	struct ip *ip = mtod(m, struct ip *);
533 	struct ifnet *ifp = m->m_pkthdr.rcvif;
534 	struct carp_softc *sc = NULL;
535 	struct carp_header *ch;
536 	int iplen, len, hlen, ismulti;
537 	va_list ap;
538 
539 	va_start(ap, m);
540 	hlen = va_arg(ap, int);
541 	va_end(ap);
542 
543 	carpstats.carps_ipackets++;
544 
545 	if (!carp_opts[CARPCTL_ALLOW]) {
546 		m_freem(m);
547 		return;
548 	}
549 
550 	ismulti = IN_MULTICAST(ip->ip_dst.s_addr);
551 
552 	/* check if received on a valid carp interface */
553 	if (!((ifp->if_type == IFT_CARP && ismulti) ||
554 	    (ifp->if_type != IFT_CARP && !ismulti && ifp->if_carp != NULL))) {
555 		carpstats.carps_badif++;
556 		CARP_LOG(LOG_INFO, sc, ("packet received on non-carp interface: %s",
557 		    m->m_pkthdr.rcvif->if_xname));
558 		m_freem(m);
559 		return;
560 	}
561 
562 	/* verify that the IP TTL is 255.  */
563 	if (ip->ip_ttl != CARP_DFLTTL) {
564 		carpstats.carps_badttl++;
565 		CARP_LOG(LOG_NOTICE, sc, ("received ttl %d != %d on %s", ip->ip_ttl,
566 		    CARP_DFLTTL, m->m_pkthdr.rcvif->if_xname));
567 		m_freem(m);
568 		return;
569 	}
570 
571 	/*
572 	 * verify that the received packet length is
573 	 * equal to the CARP header
574 	 */
575 	iplen = ip->ip_hl << 2;
576 	len = iplen + sizeof(*ch);
577 	if (len > m->m_pkthdr.len) {
578 		carpstats.carps_badlen++;
579 		CARP_LOG(LOG_INFO, sc, ("packet too short %d on %s", m->m_pkthdr.len,
580 		    m->m_pkthdr.rcvif->if_xname));
581 		m_freem(m);
582 		return;
583 	}
584 
585 	if ((m = m_pullup2(m, len)) == NULL) {
586 		carpstats.carps_hdrops++;
587 		return;
588 	}
589 	ip = mtod(m, struct ip *);
590 	ch = (void *)ip + iplen;
591 
592 	/* verify the CARP checksum */
593 	m->m_data += iplen;
594 	if (carp_cksum(m, len - iplen)) {
595 		carpstats.carps_badsum++;
596 		CARP_LOG(LOG_INFO, sc, ("checksum failed on %s",
597 		    m->m_pkthdr.rcvif->if_xname));
598 		m_freem(m);
599 		return;
600 	}
601 	m->m_data -= iplen;
602 
603 	carp_proto_input_c(m, ch, ismulti, AF_INET);
604 }
605 
606 #ifdef INET6
607 int
608 carp6_proto_input(struct mbuf **mp, int *offp, int proto)
609 {
610 	struct mbuf *m = *mp;
611 	struct carp_softc *sc = NULL;
612 	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
613 	struct carp_header *ch;
614 	u_int len;
615 
616 	carpstats.carps_ipackets6++;
617 
618 	if (!carp_opts[CARPCTL_ALLOW]) {
619 		m_freem(m);
620 		return (IPPROTO_DONE);
621 	}
622 
623 	/* check if received on a valid carp interface */
624 	if (m->m_pkthdr.rcvif->if_type != IFT_CARP) {
625 		carpstats.carps_badif++;
626 		CARP_LOG(LOG_INFO, sc, ("packet received on non-carp interface: %s",
627 		    m->m_pkthdr.rcvif->if_xname));
628 		m_freem(m);
629 		return (IPPROTO_DONE);
630 	}
631 
632 	/* verify that the IP TTL is 255 */
633 	if (ip6->ip6_hlim != CARP_DFLTTL) {
634 		carpstats.carps_badttl++;
635 		CARP_LOG(LOG_NOTICE, sc, ("received ttl %d != %d on %s", ip6->ip6_hlim,
636 		    CARP_DFLTTL, m->m_pkthdr.rcvif->if_xname));
637 		m_freem(m);
638 		return (IPPROTO_DONE);
639 	}
640 
641 	/* verify that we have a complete carp packet */
642 	len = m->m_len;
643 	IP6_EXTHDR_GET(ch, struct carp_header *, m, *offp, sizeof(*ch));
644 	if (ch == NULL) {
645 		carpstats.carps_badlen++;
646 		CARP_LOG(LOG_INFO, sc, ("packet size %u too small", len));
647 		return (IPPROTO_DONE);
648 	}
649 
650 
651 	/* verify the CARP checksum */
652 	m->m_data += *offp;
653 	if (carp_cksum(m, sizeof(*ch))) {
654 		carpstats.carps_badsum++;
655 		CARP_LOG(LOG_INFO, sc, ("checksum failed, on %s",
656 		    m->m_pkthdr.rcvif->if_xname));
657 		m_freem(m);
658 		return (IPPROTO_DONE);
659 	}
660 	m->m_data -= *offp;
661 
662 	carp_proto_input_c(m, ch, 1, AF_INET6);
663 	return (IPPROTO_DONE);
664 }
665 #endif /* INET6 */
666 
667 void
668 carp_proto_input_c(struct mbuf *m, struct carp_header *ch, int ismulti,
669     sa_family_t af)
670 {
671 	struct ifnet *ifp = m->m_pkthdr.rcvif;
672 	struct carp_softc *sc;
673 	struct carp_vhost_entry *vhe;
674 	struct timeval sc_tv, ch_tv;
675 	struct carp_if *cif;
676 
677 	if (ifp->if_type == IFT_CARP)
678 		cif = (struct carp_if *)ifp->if_carpdev->if_carp;
679 	else
680 		cif = (struct carp_if *)ifp->if_carp;
681 
682 	TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) {
683 		if (af == AF_INET &&
684 		    ismulti != IN_MULTICAST(sc->sc_peer.s_addr))
685 			continue;
686 		LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) {
687 			if (vhe->vhid == ch->carp_vhid)
688 				goto found;
689 		}
690 	}
691  found:
692 
693 	if (!sc || (sc->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) !=
694 	    (IFF_UP|IFF_RUNNING)) {
695 		carpstats.carps_badvhid++;
696 		m_freem(m);
697 		return;
698 	}
699 
700 	getmicrotime(&sc->sc_if.if_lastchange);
701 	sc->sc_if.if_ipackets++;
702 	sc->sc_if.if_ibytes += m->m_pkthdr.len;
703 
704 	/* verify the CARP version. */
705 	if (ch->carp_version != CARP_VERSION) {
706 		carpstats.carps_badver++;
707 		sc->sc_if.if_ierrors++;
708 		CARP_LOG(LOG_NOTICE, sc, ("invalid version %d != %d",
709 		    ch->carp_version, CARP_VERSION));
710 		m_freem(m);
711 		return;
712 	}
713 
714 	/* verify the hash */
715 	if (carp_hmac_verify(vhe, ch->carp_counter, ch->carp_md)) {
716 		carpstats.carps_badauth++;
717 		sc->sc_if.if_ierrors++;
718 		CARP_LOG(LOG_INFO, sc, ("incorrect hash"));
719 		m_freem(m);
720 		return;
721 	}
722 
723 	if (!bcmp(&vhe->vhe_replay_cookie, ch->carp_counter,
724 	    sizeof(ch->carp_counter))) {
725 		/* Do not log duplicates from non simplex interfaces */
726 		if (sc->sc_carpdev->if_flags & IFF_SIMPLEX) {
727 			carpstats.carps_badauth++;
728 			sc->sc_if.if_ierrors++;
729 			CARP_LOG(LOG_WARNING, sc,
730 			    ("replay or network loop detected"));
731 		}
732 		m_freem(m);
733 		return;
734 	}
735 
736 	sc_tv.tv_sec = sc->sc_advbase;
737 	if (carp_group_demote_count(sc) && vhe->advskew <  240)
738 		sc_tv.tv_usec = 240 * 1000000 / 256;
739 	else
740 		sc_tv.tv_usec = vhe->advskew * 1000000 / 256;
741 	ch_tv.tv_sec = ch->carp_advbase;
742 	ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256;
743 
744 	switch (vhe->state) {
745 	case INIT:
746 		break;
747 	case MASTER:
748 		/*
749 		 * If we receive an advertisement from a master who's going to
750 		 * be more frequent than us, go into BACKUP state.
751 		 */
752 		if (timercmp(&sc_tv, &ch_tv, >) ||
753 		    (timercmp(&sc_tv, &ch_tv, ==) &&
754 		    ch->carp_demote <=
755 		    (carp_group_demote_count(sc) & 0xff))) {
756 			timeout_del(&vhe->ad_tmo);
757 			carp_set_state(vhe, BACKUP);
758 			carp_setrun(vhe, 0);
759 			if (vhe->vhe_leader)
760 				carp_setroute(sc, RTM_DELETE);
761 		}
762 		break;
763 	case BACKUP:
764 		/*
765 		 * If we're pre-empting masters who advertise slower than us,
766 		 * and this one claims to be slower, treat him as down.
767 		 */
768 		if (carp_opts[CARPCTL_PREEMPT] && timercmp(&sc_tv, &ch_tv, <)) {
769 			carp_master_down(vhe);
770 			break;
771 		}
772 
773 		/*
774 		 * Take over masters advertising with a higher demote count,
775 		 * regardless of CARPCTL_PREEMPT.
776 		 */
777 		if (ch->carp_demote > (carp_group_demote_count(sc) & 0xff)) {
778 			carp_master_down(vhe);
779 			break;
780 		}
781 
782 		/*
783 		 *  If the master is going to advertise at such a low frequency
784 		 *  that he's guaranteed to time out, we'd might as well just
785 		 *  treat him as timed out now.
786 		 */
787 		sc_tv.tv_sec = sc->sc_advbase * 3;
788 		if (timercmp(&sc_tv, &ch_tv, <)) {
789 			carp_master_down(vhe);
790 			break;
791 		}
792 
793 		/*
794 		 * Otherwise, we reset the counter and wait for the next
795 		 * advertisement.
796 		 */
797 		carp_setrun(vhe, af);
798 		break;
799 	}
800 
801 	m_freem(m);
802 	return;
803 }
804 
805 int
806 carp_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
807     size_t newlen)
808 {
809 	/* All sysctl names at this level are terminal. */
810 	if (namelen != 1)
811 		return (ENOTDIR);
812 
813 	switch (name[0]) {
814 	case CARPCTL_STATS:
815 		if (newp != NULL)
816 			return (EPERM);
817 		return (sysctl_struct(oldp, oldlenp, newp, newlen,
818 		    &carpstats, sizeof(carpstats)));
819 	default:
820 		if (name[0] <= 0 || name[0] >= CARPCTL_MAXID)
821 			return (ENOPROTOOPT);
822 		return sysctl_int(oldp, oldlenp, newp, newlen,
823 		    &carp_opts[name[0]]);
824 	}
825 }
826 
827 /*
828  * Interface side of the CARP implementation.
829  */
830 
831 /* ARGSUSED */
832 void
833 carpattach(int n)
834 {
835 	struct ifg_group	*ifg;
836 
837 	if ((ifg = if_creategroup("carp")) != NULL)
838 		ifg->ifg_refcnt++;	/* keep around even if empty */
839 	if_clone_attach(&carp_cloner);
840 }
841 
842 int
843 carp_clone_create(ifc, unit)
844 	struct if_clone *ifc;
845 	int unit;
846 {
847 	struct carp_softc *sc;
848 	struct ifnet *ifp;
849 
850 	sc = malloc(sizeof(*sc), M_DEVBUF, M_NOWAIT);
851 	if (!sc)
852 		return (ENOMEM);
853 	bzero(sc, sizeof(*sc));
854 
855 	LIST_INIT(&sc->carp_vhosts);
856 	sc->sc_vhe_count = 0;
857 	if (carp_new_vhost(sc, 0, 0)) {
858 		free(sc, M_DEVBUF);
859 		return (ENOMEM);
860 	}
861 
862 	sc->sc_suppress = 0;
863 	sc->sc_advbase = CARP_DFLTINTV;
864 	sc->sc_naddrs = sc->sc_naddrs6 = 0;
865 #ifdef INET6
866 	sc->sc_im6o.im6o_multicast_hlim = CARP_DFLTTL;
867 #endif /* INET6 */
868 	sc->sc_imo.imo_membership = (struct in_multi **)malloc(
869 	    (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_IPMOPTS,
870 	    M_WAITOK|M_ZERO);
871 	sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS;
872 
873 	LIST_INIT(&sc->carp_mc_listhead);
874 	ifp = &sc->sc_if;
875 	ifp->if_softc = sc;
876 	snprintf(ifp->if_xname, sizeof ifp->if_xname, "%s%d", ifc->ifc_name,
877 	    unit);
878 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
879 	ifp->if_ioctl = carp_ioctl;
880 	ifp->if_start = carp_start;
881 	ifp->if_output = carp_output;
882 	ifp->if_type = IFT_CARP;
883 	ifp->if_addrlen = ETHER_ADDR_LEN;
884 	ifp->if_hdrlen = ETHER_HDR_LEN;
885 	ifp->if_mtu = ETHERMTU;
886 	IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen);
887 	IFQ_SET_READY(&ifp->if_snd);
888 	if_attach(ifp);
889 
890 	if_alloc_sadl(ifp);
891 	LIST_INIT(&sc->sc_ac.ac_multiaddrs);
892 #if NBPFILTER > 0
893 	bpfattach(&ifp->if_bpf, ifp, DLT_EN10MB, ETHER_HDR_LEN);
894 #endif
895 	return (0);
896 }
897 
898 int
899 carp_new_vhost(struct carp_softc *sc, int vhid, int advskew)
900 {
901 	struct carp_vhost_entry *vhe, *vhe0;
902 
903 	vhe = malloc(sizeof(*vhe), M_DEVBUF, M_NOWAIT | M_ZERO);
904 	if (vhe == NULL)
905 		return (ENOMEM);
906 
907 	vhe->parent_sc = sc;
908 	vhe->vhid = vhid;
909 	vhe->advskew = advskew;
910 	timeout_set(&vhe->ad_tmo, carp_send_ad, vhe);
911 	timeout_set(&vhe->md_tmo, carp_master_down, vhe);
912 	timeout_set(&vhe->md6_tmo, carp_master_down, vhe);
913 
914 	/* mark the first vhe as leader */
915 	if (LIST_EMPTY(&sc->carp_vhosts)) {
916 		vhe->vhe_leader = 1;
917 		LIST_INSERT_HEAD(&sc->carp_vhosts, vhe, vhost_entries);
918 		sc->sc_vhe_count = 1;
919 		return (0);
920 	}
921 
922 	LIST_FOREACH(vhe0, &sc->carp_vhosts, vhost_entries)
923 		if (LIST_NEXT(vhe0, vhost_entries) == NULL)
924 			break;
925 	LIST_INSERT_AFTER(vhe0, vhe, vhost_entries);
926 	sc->sc_vhe_count++;
927 
928 	return (0);
929 }
930 
931 int
932 carp_clone_destroy(struct ifnet *ifp)
933 {
934 	struct carp_softc *sc = ifp->if_softc;
935 
936 	carpdetach(sc);
937 	ether_ifdetach(ifp);
938 	if_detach(ifp);
939 	carp_destroy_vhosts(ifp->if_softc);
940 	free(sc->sc_imo.imo_membership, M_IPMOPTS);
941 	free(sc, M_DEVBUF);
942 
943 	return (0);
944 }
945 
946 void
947 carp_del_all_timeouts(struct carp_softc *sc)
948 {
949 	struct carp_vhost_entry *vhe;
950 
951 	LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) {
952 		timeout_del(&vhe->ad_tmo);
953 		timeout_del(&vhe->md_tmo);
954 		timeout_del(&vhe->md6_tmo);
955 	}
956 }
957 
958 void
959 carpdetach(struct carp_softc *sc)
960 {
961 	struct carp_if *cif;
962 	int s;
963 
964 	carp_del_all_timeouts(sc);
965 
966 	if (sc->sc_suppress)
967 		carp_group_demote_adj(&sc->sc_if, -1);
968 	sc->sc_suppress = 0;
969 
970 	if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS(sc))
971 		carp_group_demote_adj(&sc->sc_if, -1);
972 	sc->sc_sendad_errors = 0;
973 
974 	carp_set_state_all(sc, INIT);
975 	sc->sc_if.if_flags &= ~IFF_UP;
976 	carp_setrun_all(sc, 0);
977 	carp_multicast_cleanup(sc);
978 
979 	s = splnet();
980 	if (sc->sc_carpdev != NULL) {
981 		if (sc->lh_cookie != NULL)
982 			hook_disestablish(sc->sc_carpdev->if_linkstatehooks,
983 			    sc->lh_cookie);
984 		cif = (struct carp_if *)sc->sc_carpdev->if_carp;
985 		TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list);
986 		if (!--cif->vhif_nvrs) {
987 			ifpromisc(sc->sc_carpdev, 0);
988 			sc->sc_carpdev->if_carp = NULL;
989 			free(cif, M_IFADDR);
990 		}
991 	}
992 	sc->sc_carpdev = NULL;
993 	splx(s);
994 }
995 
996 /* Detach an interface from the carp. */
997 void
998 carp_ifdetach(struct ifnet *ifp)
999 {
1000 	struct carp_softc *sc, *nextsc;
1001 	struct carp_if *cif = (struct carp_if *)ifp->if_carp;
1002 
1003 	for (sc = TAILQ_FIRST(&cif->vhif_vrs); sc; sc = nextsc) {
1004 		nextsc = TAILQ_NEXT(sc, sc_list);
1005 		carpdetach(sc);
1006 	}
1007 }
1008 
1009 void
1010 carp_destroy_vhosts(struct carp_softc *sc)
1011 {
1012 	/* XXX bow out? */
1013 	struct carp_vhost_entry *vhe, *nvhe;
1014 
1015 	for (vhe = LIST_FIRST(&sc->carp_vhosts);
1016 	     vhe != LIST_END(&sc->carp_vhosts); vhe = nvhe) {
1017 		nvhe = LIST_NEXT(vhe, vhost_entries);
1018 		free(vhe, M_DEVBUF);
1019 	}
1020 	LIST_INIT(&sc->carp_vhosts);
1021 	sc->sc_vhe_count = 0;
1022 }
1023 
1024 int
1025 carp_prepare_ad(struct mbuf *m, struct carp_vhost_entry *vhe,
1026     struct carp_header *ch)
1027 {
1028 	if (!vhe->vhe_replay_cookie) {
1029 		arc4random_buf(&vhe->vhe_replay_cookie,
1030 		    sizeof(vhe->vhe_replay_cookie));
1031 	}
1032 
1033 	bcopy(&vhe->vhe_replay_cookie, ch->carp_counter,
1034 	    sizeof(ch->carp_counter));
1035 
1036 	/*
1037 	 * For the time being, do not include the IPv6 linklayer addresses
1038 	 * in the HMAC.
1039 	 */
1040 	carp_hmac_generate(vhe, ch->carp_counter, ch->carp_md, HMAC_NOV6LL);
1041 
1042 	return (0);
1043 }
1044 
1045 void
1046 carp_send_ad_all(void)
1047 {
1048 	struct ifnet *ifp;
1049 	struct carp_if *cif;
1050 	struct carp_softc *vh;
1051 
1052 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
1053 		if (ifp->if_carp == NULL || ifp->if_type == IFT_CARP)
1054 			continue;
1055 
1056 		cif = (struct carp_if *)ifp->if_carp;
1057 		TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
1058 			if ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) ==
1059 			    (IFF_UP|IFF_RUNNING)) {
1060 				carp_vhe_send_ad_all(vh);
1061 			}
1062 		}
1063 	}
1064 }
1065 
1066 void
1067 carp_vhe_send_ad_all(struct carp_softc *sc)
1068 {
1069 	struct carp_vhost_entry *vhe;
1070 
1071 	LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) {
1072 		if (vhe->state == MASTER)
1073 			carp_send_ad(vhe);
1074 	}
1075 }
1076 
1077 void
1078 carp_send_ad(void *v)
1079 {
1080 	struct carp_header ch;
1081 	struct timeval tv;
1082 	struct carp_vhost_entry *vhe = v;
1083 	struct carp_softc *sc = vhe->parent_sc;
1084 	struct carp_header *ch_ptr;
1085 
1086 	struct mbuf *m;
1087 	int error, len, advbase, advskew, s;
1088 	struct ifaddr *ifa;
1089 	struct sockaddr sa;
1090 
1091 	if (sc->sc_carpdev == NULL) {
1092 		sc->sc_if.if_oerrors++;
1093 		return;
1094 	}
1095 
1096 	s = splsoftnet();
1097 
1098 	/* bow out if we've gone to backup (the carp interface is going down) */
1099 	if (sc->sc_bow_out) {
1100 		advbase = 255;
1101 		advskew = 255;
1102 	} else {
1103 		advbase = sc->sc_advbase;
1104 		if (!carp_group_demote_count(sc) || vhe->advskew > 240)
1105 			advskew = vhe->advskew;
1106 		else
1107 			advskew = 240;
1108 		tv.tv_sec = advbase;
1109 		tv.tv_usec = advskew * 1000000 / 256;
1110 	}
1111 
1112 	ch.carp_version = CARP_VERSION;
1113 	ch.carp_type = CARP_ADVERTISEMENT;
1114 	ch.carp_vhid = vhe->vhid;
1115 	ch.carp_demote = carp_group_demote_count(sc) & 0xff;
1116 	ch.carp_advbase = advbase;
1117 	ch.carp_advskew = advskew;
1118 	ch.carp_authlen = 7;	/* XXX DEFINE */
1119 	ch.carp_cksum = 0;
1120 
1121 	sc->cur_vhe = vhe; /* we need the vhe later on the output path */
1122 
1123 #ifdef INET
1124 	if (sc->sc_naddrs) {
1125 		struct ip *ip;
1126 
1127 		MGETHDR(m, M_DONTWAIT, MT_HEADER);
1128 		if (m == NULL) {
1129 			sc->sc_if.if_oerrors++;
1130 			carpstats.carps_onomem++;
1131 			/* XXX maybe less ? */
1132 			goto retry_later;
1133 		}
1134 		len = sizeof(*ip) + sizeof(ch);
1135 		m->m_pkthdr.len = len;
1136 		m->m_pkthdr.rcvif = NULL;
1137 		m->m_len = len;
1138 		MH_ALIGN(m, m->m_len);
1139 		ip = mtod(m, struct ip *);
1140 		ip->ip_v = IPVERSION;
1141 		ip->ip_hl = sizeof(*ip) >> 2;
1142 		ip->ip_tos = IPTOS_LOWDELAY;
1143 		ip->ip_len = htons(len);
1144 		ip->ip_id = htons(ip_randomid());
1145 		ip->ip_off = htons(IP_DF);
1146 		ip->ip_ttl = CARP_DFLTTL;
1147 		ip->ip_p = IPPROTO_CARP;
1148 		ip->ip_sum = 0;
1149 
1150 		bzero(&sa, sizeof(sa));
1151 		sa.sa_family = AF_INET;
1152 		ifa = ifaof_ifpforaddr(&sa, sc->sc_carpdev);
1153 		if (ifa == NULL)
1154 			ip->ip_src.s_addr = 0;
1155 		else
1156 			ip->ip_src.s_addr =
1157 			    ifatoia(ifa)->ia_addr.sin_addr.s_addr;
1158 		ip->ip_dst.s_addr = sc->sc_peer.s_addr;
1159 		if (IN_MULTICAST(ip->ip_dst.s_addr))
1160 			m->m_flags |= M_MCAST;
1161 
1162 		ch_ptr = (void *)ip + sizeof(*ip);
1163 		bcopy(&ch, ch_ptr, sizeof(ch));
1164 		if (carp_prepare_ad(m, vhe, ch_ptr))
1165 			goto retry_later;
1166 
1167 		m->m_data += sizeof(*ip);
1168 		ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip));
1169 		m->m_data -= sizeof(*ip);
1170 
1171 		getmicrotime(&sc->sc_if.if_lastchange);
1172 		sc->sc_if.if_opackets++;
1173 		sc->sc_if.if_obytes += len;
1174 		carpstats.carps_opackets++;
1175 
1176 		error = ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo,
1177 		    NULL);
1178 		if (error) {
1179 			if (error == ENOBUFS)
1180 				carpstats.carps_onomem++;
1181 			else
1182 				CARP_LOG(LOG_WARNING, sc,
1183 				    ("ip_output failed: %d", error));
1184 			sc->sc_if.if_oerrors++;
1185 			if (sc->sc_sendad_errors < INT_MAX)
1186 				sc->sc_sendad_errors++;
1187 			if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS(sc))
1188 				carp_group_demote_adj(&sc->sc_if, 1);
1189 			sc->sc_sendad_success = 0;
1190 		} else {
1191 			if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS(sc)) {
1192 				if (++sc->sc_sendad_success >=
1193 				    CARP_SENDAD_MIN_SUCCESS(sc)) {
1194 					carp_group_demote_adj(&sc->sc_if, -1);
1195 					sc->sc_sendad_errors = 0;
1196 				}
1197 			} else
1198 				sc->sc_sendad_errors = 0;
1199 		}
1200 		if (vhe->vhe_leader) {
1201 			if (sc->sc_delayed_arp > 0)
1202 				sc->sc_delayed_arp--;
1203 			if (sc->sc_delayed_arp == 0) {
1204 				carp_send_arp(sc);
1205 				sc->sc_delayed_arp = -1;
1206 			}
1207 		}
1208 	}
1209 #endif /* INET */
1210 #ifdef INET6
1211 	if (sc->sc_naddrs6) {
1212 		struct ip6_hdr *ip6;
1213 
1214 		MGETHDR(m, M_DONTWAIT, MT_HEADER);
1215 		if (m == NULL) {
1216 			sc->sc_if.if_oerrors++;
1217 			carpstats.carps_onomem++;
1218 			/* XXX maybe less ? */
1219 			goto retry_later;
1220 		}
1221 		len = sizeof(*ip6) + sizeof(ch);
1222 		m->m_pkthdr.len = len;
1223 		m->m_pkthdr.rcvif = NULL;
1224 		m->m_len = len;
1225 		MH_ALIGN(m, m->m_len);
1226 		m->m_flags |= M_MCAST;
1227 		ip6 = mtod(m, struct ip6_hdr *);
1228 		bzero(ip6, sizeof(*ip6));
1229 		ip6->ip6_vfc |= IPV6_VERSION;
1230 		ip6->ip6_hlim = CARP_DFLTTL;
1231 		ip6->ip6_nxt = IPPROTO_CARP;
1232 
1233 		/* set the source address */
1234 		bzero(&sa, sizeof(sa));
1235 		sa.sa_family = AF_INET6;
1236 		ifa = ifaof_ifpforaddr(&sa, sc->sc_carpdev);
1237 		if (ifa == NULL)	/* This should never happen with IPv6 */
1238 			bzero(&ip6->ip6_src, sizeof(struct in6_addr));
1239 		else
1240 			bcopy(ifatoia6(ifa)->ia_addr.sin6_addr.s6_addr,
1241 			    &ip6->ip6_src, sizeof(struct in6_addr));
1242 		/* set the multicast destination */
1243 
1244 		ip6->ip6_dst.s6_addr8[0] = 0xff;
1245 		ip6->ip6_dst.s6_addr8[1] = 0x02;
1246 		ip6->ip6_dst.s6_addr8[15] = 0x12;
1247 
1248 		ch_ptr = (void *)ip6 + sizeof(*ip6);
1249 		bcopy(&ch, ch_ptr, sizeof(ch));
1250 		if (carp_prepare_ad(m, vhe, ch_ptr))
1251 			goto retry_later;
1252 
1253 		m->m_data += sizeof(*ip6);
1254 		ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip6));
1255 		m->m_data -= sizeof(*ip6);
1256 
1257 		getmicrotime(&sc->sc_if.if_lastchange);
1258 		sc->sc_if.if_opackets++;
1259 		sc->sc_if.if_obytes += len;
1260 		carpstats.carps_opackets6++;
1261 
1262 		error = ip6_output(m, NULL, NULL, 0, &sc->sc_im6o, NULL, NULL);
1263 		if (error) {
1264 			if (error == ENOBUFS)
1265 				carpstats.carps_onomem++;
1266 			else
1267 				CARP_LOG(LOG_WARNING, sc,
1268 				    ("ip6_output failed: %d", error));
1269 			sc->sc_if.if_oerrors++;
1270 			if (sc->sc_sendad_errors < INT_MAX)
1271 				sc->sc_sendad_errors++;
1272 			if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS(sc))
1273 				carp_group_demote_adj(&sc->sc_if, 1);
1274 			sc->sc_sendad_success = 0;
1275 		} else {
1276 			if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS(sc)) {
1277 				if (++sc->sc_sendad_success >=
1278 				    CARP_SENDAD_MIN_SUCCESS(sc)) {
1279 					carp_group_demote_adj(&sc->sc_if, -1);
1280 					sc->sc_sendad_errors = 0;
1281 				}
1282 			} else
1283 				sc->sc_sendad_errors = 0;
1284 		}
1285 	}
1286 #endif /* INET6 */
1287 
1288 retry_later:
1289 	sc->cur_vhe = NULL;
1290 	splx(s);
1291 	if (advbase != 255 || advskew != 255)
1292 		timeout_add(&vhe->ad_tmo, tvtohz(&tv));
1293 }
1294 
1295 /*
1296  * Broadcast a gratuitous ARP request containing
1297  * the virtual router MAC address for each IP address
1298  * associated with the virtual router.
1299  */
1300 void
1301 carp_send_arp(struct carp_softc *sc)
1302 {
1303 	struct ifaddr *ifa;
1304 	in_addr_t in;
1305 	int s = splsoftnet();
1306 
1307 	TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) {
1308 
1309 		if (ifa->ifa_addr->sa_family != AF_INET)
1310 			continue;
1311 
1312 		in = ifatoia(ifa)->ia_addr.sin_addr.s_addr;
1313 		arprequest(sc->sc_carpdev, &in, &in, sc->sc_ac.ac_enaddr);
1314 		DELAY(1000);	/* XXX */
1315 	}
1316 	splx(s);
1317 }
1318 
1319 #ifdef INET6
1320 void
1321 carp_send_na(struct carp_softc *sc)
1322 {
1323 	struct ifaddr *ifa;
1324 	struct in6_addr *in6;
1325 	static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
1326 	int s = splsoftnet();
1327 
1328 	TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) {
1329 
1330 		if (ifa->ifa_addr->sa_family != AF_INET6)
1331 			continue;
1332 
1333 		in6 = &ifatoia6(ifa)->ia_addr.sin6_addr;
1334 		nd6_na_output(sc->sc_carpdev, &mcast, in6,
1335 		    ND_NA_FLAG_OVERRIDE, 1, NULL);
1336 		DELAY(1000);	/* XXX */
1337 	}
1338 	splx(s);
1339 }
1340 #endif /* INET6 */
1341 
/*
 * Based on bridge_hash() in if_bridge.c
 *
 * mix(a, b, c) scrambles three integer variables in place through nine
 * subtract / xor / shift steps.  Every argument is assigned to and is
 * expanded several times without parentheses, so pass plain variables
 * (modifiable lvalues without side effects) only.
 */
#define	mix(a,b,c) \
	do {						\
		a -= b; a -= c; a ^= (c >> 13);		\
		b -= c; b -= a; b ^= (a << 8);		\
		c -= a; c -= b; c ^= (b >> 13);		\
		a -= b; a -= c; a ^= (c >> 12);		\
		b -= c; b -= a; b ^= (a << 16);		\
		c -= a; c -= b; c ^= (b >> 5);		\
		a -= b; a -= c; a ^= (c >> 3);		\
		b -= c; b -= a; b ^= (a << 10);		\
		c -= a; c -= b; c ^= (b >> 15);		\
	} while (0)
1357 
1358 u_int32_t
1359 carp_hash(struct carp_softc *sc, u_char *src)
1360 {
1361 	u_int32_t a = 0x9e3779b9, b = sc->sc_hashkey[0], c = sc->sc_hashkey[1];
1362 
1363 	c += sc->sc_key[3] << 24;
1364 	c += sc->sc_key[2] << 16;
1365 	c += sc->sc_key[1] << 8;
1366 	c += sc->sc_key[0];
1367 	b += src[5] << 8;
1368 	b += src[4];
1369 	a += src[3] << 24;
1370 	a += src[2] << 16;
1371 	a += src[1] << 8;
1372 	a += src[0];
1373 
1374 	mix(a, b, c);
1375 	return (c);
1376 }
1377 
1378 void
1379 carp_update_lsmask(struct carp_softc *sc)
1380 {
1381 	struct carp_vhost_entry *vhe;
1382 	int count;
1383 
1384 	if (!sc->sc_balancing)
1385 		return;
1386 
1387 	sc->sc_lsmask = 0;
1388 	count = 0;
1389 
1390 	LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) {
1391 		if (vhe->state == MASTER && count < sizeof(sc->sc_lsmask) * 8)
1392 			sc->sc_lsmask |= 1 << count;
1393 		count++;
1394 	}
1395 	sc->sc_lscount = count;
1396 	CARP_LOG(LOG_DEBUG, sc, ("carp_update_lsmask: %x", sc->sc_lsmask));
1397 }
1398 
1399 int
1400 carp_iamatch(struct in_ifaddr *ia, u_char *src, u_int8_t **sha,
1401     u_int8_t **ether_shost)
1402 {
1403 	struct carp_softc *sc = ia->ia_ifp->if_softc;
1404 	struct carp_vhost_entry *vhe = LIST_FIRST(&sc->carp_vhosts);
1405 
1406 	if (sc->sc_balancing == CARP_BAL_ARP) {
1407 		int lshash;
1408 		/*
1409 		 * We use the source MAC address to decide which virtual host
1410 		 * should handle the request. If we're master of that virtual
1411 		 * host, then we respond, otherwise, just drop the arp packet
1412 		 * on the floor.
1413 		 */
1414 
1415 		if (sc->sc_lscount == 0) /* just to be safe */
1416 			return (0);
1417 		lshash = carp_hash(sc, src) % sc->sc_lscount;
1418 		if ((1 << lshash) & sc->sc_lsmask) {
1419 			int i = 0;
1420 			LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) {
1421 				if (i++ == lshash)
1422 					break;
1423 			}
1424 			if (vhe == NULL)
1425 				return (0);
1426 			*sha = vhe->vhe_enaddr;
1427 			return (1);
1428 		}
1429 	} else if (sc->sc_balancing == CARP_BAL_IPSTEALTH ||
1430 	    sc->sc_balancing == CARP_BAL_IP) {
1431 		if (vhe->state == MASTER) {
1432 			*ether_shost = ((struct arpcom *)sc->sc_carpdev)->
1433 			    ac_enaddr;
1434 			return (1);
1435 		}
1436 	} else {
1437 		if (vhe->state == MASTER)
1438 			return (1);
1439 	}
1440 
1441 	return (0);
1442 }
1443 
1444 #ifdef INET6
1445 int
1446 carp_iamatch6(struct ifnet *ifp, u_char *src, struct sockaddr_dl **sdl)
1447 {
1448 	struct carp_softc *sc = ifp->if_softc;
1449 	struct carp_vhost_entry *vhe = LIST_FIRST(&sc->carp_vhosts);
1450 
1451 	if (sc->sc_balancing == CARP_BAL_ARP) {
1452 		int lshash;
1453 		/*
1454 		 * We use the source MAC address to decide which virtual host
1455 		 * should handle the request. If we're master of that virtual
1456 		 * host, then we respond, otherwise, just drop the ndp packet
1457 		 * on the floor.
1458 		 */
1459 
1460 		/* can happen if optional src lladdr is not provided */
1461 		if (src == NULL)
1462 			return (0);
1463 		if (sc->sc_lscount == 0) /* just to be safe */
1464 			return (0);
1465 		lshash = carp_hash(sc, src) % sc->sc_lscount;
1466 		if ((1 << lshash) & sc->sc_lsmask) {
1467 			int i = 0;
1468 			LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) {
1469 				if (i++ == lshash)
1470 					break;
1471 			}
1472 			if (vhe == NULL)
1473 				return (0);
1474 			*sdl = &vhe->vhe_sdl;
1475 			return (1);
1476 		}
1477 	} else {
1478 		if (vhe->state == MASTER)
1479 			return (1);
1480 	}
1481 
1482 	return (0);
1483 }
1484 #endif /* INET6 */
1485 
1486 struct ifnet *
1487 carp_ourether(void *v, struct ether_header *eh, int src)
1488 {
1489 	struct carp_if *cif = (struct carp_if *)v;
1490 	struct carp_softc *vh;
1491 	u_int8_t *ena;
1492 
1493 	if (src)
1494 		ena = (u_int8_t *)&eh->ether_shost;
1495 	else
1496 		ena = (u_int8_t *)&eh->ether_dhost;
1497 
1498 	TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
1499 		struct carp_vhost_entry *vhe;
1500 		if ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) !=
1501 		    (IFF_UP|IFF_RUNNING))
1502 			continue;
1503 		if (vh->sc_balancing == CARP_BAL_ARP) {
1504 			LIST_FOREACH(vhe, &vh->carp_vhosts, vhost_entries)
1505 				if (vhe->state == MASTER &&
1506 				    !bcmp(ena, vhe->vhe_enaddr, ETHER_ADDR_LEN))
1507 					return (&vh->sc_if);
1508 		} else {
1509 			vhe = LIST_FIRST(&vh->carp_vhosts);
1510 			if ((vhe->state == MASTER ||
1511 			    vh->sc_balancing >= CARP_BAL_IP) &&
1512 			    !bcmp(ena, vh->sc_ac.ac_enaddr, ETHER_ADDR_LEN))
1513 				return (&vh->sc_if);
1514 		}
1515 	}
1516 	return (NULL);
1517 }
1518 
1519 void
1520 carp_rewrite_lladdr(struct ifnet *ifp, u_int8_t *s_enaddr)
1521 {
1522 	struct carp_softc *sc = ifp->if_softc;
1523 
1524 	if (sc->sc_balancing != CARP_BAL_IPSTEALTH &&
1525 	    sc->sc_balancing != CARP_BAL_IP && sc->cur_vhe) {
1526 		if (sc->cur_vhe->vhe_leader)
1527 			bcopy((caddr_t)sc->sc_ac.ac_enaddr,
1528 			    (caddr_t)s_enaddr, ETHER_ADDR_LEN);
1529 		else
1530 			bcopy((caddr_t)sc->cur_vhe->vhe_enaddr,
1531 			    (caddr_t)s_enaddr, ETHER_ADDR_LEN);
1532 	}
1533 }
1534 
1535 int
1536 carp_our_mcastaddr(struct ifnet *ifp, u_int8_t *d_enaddr)
1537 {
1538 	struct carp_softc *sc = ifp->if_softc;
1539 
1540 	if (sc->sc_balancing != CARP_BAL_IP)
1541 		return (0);
1542 
1543 	return(!bcmp(sc->sc_ac.ac_enaddr, d_enaddr, ETHER_ADDR_LEN));
1544 }
1545 
1546 
1547 int
1548 carp_input(struct mbuf *m, u_int8_t *shost, u_int8_t *dhost, u_int16_t etype)
1549 {
1550 	struct ether_header eh;
1551 	struct carp_if *cif = (struct carp_if *)m->m_pkthdr.rcvif->if_carp;
1552 	struct ifnet *ifp;
1553 
1554 	bcopy(shost, &eh.ether_shost, sizeof(eh.ether_shost));
1555 	bcopy(dhost, &eh.ether_dhost, sizeof(eh.ether_dhost));
1556 	eh.ether_type = etype;
1557 
1558 	if ((ifp = carp_ourether(cif, &eh, 0)))
1559 		;
1560 	else if (m->m_flags & (M_BCAST|M_MCAST)) {
1561 		struct carp_softc *vh;
1562 		struct mbuf *m0;
1563 
1564 		/*
1565 		 * XXX Should really check the list of multicast addresses
1566 		 * for each CARP interface _before_ copying.
1567 		 */
1568 		TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
1569 			m0 = m_copym2(m, 0, M_COPYALL, M_DONTWAIT);
1570 			if (m0 == NULL)
1571 				continue;
1572 			m0->m_pkthdr.rcvif = &vh->sc_if;
1573 			ether_input(&vh->sc_if, &eh, m0);
1574 		}
1575 		return (1);
1576 	}
1577 
1578 	if (ifp == NULL)
1579 		return (1);
1580 
1581 	m->m_pkthdr.rcvif = ifp;
1582 
1583 #if NBPFILTER > 0
1584 	if (ifp->if_bpf)
1585 		bpf_mtap_hdr(ifp->if_bpf, (char *)&eh, ETHER_HDR_LEN, m,
1586 		    BPF_DIRECTION_IN);
1587 #endif
1588 	ifp->if_ipackets++;
1589 	ether_input(ifp, &eh, m);
1590 
1591 	return (0);
1592 }
1593 
1594 int
1595 carp_lsdrop(struct mbuf *m, sa_family_t af, u_int32_t *src, u_int32_t *dst)
1596 {
1597 	struct carp_softc *sc = m->m_pkthdr.rcvif->if_softc;
1598 	int match;
1599 	u_int32_t fold;
1600 
1601 	if (sc->sc_balancing < CARP_BAL_IP)
1602 		return (0);
1603 	/*
1604 	 * Never drop carp advertisements.
1605 	 * XXX Bad idea to pass all broadcast / multicast traffic?
1606 	 */
1607 	if (m->m_flags & (M_BCAST|M_MCAST))
1608 		return (0);
1609 
1610 	fold = src[0] ^ dst[0];
1611 #ifdef INET6
1612 	if (af == AF_INET6) {
1613 		int i;
1614 		for (i = 1; i < 4; i++)
1615 			fold ^= src[i] ^ dst[i];
1616 	}
1617 #endif
1618 	if (sc->sc_lscount == 0) /* just to be safe */
1619 		return (1);
1620 	match = (1 << (ntohl(fold) % sc->sc_lscount)) & sc->sc_lsmask;
1621 
1622 	return (!match);
1623 }
1624 
1625 void
1626 carp_master_down(void *v)
1627 {
1628 	struct carp_vhost_entry *vhe = v;
1629 	struct carp_softc *sc = vhe->parent_sc;
1630 
1631 	switch (vhe->state) {
1632 	case INIT:
1633 		printf("%s: master_down event in INIT state\n",
1634 		    sc->sc_if.if_xname);
1635 		break;
1636 	case MASTER:
1637 		break;
1638 	case BACKUP:
1639 		carp_set_state(vhe, MASTER);
1640 		carp_send_ad(vhe);
1641 		if (sc->sc_balancing == CARP_BAL_NONE && vhe->vhe_leader) {
1642 			carp_send_arp(sc);
1643 			/* Schedule a delayed ARP to deal w/ some L3 switches */
1644 			sc->sc_delayed_arp = 2;
1645 #ifdef INET6
1646 			carp_send_na(sc);
1647 #endif /* INET6 */
1648 		}
1649 		carp_setrun(vhe, 0);
1650 		if (vhe->vhe_leader)
1651 			carp_setroute(sc, RTM_ADD);
1652 		carpstats.carps_preempt++;
1653 		break;
1654 	}
1655 }
1656 
1657 void
1658 carp_setrun_all(struct carp_softc *sc, sa_family_t af)
1659 {
1660 	struct carp_vhost_entry *vhe;
1661 	LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) {
1662 		carp_setrun(vhe, af);
1663 	}
1664 }
1665 
1666 /*
1667  * When in backup state, af indicates whether to reset the master down timer
1668  * for v4 or v6. If it's set to zero, reset the ones which are already pending.
1669  */
1670 void
1671 carp_setrun(struct carp_vhost_entry *vhe, sa_family_t af)
1672 {
1673 	struct timeval tv;
1674 	struct carp_softc *sc = vhe->parent_sc;
1675 
1676 	if (sc->sc_carpdev == NULL) {
1677 		sc->sc_if.if_flags &= ~IFF_RUNNING;
1678 		carp_set_state_all(sc, INIT);
1679 		return;
1680 	}
1681 
1682 	if (sc->sc_if.if_flags & IFF_UP && vhe->vhid > 0 &&
1683 	    (sc->sc_naddrs || sc->sc_naddrs6) && !sc->sc_suppress) {
1684 		sc->sc_if.if_flags |= IFF_RUNNING;
1685 	} else {
1686 		sc->sc_if.if_flags &= ~IFF_RUNNING;
1687 		if (vhe->vhe_leader)
1688 			carp_setroute(sc, RTM_DELETE);
1689 		return;
1690 	}
1691 
1692 	switch (vhe->state) {
1693 	case INIT:
1694 		carp_set_state(vhe, BACKUP);
1695 		if (vhe->vhe_leader)
1696 			carp_setroute(sc, RTM_DELETE);
1697 		carp_setrun(vhe, 0);
1698 		break;
1699 	case BACKUP:
1700 		timeout_del(&vhe->ad_tmo);
1701 		tv.tv_sec = 3 * sc->sc_advbase;
1702 		tv.tv_usec = vhe->advskew * 1000000 / 256;
1703 		if (vhe->vhe_leader)
1704 			sc->sc_delayed_arp = -1;
1705 		switch (af) {
1706 #ifdef INET
1707 		case AF_INET:
1708 			timeout_add(&vhe->md_tmo, tvtohz(&tv));
1709 			break;
1710 #endif /* INET */
1711 #ifdef INET6
1712 		case AF_INET6:
1713 			timeout_add(&vhe->md6_tmo, tvtohz(&tv));
1714 			break;
1715 #endif /* INET6 */
1716 		default:
1717 			if (sc->sc_naddrs)
1718 				timeout_add(&vhe->md_tmo, tvtohz(&tv));
1719 			if (sc->sc_naddrs6)
1720 				timeout_add(&vhe->md6_tmo, tvtohz(&tv));
1721 			break;
1722 		}
1723 		break;
1724 	case MASTER:
1725 		tv.tv_sec = sc->sc_advbase;
1726 		tv.tv_usec = vhe->advskew * 1000000 / 256;
1727 		timeout_add(&vhe->ad_tmo, tvtohz(&tv));
1728 		break;
1729 	}
1730 }
1731 
1732 void
1733 carp_multicast_cleanup(struct carp_softc *sc)
1734 {
1735 	struct ip_moptions *imo = &sc->sc_imo;
1736 #ifdef INET6
1737 	struct ip6_moptions *im6o = &sc->sc_im6o;
1738 #endif
1739 	u_int16_t n = imo->imo_num_memberships;
1740 
1741 	/* Clean up our own multicast memberships */
1742 	while (n-- > 0) {
1743 		if (imo->imo_membership[n] != NULL) {
1744 			in_delmulti(imo->imo_membership[n]);
1745 			imo->imo_membership[n] = NULL;
1746 		}
1747 	}
1748 	imo->imo_num_memberships = 0;
1749 	imo->imo_multicast_ifp = NULL;
1750 
1751 #ifdef INET6
1752 	while (!LIST_EMPTY(&im6o->im6o_memberships)) {
1753 		struct in6_multi_mship *imm =
1754 		    LIST_FIRST(&im6o->im6o_memberships);
1755 
1756 		LIST_REMOVE(imm, i6mm_chain);
1757 		in6_leavegroup(imm);
1758 	}
1759 	im6o->im6o_multicast_ifp = NULL;
1760 #endif
1761 
1762 	/* And any other multicast memberships */
1763 	carp_ether_purgemulti(sc);
1764 }
1765 
/*
 * Attach the carp interface sc to the parent interface ifp, or detach
 * it from its current parent when ifp is NULL.
 *
 * Returns 0 on success (including when ifp already is the parent),
 * EADDRNOTAVAIL if ifp is not multicast capable, EINVAL if ifp is a
 * carp interface itself or carp_check_dup_vhids() objects, ENOBUFS if
 * the per-parent state cannot be allocated, or the error returned by
 * ifpromisc() or the multicast join helpers.
 */
int
carp_set_ifp(struct carp_softc *sc, struct ifnet *ifp)
{
	struct carp_if *cif, *ncif = NULL;
	struct carp_softc *vr, *after = NULL;
	int myself = 0, error = 0;
	int s;

	if (ifp == sc->sc_carpdev)
		return (0);

	if (ifp != NULL) {
		if ((ifp->if_flags & IFF_MULTICAST) == 0)
			return (EADDRNOTAVAIL);

		if (ifp->if_type == IFT_CARP)
			return (EINVAL);

		if (ifp->if_carp == NULL) {
			/*
			 * First carp interface on this parent: allocate the
			 * per-parent state and switch the parent to
			 * promiscuous mode.
			 */
			ncif = malloc(sizeof(*cif), M_IFADDR, M_NOWAIT);
			if (ncif == NULL)
				return (ENOBUFS);
			if ((error = ifpromisc(ifp, 1))) {
				free(ncif, M_IFADDR);
				return (error);
			}

			ncif->vhif_ifp = ifp;
			TAILQ_INIT(&ncif->vhif_vrs);
		} else {
			cif = (struct carp_if *)ifp->if_carp;
			if (carp_check_dup_vhids(sc, cif, NULL))
				return (EINVAL);
		}

		/* detach from old interface */
		if (sc->sc_carpdev != NULL)
			carpdetach(sc);

		/*
		 * join multicast groups
		 *
		 * NOTE(review): the test is "sc_naddrs < 0"; nothing in
		 * this file visibly makes the counter negative, so this
		 * join looks unreachable -- confirm the intended condition.
		 * NOTE(review): the error paths below free ncif but do not
		 * undo the ifpromisc(ifp, 1) done above -- confirm.
		 */
		if (sc->sc_naddrs < 0 &&
		    (error = carp_join_multicast(sc)) != 0) {
			if (ncif != NULL)
				free(ncif, M_IFADDR);
			return (error);
		}

#ifdef INET6
		/* NOTE(review): same "< 0" question as for IPv4 above. */
		if (sc->sc_naddrs6 < 0 &&
		    (error = carp_join_multicast6(sc)) != 0) {
			if (ncif != NULL)
				free(ncif, M_IFADDR);
			carp_multicast_cleanup(sc);
			return (error);
		}
#endif

		/* attach carp interface to physical interface */
		if (ncif != NULL)
			ifp->if_carp = (caddr_t)ncif;
		sc->sc_carpdev = ifp;
		cif = (struct carp_if *)ifp->if_carp;
		/*
		 * Find out whether sc is already on the parent's list and
		 * remember the last entry whose first vhid is smaller than
		 * ours, to insert behind it.
		 */
		TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) {
			if (vr == sc)
				myself = 1;
			if (LIST_FIRST(&vr->carp_vhosts)->vhid <
			    LIST_FIRST(&sc->carp_vhosts)->vhid)
				after = vr;
		}

		if (!myself) {
			/* We're trying to keep things in order */
			if (after == NULL) {
				TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list);
			} else {
				TAILQ_INSERT_AFTER(&cif->vhif_vrs, after,
				    sc, sc_list);
			}
			cif->vhif_nvrs++;
		}
		if (sc->sc_naddrs || sc->sc_naddrs6)
			sc->sc_if.if_flags |= IFF_UP;
		carp_set_enaddr(sc);
		/*
		 * Register carp_carpdev_state() on the parent's link state
		 * hooks and call it once right away.
		 */
		s = splnet();
		sc->lh_cookie = hook_establish(ifp->if_linkstatehooks, 1,
		    carp_carpdev_state, ifp);
		carp_carpdev_state(ifp);
		splx(s);
	} else {
		/* No parent any more: detach and mark the interface down. */
		carpdetach(sc);
		sc->sc_if.if_flags &= ~(IFF_UP|IFF_RUNNING);
	}
	return (0);
}
1860 
1861 void
1862 carp_set_vhe_enaddr(struct carp_vhost_entry *vhe)
1863 {
1864 	struct carp_softc *sc = vhe->parent_sc;
1865 
1866 	if (vhe->vhid != 0 && sc->sc_carpdev) {
1867 		if (vhe->vhe_leader && sc->sc_balancing == CARP_BAL_IP)
1868 			vhe->vhe_enaddr[0] = 1;
1869 		else
1870 			vhe->vhe_enaddr[0] = 0;
1871 		vhe->vhe_enaddr[1] = 0;
1872 		vhe->vhe_enaddr[2] = 0x5e;
1873 		vhe->vhe_enaddr[3] = 0;
1874 		vhe->vhe_enaddr[4] = 1;
1875 		vhe->vhe_enaddr[5] = vhe->vhid;
1876 
1877 		vhe->vhe_sdl.sdl_family = AF_LINK;
1878 		vhe->vhe_sdl.sdl_alen = ETHER_ADDR_LEN;
1879 		bcopy(vhe->vhe_enaddr, vhe->vhe_sdl.sdl_data, ETHER_ADDR_LEN);
1880 	} else
1881 		bzero(vhe->vhe_enaddr, ETHER_ADDR_LEN);
1882 }
1883 
1884 void
1885 carp_set_enaddr(struct carp_softc *sc)
1886 {
1887 	struct carp_vhost_entry *vhe;
1888 
1889 	LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries)
1890 		carp_set_vhe_enaddr(vhe);
1891 
1892 	vhe = LIST_FIRST(&sc->carp_vhosts);
1893 
1894 	/*
1895 	 * Use the carp lladdr if the running one isn't manually set.
1896 	 * Only compare static parts of the lladdr.
1897 	 */
1898 	if ((bcmp(sc->sc_ac.ac_enaddr + 1, vhe->vhe_enaddr + 1,
1899 	    ETHER_ADDR_LEN - 2) == 0) ||
1900 	    (!sc->sc_ac.ac_enaddr[0] && !sc->sc_ac.ac_enaddr[1] &&
1901 	    !sc->sc_ac.ac_enaddr[2] && !sc->sc_ac.ac_enaddr[3] &&
1902 	    !sc->sc_ac.ac_enaddr[4] && !sc->sc_ac.ac_enaddr[5]))
1903 		bcopy(vhe->vhe_enaddr, sc->sc_ac.ac_enaddr, ETHER_ADDR_LEN);
1904 
1905 	/* Make sure the enaddr has changed before further twiddling. */
1906 	if (bcmp(sc->sc_ac.ac_enaddr, sc->sc_curlladdr, ETHER_ADDR_LEN) != 0) {
1907 		bcopy(sc->sc_ac.ac_enaddr, LLADDR(sc->sc_if.if_sadl),
1908 		    ETHER_ADDR_LEN);
1909 		bcopy(sc->sc_ac.ac_enaddr, sc->sc_curlladdr, ETHER_ADDR_LEN);
1910 #ifdef INET6
1911 		/*
1912 		 * (re)attach a link-local address which matches
1913 		 * our new MAC address.
1914 		 */
1915 		in6_ifattach_linklocal(&sc->sc_if, NULL);
1916 #endif
1917 		carp_set_state_all(sc, INIT);
1918 		carp_setrun_all(sc, 0);
1919 	}
1920 }
1921 
1922 void
1923 carp_addr_updated(void *v)
1924 {
1925 	struct carp_softc *sc = (struct carp_softc *) v;
1926 	struct ifaddr *ifa;
1927 	int new_naddrs = 0, new_naddrs6 = 0;
1928 
1929 	TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) {
1930 		if (ifa->ifa_addr->sa_family == AF_INET)
1931 			new_naddrs++;
1932 		else if (ifa->ifa_addr->sa_family == AF_INET6 &&
1933 		    !IN6_IS_ADDR_LINKLOCAL(&ifatoia6(ifa)->ia_addr.sin6_addr))
1934 				new_naddrs6++;
1935 	}
1936 
1937 	/* Handle a callback after SIOCDIFADDR */
1938 	if (new_naddrs < sc->sc_naddrs || new_naddrs6 < sc->sc_naddrs6) {
1939 		struct in_addr mc_addr;
1940 		struct in_multi *inm;
1941 
1942 		sc->sc_naddrs = new_naddrs;
1943 		sc->sc_naddrs6 = new_naddrs6;
1944 
1945 		/* Re-establish multicast membership removed by in_control */
1946 		if (IN_MULTICAST(sc->sc_peer.s_addr)) {
1947 			mc_addr.s_addr = sc->sc_peer.s_addr;
1948 			IN_LOOKUP_MULTI(mc_addr, &sc->sc_if, inm);
1949 			if (inm == NULL) {
1950 				struct in_multi **imm =
1951 				    sc->sc_imo.imo_membership;
1952 				u_int16_t maxmem =
1953 				    sc->sc_imo.imo_max_memberships;
1954 
1955 				bzero(&sc->sc_imo, sizeof(sc->sc_imo));
1956 				sc->sc_imo.imo_membership = imm;
1957 				sc->sc_imo.imo_max_memberships = maxmem;
1958 
1959 				if (sc->sc_carpdev != NULL && sc->sc_naddrs > 0)
1960 					carp_join_multicast(sc);
1961 			}
1962 		}
1963 
1964 		if (sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0) {
1965 			sc->sc_if.if_flags &= ~IFF_UP;
1966 			carp_set_state_all(sc, INIT);
1967 		} else
1968 			carp_hmac_prepare(sc);
1969 	}
1970 
1971 	carp_setrun_all(sc, 0);
1972 }
1973 
1974 int
1975 carp_set_addr(struct carp_softc *sc, struct sockaddr_in *sin)
1976 {
1977 	struct ifnet *ifp = sc->sc_carpdev;
1978 	struct in_ifaddr *ia, *ia_if;
1979 	int error = 0;
1980 
1981 	if (sin->sin_addr.s_addr == 0) {
1982 		if (!(sc->sc_if.if_flags & IFF_UP))
1983 			carp_set_state_all(sc, INIT);
1984 		if (sc->sc_naddrs)
1985 			sc->sc_if.if_flags |= IFF_UP;
1986 		carp_setrun_all(sc, 0);
1987 		return (0);
1988 	}
1989 
1990 	/* we have to do this by hand to ensure we don't match on ourselves */
1991 	ia_if = NULL;
1992 	for (ia = TAILQ_FIRST(&in_ifaddr); ia;
1993 	    ia = TAILQ_NEXT(ia, ia_list)) {
1994 
1995 		/* and, yeah, we need a multicast-capable iface too */
1996 		if (ia->ia_ifp != &sc->sc_if &&
1997 		    ia->ia_ifp->if_type != IFT_CARP &&
1998 		    (ia->ia_ifp->if_flags & IFF_MULTICAST) &&
1999 		    (sin->sin_addr.s_addr & ia->ia_subnetmask) ==
2000 		    ia->ia_subnet) {
2001 			if (!ia_if)
2002 				ia_if = ia;
2003 		}
2004 	}
2005 
2006 	if (ia_if) {
2007 		ia = ia_if;
2008 		if (ifp) {
2009 			if (ifp != ia->ia_ifp)
2010 				return (EADDRNOTAVAIL);
2011 		} else {
2012 			ifp = ia->ia_ifp;
2013 		}
2014 	}
2015 
2016 	if ((error = carp_set_ifp(sc, ifp)))
2017 		return (error);
2018 
2019 	if (sc->sc_carpdev == NULL)
2020 		return (EADDRNOTAVAIL);
2021 
2022 	if (sc->sc_naddrs == 0 && (error = carp_join_multicast(sc)) != 0)
2023 		return (error);
2024 
2025 	sc->sc_naddrs++;
2026 	if (sc->sc_carpdev != NULL)
2027 		sc->sc_if.if_flags |= IFF_UP;
2028 
2029 	carp_set_state_all(sc, INIT);
2030 
2031 	/*
2032 	 * Hook if_addrhooks so that we get a callback after in_ifinit has run,
2033 	 * to correct any inappropriate routes that it inserted.
2034 	 */
2035 	if (sc->ah_cookie == NULL)
2036 		sc->ah_cookie = hook_establish(sc->sc_if.if_addrhooks, 0,
2037 		    carp_addr_updated, sc);
2038 
2039 	return (0);
2040 }
2041 
2042 int
2043 carp_join_multicast(struct carp_softc *sc)
2044 {
2045 	struct ip_moptions *imo = &sc->sc_imo;
2046 	struct in_multi *imm;
2047 	struct in_addr addr;
2048 
2049 	if (!IN_MULTICAST(sc->sc_peer.s_addr))
2050 		return (0);
2051 
2052 	addr.s_addr = sc->sc_peer.s_addr;
2053 	if ((imm = in_addmulti(&addr, &sc->sc_if)) == NULL)
2054 		return (ENOBUFS);
2055 
2056 	imo->imo_membership[0] = imm;
2057 	imo->imo_num_memberships = 1;
2058 	imo->imo_multicast_ifp = &sc->sc_if;
2059 	imo->imo_multicast_ttl = CARP_DFLTTL;
2060 	imo->imo_multicast_loop = 0;
2061 	return (0);
2062 }
2063 
2064 
2065 #ifdef INET6
2066 int
2067 carp_set_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6)
2068 {
2069 	struct ifnet *ifp = sc->sc_carpdev;
2070 	struct in6_ifaddr *ia, *ia_if;
2071 	int error = 0;
2072 
2073 	if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
2074 		if (!(sc->sc_if.if_flags & IFF_UP))
2075 			carp_set_state_all(sc, INIT);
2076 		if (sc->sc_naddrs6)
2077 			sc->sc_if.if_flags |= IFF_UP;
2078 		carp_setrun_all(sc, 0);
2079 		return (0);
2080 	}
2081 
2082 	/* we have to do this by hand to ensure we don't match on ourselves */
2083 	ia_if = NULL;
2084 	for (ia = in6_ifaddr; ia; ia = ia->ia_next) {
2085 		int i;
2086 
2087 		for (i = 0; i < 4; i++) {
2088 			if ((sin6->sin6_addr.s6_addr32[i] &
2089 			    ia->ia_prefixmask.sin6_addr.s6_addr32[i]) !=
2090 			    (ia->ia_addr.sin6_addr.s6_addr32[i] &
2091 			    ia->ia_prefixmask.sin6_addr.s6_addr32[i]))
2092 				break;
2093 		}
2094 		/* and, yeah, we need a multicast-capable iface too */
2095 		if (ia->ia_ifp != &sc->sc_if &&
2096 		    ia->ia_ifp->if_type != IFT_CARP &&
2097 		    (ia->ia_ifp->if_flags & IFF_MULTICAST) &&
2098 		    (i == 4)) {
2099 			if (!ia_if)
2100 				ia_if = ia;
2101 		}
2102 	}
2103 
2104 	if (ia_if) {
2105 		ia = ia_if;
2106 		if (sc->sc_carpdev) {
2107 			if (sc->sc_carpdev != ia->ia_ifp)
2108 				return (EADDRNOTAVAIL);
2109 		} else {
2110 			ifp = ia->ia_ifp;
2111 		}
2112 	}
2113 
2114 	if ((error = carp_set_ifp(sc, ifp)))
2115 		return (error);
2116 
2117 	if (sc->sc_carpdev == NULL)
2118 		return (EADDRNOTAVAIL);
2119 
2120 	if (sc->sc_naddrs6 == 0 && (error = carp_join_multicast6(sc)) != 0)
2121 		return (error);
2122 
2123 	if (!IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr))
2124 		sc->sc_naddrs6++;
2125 	if (sc->sc_carpdev != NULL && sc->sc_naddrs6)
2126 		sc->sc_if.if_flags |= IFF_UP;
2127 	carp_set_state_all(sc, INIT);
2128 	carp_setrun_all(sc, 0);
2129 
2130 	return (0);
2131 }
2132 
2133 int
2134 carp_join_multicast6(struct carp_softc *sc)
2135 {
2136 	struct in6_multi_mship *imm, *imm2;
2137 	struct ip6_moptions *im6o = &sc->sc_im6o;
2138 	struct sockaddr_in6 addr6;
2139 	int error;
2140 
2141 	/* Join IPv6 CARP multicast group */
2142 	bzero(&addr6, sizeof(addr6));
2143 	addr6.sin6_family = AF_INET6;
2144 	addr6.sin6_len = sizeof(addr6);
2145 	addr6.sin6_addr.s6_addr16[0] = htons(0xff02);
2146 	addr6.sin6_addr.s6_addr16[1] = htons(sc->sc_if.if_index);
2147 	addr6.sin6_addr.s6_addr8[15] = 0x12;
2148 	if ((imm = in6_joingroup(&sc->sc_if,
2149 	    &addr6.sin6_addr, &error)) == NULL) {
2150 		return (error);
2151 	}
2152 	/* join solicited multicast address */
2153 	bzero(&addr6.sin6_addr, sizeof(addr6.sin6_addr));
2154 	addr6.sin6_addr.s6_addr16[0] = htons(0xff02);
2155 	addr6.sin6_addr.s6_addr16[1] = htons(sc->sc_if.if_index);
2156 	addr6.sin6_addr.s6_addr32[1] = 0;
2157 	addr6.sin6_addr.s6_addr32[2] = htonl(1);
2158 	addr6.sin6_addr.s6_addr32[3] = 0;
2159 	addr6.sin6_addr.s6_addr8[12] = 0xff;
2160 	if ((imm2 = in6_joingroup(&sc->sc_if,
2161 	    &addr6.sin6_addr, &error)) == NULL) {
2162 		in6_leavegroup(imm);
2163 		return (error);
2164 	}
2165 
2166 	/* apply v6 multicast membership */
2167 	im6o->im6o_multicast_ifp = &sc->sc_if;
2168 	if (imm)
2169 		LIST_INSERT_HEAD(&im6o->im6o_memberships, imm,
2170 		    i6mm_chain);
2171 	if (imm2)
2172 		LIST_INSERT_HEAD(&im6o->im6o_memberships, imm2,
2173 		    i6mm_chain);
2174 
2175 	return (0);
2176 }
2177 
2178 #endif /* INET6 */
2179 
2180 int
2181 carp_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr)
2182 {
2183 	struct proc *p = curproc;	/* XXX */
2184 	struct carp_softc *sc = ifp->if_softc;
2185 	struct carp_vhost_entry *vhe;
2186 	struct carpreq carpr;
2187 	struct ifaddr *ifa = (struct ifaddr *)addr;
2188 	struct ifreq *ifr = (struct ifreq *)addr;
2189 	struct ifnet *cdev = NULL;
2190 	int i, error = 0;
2191 
2192 	switch (cmd) {
2193 	case SIOCSIFADDR:
2194 		switch (ifa->ifa_addr->sa_family) {
2195 #ifdef INET
2196 		case AF_INET:
2197 			sc->sc_if.if_flags |= IFF_UP;
2198 			/*
2199 			 * emulate arp_ifinit() without doing a gratious arp
2200 			 * request so that the routes are setup correctly.
2201 			 */
2202 			ifa->ifa_rtrequest = arp_rtrequest;
2203 			ifa->ifa_flags |= RTF_CLONING;
2204 
2205 			error = carp_set_addr(sc, satosin(ifa->ifa_addr));
2206 			break;
2207 #endif /* INET */
2208 #ifdef INET6
2209 		case AF_INET6:
2210 			sc->sc_if.if_flags |= IFF_UP;
2211 			error = carp_set_addr6(sc, satosin6(ifa->ifa_addr));
2212 			break;
2213 #endif /* INET6 */
2214 		default:
2215 			error = EAFNOSUPPORT;
2216 			break;
2217 		}
2218 		break;
2219 
2220 	case SIOCSIFFLAGS:
2221 		vhe = LIST_FIRST(&sc->carp_vhosts);
2222 		if (vhe->state != INIT && !(ifr->ifr_flags & IFF_UP)) {
2223 			carp_del_all_timeouts(sc);
2224 
2225 			/* we need the interface up to bow out */
2226 			sc->sc_if.if_flags |= IFF_UP;
2227 			sc->sc_bow_out = 1;
2228 			carp_vhe_send_ad_all(sc);
2229 			sc->sc_bow_out = 0;
2230 
2231 			sc->sc_if.if_flags &= ~IFF_UP;
2232 			carp_set_state_all(sc, INIT);
2233 			carp_setrun_all(sc, 0);
2234 		} else if (vhe->state == INIT && (ifr->ifr_flags & IFF_UP)) {
2235 			sc->sc_if.if_flags |= IFF_UP;
2236 			carp_setrun_all(sc, 0);
2237 		}
2238 		break;
2239 
2240 	case SIOCSVH:
2241 		vhe = LIST_FIRST(&sc->carp_vhosts);
2242 		if ((error = suser(p, p->p_acflag)) != 0)
2243 			break;
2244 		if ((error = copyin(ifr->ifr_data, &carpr, sizeof carpr)))
2245 			break;
2246 		error = 1;
2247 		if (carpr.carpr_carpdev[0] != '\0' &&
2248 		    (cdev = ifunit(carpr.carpr_carpdev)) == NULL)
2249 			return (EINVAL);
2250 		if (carpr.carpr_peer.s_addr == 0)
2251 			sc->sc_peer.s_addr = INADDR_CARP_GROUP;
2252 		else
2253 			sc->sc_peer.s_addr = carpr.carpr_peer.s_addr;
2254 		if ((error = carp_set_ifp(sc, cdev)))
2255 			return (error);
2256 		if (vhe->state != INIT && carpr.carpr_state != vhe->state) {
2257 			switch (carpr.carpr_state) {
2258 			case BACKUP:
2259 				timeout_del(&vhe->ad_tmo);
2260 				carp_set_state_all(sc, BACKUP);
2261 				carp_setrun_all(sc, 0);
2262 				carp_setroute(sc, RTM_DELETE);
2263 				break;
2264 			case MASTER:
2265 				LIST_FOREACH(vhe, &sc->carp_vhosts,
2266 				    vhost_entries)
2267 					carp_master_down(vhe);
2268 				break;
2269 			default:
2270 				break;
2271 			}
2272 		}
2273 		if ((error = carp_vhids_ioctl(sc, &carpr)))
2274 			return (error);
2275 		if (carpr.carpr_advbase > 0) {
2276 			if (carpr.carpr_advbase > 255) {
2277 				error = EINVAL;
2278 				break;
2279 			}
2280 			sc->sc_advbase = carpr.carpr_advbase;
2281 			error--;
2282 		}
2283 		if (bcmp(sc->sc_advskews, carpr.carpr_advskews,
2284 		    sizeof(sc->sc_advskews))) {
2285 			i = 0;
2286 			LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries)
2287 				vhe->advskew = carpr.carpr_advskews[i++];
2288 			bcopy(carpr.carpr_advskews, sc->sc_advskews,
2289 			    sizeof(sc->sc_advskews));
2290 		}
2291 		if (sc->sc_balancing != carpr.carpr_balancing) {
2292 			if (carpr.carpr_balancing > CARP_BAL_MAXID) {
2293 				error = EINVAL;
2294 				break;
2295 			}
2296 			sc->sc_balancing = carpr.carpr_balancing;
2297 			carp_set_enaddr(sc);
2298 			carp_update_lsmask(sc);
2299 		}
2300 		bcopy(carpr.carpr_key, sc->sc_key, sizeof(sc->sc_key));
2301 		if (error > 0)
2302 			error = EINVAL;
2303 		else {
2304 			error = 0;
2305 			carp_setrun_all(sc, 0);
2306 		}
2307 		break;
2308 
2309 	case SIOCGVH:
2310 		bzero(&carpr, sizeof(carpr));
2311 		if (sc->sc_carpdev != NULL)
2312 			strlcpy(carpr.carpr_carpdev, sc->sc_carpdev->if_xname,
2313 			    IFNAMSIZ);
2314 		i = 0;
2315 		LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) {
2316 			carpr.carpr_vhids[i] = vhe->vhid;
2317 			carpr.carpr_advskews[i] = vhe->advskew;
2318 			carpr.carpr_states[i] = vhe->state;
2319 			i++;
2320 		}
2321 		carpr.carpr_advbase = sc->sc_advbase;
2322 		carpr.carpr_balancing = sc->sc_balancing;
2323 		if (suser(p, p->p_acflag) == 0)
2324 			bcopy(sc->sc_key, carpr.carpr_key,
2325 			    sizeof(carpr.carpr_key));
2326 		carpr.carpr_peer.s_addr = sc->sc_peer.s_addr;
2327 		error = copyout(&carpr, ifr->ifr_data, sizeof(carpr));
2328 		break;
2329 
2330 	case SIOCADDMULTI:
2331 		error = carp_ether_addmulti(sc, ifr);
2332 		break;
2333 
2334 	case SIOCDELMULTI:
2335 		error = carp_ether_delmulti(sc, ifr);
2336 		break;
2337 	case SIOCAIFGROUP:
2338 	case SIOCDIFGROUP:
2339 		if (sc->sc_suppress)
2340 			carp_ifgroup_ioctl(ifp, cmd, addr);
2341 		break;
2342 	case SIOCSIFGATTR:
2343 		carp_ifgattr_ioctl(ifp, cmd, addr);
2344 		break;
2345 	default:
2346 		error = ENOTTY;
2347 	}
2348 
2349 	if (bcmp(sc->sc_ac.ac_enaddr, sc->sc_curlladdr, ETHER_ADDR_LEN) != 0)
2350 		carp_set_enaddr(sc);
2351 	carp_hmac_prepare(sc);
2352 	return (error);
2353 }
2354 
2355 int
2356 carp_check_dup_vhids(struct carp_softc *sc, struct carp_if *cif,
2357     struct carpreq *carpr)
2358 {
2359 	struct carp_softc *vr;
2360 	struct carp_vhost_entry *vhe, *vhe0;
2361 	int i;
2362 
2363 	TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) {
2364 		if (vr == sc)
2365 			continue;
2366 		LIST_FOREACH(vhe, &vr->carp_vhosts, vhost_entries) {
2367 			if (carpr) {
2368 				for (i = 0; carpr->carpr_vhids[i]; i++) {
2369 					if (vhe->vhid == carpr->carpr_vhids[i])
2370 						return (EINVAL);
2371 				}
2372 			}
2373 			LIST_FOREACH(vhe0, &sc->carp_vhosts, vhost_entries) {
2374 				if (vhe->vhid == vhe0->vhid)
2375 					return (EINVAL);
2376 			}
2377 		}
2378 	}
2379 	return (0);
2380 }
2381 
2382 int
2383 carp_vhids_ioctl(struct carp_softc *sc, struct carpreq *carpr)
2384 {
2385 	int i, j;
2386 	u_int8_t taken_vhids[256];
2387 
2388 	if (carpr->carpr_vhids[0] == 0 ||
2389 	    !bcmp(sc->sc_vhids, carpr->carpr_vhids, sizeof(sc->sc_vhids)))
2390 		return (0);
2391 
2392 	bzero(taken_vhids, sizeof(taken_vhids));
2393 	for (i = 0; carpr->carpr_vhids[i]; i++) {
2394 		if (taken_vhids[carpr->carpr_vhids[i]])
2395 			return (EINVAL);
2396 		taken_vhids[carpr->carpr_vhids[i]] = 1;
2397 
2398 		if (sc->sc_carpdev) {
2399 			struct carp_if *cif;
2400 			cif = (struct carp_if *)sc->sc_carpdev->if_carp;
2401 			if (carp_check_dup_vhids(sc, cif, carpr))
2402 				return (EINVAL);
2403 		}
2404 		if (carpr->carpr_advskews[i] >= 255)
2405 			return (EINVAL);
2406 	}
2407 	/* set sane balancing defaults */
2408 	if (i <= 1)
2409 		carpr->carpr_balancing = CARP_BAL_NONE;
2410 	else if (carpr->carpr_balancing == CARP_BAL_NONE &&
2411 	    sc->sc_balancing == CARP_BAL_NONE)
2412 		carpr->carpr_balancing = CARP_BAL_IP;
2413 
2414 	/* destroy all */
2415 	carp_del_all_timeouts(sc);
2416 	carp_destroy_vhosts(sc);
2417 	bzero(sc->sc_vhids, sizeof(sc->sc_vhids));
2418 
2419 	/* sort vhosts list by vhid */
2420 	for (j = 1; j <= 255; j++) {
2421 		for (i = 0; carpr->carpr_vhids[i]; i++) {
2422 			if (carpr->carpr_vhids[i] != j)
2423 				continue;
2424 			if (carp_new_vhost(sc, carpr->carpr_vhids[i],
2425 			    carpr->carpr_advskews[i]))
2426 				return (ENOMEM);
2427 			sc->sc_vhids[i] = carpr->carpr_vhids[i];
2428 			sc->sc_advskews[i] = carpr->carpr_advskews[i];
2429 		}
2430 	}
2431 	carp_set_enaddr(sc);
2432 	carp_set_state_all(sc, INIT);
2433 	return (0);
2434 }
2435 
2436 void
2437 carp_ifgroup_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr)
2438 {
2439 	struct ifgroupreq *ifgr = (struct ifgroupreq *)addr;
2440 	struct ifg_list	*ifgl;
2441 
2442 	if (!strcmp(ifgr->ifgr_group, IFG_ALL))
2443 		return;
2444 	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
2445 		if (!strcmp(ifgl->ifgl_group->ifg_group, ifgr->ifgr_group)) {
2446 			if (cmd == SIOCAIFGROUP)
2447 				ifgl->ifgl_group->ifg_carp_demoted++;
2448 			else if (cmd == SIOCDIFGROUP &&
2449 			    ifgl->ifgl_group->ifg_carp_demoted)
2450 				ifgl->ifgl_group->ifg_carp_demoted--;
2451 		}
2452 }
2453 
2454 void
2455 carp_ifgattr_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr)
2456 {
2457 	struct ifgroupreq *ifgr = (struct ifgroupreq *)addr;
2458 	struct carp_softc *sc = ifp->if_softc;
2459 
2460 	if (ifgr->ifgr_attrib.ifg_carp_demoted > 0 && (sc->sc_if.if_flags &
2461 	    (IFF_UP|IFF_RUNNING)) == (IFF_UP|IFF_RUNNING))
2462 		carp_vhe_send_ad_all(sc);
2463 }
2464 
/*
 * Output start routine of a carp interface.  It is not expected to be
 * called and does no work; kernels built with DEBUG print a note naming
 * the interface when it is reached anyway.
 */
void
carp_start(struct ifnet *ifp)
{
#ifdef DEBUG
	printf("%s: start called\n", ifp->if_xname);
#endif /* DEBUG */
}
2475 
2476 int
2477 carp_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa,
2478     struct rtentry *rt)
2479 {
2480 	struct carp_softc *sc = ((struct carp_softc *)ifp->if_softc);
2481 	struct carp_vhost_entry *vhe;
2482 
2483 	vhe = sc->cur_vhe ? sc->cur_vhe : LIST_FIRST(&sc->carp_vhosts);
2484 
2485 	if (sc->sc_carpdev != NULL &&
2486 	    (sc->sc_balancing || vhe->state == MASTER))
2487 		return (sc->sc_carpdev->if_output(ifp, m, sa, rt));
2488 	else {
2489 		m_freem(m);
2490 		return (ENETUNREACH);
2491 	}
2492 }
2493 
2494 void
2495 carp_set_state_all(struct carp_softc *sc, int state)
2496 {
2497 	struct carp_vhost_entry *vhe;
2498 
2499 	LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries)
2500 		carp_set_state(vhe, state);
2501 }
2502 
2503 void
2504 carp_set_state(struct carp_vhost_entry *vhe, int state)
2505 {
2506 	struct carp_softc *sc = vhe->parent_sc;
2507 	static const char *carp_states[] = { CARP_STATES };
2508 	int loglevel;
2509 
2510 	if (vhe->state == state)
2511 		return;
2512 	if (vhe->state == INIT || state == INIT)
2513 		loglevel = LOG_WARNING;
2514 	else
2515 		loglevel = LOG_CRIT;
2516 
2517 	if (sc->sc_vhe_count > 1)
2518 		CARP_LOG(loglevel, sc,
2519 		    ("state transition (vhid %d): %s -> %s", vhe->vhid,
2520 		    carp_states[vhe->state], carp_states[state]));
2521 	else
2522 		CARP_LOG(loglevel, sc,
2523 		    ("state transition: %s -> %s",
2524 		    carp_states[vhe->state], carp_states[state]));
2525 
2526 	vhe->state = state;
2527 	carp_update_lsmask(sc);
2528 
2529 	/* only the master vhe creates link state messages */
2530 	if (!vhe->vhe_leader)
2531 		return;
2532 
2533 	switch (state) {
2534 	case BACKUP:
2535 		sc->sc_if.if_link_state = LINK_STATE_DOWN;
2536 		break;
2537 	case MASTER:
2538 		sc->sc_if.if_link_state = LINK_STATE_UP;
2539 		break;
2540 	default:
2541 		sc->sc_if.if_link_state = LINK_STATE_UNKNOWN;
2542 		break;
2543 	}
2544 	if_link_state_change(&sc->sc_if);
2545 }
2546 
2547 void
2548 carp_group_demote_adj(struct ifnet *ifp, int adj)
2549 {
2550 	struct ifg_list	*ifgl;
2551 	int *dm;
2552 	struct carp_softc *nil = NULL;
2553 
2554 	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
2555 		if (!strcmp(ifgl->ifgl_group->ifg_group, IFG_ALL))
2556 			continue;
2557 		dm = &ifgl->ifgl_group->ifg_carp_demoted;
2558 
2559 		if (*dm + adj >= 0)
2560 			*dm += adj;
2561 		else
2562 			*dm = 0;
2563 
2564 		if (adj > 0 && *dm == 1)
2565 			carp_send_ad_all();
2566 		CARP_LOG(LOG_INFO, nil, ("%s demoted group %s to %d", ifp->if_xname,
2567 		    ifgl->ifgl_group->ifg_group, *dm));
2568 	}
2569 }
2570 
2571 int
2572 carp_group_demote_count(struct carp_softc *sc)
2573 {
2574 	struct ifg_list	*ifgl;
2575 	int count = 0;
2576 
2577 	TAILQ_FOREACH(ifgl, &sc->sc_if.if_groups, ifgl_next)
2578 		count += ifgl->ifgl_group->ifg_carp_demoted;
2579 
2580 	return (count > 255 ? 255 : count);
2581 }
2582 
2583 void
2584 carp_carpdev_state(void *v)
2585 {
2586 	struct carp_if *cif;
2587 	struct carp_softc *sc;
2588 	struct ifnet *ifp = v;
2589 
2590 	if (ifp->if_type == IFT_CARP)
2591 		return;
2592 
2593 	cif = (struct carp_if *)ifp->if_carp;
2594 
2595 	TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) {
2596 		int suppressed = sc->sc_suppress;
2597 
2598 		if (sc->sc_carpdev->if_link_state == LINK_STATE_DOWN ||
2599 		    !(sc->sc_carpdev->if_flags & IFF_UP)) {
2600 			sc->sc_if.if_flags &= ~IFF_RUNNING;
2601 			carp_del_all_timeouts(sc);
2602 			carp_set_state_all(sc, INIT);
2603 			sc->sc_suppress = 1;
2604 			carp_setrun_all(sc, 0);
2605 			if (!suppressed)
2606 				carp_group_demote_adj(&sc->sc_if, 1);
2607 		} else {
2608 			carp_set_state_all(sc, INIT);
2609 			sc->sc_suppress = 0;
2610 			carp_setrun_all(sc, 0);
2611 			if (suppressed)
2612 				carp_group_demote_adj(&sc->sc_if, -1);
2613 		}
2614 	}
2615 }
2616 
2617 int
2618 carp_ether_addmulti(struct carp_softc *sc, struct ifreq *ifr)
2619 {
2620 	struct ifnet *ifp;
2621 	struct carp_mc_entry *mc;
2622 	u_int8_t addrlo[ETHER_ADDR_LEN], addrhi[ETHER_ADDR_LEN];
2623 	int error;
2624 
2625 	ifp = sc->sc_carpdev;
2626 	if (ifp == NULL)
2627 		return (EINVAL);
2628 
2629 	error = ether_addmulti(ifr, (struct arpcom *)&sc->sc_ac);
2630 	if (error != ENETRESET)
2631 		return (error);
2632 
2633 	/*
2634 	 * This is new multicast address.  We have to tell parent
2635 	 * about it.  Also, remember this multicast address so that
2636 	 * we can delete them on unconfigure.
2637 	 */
2638 	mc = malloc(sizeof(struct carp_mc_entry), M_DEVBUF, M_NOWAIT);
2639 	if (mc == NULL) {
2640 		error = ENOMEM;
2641 		goto alloc_failed;
2642 	}
2643 
2644 	/*
2645 	 * As ether_addmulti() returns ENETRESET, following two
2646 	 * statement shouldn't fail.
2647 	 */
2648 	(void)ether_multiaddr(&ifr->ifr_addr, addrlo, addrhi);
2649 	ETHER_LOOKUP_MULTI(addrlo, addrhi, &sc->sc_ac, mc->mc_enm);
2650 	memcpy(&mc->mc_addr, &ifr->ifr_addr, ifr->ifr_addr.sa_len);
2651 	LIST_INSERT_HEAD(&sc->carp_mc_listhead, mc, mc_entries);
2652 
2653 	error = (*ifp->if_ioctl)(ifp, SIOCADDMULTI, (caddr_t)ifr);
2654 	if (error != 0)
2655 		goto ioctl_failed;
2656 
2657 	return (error);
2658 
2659  ioctl_failed:
2660 	LIST_REMOVE(mc, mc_entries);
2661 	free(mc, M_DEVBUF);
2662  alloc_failed:
2663 	(void)ether_delmulti(ifr, (struct arpcom *)&sc->sc_ac);
2664 
2665 	return (error);
2666 }
2667 
2668 int
2669 carp_ether_delmulti(struct carp_softc *sc, struct ifreq *ifr)
2670 {
2671 	struct ifnet *ifp;
2672 	struct ether_multi *enm;
2673 	struct carp_mc_entry *mc;
2674 	u_int8_t addrlo[ETHER_ADDR_LEN], addrhi[ETHER_ADDR_LEN];
2675 	int error;
2676 
2677 	ifp = sc->sc_carpdev;
2678 	if (ifp == NULL)
2679 		return (EINVAL);
2680 
2681 	/*
2682 	 * Find a key to lookup carp_mc_entry.  We have to do this
2683 	 * before calling ether_delmulti for obvious reason.
2684 	 */
2685 	if ((error = ether_multiaddr(&ifr->ifr_addr, addrlo, addrhi)) != 0)
2686 		return (error);
2687 	ETHER_LOOKUP_MULTI(addrlo, addrhi, &sc->sc_ac, enm);
2688 	if (enm == NULL)
2689 		return (EINVAL);
2690 
2691 	LIST_FOREACH(mc, &sc->carp_mc_listhead, mc_entries)
2692 		if (mc->mc_enm == enm)
2693 			break;
2694 
2695 	/* We won't delete entries we didn't add */
2696 	if (mc == NULL)
2697 		return (EINVAL);
2698 
2699 	error = ether_delmulti(ifr, (struct arpcom *)&sc->sc_ac);
2700 	if (error != ENETRESET)
2701 		return (error);
2702 
2703 	/* We no longer use this multicast address.  Tell parent so. */
2704 	error = (*ifp->if_ioctl)(ifp, SIOCDELMULTI, (caddr_t)ifr);
2705 	if (error == 0) {
2706 		/* And forget about this address. */
2707 		LIST_REMOVE(mc, mc_entries);
2708 		free(mc, M_DEVBUF);
2709 	} else
2710 		(void)ether_addmulti(ifr, (struct arpcom *)&sc->sc_ac);
2711 	return (error);
2712 }
2713 
2714 /*
2715  * Delete any multicast address we have asked to add from parent
2716  * interface.  Called when the carp is being unconfigured.
2717  */
2718 void
2719 carp_ether_purgemulti(struct carp_softc *sc)
2720 {
2721 	struct ifnet *ifp = sc->sc_carpdev;		/* Parent. */
2722 	struct carp_mc_entry *mc;
2723 	union {
2724 		struct ifreq ifreq;
2725 		struct {
2726 			char ifr_name[IFNAMSIZ];
2727 			struct sockaddr_storage ifr_ss;
2728 		} ifreq_storage;
2729 	} u;
2730 	struct ifreq *ifr = &u.ifreq;
2731 
2732 	if (ifp == NULL)
2733 		return;
2734 
2735 	memcpy(ifr->ifr_name, ifp->if_xname, IFNAMSIZ);
2736 	while ((mc = LIST_FIRST(&sc->carp_mc_listhead)) != NULL) {
2737 		memcpy(&ifr->ifr_addr, &mc->mc_addr, mc->mc_addr.ss_len);
2738 		(void)(*ifp->if_ioctl)(ifp, SIOCDELMULTI, (caddr_t)ifr);
2739 		LIST_REMOVE(mc, mc_entries);
2740 		free(mc, M_DEVBUF);
2741 	}
2742 }
2743