xref: /netbsd-src/sys/netinet/ip_carp.c (revision 8ecbf5f02b752fcb7debe1a8fab1dc82602bc760)
1 /*	$NetBSD: ip_carp.c,v 1.110 2020/02/06 23:30:20 thorpej Exp $	*/
2 /*	$OpenBSD: ip_carp.c,v 1.113 2005/11/04 08:11:54 mcbride Exp $	*/
3 
4 /*
5  * Copyright (c) 2002 Michael Shalayeff. All rights reserved.
6  * Copyright (c) 2003 Ryan McBride. All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
21  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
23  * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
25  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
26  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
27  * THE POSSIBILITY OF SUCH DAMAGE.
28  */
29 
30 #ifdef _KERNEL_OPT
31 #include "opt_inet.h"
32 #include "opt_mbuftrace.h"
33 #endif
34 
35 #include <sys/cdefs.h>
36 __KERNEL_RCSID(0, "$NetBSD: ip_carp.c,v 1.110 2020/02/06 23:30:20 thorpej Exp $");
37 
38 /*
39  * TODO:
40  *	- iface reconfigure
41  *	- support for hardware checksum calculations;
42  *
43  */
44 
45 #include <sys/param.h>
46 #include <sys/proc.h>
47 #include <sys/mbuf.h>
48 #include <sys/socket.h>
49 #include <sys/socketvar.h>
50 #include <sys/callout.h>
51 #include <sys/ioctl.h>
52 #include <sys/errno.h>
53 #include <sys/device.h>
54 #include <sys/time.h>
55 #include <sys/kernel.h>
56 #include <sys/kauth.h>
57 #include <sys/sysctl.h>
58 #include <sys/ucred.h>
59 #include <sys/syslog.h>
60 #include <sys/acct.h>
61 #include <sys/cprng.h>
62 #include <sys/cpu.h>
63 #include <sys/pserialize.h>
64 #include <sys/psref.h>
65 
66 #include <net/if.h>
67 #include <net/pfil.h>
68 #include <net/if_types.h>
69 #include <net/if_ether.h>
70 #include <net/if_media.h>
71 #include <net/route.h>
72 #include <net/netisr.h>
73 #include <net/net_stats.h>
74 #include <netinet/if_inarp.h>
75 #include <netinet/wqinput.h>
76 
77 #ifdef INET
78 #include <netinet/in.h>
79 #include <netinet/in_systm.h>
80 #include <netinet/in_var.h>
81 #include <netinet/ip.h>
82 #include <netinet/ip_var.h>
83 
84 #include <net/if_dl.h>
85 #endif
86 
87 #ifdef INET6
88 #include <netinet/icmp6.h>
89 #include <netinet/ip6.h>
90 #include <netinet6/ip6_var.h>
91 #include <netinet6/nd6.h>
92 #include <netinet6/scope6_var.h>
93 #include <netinet6/in6_var.h>
94 #endif
95 
96 #include <net/bpf.h>
97 
98 #include <sys/sha1.h>
99 
100 #include <netinet/ip_carp.h>
101 
102 #include "ioconf.h"
103 
104 struct carp_mc_entry {
105 	LIST_ENTRY(carp_mc_entry)	mc_entries;
106 	union {
107 		struct ether_multi	*mcu_enm;
108 	} mc_u;
109 	struct sockaddr_storage		mc_addr;
110 };
111 #define	mc_enm	mc_u.mcu_enm
112 
113 struct carp_softc {
114 	struct ethercom sc_ac;
115 #define	sc_if		sc_ac.ec_if
116 #define	sc_carpdev	sc_ac.ec_if.if_carpdev
117 	int ah_cookie;
118 	int lh_cookie;
119 	struct ifmedia  sc_im;	/* ifmedia for link status */
120 	struct ip_moptions sc_imo;
121 #ifdef INET6
122 	struct ip6_moptions sc_im6o;
123 #endif /* INET6 */
124 	TAILQ_ENTRY(carp_softc) sc_list;
125 
126 	enum { INIT = 0, BACKUP, MASTER }	sc_state;
127 
128 	int sc_suppress;
129 	int sc_bow_out;
130 
131 	int sc_sendad_errors;
132 #define CARP_SENDAD_MAX_ERRORS	3
133 	int sc_sendad_success;
134 #define CARP_SENDAD_MIN_SUCCESS 3
135 
136 	int sc_vhid;
137 	int sc_advskew;
138 	int sc_naddrs;
139 	int sc_naddrs6;
140 	int sc_advbase;		/* seconds */
141 	int sc_init_counter;
142 	u_int64_t sc_counter;
143 
144 	/* authentication */
145 #define CARP_HMAC_PAD	64
146 	unsigned char sc_key[CARP_KEY_LEN];
147 	unsigned char sc_pad[CARP_HMAC_PAD];
148 	SHA1_CTX sc_sha1;
149 	u_int32_t sc_hashkey[2];
150 
151 	struct callout sc_ad_tmo;	/* advertisement timeout */
152 	struct callout sc_md_tmo;	/* master down timeout */
153 	struct callout sc_md6_tmo;	/* master down timeout */
154 
155 	LIST_HEAD(__carp_mchead, carp_mc_entry)	carp_mc_listhead;
156 };
157 
158 int carp_suppress_preempt = 0;
159 static int carp_opts[CARPCTL_MAXID] = { 0, 1, 0, 0, 0 };	/* XXX for now */
160 
161 static percpu_t *carpstat_percpu;
162 
163 #define	CARP_STATINC(x)		_NET_STATINC(carpstat_percpu, x)
164 
165 #ifdef MBUFTRACE
166 static struct mowner carp_proto_mowner_rx = MOWNER_INIT("carp", "rx");
167 static struct mowner carp_proto_mowner_tx = MOWNER_INIT("carp", "tx");
168 static struct mowner carp_proto6_mowner_rx = MOWNER_INIT("carp6", "rx");
169 static struct mowner carp_proto6_mowner_tx = MOWNER_INIT("carp6", "tx");
170 #endif
171 
172 struct carp_if {
173 	TAILQ_HEAD(, carp_softc) vhif_vrs;
174 	int vhif_nvrs;
175 
176 	struct ifnet *vhif_ifp;
177 };
178 
/*
 * Log a message when the CARPCTL_LOG option is enabled.
 *
 * sc is the softc the message concerns, or a NULL pointer variable, in
 * which case the message is prefixed with "carp: " instead of the
 * interface name.  s is a parenthesised printf-style argument list,
 * e.g. CARP_LOG(sc, ("bad ttl %d", ttl));
 *
 * The body is wrapped in do { } while (0) so the macro expands to a
 * single statement and cannot capture a following "else".
 */
#define	CARP_LOG(sc, s)							\
	do {								\
		if (carp_opts[CARPCTL_LOG]) {				\
			if (sc)						\
				log(LOG_INFO, "%s: ",			\
				    (sc)->sc_if.if_xname);		\
			else						\
				log(LOG_INFO, "carp: ");		\
			addlog s;					\
			addlog("\n");					\
		}							\
	} while (/*CONSTCOND*/0)
189 
190 static void	carp_hmac_prepare(struct carp_softc *);
191 static void	carp_hmac_generate(struct carp_softc *, u_int32_t *,
192 		    unsigned char *);
193 static int	carp_hmac_verify(struct carp_softc *, u_int32_t *,
194 		    unsigned char *);
195 static void	carp_setroute(struct carp_softc *, int);
196 static void	carp_proto_input_c(struct mbuf *, struct carp_header *,
197 		    sa_family_t);
198 static void	carpdetach(struct carp_softc *);
199 static void	carp_prepare_ad(struct mbuf *, struct carp_softc *,
200 		    struct carp_header *);
201 static void	carp_send_ad_all(void);
202 static void	carp_send_ad(void *);
203 static void	carp_send_arp(struct carp_softc *);
204 static void	carp_master_down(void *);
205 static int	carp_ioctl(struct ifnet *, u_long, void *);
206 static void	carp_start(struct ifnet *);
207 static void	carp_setrun(struct carp_softc *, sa_family_t);
208 static void	carp_set_state(struct carp_softc *, int);
209 static int	carp_addrcount(struct carp_if *, struct in_ifaddr *, int);
210 enum	{ CARP_COUNT_MASTER, CARP_COUNT_RUNNING };
211 
212 static void	carp_multicast_cleanup(struct carp_softc *);
213 static int	carp_set_ifp(struct carp_softc *, struct ifnet *);
214 static void	carp_set_enaddr(struct carp_softc *);
215 #if 0
216 static void	carp_addr_updated(void *);
217 #endif
218 static u_int32_t	carp_hash(struct carp_softc *, u_char *);
219 static int	carp_set_addr(struct carp_softc *, struct sockaddr_in *);
220 static int	carp_join_multicast(struct carp_softc *);
221 #ifdef INET6
222 static void	carp_send_na(struct carp_softc *);
223 static int	carp_set_addr6(struct carp_softc *, struct sockaddr_in6 *);
224 static int	carp_join_multicast6(struct carp_softc *);
225 #endif
226 static int	carp_clone_create(struct if_clone *, int);
227 static int	carp_clone_destroy(struct ifnet *);
228 static int	carp_ether_addmulti(struct carp_softc *, struct ifreq *);
229 static int	carp_ether_delmulti(struct carp_softc *, struct ifreq *);
230 static void	carp_ether_purgemulti(struct carp_softc *);
231 static int      carp_mediachange(struct ifnet *ifp);
232 static void	carp_mediastatus(struct ifnet *ifp, struct ifmediareq *imr);
233 static void	carp_update_link_state(struct carp_softc *sc);
234 
235 static void	sysctl_net_inet_carp_setup(struct sysctllog **);
236 
237 /* workqueue-based pr_input */
238 static struct wqinput *carp_wqinput;
239 static void _carp_proto_input(struct mbuf *, int, int);
240 #ifdef INET6
241 static struct wqinput *carp6_wqinput;
242 static void _carp6_proto_input(struct mbuf *, int, int);
243 #endif
244 
245 struct if_clone carp_cloner =
246     IF_CLONE_INITIALIZER("carp", carp_clone_create, carp_clone_destroy);
247 
248 static __inline u_int16_t
249 carp_cksum(struct mbuf *m, int len)
250 {
251 	return (in_cksum(m, len));
252 }
253 
254 #ifdef INET6
255 static __inline u_int16_t
256 carp6_cksum(struct mbuf *m, uint32_t off, uint32_t len)
257 {
258 	return (in6_cksum(m, IPPROTO_CARP, off, len));
259 }
260 #endif
261 
262 static void
263 carp_hmac_prepare(struct carp_softc *sc)
264 {
265 	u_int8_t carp_version = CARP_VERSION, type = CARP_ADVERTISEMENT;
266 	u_int8_t vhid = sc->sc_vhid & 0xff;
267 	SHA1_CTX sha1ctx;
268 	u_int32_t kmd[5];
269 	struct ifaddr *ifa;
270 	int i, found;
271 	struct in_addr last, cur, in;
272 #ifdef INET6
273 	struct in6_addr last6, cur6, in6;
274 #endif /* INET6 */
275 
276 	/* compute ipad from key */
277 	memset(sc->sc_pad, 0, sizeof(sc->sc_pad));
278 	memcpy(sc->sc_pad, sc->sc_key, sizeof(sc->sc_key));
279 	for (i = 0; i < sizeof(sc->sc_pad); i++)
280 		sc->sc_pad[i] ^= 0x36;
281 
282 	/* precompute first part of inner hash */
283 	SHA1Init(&sc->sc_sha1);
284 	SHA1Update(&sc->sc_sha1, sc->sc_pad, sizeof(sc->sc_pad));
285 	SHA1Update(&sc->sc_sha1, (void *)&carp_version, sizeof(carp_version));
286 	SHA1Update(&sc->sc_sha1, (void *)&type, sizeof(type));
287 
288 	/* generate a key for the arpbalance hash, before the vhid is hashed */
289 	memcpy(&sha1ctx, &sc->sc_sha1, sizeof(sha1ctx));
290 	SHA1Final((unsigned char *)kmd, &sha1ctx);
291 	sc->sc_hashkey[0] = kmd[0] ^ kmd[1];
292 	sc->sc_hashkey[1] = kmd[2] ^ kmd[3];
293 
294 	/* the rest of the precomputation */
295 	SHA1Update(&sc->sc_sha1, (void *)&vhid, sizeof(vhid));
296 
297 	/* Hash the addresses from smallest to largest, not interface order */
298 #ifdef INET
299 	cur.s_addr = 0;
300 	do {
301 		int s;
302 		found = 0;
303 		last = cur;
304 		cur.s_addr = 0xffffffff;
305 		s = pserialize_read_enter();
306 		IFADDR_READER_FOREACH(ifa, &sc->sc_if) {
307 			in.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr;
308 			if (ifa->ifa_addr->sa_family == AF_INET &&
309 			    ntohl(in.s_addr) > ntohl(last.s_addr) &&
310 			    ntohl(in.s_addr) < ntohl(cur.s_addr)) {
311 				cur.s_addr = in.s_addr;
312 				found++;
313 			}
314 		}
315 		pserialize_read_exit(s);
316 		if (found)
317 			SHA1Update(&sc->sc_sha1, (void *)&cur, sizeof(cur));
318 	} while (found);
319 #endif /* INET */
320 
321 #ifdef INET6
322 	memset(&cur6, 0x00, sizeof(cur6));
323 	do {
324 		int s;
325 		found = 0;
326 		last6 = cur6;
327 		memset(&cur6, 0xff, sizeof(cur6));
328 		s = pserialize_read_enter();
329 		IFADDR_READER_FOREACH(ifa, &sc->sc_if) {
330 			in6 = ifatoia6(ifa)->ia_addr.sin6_addr;
331 			if (IN6_IS_ADDR_LINKLOCAL(&in6))
332 				in6.s6_addr16[1] = 0;
333 			if (ifa->ifa_addr->sa_family == AF_INET6 &&
334 			    memcmp(&in6, &last6, sizeof(in6)) > 0 &&
335 			    memcmp(&in6, &cur6, sizeof(in6)) < 0) {
336 				cur6 = in6;
337 				found++;
338 			}
339 		}
340 		pserialize_read_exit(s);
341 		if (found)
342 			SHA1Update(&sc->sc_sha1, (void *)&cur6, sizeof(cur6));
343 	} while (found);
344 #endif /* INET6 */
345 
346 	/* convert ipad to opad */
347 	for (i = 0; i < sizeof(sc->sc_pad); i++)
348 		sc->sc_pad[i] ^= 0x36 ^ 0x5c;
349 }
350 
351 static void
352 carp_hmac_generate(struct carp_softc *sc, u_int32_t counter[2],
353     unsigned char md[20])
354 {
355 	SHA1_CTX sha1ctx;
356 
357 	/* fetch first half of inner hash */
358 	memcpy(&sha1ctx, &sc->sc_sha1, sizeof(sha1ctx));
359 
360 	SHA1Update(&sha1ctx, (void *)counter, sizeof(sc->sc_counter));
361 	SHA1Final(md, &sha1ctx);
362 
363 	/* outer hash */
364 	SHA1Init(&sha1ctx);
365 	SHA1Update(&sha1ctx, sc->sc_pad, sizeof(sc->sc_pad));
366 	SHA1Update(&sha1ctx, md, 20);
367 	SHA1Final(md, &sha1ctx);
368 }
369 
370 static int
371 carp_hmac_verify(struct carp_softc *sc, u_int32_t counter[2],
372     unsigned char md[20])
373 {
374 	unsigned char md2[20];
375 
376 	carp_hmac_generate(sc, counter, md2);
377 
378 	return (memcmp(md, md2, sizeof(md2)));
379 }
380 
381 static void
382 carp_setroute(struct carp_softc *sc, int cmd)
383 {
384 	struct ifaddr *ifa;
385 	int s, bound;
386 
387 	KERNEL_LOCK(1, NULL);
388 	bound = curlwp_bind();
389 	s = pserialize_read_enter();
390 	IFADDR_READER_FOREACH(ifa, &sc->sc_if) {
391 		struct psref psref;
392 		ifa_acquire(ifa, &psref);
393 		pserialize_read_exit(s);
394 
395 		switch (ifa->ifa_addr->sa_family) {
396 		case AF_INET: {
397 			int count = 0;
398 			struct rtentry *rt;
399 			int hr_otherif, nr_ourif;
400 
401 			/*
402 			 * Avoid screwing with the routes if there are other
403 			 * carp interfaces which are master and have the same
404 			 * address.
405 			 */
406 			if (sc->sc_carpdev != NULL &&
407 			    sc->sc_carpdev->if_carp != NULL) {
408 				count = carp_addrcount(
409 				    (struct carp_if *)sc->sc_carpdev->if_carp,
410 				    ifatoia(ifa), CARP_COUNT_MASTER);
411 				if ((cmd == RTM_ADD && count != 1) ||
412 				    (cmd == RTM_DELETE && count != 0))
413 					goto next;
414 			}
415 
416 			/* Remove the existing host route, if any */
417 			rtrequest(RTM_DELETE, ifa->ifa_addr,
418 			    ifa->ifa_addr, ifa->ifa_netmask,
419 			    RTF_HOST, NULL);
420 
421 			rt = NULL;
422 			(void)rtrequest(RTM_GET, ifa->ifa_addr, ifa->ifa_addr,
423 			    ifa->ifa_netmask, RTF_HOST, &rt);
424 			hr_otherif = (rt && rt->rt_ifp != &sc->sc_if &&
425 			    (rt->rt_flags & RTF_CONNECTED));
426 			if (rt != NULL) {
427 				rt_unref(rt);
428 				rt = NULL;
429 			}
430 
431 			/* Check for a network route on our interface */
432 
433 			rt = NULL;
434 			(void)rtrequest(RTM_GET, ifa->ifa_addr, ifa->ifa_addr,
435 			    ifa->ifa_netmask, 0, &rt);
436 			nr_ourif = (rt && rt->rt_ifp == &sc->sc_if);
437 
438 			switch (cmd) {
439 			case RTM_ADD:
440 				if (hr_otherif) {
441 					ifa->ifa_rtrequest = NULL;
442 					ifa->ifa_flags &= ~RTF_CONNECTED;
443 
444 					rtrequest(RTM_ADD, ifa->ifa_addr,
445 					    ifa->ifa_addr, ifa->ifa_netmask,
446 					    RTF_UP | RTF_HOST, NULL);
447 				}
448 				if (!hr_otherif || nr_ourif || !rt) {
449 					if (nr_ourif &&
450 					    (rt->rt_flags & RTF_CONNECTED) == 0)
451 						rtrequest(RTM_DELETE,
452 						    ifa->ifa_addr,
453 						    ifa->ifa_addr,
454 						    ifa->ifa_netmask, 0, NULL);
455 
456 					ifa->ifa_rtrequest = arp_rtrequest;
457 					ifa->ifa_flags |= RTF_CONNECTED;
458 
459 					if (rtrequest(RTM_ADD, ifa->ifa_addr,
460 					    ifa->ifa_addr, ifa->ifa_netmask, 0,
461 					    NULL) == 0)
462 						ifa->ifa_flags |= IFA_ROUTE;
463 				}
464 				break;
465 			case RTM_DELETE:
466 				break;
467 			default:
468 				break;
469 			}
470 			if (rt != NULL) {
471 				rt_unref(rt);
472 				rt = NULL;
473 			}
474 			break;
475 		}
476 
477 #ifdef INET6
478 		case AF_INET6:
479 			if (cmd == RTM_ADD)
480 				in6_ifaddlocal(ifa);
481 			else
482 				in6_ifremlocal(ifa);
483 			break;
484 #endif /* INET6 */
485 		default:
486 			break;
487 		}
488 	next:
489 		s = pserialize_read_enter();
490 		ifa_release(ifa, &psref);
491 	}
492 	pserialize_read_exit(s);
493 	curlwp_bindx(bound);
494 	KERNEL_UNLOCK_ONE(NULL);
495 }
496 
497 /*
498  * process input packet.
499  * we have rearranged checks order compared to the rfc,
500  * but it seems more efficient this way or not possible otherwise.
501  */
502 static void
503 _carp_proto_input(struct mbuf *m, int hlen, int proto)
504 {
505 	struct ip *ip = mtod(m, struct ip *);
506 	struct carp_softc *sc = NULL;
507 	struct carp_header *ch;
508 	int iplen, len;
509 	struct ifnet *rcvif;
510 
511 	CARP_STATINC(CARP_STAT_IPACKETS);
512 	MCLAIM(m, &carp_proto_mowner_rx);
513 
514 	if (!carp_opts[CARPCTL_ALLOW]) {
515 		m_freem(m);
516 		return;
517 	}
518 
519 	rcvif = m_get_rcvif_NOMPSAFE(m);
520 	/* check if received on a valid carp interface */
521 	if (rcvif->if_type != IFT_CARP) {
522 		CARP_STATINC(CARP_STAT_BADIF);
523 		CARP_LOG(sc, ("packet received on non-carp interface: %s",
524 		    rcvif->if_xname));
525 		m_freem(m);
526 		return;
527 	}
528 
529 	/* verify that the IP TTL is 255.  */
530 	if (ip->ip_ttl != CARP_DFLTTL) {
531 		CARP_STATINC(CARP_STAT_BADTTL);
532 		CARP_LOG(sc, ("received ttl %d != %d on %s", ip->ip_ttl,
533 		    CARP_DFLTTL, rcvif->if_xname));
534 		m_freem(m);
535 		return;
536 	}
537 
538 	/*
539 	 * verify that the received packet length is
540 	 * equal to the CARP header
541 	 */
542 	iplen = ip->ip_hl << 2;
543 	len = iplen + sizeof(*ch);
544 	if (len > m->m_pkthdr.len) {
545 		CARP_STATINC(CARP_STAT_BADLEN);
546 		CARP_LOG(sc, ("packet too short %d on %s", m->m_pkthdr.len,
547 		    rcvif->if_xname));
548 		m_freem(m);
549 		return;
550 	}
551 
552 	if ((m = m_pullup(m, len)) == NULL) {
553 		CARP_STATINC(CARP_STAT_HDROPS);
554 		return;
555 	}
556 	ip = mtod(m, struct ip *);
557 	ch = (struct carp_header *)((char *)ip + iplen);
558 	/* verify the CARP checksum */
559 	m->m_data += iplen;
560 	if (carp_cksum(m, len - iplen)) {
561 		CARP_STATINC(CARP_STAT_BADSUM);
562 		CARP_LOG(sc, ("checksum failed on %s",
563 		    rcvif->if_xname));
564 		m_freem(m);
565 		return;
566 	}
567 	m->m_data -= iplen;
568 
569 	carp_proto_input_c(m, ch, AF_INET);
570 }
571 
572 void
573 carp_proto_input(struct mbuf *m, int off, int proto)
574 {
575 
576 	wqinput_input(carp_wqinput, m, 0, 0);
577 }
578 
579 #ifdef INET6
580 static void
581 _carp6_proto_input(struct mbuf *m, int off, int proto)
582 {
583 	struct carp_softc *sc = NULL;
584 	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
585 	struct carp_header *ch;
586 	u_int len;
587 	struct ifnet *rcvif;
588 
589 	CARP_STATINC(CARP_STAT_IPACKETS6);
590 	MCLAIM(m, &carp_proto6_mowner_rx);
591 
592 	if (!carp_opts[CARPCTL_ALLOW]) {
593 		m_freem(m);
594 		return;
595 	}
596 
597 	rcvif = m_get_rcvif_NOMPSAFE(m);
598 
599 	/* check if received on a valid carp interface */
600 	if (rcvif->if_type != IFT_CARP) {
601 		CARP_STATINC(CARP_STAT_BADIF);
602 		CARP_LOG(sc, ("packet received on non-carp interface: %s",
603 		    rcvif->if_xname));
604 		m_freem(m);
605 		return;
606 	}
607 
608 	/* verify that the IP TTL is 255 */
609 	if (ip6->ip6_hlim != CARP_DFLTTL) {
610 		CARP_STATINC(CARP_STAT_BADTTL);
611 		CARP_LOG(sc, ("received ttl %d != %d on %s", ip6->ip6_hlim,
612 		    CARP_DFLTTL, rcvif->if_xname));
613 		m_freem(m);
614 		return;
615 	}
616 
617 	/* verify that we have a complete carp packet */
618 	len = m->m_len;
619 	M_REGION_GET(ch, struct carp_header *, m, off, sizeof(*ch));
620 	if (ch == NULL) {
621 		CARP_STATINC(CARP_STAT_BADLEN);
622 		CARP_LOG(sc, ("packet size %u too small", len));
623 		return;
624 	}
625 
626 	/* verify the CARP checksum */
627 	if (carp6_cksum(m, off, sizeof(*ch))) {
628 		CARP_STATINC(CARP_STAT_BADSUM);
629 		CARP_LOG(sc, ("checksum failed, on %s", rcvif->if_xname));
630 		m_freem(m);
631 		return;
632 	}
633 
634 	carp_proto_input_c(m, ch, AF_INET6);
635 	return;
636 }
637 
638 int
639 carp6_proto_input(struct mbuf **mp, int *offp, int proto)
640 {
641 
642 	wqinput_input(carp6_wqinput, *mp, *offp, proto);
643 
644 	return IPPROTO_DONE;
645 }
646 #endif /* INET6 */
647 
648 static void
649 carp_proto_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af)
650 {
651 	struct carp_softc *sc;
652 	u_int64_t tmp_counter;
653 	struct timeval sc_tv, ch_tv;
654 
655 	TAILQ_FOREACH(sc, &((struct carp_if *)
656 	    m_get_rcvif_NOMPSAFE(m)->if_carpdev->if_carp)->vhif_vrs, sc_list)
657 		if (sc->sc_vhid == ch->carp_vhid)
658 			break;
659 
660 	if (!sc || (sc->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) !=
661 	    (IFF_UP|IFF_RUNNING)) {
662 		CARP_STATINC(CARP_STAT_BADVHID);
663 		m_freem(m);
664 		return;
665 	}
666 
667 	/*
668 	 * Check if our own advertisement was duplicated
669 	 * from a non simplex interface.
670 	 * XXX If there is no address on our physical interface
671 	 * there is no way to distinguish our ads from the ones
672 	 * another carp host might have sent us.
673 	 */
674 	if ((sc->sc_carpdev->if_flags & IFF_SIMPLEX) == 0) {
675 		struct sockaddr sa;
676 		struct ifaddr *ifa;
677 		int s;
678 
679 		memset(&sa, 0, sizeof(sa));
680 		sa.sa_family = af;
681 		s = pserialize_read_enter();
682 		ifa = ifaof_ifpforaddr(&sa, sc->sc_carpdev);
683 
684 		if (ifa && af == AF_INET) {
685 			struct ip *ip = mtod(m, struct ip *);
686 			if (ip->ip_src.s_addr ==
687 					ifatoia(ifa)->ia_addr.sin_addr.s_addr) {
688 				pserialize_read_exit(s);
689 				m_freem(m);
690 				return;
691 			}
692 		}
693 #ifdef INET6
694 		if (ifa && af == AF_INET6) {
695 			struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
696 			struct in6_addr in6_src, in6_found;
697 
698 			in6_src = ip6->ip6_src;
699 			in6_found = ifatoia6(ifa)->ia_addr.sin6_addr;
700 			if (IN6_IS_ADDR_LINKLOCAL(&in6_src))
701 				in6_src.s6_addr16[1] = 0;
702 			if (IN6_IS_ADDR_LINKLOCAL(&in6_found))
703 				in6_found.s6_addr16[1] = 0;
704 			if (IN6_ARE_ADDR_EQUAL(&in6_src, &in6_found)) {
705 				pserialize_read_exit(s);
706 				m_freem(m);
707 				return;
708 			}
709 		}
710 #endif /* INET6 */
711 		pserialize_read_exit(s);
712 	}
713 
714 	nanotime(&sc->sc_if.if_lastchange);
715 	if_statadd2(&sc->sc_if, if_ipackets, 1, if_ibytes, m->m_pkthdr.len);
716 
717 	/* verify the CARP version. */
718 	if (ch->carp_version != CARP_VERSION) {
719 		CARP_STATINC(CARP_STAT_BADVER);
720 		if_statinc(&sc->sc_if, if_ierrors);
721 		CARP_LOG(sc, ("invalid version %d != %d",
722 		    ch->carp_version, CARP_VERSION));
723 		m_freem(m);
724 		return;
725 	}
726 
727 	/* verify the hash */
728 	if (carp_hmac_verify(sc, ch->carp_counter, ch->carp_md)) {
729 		struct ip *ip;
730 		char ipbuf[INET_ADDRSTRLEN];
731 #ifdef INET6
732 		struct ip6_hdr *ip6;
733 		char ip6buf[INET6_ADDRSTRLEN];
734 #endif
735 
736 		CARP_STATINC(CARP_STAT_BADAUTH);
737 		if_statinc(&sc->sc_if, if_ierrors);
738 
739 		switch(af) {
740 		case AF_INET:
741 			ip = mtod(m, struct ip *);
742 			CARP_LOG(sc, ("incorrect hash from %s",
743 			    IN_PRINT(ipbuf, &ip->ip_src)));
744 			break;
745 
746 #ifdef INET6
747 		case AF_INET6:
748 			ip6 = mtod(m, struct ip6_hdr *);
749 			CARP_LOG(sc, ("incorrect hash from %s",
750 			    IN6_PRINT(ip6buf, &ip6->ip6_src)));
751 			break;
752 #endif
753 
754 		default: CARP_LOG(sc, ("incorrect hash"));
755 			break;
756 		}
757 		m_freem(m);
758 		return;
759 	}
760 
761 	tmp_counter = ntohl(ch->carp_counter[0]);
762 	tmp_counter = tmp_counter<<32;
763 	tmp_counter += ntohl(ch->carp_counter[1]);
764 
765 	/* XXX Replay protection goes here */
766 
767 	sc->sc_init_counter = 0;
768 	sc->sc_counter = tmp_counter;
769 
770 
771 	sc_tv.tv_sec = sc->sc_advbase;
772 	if (carp_suppress_preempt && sc->sc_advskew <  240)
773 		sc_tv.tv_usec = 240 * 1000000 / 256;
774 	else
775 		sc_tv.tv_usec = sc->sc_advskew * 1000000 / 256;
776 	ch_tv.tv_sec = ch->carp_advbase;
777 	ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256;
778 
779 	switch (sc->sc_state) {
780 	case INIT:
781 		break;
782 	case MASTER:
783 		/*
784 		 * If we receive an advertisement from a backup who's going to
785 		 * be more frequent than us, go into BACKUP state.
786 		 */
787 		if (timercmp(&sc_tv, &ch_tv, >) ||
788 		    timercmp(&sc_tv, &ch_tv, ==)) {
789 			callout_stop(&sc->sc_ad_tmo);
790 			CARP_LOG(sc, ("MASTER -> BACKUP (more frequent advertisement received)"));
791 			carp_set_state(sc, BACKUP);
792 			carp_setrun(sc, 0);
793 			carp_setroute(sc, RTM_DELETE);
794 		}
795 		break;
796 	case BACKUP:
797 		/*
798 		 * If we're pre-empting masters who advertise slower than us,
799 		 * and this one claims to be slower, treat him as down.
800 		 */
801 		if (carp_opts[CARPCTL_PREEMPT] && timercmp(&sc_tv, &ch_tv, <)) {
802 			CARP_LOG(sc, ("BACKUP -> MASTER (preempting a slower master)"));
803 			carp_master_down(sc);
804 			break;
805 		}
806 
807 		/*
808 		 *  If the master is going to advertise at such a low frequency
809 		 *  that he's guaranteed to time out, we'd might as well just
810 		 *  treat him as timed out now.
811 		 */
812 		sc_tv.tv_sec = sc->sc_advbase * 3;
813 		if (timercmp(&sc_tv, &ch_tv, <)) {
814 			CARP_LOG(sc, ("BACKUP -> MASTER (master timed out)"));
815 			carp_master_down(sc);
816 			break;
817 		}
818 
819 		/*
820 		 * Otherwise, we reset the counter and wait for the next
821 		 * advertisement.
822 		 */
823 		carp_setrun(sc, af);
824 		break;
825 	}
826 
827 	m_freem(m);
828 	return;
829 }
830 
831 /*
832  * Interface side of the CARP implementation.
833  */
834 
835 /* ARGSUSED */
836 void
837 carpattach(int n)
838 {
839 	if_clone_attach(&carp_cloner);
840 
841 	carpstat_percpu = percpu_alloc(sizeof(uint64_t) * CARP_NSTATS);
842 }
843 
844 static int
845 carp_clone_create(struct if_clone *ifc, int unit)
846 {
847 	extern int ifqmaxlen;
848 	struct carp_softc *sc;
849 	struct ifnet *ifp;
850 	int rv;
851 
852 	sc = malloc(sizeof(*sc), M_DEVBUF, M_NOWAIT|M_ZERO);
853 	if (!sc)
854 		return (ENOMEM);
855 
856 	sc->sc_suppress = 0;
857 	sc->sc_advbase = CARP_DFLTINTV;
858 	sc->sc_vhid = -1;	/* required setting */
859 	sc->sc_advskew = 0;
860 	sc->sc_init_counter = 1;
861 	sc->sc_naddrs = sc->sc_naddrs6 = 0;
862 #ifdef INET6
863 	sc->sc_im6o.im6o_multicast_hlim = CARP_DFLTTL;
864 #endif /* INET6 */
865 
866 	callout_init(&sc->sc_ad_tmo, 0);
867 	callout_init(&sc->sc_md_tmo, 0);
868 	callout_init(&sc->sc_md6_tmo, 0);
869 
870 	callout_setfunc(&sc->sc_ad_tmo, carp_send_ad, sc);
871 	callout_setfunc(&sc->sc_md_tmo, carp_master_down, sc);
872 	callout_setfunc(&sc->sc_md6_tmo, carp_master_down, sc);
873 
874 	LIST_INIT(&sc->carp_mc_listhead);
875 	ifp = &sc->sc_if;
876 	ifp->if_softc = sc;
877 	snprintf(ifp->if_xname, sizeof ifp->if_xname, "%s%d", ifc->ifc_name,
878 	    unit);
879 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
880 	ifp->if_ioctl = carp_ioctl;
881 	ifp->if_start = carp_start;
882 	IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen);
883 	IFQ_SET_READY(&ifp->if_snd);
884 	rv = if_initialize(ifp);
885 	if (rv != 0) {
886 		callout_destroy(&sc->sc_ad_tmo);
887 		callout_destroy(&sc->sc_md_tmo);
888 		callout_destroy(&sc->sc_md6_tmo);
889 		free(ifp->if_softc, M_DEVBUF);
890 
891 		return rv;
892 	}
893 	ifmedia_init(&sc->sc_im, 0, carp_mediachange, carp_mediastatus);
894 	sc->sc_im.ifm_media = IFM_CARP;
895 	ether_ifattach(ifp, NULL);
896 	carp_set_enaddr(sc);
897 	/* Overwrite ethernet defaults */
898 	ifp->if_type = IFT_CARP;
899 	ifp->if_output = carp_output;
900 	if_register(ifp);
901 
902 	return (0);
903 }
904 
905 static int
906 carp_clone_destroy(struct ifnet *ifp)
907 {
908 	struct carp_softc *sc = ifp->if_softc;
909 
910 	carpdetach(ifp->if_softc);
911 	ether_ifdetach(ifp);
912 	if_detach(ifp);
913 	ifmedia_fini(&sc->sc_im);
914 	callout_destroy(&sc->sc_ad_tmo);
915 	callout_destroy(&sc->sc_md_tmo);
916 	callout_destroy(&sc->sc_md6_tmo);
917 	free(ifp->if_softc, M_DEVBUF);
918 
919 	return (0);
920 }
921 
922 static void
923 carpdetach(struct carp_softc *sc)
924 {
925 	struct carp_if *cif;
926 	int s;
927 
928 	callout_stop(&sc->sc_ad_tmo);
929 	callout_stop(&sc->sc_md_tmo);
930 	callout_stop(&sc->sc_md6_tmo);
931 
932 	if (sc->sc_suppress)
933 		carp_suppress_preempt--;
934 	sc->sc_suppress = 0;
935 
936 	if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS)
937 		carp_suppress_preempt--;
938 	sc->sc_sendad_errors = 0;
939 
940 	carp_set_state(sc, INIT);
941 	sc->sc_if.if_flags &= ~IFF_UP;
942 	carp_setrun(sc, 0);
943 	carp_multicast_cleanup(sc);
944 
945 	KERNEL_LOCK(1, NULL);
946 	s = splnet();
947 	if (sc->sc_carpdev != NULL) {
948 		/* XXX linkstatehook removal */
949 		cif = (struct carp_if *)sc->sc_carpdev->if_carp;
950 		TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list);
951 		if (!--cif->vhif_nvrs) {
952 			ifpromisc(sc->sc_carpdev, 0);
953 			sc->sc_carpdev->if_carp = NULL;
954 			free(cif, M_IFADDR);
955 		}
956 	}
957 	sc->sc_carpdev = NULL;
958 	splx(s);
959 	KERNEL_UNLOCK_ONE(NULL);
960 }
961 
962 /* Detach an interface from the carp. */
963 void
964 carp_ifdetach(struct ifnet *ifp)
965 {
966 	struct carp_softc *sc, *nextsc;
967 	struct carp_if *cif = (struct carp_if *)ifp->if_carp;
968 
969 	for (sc = TAILQ_FIRST(&cif->vhif_vrs); sc; sc = nextsc) {
970 		nextsc = TAILQ_NEXT(sc, sc_list);
971 		carpdetach(sc);
972 	}
973 }
974 
975 static void
976 carp_prepare_ad(struct mbuf *m, struct carp_softc *sc,
977     struct carp_header *ch)
978 {
979 	if (sc->sc_init_counter) {
980 		/* this could also be seconds since unix epoch */
981 		sc->sc_counter = cprng_fast64();
982 	} else
983 		sc->sc_counter++;
984 
985 	ch->carp_counter[0] = htonl((sc->sc_counter>>32)&0xffffffff);
986 	ch->carp_counter[1] = htonl(sc->sc_counter&0xffffffff);
987 
988 	carp_hmac_generate(sc, ch->carp_counter, ch->carp_md);
989 }
990 
991 static void
992 carp_send_ad_all(void)
993 {
994 	struct ifnet *ifp;
995 	struct carp_if *cif;
996 	struct carp_softc *vh;
997 	int s;
998 	int bound = curlwp_bind();
999 
1000 	s = pserialize_read_enter();
1001 	IFNET_READER_FOREACH(ifp) {
1002 		struct psref psref;
1003 		if (ifp->if_carp == NULL || ifp->if_type == IFT_CARP)
1004 			continue;
1005 
1006 		if_acquire(ifp, &psref);
1007 		pserialize_read_exit(s);
1008 
1009 		cif = (struct carp_if *)ifp->if_carp;
1010 		TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
1011 			if ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) ==
1012 			    (IFF_UP|IFF_RUNNING) && vh->sc_state == MASTER)
1013 				carp_send_ad(vh);
1014 		}
1015 
1016 		s = pserialize_read_enter();
1017 		if_release(ifp, &psref);
1018 	}
1019 	pserialize_read_exit(s);
1020 	curlwp_bindx(bound);
1021 }
1022 
1023 
1024 static void
1025 carp_send_ad(void *v)
1026 {
1027 	struct carp_header ch;
1028 	struct timeval tv;
1029 	struct carp_softc *sc = v;
1030 	struct carp_header *ch_ptr;
1031 	struct mbuf *m;
1032 	int error, len, advbase, advskew, s;
1033 	struct sockaddr sa;
1034 
1035 	KERNEL_LOCK(1, NULL);
1036 	s = splsoftnet();
1037 
1038 	advbase = advskew = 0; /* Sssssh compiler */
1039 	if (sc->sc_carpdev == NULL) {
1040 		if_statinc(&sc->sc_if, if_oerrors);
1041 		goto retry_later;
1042 	}
1043 
1044 	/* bow out if we've gone to backup (the carp interface is going down) */
1045 	if (sc->sc_bow_out) {
1046 		sc->sc_bow_out = 0;
1047 		advbase = 255;
1048 		advskew = 255;
1049 	} else {
1050 		advbase = sc->sc_advbase;
1051 		if (!carp_suppress_preempt || sc->sc_advskew > 240)
1052 			advskew = sc->sc_advskew;
1053 		else
1054 			advskew = 240;
1055 		tv.tv_sec = advbase;
1056 		tv.tv_usec = advskew * 1000000 / 256;
1057 	}
1058 
1059 	ch.carp_version = CARP_VERSION;
1060 	ch.carp_type = CARP_ADVERTISEMENT;
1061 	ch.carp_vhid = sc->sc_vhid;
1062 	ch.carp_advbase = advbase;
1063 	ch.carp_advskew = advskew;
1064 	ch.carp_authlen = 7;	/* XXX DEFINE */
1065 	ch.carp_pad1 = 0;	/* must be zero */
1066 	ch.carp_cksum = 0;
1067 
1068 
1069 #ifdef INET
1070 	if (sc->sc_naddrs) {
1071 		struct ip *ip;
1072 		struct ifaddr *ifa;
1073 		int _s;
1074 
1075 		MGETHDR(m, M_DONTWAIT, MT_HEADER);
1076 		if (m == NULL) {
1077 			if_statinc(&sc->sc_if, if_oerrors);
1078 			CARP_STATINC(CARP_STAT_ONOMEM);
1079 			/* XXX maybe less ? */
1080 			goto retry_later;
1081 		}
1082 		MCLAIM(m, &carp_proto_mowner_tx);
1083 		len = sizeof(*ip) + sizeof(ch);
1084 		m->m_pkthdr.len = len;
1085 		m_reset_rcvif(m);
1086 		m->m_len = len;
1087 		m_align(m, m->m_len);
1088 		m->m_flags |= M_MCAST;
1089 		ip = mtod(m, struct ip *);
1090 		ip->ip_v = IPVERSION;
1091 		ip->ip_hl = sizeof(*ip) >> 2;
1092 		ip->ip_tos = IPTOS_LOWDELAY;
1093 		ip->ip_len = htons(len);
1094 		ip->ip_id = 0;	/* no need for id, we don't support fragments */
1095 		ip->ip_off = htons(IP_DF);
1096 		ip->ip_ttl = CARP_DFLTTL;
1097 		ip->ip_p = IPPROTO_CARP;
1098 		ip->ip_sum = 0;
1099 
1100 		memset(&sa, 0, sizeof(sa));
1101 		sa.sa_family = AF_INET;
1102 		_s = pserialize_read_enter();
1103 		ifa = ifaof_ifpforaddr(&sa, sc->sc_carpdev);
1104 		if (ifa == NULL)
1105 			ip->ip_src.s_addr = 0;
1106 		else
1107 			ip->ip_src.s_addr =
1108 			    ifatoia(ifa)->ia_addr.sin_addr.s_addr;
1109 		pserialize_read_exit(_s);
1110 		ip->ip_dst.s_addr = INADDR_CARP_GROUP;
1111 
1112 		ch_ptr = (struct carp_header *)(&ip[1]);
1113 		memcpy(ch_ptr, &ch, sizeof(ch));
1114 		carp_prepare_ad(m, sc, ch_ptr);
1115 
1116 		m->m_data += sizeof(*ip);
1117 		ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip));
1118 		m->m_data -= sizeof(*ip);
1119 
1120 		nanotime(&sc->sc_if.if_lastchange);
1121 		if_statadd2(&sc->sc_if, if_opackets, 1, if_obytes, len);
1122 		CARP_STATINC(CARP_STAT_OPACKETS);
1123 
1124 		error = ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo,
1125 		    NULL);
1126 		if (error) {
1127 			if (error == ENOBUFS)
1128 				CARP_STATINC(CARP_STAT_ONOMEM);
1129 			else
1130 				CARP_LOG(sc, ("ip_output failed: %d", error));
1131 			if_statinc(&sc->sc_if, if_oerrors);
1132 			if (sc->sc_sendad_errors < INT_MAX)
1133 				sc->sc_sendad_errors++;
1134 			if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) {
1135 				carp_suppress_preempt++;
1136 				if (carp_suppress_preempt == 1)
1137 					carp_send_ad_all();
1138 			}
1139 			sc->sc_sendad_success = 0;
1140 		} else {
1141 			if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) {
1142 				if (++sc->sc_sendad_success >=
1143 				    CARP_SENDAD_MIN_SUCCESS) {
1144 					carp_suppress_preempt--;
1145 					sc->sc_sendad_errors = 0;
1146 				}
1147 			} else
1148 				sc->sc_sendad_errors = 0;
1149 		}
1150 	}
1151 #endif /* INET */
1152 #ifdef INET6
1153 	if (sc->sc_naddrs6) {
1154 		struct ip6_hdr *ip6;
1155 		struct ifaddr *ifa;
1156 		int _s;
1157 
1158 		MGETHDR(m, M_DONTWAIT, MT_HEADER);
1159 		if (m == NULL) {
1160 			if_statinc(&sc->sc_if, if_oerrors);
1161 			CARP_STATINC(CARP_STAT_ONOMEM);
1162 			/* XXX maybe less ? */
1163 			goto retry_later;
1164 		}
1165 		MCLAIM(m, &carp_proto6_mowner_tx);
1166 		len = sizeof(*ip6) + sizeof(ch);
1167 		m->m_pkthdr.len = len;
1168 		m_reset_rcvif(m);
1169 		m->m_len = len;
1170 		m_align(m, m->m_len);
1171 		m->m_flags |= M_MCAST;
1172 		ip6 = mtod(m, struct ip6_hdr *);
1173 		memset(ip6, 0, sizeof(*ip6));
1174 		ip6->ip6_vfc |= IPV6_VERSION;
1175 		ip6->ip6_hlim = CARP_DFLTTL;
1176 		ip6->ip6_nxt = IPPROTO_CARP;
1177 
1178 		/* set the source address */
1179 		memset(&sa, 0, sizeof(sa));
1180 		sa.sa_family = AF_INET6;
1181 		_s = pserialize_read_enter();
1182 		ifa = ifaof_ifpforaddr(&sa, sc->sc_carpdev);
1183 		if (ifa == NULL)	/* This should never happen with IPv6 */
1184 			memset(&ip6->ip6_src, 0, sizeof(struct in6_addr));
1185 		else
1186 			bcopy(ifatoia6(ifa)->ia_addr.sin6_addr.s6_addr,
1187 			    &ip6->ip6_src, sizeof(struct in6_addr));
1188 		pserialize_read_exit(_s);
1189 		/* set the multicast destination */
1190 
1191 		ip6->ip6_dst.s6_addr16[0] = htons(0xff02);
1192 		ip6->ip6_dst.s6_addr8[15] = 0x12;
1193 		if (in6_setscope(&ip6->ip6_dst, &sc->sc_if, NULL) != 0) {
1194 			if_statinc(&sc->sc_if, if_oerrors);
1195 			m_freem(m);
1196 			CARP_LOG(sc, ("in6_setscope failed"));
1197 			goto retry_later;
1198 		}
1199 
1200 		ch_ptr = (struct carp_header *)(&ip6[1]);
1201 		memcpy(ch_ptr, &ch, sizeof(ch));
1202 		carp_prepare_ad(m, sc, ch_ptr);
1203 
1204 		ch_ptr->carp_cksum = carp6_cksum(m, sizeof(*ip6),
1205 		    len - sizeof(*ip6));
1206 
1207 		nanotime(&sc->sc_if.if_lastchange);
1208 		if_statadd2(&sc->sc_if, if_opackets, 1, if_obytes, len);
1209 		CARP_STATINC(CARP_STAT_OPACKETS6);
1210 
1211 		error = ip6_output(m, NULL, NULL, 0, &sc->sc_im6o, NULL, NULL);
1212 		if (error) {
1213 			if (error == ENOBUFS)
1214 				CARP_STATINC(CARP_STAT_ONOMEM);
1215 			else
1216 				CARP_LOG(sc, ("ip6_output failed: %d", error));
1217 			if_statinc(&sc->sc_if, if_oerrors);
1218 			if (sc->sc_sendad_errors < INT_MAX)
1219 				sc->sc_sendad_errors++;
1220 			if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) {
1221 				carp_suppress_preempt++;
1222 				if (carp_suppress_preempt == 1)
1223 					carp_send_ad_all();
1224 			}
1225 			sc->sc_sendad_success = 0;
1226 		} else {
1227 			if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) {
1228 				if (++sc->sc_sendad_success >=
1229 				    CARP_SENDAD_MIN_SUCCESS) {
1230 					carp_suppress_preempt--;
1231 					sc->sc_sendad_errors = 0;
1232 				}
1233 			} else
1234 				sc->sc_sendad_errors = 0;
1235 		}
1236 	}
1237 #endif /* INET6 */
1238 
1239 retry_later:
1240 	splx(s);
1241 	KERNEL_UNLOCK_ONE(NULL);
1242 	if (advbase != 255 || advskew != 255)
1243 		callout_schedule(&sc->sc_ad_tmo, tvtohz(&tv));
1244 }
1245 
1246 /*
1247  * Broadcast a gratuitous ARP request containing
1248  * the virtual router MAC address for each IP address
1249  * associated with the virtual router.
1250  */
1251 static void
1252 carp_send_arp(struct carp_softc *sc)
1253 {
1254 	struct ifaddr *ifa;
1255 	int s, bound;
1256 
1257 	KERNEL_LOCK(1, NULL);
1258 	bound = curlwp_bind();
1259 	s = pserialize_read_enter();
1260 	IFADDR_READER_FOREACH(ifa, &sc->sc_if) {
1261 		struct psref psref;
1262 
1263 		if (ifa->ifa_addr->sa_family != AF_INET)
1264 			continue;
1265 
1266 		ifa_acquire(ifa, &psref);
1267 		pserialize_read_exit(s);
1268 
1269 		arpannounce(sc->sc_carpdev, ifa, CLLADDR(sc->sc_if.if_sadl));
1270 
1271 		s = pserialize_read_enter();
1272 		ifa_release(ifa, &psref);
1273 	}
1274 	pserialize_read_exit(s);
1275 	curlwp_bindx(bound);
1276 	KERNEL_UNLOCK_ONE(NULL);
1277 }
1278 
1279 #ifdef INET6
1280 static void
1281 carp_send_na(struct carp_softc *sc)
1282 {
1283 	struct ifaddr *ifa;
1284 	struct in6_addr *in6;
1285 	static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
1286 	int s, bound;
1287 
1288 	KERNEL_LOCK(1, NULL);
1289 	bound = curlwp_bind();
1290 	s = pserialize_read_enter();
1291 	IFADDR_READER_FOREACH(ifa, &sc->sc_if) {
1292 		struct psref psref;
1293 
1294 		if (ifa->ifa_addr->sa_family != AF_INET6)
1295 			continue;
1296 
1297 		ifa_acquire(ifa, &psref);
1298 		pserialize_read_exit(s);
1299 
1300 		in6 = &ifatoia6(ifa)->ia_addr.sin6_addr;
1301 		nd6_na_output(sc->sc_carpdev, &mcast, in6,
1302 		    ND_NA_FLAG_OVERRIDE, 1, NULL);
1303 
1304 		s = pserialize_read_enter();
1305 		ifa_release(ifa, &psref);
1306 	}
1307 	pserialize_read_exit(s);
1308 	curlwp_bindx(bound);
1309 	KERNEL_UNLOCK_ONE(NULL);
1310 }
1311 #endif /* INET6 */
1312 
/*
 * Three-word mixing step, based on bridge_hash() in if_bridge.c.
 *
 * All three arguments are modified in place and must be lvalues of an
 * unsigned 32-bit type; each argument is evaluated many times, so pass
 * plain variables only.
 */
#define	mix(a,b,c) \
	do {							\
		(a) -= (b); (a) -= (c); (a) ^= ((c) >> 13);	\
		(b) -= (c); (b) -= (a); (b) ^= ((a) << 8);	\
		(c) -= (a); (c) -= (b); (c) ^= ((b) >> 13);	\
		(a) -= (b); (a) -= (c); (a) ^= ((c) >> 12);	\
		(b) -= (c); (b) -= (a); (b) ^= ((a) << 16);	\
		(c) -= (a); (c) -= (b); (c) ^= ((b) >> 5);	\
		(a) -= (b); (a) -= (c); (a) ^= ((c) >> 3);	\
		(b) -= (c); (b) -= (a); (b) ^= ((a) << 10);	\
		(c) -= (a); (c) -= (b); (c) ^= ((b) >> 15);	\
	} while (0)
1328 
1329 static u_int32_t
1330 carp_hash(struct carp_softc *sc, u_char *src)
1331 {
1332 	u_int32_t a = 0x9e3779b9, b = sc->sc_hashkey[0], c = sc->sc_hashkey[1];
1333 
1334 	c += sc->sc_key[3] << 24;
1335 	c += sc->sc_key[2] << 16;
1336 	c += sc->sc_key[1] << 8;
1337 	c += sc->sc_key[0];
1338 	b += src[5] << 8;
1339 	b += src[4];
1340 	a += src[3] << 24;
1341 	a += src[2] << 16;
1342 	a += src[1] << 8;
1343 	a += src[0];
1344 
1345 	mix(a, b, c);
1346 	return (c);
1347 }
1348 
1349 static int
1350 carp_addrcount(struct carp_if *cif, struct in_ifaddr *ia, int type)
1351 {
1352 	struct carp_softc *vh;
1353 	struct ifaddr *ifa;
1354 	int count = 0;
1355 
1356 	TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
1357 		if ((type == CARP_COUNT_RUNNING &&
1358 		    (vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) ==
1359 		    (IFF_UP|IFF_RUNNING)) ||
1360 		    (type == CARP_COUNT_MASTER && vh->sc_state == MASTER)) {
1361 			int s = pserialize_read_enter();
1362 			IFADDR_READER_FOREACH(ifa, &vh->sc_if) {
1363 				if (ifa->ifa_addr->sa_family == AF_INET &&
1364 				    ia->ia_addr.sin_addr.s_addr ==
1365 				    ifatoia(ifa)->ia_addr.sin_addr.s_addr)
1366 					count++;
1367 			}
1368 			pserialize_read_exit(s);
1369 		}
1370 	}
1371 	return (count);
1372 }
1373 
1374 int
1375 carp_iamatch(struct in_ifaddr *ia, u_char *src,
1376     u_int32_t *count, u_int32_t index)
1377 {
1378 	struct carp_softc *sc = ia->ia_ifp->if_softc;
1379 
1380 	if (carp_opts[CARPCTL_ARPBALANCE]) {
1381 		/*
1382 		 * We use the source ip to decide which virtual host should
1383 		 * handle the request. If we're master of that virtual host,
1384 		 * then we respond, otherwise, just drop the arp packet on
1385 		 * the floor.
1386 		 */
1387 
1388 		/* Count the elegible carp interfaces with this address */
1389 		if (*count == 0)
1390 			*count = carp_addrcount(
1391 			    (struct carp_if *)ia->ia_ifp->if_carpdev->if_carp,
1392 			    ia, CARP_COUNT_RUNNING);
1393 
1394 		/* This should never happen, but... */
1395 		if (*count == 0)
1396 			return (0);
1397 
1398 		if (carp_hash(sc, src) % *count == index - 1 &&
1399 		    sc->sc_state == MASTER) {
1400 			return (1);
1401 		}
1402 	} else {
1403 		if (sc->sc_state == MASTER)
1404 			return (1);
1405 	}
1406 
1407 	return (0);
1408 }
1409 
1410 #ifdef INET6
1411 struct ifaddr *
1412 carp_iamatch6(void *v, struct in6_addr *taddr)
1413 {
1414 	struct carp_if *cif = v;
1415 	struct carp_softc *vh;
1416 	struct ifaddr *ifa;
1417 
1418 	TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
1419 		int s = pserialize_read_enter();
1420 		IFADDR_READER_FOREACH(ifa, &vh->sc_if) {
1421 			if (IN6_ARE_ADDR_EQUAL(taddr,
1422 			    &ifatoia6(ifa)->ia_addr.sin6_addr) &&
1423 			    ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) ==
1424 			    (IFF_UP|IFF_RUNNING)) && vh->sc_state == MASTER)
1425 				return (ifa);
1426 		}
1427 		pserialize_read_exit(s);
1428 	}
1429 
1430 	return (NULL);
1431 }
1432 #endif /* INET6 */
1433 
1434 struct ifnet *
1435 carp_ourether(void *v, struct ether_header *eh, u_char iftype, int src)
1436 {
1437 	struct carp_if *cif = (struct carp_if *)v;
1438 	struct carp_softc *vh;
1439 	u_int8_t *ena;
1440 
1441 	if (src)
1442 		ena = (u_int8_t *)&eh->ether_shost;
1443 	else
1444 		ena = (u_int8_t *)&eh->ether_dhost;
1445 
1446 	switch (iftype) {
1447 	case IFT_ETHER:
1448 	case IFT_FDDI:
1449 		if (ena[0] || ena[1] || ena[2] != 0x5e || ena[3] || ena[4] != 1)
1450 			return (NULL);
1451 		break;
1452 	case IFT_ISO88025:
1453 		if (ena[0] != 3 || ena[1] || ena[4] || ena[5])
1454 			return (NULL);
1455 		break;
1456 	default:
1457 		return (NULL);
1458 		break;
1459 	}
1460 
1461 	TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list)
1462 		if ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) ==
1463 		    (IFF_UP|IFF_RUNNING) && vh->sc_state == MASTER &&
1464 		    !memcmp(ena, CLLADDR(vh->sc_if.if_sadl),
1465 		    ETHER_ADDR_LEN)) {
1466 			return (&vh->sc_if);
1467 		    }
1468 
1469 	return (NULL);
1470 }
1471 
1472 int
1473 carp_input(struct mbuf *m, u_int8_t *shost, u_int8_t *dhost, u_int16_t etype)
1474 {
1475 	struct ether_header eh;
1476 	struct carp_if *cif = (struct carp_if *)m_get_rcvif_NOMPSAFE(m)->if_carp;
1477 	struct ifnet *ifp;
1478 
1479 	memcpy(&eh.ether_shost, shost, sizeof(eh.ether_shost));
1480 	memcpy(&eh.ether_dhost, dhost, sizeof(eh.ether_dhost));
1481 	eh.ether_type = etype;
1482 
1483 	if (m->m_flags & (M_BCAST|M_MCAST)) {
1484 		struct carp_softc *vh;
1485 		struct mbuf *m0;
1486 
1487 		/*
1488 		 * XXX Should really check the list of multicast addresses
1489 		 * for each CARP interface _before_ copying.
1490 		 */
1491 		TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
1492 			m0 = m_copym(m, 0, M_COPYALL, M_DONTWAIT);
1493 			if (m0 == NULL)
1494 				continue;
1495 			m_set_rcvif(m0, &vh->sc_if);
1496 			ether_input(&vh->sc_if, m0);
1497 		}
1498 		return (1);
1499 	}
1500 
1501 	ifp = carp_ourether(cif, &eh, m_get_rcvif_NOMPSAFE(m)->if_type, 0);
1502 	if (ifp == NULL) {
1503 		return (1);
1504 	}
1505 
1506 	m_set_rcvif(m, ifp);
1507 
1508 	bpf_mtap(ifp, m, BPF_D_IN);
1509 	if_statinc(ifp, if_ipackets);
1510 	ether_input(ifp, m);
1511 	return (0);
1512 }
1513 
1514 static void
1515 carp_master_down(void *v)
1516 {
1517 	struct carp_softc *sc = v;
1518 
1519 	switch (sc->sc_state) {
1520 	case INIT:
1521 		printf("%s: master_down event in INIT state\n",
1522 		    sc->sc_if.if_xname);
1523 		break;
1524 	case MASTER:
1525 		break;
1526 	case BACKUP:
1527 		CARP_LOG(sc, ("INIT -> MASTER (preempting)"));
1528 		carp_set_state(sc, MASTER);
1529 		carp_send_ad(sc);
1530 		carp_send_arp(sc);
1531 #ifdef INET6
1532 		carp_send_na(sc);
1533 #endif /* INET6 */
1534 		carp_setrun(sc, 0);
1535 		carp_setroute(sc, RTM_ADD);
1536 		break;
1537 	}
1538 }
1539 
1540 /*
1541  * When in backup state, af indicates whether to reset the master down timer
1542  * for v4 or v6. If it's set to zero, reset the ones which are already pending.
1543  */
1544 static void
1545 carp_setrun(struct carp_softc *sc, sa_family_t af)
1546 {
1547 	struct timeval tv;
1548 
1549 	if (sc->sc_carpdev == NULL) {
1550 		sc->sc_if.if_flags &= ~IFF_RUNNING;
1551 		carp_set_state(sc, INIT);
1552 		return;
1553 	}
1554 
1555 	if (sc->sc_if.if_flags & IFF_UP && sc->sc_vhid > 0 &&
1556 	    (sc->sc_naddrs || sc->sc_naddrs6) && !sc->sc_suppress) {
1557 		sc->sc_if.if_flags |= IFF_RUNNING;
1558 	} else {
1559 		sc->sc_if.if_flags &= ~IFF_RUNNING;
1560 		carp_setroute(sc, RTM_DELETE);
1561 		return;
1562 	}
1563 
1564 	switch (sc->sc_state) {
1565 	case INIT:
1566 		carp_set_state(sc, BACKUP);
1567 		carp_setroute(sc, RTM_DELETE);
1568 		carp_setrun(sc, 0);
1569 		break;
1570 	case BACKUP:
1571 		callout_stop(&sc->sc_ad_tmo);
1572 		tv.tv_sec = 3 * sc->sc_advbase;
1573 		tv.tv_usec = sc->sc_advskew * 1000000 / 256;
1574 		switch (af) {
1575 #ifdef INET
1576 		case AF_INET:
1577 			callout_schedule(&sc->sc_md_tmo, tvtohz(&tv));
1578 			break;
1579 #endif /* INET */
1580 #ifdef INET6
1581 		case AF_INET6:
1582 			callout_schedule(&sc->sc_md6_tmo, tvtohz(&tv));
1583 			break;
1584 #endif /* INET6 */
1585 		default:
1586 			if (sc->sc_naddrs)
1587 				callout_schedule(&sc->sc_md_tmo, tvtohz(&tv));
1588 #ifdef INET6
1589 			if (sc->sc_naddrs6)
1590 				callout_schedule(&sc->sc_md6_tmo, tvtohz(&tv));
1591 #endif /* INET6 */
1592 			break;
1593 		}
1594 		break;
1595 	case MASTER:
1596 		tv.tv_sec = sc->sc_advbase;
1597 		tv.tv_usec = sc->sc_advskew * 1000000 / 256;
1598 		callout_schedule(&sc->sc_ad_tmo, tvtohz(&tv));
1599 		break;
1600 	}
1601 }
1602 
1603 static void
1604 carp_multicast_cleanup(struct carp_softc *sc)
1605 {
1606 	struct ip_moptions *imo = &sc->sc_imo;
1607 #ifdef INET6
1608 	struct ip6_moptions *im6o = &sc->sc_im6o;
1609 #endif
1610 	u_int16_t n = imo->imo_num_memberships;
1611 
1612 	/* Clean up our own multicast memberships */
1613 	while (n-- > 0) {
1614 		if (imo->imo_membership[n] != NULL) {
1615 			in_delmulti(imo->imo_membership[n]);
1616 			imo->imo_membership[n] = NULL;
1617 		}
1618 	}
1619 	imo->imo_num_memberships = 0;
1620 	imo->imo_multicast_if_index = 0;
1621 
1622 #ifdef INET6
1623 	while (!LIST_EMPTY(&im6o->im6o_memberships)) {
1624 		struct in6_multi_mship *imm =
1625 		    LIST_FIRST(&im6o->im6o_memberships);
1626 
1627 		LIST_REMOVE(imm, i6mm_chain);
1628 		in6_leavegroup(imm);
1629 	}
1630 	im6o->im6o_multicast_if_index = 0;
1631 #endif
1632 
1633 	/* And any other multicast memberships */
1634 	carp_ether_purgemulti(sc);
1635 }
1636 
1637 static int
1638 carp_set_ifp(struct carp_softc *sc, struct ifnet *ifp)
1639 {
1640 	struct carp_if *cif, *ncif = NULL;
1641 	struct carp_softc *vr, *after = NULL;
1642 	int myself = 0, error = 0;
1643 	int s;
1644 
1645 	if (ifp == sc->sc_carpdev)
1646 		return (0);
1647 
1648 	if (ifp != NULL) {
1649 		if ((ifp->if_flags & IFF_MULTICAST) == 0)
1650 			return (EADDRNOTAVAIL);
1651 
1652 		if (ifp->if_type == IFT_CARP)
1653 			return (EINVAL);
1654 
1655 		if (ifp->if_carp == NULL) {
1656 			ncif = malloc(sizeof(*cif), M_IFADDR, M_WAITOK);
1657 			if ((error = ifpromisc(ifp, 1))) {
1658 				free(ncif, M_IFADDR);
1659 				return (error);
1660 			}
1661 
1662 			ncif->vhif_ifp = ifp;
1663 			TAILQ_INIT(&ncif->vhif_vrs);
1664 		} else {
1665 			cif = (struct carp_if *)ifp->if_carp;
1666 			TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list)
1667 				if (vr != sc && vr->sc_vhid == sc->sc_vhid)
1668 					return (EINVAL);
1669 		}
1670 
1671 		/* detach from old interface */
1672 		if (sc->sc_carpdev != NULL)
1673 			carpdetach(sc);
1674 
1675 		/* join multicast groups */
1676 		if (sc->sc_naddrs < 0 &&
1677 		    (error = carp_join_multicast(sc)) != 0) {
1678 			if (ncif != NULL)
1679 				free(ncif, M_IFADDR);
1680 			return (error);
1681 		}
1682 
1683 #ifdef INET6
1684 		if (sc->sc_naddrs6 < 0 &&
1685 		    (error = carp_join_multicast6(sc)) != 0) {
1686 			if (ncif != NULL)
1687 				free(ncif, M_IFADDR);
1688 			carp_multicast_cleanup(sc);
1689 			return (error);
1690 		}
1691 #endif
1692 
1693 		/* attach carp interface to physical interface */
1694 		if (ncif != NULL)
1695 			ifp->if_carp = (void *)ncif;
1696 		sc->sc_carpdev = ifp;
1697 		sc->sc_if.if_capabilities = ifp->if_capabilities &
1698 		             (IFCAP_TSOv4 | IFCAP_TSOv6 |
1699                              IFCAP_CSUM_IPv4_Tx|IFCAP_CSUM_IPv4_Rx|
1700                              IFCAP_CSUM_TCPv4_Tx|IFCAP_CSUM_TCPv4_Rx|
1701                              IFCAP_CSUM_UDPv4_Tx|IFCAP_CSUM_UDPv4_Rx|
1702                              IFCAP_CSUM_TCPv6_Tx|IFCAP_CSUM_TCPv6_Rx|
1703                              IFCAP_CSUM_UDPv6_Tx|IFCAP_CSUM_UDPv6_Rx);
1704 
1705 		cif = (struct carp_if *)ifp->if_carp;
1706 		TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) {
1707 			if (vr == sc)
1708 				myself = 1;
1709 			if (vr->sc_vhid < sc->sc_vhid)
1710 				after = vr;
1711 		}
1712 
1713 		if (!myself) {
1714 			/* We're trying to keep things in order */
1715 			if (after == NULL) {
1716 				TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list);
1717 			} else {
1718 				TAILQ_INSERT_AFTER(&cif->vhif_vrs, after,
1719 				    sc, sc_list);
1720 			}
1721 			cif->vhif_nvrs++;
1722 		}
1723 		if (sc->sc_naddrs || sc->sc_naddrs6)
1724 			sc->sc_if.if_flags |= IFF_UP;
1725 		carp_set_enaddr(sc);
1726 		KERNEL_LOCK(1, NULL);
1727 		s = splnet();
1728 		/* XXX linkstatehooks establish */
1729 		carp_carpdev_state(ifp);
1730 		splx(s);
1731 		KERNEL_UNLOCK_ONE(NULL);
1732 	} else {
1733 		carpdetach(sc);
1734 		sc->sc_if.if_flags &= ~(IFF_UP|IFF_RUNNING);
1735 	}
1736 	return (0);
1737 }
1738 
1739 static void
1740 carp_set_enaddr(struct carp_softc *sc)
1741 {
1742 	uint8_t enaddr[ETHER_ADDR_LEN];
1743 	if (sc->sc_carpdev && sc->sc_carpdev->if_type == IFT_ISO88025) {
1744 		enaddr[0] = 3;
1745 		enaddr[1] = 0;
1746 		enaddr[2] = 0x40 >> (sc->sc_vhid - 1);
1747 		enaddr[3] = 0x40000 >> (sc->sc_vhid - 1);
1748 		enaddr[4] = 0;
1749 		enaddr[5] = 0;
1750 	} else {
1751 		enaddr[0] = 0;
1752 		enaddr[1] = 0;
1753 		enaddr[2] = 0x5e;
1754 		enaddr[3] = 0;
1755 		enaddr[4] = 1;
1756 		enaddr[5] = sc->sc_vhid;
1757 	}
1758 	if_set_sadl(&sc->sc_if, enaddr, sizeof(enaddr), false);
1759 }
1760 
#if 0
/*
 * Address-change callback; v is the carp_softc.  This function is
 * compiled out.
 *
 * Recounts the IPv4/IPv6 addresses on the carp interface.  If either
 * count dropped, the stored counts are updated, the IPv4 multicast
 * membership is re-established when missing, and the interface is put
 * into INIT state once no address is left.  The run state is refreshed
 * in every case.
 */
static void
carp_addr_updated(void *v)
{
	struct carp_softc *sc = v;
	struct ifaddr *ifa;
	int cnt4 = 0, cnt6 = 0;

	IFADDR_READER_FOREACH(ifa, &sc->sc_if) {
		switch (ifa->ifa_addr->sa_family) {
		case AF_INET:
			cnt4++;
			break;
		case AF_INET6:
			cnt6++;
			break;
		default:
			break;
		}
	}

	/* Handle a callback after SIOCDIFADDR */
	if (cnt4 < sc->sc_naddrs || cnt6 < sc->sc_naddrs6) {
		struct in_addr mc_addr;

		sc->sc_naddrs = cnt4;
		sc->sc_naddrs6 = cnt6;

		/* Re-establish multicast membership removed by in_control */
		mc_addr.s_addr = INADDR_CARP_GROUP;
		if (!in_multi_group(mc_addr, &sc->sc_if, 0)) {
			memset(&sc->sc_imo, 0, sizeof(sc->sc_imo));

			if (sc->sc_carpdev != NULL && sc->sc_naddrs > 0)
				carp_join_multicast(sc);
		}

		if (sc->sc_naddrs != 0 || sc->sc_naddrs6 != 0) {
			carp_hmac_prepare(sc);
		} else {
			sc->sc_if.if_flags &= ~IFF_UP;
			carp_set_state(sc, INIT);
		}
	}

	carp_setrun(sc, 0);
}
#endif
1802 
1803 static int
1804 carp_set_addr(struct carp_softc *sc, struct sockaddr_in *sin)
1805 {
1806 	struct ifnet *ifp = sc->sc_carpdev;
1807 	struct in_ifaddr *ia, *ia_if;
1808 	int error = 0;
1809 	int s;
1810 
1811 	if (sin->sin_addr.s_addr == 0) {
1812 		if (!(sc->sc_if.if_flags & IFF_UP))
1813 			carp_set_state(sc, INIT);
1814 		if (sc->sc_naddrs)
1815 			sc->sc_if.if_flags |= IFF_UP;
1816 		carp_setrun(sc, 0);
1817 		return (0);
1818 	}
1819 
1820 	/* we have to do this by hand to ensure we don't match on ourselves */
1821 	ia_if = NULL;
1822 	s = pserialize_read_enter();
1823 	IN_ADDRLIST_READER_FOREACH(ia) {
1824 		/* and, yeah, we need a multicast-capable iface too */
1825 		if (ia->ia_ifp != &sc->sc_if &&
1826 		    ia->ia_ifp->if_type != IFT_CARP &&
1827 		    (ia->ia_ifp->if_flags & IFF_MULTICAST) &&
1828 		    (sin->sin_addr.s_addr & ia->ia_subnetmask) ==
1829 		    ia->ia_subnet) {
1830 			if (!ia_if)
1831 				ia_if = ia;
1832 		}
1833 	}
1834 
1835 	if (ia_if) {
1836 		ia = ia_if;
1837 		if (ifp) {
1838 			if (ifp != ia->ia_ifp)
1839 				return (EADDRNOTAVAIL);
1840 		} else {
1841 			/* FIXME NOMPSAFE */
1842 			ifp = ia->ia_ifp;
1843 		}
1844 	}
1845 	pserialize_read_exit(s);
1846 
1847 	if ((error = carp_set_ifp(sc, ifp)))
1848 		return (error);
1849 
1850 	if (sc->sc_carpdev == NULL)
1851 		return (EADDRNOTAVAIL);
1852 
1853 	if (sc->sc_naddrs == 0 && (error = carp_join_multicast(sc)) != 0)
1854 		return (error);
1855 
1856 	sc->sc_naddrs++;
1857 	if (sc->sc_carpdev != NULL)
1858 		sc->sc_if.if_flags |= IFF_UP;
1859 
1860 	carp_set_state(sc, INIT);
1861 	carp_setrun(sc, 0);
1862 
1863 	/*
1864 	 * Hook if_addrhooks so that we get a callback after in_ifinit has run,
1865 	 * to correct any inappropriate routes that it inserted.
1866 	 */
1867 	if (sc->ah_cookie == 0) {
1868 		/* XXX link address hook */
1869 	}
1870 
1871 	return (0);
1872 }
1873 
1874 static int
1875 carp_join_multicast(struct carp_softc *sc)
1876 {
1877 	struct ip_moptions *imo = &sc->sc_imo, tmpimo;
1878 	struct in_addr addr;
1879 
1880 	memset(&tmpimo, 0, sizeof(tmpimo));
1881 	addr.s_addr = INADDR_CARP_GROUP;
1882 	if ((tmpimo.imo_membership[0] =
1883 	    in_addmulti(&addr, &sc->sc_if)) == NULL) {
1884 		return (ENOBUFS);
1885 	}
1886 
1887 	imo->imo_membership[0] = tmpimo.imo_membership[0];
1888 	imo->imo_num_memberships = 1;
1889 	imo->imo_multicast_if_index = sc->sc_if.if_index;
1890 	imo->imo_multicast_ttl = CARP_DFLTTL;
1891 	imo->imo_multicast_loop = 0;
1892 	return (0);
1893 }
1894 
1895 
1896 #ifdef INET6
1897 static int
1898 carp_set_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6)
1899 {
1900 	struct ifnet *ifp = sc->sc_carpdev;
1901 	struct in6_ifaddr *ia, *ia_if;
1902 	int error = 0;
1903 	int s;
1904 
1905 	if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
1906 		if (!(sc->sc_if.if_flags & IFF_UP))
1907 			carp_set_state(sc, INIT);
1908 		if (sc->sc_naddrs6)
1909 			sc->sc_if.if_flags |= IFF_UP;
1910 		carp_setrun(sc, 0);
1911 		return (0);
1912 	}
1913 
1914 	/* we have to do this by hand to ensure we don't match on ourselves */
1915 	ia_if = NULL;
1916 	s = pserialize_read_enter();
1917 	IN6_ADDRLIST_READER_FOREACH(ia) {
1918 		int i;
1919 
1920 		for (i = 0; i < 4; i++) {
1921 			if ((sin6->sin6_addr.s6_addr32[i] &
1922 			    ia->ia_prefixmask.sin6_addr.s6_addr32[i]) !=
1923 			    (ia->ia_addr.sin6_addr.s6_addr32[i] &
1924 			    ia->ia_prefixmask.sin6_addr.s6_addr32[i]))
1925 				break;
1926 		}
1927 		/* and, yeah, we need a multicast-capable iface too */
1928 		if (ia->ia_ifp != &sc->sc_if &&
1929 		    ia->ia_ifp->if_type != IFT_CARP &&
1930 		    (ia->ia_ifp->if_flags & IFF_MULTICAST) &&
1931 		    (i == 4)) {
1932 			if (!ia_if)
1933 				ia_if = ia;
1934 		}
1935 	}
1936 	pserialize_read_exit(s);
1937 
1938 	if (ia_if) {
1939 		ia = ia_if;
1940 		if (sc->sc_carpdev) {
1941 			if (sc->sc_carpdev != ia->ia_ifp)
1942 				return (EADDRNOTAVAIL);
1943 		} else {
1944 			ifp = ia->ia_ifp;
1945 		}
1946 	}
1947 
1948 	if ((error = carp_set_ifp(sc, ifp)))
1949 		return (error);
1950 
1951 	if (sc->sc_carpdev == NULL)
1952 		return (EADDRNOTAVAIL);
1953 
1954 	if (sc->sc_naddrs6 == 0 && (error = carp_join_multicast6(sc)) != 0)
1955 		return (error);
1956 
1957 	sc->sc_naddrs6++;
1958 	if (sc->sc_carpdev != NULL)
1959 		sc->sc_if.if_flags |= IFF_UP;
1960 	carp_set_state(sc, INIT);
1961 	carp_setrun(sc, 0);
1962 
1963 	return (0);
1964 }
1965 
1966 static int
1967 carp_join_multicast6(struct carp_softc *sc)
1968 {
1969 	struct in6_multi_mship *imm, *imm2;
1970 	struct ip6_moptions *im6o = &sc->sc_im6o;
1971 	struct sockaddr_in6 addr6;
1972 	int error;
1973 
1974 	/* Join IPv6 CARP multicast group */
1975 	memset(&addr6, 0, sizeof(addr6));
1976 	addr6.sin6_family = AF_INET6;
1977 	addr6.sin6_len = sizeof(addr6);
1978 	addr6.sin6_addr.s6_addr16[0] = htons(0xff02);
1979 	addr6.sin6_addr.s6_addr16[1] = htons(sc->sc_if.if_index);
1980 	addr6.sin6_addr.s6_addr8[15] = 0x12;
1981 	if ((imm = in6_joingroup(&sc->sc_if,
1982 	    &addr6.sin6_addr, &error, 0)) == NULL) {
1983 		return (error);
1984 	}
1985 	/* join solicited multicast address */
1986 	memset(&addr6.sin6_addr, 0, sizeof(addr6.sin6_addr));
1987 	addr6.sin6_addr.s6_addr16[0] = htons(0xff02);
1988 	addr6.sin6_addr.s6_addr16[1] = htons(sc->sc_if.if_index);
1989 	addr6.sin6_addr.s6_addr32[1] = 0;
1990 	addr6.sin6_addr.s6_addr32[2] = htonl(1);
1991 	addr6.sin6_addr.s6_addr32[3] = 0;
1992 	addr6.sin6_addr.s6_addr8[12] = 0xff;
1993 	if ((imm2 = in6_joingroup(&sc->sc_if,
1994 	    &addr6.sin6_addr, &error, 0)) == NULL) {
1995 		in6_leavegroup(imm);
1996 		return (error);
1997 	}
1998 
1999 	/* apply v6 multicast membership */
2000 	im6o->im6o_multicast_if_index = sc->sc_if.if_index;
2001 	if (imm)
2002 		LIST_INSERT_HEAD(&im6o->im6o_memberships, imm,
2003 		    i6mm_chain);
2004 	if (imm2)
2005 		LIST_INSERT_HEAD(&im6o->im6o_memberships, imm2,
2006 		    i6mm_chain);
2007 
2008 	return (0);
2009 }
2010 
2011 #endif /* INET6 */
2012 
2013 static int
2014 carp_ioctl(struct ifnet *ifp, u_long cmd, void *data)
2015 {
2016 	struct lwp *l = curlwp;		/* XXX */
2017 	struct carp_softc *sc = ifp->if_softc, *vr;
2018 	struct carpreq carpr;
2019 	struct ifaddr *ifa;
2020 	struct ifreq *ifr;
2021 	struct ifnet *cdev = NULL;
2022 	int error = 0;
2023 
2024 	ifa = (struct ifaddr *)data;
2025 	ifr = (struct ifreq *)data;
2026 
2027 	switch (cmd) {
2028 	case SIOCINITIFADDR:
2029 		switch (ifa->ifa_addr->sa_family) {
2030 #ifdef INET
2031 		case AF_INET:
2032 			sc->sc_if.if_flags |= IFF_UP;
2033 			memcpy(ifa->ifa_dstaddr, ifa->ifa_addr,
2034 			    sizeof(struct sockaddr));
2035 			error = carp_set_addr(sc, satosin(ifa->ifa_addr));
2036 			break;
2037 #endif /* INET */
2038 #ifdef INET6
2039 		case AF_INET6:
2040 			sc->sc_if.if_flags|= IFF_UP;
2041 			error = carp_set_addr6(sc, satosin6(ifa->ifa_addr));
2042 			break;
2043 #endif /* INET6 */
2044 		default:
2045 			error = EAFNOSUPPORT;
2046 			break;
2047 		}
2048 		break;
2049 
2050 	case SIOCSIFFLAGS:
2051 		if ((error = ifioctl_common(ifp, cmd, data)) != 0)
2052 			break;
2053 		if (sc->sc_state != INIT && !(ifr->ifr_flags & IFF_UP)) {
2054 			callout_stop(&sc->sc_ad_tmo);
2055 			callout_stop(&sc->sc_md_tmo);
2056 			callout_stop(&sc->sc_md6_tmo);
2057 			if (sc->sc_state == MASTER) {
2058 				/* we need the interface up to bow out */
2059 				sc->sc_if.if_flags |= IFF_UP;
2060 				sc->sc_bow_out = 1;
2061 				carp_send_ad(sc);
2062 			}
2063 			sc->sc_if.if_flags &= ~IFF_UP;
2064 			carp_set_state(sc, INIT);
2065 			carp_setrun(sc, 0);
2066 		} else if (sc->sc_state == INIT && (ifr->ifr_flags & IFF_UP)) {
2067 			sc->sc_if.if_flags |= IFF_UP;
2068 			carp_setrun(sc, 0);
2069 		}
2070 		carp_update_link_state(sc);
2071 		break;
2072 
2073 	case SIOCSVH:
2074 		if (l == NULL)
2075 			break;
2076 		if ((error = kauth_authorize_network(l->l_cred,
2077 		    KAUTH_NETWORK_INTERFACE,
2078 		    KAUTH_REQ_NETWORK_INTERFACE_SETPRIV, ifp, (void *)cmd,
2079 		    NULL)) != 0)
2080 			break;
2081 		if ((error = copyin(ifr->ifr_data, &carpr, sizeof carpr)))
2082 			break;
2083 		error = 1;
2084 		if (carpr.carpr_carpdev[0] != '\0' &&
2085 		    (cdev = ifunit(carpr.carpr_carpdev)) == NULL)
2086 			return (EINVAL);
2087 		if ((error = carp_set_ifp(sc, cdev)))
2088 			return (error);
2089 		if (sc->sc_state != INIT && carpr.carpr_state != sc->sc_state) {
2090 			switch (carpr.carpr_state) {
2091 			case BACKUP:
2092 				callout_stop(&sc->sc_ad_tmo);
2093 				carp_set_state(sc, BACKUP);
2094 				carp_setrun(sc, 0);
2095 				carp_setroute(sc, RTM_DELETE);
2096 				break;
2097 			case MASTER:
2098 				carp_master_down(sc);
2099 				break;
2100 			default:
2101 				break;
2102 			}
2103 		}
2104 		if (carpr.carpr_vhid > 0) {
2105 			if (carpr.carpr_vhid > 255) {
2106 				error = EINVAL;
2107 				break;
2108 			}
2109 			if (sc->sc_carpdev) {
2110 				struct carp_if *cif;
2111 				cif = (struct carp_if *)sc->sc_carpdev->if_carp;
2112 				TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list)
2113 					if (vr != sc &&
2114 					    vr->sc_vhid == carpr.carpr_vhid)
2115 						return (EINVAL);
2116 			}
2117 			sc->sc_vhid = carpr.carpr_vhid;
2118 			carp_set_enaddr(sc);
2119 			carp_set_state(sc, INIT);
2120 			error--;
2121 		}
2122 		if (carpr.carpr_advbase > 0 || carpr.carpr_advskew > 0) {
2123 			if (carpr.carpr_advskew > 254) {
2124 				error = EINVAL;
2125 				break;
2126 			}
2127 			if (carpr.carpr_advbase > 255) {
2128 				error = EINVAL;
2129 				break;
2130 			}
2131 			sc->sc_advbase = carpr.carpr_advbase;
2132 			sc->sc_advskew = carpr.carpr_advskew;
2133 			error--;
2134 		}
2135 		memcpy(sc->sc_key, carpr.carpr_key, sizeof(sc->sc_key));
2136 		if (error > 0)
2137 			error = EINVAL;
2138 		else {
2139 			error = 0;
2140 			carp_setrun(sc, 0);
2141 		}
2142 		break;
2143 
2144 	case SIOCGVH:
2145 		memset(&carpr, 0, sizeof(carpr));
2146 		if (sc->sc_carpdev != NULL)
2147 			strlcpy(carpr.carpr_carpdev, sc->sc_carpdev->if_xname,
2148 			    IFNAMSIZ);
2149 		carpr.carpr_state = sc->sc_state;
2150 		carpr.carpr_vhid = sc->sc_vhid;
2151 		carpr.carpr_advbase = sc->sc_advbase;
2152 		carpr.carpr_advskew = sc->sc_advskew;
2153 
2154 		if ((l != NULL) && (error = kauth_authorize_network(l->l_cred,
2155 		    KAUTH_NETWORK_INTERFACE,
2156 		    KAUTH_REQ_NETWORK_INTERFACE_SETPRIV, ifp, (void *)cmd,
2157 		    NULL)) == 0)
2158 			memcpy(carpr.carpr_key, sc->sc_key,
2159 			    sizeof(carpr.carpr_key));
2160 		error = copyout(&carpr, ifr->ifr_data, sizeof(carpr));
2161 		break;
2162 
2163 	case SIOCADDMULTI:
2164 		error = carp_ether_addmulti(sc, ifr);
2165 		break;
2166 
2167 	case SIOCDELMULTI:
2168 		error = carp_ether_delmulti(sc, ifr);
2169 		break;
2170 
2171 	case SIOCSIFCAP:
2172 		if ((error = ifioctl_common(ifp, cmd, data)) == ENETRESET)
2173 			error = 0;
2174 		break;
2175 
2176         case SIOCGIFMEDIA:
2177 		error = ifmedia_ioctl(ifp, ifr, &sc->sc_im, cmd);
2178 		break;
2179 
2180 	default:
2181 		error = ether_ioctl(ifp, cmd, data);
2182 	}
2183 
2184 	carp_hmac_prepare(sc);
2185 	return (error);
2186 }
2187 
2188 
/*
 * if_start handler of the carp interface.  This function should never be
 * called; a DEBUG kernel reports it if it is, otherwise it does nothing.
 */
static void
carp_start(struct ifnet *ifp)
{
#if defined(DEBUG)
	printf("%s: start called\n", ifp->if_xname);
#endif /* DEBUG */
}
2199 
2200 int
2201 carp_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *sa,
2202     const struct rtentry *rt)
2203 {
2204 	struct carp_softc *sc = ((struct carp_softc *)ifp->if_softc);
2205 	KASSERT(KERNEL_LOCKED_P());
2206 
2207 	if (sc->sc_carpdev != NULL && sc->sc_state == MASTER) {
2208 		return if_output_lock(sc->sc_carpdev, ifp, m, sa, rt);
2209 	} else {
2210 		m_freem(m);
2211 		return (ENETUNREACH);
2212 	}
2213 }
2214 
/*
 * Media change callback.  There is nothing to change on a carp
 * interface, so this always reports success.
 */
static int
carp_mediachange(struct ifnet *ifp)
{
	return 0;
}
2220 
2221 static void
2222 carp_mediastatus(struct ifnet *ifp, struct ifmediareq *imr)
2223 {
2224         switch (ifp->if_link_state) {
2225         case LINK_STATE_UP:
2226                 imr->ifm_status = IFM_AVALID | IFM_ACTIVE;
2227                 break;
2228         case LINK_STATE_DOWN:
2229                 imr->ifm_status = IFM_AVALID;
2230                 break;
2231         default:
2232                 imr->ifm_status = 0;
2233                 break;
2234         }
2235 }
2236 
2237 static void
2238 carp_set_state(struct carp_softc *sc, int state)
2239 {
2240 	static const char *carp_states[] = { CARP_STATES };
2241 
2242 	if (sc->sc_state == state)
2243 		return;
2244 
2245 	CARP_LOG(sc, ("state transition from: %s -> to: %s", carp_states[sc->sc_state], carp_states[state]));
2246 
2247 	sc->sc_state = state;
2248 	carp_update_link_state(sc);
2249 }
2250 
2251 static void
2252 carp_update_link_state(struct carp_softc *sc)
2253 {
2254 	int link_state;
2255 
2256 	switch (sc->sc_state) {
2257 	case BACKUP:
2258 		link_state = LINK_STATE_DOWN;
2259 		break;
2260 	case MASTER:
2261 		link_state = LINK_STATE_UP;
2262 		break;
2263 	default:
2264 		link_state = ((sc->sc_if.if_flags & IFF_ONLY_MASTER_UP) != 0)
2265 			     ? LINK_STATE_DOWN : LINK_STATE_UNKNOWN;
2266 		break;
2267 	}
2268 	if_link_state_change(&sc->sc_if, link_state);
2269 }
2270 
2271 void
2272 carp_carpdev_state(void *v)
2273 {
2274 	struct carp_if *cif;
2275 	struct carp_softc *sc;
2276 	struct ifnet *ifp = v;
2277 
2278 	if (ifp->if_type == IFT_CARP)
2279 		return;
2280 
2281 	cif = (struct carp_if *)ifp->if_carp;
2282 
2283 	TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) {
2284 		int suppressed = sc->sc_suppress;
2285 
2286 		if (sc->sc_carpdev->if_link_state == LINK_STATE_DOWN ||
2287 		    !(sc->sc_carpdev->if_flags & IFF_UP)) {
2288 			sc->sc_if.if_flags &= ~IFF_RUNNING;
2289 			callout_stop(&sc->sc_ad_tmo);
2290 			callout_stop(&sc->sc_md_tmo);
2291 			callout_stop(&sc->sc_md6_tmo);
2292 			carp_set_state(sc, INIT);
2293 			sc->sc_suppress = 1;
2294 			carp_setrun(sc, 0);
2295 			if (!suppressed) {
2296 				carp_suppress_preempt++;
2297 				if (carp_suppress_preempt == 1)
2298 					carp_send_ad_all();
2299 			}
2300 		} else {
2301 			carp_set_state(sc, INIT);
2302 			sc->sc_suppress = 0;
2303 			carp_setrun(sc, 0);
2304 			if (suppressed)
2305 				carp_suppress_preempt--;
2306 		}
2307 	}
2308 }
2309 
2310 static int
2311 carp_ether_addmulti(struct carp_softc *sc, struct ifreq *ifr)
2312 {
2313 	const struct sockaddr *sa = ifreq_getaddr(SIOCADDMULTI, ifr);
2314 	struct ifnet *ifp;
2315 	struct carp_mc_entry *mc;
2316 	u_int8_t addrlo[ETHER_ADDR_LEN], addrhi[ETHER_ADDR_LEN];
2317 	int error;
2318 
2319 	ifp = sc->sc_carpdev;
2320 	if (ifp == NULL)
2321 		return (EINVAL);
2322 
2323 	error = ether_addmulti(sa, &sc->sc_ac);
2324 	if (error != ENETRESET)
2325 		return (error);
2326 
2327 	/*
2328 	 * This is new multicast address.  We have to tell parent
2329 	 * about it.  Also, remember this multicast address so that
2330 	 * we can delete them on unconfigure.
2331 	 */
2332 	mc = malloc(sizeof(struct carp_mc_entry), M_DEVBUF, M_NOWAIT);
2333 	if (mc == NULL) {
2334 		error = ENOMEM;
2335 		goto alloc_failed;
2336 	}
2337 
2338 	/*
2339 	 * As ether_addmulti() returns ENETRESET, following two
2340 	 * statement shouldn't fail.
2341 	 */
2342 	(void)ether_multiaddr(sa, addrlo, addrhi);
2343 
2344 	ETHER_LOCK(&sc->sc_ac);
2345 	mc->mc_enm = ether_lookup_multi(addrlo, addrhi, &sc->sc_ac);
2346 	ETHER_UNLOCK(&sc->sc_ac);
2347 
2348 	memcpy(&mc->mc_addr, sa, sa->sa_len);
2349 	LIST_INSERT_HEAD(&sc->carp_mc_listhead, mc, mc_entries);
2350 
2351 	error = if_mcast_op(ifp, SIOCADDMULTI, sa);
2352 	if (error != 0)
2353 		goto ioctl_failed;
2354 
2355 	return (error);
2356 
2357  ioctl_failed:
2358 	LIST_REMOVE(mc, mc_entries);
2359 	free(mc, M_DEVBUF);
2360  alloc_failed:
2361 	(void)ether_delmulti(sa, &sc->sc_ac);
2362 
2363 	return (error);
2364 }
2365 
2366 static int
2367 carp_ether_delmulti(struct carp_softc *sc, struct ifreq *ifr)
2368 {
2369 	const struct sockaddr *sa = ifreq_getaddr(SIOCDELMULTI, ifr);
2370 	struct ifnet *ifp;
2371 	struct ether_multi *enm;
2372 	struct carp_mc_entry *mc;
2373 	u_int8_t addrlo[ETHER_ADDR_LEN], addrhi[ETHER_ADDR_LEN];
2374 	int error;
2375 
2376 	ifp = sc->sc_carpdev;
2377 	if (ifp == NULL)
2378 		return (EINVAL);
2379 
2380 	/*
2381 	 * Find a key to lookup carp_mc_entry.  We have to do this
2382 	 * before calling ether_delmulti for obvious reason.
2383 	 */
2384 	if ((error = ether_multiaddr(sa, addrlo, addrhi)) != 0)
2385 		return (error);
2386 
2387 	ETHER_LOCK(&sc->sc_ac);
2388 	enm = ether_lookup_multi(addrlo, addrhi, &sc->sc_ac);
2389 	ETHER_UNLOCK(&sc->sc_ac);
2390 	if (enm == NULL)
2391 		return (EINVAL);
2392 
2393 	LIST_FOREACH(mc, &sc->carp_mc_listhead, mc_entries)
2394 		if (mc->mc_enm == enm)
2395 			break;
2396 
2397 	/* We won't delete entries we didn't add */
2398 	if (mc == NULL)
2399 		return (EINVAL);
2400 
2401 	error = ether_delmulti(sa, &sc->sc_ac);
2402 	if (error != ENETRESET)
2403 		return (error);
2404 
2405 	/* We no longer use this multicast address.  Tell parent so. */
2406 	error = if_mcast_op(ifp, SIOCDELMULTI, sa);
2407 	if (error == 0) {
2408 		/* And forget about this address. */
2409 		LIST_REMOVE(mc, mc_entries);
2410 		free(mc, M_DEVBUF);
2411 	} else
2412 		(void)ether_addmulti(sa, &sc->sc_ac);
2413 	return (error);
2414 }
2415 
2416 /*
2417  * Delete any multicast address we have asked to add from parent
2418  * interface.  Called when the carp is being unconfigured.
2419  */
2420 static void
2421 carp_ether_purgemulti(struct carp_softc *sc)
2422 {
2423 	struct ifnet *ifp = sc->sc_carpdev;		/* Parent. */
2424 	struct carp_mc_entry *mc;
2425 
2426 	if (ifp == NULL)
2427 		return;
2428 
2429 	while ((mc = LIST_FIRST(&sc->carp_mc_listhead)) != NULL) {
2430 		(void)if_mcast_op(ifp, SIOCDELMULTI, sstosa(&mc->mc_addr));
2431 		LIST_REMOVE(mc, mc_entries);
2432 		free(mc, M_DEVBUF);
2433 	}
2434 }
2435 
2436 static int
2437 sysctl_net_inet_carp_stats(SYSCTLFN_ARGS)
2438 {
2439 
2440 	return (NETSTAT_SYSCTL(carpstat_percpu, CARP_NSTATS));
2441 }
2442 
2443 void
2444 carp_init(void)
2445 {
2446 
2447 	sysctl_net_inet_carp_setup(NULL);
2448 #ifdef MBUFTRACE
2449 	MOWNER_ATTACH(&carp_proto_mowner_rx);
2450 	MOWNER_ATTACH(&carp_proto_mowner_tx);
2451 	MOWNER_ATTACH(&carp_proto6_mowner_rx);
2452 	MOWNER_ATTACH(&carp_proto6_mowner_tx);
2453 #endif
2454 
2455 	carp_wqinput = wqinput_create("carp", _carp_proto_input);
2456 #ifdef INET6
2457 	carp6_wqinput = wqinput_create("carp6", _carp6_proto_input);
2458 #endif
2459 }
2460 
2461 static void
2462 sysctl_net_inet_carp_setup(struct sysctllog **clog)
2463 {
2464 
2465 	sysctl_createv(clog, 0, NULL, NULL,
2466 		       CTLFLAG_PERMANENT,
2467 		       CTLTYPE_NODE, "inet", NULL,
2468 		       NULL, 0, NULL, 0,
2469 		       CTL_NET, PF_INET, CTL_EOL);
2470 	sysctl_createv(clog, 0, NULL, NULL,
2471 		       CTLFLAG_PERMANENT,
2472 		       CTLTYPE_NODE, "carp",
2473 		       SYSCTL_DESCR("CARP related settings"),
2474 		       NULL, 0, NULL, 0,
2475 		       CTL_NET, PF_INET, IPPROTO_CARP, CTL_EOL);
2476 
2477 	sysctl_createv(clog, 0, NULL, NULL,
2478 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2479 		       CTLTYPE_INT, "preempt",
2480 		       SYSCTL_DESCR("Enable CARP Preempt"),
2481 		       NULL, 0, &carp_opts[CARPCTL_PREEMPT], 0,
2482 		       CTL_NET, PF_INET, IPPROTO_CARP,
2483 		       CTL_CREATE, CTL_EOL);
2484 	sysctl_createv(clog, 0, NULL, NULL,
2485 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2486 		       CTLTYPE_INT, "arpbalance",
2487 		       SYSCTL_DESCR("Enable ARP balancing"),
2488 		       NULL, 0, &carp_opts[CARPCTL_ARPBALANCE], 0,
2489 		       CTL_NET, PF_INET, IPPROTO_CARP,
2490 		       CTL_CREATE, CTL_EOL);
2491 	sysctl_createv(clog, 0, NULL, NULL,
2492 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2493 		       CTLTYPE_INT, "allow",
2494 		       SYSCTL_DESCR("Enable CARP"),
2495 		       NULL, 0, &carp_opts[CARPCTL_ALLOW], 0,
2496 		       CTL_NET, PF_INET, IPPROTO_CARP,
2497 		       CTL_CREATE, CTL_EOL);
2498 	sysctl_createv(clog, 0, NULL, NULL,
2499 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2500 		       CTLTYPE_INT, "log",
2501 		       SYSCTL_DESCR("CARP logging"),
2502 		       NULL, 0, &carp_opts[CARPCTL_LOG], 0,
2503 		       CTL_NET, PF_INET, IPPROTO_CARP,
2504 		       CTL_CREATE, CTL_EOL);
2505 	sysctl_createv(clog, 0, NULL, NULL,
2506 		       CTLFLAG_PERMANENT,
2507 		       CTLTYPE_STRUCT, "stats",
2508 		       SYSCTL_DESCR("CARP statistics"),
2509 		       sysctl_net_inet_carp_stats, 0, NULL, 0,
2510 		       CTL_NET, PF_INET, IPPROTO_CARP, CARPCTL_STATS,
2511 		       CTL_EOL);
2512 }
2513