xref: /netbsd-src/sys/netinet/ip_carp.c (revision 796c32c94f6e154afc9de0f63da35c91bb739b45)
1 /*	$NetBSD: ip_carp.c,v 1.93 2017/11/22 07:40:45 ozaki-r Exp $	*/
2 /*	$OpenBSD: ip_carp.c,v 1.113 2005/11/04 08:11:54 mcbride Exp $	*/
3 
4 /*
5  * Copyright (c) 2002 Michael Shalayeff. All rights reserved.
6  * Copyright (c) 2003 Ryan McBride. All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
21  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
23  * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
25  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
26  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
27  * THE POSSIBILITY OF SUCH DAMAGE.
28  */
29 
30 #ifdef _KERNEL_OPT
31 #include "opt_inet.h"
32 #include "opt_mbuftrace.h"
33 #endif
34 
35 #include <sys/cdefs.h>
36 __KERNEL_RCSID(0, "$NetBSD: ip_carp.c,v 1.93 2017/11/22 07:40:45 ozaki-r Exp $");
37 
38 /*
39  * TODO:
40  *	- iface reconfigure
41  *	- support for hardware checksum calculations;
42  *
43  */
44 
45 #include <sys/param.h>
46 #include <sys/proc.h>
47 #include <sys/mbuf.h>
48 #include <sys/socket.h>
49 #include <sys/socketvar.h>
50 #include <sys/callout.h>
51 #include <sys/ioctl.h>
52 #include <sys/errno.h>
53 #include <sys/device.h>
54 #include <sys/time.h>
55 #include <sys/kernel.h>
56 #include <sys/kauth.h>
57 #include <sys/sysctl.h>
58 #include <sys/ucred.h>
59 #include <sys/syslog.h>
60 #include <sys/acct.h>
61 #include <sys/cprng.h>
62 #include <sys/cpu.h>
63 #include <sys/pserialize.h>
64 #include <sys/psref.h>
65 
66 #include <net/if.h>
67 #include <net/pfil.h>
68 #include <net/if_types.h>
69 #include <net/if_ether.h>
70 #include <net/route.h>
71 #include <net/netisr.h>
72 #include <net/net_stats.h>
73 #include <netinet/if_inarp.h>
74 #include <netinet/wqinput.h>
75 
76 #if NFDDI > 0
77 #include <net/if_fddi.h>
78 #endif
79 #if NTOKEN > 0
80 #include <net/if_token.h>
81 #endif
82 
83 #ifdef INET
84 #include <netinet/in.h>
85 #include <netinet/in_systm.h>
86 #include <netinet/in_var.h>
87 #include <netinet/ip.h>
88 #include <netinet/ip_var.h>
89 
90 #include <net/if_dl.h>
91 #endif
92 
93 #ifdef INET6
94 #include <netinet/icmp6.h>
95 #include <netinet/ip6.h>
96 #include <netinet6/ip6_var.h>
97 #include <netinet6/nd6.h>
98 #include <netinet6/scope6_var.h>
99 #include <netinet6/in6_var.h>
100 #endif
101 
102 #include <net/bpf.h>
103 
104 #include <sys/sha1.h>
105 
106 #include <netinet/ip_carp.h>
107 
108 #include "ioconf.h"
109 
110 struct carp_mc_entry {
111 	LIST_ENTRY(carp_mc_entry)	mc_entries;
112 	union {
113 		struct ether_multi	*mcu_enm;
114 	} mc_u;
115 	struct sockaddr_storage		mc_addr;
116 };
117 #define	mc_enm	mc_u.mcu_enm
118 
119 struct carp_softc {
120 	struct ethercom sc_ac;
121 #define	sc_if		sc_ac.ec_if
122 #define	sc_carpdev	sc_ac.ec_if.if_carpdev
123 	int ah_cookie;
124 	int lh_cookie;
125 	struct ip_moptions sc_imo;
126 #ifdef INET6
127 	struct ip6_moptions sc_im6o;
128 #endif /* INET6 */
129 	TAILQ_ENTRY(carp_softc) sc_list;
130 
131 	enum { INIT = 0, BACKUP, MASTER }	sc_state;
132 
133 	int sc_suppress;
134 	int sc_bow_out;
135 
136 	int sc_sendad_errors;
137 #define CARP_SENDAD_MAX_ERRORS	3
138 	int sc_sendad_success;
139 #define CARP_SENDAD_MIN_SUCCESS 3
140 
141 	int sc_vhid;
142 	int sc_advskew;
143 	int sc_naddrs;
144 	int sc_naddrs6;
145 	int sc_advbase;		/* seconds */
146 	int sc_init_counter;
147 	u_int64_t sc_counter;
148 
149 	/* authentication */
150 #define CARP_HMAC_PAD	64
151 	unsigned char sc_key[CARP_KEY_LEN];
152 	unsigned char sc_pad[CARP_HMAC_PAD];
153 	SHA1_CTX sc_sha1;
154 	u_int32_t sc_hashkey[2];
155 
156 	struct callout sc_ad_tmo;	/* advertisement timeout */
157 	struct callout sc_md_tmo;	/* master down timeout */
158 	struct callout sc_md6_tmo;	/* master down timeout */
159 
160 	LIST_HEAD(__carp_mchead, carp_mc_entry)	carp_mc_listhead;
161 };
162 
163 int carp_suppress_preempt = 0;
164 static int carp_opts[CARPCTL_MAXID] = { 0, 1, 0, 0, 0 };	/* XXX for now */
165 
166 static percpu_t *carpstat_percpu;
167 
168 #define	CARP_STATINC(x)		_NET_STATINC(carpstat_percpu, x)
169 
170 #ifdef MBUFTRACE
171 static struct mowner carp_proto_mowner_rx = MOWNER_INIT("carp", "rx");
172 static struct mowner carp_proto_mowner_tx = MOWNER_INIT("carp", "tx");
173 static struct mowner carp_proto6_mowner_rx = MOWNER_INIT("carp6", "rx");
174 static struct mowner carp_proto6_mowner_tx = MOWNER_INIT("carp6", "tx");
175 #endif
176 
177 struct carp_if {
178 	TAILQ_HEAD(, carp_softc) vhif_vrs;
179 	int vhif_nvrs;
180 
181 	struct ifnet *vhif_ifp;
182 };
183 
/*
 * Emit a LOG_INFO message when carp_opts[CARPCTL_LOG] is non-zero.
 *
 *	sc	softc whose interface name prefixes the message; when it is
 *		NULL the prefix "carp: " is used instead.
 *	s	parenthesized addlog() argument list, e.g.
 *		CARP_LOG(sc, ("bad vhid %d", vhid));
 *
 * The body is wrapped in do { } while (0) so that the macro expands to
 * exactly one statement and cannot capture or break a following `else'.
 */
#define	CARP_LOG(sc, s)							\
	do {								\
		if (carp_opts[CARPCTL_LOG]) {				\
			if (sc)						\
				log(LOG_INFO, "%s: ",			\
				    (sc)->sc_if.if_xname);		\
			else						\
				log(LOG_INFO, "carp: ");		\
			addlog s;					\
			addlog("\n");					\
		}							\
	} while (/*CONSTCOND*/0)
194 
195 static void	carp_hmac_prepare(struct carp_softc *);
196 static void	carp_hmac_generate(struct carp_softc *, u_int32_t *,
197 		    unsigned char *);
198 static int	carp_hmac_verify(struct carp_softc *, u_int32_t *,
199 		    unsigned char *);
200 static void	carp_setroute(struct carp_softc *, int);
201 static void	carp_proto_input_c(struct mbuf *, struct carp_header *,
202 		    sa_family_t);
203 static void	carpdetach(struct carp_softc *);
204 static int	carp_prepare_ad(struct mbuf *, struct carp_softc *,
205 		    struct carp_header *);
206 static void	carp_send_ad_all(void);
207 static void	carp_send_ad(void *);
208 static void	carp_send_arp(struct carp_softc *);
209 static void	carp_master_down(void *);
210 static int	carp_ioctl(struct ifnet *, u_long, void *);
211 static void	carp_start(struct ifnet *);
212 static void	carp_setrun(struct carp_softc *, sa_family_t);
213 static void	carp_set_state(struct carp_softc *, int);
214 static int	carp_addrcount(struct carp_if *, struct in_ifaddr *, int);
215 enum	{ CARP_COUNT_MASTER, CARP_COUNT_RUNNING };
216 
217 static void	carp_multicast_cleanup(struct carp_softc *);
218 static int	carp_set_ifp(struct carp_softc *, struct ifnet *);
219 static void	carp_set_enaddr(struct carp_softc *);
220 #if 0
221 static void	carp_addr_updated(void *);
222 #endif
223 static u_int32_t	carp_hash(struct carp_softc *, u_char *);
224 static int	carp_set_addr(struct carp_softc *, struct sockaddr_in *);
225 static int	carp_join_multicast(struct carp_softc *);
226 #ifdef INET6
227 static void	carp_send_na(struct carp_softc *);
228 static int	carp_set_addr6(struct carp_softc *, struct sockaddr_in6 *);
229 static int	carp_join_multicast6(struct carp_softc *);
230 #endif
231 static int	carp_clone_create(struct if_clone *, int);
232 static int	carp_clone_destroy(struct ifnet *);
233 static int	carp_ether_addmulti(struct carp_softc *, struct ifreq *);
234 static int	carp_ether_delmulti(struct carp_softc *, struct ifreq *);
235 static void	carp_ether_purgemulti(struct carp_softc *);
236 
237 static void	sysctl_net_inet_carp_setup(struct sysctllog **);
238 
239 /* workqueue-based pr_input */
240 static struct wqinput *carp_wqinput;
241 static void _carp_proto_input(struct mbuf *, int, int);
242 #ifdef INET6
243 static struct wqinput *carp6_wqinput;
244 static void _carp6_proto_input(struct mbuf *, int, int);
245 #endif
246 
247 struct if_clone carp_cloner =
248     IF_CLONE_INITIALIZER("carp", carp_clone_create, carp_clone_destroy);
249 
250 static __inline u_int16_t
251 carp_cksum(struct mbuf *m, int len)
252 {
253 	return (in_cksum(m, len));
254 }
255 
256 static __inline u_int16_t
257 carp6_cksum(struct mbuf *m, uint32_t off, uint32_t len)
258 {
259 	return (in6_cksum(m, IPPROTO_CARP, off, len));
260 }
261 
262 static void
263 carp_hmac_prepare(struct carp_softc *sc)
264 {
265 	u_int8_t carp_version = CARP_VERSION, type = CARP_ADVERTISEMENT;
266 	u_int8_t vhid = sc->sc_vhid & 0xff;
267 	SHA1_CTX sha1ctx;
268 	u_int32_t kmd[5];
269 	struct ifaddr *ifa;
270 	int i, found;
271 	struct in_addr last, cur, in;
272 #ifdef INET6
273 	struct in6_addr last6, cur6, in6;
274 #endif /* INET6 */
275 
276 	/* compute ipad from key */
277 	memset(sc->sc_pad, 0, sizeof(sc->sc_pad));
278 	memcpy(sc->sc_pad, sc->sc_key, sizeof(sc->sc_key));
279 	for (i = 0; i < sizeof(sc->sc_pad); i++)
280 		sc->sc_pad[i] ^= 0x36;
281 
282 	/* precompute first part of inner hash */
283 	SHA1Init(&sc->sc_sha1);
284 	SHA1Update(&sc->sc_sha1, sc->sc_pad, sizeof(sc->sc_pad));
285 	SHA1Update(&sc->sc_sha1, (void *)&carp_version, sizeof(carp_version));
286 	SHA1Update(&sc->sc_sha1, (void *)&type, sizeof(type));
287 
288 	/* generate a key for the arpbalance hash, before the vhid is hashed */
289 	memcpy(&sha1ctx, &sc->sc_sha1, sizeof(sha1ctx));
290 	SHA1Final((unsigned char *)kmd, &sha1ctx);
291 	sc->sc_hashkey[0] = kmd[0] ^ kmd[1];
292 	sc->sc_hashkey[1] = kmd[2] ^ kmd[3];
293 
294 	/* the rest of the precomputation */
295 	SHA1Update(&sc->sc_sha1, (void *)&vhid, sizeof(vhid));
296 
297 	/* Hash the addresses from smallest to largest, not interface order */
298 #ifdef INET
299 	cur.s_addr = 0;
300 	do {
301 		int s;
302 		found = 0;
303 		last = cur;
304 		cur.s_addr = 0xffffffff;
305 		s = pserialize_read_enter();
306 		IFADDR_READER_FOREACH(ifa, &sc->sc_if) {
307 			in.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr;
308 			if (ifa->ifa_addr->sa_family == AF_INET &&
309 			    ntohl(in.s_addr) > ntohl(last.s_addr) &&
310 			    ntohl(in.s_addr) < ntohl(cur.s_addr)) {
311 				cur.s_addr = in.s_addr;
312 				found++;
313 			}
314 		}
315 		pserialize_read_exit(s);
316 		if (found)
317 			SHA1Update(&sc->sc_sha1, (void *)&cur, sizeof(cur));
318 	} while (found);
319 #endif /* INET */
320 
321 #ifdef INET6
322 	memset(&cur6, 0x00, sizeof(cur6));
323 	do {
324 		int s;
325 		found = 0;
326 		last6 = cur6;
327 		memset(&cur6, 0xff, sizeof(cur6));
328 		s = pserialize_read_enter();
329 		IFADDR_READER_FOREACH(ifa, &sc->sc_if) {
330 			in6 = ifatoia6(ifa)->ia_addr.sin6_addr;
331 			if (IN6_IS_ADDR_LINKLOCAL(&in6))
332 				in6.s6_addr16[1] = 0;
333 			if (ifa->ifa_addr->sa_family == AF_INET6 &&
334 			    memcmp(&in6, &last6, sizeof(in6)) > 0 &&
335 			    memcmp(&in6, &cur6, sizeof(in6)) < 0) {
336 				cur6 = in6;
337 				found++;
338 			}
339 		}
340 		pserialize_read_exit(s);
341 		if (found)
342 			SHA1Update(&sc->sc_sha1, (void *)&cur6, sizeof(cur6));
343 	} while (found);
344 #endif /* INET6 */
345 
346 	/* convert ipad to opad */
347 	for (i = 0; i < sizeof(sc->sc_pad); i++)
348 		sc->sc_pad[i] ^= 0x36 ^ 0x5c;
349 }
350 
351 static void
352 carp_hmac_generate(struct carp_softc *sc, u_int32_t counter[2],
353     unsigned char md[20])
354 {
355 	SHA1_CTX sha1ctx;
356 
357 	/* fetch first half of inner hash */
358 	memcpy(&sha1ctx, &sc->sc_sha1, sizeof(sha1ctx));
359 
360 	SHA1Update(&sha1ctx, (void *)counter, sizeof(sc->sc_counter));
361 	SHA1Final(md, &sha1ctx);
362 
363 	/* outer hash */
364 	SHA1Init(&sha1ctx);
365 	SHA1Update(&sha1ctx, sc->sc_pad, sizeof(sc->sc_pad));
366 	SHA1Update(&sha1ctx, md, 20);
367 	SHA1Final(md, &sha1ctx);
368 }
369 
370 static int
371 carp_hmac_verify(struct carp_softc *sc, u_int32_t counter[2],
372     unsigned char md[20])
373 {
374 	unsigned char md2[20];
375 
376 	carp_hmac_generate(sc, counter, md2);
377 
378 	return (memcmp(md, md2, sizeof(md2)));
379 }
380 
381 static void
382 carp_setroute(struct carp_softc *sc, int cmd)
383 {
384 	struct ifaddr *ifa;
385 	int s, bound;
386 
387 	KERNEL_LOCK(1, NULL);
388 	bound = curlwp_bind();
389 	s = pserialize_read_enter();
390 	IFADDR_READER_FOREACH(ifa, &sc->sc_if) {
391 		struct psref psref;
392 		ifa_acquire(ifa, &psref);
393 		pserialize_read_exit(s);
394 
395 		switch (ifa->ifa_addr->sa_family) {
396 		case AF_INET: {
397 			int count = 0;
398 			struct rtentry *rt;
399 			int hr_otherif, nr_ourif;
400 
401 			/*
402 			 * Avoid screwing with the routes if there are other
403 			 * carp interfaces which are master and have the same
404 			 * address.
405 			 */
406 			if (sc->sc_carpdev != NULL &&
407 			    sc->sc_carpdev->if_carp != NULL) {
408 				count = carp_addrcount(
409 				    (struct carp_if *)sc->sc_carpdev->if_carp,
410 				    ifatoia(ifa), CARP_COUNT_MASTER);
411 				if ((cmd == RTM_ADD && count != 1) ||
412 				    (cmd == RTM_DELETE && count != 0))
413 					continue;
414 			}
415 
416 			/* Remove the existing host route, if any */
417 			rtrequest(RTM_DELETE, ifa->ifa_addr,
418 			    ifa->ifa_addr, ifa->ifa_netmask,
419 			    RTF_HOST, NULL);
420 
421 			rt = NULL;
422 			(void)rtrequest(RTM_GET, ifa->ifa_addr, ifa->ifa_addr,
423 			    ifa->ifa_netmask, RTF_HOST, &rt);
424 			hr_otherif = (rt && rt->rt_ifp != &sc->sc_if &&
425 			    (rt->rt_flags & RTF_CONNECTED));
426 			if (rt != NULL) {
427 				rt_unref(rt);
428 				rt = NULL;
429 			}
430 
431 			/* Check for a network route on our interface */
432 
433 			rt = NULL;
434 			(void)rtrequest(RTM_GET, ifa->ifa_addr, ifa->ifa_addr,
435 			    ifa->ifa_netmask, 0, &rt);
436 			nr_ourif = (rt && rt->rt_ifp == &sc->sc_if);
437 
438 			switch (cmd) {
439 			case RTM_ADD:
440 				if (hr_otherif) {
441 					ifa->ifa_rtrequest = NULL;
442 					ifa->ifa_flags &= ~RTF_CONNECTED;
443 
444 					rtrequest(RTM_ADD, ifa->ifa_addr,
445 					    ifa->ifa_addr, ifa->ifa_netmask,
446 					    RTF_UP | RTF_HOST, NULL);
447 				}
448 				if (!hr_otherif || nr_ourif || !rt) {
449 					if (nr_ourif &&
450 					    (rt->rt_flags & RTF_CONNECTED) == 0)
451 						rtrequest(RTM_DELETE,
452 						    ifa->ifa_addr,
453 						    ifa->ifa_addr,
454 						    ifa->ifa_netmask, 0, NULL);
455 
456 					ifa->ifa_rtrequest = arp_rtrequest;
457 					ifa->ifa_flags |= RTF_CONNECTED;
458 
459 					if (rtrequest(RTM_ADD, ifa->ifa_addr,
460 					    ifa->ifa_addr, ifa->ifa_netmask, 0,
461 					    NULL) == 0)
462 						ifa->ifa_flags |= IFA_ROUTE;
463 				}
464 				break;
465 			case RTM_DELETE:
466 				break;
467 			default:
468 				break;
469 			}
470 			if (rt != NULL) {
471 				rt_unref(rt);
472 				rt = NULL;
473 			}
474 			break;
475 		}
476 
477 #ifdef INET6
478 		case AF_INET6:
479 			if (cmd == RTM_ADD)
480 				in6_ifaddlocal(ifa);
481 			else
482 				in6_ifremlocal(ifa);
483 			break;
484 #endif /* INET6 */
485 		default:
486 			break;
487 		}
488 		s = pserialize_read_enter();
489 		ifa_release(ifa, &psref);
490 	}
491 	pserialize_read_exit(s);
492 	curlwp_bindx(bound);
493 	KERNEL_UNLOCK_ONE(NULL);
494 }
495 
496 /*
497  * process input packet.
498  * we have rearranged checks order compared to the rfc,
499  * but it seems more efficient this way or not possible otherwise.
500  */
501 static void
502 _carp_proto_input(struct mbuf *m, int hlen, int proto)
503 {
504 	struct ip *ip = mtod(m, struct ip *);
505 	struct carp_softc *sc = NULL;
506 	struct carp_header *ch;
507 	int iplen, len;
508 	struct ifnet *rcvif;
509 
510 	CARP_STATINC(CARP_STAT_IPACKETS);
511 	MCLAIM(m, &carp_proto_mowner_rx);
512 
513 	if (!carp_opts[CARPCTL_ALLOW]) {
514 		m_freem(m);
515 		return;
516 	}
517 
518 	rcvif = m_get_rcvif_NOMPSAFE(m);
519 	/* check if received on a valid carp interface */
520 	if (rcvif->if_type != IFT_CARP) {
521 		CARP_STATINC(CARP_STAT_BADIF);
522 		CARP_LOG(sc, ("packet received on non-carp interface: %s",
523 		    rcvif->if_xname));
524 		m_freem(m);
525 		return;
526 	}
527 
528 	/* verify that the IP TTL is 255.  */
529 	if (ip->ip_ttl != CARP_DFLTTL) {
530 		CARP_STATINC(CARP_STAT_BADTTL);
531 		CARP_LOG(sc, ("received ttl %d != %d on %s", ip->ip_ttl,
532 		    CARP_DFLTTL, rcvif->if_xname));
533 		m_freem(m);
534 		return;
535 	}
536 
537 	/*
538 	 * verify that the received packet length is
539 	 * equal to the CARP header
540 	 */
541 	iplen = ip->ip_hl << 2;
542 	len = iplen + sizeof(*ch);
543 	if (len > m->m_pkthdr.len) {
544 		CARP_STATINC(CARP_STAT_BADLEN);
545 		CARP_LOG(sc, ("packet too short %d on %s", m->m_pkthdr.len,
546 		    rcvif->if_xname));
547 		m_freem(m);
548 		return;
549 	}
550 
551 	if ((m = m_pullup(m, len)) == NULL) {
552 		CARP_STATINC(CARP_STAT_HDROPS);
553 		return;
554 	}
555 	ip = mtod(m, struct ip *);
556 	ch = (struct carp_header *)((char *)ip + iplen);
557 	/* verify the CARP checksum */
558 	m->m_data += iplen;
559 	if (carp_cksum(m, len - iplen)) {
560 		CARP_STATINC(CARP_STAT_BADSUM);
561 		CARP_LOG(sc, ("checksum failed on %s",
562 		    rcvif->if_xname));
563 		m_freem(m);
564 		return;
565 	}
566 	m->m_data -= iplen;
567 
568 	carp_proto_input_c(m, ch, AF_INET);
569 }
570 
571 void
572 carp_proto_input(struct mbuf *m, ...)
573 {
574 
575 	wqinput_input(carp_wqinput, m, 0, 0);
576 }
577 
578 #ifdef INET6
579 static void
580 _carp6_proto_input(struct mbuf *m, int off, int proto)
581 {
582 	struct carp_softc *sc = NULL;
583 	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
584 	struct carp_header *ch;
585 	u_int len;
586 	struct ifnet *rcvif;
587 
588 	CARP_STATINC(CARP_STAT_IPACKETS6);
589 	MCLAIM(m, &carp_proto6_mowner_rx);
590 
591 	if (!carp_opts[CARPCTL_ALLOW]) {
592 		m_freem(m);
593 		return;
594 	}
595 
596 	rcvif = m_get_rcvif_NOMPSAFE(m);
597 
598 	/* check if received on a valid carp interface */
599 	if (rcvif->if_type != IFT_CARP) {
600 		CARP_STATINC(CARP_STAT_BADIF);
601 		CARP_LOG(sc, ("packet received on non-carp interface: %s",
602 		    rcvif->if_xname));
603 		m_freem(m);
604 		return;
605 	}
606 
607 	/* verify that the IP TTL is 255 */
608 	if (ip6->ip6_hlim != CARP_DFLTTL) {
609 		CARP_STATINC(CARP_STAT_BADTTL);
610 		CARP_LOG(sc, ("received ttl %d != %d on %s", ip6->ip6_hlim,
611 		    CARP_DFLTTL, rcvif->if_xname));
612 		m_freem(m);
613 		return;
614 	}
615 
616 	/* verify that we have a complete carp packet */
617 	len = m->m_len;
618 	IP6_EXTHDR_GET(ch, struct carp_header *, m, off, sizeof(*ch));
619 	if (ch == NULL) {
620 		CARP_STATINC(CARP_STAT_BADLEN);
621 		CARP_LOG(sc, ("packet size %u too small", len));
622 		return;
623 	}
624 
625 	/* verify the CARP checksum */
626 	if (carp6_cksum(m, off, sizeof(*ch))) {
627 		CARP_STATINC(CARP_STAT_BADSUM);
628 		CARP_LOG(sc, ("checksum failed, on %s", rcvif->if_xname));
629 		m_freem(m);
630 		return;
631 	}
632 
633 	carp_proto_input_c(m, ch, AF_INET6);
634 	return;
635 }
636 
637 int
638 carp6_proto_input(struct mbuf **mp, int *offp, int proto)
639 {
640 
641 	wqinput_input(carp6_wqinput, *mp, *offp, proto);
642 
643 	return IPPROTO_DONE;
644 }
645 #endif /* INET6 */
646 
647 static void
648 carp_proto_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af)
649 {
650 	struct carp_softc *sc;
651 	u_int64_t tmp_counter;
652 	struct timeval sc_tv, ch_tv;
653 
654 	TAILQ_FOREACH(sc, &((struct carp_if *)
655 	    m_get_rcvif_NOMPSAFE(m)->if_carpdev->if_carp)->vhif_vrs, sc_list)
656 		if (sc->sc_vhid == ch->carp_vhid)
657 			break;
658 
659 	if (!sc || (sc->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) !=
660 	    (IFF_UP|IFF_RUNNING)) {
661 		CARP_STATINC(CARP_STAT_BADVHID);
662 		m_freem(m);
663 		return;
664 	}
665 
666 	/*
667 	 * Check if our own advertisement was duplicated
668 	 * from a non simplex interface.
669 	 * XXX If there is no address on our physical interface
670 	 * there is no way to distinguish our ads from the ones
671 	 * another carp host might have sent us.
672 	 */
673 	if ((sc->sc_carpdev->if_flags & IFF_SIMPLEX) == 0) {
674 		struct sockaddr sa;
675 		struct ifaddr *ifa;
676 		int s;
677 
678 		memset(&sa, 0, sizeof(sa));
679 		sa.sa_family = af;
680 		s = pserialize_read_enter();
681 		ifa = ifaof_ifpforaddr(&sa, sc->sc_carpdev);
682 
683 		if (ifa && af == AF_INET) {
684 			struct ip *ip = mtod(m, struct ip *);
685 			if (ip->ip_src.s_addr ==
686 					ifatoia(ifa)->ia_addr.sin_addr.s_addr) {
687 				pserialize_read_exit(s);
688 				m_freem(m);
689 				return;
690 			}
691 		}
692 #ifdef INET6
693 		if (ifa && af == AF_INET6) {
694 			struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
695 			struct in6_addr in6_src, in6_found;
696 
697 			in6_src = ip6->ip6_src;
698 			in6_found = ifatoia6(ifa)->ia_addr.sin6_addr;
699 			if (IN6_IS_ADDR_LINKLOCAL(&in6_src))
700 				in6_src.s6_addr16[1] = 0;
701 			if (IN6_IS_ADDR_LINKLOCAL(&in6_found))
702 				in6_found.s6_addr16[1] = 0;
703 			if (IN6_ARE_ADDR_EQUAL(&in6_src, &in6_found)) {
704 				pserialize_read_exit(s);
705 				m_freem(m);
706 				return;
707 			}
708 		}
709 #endif /* INET6 */
710 		pserialize_read_exit(s);
711 	}
712 
713 	nanotime(&sc->sc_if.if_lastchange);
714 	sc->sc_if.if_ipackets++;
715 	sc->sc_if.if_ibytes += m->m_pkthdr.len;
716 
717 	/* verify the CARP version. */
718 	if (ch->carp_version != CARP_VERSION) {
719 		CARP_STATINC(CARP_STAT_BADVER);
720 		sc->sc_if.if_ierrors++;
721 		CARP_LOG(sc, ("invalid version %d != %d",
722 		    ch->carp_version, CARP_VERSION));
723 		m_freem(m);
724 		return;
725 	}
726 
727 	/* verify the hash */
728 	if (carp_hmac_verify(sc, ch->carp_counter, ch->carp_md)) {
729 		struct ip *ip;
730 		char ipbuf[INET_ADDRSTRLEN];
731 #ifdef INET6
732 		struct ip6_hdr *ip6;
733 		char ip6buf[INET6_ADDRSTRLEN];
734 #endif
735 
736 		CARP_STATINC(CARP_STAT_BADAUTH);
737 		sc->sc_if.if_ierrors++;
738 
739 		switch(af) {
740 		case AF_INET:
741 			ip = mtod(m, struct ip *);
742 			CARP_LOG(sc, ("incorrect hash from %s",
743 			    IN_PRINT(ipbuf, &ip->ip_src)));
744 			break;
745 
746 #ifdef INET6
747 		case AF_INET6:
748 			ip6 = mtod(m, struct ip6_hdr *);
749 			CARP_LOG(sc, ("incorrect hash from %s",
750 			    IN6_PRINT(ip6buf, &ip6->ip6_src)));
751 			break;
752 #endif
753 
754 		default: CARP_LOG(sc, ("incorrect hash"));
755 			break;
756 		}
757 		m_freem(m);
758 		return;
759 	}
760 
761 	tmp_counter = ntohl(ch->carp_counter[0]);
762 	tmp_counter = tmp_counter<<32;
763 	tmp_counter += ntohl(ch->carp_counter[1]);
764 
765 	/* XXX Replay protection goes here */
766 
767 	sc->sc_init_counter = 0;
768 	sc->sc_counter = tmp_counter;
769 
770 
771 	sc_tv.tv_sec = sc->sc_advbase;
772 	if (carp_suppress_preempt && sc->sc_advskew <  240)
773 		sc_tv.tv_usec = 240 * 1000000 / 256;
774 	else
775 		sc_tv.tv_usec = sc->sc_advskew * 1000000 / 256;
776 	ch_tv.tv_sec = ch->carp_advbase;
777 	ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256;
778 
779 	switch (sc->sc_state) {
780 	case INIT:
781 		break;
782 	case MASTER:
783 		/*
784 		 * If we receive an advertisement from a backup who's going to
785 		 * be more frequent than us, go into BACKUP state.
786 		 */
787 		if (timercmp(&sc_tv, &ch_tv, >) ||
788 		    timercmp(&sc_tv, &ch_tv, ==)) {
789 			callout_stop(&sc->sc_ad_tmo);
790 			CARP_LOG(sc, ("MASTER -> BACKUP (more frequent advertisement received)"));
791 			carp_set_state(sc, BACKUP);
792 			carp_setrun(sc, 0);
793 			carp_setroute(sc, RTM_DELETE);
794 		}
795 		break;
796 	case BACKUP:
797 		/*
798 		 * If we're pre-empting masters who advertise slower than us,
799 		 * and this one claims to be slower, treat him as down.
800 		 */
801 		if (carp_opts[CARPCTL_PREEMPT] && timercmp(&sc_tv, &ch_tv, <)) {
802 			CARP_LOG(sc, ("BACKUP -> MASTER (preempting a slower master)"));
803 			carp_master_down(sc);
804 			break;
805 		}
806 
807 		/*
808 		 *  If the master is going to advertise at such a low frequency
809 		 *  that he's guaranteed to time out, we'd might as well just
810 		 *  treat him as timed out now.
811 		 */
812 		sc_tv.tv_sec = sc->sc_advbase * 3;
813 		if (timercmp(&sc_tv, &ch_tv, <)) {
814 			CARP_LOG(sc, ("BACKUP -> MASTER (master timed out)"));
815 			carp_master_down(sc);
816 			break;
817 		}
818 
819 		/*
820 		 * Otherwise, we reset the counter and wait for the next
821 		 * advertisement.
822 		 */
823 		carp_setrun(sc, af);
824 		break;
825 	}
826 
827 	m_freem(m);
828 	return;
829 }
830 
831 /*
832  * Interface side of the CARP implementation.
833  */
834 
835 /* ARGSUSED */
836 void
837 carpattach(int n)
838 {
839 	if_clone_attach(&carp_cloner);
840 
841 	carpstat_percpu = percpu_alloc(sizeof(uint64_t) * CARP_NSTATS);
842 }
843 
844 static int
845 carp_clone_create(struct if_clone *ifc, int unit)
846 {
847 	extern int ifqmaxlen;
848 	struct carp_softc *sc;
849 	struct ifnet *ifp;
850 	int rv;
851 
852 	sc = malloc(sizeof(*sc), M_DEVBUF, M_NOWAIT|M_ZERO);
853 	if (!sc)
854 		return (ENOMEM);
855 
856 	sc->sc_suppress = 0;
857 	sc->sc_advbase = CARP_DFLTINTV;
858 	sc->sc_vhid = -1;	/* required setting */
859 	sc->sc_advskew = 0;
860 	sc->sc_init_counter = 1;
861 	sc->sc_naddrs = sc->sc_naddrs6 = 0;
862 #ifdef INET6
863 	sc->sc_im6o.im6o_multicast_hlim = CARP_DFLTTL;
864 #endif /* INET6 */
865 
866 	callout_init(&sc->sc_ad_tmo, 0);
867 	callout_init(&sc->sc_md_tmo, 0);
868 	callout_init(&sc->sc_md6_tmo, 0);
869 
870 	callout_setfunc(&sc->sc_ad_tmo, carp_send_ad, sc);
871 	callout_setfunc(&sc->sc_md_tmo, carp_master_down, sc);
872 	callout_setfunc(&sc->sc_md6_tmo, carp_master_down, sc);
873 
874 	LIST_INIT(&sc->carp_mc_listhead);
875 	ifp = &sc->sc_if;
876 	ifp->if_softc = sc;
877 	snprintf(ifp->if_xname, sizeof ifp->if_xname, "%s%d", ifc->ifc_name,
878 	    unit);
879 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
880 	ifp->if_ioctl = carp_ioctl;
881 	ifp->if_start = carp_start;
882 	IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen);
883 	IFQ_SET_READY(&ifp->if_snd);
884 	rv = if_initialize(ifp);
885 	if (rv != 0) {
886 		callout_destroy(&sc->sc_ad_tmo);
887 		callout_destroy(&sc->sc_md_tmo);
888 		callout_destroy(&sc->sc_md6_tmo);
889 		free(ifp->if_softc, M_DEVBUF);
890 
891 		return rv;
892 	}
893 	ether_ifattach(ifp, NULL);
894 	carp_set_enaddr(sc);
895 	/* Overwrite ethernet defaults */
896 	ifp->if_type = IFT_CARP;
897 	ifp->if_output = carp_output;
898 	if_register(ifp);
899 
900 	return (0);
901 }
902 
903 static int
904 carp_clone_destroy(struct ifnet *ifp)
905 {
906 	struct carp_softc *sc = ifp->if_softc;
907 
908 	carpdetach(ifp->if_softc);
909 	ether_ifdetach(ifp);
910 	if_detach(ifp);
911 	callout_destroy(&sc->sc_ad_tmo);
912 	callout_destroy(&sc->sc_md_tmo);
913 	callout_destroy(&sc->sc_md6_tmo);
914 	free(ifp->if_softc, M_DEVBUF);
915 
916 	return (0);
917 }
918 
919 static void
920 carpdetach(struct carp_softc *sc)
921 {
922 	struct carp_if *cif;
923 	int s;
924 
925 	callout_stop(&sc->sc_ad_tmo);
926 	callout_stop(&sc->sc_md_tmo);
927 	callout_stop(&sc->sc_md6_tmo);
928 
929 	if (sc->sc_suppress)
930 		carp_suppress_preempt--;
931 	sc->sc_suppress = 0;
932 
933 	if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS)
934 		carp_suppress_preempt--;
935 	sc->sc_sendad_errors = 0;
936 
937 	carp_set_state(sc, INIT);
938 	sc->sc_if.if_flags &= ~IFF_UP;
939 	carp_setrun(sc, 0);
940 	carp_multicast_cleanup(sc);
941 
942 	KERNEL_LOCK(1, NULL);
943 	s = splnet();
944 	if (sc->sc_carpdev != NULL) {
945 		/* XXX linkstatehook removal */
946 		cif = (struct carp_if *)sc->sc_carpdev->if_carp;
947 		TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list);
948 		if (!--cif->vhif_nvrs) {
949 			ifpromisc(sc->sc_carpdev, 0);
950 			sc->sc_carpdev->if_carp = NULL;
951 			free(cif, M_IFADDR);
952 		}
953 	}
954 	sc->sc_carpdev = NULL;
955 	splx(s);
956 	KERNEL_UNLOCK_ONE(NULL);
957 }
958 
959 /* Detach an interface from the carp. */
960 void
961 carp_ifdetach(struct ifnet *ifp)
962 {
963 	struct carp_softc *sc, *nextsc;
964 	struct carp_if *cif = (struct carp_if *)ifp->if_carp;
965 
966 	for (sc = TAILQ_FIRST(&cif->vhif_vrs); sc; sc = nextsc) {
967 		nextsc = TAILQ_NEXT(sc, sc_list);
968 		carpdetach(sc);
969 	}
970 }
971 
972 static int
973 carp_prepare_ad(struct mbuf *m, struct carp_softc *sc,
974     struct carp_header *ch)
975 {
976 	if (sc->sc_init_counter) {
977 		/* this could also be seconds since unix epoch */
978 		sc->sc_counter = cprng_fast64();
979 	} else
980 		sc->sc_counter++;
981 
982 	ch->carp_counter[0] = htonl((sc->sc_counter>>32)&0xffffffff);
983 	ch->carp_counter[1] = htonl(sc->sc_counter&0xffffffff);
984 
985 	carp_hmac_generate(sc, ch->carp_counter, ch->carp_md);
986 
987 	return (0);
988 }
989 
990 static void
991 carp_send_ad_all(void)
992 {
993 	struct ifnet *ifp;
994 	struct carp_if *cif;
995 	struct carp_softc *vh;
996 	int s;
997 	int bound = curlwp_bind();
998 
999 	s = pserialize_read_enter();
1000 	IFNET_READER_FOREACH(ifp) {
1001 		struct psref psref;
1002 		if (ifp->if_carp == NULL || ifp->if_type == IFT_CARP)
1003 			continue;
1004 
1005 		if_acquire(ifp, &psref);
1006 		pserialize_read_exit(s);
1007 
1008 		cif = (struct carp_if *)ifp->if_carp;
1009 		TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
1010 			if ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) ==
1011 			    (IFF_UP|IFF_RUNNING) && vh->sc_state == MASTER)
1012 				carp_send_ad(vh);
1013 		}
1014 
1015 		s = pserialize_read_enter();
1016 		if_release(ifp, &psref);
1017 	}
1018 	pserialize_read_exit(s);
1019 	curlwp_bindx(bound);
1020 }
1021 
1022 
1023 static void
1024 carp_send_ad(void *v)
1025 {
1026 	struct carp_header ch;
1027 	struct timeval tv;
1028 	struct carp_softc *sc = v;
1029 	struct carp_header *ch_ptr;
1030 	struct mbuf *m;
1031 	int error, len, advbase, advskew, s;
1032 	struct sockaddr sa;
1033 
1034 	KERNEL_LOCK(1, NULL);
1035 	s = splsoftnet();
1036 
1037 	advbase = advskew = 0; /* Sssssh compiler */
1038 	if (sc->sc_carpdev == NULL) {
1039 		sc->sc_if.if_oerrors++;
1040 		goto retry_later;
1041 	}
1042 
1043 	/* bow out if we've gone to backup (the carp interface is going down) */
1044 	if (sc->sc_bow_out) {
1045 		sc->sc_bow_out = 0;
1046 		advbase = 255;
1047 		advskew = 255;
1048 	} else {
1049 		advbase = sc->sc_advbase;
1050 		if (!carp_suppress_preempt || sc->sc_advskew > 240)
1051 			advskew = sc->sc_advskew;
1052 		else
1053 			advskew = 240;
1054 		tv.tv_sec = advbase;
1055 		tv.tv_usec = advskew * 1000000 / 256;
1056 	}
1057 
1058 	ch.carp_version = CARP_VERSION;
1059 	ch.carp_type = CARP_ADVERTISEMENT;
1060 	ch.carp_vhid = sc->sc_vhid;
1061 	ch.carp_advbase = advbase;
1062 	ch.carp_advskew = advskew;
1063 	ch.carp_authlen = 7;	/* XXX DEFINE */
1064 	ch.carp_pad1 = 0;	/* must be zero */
1065 	ch.carp_cksum = 0;
1066 
1067 
1068 #ifdef INET
1069 	if (sc->sc_naddrs) {
1070 		struct ip *ip;
1071 		struct ifaddr *ifa;
1072 		int _s;
1073 
1074 		MGETHDR(m, M_DONTWAIT, MT_HEADER);
1075 		if (m == NULL) {
1076 			sc->sc_if.if_oerrors++;
1077 			CARP_STATINC(CARP_STAT_ONOMEM);
1078 			/* XXX maybe less ? */
1079 			goto retry_later;
1080 		}
1081 		MCLAIM(m, &carp_proto_mowner_tx);
1082 		len = sizeof(*ip) + sizeof(ch);
1083 		m->m_pkthdr.len = len;
1084 		m_reset_rcvif(m);
1085 		m->m_len = len;
1086 		MH_ALIGN(m, m->m_len);
1087 		m->m_flags |= M_MCAST;
1088 		ip = mtod(m, struct ip *);
1089 		ip->ip_v = IPVERSION;
1090 		ip->ip_hl = sizeof(*ip) >> 2;
1091 		ip->ip_tos = IPTOS_LOWDELAY;
1092 		ip->ip_len = htons(len);
1093 		ip->ip_id = 0;	/* no need for id, we don't support fragments */
1094 		ip->ip_off = htons(IP_DF);
1095 		ip->ip_ttl = CARP_DFLTTL;
1096 		ip->ip_p = IPPROTO_CARP;
1097 		ip->ip_sum = 0;
1098 
1099 		memset(&sa, 0, sizeof(sa));
1100 		sa.sa_family = AF_INET;
1101 		_s = pserialize_read_enter();
1102 		ifa = ifaof_ifpforaddr(&sa, sc->sc_carpdev);
1103 		if (ifa == NULL)
1104 			ip->ip_src.s_addr = 0;
1105 		else
1106 			ip->ip_src.s_addr =
1107 			    ifatoia(ifa)->ia_addr.sin_addr.s_addr;
1108 		pserialize_read_exit(_s);
1109 		ip->ip_dst.s_addr = INADDR_CARP_GROUP;
1110 
1111 		ch_ptr = (struct carp_header *)(&ip[1]);
1112 		memcpy(ch_ptr, &ch, sizeof(ch));
1113 		if (carp_prepare_ad(m, sc, ch_ptr))
1114 			goto retry_later;
1115 
1116 		m->m_data += sizeof(*ip);
1117 		ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip));
1118 		m->m_data -= sizeof(*ip);
1119 
1120 		nanotime(&sc->sc_if.if_lastchange);
1121 		sc->sc_if.if_opackets++;
1122 		sc->sc_if.if_obytes += len;
1123 		CARP_STATINC(CARP_STAT_OPACKETS);
1124 
1125 		error = ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo,
1126 		    NULL);
1127 		if (error) {
1128 			if (error == ENOBUFS)
1129 				CARP_STATINC(CARP_STAT_ONOMEM);
1130 			else
1131 				CARP_LOG(sc, ("ip_output failed: %d", error));
1132 			sc->sc_if.if_oerrors++;
1133 			if (sc->sc_sendad_errors < INT_MAX)
1134 				sc->sc_sendad_errors++;
1135 			if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) {
1136 				carp_suppress_preempt++;
1137 				if (carp_suppress_preempt == 1)
1138 					carp_send_ad_all();
1139 			}
1140 			sc->sc_sendad_success = 0;
1141 		} else {
1142 			if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) {
1143 				if (++sc->sc_sendad_success >=
1144 				    CARP_SENDAD_MIN_SUCCESS) {
1145 					carp_suppress_preempt--;
1146 					sc->sc_sendad_errors = 0;
1147 				}
1148 			} else
1149 				sc->sc_sendad_errors = 0;
1150 		}
1151 	}
1152 #endif /* INET */
1153 #ifdef INET6
1154 	if (sc->sc_naddrs6) {
1155 		struct ip6_hdr *ip6;
1156 		struct ifaddr *ifa;
1157 		int _s;
1158 
1159 		MGETHDR(m, M_DONTWAIT, MT_HEADER);
1160 		if (m == NULL) {
1161 			sc->sc_if.if_oerrors++;
1162 			CARP_STATINC(CARP_STAT_ONOMEM);
1163 			/* XXX maybe less ? */
1164 			goto retry_later;
1165 		}
1166 		MCLAIM(m, &carp_proto6_mowner_tx);
1167 		len = sizeof(*ip6) + sizeof(ch);
1168 		m->m_pkthdr.len = len;
1169 		m_reset_rcvif(m);
1170 		m->m_len = len;
1171 		MH_ALIGN(m, m->m_len);
1172 		m->m_flags |= M_MCAST;
1173 		ip6 = mtod(m, struct ip6_hdr *);
1174 		memset(ip6, 0, sizeof(*ip6));
1175 		ip6->ip6_vfc |= IPV6_VERSION;
1176 		ip6->ip6_hlim = CARP_DFLTTL;
1177 		ip6->ip6_nxt = IPPROTO_CARP;
1178 
1179 		/* set the source address */
1180 		memset(&sa, 0, sizeof(sa));
1181 		sa.sa_family = AF_INET6;
1182 		_s = pserialize_read_enter();
1183 		ifa = ifaof_ifpforaddr(&sa, sc->sc_carpdev);
1184 		if (ifa == NULL)	/* This should never happen with IPv6 */
1185 			memset(&ip6->ip6_src, 0, sizeof(struct in6_addr));
1186 		else
1187 			bcopy(ifatoia6(ifa)->ia_addr.sin6_addr.s6_addr,
1188 			    &ip6->ip6_src, sizeof(struct in6_addr));
1189 		pserialize_read_exit(_s);
1190 		/* set the multicast destination */
1191 
1192 		ip6->ip6_dst.s6_addr16[0] = htons(0xff02);
1193 		ip6->ip6_dst.s6_addr8[15] = 0x12;
1194 		if (in6_setscope(&ip6->ip6_dst, &sc->sc_if, NULL) != 0) {
1195 			sc->sc_if.if_oerrors++;
1196 			m_freem(m);
1197 			CARP_LOG(sc, ("in6_setscope failed"));
1198 			goto retry_later;
1199 		}
1200 
1201 		ch_ptr = (struct carp_header *)(&ip6[1]);
1202 		memcpy(ch_ptr, &ch, sizeof(ch));
1203 		if (carp_prepare_ad(m, sc, ch_ptr))
1204 			goto retry_later;
1205 
1206 		ch_ptr->carp_cksum = carp6_cksum(m, sizeof(*ip6),
1207 		    len - sizeof(*ip6));
1208 
1209 		nanotime(&sc->sc_if.if_lastchange);
1210 		sc->sc_if.if_opackets++;
1211 		sc->sc_if.if_obytes += len;
1212 		CARP_STATINC(CARP_STAT_OPACKETS6);
1213 
1214 		error = ip6_output(m, NULL, NULL, 0, &sc->sc_im6o, NULL, NULL);
1215 		if (error) {
1216 			if (error == ENOBUFS)
1217 				CARP_STATINC(CARP_STAT_ONOMEM);
1218 			else
1219 				CARP_LOG(sc, ("ip6_output failed: %d", error));
1220 			sc->sc_if.if_oerrors++;
1221 			if (sc->sc_sendad_errors < INT_MAX)
1222 				sc->sc_sendad_errors++;
1223 			if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) {
1224 				carp_suppress_preempt++;
1225 				if (carp_suppress_preempt == 1)
1226 					carp_send_ad_all();
1227 			}
1228 			sc->sc_sendad_success = 0;
1229 		} else {
1230 			if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) {
1231 				if (++sc->sc_sendad_success >=
1232 				    CARP_SENDAD_MIN_SUCCESS) {
1233 					carp_suppress_preempt--;
1234 					sc->sc_sendad_errors = 0;
1235 				}
1236 			} else
1237 				sc->sc_sendad_errors = 0;
1238 		}
1239 	}
1240 #endif /* INET6 */
1241 
1242 retry_later:
1243 	splx(s);
1244 	KERNEL_UNLOCK_ONE(NULL);
1245 	if (advbase != 255 || advskew != 255)
1246 		callout_schedule(&sc->sc_ad_tmo, tvtohz(&tv));
1247 }
1248 
1249 /*
1250  * Broadcast a gratuitous ARP request containing
1251  * the virtual router MAC address for each IP address
1252  * associated with the virtual router.
1253  */
1254 static void
1255 carp_send_arp(struct carp_softc *sc)
1256 {
1257 	struct ifaddr *ifa;
1258 	int s, bound;
1259 
1260 	KERNEL_LOCK(1, NULL);
1261 	bound = curlwp_bind();
1262 	s = pserialize_read_enter();
1263 	IFADDR_READER_FOREACH(ifa, &sc->sc_if) {
1264 		struct psref psref;
1265 
1266 		if (ifa->ifa_addr->sa_family != AF_INET)
1267 			continue;
1268 
1269 		ifa_acquire(ifa, &psref);
1270 		pserialize_read_exit(s);
1271 
1272 		arpannounce(sc->sc_carpdev, ifa, CLLADDR(sc->sc_if.if_sadl));
1273 
1274 		s = pserialize_read_enter();
1275 		ifa_release(ifa, &psref);
1276 	}
1277 	pserialize_read_exit(s);
1278 	curlwp_bindx(bound);
1279 	KERNEL_UNLOCK_ONE(NULL);
1280 }
1281 
1282 #ifdef INET6
1283 static void
1284 carp_send_na(struct carp_softc *sc)
1285 {
1286 	struct ifaddr *ifa;
1287 	struct in6_addr *in6;
1288 	static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
1289 	int s, bound;
1290 
1291 	KERNEL_LOCK(1, NULL);
1292 	bound = curlwp_bind();
1293 	s = pserialize_read_enter();
1294 	IFADDR_READER_FOREACH(ifa, &sc->sc_if) {
1295 		struct psref psref;
1296 
1297 		if (ifa->ifa_addr->sa_family != AF_INET6)
1298 			continue;
1299 
1300 		ifa_acquire(ifa, &psref);
1301 		pserialize_read_exit(s);
1302 
1303 		in6 = &ifatoia6(ifa)->ia_addr.sin6_addr;
1304 		nd6_na_output(sc->sc_carpdev, &mcast, in6,
1305 		    ND_NA_FLAG_OVERRIDE, 1, NULL);
1306 
1307 		s = pserialize_read_enter();
1308 		ifa_release(ifa, &psref);
1309 	}
1310 	pserialize_read_exit(s);
1311 	curlwp_bindx(bound);
1312 	KERNEL_UNLOCK_ONE(NULL);
1313 }
1314 #endif /* INET6 */
1315 
/*
 * Three-word mixing step, based on bridge_hash() in if_bridge.c.
 *
 * All three arguments are updated in place, so each must be a modifiable
 * lvalue.  Every argument is evaluated many times; pass plain variables
 * without side effects.
 */
#define	mix(a, b, c)							\
	do {								\
		(a) -= (b); (a) -= (c); (a) ^= ((c) >> 13);		\
		(b) -= (c); (b) -= (a); (b) ^= ((a) << 8);		\
		(c) -= (a); (c) -= (b); (c) ^= ((b) >> 13);		\
		(a) -= (b); (a) -= (c); (a) ^= ((c) >> 12);		\
		(b) -= (c); (b) -= (a); (b) ^= ((a) << 16);		\
		(c) -= (a); (c) -= (b); (c) ^= ((b) >> 5);		\
		(a) -= (b); (a) -= (c); (a) ^= ((c) >> 3);		\
		(b) -= (c); (b) -= (a); (b) ^= ((a) << 10);		\
		(c) -= (a); (c) -= (b); (c) ^= ((b) >> 15);		\
	} while (0)
1331 
1332 static u_int32_t
1333 carp_hash(struct carp_softc *sc, u_char *src)
1334 {
1335 	u_int32_t a = 0x9e3779b9, b = sc->sc_hashkey[0], c = sc->sc_hashkey[1];
1336 
1337 	c += sc->sc_key[3] << 24;
1338 	c += sc->sc_key[2] << 16;
1339 	c += sc->sc_key[1] << 8;
1340 	c += sc->sc_key[0];
1341 	b += src[5] << 8;
1342 	b += src[4];
1343 	a += src[3] << 24;
1344 	a += src[2] << 16;
1345 	a += src[1] << 8;
1346 	a += src[0];
1347 
1348 	mix(a, b, c);
1349 	return (c);
1350 }
1351 
1352 static int
1353 carp_addrcount(struct carp_if *cif, struct in_ifaddr *ia, int type)
1354 {
1355 	struct carp_softc *vh;
1356 	struct ifaddr *ifa;
1357 	int count = 0;
1358 
1359 	TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
1360 		if ((type == CARP_COUNT_RUNNING &&
1361 		    (vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) ==
1362 		    (IFF_UP|IFF_RUNNING)) ||
1363 		    (type == CARP_COUNT_MASTER && vh->sc_state == MASTER)) {
1364 			int s = pserialize_read_enter();
1365 			IFADDR_READER_FOREACH(ifa, &vh->sc_if) {
1366 				if (ifa->ifa_addr->sa_family == AF_INET &&
1367 				    ia->ia_addr.sin_addr.s_addr ==
1368 				    ifatoia(ifa)->ia_addr.sin_addr.s_addr)
1369 					count++;
1370 			}
1371 			pserialize_read_exit(s);
1372 		}
1373 	}
1374 	return (count);
1375 }
1376 
1377 int
1378 carp_iamatch(struct in_ifaddr *ia, u_char *src,
1379     u_int32_t *count, u_int32_t index)
1380 {
1381 	struct carp_softc *sc = ia->ia_ifp->if_softc;
1382 
1383 	if (carp_opts[CARPCTL_ARPBALANCE]) {
1384 		/*
1385 		 * We use the source ip to decide which virtual host should
1386 		 * handle the request. If we're master of that virtual host,
1387 		 * then we respond, otherwise, just drop the arp packet on
1388 		 * the floor.
1389 		 */
1390 
1391 		/* Count the elegible carp interfaces with this address */
1392 		if (*count == 0)
1393 			*count = carp_addrcount(
1394 			    (struct carp_if *)ia->ia_ifp->if_carpdev->if_carp,
1395 			    ia, CARP_COUNT_RUNNING);
1396 
1397 		/* This should never happen, but... */
1398 		if (*count == 0)
1399 			return (0);
1400 
1401 		if (carp_hash(sc, src) % *count == index - 1 &&
1402 		    sc->sc_state == MASTER) {
1403 			return (1);
1404 		}
1405 	} else {
1406 		if (sc->sc_state == MASTER)
1407 			return (1);
1408 	}
1409 
1410 	return (0);
1411 }
1412 
1413 #ifdef INET6
1414 struct ifaddr *
1415 carp_iamatch6(void *v, struct in6_addr *taddr)
1416 {
1417 	struct carp_if *cif = v;
1418 	struct carp_softc *vh;
1419 	struct ifaddr *ifa;
1420 
1421 	TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
1422 		int s = pserialize_read_enter();
1423 		IFADDR_READER_FOREACH(ifa, &vh->sc_if) {
1424 			if (IN6_ARE_ADDR_EQUAL(taddr,
1425 			    &ifatoia6(ifa)->ia_addr.sin6_addr) &&
1426 			    ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) ==
1427 			    (IFF_UP|IFF_RUNNING)) && vh->sc_state == MASTER)
1428 				return (ifa);
1429 		}
1430 		pserialize_read_exit(s);
1431 	}
1432 
1433 	return (NULL);
1434 }
1435 #endif /* INET6 */
1436 
1437 struct ifnet *
1438 carp_ourether(void *v, struct ether_header *eh, u_char iftype, int src)
1439 {
1440 	struct carp_if *cif = (struct carp_if *)v;
1441 	struct carp_softc *vh;
1442 	u_int8_t *ena;
1443 
1444 	if (src)
1445 		ena = (u_int8_t *)&eh->ether_shost;
1446 	else
1447 		ena = (u_int8_t *)&eh->ether_dhost;
1448 
1449 	switch (iftype) {
1450 	case IFT_ETHER:
1451 	case IFT_FDDI:
1452 		if (ena[0] || ena[1] || ena[2] != 0x5e || ena[3] || ena[4] != 1)
1453 			return (NULL);
1454 		break;
1455 	case IFT_ISO88025:
1456 		if (ena[0] != 3 || ena[1] || ena[4] || ena[5])
1457 			return (NULL);
1458 		break;
1459 	default:
1460 		return (NULL);
1461 		break;
1462 	}
1463 
1464 	TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list)
1465 		if ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) ==
1466 		    (IFF_UP|IFF_RUNNING) && vh->sc_state == MASTER &&
1467 		    !memcmp(ena, CLLADDR(vh->sc_if.if_sadl),
1468 		    ETHER_ADDR_LEN)) {
1469 			return (&vh->sc_if);
1470 		    }
1471 
1472 	return (NULL);
1473 }
1474 
1475 int
1476 carp_input(struct mbuf *m, u_int8_t *shost, u_int8_t *dhost, u_int16_t etype)
1477 {
1478 	struct ether_header eh;
1479 	struct carp_if *cif = (struct carp_if *)m_get_rcvif_NOMPSAFE(m)->if_carp;
1480 	struct ifnet *ifp;
1481 
1482 	memcpy(&eh.ether_shost, shost, sizeof(eh.ether_shost));
1483 	memcpy(&eh.ether_dhost, dhost, sizeof(eh.ether_dhost));
1484 	eh.ether_type = etype;
1485 
1486 	if (m->m_flags & (M_BCAST|M_MCAST)) {
1487 		struct carp_softc *vh;
1488 		struct mbuf *m0;
1489 
1490 		/*
1491 		 * XXX Should really check the list of multicast addresses
1492 		 * for each CARP interface _before_ copying.
1493 		 */
1494 		TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
1495 			m0 = m_copym(m, 0, M_COPYALL, M_DONTWAIT);
1496 			if (m0 == NULL)
1497 				continue;
1498 			m_set_rcvif(m0, &vh->sc_if);
1499 			ether_input(&vh->sc_if, m0);
1500 		}
1501 		return (1);
1502 	}
1503 
1504 	ifp = carp_ourether(cif, &eh, m_get_rcvif_NOMPSAFE(m)->if_type, 0);
1505 	if (ifp == NULL) {
1506 		return (1);
1507 	}
1508 
1509 	m_set_rcvif(m, ifp);
1510 
1511 	bpf_mtap(ifp, m);
1512 	ifp->if_ipackets++;
1513 	ether_input(ifp, m);
1514 	return (0);
1515 }
1516 
1517 static void
1518 carp_master_down(void *v)
1519 {
1520 	struct carp_softc *sc = v;
1521 
1522 	switch (sc->sc_state) {
1523 	case INIT:
1524 		printf("%s: master_down event in INIT state\n",
1525 		    sc->sc_if.if_xname);
1526 		break;
1527 	case MASTER:
1528 		break;
1529 	case BACKUP:
1530 		CARP_LOG(sc, ("INIT -> MASTER (preempting)"));
1531 		carp_set_state(sc, MASTER);
1532 		carp_send_ad(sc);
1533 		carp_send_arp(sc);
1534 #ifdef INET6
1535 		carp_send_na(sc);
1536 #endif /* INET6 */
1537 		carp_setrun(sc, 0);
1538 		carp_setroute(sc, RTM_ADD);
1539 		break;
1540 	}
1541 }
1542 
1543 /*
1544  * When in backup state, af indicates whether to reset the master down timer
1545  * for v4 or v6. If it's set to zero, reset the ones which are already pending.
1546  */
1547 static void
1548 carp_setrun(struct carp_softc *sc, sa_family_t af)
1549 {
1550 	struct timeval tv;
1551 
1552 	if (sc->sc_carpdev == NULL) {
1553 		sc->sc_if.if_flags &= ~IFF_RUNNING;
1554 		carp_set_state(sc, INIT);
1555 		return;
1556 	}
1557 
1558 	if (sc->sc_if.if_flags & IFF_UP && sc->sc_vhid > 0 &&
1559 	    (sc->sc_naddrs || sc->sc_naddrs6) && !sc->sc_suppress) {
1560 		sc->sc_if.if_flags |= IFF_RUNNING;
1561 	} else {
1562 		sc->sc_if.if_flags &= ~IFF_RUNNING;
1563 		carp_setroute(sc, RTM_DELETE);
1564 		return;
1565 	}
1566 
1567 	switch (sc->sc_state) {
1568 	case INIT:
1569 		carp_set_state(sc, BACKUP);
1570 		carp_setroute(sc, RTM_DELETE);
1571 		carp_setrun(sc, 0);
1572 		break;
1573 	case BACKUP:
1574 		callout_stop(&sc->sc_ad_tmo);
1575 		tv.tv_sec = 3 * sc->sc_advbase;
1576 		tv.tv_usec = sc->sc_advskew * 1000000 / 256;
1577 		switch (af) {
1578 #ifdef INET
1579 		case AF_INET:
1580 			callout_schedule(&sc->sc_md_tmo, tvtohz(&tv));
1581 			break;
1582 #endif /* INET */
1583 #ifdef INET6
1584 		case AF_INET6:
1585 			callout_schedule(&sc->sc_md6_tmo, tvtohz(&tv));
1586 			break;
1587 #endif /* INET6 */
1588 		default:
1589 			if (sc->sc_naddrs)
1590 				callout_schedule(&sc->sc_md_tmo, tvtohz(&tv));
1591 #ifdef INET6
1592 			if (sc->sc_naddrs6)
1593 				callout_schedule(&sc->sc_md6_tmo, tvtohz(&tv));
1594 #endif /* INET6 */
1595 			break;
1596 		}
1597 		break;
1598 	case MASTER:
1599 		tv.tv_sec = sc->sc_advbase;
1600 		tv.tv_usec = sc->sc_advskew * 1000000 / 256;
1601 		callout_schedule(&sc->sc_ad_tmo, tvtohz(&tv));
1602 		break;
1603 	}
1604 }
1605 
1606 static void
1607 carp_multicast_cleanup(struct carp_softc *sc)
1608 {
1609 	struct ip_moptions *imo = &sc->sc_imo;
1610 #ifdef INET6
1611 	struct ip6_moptions *im6o = &sc->sc_im6o;
1612 #endif
1613 	u_int16_t n = imo->imo_num_memberships;
1614 
1615 	/* Clean up our own multicast memberships */
1616 	while (n-- > 0) {
1617 		if (imo->imo_membership[n] != NULL) {
1618 			in_delmulti(imo->imo_membership[n]);
1619 			imo->imo_membership[n] = NULL;
1620 		}
1621 	}
1622 	imo->imo_num_memberships = 0;
1623 	imo->imo_multicast_if_index = 0;
1624 
1625 #ifdef INET6
1626 	while (!LIST_EMPTY(&im6o->im6o_memberships)) {
1627 		struct in6_multi_mship *imm =
1628 		    LIST_FIRST(&im6o->im6o_memberships);
1629 
1630 		LIST_REMOVE(imm, i6mm_chain);
1631 		in6_leavegroup(imm);
1632 	}
1633 	im6o->im6o_multicast_if_index = 0;
1634 #endif
1635 
1636 	/* And any other multicast memberships */
1637 	carp_ether_purgemulti(sc);
1638 }
1639 
1640 static int
1641 carp_set_ifp(struct carp_softc *sc, struct ifnet *ifp)
1642 {
1643 	struct carp_if *cif, *ncif = NULL;
1644 	struct carp_softc *vr, *after = NULL;
1645 	int myself = 0, error = 0;
1646 	int s;
1647 
1648 	if (ifp == sc->sc_carpdev)
1649 		return (0);
1650 
1651 	if (ifp != NULL) {
1652 		if ((ifp->if_flags & IFF_MULTICAST) == 0)
1653 			return (EADDRNOTAVAIL);
1654 
1655 		if (ifp->if_type == IFT_CARP)
1656 			return (EINVAL);
1657 
1658 		if (ifp->if_carp == NULL) {
1659 			ncif = malloc(sizeof(*cif), M_IFADDR, M_NOWAIT);
1660 			if (ncif == NULL)
1661 				return (ENOBUFS);
1662 			if ((error = ifpromisc(ifp, 1))) {
1663 				free(ncif, M_IFADDR);
1664 				return (error);
1665 			}
1666 
1667 			ncif->vhif_ifp = ifp;
1668 			TAILQ_INIT(&ncif->vhif_vrs);
1669 		} else {
1670 			cif = (struct carp_if *)ifp->if_carp;
1671 			TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list)
1672 				if (vr != sc && vr->sc_vhid == sc->sc_vhid)
1673 					return (EINVAL);
1674 		}
1675 
1676 		/* detach from old interface */
1677 		if (sc->sc_carpdev != NULL)
1678 			carpdetach(sc);
1679 
1680 		/* join multicast groups */
1681 		if (sc->sc_naddrs < 0 &&
1682 		    (error = carp_join_multicast(sc)) != 0) {
1683 			if (ncif != NULL)
1684 				free(ncif, M_IFADDR);
1685 			return (error);
1686 		}
1687 
1688 #ifdef INET6
1689 		if (sc->sc_naddrs6 < 0 &&
1690 		    (error = carp_join_multicast6(sc)) != 0) {
1691 			if (ncif != NULL)
1692 				free(ncif, M_IFADDR);
1693 			carp_multicast_cleanup(sc);
1694 			return (error);
1695 		}
1696 #endif
1697 
1698 		/* attach carp interface to physical interface */
1699 		if (ncif != NULL)
1700 			ifp->if_carp = (void *)ncif;
1701 		sc->sc_carpdev = ifp;
1702 		sc->sc_if.if_capabilities = ifp->if_capabilities &
1703 		             (IFCAP_TSOv4 | IFCAP_TSOv6 |
1704                              IFCAP_CSUM_IPv4_Tx|IFCAP_CSUM_IPv4_Rx|
1705                              IFCAP_CSUM_TCPv4_Tx|IFCAP_CSUM_TCPv4_Rx|
1706                              IFCAP_CSUM_UDPv4_Tx|IFCAP_CSUM_UDPv4_Rx|
1707                              IFCAP_CSUM_TCPv6_Tx|IFCAP_CSUM_TCPv6_Rx|
1708                              IFCAP_CSUM_UDPv6_Tx|IFCAP_CSUM_UDPv6_Rx);
1709 
1710 		cif = (struct carp_if *)ifp->if_carp;
1711 		TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) {
1712 			if (vr == sc)
1713 				myself = 1;
1714 			if (vr->sc_vhid < sc->sc_vhid)
1715 				after = vr;
1716 		}
1717 
1718 		if (!myself) {
1719 			/* We're trying to keep things in order */
1720 			if (after == NULL) {
1721 				TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list);
1722 			} else {
1723 				TAILQ_INSERT_AFTER(&cif->vhif_vrs, after,
1724 				    sc, sc_list);
1725 			}
1726 			cif->vhif_nvrs++;
1727 		}
1728 		if (sc->sc_naddrs || sc->sc_naddrs6)
1729 			sc->sc_if.if_flags |= IFF_UP;
1730 		carp_set_enaddr(sc);
1731 		KERNEL_LOCK(1, NULL);
1732 		s = splnet();
1733 		/* XXX linkstatehooks establish */
1734 		carp_carpdev_state(ifp);
1735 		splx(s);
1736 		KERNEL_UNLOCK_ONE(NULL);
1737 	} else {
1738 		carpdetach(sc);
1739 		sc->sc_if.if_flags &= ~(IFF_UP|IFF_RUNNING);
1740 	}
1741 	return (0);
1742 }
1743 
1744 static void
1745 carp_set_enaddr(struct carp_softc *sc)
1746 {
1747 	uint8_t enaddr[ETHER_ADDR_LEN];
1748 	if (sc->sc_carpdev && sc->sc_carpdev->if_type == IFT_ISO88025) {
1749 		enaddr[0] = 3;
1750 		enaddr[1] = 0;
1751 		enaddr[2] = 0x40 >> (sc->sc_vhid - 1);
1752 		enaddr[3] = 0x40000 >> (sc->sc_vhid - 1);
1753 		enaddr[4] = 0;
1754 		enaddr[5] = 0;
1755 	} else {
1756 		enaddr[0] = 0;
1757 		enaddr[1] = 0;
1758 		enaddr[2] = 0x5e;
1759 		enaddr[3] = 0;
1760 		enaddr[4] = 1;
1761 		enaddr[5] = sc->sc_vhid;
1762 	}
1763 	if_set_sadl(&sc->sc_if, enaddr, sizeof(enaddr), false);
1764 }
1765 
1766 #if 0
1767 static void
1768 carp_addr_updated(void *v)
1769 {
1770 	struct carp_softc *sc = (struct carp_softc *) v;
1771 	struct ifaddr *ifa;
1772 	int new_naddrs = 0, new_naddrs6 = 0;
1773 
1774 	IFADDR_READER_FOREACH(ifa, &sc->sc_if) {
1775 		if (ifa->ifa_addr->sa_family == AF_INET)
1776 			new_naddrs++;
1777 		else if (ifa->ifa_addr->sa_family == AF_INET6)
1778 			new_naddrs6++;
1779 	}
1780 
1781 	/* Handle a callback after SIOCDIFADDR */
1782 	if (new_naddrs < sc->sc_naddrs || new_naddrs6 < sc->sc_naddrs6) {
1783 		struct in_addr mc_addr;
1784 
1785 		sc->sc_naddrs = new_naddrs;
1786 		sc->sc_naddrs6 = new_naddrs6;
1787 
1788 		/* Re-establish multicast membership removed by in_control */
1789 		mc_addr.s_addr = INADDR_CARP_GROUP;
1790 		if (!in_multi_group(mc_addr, &sc->sc_if, 0)) {
1791 			memset(&sc->sc_imo, 0, sizeof(sc->sc_imo));
1792 
1793 			if (sc->sc_carpdev != NULL && sc->sc_naddrs > 0)
1794 				carp_join_multicast(sc);
1795 		}
1796 
1797 		if (sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0) {
1798 			sc->sc_if.if_flags &= ~IFF_UP;
1799 			carp_set_state(sc, INIT);
1800 		} else
1801 			carp_hmac_prepare(sc);
1802 	}
1803 
1804 	carp_setrun(sc, 0);
1805 }
1806 #endif
1807 
1808 static int
1809 carp_set_addr(struct carp_softc *sc, struct sockaddr_in *sin)
1810 {
1811 	struct ifnet *ifp = sc->sc_carpdev;
1812 	struct in_ifaddr *ia, *ia_if;
1813 	int error = 0;
1814 	int s;
1815 
1816 	if (sin->sin_addr.s_addr == 0) {
1817 		if (!(sc->sc_if.if_flags & IFF_UP))
1818 			carp_set_state(sc, INIT);
1819 		if (sc->sc_naddrs)
1820 			sc->sc_if.if_flags |= IFF_UP;
1821 		carp_setrun(sc, 0);
1822 		return (0);
1823 	}
1824 
1825 	/* we have to do this by hand to ensure we don't match on ourselves */
1826 	ia_if = NULL;
1827 	s = pserialize_read_enter();
1828 	IN_ADDRLIST_READER_FOREACH(ia) {
1829 		/* and, yeah, we need a multicast-capable iface too */
1830 		if (ia->ia_ifp != &sc->sc_if &&
1831 		    ia->ia_ifp->if_type != IFT_CARP &&
1832 		    (ia->ia_ifp->if_flags & IFF_MULTICAST) &&
1833 		    (sin->sin_addr.s_addr & ia->ia_subnetmask) ==
1834 		    ia->ia_subnet) {
1835 			if (!ia_if)
1836 				ia_if = ia;
1837 		}
1838 	}
1839 
1840 	if (ia_if) {
1841 		ia = ia_if;
1842 		if (ifp) {
1843 			if (ifp != ia->ia_ifp)
1844 				return (EADDRNOTAVAIL);
1845 		} else {
1846 			/* FIXME NOMPSAFE */
1847 			ifp = ia->ia_ifp;
1848 		}
1849 	}
1850 	pserialize_read_exit(s);
1851 
1852 	if ((error = carp_set_ifp(sc, ifp)))
1853 		return (error);
1854 
1855 	if (sc->sc_carpdev == NULL)
1856 		return (EADDRNOTAVAIL);
1857 
1858 	if (sc->sc_naddrs == 0 && (error = carp_join_multicast(sc)) != 0)
1859 		return (error);
1860 
1861 	sc->sc_naddrs++;
1862 	if (sc->sc_carpdev != NULL)
1863 		sc->sc_if.if_flags |= IFF_UP;
1864 
1865 	carp_set_state(sc, INIT);
1866 	carp_setrun(sc, 0);
1867 
1868 	/*
1869 	 * Hook if_addrhooks so that we get a callback after in_ifinit has run,
1870 	 * to correct any inappropriate routes that it inserted.
1871 	 */
1872 	if (sc->ah_cookie == 0) {
1873 		/* XXX link address hook */
1874 	}
1875 
1876 	return (0);
1877 }
1878 
1879 static int
1880 carp_join_multicast(struct carp_softc *sc)
1881 {
1882 	struct ip_moptions *imo = &sc->sc_imo, tmpimo;
1883 	struct in_addr addr;
1884 
1885 	memset(&tmpimo, 0, sizeof(tmpimo));
1886 	addr.s_addr = INADDR_CARP_GROUP;
1887 	if ((tmpimo.imo_membership[0] =
1888 	    in_addmulti(&addr, &sc->sc_if)) == NULL) {
1889 		return (ENOBUFS);
1890 	}
1891 
1892 	imo->imo_membership[0] = tmpimo.imo_membership[0];
1893 	imo->imo_num_memberships = 1;
1894 	imo->imo_multicast_if_index = sc->sc_if.if_index;
1895 	imo->imo_multicast_ttl = CARP_DFLTTL;
1896 	imo->imo_multicast_loop = 0;
1897 	return (0);
1898 }
1899 
1900 
1901 #ifdef INET6
1902 static int
1903 carp_set_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6)
1904 {
1905 	struct ifnet *ifp = sc->sc_carpdev;
1906 	struct in6_ifaddr *ia, *ia_if;
1907 	int error = 0;
1908 	int s;
1909 
1910 	if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
1911 		if (!(sc->sc_if.if_flags & IFF_UP))
1912 			carp_set_state(sc, INIT);
1913 		if (sc->sc_naddrs6)
1914 			sc->sc_if.if_flags |= IFF_UP;
1915 		carp_setrun(sc, 0);
1916 		return (0);
1917 	}
1918 
1919 	/* we have to do this by hand to ensure we don't match on ourselves */
1920 	ia_if = NULL;
1921 	s = pserialize_read_enter();
1922 	IN6_ADDRLIST_READER_FOREACH(ia) {
1923 		int i;
1924 
1925 		for (i = 0; i < 4; i++) {
1926 			if ((sin6->sin6_addr.s6_addr32[i] &
1927 			    ia->ia_prefixmask.sin6_addr.s6_addr32[i]) !=
1928 			    (ia->ia_addr.sin6_addr.s6_addr32[i] &
1929 			    ia->ia_prefixmask.sin6_addr.s6_addr32[i]))
1930 				break;
1931 		}
1932 		/* and, yeah, we need a multicast-capable iface too */
1933 		if (ia->ia_ifp != &sc->sc_if &&
1934 		    ia->ia_ifp->if_type != IFT_CARP &&
1935 		    (ia->ia_ifp->if_flags & IFF_MULTICAST) &&
1936 		    (i == 4)) {
1937 			if (!ia_if)
1938 				ia_if = ia;
1939 		}
1940 	}
1941 	pserialize_read_exit(s);
1942 
1943 	if (ia_if) {
1944 		ia = ia_if;
1945 		if (sc->sc_carpdev) {
1946 			if (sc->sc_carpdev != ia->ia_ifp)
1947 				return (EADDRNOTAVAIL);
1948 		} else {
1949 			ifp = ia->ia_ifp;
1950 		}
1951 	}
1952 
1953 	if ((error = carp_set_ifp(sc, ifp)))
1954 		return (error);
1955 
1956 	if (sc->sc_carpdev == NULL)
1957 		return (EADDRNOTAVAIL);
1958 
1959 	if (sc->sc_naddrs6 == 0 && (error = carp_join_multicast6(sc)) != 0)
1960 		return (error);
1961 
1962 	sc->sc_naddrs6++;
1963 	if (sc->sc_carpdev != NULL)
1964 		sc->sc_if.if_flags |= IFF_UP;
1965 	carp_set_state(sc, INIT);
1966 	carp_setrun(sc, 0);
1967 
1968 	return (0);
1969 }
1970 
1971 static int
1972 carp_join_multicast6(struct carp_softc *sc)
1973 {
1974 	struct in6_multi_mship *imm, *imm2;
1975 	struct ip6_moptions *im6o = &sc->sc_im6o;
1976 	struct sockaddr_in6 addr6;
1977 	int error;
1978 
1979 	/* Join IPv6 CARP multicast group */
1980 	memset(&addr6, 0, sizeof(addr6));
1981 	addr6.sin6_family = AF_INET6;
1982 	addr6.sin6_len = sizeof(addr6);
1983 	addr6.sin6_addr.s6_addr16[0] = htons(0xff02);
1984 	addr6.sin6_addr.s6_addr16[1] = htons(sc->sc_if.if_index);
1985 	addr6.sin6_addr.s6_addr8[15] = 0x12;
1986 	if ((imm = in6_joingroup(&sc->sc_if,
1987 	    &addr6.sin6_addr, &error, 0)) == NULL) {
1988 		return (error);
1989 	}
1990 	/* join solicited multicast address */
1991 	memset(&addr6.sin6_addr, 0, sizeof(addr6.sin6_addr));
1992 	addr6.sin6_addr.s6_addr16[0] = htons(0xff02);
1993 	addr6.sin6_addr.s6_addr16[1] = htons(sc->sc_if.if_index);
1994 	addr6.sin6_addr.s6_addr32[1] = 0;
1995 	addr6.sin6_addr.s6_addr32[2] = htonl(1);
1996 	addr6.sin6_addr.s6_addr32[3] = 0;
1997 	addr6.sin6_addr.s6_addr8[12] = 0xff;
1998 	if ((imm2 = in6_joingroup(&sc->sc_if,
1999 	    &addr6.sin6_addr, &error, 0)) == NULL) {
2000 		in6_leavegroup(imm);
2001 		return (error);
2002 	}
2003 
2004 	/* apply v6 multicast membership */
2005 	im6o->im6o_multicast_if_index = sc->sc_if.if_index;
2006 	if (imm)
2007 		LIST_INSERT_HEAD(&im6o->im6o_memberships, imm,
2008 		    i6mm_chain);
2009 	if (imm2)
2010 		LIST_INSERT_HEAD(&im6o->im6o_memberships, imm2,
2011 		    i6mm_chain);
2012 
2013 	return (0);
2014 }
2015 
2016 #endif /* INET6 */
2017 
2018 static int
2019 carp_ioctl(struct ifnet *ifp, u_long cmd, void *data)
2020 {
2021 	struct lwp *l = curlwp;		/* XXX */
2022 	struct carp_softc *sc = ifp->if_softc, *vr;
2023 	struct carpreq carpr;
2024 	struct ifaddr *ifa;
2025 	struct ifreq *ifr;
2026 	struct ifnet *cdev = NULL;
2027 	int error = 0;
2028 
2029 	ifa = (struct ifaddr *)data;
2030 	ifr = (struct ifreq *)data;
2031 
2032 	switch (cmd) {
2033 	case SIOCINITIFADDR:
2034 		switch (ifa->ifa_addr->sa_family) {
2035 #ifdef INET
2036 		case AF_INET:
2037 			sc->sc_if.if_flags |= IFF_UP;
2038 			memcpy(ifa->ifa_dstaddr, ifa->ifa_addr,
2039 			    sizeof(struct sockaddr));
2040 			error = carp_set_addr(sc, satosin(ifa->ifa_addr));
2041 			break;
2042 #endif /* INET */
2043 #ifdef INET6
2044 		case AF_INET6:
2045 			sc->sc_if.if_flags|= IFF_UP;
2046 			error = carp_set_addr6(sc, satosin6(ifa->ifa_addr));
2047 			break;
2048 #endif /* INET6 */
2049 		default:
2050 			error = EAFNOSUPPORT;
2051 			break;
2052 		}
2053 		break;
2054 
2055 	case SIOCSIFFLAGS:
2056 		if ((error = ifioctl_common(ifp, cmd, data)) != 0)
2057 			break;
2058 		if (sc->sc_state != INIT && !(ifr->ifr_flags & IFF_UP)) {
2059 			callout_stop(&sc->sc_ad_tmo);
2060 			callout_stop(&sc->sc_md_tmo);
2061 			callout_stop(&sc->sc_md6_tmo);
2062 			if (sc->sc_state == MASTER) {
2063 				/* we need the interface up to bow out */
2064 				sc->sc_if.if_flags |= IFF_UP;
2065 				sc->sc_bow_out = 1;
2066 				carp_send_ad(sc);
2067 			}
2068 			sc->sc_if.if_flags &= ~IFF_UP;
2069 			carp_set_state(sc, INIT);
2070 			carp_setrun(sc, 0);
2071 		} else if (sc->sc_state == INIT && (ifr->ifr_flags & IFF_UP)) {
2072 			sc->sc_if.if_flags |= IFF_UP;
2073 			carp_setrun(sc, 0);
2074 		}
2075 		break;
2076 
2077 	case SIOCSVH:
2078 		if (l == NULL)
2079 			break;
2080 		if ((error = kauth_authorize_network(l->l_cred,
2081 		    KAUTH_NETWORK_INTERFACE,
2082 		    KAUTH_REQ_NETWORK_INTERFACE_SETPRIV, ifp, (void *)cmd,
2083 		    NULL)) != 0)
2084 			break;
2085 		if ((error = copyin(ifr->ifr_data, &carpr, sizeof carpr)))
2086 			break;
2087 		error = 1;
2088 		if (carpr.carpr_carpdev[0] != '\0' &&
2089 		    (cdev = ifunit(carpr.carpr_carpdev)) == NULL)
2090 			return (EINVAL);
2091 		if ((error = carp_set_ifp(sc, cdev)))
2092 			return (error);
2093 		if (sc->sc_state != INIT && carpr.carpr_state != sc->sc_state) {
2094 			switch (carpr.carpr_state) {
2095 			case BACKUP:
2096 				callout_stop(&sc->sc_ad_tmo);
2097 				carp_set_state(sc, BACKUP);
2098 				carp_setrun(sc, 0);
2099 				carp_setroute(sc, RTM_DELETE);
2100 				break;
2101 			case MASTER:
2102 				carp_master_down(sc);
2103 				break;
2104 			default:
2105 				break;
2106 			}
2107 		}
2108 		if (carpr.carpr_vhid > 0) {
2109 			if (carpr.carpr_vhid > 255) {
2110 				error = EINVAL;
2111 				break;
2112 			}
2113 			if (sc->sc_carpdev) {
2114 				struct carp_if *cif;
2115 				cif = (struct carp_if *)sc->sc_carpdev->if_carp;
2116 				TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list)
2117 					if (vr != sc &&
2118 					    vr->sc_vhid == carpr.carpr_vhid)
2119 						return (EINVAL);
2120 			}
2121 			sc->sc_vhid = carpr.carpr_vhid;
2122 			carp_set_enaddr(sc);
2123 			carp_set_state(sc, INIT);
2124 			error--;
2125 		}
2126 		if (carpr.carpr_advbase > 0 || carpr.carpr_advskew > 0) {
2127 			if (carpr.carpr_advskew > 254) {
2128 				error = EINVAL;
2129 				break;
2130 			}
2131 			if (carpr.carpr_advbase > 255) {
2132 				error = EINVAL;
2133 				break;
2134 			}
2135 			sc->sc_advbase = carpr.carpr_advbase;
2136 			sc->sc_advskew = carpr.carpr_advskew;
2137 			error--;
2138 		}
2139 		memcpy(sc->sc_key, carpr.carpr_key, sizeof(sc->sc_key));
2140 		if (error > 0)
2141 			error = EINVAL;
2142 		else {
2143 			error = 0;
2144 			carp_setrun(sc, 0);
2145 		}
2146 		break;
2147 
2148 	case SIOCGVH:
2149 		memset(&carpr, 0, sizeof(carpr));
2150 		if (sc->sc_carpdev != NULL)
2151 			strlcpy(carpr.carpr_carpdev, sc->sc_carpdev->if_xname,
2152 			    IFNAMSIZ);
2153 		carpr.carpr_state = sc->sc_state;
2154 		carpr.carpr_vhid = sc->sc_vhid;
2155 		carpr.carpr_advbase = sc->sc_advbase;
2156 		carpr.carpr_advskew = sc->sc_advskew;
2157 
2158 		if ((l != NULL) && (error = kauth_authorize_network(l->l_cred,
2159 		    KAUTH_NETWORK_INTERFACE,
2160 		    KAUTH_REQ_NETWORK_INTERFACE_SETPRIV, ifp, (void *)cmd,
2161 		    NULL)) == 0)
2162 			memcpy(carpr.carpr_key, sc->sc_key,
2163 			    sizeof(carpr.carpr_key));
2164 		error = copyout(&carpr, ifr->ifr_data, sizeof(carpr));
2165 		break;
2166 
2167 	case SIOCADDMULTI:
2168 		error = carp_ether_addmulti(sc, ifr);
2169 		break;
2170 
2171 	case SIOCDELMULTI:
2172 		error = carp_ether_delmulti(sc, ifr);
2173 		break;
2174 
2175 	case SIOCSIFCAP:
2176 		if ((error = ifioctl_common(ifp, cmd, data)) == ENETRESET)
2177 			error = 0;
2178 		break;
2179 
2180 	default:
2181 		error = ether_ioctl(ifp, cmd, data);
2182 	}
2183 
2184 	carp_hmac_prepare(sc);
2185 	return (error);
2186 }
2187 
2188 
/*
 * if_start handler of the carp interface.  It is not expected to be
 * called; it does nothing except report the call in DEBUG kernels.
 */
static void
carp_start(struct ifnet *ifp)
{
#ifdef DEBUG
	printf("%s: start called\n", ifp->if_xname);
#endif /* DEBUG */
}
2199 
2200 int
2201 carp_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *sa,
2202     const struct rtentry *rt)
2203 {
2204 	struct carp_softc *sc = ((struct carp_softc *)ifp->if_softc);
2205 	KASSERT(KERNEL_LOCKED_P());
2206 
2207 	if (sc->sc_carpdev != NULL && sc->sc_state == MASTER) {
2208 		return if_output_lock(sc->sc_carpdev, ifp, m, sa, rt);
2209 	} else {
2210 		m_freem(m);
2211 		return (ENETUNREACH);
2212 	}
2213 }
2214 
2215 static void
2216 carp_set_state(struct carp_softc *sc, int state)
2217 {
2218 	static const char *carp_states[] = { CARP_STATES };
2219 	int link_state;
2220 
2221 	if (sc->sc_state == state)
2222 		return;
2223 
2224 	CARP_LOG(sc, ("state transition from: %s -> to: %s", carp_states[sc->sc_state], carp_states[state]));
2225 
2226 	sc->sc_state = state;
2227 	switch (state) {
2228 	case BACKUP:
2229 		link_state = LINK_STATE_DOWN;
2230 		break;
2231 	case MASTER:
2232 		link_state = LINK_STATE_UP;
2233 		break;
2234 	default:
2235 		link_state = LINK_STATE_UNKNOWN;
2236 		break;
2237 	}
2238 	if_link_state_change_softint(&sc->sc_if, link_state);
2239 }
2240 
2241 void
2242 carp_carpdev_state(void *v)
2243 {
2244 	struct carp_if *cif;
2245 	struct carp_softc *sc;
2246 	struct ifnet *ifp = v;
2247 
2248 	if (ifp->if_type == IFT_CARP)
2249 		return;
2250 
2251 	cif = (struct carp_if *)ifp->if_carp;
2252 
2253 	TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) {
2254 		int suppressed = sc->sc_suppress;
2255 
2256 		if (sc->sc_carpdev->if_link_state == LINK_STATE_DOWN ||
2257 		    !(sc->sc_carpdev->if_flags & IFF_UP)) {
2258 			sc->sc_if.if_flags &= ~IFF_RUNNING;
2259 			callout_stop(&sc->sc_ad_tmo);
2260 			callout_stop(&sc->sc_md_tmo);
2261 			callout_stop(&sc->sc_md6_tmo);
2262 			carp_set_state(sc, INIT);
2263 			sc->sc_suppress = 1;
2264 			carp_setrun(sc, 0);
2265 			if (!suppressed) {
2266 				carp_suppress_preempt++;
2267 				if (carp_suppress_preempt == 1)
2268 					carp_send_ad_all();
2269 			}
2270 		} else {
2271 			carp_set_state(sc, INIT);
2272 			sc->sc_suppress = 0;
2273 			carp_setrun(sc, 0);
2274 			if (suppressed)
2275 				carp_suppress_preempt--;
2276 		}
2277 	}
2278 }
2279 
2280 static int
2281 carp_ether_addmulti(struct carp_softc *sc, struct ifreq *ifr)
2282 {
2283 	const struct sockaddr *sa = ifreq_getaddr(SIOCADDMULTI, ifr);
2284 	struct ifnet *ifp;
2285 	struct carp_mc_entry *mc;
2286 	u_int8_t addrlo[ETHER_ADDR_LEN], addrhi[ETHER_ADDR_LEN];
2287 	int error;
2288 
2289 	ifp = sc->sc_carpdev;
2290 	if (ifp == NULL)
2291 		return (EINVAL);
2292 
2293 	error = ether_addmulti(sa, &sc->sc_ac);
2294 	if (error != ENETRESET)
2295 		return (error);
2296 
2297 	/*
2298 	 * This is new multicast address.  We have to tell parent
2299 	 * about it.  Also, remember this multicast address so that
2300 	 * we can delete them on unconfigure.
2301 	 */
2302 	mc = malloc(sizeof(struct carp_mc_entry), M_DEVBUF, M_NOWAIT);
2303 	if (mc == NULL) {
2304 		error = ENOMEM;
2305 		goto alloc_failed;
2306 	}
2307 
2308 	/*
2309 	 * As ether_addmulti() returns ENETRESET, following two
2310 	 * statement shouldn't fail.
2311 	 */
2312 	(void)ether_multiaddr(sa, addrlo, addrhi);
2313 	ETHER_LOOKUP_MULTI(addrlo, addrhi, &sc->sc_ac, mc->mc_enm);
2314 	memcpy(&mc->mc_addr, sa, sa->sa_len);
2315 	LIST_INSERT_HEAD(&sc->carp_mc_listhead, mc, mc_entries);
2316 
2317 	error = if_mcast_op(ifp, SIOCADDMULTI, sa);
2318 	if (error != 0)
2319 		goto ioctl_failed;
2320 
2321 	return (error);
2322 
2323  ioctl_failed:
2324 	LIST_REMOVE(mc, mc_entries);
2325 	free(mc, M_DEVBUF);
2326  alloc_failed:
2327 	(void)ether_delmulti(sa, &sc->sc_ac);
2328 
2329 	return (error);
2330 }
2331 
2332 static int
2333 carp_ether_delmulti(struct carp_softc *sc, struct ifreq *ifr)
2334 {
2335 	const struct sockaddr *sa = ifreq_getaddr(SIOCDELMULTI, ifr);
2336 	struct ifnet *ifp;
2337 	struct ether_multi *enm;
2338 	struct carp_mc_entry *mc;
2339 	u_int8_t addrlo[ETHER_ADDR_LEN], addrhi[ETHER_ADDR_LEN];
2340 	int error;
2341 
2342 	ifp = sc->sc_carpdev;
2343 	if (ifp == NULL)
2344 		return (EINVAL);
2345 
2346 	/*
2347 	 * Find a key to lookup carp_mc_entry.  We have to do this
2348 	 * before calling ether_delmulti for obvious reason.
2349 	 */
2350 	if ((error = ether_multiaddr(sa, addrlo, addrhi)) != 0)
2351 		return (error);
2352 	ETHER_LOOKUP_MULTI(addrlo, addrhi, &sc->sc_ac, enm);
2353 	if (enm == NULL)
2354 		return (EINVAL);
2355 
2356 	LIST_FOREACH(mc, &sc->carp_mc_listhead, mc_entries)
2357 		if (mc->mc_enm == enm)
2358 			break;
2359 
2360 	/* We won't delete entries we didn't add */
2361 	if (mc == NULL)
2362 		return (EINVAL);
2363 
2364 	error = ether_delmulti(sa, &sc->sc_ac);
2365 	if (error != ENETRESET)
2366 		return (error);
2367 
2368 	/* We no longer use this multicast address.  Tell parent so. */
2369 	error = if_mcast_op(ifp, SIOCDELMULTI, sa);
2370 	if (error == 0) {
2371 		/* And forget about this address. */
2372 		LIST_REMOVE(mc, mc_entries);
2373 		free(mc, M_DEVBUF);
2374 	} else
2375 		(void)ether_addmulti(sa, &sc->sc_ac);
2376 	return (error);
2377 }
2378 
2379 /*
2380  * Delete any multicast address we have asked to add from parent
2381  * interface.  Called when the carp is being unconfigured.
2382  */
2383 static void
2384 carp_ether_purgemulti(struct carp_softc *sc)
2385 {
2386 	struct ifnet *ifp = sc->sc_carpdev;		/* Parent. */
2387 	struct carp_mc_entry *mc;
2388 
2389 	if (ifp == NULL)
2390 		return;
2391 
2392 	while ((mc = LIST_FIRST(&sc->carp_mc_listhead)) != NULL) {
2393 		(void)if_mcast_op(ifp, SIOCDELMULTI, sstosa(&mc->mc_addr));
2394 		LIST_REMOVE(mc, mc_entries);
2395 		free(mc, M_DEVBUF);
2396 	}
2397 }
2398 
2399 static int
2400 sysctl_net_inet_carp_stats(SYSCTLFN_ARGS)
2401 {
2402 
2403 	return (NETSTAT_SYSCTL(carpstat_percpu, CARP_NSTATS));
2404 }
2405 
2406 void
2407 carp_init(void)
2408 {
2409 
2410 	sysctl_net_inet_carp_setup(NULL);
2411 #ifdef MBUFTRACE
2412 	MOWNER_ATTACH(&carp_proto_mowner_rx);
2413 	MOWNER_ATTACH(&carp_proto_mowner_tx);
2414 	MOWNER_ATTACH(&carp_proto6_mowner_rx);
2415 	MOWNER_ATTACH(&carp_proto6_mowner_tx);
2416 #endif
2417 
2418 	carp_wqinput = wqinput_create("carp", _carp_proto_input);
2419 #ifdef INET6
2420 	carp6_wqinput = wqinput_create("carp6", _carp6_proto_input);
2421 #endif
2422 }
2423 
2424 static void
2425 sysctl_net_inet_carp_setup(struct sysctllog **clog)
2426 {
2427 
2428 	sysctl_createv(clog, 0, NULL, NULL,
2429 		       CTLFLAG_PERMANENT,
2430 		       CTLTYPE_NODE, "inet", NULL,
2431 		       NULL, 0, NULL, 0,
2432 		       CTL_NET, PF_INET, CTL_EOL);
2433 	sysctl_createv(clog, 0, NULL, NULL,
2434 		       CTLFLAG_PERMANENT,
2435 		       CTLTYPE_NODE, "carp",
2436 		       SYSCTL_DESCR("CARP related settings"),
2437 		       NULL, 0, NULL, 0,
2438 		       CTL_NET, PF_INET, IPPROTO_CARP, CTL_EOL);
2439 
2440 	sysctl_createv(clog, 0, NULL, NULL,
2441 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2442 		       CTLTYPE_INT, "preempt",
2443 		       SYSCTL_DESCR("Enable CARP Preempt"),
2444 		       NULL, 0, &carp_opts[CARPCTL_PREEMPT], 0,
2445 		       CTL_NET, PF_INET, IPPROTO_CARP,
2446 		       CTL_CREATE, CTL_EOL);
2447 	sysctl_createv(clog, 0, NULL, NULL,
2448 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2449 		       CTLTYPE_INT, "arpbalance",
2450 		       SYSCTL_DESCR("Enable ARP balancing"),
2451 		       NULL, 0, &carp_opts[CARPCTL_ARPBALANCE], 0,
2452 		       CTL_NET, PF_INET, IPPROTO_CARP,
2453 		       CTL_CREATE, CTL_EOL);
2454 	sysctl_createv(clog, 0, NULL, NULL,
2455 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2456 		       CTLTYPE_INT, "allow",
2457 		       SYSCTL_DESCR("Enable CARP"),
2458 		       NULL, 0, &carp_opts[CARPCTL_ALLOW], 0,
2459 		       CTL_NET, PF_INET, IPPROTO_CARP,
2460 		       CTL_CREATE, CTL_EOL);
2461 	sysctl_createv(clog, 0, NULL, NULL,
2462 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2463 		       CTLTYPE_INT, "log",
2464 		       SYSCTL_DESCR("CARP logging"),
2465 		       NULL, 0, &carp_opts[CARPCTL_LOG], 0,
2466 		       CTL_NET, PF_INET, IPPROTO_CARP,
2467 		       CTL_CREATE, CTL_EOL);
2468 	sysctl_createv(clog, 0, NULL, NULL,
2469 		       CTLFLAG_PERMANENT,
2470 		       CTLTYPE_STRUCT, "stats",
2471 		       SYSCTL_DESCR("CARP statistics"),
2472 		       sysctl_net_inet_carp_stats, 0, NULL, 0,
2473 		       CTL_NET, PF_INET, IPPROTO_CARP, CARPCTL_STATS,
2474 		       CTL_EOL);
2475 }
2476