xref: /netbsd-src/sys/netinet/ip_carp.c (revision bdc22b2e01993381dcefeff2bc9b56ca75a4235c)
1 /*	$NetBSD: ip_carp.c,v 1.99 2018/06/26 06:48:03 msaitoh Exp $	*/
2 /*	$OpenBSD: ip_carp.c,v 1.113 2005/11/04 08:11:54 mcbride Exp $	*/
3 
4 /*
5  * Copyright (c) 2002 Michael Shalayeff. All rights reserved.
6  * Copyright (c) 2003 Ryan McBride. All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
21  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
23  * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
25  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
26  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
27  * THE POSSIBILITY OF SUCH DAMAGE.
28  */
29 
30 #ifdef _KERNEL_OPT
31 #include "opt_inet.h"
32 #include "opt_mbuftrace.h"
33 #endif
34 
35 #include <sys/cdefs.h>
36 __KERNEL_RCSID(0, "$NetBSD: ip_carp.c,v 1.99 2018/06/26 06:48:03 msaitoh Exp $");
37 
38 /*
39  * TODO:
40  *	- iface reconfigure
41  *	- support for hardware checksum calculations;
42  *
43  */
44 
45 #include <sys/param.h>
46 #include <sys/proc.h>
47 #include <sys/mbuf.h>
48 #include <sys/socket.h>
49 #include <sys/socketvar.h>
50 #include <sys/callout.h>
51 #include <sys/ioctl.h>
52 #include <sys/errno.h>
53 #include <sys/device.h>
54 #include <sys/time.h>
55 #include <sys/kernel.h>
56 #include <sys/kauth.h>
57 #include <sys/sysctl.h>
58 #include <sys/ucred.h>
59 #include <sys/syslog.h>
60 #include <sys/acct.h>
61 #include <sys/cprng.h>
62 #include <sys/cpu.h>
63 #include <sys/pserialize.h>
64 #include <sys/psref.h>
65 
66 #include <net/if.h>
67 #include <net/pfil.h>
68 #include <net/if_types.h>
69 #include <net/if_ether.h>
70 #include <net/route.h>
71 #include <net/netisr.h>
72 #include <net/net_stats.h>
73 #include <netinet/if_inarp.h>
74 #include <netinet/wqinput.h>
75 
76 #if NFDDI > 0
77 #include <net/if_fddi.h>
78 #endif
79 #if NTOKEN > 0
80 #include <net/if_token.h>
81 #endif
82 
83 #ifdef INET
84 #include <netinet/in.h>
85 #include <netinet/in_systm.h>
86 #include <netinet/in_var.h>
87 #include <netinet/ip.h>
88 #include <netinet/ip_var.h>
89 
90 #include <net/if_dl.h>
91 #endif
92 
93 #ifdef INET6
94 #include <netinet/icmp6.h>
95 #include <netinet/ip6.h>
96 #include <netinet6/ip6_var.h>
97 #include <netinet6/nd6.h>
98 #include <netinet6/scope6_var.h>
99 #include <netinet6/in6_var.h>
100 #endif
101 
102 #include <net/bpf.h>
103 
104 #include <sys/sha1.h>
105 
106 #include <netinet/ip_carp.h>
107 
108 #include "ioconf.h"
109 
/*
 * One multicast membership record kept per carp interface.  Records are
 * linked on the owning softc's carp_mc_listhead.
 */
110 struct carp_mc_entry {
/* linkage on carp_softc.carp_mc_listhead */
111 	LIST_ENTRY(carp_mc_entry)	mc_entries;
112 	union {
/* associated Ethernet multicast record */
113 		struct ether_multi	*mcu_enm;
114 	} mc_u;
/* NOTE(review): presumably the multicast address of this entry - confirm */
115 	struct sockaddr_storage		mc_addr;
116 };
/* shorthand for the only member of the mc_u union */
117 #define	mc_enm	mc_u.mcu_enm
118 
/*
 * Per-interface state of one carp(4) pseudo interface (one virtual host).
 * Allocated in carp_clone_create() and released in carp_clone_destroy().
 */
119 struct carp_softc {
/* Ethernet common state; sc_if is the embedded struct ifnet */
120 	struct ethercom sc_ac;
121 #define	sc_if		sc_ac.ec_if
/* the interface this carp interface is attached to, or NULL */
122 #define	sc_carpdev	sc_ac.ec_if.if_carpdev
/* NOTE(review): the two cookies are not referenced in this part of the file */
123 	int ah_cookie;
124 	int lh_cookie;
/* multicast options handed to ip_output() when sending advertisements */
125 	struct ip_moptions sc_imo;
126 #ifdef INET6
/* IPv6 multicast options; the hop limit is set to CARP_DFLTTL at creation */
127 	struct ip6_moptions sc_im6o;
128 #endif /* INET6 */
/* linkage on the carp_if.vhif_vrs list of the attached interface */
129 	TAILQ_ENTRY(carp_softc) sc_list;
130 
/* protocol state of this virtual host */
131 	enum { INIT = 0, BACKUP, MASTER }	sc_state;
132 
/* non-zero while this softc holds a reference on carp_suppress_preempt */
133 	int sc_suppress;
/* when set, the next advertisement is sent with advbase/advskew 255 */
134 	int sc_bow_out;
135 
/*
 * Advertisement send failures.  Reaching CARP_SENDAD_MAX_ERRORS bumps
 * carp_suppress_preempt; CARP_SENDAD_MIN_SUCCESS successful sends after
 * that undo it again.
 */
136 	int sc_sendad_errors;
137 #define CARP_SENDAD_MAX_ERRORS	3
138 	int sc_sendad_success;
139 #define CARP_SENDAD_MIN_SUCCESS 3
140 
/* virtual host id; -1 until configured, matched against carp_vhid on input */
141 	int sc_vhid;
/* advertisement skew, used in units of 1/256 second */
142 	int sc_advskew;
/* non-zero sc_naddrs/sc_naddrs6 enable IPv4/IPv6 advertisements */
143 	int sc_naddrs;
144 	int sc_naddrs6;
145 	int sc_advbase;		/* seconds */
/* non-zero: pick a fresh random sc_counter for the next advertisement */
146 	int sc_init_counter;
/* 64-bit advertisement counter covered by the HMAC */
147 	u_int64_t sc_counter;
148 
149 	/* authentication */
150 #define CARP_HMAC_PAD	64
/* shared secret */
151 	unsigned char sc_key[CARP_KEY_LEN];
/* HMAC pad: ipad while preparing, opad once carp_hmac_prepare() returns */
152 	unsigned char sc_pad[CARP_HMAC_PAD];
/* precomputed inner hash state, copied for every generated digest */
153 	SHA1_CTX sc_sha1;
/* key for the arpbalance hash, derived in carp_hmac_prepare() */
154 	u_int32_t sc_hashkey[2];
155 
/* sc_ad_tmo runs carp_send_ad(); both md callouts run carp_master_down() */
156 	struct callout sc_ad_tmo;	/* advertisement timeout */
157 	struct callout sc_md_tmo;	/* master down timeout */
158 	struct callout sc_md6_tmo;	/* master down timeout */
159 
/* list of struct carp_mc_entry records */
160 	LIST_HEAD(__carp_mchead, carp_mc_entry)	carp_mc_listhead;
161 };
162 
/*
 * Number of reasons preemption is currently suppressed.  It is raised when
 * a softc reaches CARP_SENDAD_MAX_ERRORS and lowered again on recovery or
 * detach.  While non-zero, an advertisement skew of at least 240 is used.
 */
163 int carp_suppress_preempt = 0;
/*
 * Tunables indexed by the CARPCTL_* constants; this file reads the
 * CARPCTL_ALLOW, CARPCTL_LOG and CARPCTL_PREEMPT slots.
 */
164 static int carp_opts[CARPCTL_MAXID] = { 0, 1, 0, 0, 0 };	/* XXX for now */
165 
/* per-CPU statistics, CARP_NSTATS uint64_t counters (see carpattach()) */
166 static percpu_t *carpstat_percpu;
167 
/* bump statistics counter x */
168 #define	CARP_STATINC(x)		_NET_STATINC(carpstat_percpu, x)
169 
170 #ifdef MBUFTRACE
/* mbuf owners used with MCLAIM() on the receive and transmit paths */
171 static struct mowner carp_proto_mowner_rx = MOWNER_INIT("carp", "rx");
172 static struct mowner carp_proto_mowner_tx = MOWNER_INIT("carp", "tx");
173 static struct mowner carp_proto6_mowner_rx = MOWNER_INIT("carp6", "rx");
174 static struct mowner carp_proto6_mowner_tx = MOWNER_INIT("carp6", "tx");
175 #endif
176 
/*
 * State hung off ifnet.if_carp of an interface that has carp interfaces
 * attached to it.  It is freed when the last softc is removed.
 */
177 struct carp_if {
/* the attached carp softcs, linked through carp_softc.sc_list */
178 	TAILQ_HEAD(, carp_softc) vhif_vrs;
/* number of softcs on vhif_vrs */
179 	int vhif_nvrs;
180 
/* NOTE(review): presumably the interface owning this record - confirm */
181 	struct ifnet *vhif_ifp;
182 };
183 
/*
 * CARP_LOG(sc, s): emit one LOG_INFO line when carp_opts[CARPCTL_LOG] is
 * set.  "sc" selects the prefix (the interface name, or "carp" when it is
 * NULL) and "s" is a parenthesized printf-style argument list passed to
 * addlog(), e.g. CARP_LOG(sc, ("bad ttl %d", ttl));
 *
 * The body is wrapped in do { } while (0) so that the macro behaves like a
 * single statement and cannot capture a following "else".
 */
#define	CARP_LOG(sc, s)							\
	do {								\
		if (carp_opts[CARPCTL_LOG]) {				\
			if ((sc) != NULL)				\
				log(LOG_INFO, "%s: ",			\
				    (sc)->sc_if.if_xname);		\
			else						\
				log(LOG_INFO, "carp: ");		\
			addlog s;					\
			addlog("\n");					\
		}							\
	} while (/*CONSTCOND*/ 0)
194 
/* Forward declarations of the file-local functions. */

/* HMAC-SHA1 authentication of advertisements */
195 static void	carp_hmac_prepare(struct carp_softc *);
196 static void	carp_hmac_generate(struct carp_softc *, u_int32_t *,
197 		    unsigned char *);
198 static int	carp_hmac_verify(struct carp_softc *, u_int32_t *,
199 		    unsigned char *);
200 static void	carp_setroute(struct carp_softc *, int);
/* address-family independent part of the input path */
201 static void	carp_proto_input_c(struct mbuf *, struct carp_header *,
202 		    sa_family_t);
203 static void	carpdetach(struct carp_softc *);
204 static void	carp_prepare_ad(struct mbuf *, struct carp_softc *,
205 		    struct carp_header *);
206 static void	carp_send_ad_all(void);
/* carp_send_ad() and carp_master_down() are callout handlers, hence void * */
207 static void	carp_send_ad(void *);
208 static void	carp_send_arp(struct carp_softc *);
209 static void	carp_master_down(void *);
210 static int	carp_ioctl(struct ifnet *, u_long, void *);
211 static void	carp_start(struct ifnet *);
212 static void	carp_setrun(struct carp_softc *, sa_family_t);
213 static void	carp_set_state(struct carp_softc *, int);
/* the last argument of carp_addrcount() is one of the CARP_COUNT_* values */
214 static int	carp_addrcount(struct carp_if *, struct in_ifaddr *, int);
215 enum	{ CARP_COUNT_MASTER, CARP_COUNT_RUNNING };
216 
217 static void	carp_multicast_cleanup(struct carp_softc *);
218 static int	carp_set_ifp(struct carp_softc *, struct ifnet *);
219 static void	carp_set_enaddr(struct carp_softc *);
220 #if 0
221 static void	carp_addr_updated(void *);
222 #endif
223 static u_int32_t	carp_hash(struct carp_softc *, u_char *);
224 static int	carp_set_addr(struct carp_softc *, struct sockaddr_in *);
225 static int	carp_join_multicast(struct carp_softc *);
226 #ifdef INET6
227 static void	carp_send_na(struct carp_softc *);
228 static int	carp_set_addr6(struct carp_softc *, struct sockaddr_in6 *);
229 static int	carp_join_multicast6(struct carp_softc *);
230 #endif
/* interface cloner callbacks, see carp_cloner */
231 static int	carp_clone_create(struct if_clone *, int);
232 static int	carp_clone_destroy(struct ifnet *);
233 static int	carp_ether_addmulti(struct carp_softc *, struct ifreq *);
234 static int	carp_ether_delmulti(struct carp_softc *, struct ifreq *);
235 static void	carp_ether_purgemulti(struct carp_softc *);
236 
237 static void	sysctl_net_inet_carp_setup(struct sysctllog **);
238 
/*
 * carp_proto_input()/carp6_proto_input() only queue the packet on these
 * wqinput handles.  NOTE(review): the underscore-prefixed functions are
 * presumably the registered workers; the registration is not in this part
 * of the file - confirm.
 */
239 /* workqueue-based pr_input */
240 static struct wqinput *carp_wqinput;
241 static void _carp_proto_input(struct mbuf *, int, int);
242 #ifdef INET6
243 static struct wqinput *carp6_wqinput;
244 static void _carp6_proto_input(struct mbuf *, int, int);
245 #endif
246 
/*
 * Cloner for "carp" interfaces; registered with if_clone_attach() in
 * carpattach().
 */
247 struct if_clone carp_cloner =
248     IF_CLONE_INITIALIZER("carp", carp_clone_create, carp_clone_destroy);
249 
250 static __inline u_int16_t
251 carp_cksum(struct mbuf *m, int len)
252 {
253 	return (in_cksum(m, len));
254 }
255 
256 static __inline u_int16_t
257 carp6_cksum(struct mbuf *m, uint32_t off, uint32_t len)
258 {
259 	return (in6_cksum(m, IPPROTO_CARP, off, len));
260 }
261 
262 static void
263 carp_hmac_prepare(struct carp_softc *sc)
264 {
265 	u_int8_t carp_version = CARP_VERSION, type = CARP_ADVERTISEMENT;
266 	u_int8_t vhid = sc->sc_vhid & 0xff;
267 	SHA1_CTX sha1ctx;
268 	u_int32_t kmd[5];
269 	struct ifaddr *ifa;
270 	int i, found;
271 	struct in_addr last, cur, in;
272 #ifdef INET6
273 	struct in6_addr last6, cur6, in6;
274 #endif /* INET6 */
275 
276 	/* compute ipad from key */
277 	memset(sc->sc_pad, 0, sizeof(sc->sc_pad));
278 	memcpy(sc->sc_pad, sc->sc_key, sizeof(sc->sc_key));
279 	for (i = 0; i < sizeof(sc->sc_pad); i++)
280 		sc->sc_pad[i] ^= 0x36;
281 
282 	/* precompute first part of inner hash */
283 	SHA1Init(&sc->sc_sha1);
284 	SHA1Update(&sc->sc_sha1, sc->sc_pad, sizeof(sc->sc_pad));
285 	SHA1Update(&sc->sc_sha1, (void *)&carp_version, sizeof(carp_version));
286 	SHA1Update(&sc->sc_sha1, (void *)&type, sizeof(type));
287 
288 	/* generate a key for the arpbalance hash, before the vhid is hashed */
289 	memcpy(&sha1ctx, &sc->sc_sha1, sizeof(sha1ctx));
290 	SHA1Final((unsigned char *)kmd, &sha1ctx);
291 	sc->sc_hashkey[0] = kmd[0] ^ kmd[1];
292 	sc->sc_hashkey[1] = kmd[2] ^ kmd[3];
293 
294 	/* the rest of the precomputation */
295 	SHA1Update(&sc->sc_sha1, (void *)&vhid, sizeof(vhid));
296 
297 	/* Hash the addresses from smallest to largest, not interface order */
298 #ifdef INET
299 	cur.s_addr = 0;
300 	do {
301 		int s;
302 		found = 0;
303 		last = cur;
304 		cur.s_addr = 0xffffffff;
305 		s = pserialize_read_enter();
306 		IFADDR_READER_FOREACH(ifa, &sc->sc_if) {
307 			in.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr;
308 			if (ifa->ifa_addr->sa_family == AF_INET &&
309 			    ntohl(in.s_addr) > ntohl(last.s_addr) &&
310 			    ntohl(in.s_addr) < ntohl(cur.s_addr)) {
311 				cur.s_addr = in.s_addr;
312 				found++;
313 			}
314 		}
315 		pserialize_read_exit(s);
316 		if (found)
317 			SHA1Update(&sc->sc_sha1, (void *)&cur, sizeof(cur));
318 	} while (found);
319 #endif /* INET */
320 
321 #ifdef INET6
322 	memset(&cur6, 0x00, sizeof(cur6));
323 	do {
324 		int s;
325 		found = 0;
326 		last6 = cur6;
327 		memset(&cur6, 0xff, sizeof(cur6));
328 		s = pserialize_read_enter();
329 		IFADDR_READER_FOREACH(ifa, &sc->sc_if) {
330 			in6 = ifatoia6(ifa)->ia_addr.sin6_addr;
331 			if (IN6_IS_ADDR_LINKLOCAL(&in6))
332 				in6.s6_addr16[1] = 0;
333 			if (ifa->ifa_addr->sa_family == AF_INET6 &&
334 			    memcmp(&in6, &last6, sizeof(in6)) > 0 &&
335 			    memcmp(&in6, &cur6, sizeof(in6)) < 0) {
336 				cur6 = in6;
337 				found++;
338 			}
339 		}
340 		pserialize_read_exit(s);
341 		if (found)
342 			SHA1Update(&sc->sc_sha1, (void *)&cur6, sizeof(cur6));
343 	} while (found);
344 #endif /* INET6 */
345 
346 	/* convert ipad to opad */
347 	for (i = 0; i < sizeof(sc->sc_pad); i++)
348 		sc->sc_pad[i] ^= 0x36 ^ 0x5c;
349 }
350 
351 static void
352 carp_hmac_generate(struct carp_softc *sc, u_int32_t counter[2],
353     unsigned char md[20])
354 {
355 	SHA1_CTX sha1ctx;
356 
357 	/* fetch first half of inner hash */
358 	memcpy(&sha1ctx, &sc->sc_sha1, sizeof(sha1ctx));
359 
360 	SHA1Update(&sha1ctx, (void *)counter, sizeof(sc->sc_counter));
361 	SHA1Final(md, &sha1ctx);
362 
363 	/* outer hash */
364 	SHA1Init(&sha1ctx);
365 	SHA1Update(&sha1ctx, sc->sc_pad, sizeof(sc->sc_pad));
366 	SHA1Update(&sha1ctx, md, 20);
367 	SHA1Final(md, &sha1ctx);
368 }
369 
370 static int
371 carp_hmac_verify(struct carp_softc *sc, u_int32_t counter[2],
372     unsigned char md[20])
373 {
374 	unsigned char md2[20];
375 
376 	carp_hmac_generate(sc, counter, md2);
377 
378 	return (memcmp(md, md2, sizeof(md2)));
379 }
380 
381 static void
382 carp_setroute(struct carp_softc *sc, int cmd)
383 {
384 	struct ifaddr *ifa;
385 	int s, bound;
386 
387 	KERNEL_LOCK(1, NULL);
388 	bound = curlwp_bind();
389 	s = pserialize_read_enter();
390 	IFADDR_READER_FOREACH(ifa, &sc->sc_if) {
391 		struct psref psref;
392 		ifa_acquire(ifa, &psref);
393 		pserialize_read_exit(s);
394 
395 		switch (ifa->ifa_addr->sa_family) {
396 		case AF_INET: {
397 			int count = 0;
398 			struct rtentry *rt;
399 			int hr_otherif, nr_ourif;
400 
401 			/*
402 			 * Avoid screwing with the routes if there are other
403 			 * carp interfaces which are master and have the same
404 			 * address.
405 			 */
406 			if (sc->sc_carpdev != NULL &&
407 			    sc->sc_carpdev->if_carp != NULL) {
408 				count = carp_addrcount(
409 				    (struct carp_if *)sc->sc_carpdev->if_carp,
410 				    ifatoia(ifa), CARP_COUNT_MASTER);
411 				if ((cmd == RTM_ADD && count != 1) ||
412 				    (cmd == RTM_DELETE && count != 0))
413 					continue;
414 			}
415 
416 			/* Remove the existing host route, if any */
417 			rtrequest(RTM_DELETE, ifa->ifa_addr,
418 			    ifa->ifa_addr, ifa->ifa_netmask,
419 			    RTF_HOST, NULL);
420 
421 			rt = NULL;
422 			(void)rtrequest(RTM_GET, ifa->ifa_addr, ifa->ifa_addr,
423 			    ifa->ifa_netmask, RTF_HOST, &rt);
424 			hr_otherif = (rt && rt->rt_ifp != &sc->sc_if &&
425 			    (rt->rt_flags & RTF_CONNECTED));
426 			if (rt != NULL) {
427 				rt_unref(rt);
428 				rt = NULL;
429 			}
430 
431 			/* Check for a network route on our interface */
432 
433 			rt = NULL;
434 			(void)rtrequest(RTM_GET, ifa->ifa_addr, ifa->ifa_addr,
435 			    ifa->ifa_netmask, 0, &rt);
436 			nr_ourif = (rt && rt->rt_ifp == &sc->sc_if);
437 
438 			switch (cmd) {
439 			case RTM_ADD:
440 				if (hr_otherif) {
441 					ifa->ifa_rtrequest = NULL;
442 					ifa->ifa_flags &= ~RTF_CONNECTED;
443 
444 					rtrequest(RTM_ADD, ifa->ifa_addr,
445 					    ifa->ifa_addr, ifa->ifa_netmask,
446 					    RTF_UP | RTF_HOST, NULL);
447 				}
448 				if (!hr_otherif || nr_ourif || !rt) {
449 					if (nr_ourif &&
450 					    (rt->rt_flags & RTF_CONNECTED) == 0)
451 						rtrequest(RTM_DELETE,
452 						    ifa->ifa_addr,
453 						    ifa->ifa_addr,
454 						    ifa->ifa_netmask, 0, NULL);
455 
456 					ifa->ifa_rtrequest = arp_rtrequest;
457 					ifa->ifa_flags |= RTF_CONNECTED;
458 
459 					if (rtrequest(RTM_ADD, ifa->ifa_addr,
460 					    ifa->ifa_addr, ifa->ifa_netmask, 0,
461 					    NULL) == 0)
462 						ifa->ifa_flags |= IFA_ROUTE;
463 				}
464 				break;
465 			case RTM_DELETE:
466 				break;
467 			default:
468 				break;
469 			}
470 			if (rt != NULL) {
471 				rt_unref(rt);
472 				rt = NULL;
473 			}
474 			break;
475 		}
476 
477 #ifdef INET6
478 		case AF_INET6:
479 			if (cmd == RTM_ADD)
480 				in6_ifaddlocal(ifa);
481 			else
482 				in6_ifremlocal(ifa);
483 			break;
484 #endif /* INET6 */
485 		default:
486 			break;
487 		}
488 		s = pserialize_read_enter();
489 		ifa_release(ifa, &psref);
490 	}
491 	pserialize_read_exit(s);
492 	curlwp_bindx(bound);
493 	KERNEL_UNLOCK_ONE(NULL);
494 }
495 
496 /*
497  * process input packet.
498  * we have rearranged checks order compared to the rfc,
499  * but it seems more efficient this way or not possible otherwise.
500  */
501 static void
502 _carp_proto_input(struct mbuf *m, int hlen, int proto)
503 {
504 	struct ip *ip = mtod(m, struct ip *);
505 	struct carp_softc *sc = NULL;
506 	struct carp_header *ch;
507 	int iplen, len;
508 	struct ifnet *rcvif;
509 
510 	CARP_STATINC(CARP_STAT_IPACKETS);
511 	MCLAIM(m, &carp_proto_mowner_rx);
512 
513 	if (!carp_opts[CARPCTL_ALLOW]) {
514 		m_freem(m);
515 		return;
516 	}
517 
518 	rcvif = m_get_rcvif_NOMPSAFE(m);
519 	/* check if received on a valid carp interface */
520 	if (rcvif->if_type != IFT_CARP) {
521 		CARP_STATINC(CARP_STAT_BADIF);
522 		CARP_LOG(sc, ("packet received on non-carp interface: %s",
523 		    rcvif->if_xname));
524 		m_freem(m);
525 		return;
526 	}
527 
528 	/* verify that the IP TTL is 255.  */
529 	if (ip->ip_ttl != CARP_DFLTTL) {
530 		CARP_STATINC(CARP_STAT_BADTTL);
531 		CARP_LOG(sc, ("received ttl %d != %d on %s", ip->ip_ttl,
532 		    CARP_DFLTTL, rcvif->if_xname));
533 		m_freem(m);
534 		return;
535 	}
536 
537 	/*
538 	 * verify that the received packet length is
539 	 * equal to the CARP header
540 	 */
541 	iplen = ip->ip_hl << 2;
542 	len = iplen + sizeof(*ch);
543 	if (len > m->m_pkthdr.len) {
544 		CARP_STATINC(CARP_STAT_BADLEN);
545 		CARP_LOG(sc, ("packet too short %d on %s", m->m_pkthdr.len,
546 		    rcvif->if_xname));
547 		m_freem(m);
548 		return;
549 	}
550 
551 	if ((m = m_pullup(m, len)) == NULL) {
552 		CARP_STATINC(CARP_STAT_HDROPS);
553 		return;
554 	}
555 	ip = mtod(m, struct ip *);
556 	ch = (struct carp_header *)((char *)ip + iplen);
557 	/* verify the CARP checksum */
558 	m->m_data += iplen;
559 	if (carp_cksum(m, len - iplen)) {
560 		CARP_STATINC(CARP_STAT_BADSUM);
561 		CARP_LOG(sc, ("checksum failed on %s",
562 		    rcvif->if_xname));
563 		m_freem(m);
564 		return;
565 	}
566 	m->m_data -= iplen;
567 
568 	carp_proto_input_c(m, ch, AF_INET);
569 }
570 
571 void
572 carp_proto_input(struct mbuf *m, ...)
573 {
574 
575 	wqinput_input(carp_wqinput, m, 0, 0);
576 }
577 
578 #ifdef INET6
579 static void
580 _carp6_proto_input(struct mbuf *m, int off, int proto)
581 {
582 	struct carp_softc *sc = NULL;
583 	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
584 	struct carp_header *ch;
585 	u_int len;
586 	struct ifnet *rcvif;
587 
588 	CARP_STATINC(CARP_STAT_IPACKETS6);
589 	MCLAIM(m, &carp_proto6_mowner_rx);
590 
591 	if (!carp_opts[CARPCTL_ALLOW]) {
592 		m_freem(m);
593 		return;
594 	}
595 
596 	rcvif = m_get_rcvif_NOMPSAFE(m);
597 
598 	/* check if received on a valid carp interface */
599 	if (rcvif->if_type != IFT_CARP) {
600 		CARP_STATINC(CARP_STAT_BADIF);
601 		CARP_LOG(sc, ("packet received on non-carp interface: %s",
602 		    rcvif->if_xname));
603 		m_freem(m);
604 		return;
605 	}
606 
607 	/* verify that the IP TTL is 255 */
608 	if (ip6->ip6_hlim != CARP_DFLTTL) {
609 		CARP_STATINC(CARP_STAT_BADTTL);
610 		CARP_LOG(sc, ("received ttl %d != %d on %s", ip6->ip6_hlim,
611 		    CARP_DFLTTL, rcvif->if_xname));
612 		m_freem(m);
613 		return;
614 	}
615 
616 	/* verify that we have a complete carp packet */
617 	len = m->m_len;
618 	M_REGION_GET(ch, struct carp_header *, m, off, sizeof(*ch));
619 	if (ch == NULL) {
620 		CARP_STATINC(CARP_STAT_BADLEN);
621 		CARP_LOG(sc, ("packet size %u too small", len));
622 		return;
623 	}
624 
625 	/* verify the CARP checksum */
626 	if (carp6_cksum(m, off, sizeof(*ch))) {
627 		CARP_STATINC(CARP_STAT_BADSUM);
628 		CARP_LOG(sc, ("checksum failed, on %s", rcvif->if_xname));
629 		m_freem(m);
630 		return;
631 	}
632 
633 	carp_proto_input_c(m, ch, AF_INET6);
634 	return;
635 }
636 
637 int
638 carp6_proto_input(struct mbuf **mp, int *offp, int proto)
639 {
640 
641 	wqinput_input(carp6_wqinput, *mp, *offp, proto);
642 
643 	return IPPROTO_DONE;
644 }
645 #endif /* INET6 */
646 
647 static void
648 carp_proto_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af)
649 {
650 	struct carp_softc *sc;
651 	u_int64_t tmp_counter;
652 	struct timeval sc_tv, ch_tv;
653 
654 	TAILQ_FOREACH(sc, &((struct carp_if *)
655 	    m_get_rcvif_NOMPSAFE(m)->if_carpdev->if_carp)->vhif_vrs, sc_list)
656 		if (sc->sc_vhid == ch->carp_vhid)
657 			break;
658 
659 	if (!sc || (sc->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) !=
660 	    (IFF_UP|IFF_RUNNING)) {
661 		CARP_STATINC(CARP_STAT_BADVHID);
662 		m_freem(m);
663 		return;
664 	}
665 
666 	/*
667 	 * Check if our own advertisement was duplicated
668 	 * from a non simplex interface.
669 	 * XXX If there is no address on our physical interface
670 	 * there is no way to distinguish our ads from the ones
671 	 * another carp host might have sent us.
672 	 */
673 	if ((sc->sc_carpdev->if_flags & IFF_SIMPLEX) == 0) {
674 		struct sockaddr sa;
675 		struct ifaddr *ifa;
676 		int s;
677 
678 		memset(&sa, 0, sizeof(sa));
679 		sa.sa_family = af;
680 		s = pserialize_read_enter();
681 		ifa = ifaof_ifpforaddr(&sa, sc->sc_carpdev);
682 
683 		if (ifa && af == AF_INET) {
684 			struct ip *ip = mtod(m, struct ip *);
685 			if (ip->ip_src.s_addr ==
686 					ifatoia(ifa)->ia_addr.sin_addr.s_addr) {
687 				pserialize_read_exit(s);
688 				m_freem(m);
689 				return;
690 			}
691 		}
692 #ifdef INET6
693 		if (ifa && af == AF_INET6) {
694 			struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
695 			struct in6_addr in6_src, in6_found;
696 
697 			in6_src = ip6->ip6_src;
698 			in6_found = ifatoia6(ifa)->ia_addr.sin6_addr;
699 			if (IN6_IS_ADDR_LINKLOCAL(&in6_src))
700 				in6_src.s6_addr16[1] = 0;
701 			if (IN6_IS_ADDR_LINKLOCAL(&in6_found))
702 				in6_found.s6_addr16[1] = 0;
703 			if (IN6_ARE_ADDR_EQUAL(&in6_src, &in6_found)) {
704 				pserialize_read_exit(s);
705 				m_freem(m);
706 				return;
707 			}
708 		}
709 #endif /* INET6 */
710 		pserialize_read_exit(s);
711 	}
712 
713 	nanotime(&sc->sc_if.if_lastchange);
714 	sc->sc_if.if_ipackets++;
715 	sc->sc_if.if_ibytes += m->m_pkthdr.len;
716 
717 	/* verify the CARP version. */
718 	if (ch->carp_version != CARP_VERSION) {
719 		CARP_STATINC(CARP_STAT_BADVER);
720 		sc->sc_if.if_ierrors++;
721 		CARP_LOG(sc, ("invalid version %d != %d",
722 		    ch->carp_version, CARP_VERSION));
723 		m_freem(m);
724 		return;
725 	}
726 
727 	/* verify the hash */
728 	if (carp_hmac_verify(sc, ch->carp_counter, ch->carp_md)) {
729 		struct ip *ip;
730 		char ipbuf[INET_ADDRSTRLEN];
731 #ifdef INET6
732 		struct ip6_hdr *ip6;
733 		char ip6buf[INET6_ADDRSTRLEN];
734 #endif
735 
736 		CARP_STATINC(CARP_STAT_BADAUTH);
737 		sc->sc_if.if_ierrors++;
738 
739 		switch(af) {
740 		case AF_INET:
741 			ip = mtod(m, struct ip *);
742 			CARP_LOG(sc, ("incorrect hash from %s",
743 			    IN_PRINT(ipbuf, &ip->ip_src)));
744 			break;
745 
746 #ifdef INET6
747 		case AF_INET6:
748 			ip6 = mtod(m, struct ip6_hdr *);
749 			CARP_LOG(sc, ("incorrect hash from %s",
750 			    IN6_PRINT(ip6buf, &ip6->ip6_src)));
751 			break;
752 #endif
753 
754 		default: CARP_LOG(sc, ("incorrect hash"));
755 			break;
756 		}
757 		m_freem(m);
758 		return;
759 	}
760 
761 	tmp_counter = ntohl(ch->carp_counter[0]);
762 	tmp_counter = tmp_counter<<32;
763 	tmp_counter += ntohl(ch->carp_counter[1]);
764 
765 	/* XXX Replay protection goes here */
766 
767 	sc->sc_init_counter = 0;
768 	sc->sc_counter = tmp_counter;
769 
770 
771 	sc_tv.tv_sec = sc->sc_advbase;
772 	if (carp_suppress_preempt && sc->sc_advskew <  240)
773 		sc_tv.tv_usec = 240 * 1000000 / 256;
774 	else
775 		sc_tv.tv_usec = sc->sc_advskew * 1000000 / 256;
776 	ch_tv.tv_sec = ch->carp_advbase;
777 	ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256;
778 
779 	switch (sc->sc_state) {
780 	case INIT:
781 		break;
782 	case MASTER:
783 		/*
784 		 * If we receive an advertisement from a backup who's going to
785 		 * be more frequent than us, go into BACKUP state.
786 		 */
787 		if (timercmp(&sc_tv, &ch_tv, >) ||
788 		    timercmp(&sc_tv, &ch_tv, ==)) {
789 			callout_stop(&sc->sc_ad_tmo);
790 			CARP_LOG(sc, ("MASTER -> BACKUP (more frequent advertisement received)"));
791 			carp_set_state(sc, BACKUP);
792 			carp_setrun(sc, 0);
793 			carp_setroute(sc, RTM_DELETE);
794 		}
795 		break;
796 	case BACKUP:
797 		/*
798 		 * If we're pre-empting masters who advertise slower than us,
799 		 * and this one claims to be slower, treat him as down.
800 		 */
801 		if (carp_opts[CARPCTL_PREEMPT] && timercmp(&sc_tv, &ch_tv, <)) {
802 			CARP_LOG(sc, ("BACKUP -> MASTER (preempting a slower master)"));
803 			carp_master_down(sc);
804 			break;
805 		}
806 
807 		/*
808 		 *  If the master is going to advertise at such a low frequency
809 		 *  that he's guaranteed to time out, we'd might as well just
810 		 *  treat him as timed out now.
811 		 */
812 		sc_tv.tv_sec = sc->sc_advbase * 3;
813 		if (timercmp(&sc_tv, &ch_tv, <)) {
814 			CARP_LOG(sc, ("BACKUP -> MASTER (master timed out)"));
815 			carp_master_down(sc);
816 			break;
817 		}
818 
819 		/*
820 		 * Otherwise, we reset the counter and wait for the next
821 		 * advertisement.
822 		 */
823 		carp_setrun(sc, af);
824 		break;
825 	}
826 
827 	m_freem(m);
828 	return;
829 }
830 
831 /*
832  * Interface side of the CARP implementation.
833  */
834 
835 /* ARGSUSED */
836 void
837 carpattach(int n)
838 {
839 	if_clone_attach(&carp_cloner);
840 
841 	carpstat_percpu = percpu_alloc(sizeof(uint64_t) * CARP_NSTATS);
842 }
843 
844 static int
845 carp_clone_create(struct if_clone *ifc, int unit)
846 {
847 	extern int ifqmaxlen;
848 	struct carp_softc *sc;
849 	struct ifnet *ifp;
850 	int rv;
851 
852 	sc = malloc(sizeof(*sc), M_DEVBUF, M_NOWAIT|M_ZERO);
853 	if (!sc)
854 		return (ENOMEM);
855 
856 	sc->sc_suppress = 0;
857 	sc->sc_advbase = CARP_DFLTINTV;
858 	sc->sc_vhid = -1;	/* required setting */
859 	sc->sc_advskew = 0;
860 	sc->sc_init_counter = 1;
861 	sc->sc_naddrs = sc->sc_naddrs6 = 0;
862 #ifdef INET6
863 	sc->sc_im6o.im6o_multicast_hlim = CARP_DFLTTL;
864 #endif /* INET6 */
865 
866 	callout_init(&sc->sc_ad_tmo, 0);
867 	callout_init(&sc->sc_md_tmo, 0);
868 	callout_init(&sc->sc_md6_tmo, 0);
869 
870 	callout_setfunc(&sc->sc_ad_tmo, carp_send_ad, sc);
871 	callout_setfunc(&sc->sc_md_tmo, carp_master_down, sc);
872 	callout_setfunc(&sc->sc_md6_tmo, carp_master_down, sc);
873 
874 	LIST_INIT(&sc->carp_mc_listhead);
875 	ifp = &sc->sc_if;
876 	ifp->if_softc = sc;
877 	snprintf(ifp->if_xname, sizeof ifp->if_xname, "%s%d", ifc->ifc_name,
878 	    unit);
879 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
880 	ifp->if_ioctl = carp_ioctl;
881 	ifp->if_start = carp_start;
882 	IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen);
883 	IFQ_SET_READY(&ifp->if_snd);
884 	rv = if_initialize(ifp);
885 	if (rv != 0) {
886 		callout_destroy(&sc->sc_ad_tmo);
887 		callout_destroy(&sc->sc_md_tmo);
888 		callout_destroy(&sc->sc_md6_tmo);
889 		free(ifp->if_softc, M_DEVBUF);
890 
891 		return rv;
892 	}
893 	ether_ifattach(ifp, NULL);
894 	carp_set_enaddr(sc);
895 	/* Overwrite ethernet defaults */
896 	ifp->if_type = IFT_CARP;
897 	ifp->if_output = carp_output;
898 	if_register(ifp);
899 
900 	return (0);
901 }
902 
903 static int
904 carp_clone_destroy(struct ifnet *ifp)
905 {
906 	struct carp_softc *sc = ifp->if_softc;
907 
908 	carpdetach(ifp->if_softc);
909 	ether_ifdetach(ifp);
910 	if_detach(ifp);
911 	callout_destroy(&sc->sc_ad_tmo);
912 	callout_destroy(&sc->sc_md_tmo);
913 	callout_destroy(&sc->sc_md6_tmo);
914 	free(ifp->if_softc, M_DEVBUF);
915 
916 	return (0);
917 }
918 
919 static void
920 carpdetach(struct carp_softc *sc)
921 {
922 	struct carp_if *cif;
923 	int s;
924 
925 	callout_stop(&sc->sc_ad_tmo);
926 	callout_stop(&sc->sc_md_tmo);
927 	callout_stop(&sc->sc_md6_tmo);
928 
929 	if (sc->sc_suppress)
930 		carp_suppress_preempt--;
931 	sc->sc_suppress = 0;
932 
933 	if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS)
934 		carp_suppress_preempt--;
935 	sc->sc_sendad_errors = 0;
936 
937 	carp_set_state(sc, INIT);
938 	sc->sc_if.if_flags &= ~IFF_UP;
939 	carp_setrun(sc, 0);
940 	carp_multicast_cleanup(sc);
941 
942 	KERNEL_LOCK(1, NULL);
943 	s = splnet();
944 	if (sc->sc_carpdev != NULL) {
945 		/* XXX linkstatehook removal */
946 		cif = (struct carp_if *)sc->sc_carpdev->if_carp;
947 		TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list);
948 		if (!--cif->vhif_nvrs) {
949 			ifpromisc(sc->sc_carpdev, 0);
950 			sc->sc_carpdev->if_carp = NULL;
951 			free(cif, M_IFADDR);
952 		}
953 	}
954 	sc->sc_carpdev = NULL;
955 	splx(s);
956 	KERNEL_UNLOCK_ONE(NULL);
957 }
958 
959 /* Detach an interface from the carp. */
960 void
961 carp_ifdetach(struct ifnet *ifp)
962 {
963 	struct carp_softc *sc, *nextsc;
964 	struct carp_if *cif = (struct carp_if *)ifp->if_carp;
965 
966 	for (sc = TAILQ_FIRST(&cif->vhif_vrs); sc; sc = nextsc) {
967 		nextsc = TAILQ_NEXT(sc, sc_list);
968 		carpdetach(sc);
969 	}
970 }
971 
972 static void
973 carp_prepare_ad(struct mbuf *m, struct carp_softc *sc,
974     struct carp_header *ch)
975 {
976 	if (sc->sc_init_counter) {
977 		/* this could also be seconds since unix epoch */
978 		sc->sc_counter = cprng_fast64();
979 	} else
980 		sc->sc_counter++;
981 
982 	ch->carp_counter[0] = htonl((sc->sc_counter>>32)&0xffffffff);
983 	ch->carp_counter[1] = htonl(sc->sc_counter&0xffffffff);
984 
985 	carp_hmac_generate(sc, ch->carp_counter, ch->carp_md);
986 }
987 
988 static void
989 carp_send_ad_all(void)
990 {
991 	struct ifnet *ifp;
992 	struct carp_if *cif;
993 	struct carp_softc *vh;
994 	int s;
995 	int bound = curlwp_bind();
996 
997 	s = pserialize_read_enter();
998 	IFNET_READER_FOREACH(ifp) {
999 		struct psref psref;
1000 		if (ifp->if_carp == NULL || ifp->if_type == IFT_CARP)
1001 			continue;
1002 
1003 		if_acquire(ifp, &psref);
1004 		pserialize_read_exit(s);
1005 
1006 		cif = (struct carp_if *)ifp->if_carp;
1007 		TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
1008 			if ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) ==
1009 			    (IFF_UP|IFF_RUNNING) && vh->sc_state == MASTER)
1010 				carp_send_ad(vh);
1011 		}
1012 
1013 		s = pserialize_read_enter();
1014 		if_release(ifp, &psref);
1015 	}
1016 	pserialize_read_exit(s);
1017 	curlwp_bindx(bound);
1018 }
1019 
1020 
1021 static void
1022 carp_send_ad(void *v)
1023 {
1024 	struct carp_header ch;
1025 	struct timeval tv;
1026 	struct carp_softc *sc = v;
1027 	struct carp_header *ch_ptr;
1028 	struct mbuf *m;
1029 	int error, len, advbase, advskew, s;
1030 	struct sockaddr sa;
1031 
1032 	KERNEL_LOCK(1, NULL);
1033 	s = splsoftnet();
1034 
1035 	advbase = advskew = 0; /* Sssssh compiler */
1036 	if (sc->sc_carpdev == NULL) {
1037 		sc->sc_if.if_oerrors++;
1038 		goto retry_later;
1039 	}
1040 
1041 	/* bow out if we've gone to backup (the carp interface is going down) */
1042 	if (sc->sc_bow_out) {
1043 		sc->sc_bow_out = 0;
1044 		advbase = 255;
1045 		advskew = 255;
1046 	} else {
1047 		advbase = sc->sc_advbase;
1048 		if (!carp_suppress_preempt || sc->sc_advskew > 240)
1049 			advskew = sc->sc_advskew;
1050 		else
1051 			advskew = 240;
1052 		tv.tv_sec = advbase;
1053 		tv.tv_usec = advskew * 1000000 / 256;
1054 	}
1055 
1056 	ch.carp_version = CARP_VERSION;
1057 	ch.carp_type = CARP_ADVERTISEMENT;
1058 	ch.carp_vhid = sc->sc_vhid;
1059 	ch.carp_advbase = advbase;
1060 	ch.carp_advskew = advskew;
1061 	ch.carp_authlen = 7;	/* XXX DEFINE */
1062 	ch.carp_pad1 = 0;	/* must be zero */
1063 	ch.carp_cksum = 0;
1064 
1065 
1066 #ifdef INET
1067 	if (sc->sc_naddrs) {
1068 		struct ip *ip;
1069 		struct ifaddr *ifa;
1070 		int _s;
1071 
1072 		MGETHDR(m, M_DONTWAIT, MT_HEADER);
1073 		if (m == NULL) {
1074 			sc->sc_if.if_oerrors++;
1075 			CARP_STATINC(CARP_STAT_ONOMEM);
1076 			/* XXX maybe less ? */
1077 			goto retry_later;
1078 		}
1079 		MCLAIM(m, &carp_proto_mowner_tx);
1080 		len = sizeof(*ip) + sizeof(ch);
1081 		m->m_pkthdr.len = len;
1082 		m_reset_rcvif(m);
1083 		m->m_len = len;
1084 		MH_ALIGN(m, m->m_len);
1085 		m->m_flags |= M_MCAST;
1086 		ip = mtod(m, struct ip *);
1087 		ip->ip_v = IPVERSION;
1088 		ip->ip_hl = sizeof(*ip) >> 2;
1089 		ip->ip_tos = IPTOS_LOWDELAY;
1090 		ip->ip_len = htons(len);
1091 		ip->ip_id = 0;	/* no need for id, we don't support fragments */
1092 		ip->ip_off = htons(IP_DF);
1093 		ip->ip_ttl = CARP_DFLTTL;
1094 		ip->ip_p = IPPROTO_CARP;
1095 		ip->ip_sum = 0;
1096 
1097 		memset(&sa, 0, sizeof(sa));
1098 		sa.sa_family = AF_INET;
1099 		_s = pserialize_read_enter();
1100 		ifa = ifaof_ifpforaddr(&sa, sc->sc_carpdev);
1101 		if (ifa == NULL)
1102 			ip->ip_src.s_addr = 0;
1103 		else
1104 			ip->ip_src.s_addr =
1105 			    ifatoia(ifa)->ia_addr.sin_addr.s_addr;
1106 		pserialize_read_exit(_s);
1107 		ip->ip_dst.s_addr = INADDR_CARP_GROUP;
1108 
1109 		ch_ptr = (struct carp_header *)(&ip[1]);
1110 		memcpy(ch_ptr, &ch, sizeof(ch));
1111 		carp_prepare_ad(m, sc, ch_ptr);
1112 
1113 		m->m_data += sizeof(*ip);
1114 		ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip));
1115 		m->m_data -= sizeof(*ip);
1116 
1117 		nanotime(&sc->sc_if.if_lastchange);
1118 		sc->sc_if.if_opackets++;
1119 		sc->sc_if.if_obytes += len;
1120 		CARP_STATINC(CARP_STAT_OPACKETS);
1121 
1122 		error = ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo,
1123 		    NULL);
1124 		if (error) {
1125 			if (error == ENOBUFS)
1126 				CARP_STATINC(CARP_STAT_ONOMEM);
1127 			else
1128 				CARP_LOG(sc, ("ip_output failed: %d", error));
1129 			sc->sc_if.if_oerrors++;
1130 			if (sc->sc_sendad_errors < INT_MAX)
1131 				sc->sc_sendad_errors++;
1132 			if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) {
1133 				carp_suppress_preempt++;
1134 				if (carp_suppress_preempt == 1)
1135 					carp_send_ad_all();
1136 			}
1137 			sc->sc_sendad_success = 0;
1138 		} else {
1139 			if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) {
1140 				if (++sc->sc_sendad_success >=
1141 				    CARP_SENDAD_MIN_SUCCESS) {
1142 					carp_suppress_preempt--;
1143 					sc->sc_sendad_errors = 0;
1144 				}
1145 			} else
1146 				sc->sc_sendad_errors = 0;
1147 		}
1148 	}
1149 #endif /* INET */
1150 #ifdef INET6
1151 	if (sc->sc_naddrs6) {
1152 		struct ip6_hdr *ip6;
1153 		struct ifaddr *ifa;
1154 		int _s;
1155 
1156 		MGETHDR(m, M_DONTWAIT, MT_HEADER);
1157 		if (m == NULL) {
1158 			sc->sc_if.if_oerrors++;
1159 			CARP_STATINC(CARP_STAT_ONOMEM);
1160 			/* XXX maybe less ? */
1161 			goto retry_later;
1162 		}
1163 		MCLAIM(m, &carp_proto6_mowner_tx);
1164 		len = sizeof(*ip6) + sizeof(ch);
1165 		m->m_pkthdr.len = len;
1166 		m_reset_rcvif(m);
1167 		m->m_len = len;
1168 		MH_ALIGN(m, m->m_len);
1169 		m->m_flags |= M_MCAST;
1170 		ip6 = mtod(m, struct ip6_hdr *);
1171 		memset(ip6, 0, sizeof(*ip6));
1172 		ip6->ip6_vfc |= IPV6_VERSION;
1173 		ip6->ip6_hlim = CARP_DFLTTL;
1174 		ip6->ip6_nxt = IPPROTO_CARP;
1175 
1176 		/* set the source address */
1177 		memset(&sa, 0, sizeof(sa));
1178 		sa.sa_family = AF_INET6;
1179 		_s = pserialize_read_enter();
1180 		ifa = ifaof_ifpforaddr(&sa, sc->sc_carpdev);
1181 		if (ifa == NULL)	/* This should never happen with IPv6 */
1182 			memset(&ip6->ip6_src, 0, sizeof(struct in6_addr));
1183 		else
1184 			bcopy(ifatoia6(ifa)->ia_addr.sin6_addr.s6_addr,
1185 			    &ip6->ip6_src, sizeof(struct in6_addr));
1186 		pserialize_read_exit(_s);
1187 		/* set the multicast destination */
1188 
1189 		ip6->ip6_dst.s6_addr16[0] = htons(0xff02);
1190 		ip6->ip6_dst.s6_addr8[15] = 0x12;
1191 		if (in6_setscope(&ip6->ip6_dst, &sc->sc_if, NULL) != 0) {
1192 			sc->sc_if.if_oerrors++;
1193 			m_freem(m);
1194 			CARP_LOG(sc, ("in6_setscope failed"));
1195 			goto retry_later;
1196 		}
1197 
1198 		ch_ptr = (struct carp_header *)(&ip6[1]);
1199 		memcpy(ch_ptr, &ch, sizeof(ch));
1200 		carp_prepare_ad(m, sc, ch_ptr);
1201 
1202 		ch_ptr->carp_cksum = carp6_cksum(m, sizeof(*ip6),
1203 		    len - sizeof(*ip6));
1204 
1205 		nanotime(&sc->sc_if.if_lastchange);
1206 		sc->sc_if.if_opackets++;
1207 		sc->sc_if.if_obytes += len;
1208 		CARP_STATINC(CARP_STAT_OPACKETS6);
1209 
1210 		error = ip6_output(m, NULL, NULL, 0, &sc->sc_im6o, NULL, NULL);
1211 		if (error) {
1212 			if (error == ENOBUFS)
1213 				CARP_STATINC(CARP_STAT_ONOMEM);
1214 			else
1215 				CARP_LOG(sc, ("ip6_output failed: %d", error));
1216 			sc->sc_if.if_oerrors++;
1217 			if (sc->sc_sendad_errors < INT_MAX)
1218 				sc->sc_sendad_errors++;
1219 			if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) {
1220 				carp_suppress_preempt++;
1221 				if (carp_suppress_preempt == 1)
1222 					carp_send_ad_all();
1223 			}
1224 			sc->sc_sendad_success = 0;
1225 		} else {
1226 			if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) {
1227 				if (++sc->sc_sendad_success >=
1228 				    CARP_SENDAD_MIN_SUCCESS) {
1229 					carp_suppress_preempt--;
1230 					sc->sc_sendad_errors = 0;
1231 				}
1232 			} else
1233 				sc->sc_sendad_errors = 0;
1234 		}
1235 	}
1236 #endif /* INET6 */
1237 
1238 retry_later:
1239 	splx(s);
1240 	KERNEL_UNLOCK_ONE(NULL);
1241 	if (advbase != 255 || advskew != 255)
1242 		callout_schedule(&sc->sc_ad_tmo, tvtohz(&tv));
1243 }
1244 
1245 /*
1246  * Broadcast a gratuitous ARP request containing
1247  * the virtual router MAC address for each IP address
1248  * associated with the virtual router.
1249  */
1250 static void
1251 carp_send_arp(struct carp_softc *sc)
1252 {
1253 	struct ifaddr *ifa;
1254 	int s, bound;
1255 
1256 	KERNEL_LOCK(1, NULL);
1257 	bound = curlwp_bind();
1258 	s = pserialize_read_enter();
1259 	IFADDR_READER_FOREACH(ifa, &sc->sc_if) {
1260 		struct psref psref;
1261 
1262 		if (ifa->ifa_addr->sa_family != AF_INET)
1263 			continue;
1264 
1265 		ifa_acquire(ifa, &psref);
1266 		pserialize_read_exit(s);
1267 
1268 		arpannounce(sc->sc_carpdev, ifa, CLLADDR(sc->sc_if.if_sadl));
1269 
1270 		s = pserialize_read_enter();
1271 		ifa_release(ifa, &psref);
1272 	}
1273 	pserialize_read_exit(s);
1274 	curlwp_bindx(bound);
1275 	KERNEL_UNLOCK_ONE(NULL);
1276 }
1277 
1278 #ifdef INET6
1279 static void
1280 carp_send_na(struct carp_softc *sc)
1281 {
1282 	struct ifaddr *ifa;
1283 	struct in6_addr *in6;
1284 	static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
1285 	int s, bound;
1286 
1287 	KERNEL_LOCK(1, NULL);
1288 	bound = curlwp_bind();
1289 	s = pserialize_read_enter();
1290 	IFADDR_READER_FOREACH(ifa, &sc->sc_if) {
1291 		struct psref psref;
1292 
1293 		if (ifa->ifa_addr->sa_family != AF_INET6)
1294 			continue;
1295 
1296 		ifa_acquire(ifa, &psref);
1297 		pserialize_read_exit(s);
1298 
1299 		in6 = &ifatoia6(ifa)->ia_addr.sin6_addr;
1300 		nd6_na_output(sc->sc_carpdev, &mcast, in6,
1301 		    ND_NA_FLAG_OVERRIDE, 1, NULL);
1302 
1303 		s = pserialize_read_enter();
1304 		ifa_release(ifa, &psref);
1305 	}
1306 	pserialize_read_exit(s);
1307 	curlwp_bindx(bound);
1308 	KERNEL_UNLOCK_ONE(NULL);
1309 }
1310 #endif /* INET6 */
1311 
/*
 * Three-word integer mixing step, based on bridge_hash() in if_bridge.c.
 *
 * All three arguments must be modifiable lvalues; each one is updated in
 * place.  The arguments are evaluated several times, so they must not
 * have side effects.
 */
#define	mix(a, b, c)							\
	do {								\
		(a) -= (b); (a) -= (c); (a) ^= ((c) >> 13);		\
		(b) -= (c); (b) -= (a); (b) ^= ((a) << 8);		\
		(c) -= (a); (c) -= (b); (c) ^= ((b) >> 13);		\
		(a) -= (b); (a) -= (c); (a) ^= ((c) >> 12);		\
		(b) -= (c); (b) -= (a); (b) ^= ((a) << 16);		\
		(c) -= (a); (c) -= (b); (c) ^= ((b) >> 5);		\
		(a) -= (b); (a) -= (c); (a) ^= ((c) >> 3);		\
		(b) -= (c); (b) -= (a); (b) ^= ((a) << 10);		\
		(c) -= (a); (c) -= (b); (c) ^= ((b) >> 15);		\
	} while (0)
1327 
1328 static u_int32_t
1329 carp_hash(struct carp_softc *sc, u_char *src)
1330 {
1331 	u_int32_t a = 0x9e3779b9, b = sc->sc_hashkey[0], c = sc->sc_hashkey[1];
1332 
1333 	c += sc->sc_key[3] << 24;
1334 	c += sc->sc_key[2] << 16;
1335 	c += sc->sc_key[1] << 8;
1336 	c += sc->sc_key[0];
1337 	b += src[5] << 8;
1338 	b += src[4];
1339 	a += src[3] << 24;
1340 	a += src[2] << 16;
1341 	a += src[1] << 8;
1342 	a += src[0];
1343 
1344 	mix(a, b, c);
1345 	return (c);
1346 }
1347 
1348 static int
1349 carp_addrcount(struct carp_if *cif, struct in_ifaddr *ia, int type)
1350 {
1351 	struct carp_softc *vh;
1352 	struct ifaddr *ifa;
1353 	int count = 0;
1354 
1355 	TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
1356 		if ((type == CARP_COUNT_RUNNING &&
1357 		    (vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) ==
1358 		    (IFF_UP|IFF_RUNNING)) ||
1359 		    (type == CARP_COUNT_MASTER && vh->sc_state == MASTER)) {
1360 			int s = pserialize_read_enter();
1361 			IFADDR_READER_FOREACH(ifa, &vh->sc_if) {
1362 				if (ifa->ifa_addr->sa_family == AF_INET &&
1363 				    ia->ia_addr.sin_addr.s_addr ==
1364 				    ifatoia(ifa)->ia_addr.sin_addr.s_addr)
1365 					count++;
1366 			}
1367 			pserialize_read_exit(s);
1368 		}
1369 	}
1370 	return (count);
1371 }
1372 
1373 int
1374 carp_iamatch(struct in_ifaddr *ia, u_char *src,
1375     u_int32_t *count, u_int32_t index)
1376 {
1377 	struct carp_softc *sc = ia->ia_ifp->if_softc;
1378 
1379 	if (carp_opts[CARPCTL_ARPBALANCE]) {
1380 		/*
1381 		 * We use the source ip to decide which virtual host should
1382 		 * handle the request. If we're master of that virtual host,
1383 		 * then we respond, otherwise, just drop the arp packet on
1384 		 * the floor.
1385 		 */
1386 
1387 		/* Count the elegible carp interfaces with this address */
1388 		if (*count == 0)
1389 			*count = carp_addrcount(
1390 			    (struct carp_if *)ia->ia_ifp->if_carpdev->if_carp,
1391 			    ia, CARP_COUNT_RUNNING);
1392 
1393 		/* This should never happen, but... */
1394 		if (*count == 0)
1395 			return (0);
1396 
1397 		if (carp_hash(sc, src) % *count == index - 1 &&
1398 		    sc->sc_state == MASTER) {
1399 			return (1);
1400 		}
1401 	} else {
1402 		if (sc->sc_state == MASTER)
1403 			return (1);
1404 	}
1405 
1406 	return (0);
1407 }
1408 
1409 #ifdef INET6
1410 struct ifaddr *
1411 carp_iamatch6(void *v, struct in6_addr *taddr)
1412 {
1413 	struct carp_if *cif = v;
1414 	struct carp_softc *vh;
1415 	struct ifaddr *ifa;
1416 
1417 	TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
1418 		int s = pserialize_read_enter();
1419 		IFADDR_READER_FOREACH(ifa, &vh->sc_if) {
1420 			if (IN6_ARE_ADDR_EQUAL(taddr,
1421 			    &ifatoia6(ifa)->ia_addr.sin6_addr) &&
1422 			    ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) ==
1423 			    (IFF_UP|IFF_RUNNING)) && vh->sc_state == MASTER)
1424 				return (ifa);
1425 		}
1426 		pserialize_read_exit(s);
1427 	}
1428 
1429 	return (NULL);
1430 }
1431 #endif /* INET6 */
1432 
1433 struct ifnet *
1434 carp_ourether(void *v, struct ether_header *eh, u_char iftype, int src)
1435 {
1436 	struct carp_if *cif = (struct carp_if *)v;
1437 	struct carp_softc *vh;
1438 	u_int8_t *ena;
1439 
1440 	if (src)
1441 		ena = (u_int8_t *)&eh->ether_shost;
1442 	else
1443 		ena = (u_int8_t *)&eh->ether_dhost;
1444 
1445 	switch (iftype) {
1446 	case IFT_ETHER:
1447 	case IFT_FDDI:
1448 		if (ena[0] || ena[1] || ena[2] != 0x5e || ena[3] || ena[4] != 1)
1449 			return (NULL);
1450 		break;
1451 	case IFT_ISO88025:
1452 		if (ena[0] != 3 || ena[1] || ena[4] || ena[5])
1453 			return (NULL);
1454 		break;
1455 	default:
1456 		return (NULL);
1457 		break;
1458 	}
1459 
1460 	TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list)
1461 		if ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) ==
1462 		    (IFF_UP|IFF_RUNNING) && vh->sc_state == MASTER &&
1463 		    !memcmp(ena, CLLADDR(vh->sc_if.if_sadl),
1464 		    ETHER_ADDR_LEN)) {
1465 			return (&vh->sc_if);
1466 		    }
1467 
1468 	return (NULL);
1469 }
1470 
1471 int
1472 carp_input(struct mbuf *m, u_int8_t *shost, u_int8_t *dhost, u_int16_t etype)
1473 {
1474 	struct ether_header eh;
1475 	struct carp_if *cif = (struct carp_if *)m_get_rcvif_NOMPSAFE(m)->if_carp;
1476 	struct ifnet *ifp;
1477 
1478 	memcpy(&eh.ether_shost, shost, sizeof(eh.ether_shost));
1479 	memcpy(&eh.ether_dhost, dhost, sizeof(eh.ether_dhost));
1480 	eh.ether_type = etype;
1481 
1482 	if (m->m_flags & (M_BCAST|M_MCAST)) {
1483 		struct carp_softc *vh;
1484 		struct mbuf *m0;
1485 
1486 		/*
1487 		 * XXX Should really check the list of multicast addresses
1488 		 * for each CARP interface _before_ copying.
1489 		 */
1490 		TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
1491 			m0 = m_copym(m, 0, M_COPYALL, M_DONTWAIT);
1492 			if (m0 == NULL)
1493 				continue;
1494 			m_set_rcvif(m0, &vh->sc_if);
1495 			ether_input(&vh->sc_if, m0);
1496 		}
1497 		return (1);
1498 	}
1499 
1500 	ifp = carp_ourether(cif, &eh, m_get_rcvif_NOMPSAFE(m)->if_type, 0);
1501 	if (ifp == NULL) {
1502 		return (1);
1503 	}
1504 
1505 	m_set_rcvif(m, ifp);
1506 
1507 	bpf_mtap(ifp, m, BPF_D_IN);
1508 	ifp->if_ipackets++;
1509 	ether_input(ifp, m);
1510 	return (0);
1511 }
1512 
1513 static void
1514 carp_master_down(void *v)
1515 {
1516 	struct carp_softc *sc = v;
1517 
1518 	switch (sc->sc_state) {
1519 	case INIT:
1520 		printf("%s: master_down event in INIT state\n",
1521 		    sc->sc_if.if_xname);
1522 		break;
1523 	case MASTER:
1524 		break;
1525 	case BACKUP:
1526 		CARP_LOG(sc, ("INIT -> MASTER (preempting)"));
1527 		carp_set_state(sc, MASTER);
1528 		carp_send_ad(sc);
1529 		carp_send_arp(sc);
1530 #ifdef INET6
1531 		carp_send_na(sc);
1532 #endif /* INET6 */
1533 		carp_setrun(sc, 0);
1534 		carp_setroute(sc, RTM_ADD);
1535 		break;
1536 	}
1537 }
1538 
1539 /*
1540  * When in backup state, af indicates whether to reset the master down timer
1541  * for v4 or v6. If it's set to zero, reset the ones which are already pending.
1542  */
1543 static void
1544 carp_setrun(struct carp_softc *sc, sa_family_t af)
1545 {
1546 	struct timeval tv;
1547 
1548 	if (sc->sc_carpdev == NULL) {
1549 		sc->sc_if.if_flags &= ~IFF_RUNNING;
1550 		carp_set_state(sc, INIT);
1551 		return;
1552 	}
1553 
1554 	if (sc->sc_if.if_flags & IFF_UP && sc->sc_vhid > 0 &&
1555 	    (sc->sc_naddrs || sc->sc_naddrs6) && !sc->sc_suppress) {
1556 		sc->sc_if.if_flags |= IFF_RUNNING;
1557 	} else {
1558 		sc->sc_if.if_flags &= ~IFF_RUNNING;
1559 		carp_setroute(sc, RTM_DELETE);
1560 		return;
1561 	}
1562 
1563 	switch (sc->sc_state) {
1564 	case INIT:
1565 		carp_set_state(sc, BACKUP);
1566 		carp_setroute(sc, RTM_DELETE);
1567 		carp_setrun(sc, 0);
1568 		break;
1569 	case BACKUP:
1570 		callout_stop(&sc->sc_ad_tmo);
1571 		tv.tv_sec = 3 * sc->sc_advbase;
1572 		tv.tv_usec = sc->sc_advskew * 1000000 / 256;
1573 		switch (af) {
1574 #ifdef INET
1575 		case AF_INET:
1576 			callout_schedule(&sc->sc_md_tmo, tvtohz(&tv));
1577 			break;
1578 #endif /* INET */
1579 #ifdef INET6
1580 		case AF_INET6:
1581 			callout_schedule(&sc->sc_md6_tmo, tvtohz(&tv));
1582 			break;
1583 #endif /* INET6 */
1584 		default:
1585 			if (sc->sc_naddrs)
1586 				callout_schedule(&sc->sc_md_tmo, tvtohz(&tv));
1587 #ifdef INET6
1588 			if (sc->sc_naddrs6)
1589 				callout_schedule(&sc->sc_md6_tmo, tvtohz(&tv));
1590 #endif /* INET6 */
1591 			break;
1592 		}
1593 		break;
1594 	case MASTER:
1595 		tv.tv_sec = sc->sc_advbase;
1596 		tv.tv_usec = sc->sc_advskew * 1000000 / 256;
1597 		callout_schedule(&sc->sc_ad_tmo, tvtohz(&tv));
1598 		break;
1599 	}
1600 }
1601 
1602 static void
1603 carp_multicast_cleanup(struct carp_softc *sc)
1604 {
1605 	struct ip_moptions *imo = &sc->sc_imo;
1606 #ifdef INET6
1607 	struct ip6_moptions *im6o = &sc->sc_im6o;
1608 #endif
1609 	u_int16_t n = imo->imo_num_memberships;
1610 
1611 	/* Clean up our own multicast memberships */
1612 	while (n-- > 0) {
1613 		if (imo->imo_membership[n] != NULL) {
1614 			in_delmulti(imo->imo_membership[n]);
1615 			imo->imo_membership[n] = NULL;
1616 		}
1617 	}
1618 	imo->imo_num_memberships = 0;
1619 	imo->imo_multicast_if_index = 0;
1620 
1621 #ifdef INET6
1622 	while (!LIST_EMPTY(&im6o->im6o_memberships)) {
1623 		struct in6_multi_mship *imm =
1624 		    LIST_FIRST(&im6o->im6o_memberships);
1625 
1626 		LIST_REMOVE(imm, i6mm_chain);
1627 		in6_leavegroup(imm);
1628 	}
1629 	im6o->im6o_multicast_if_index = 0;
1630 #endif
1631 
1632 	/* And any other multicast memberships */
1633 	carp_ether_purgemulti(sc);
1634 }
1635 
1636 static int
1637 carp_set_ifp(struct carp_softc *sc, struct ifnet *ifp)
1638 {
1639 	struct carp_if *cif, *ncif = NULL;
1640 	struct carp_softc *vr, *after = NULL;
1641 	int myself = 0, error = 0;
1642 	int s;
1643 
1644 	if (ifp == sc->sc_carpdev)
1645 		return (0);
1646 
1647 	if (ifp != NULL) {
1648 		if ((ifp->if_flags & IFF_MULTICAST) == 0)
1649 			return (EADDRNOTAVAIL);
1650 
1651 		if (ifp->if_type == IFT_CARP)
1652 			return (EINVAL);
1653 
1654 		if (ifp->if_carp == NULL) {
1655 			ncif = malloc(sizeof(*cif), M_IFADDR, M_NOWAIT);
1656 			if (ncif == NULL)
1657 				return (ENOBUFS);
1658 			if ((error = ifpromisc(ifp, 1))) {
1659 				free(ncif, M_IFADDR);
1660 				return (error);
1661 			}
1662 
1663 			ncif->vhif_ifp = ifp;
1664 			TAILQ_INIT(&ncif->vhif_vrs);
1665 		} else {
1666 			cif = (struct carp_if *)ifp->if_carp;
1667 			TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list)
1668 				if (vr != sc && vr->sc_vhid == sc->sc_vhid)
1669 					return (EINVAL);
1670 		}
1671 
1672 		/* detach from old interface */
1673 		if (sc->sc_carpdev != NULL)
1674 			carpdetach(sc);
1675 
1676 		/* join multicast groups */
1677 		if (sc->sc_naddrs < 0 &&
1678 		    (error = carp_join_multicast(sc)) != 0) {
1679 			if (ncif != NULL)
1680 				free(ncif, M_IFADDR);
1681 			return (error);
1682 		}
1683 
1684 #ifdef INET6
1685 		if (sc->sc_naddrs6 < 0 &&
1686 		    (error = carp_join_multicast6(sc)) != 0) {
1687 			if (ncif != NULL)
1688 				free(ncif, M_IFADDR);
1689 			carp_multicast_cleanup(sc);
1690 			return (error);
1691 		}
1692 #endif
1693 
1694 		/* attach carp interface to physical interface */
1695 		if (ncif != NULL)
1696 			ifp->if_carp = (void *)ncif;
1697 		sc->sc_carpdev = ifp;
1698 		sc->sc_if.if_capabilities = ifp->if_capabilities &
1699 		             (IFCAP_TSOv4 | IFCAP_TSOv6 |
1700                              IFCAP_CSUM_IPv4_Tx|IFCAP_CSUM_IPv4_Rx|
1701                              IFCAP_CSUM_TCPv4_Tx|IFCAP_CSUM_TCPv4_Rx|
1702                              IFCAP_CSUM_UDPv4_Tx|IFCAP_CSUM_UDPv4_Rx|
1703                              IFCAP_CSUM_TCPv6_Tx|IFCAP_CSUM_TCPv6_Rx|
1704                              IFCAP_CSUM_UDPv6_Tx|IFCAP_CSUM_UDPv6_Rx);
1705 
1706 		cif = (struct carp_if *)ifp->if_carp;
1707 		TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) {
1708 			if (vr == sc)
1709 				myself = 1;
1710 			if (vr->sc_vhid < sc->sc_vhid)
1711 				after = vr;
1712 		}
1713 
1714 		if (!myself) {
1715 			/* We're trying to keep things in order */
1716 			if (after == NULL) {
1717 				TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list);
1718 			} else {
1719 				TAILQ_INSERT_AFTER(&cif->vhif_vrs, after,
1720 				    sc, sc_list);
1721 			}
1722 			cif->vhif_nvrs++;
1723 		}
1724 		if (sc->sc_naddrs || sc->sc_naddrs6)
1725 			sc->sc_if.if_flags |= IFF_UP;
1726 		carp_set_enaddr(sc);
1727 		KERNEL_LOCK(1, NULL);
1728 		s = splnet();
1729 		/* XXX linkstatehooks establish */
1730 		carp_carpdev_state(ifp);
1731 		splx(s);
1732 		KERNEL_UNLOCK_ONE(NULL);
1733 	} else {
1734 		carpdetach(sc);
1735 		sc->sc_if.if_flags &= ~(IFF_UP|IFF_RUNNING);
1736 	}
1737 	return (0);
1738 }
1739 
1740 static void
1741 carp_set_enaddr(struct carp_softc *sc)
1742 {
1743 	uint8_t enaddr[ETHER_ADDR_LEN];
1744 	if (sc->sc_carpdev && sc->sc_carpdev->if_type == IFT_ISO88025) {
1745 		enaddr[0] = 3;
1746 		enaddr[1] = 0;
1747 		enaddr[2] = 0x40 >> (sc->sc_vhid - 1);
1748 		enaddr[3] = 0x40000 >> (sc->sc_vhid - 1);
1749 		enaddr[4] = 0;
1750 		enaddr[5] = 0;
1751 	} else {
1752 		enaddr[0] = 0;
1753 		enaddr[1] = 0;
1754 		enaddr[2] = 0x5e;
1755 		enaddr[3] = 0;
1756 		enaddr[4] = 1;
1757 		enaddr[5] = sc->sc_vhid;
1758 	}
1759 	if_set_sadl(&sc->sc_if, enaddr, sizeof(enaddr), false);
1760 }
1761 
/*
 * Disabled code: everything up to the matching #endif is compiled out.
 *
 * carp_addr_updated() recounts the AF_INET and AF_INET6 addresses on the
 * carp interface passed as v.  When either count has dropped below the
 * stored one, it stores the new counts, tries to re-establish the IPv4
 * CARP multicast membership, and then either takes the interface to INIT
 * (no addresses left) or recomputes the HMAC.  It always finishes with
 * carp_setrun(sc, 0).
 */
#if 0
static void
carp_addr_updated(void *v)
{
	struct carp_softc *sc = (struct carp_softc *) v;
	struct ifaddr *ifa;
	int new_naddrs = 0, new_naddrs6 = 0;

	/* Count the addresses currently configured, per family. */
	IFADDR_READER_FOREACH(ifa, &sc->sc_if) {
		if (ifa->ifa_addr->sa_family == AF_INET)
			new_naddrs++;
		else if (ifa->ifa_addr->sa_family == AF_INET6)
			new_naddrs6++;
	}

	/* Handle a callback after SIOCDIFADDR */
	if (new_naddrs < sc->sc_naddrs || new_naddrs6 < sc->sc_naddrs6) {
		struct in_addr mc_addr;

		sc->sc_naddrs = new_naddrs;
		sc->sc_naddrs6 = new_naddrs6;

		/* Re-establish multicast membership removed by in_control */
		mc_addr.s_addr = INADDR_CARP_GROUP;
		/*
		 * NOTE(review): presumably in_multi_group() reports whether
		 * the interface is still a member of the group - confirm.
		 */
		if (!in_multi_group(mc_addr, &sc->sc_if, 0)) {
			/* Start from clean IPv4 multicast options. */
			memset(&sc->sc_imo, 0, sizeof(sc->sc_imo));

			if (sc->sc_carpdev != NULL && sc->sc_naddrs > 0)
				carp_join_multicast(sc);
		}

		if (sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0) {
			/* Last address gone: interface down, back to INIT. */
			sc->sc_if.if_flags &= ~IFF_UP;
			carp_set_state(sc, INIT);
		} else
			carp_hmac_prepare(sc);
	}

	carp_setrun(sc, 0);
}
#endif
1803 
1804 static int
1805 carp_set_addr(struct carp_softc *sc, struct sockaddr_in *sin)
1806 {
1807 	struct ifnet *ifp = sc->sc_carpdev;
1808 	struct in_ifaddr *ia, *ia_if;
1809 	int error = 0;
1810 	int s;
1811 
1812 	if (sin->sin_addr.s_addr == 0) {
1813 		if (!(sc->sc_if.if_flags & IFF_UP))
1814 			carp_set_state(sc, INIT);
1815 		if (sc->sc_naddrs)
1816 			sc->sc_if.if_flags |= IFF_UP;
1817 		carp_setrun(sc, 0);
1818 		return (0);
1819 	}
1820 
1821 	/* we have to do this by hand to ensure we don't match on ourselves */
1822 	ia_if = NULL;
1823 	s = pserialize_read_enter();
1824 	IN_ADDRLIST_READER_FOREACH(ia) {
1825 		/* and, yeah, we need a multicast-capable iface too */
1826 		if (ia->ia_ifp != &sc->sc_if &&
1827 		    ia->ia_ifp->if_type != IFT_CARP &&
1828 		    (ia->ia_ifp->if_flags & IFF_MULTICAST) &&
1829 		    (sin->sin_addr.s_addr & ia->ia_subnetmask) ==
1830 		    ia->ia_subnet) {
1831 			if (!ia_if)
1832 				ia_if = ia;
1833 		}
1834 	}
1835 
1836 	if (ia_if) {
1837 		ia = ia_if;
1838 		if (ifp) {
1839 			if (ifp != ia->ia_ifp)
1840 				return (EADDRNOTAVAIL);
1841 		} else {
1842 			/* FIXME NOMPSAFE */
1843 			ifp = ia->ia_ifp;
1844 		}
1845 	}
1846 	pserialize_read_exit(s);
1847 
1848 	if ((error = carp_set_ifp(sc, ifp)))
1849 		return (error);
1850 
1851 	if (sc->sc_carpdev == NULL)
1852 		return (EADDRNOTAVAIL);
1853 
1854 	if (sc->sc_naddrs == 0 && (error = carp_join_multicast(sc)) != 0)
1855 		return (error);
1856 
1857 	sc->sc_naddrs++;
1858 	if (sc->sc_carpdev != NULL)
1859 		sc->sc_if.if_flags |= IFF_UP;
1860 
1861 	carp_set_state(sc, INIT);
1862 	carp_setrun(sc, 0);
1863 
1864 	/*
1865 	 * Hook if_addrhooks so that we get a callback after in_ifinit has run,
1866 	 * to correct any inappropriate routes that it inserted.
1867 	 */
1868 	if (sc->ah_cookie == 0) {
1869 		/* XXX link address hook */
1870 	}
1871 
1872 	return (0);
1873 }
1874 
1875 static int
1876 carp_join_multicast(struct carp_softc *sc)
1877 {
1878 	struct ip_moptions *imo = &sc->sc_imo, tmpimo;
1879 	struct in_addr addr;
1880 
1881 	memset(&tmpimo, 0, sizeof(tmpimo));
1882 	addr.s_addr = INADDR_CARP_GROUP;
1883 	if ((tmpimo.imo_membership[0] =
1884 	    in_addmulti(&addr, &sc->sc_if)) == NULL) {
1885 		return (ENOBUFS);
1886 	}
1887 
1888 	imo->imo_membership[0] = tmpimo.imo_membership[0];
1889 	imo->imo_num_memberships = 1;
1890 	imo->imo_multicast_if_index = sc->sc_if.if_index;
1891 	imo->imo_multicast_ttl = CARP_DFLTTL;
1892 	imo->imo_multicast_loop = 0;
1893 	return (0);
1894 }
1895 
1896 
1897 #ifdef INET6
1898 static int
1899 carp_set_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6)
1900 {
1901 	struct ifnet *ifp = sc->sc_carpdev;
1902 	struct in6_ifaddr *ia, *ia_if;
1903 	int error = 0;
1904 	int s;
1905 
1906 	if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
1907 		if (!(sc->sc_if.if_flags & IFF_UP))
1908 			carp_set_state(sc, INIT);
1909 		if (sc->sc_naddrs6)
1910 			sc->sc_if.if_flags |= IFF_UP;
1911 		carp_setrun(sc, 0);
1912 		return (0);
1913 	}
1914 
1915 	/* we have to do this by hand to ensure we don't match on ourselves */
1916 	ia_if = NULL;
1917 	s = pserialize_read_enter();
1918 	IN6_ADDRLIST_READER_FOREACH(ia) {
1919 		int i;
1920 
1921 		for (i = 0; i < 4; i++) {
1922 			if ((sin6->sin6_addr.s6_addr32[i] &
1923 			    ia->ia_prefixmask.sin6_addr.s6_addr32[i]) !=
1924 			    (ia->ia_addr.sin6_addr.s6_addr32[i] &
1925 			    ia->ia_prefixmask.sin6_addr.s6_addr32[i]))
1926 				break;
1927 		}
1928 		/* and, yeah, we need a multicast-capable iface too */
1929 		if (ia->ia_ifp != &sc->sc_if &&
1930 		    ia->ia_ifp->if_type != IFT_CARP &&
1931 		    (ia->ia_ifp->if_flags & IFF_MULTICAST) &&
1932 		    (i == 4)) {
1933 			if (!ia_if)
1934 				ia_if = ia;
1935 		}
1936 	}
1937 	pserialize_read_exit(s);
1938 
1939 	if (ia_if) {
1940 		ia = ia_if;
1941 		if (sc->sc_carpdev) {
1942 			if (sc->sc_carpdev != ia->ia_ifp)
1943 				return (EADDRNOTAVAIL);
1944 		} else {
1945 			ifp = ia->ia_ifp;
1946 		}
1947 	}
1948 
1949 	if ((error = carp_set_ifp(sc, ifp)))
1950 		return (error);
1951 
1952 	if (sc->sc_carpdev == NULL)
1953 		return (EADDRNOTAVAIL);
1954 
1955 	if (sc->sc_naddrs6 == 0 && (error = carp_join_multicast6(sc)) != 0)
1956 		return (error);
1957 
1958 	sc->sc_naddrs6++;
1959 	if (sc->sc_carpdev != NULL)
1960 		sc->sc_if.if_flags |= IFF_UP;
1961 	carp_set_state(sc, INIT);
1962 	carp_setrun(sc, 0);
1963 
1964 	return (0);
1965 }
1966 
1967 static int
1968 carp_join_multicast6(struct carp_softc *sc)
1969 {
1970 	struct in6_multi_mship *imm, *imm2;
1971 	struct ip6_moptions *im6o = &sc->sc_im6o;
1972 	struct sockaddr_in6 addr6;
1973 	int error;
1974 
1975 	/* Join IPv6 CARP multicast group */
1976 	memset(&addr6, 0, sizeof(addr6));
1977 	addr6.sin6_family = AF_INET6;
1978 	addr6.sin6_len = sizeof(addr6);
1979 	addr6.sin6_addr.s6_addr16[0] = htons(0xff02);
1980 	addr6.sin6_addr.s6_addr16[1] = htons(sc->sc_if.if_index);
1981 	addr6.sin6_addr.s6_addr8[15] = 0x12;
1982 	if ((imm = in6_joingroup(&sc->sc_if,
1983 	    &addr6.sin6_addr, &error, 0)) == NULL) {
1984 		return (error);
1985 	}
1986 	/* join solicited multicast address */
1987 	memset(&addr6.sin6_addr, 0, sizeof(addr6.sin6_addr));
1988 	addr6.sin6_addr.s6_addr16[0] = htons(0xff02);
1989 	addr6.sin6_addr.s6_addr16[1] = htons(sc->sc_if.if_index);
1990 	addr6.sin6_addr.s6_addr32[1] = 0;
1991 	addr6.sin6_addr.s6_addr32[2] = htonl(1);
1992 	addr6.sin6_addr.s6_addr32[3] = 0;
1993 	addr6.sin6_addr.s6_addr8[12] = 0xff;
1994 	if ((imm2 = in6_joingroup(&sc->sc_if,
1995 	    &addr6.sin6_addr, &error, 0)) == NULL) {
1996 		in6_leavegroup(imm);
1997 		return (error);
1998 	}
1999 
2000 	/* apply v6 multicast membership */
2001 	im6o->im6o_multicast_if_index = sc->sc_if.if_index;
2002 	if (imm)
2003 		LIST_INSERT_HEAD(&im6o->im6o_memberships, imm,
2004 		    i6mm_chain);
2005 	if (imm2)
2006 		LIST_INSERT_HEAD(&im6o->im6o_memberships, imm2,
2007 		    i6mm_chain);
2008 
2009 	return (0);
2010 }
2011 
2012 #endif /* INET6 */
2013 
/*
 * ioctl handler of the carp interface.
 *
 * ifp:  the carp interface.
 * cmd:  the request; data is interpreted as a struct ifaddr for
 *       SIOCINITIFADDR and as a struct ifreq for everything else handled
 *       here.
 *
 * Handled directly: SIOCINITIFADDR, SIOCSIFFLAGS, SIOCSVH, SIOCGVH,
 * SIOCADDMULTI, SIOCDELMULTI and SIOCSIFCAP; any other request goes to
 * ether_ioctl().
 *
 * Returns 0 or an errno value.  carp_hmac_prepare() runs before the final
 * return; the early "return" statements inside SIOCSVH bypass it.
 */
static int
carp_ioctl(struct ifnet *ifp, u_long cmd, void *data)
{
	struct lwp *l = curlwp;		/* XXX */
	struct carp_softc *sc = ifp->if_softc, *vr;
	struct carpreq carpr;
	struct ifaddr *ifa;
	struct ifreq *ifr;
	struct ifnet *cdev = NULL;
	int error = 0;

	/* Two views of the same argument; which one is valid depends on cmd. */
	ifa = (struct ifaddr *)data;
	ifr = (struct ifreq *)data;

	switch (cmd) {
	case SIOCINITIFADDR:
		/* A new address: mark the interface up and register it. */
		switch (ifa->ifa_addr->sa_family) {
#ifdef INET
		case AF_INET:
			sc->sc_if.if_flags |= IFF_UP;
			/* the destination address mirrors the address itself */
			memcpy(ifa->ifa_dstaddr, ifa->ifa_addr,
			    sizeof(struct sockaddr));
			error = carp_set_addr(sc, satosin(ifa->ifa_addr));
			break;
#endif /* INET */
#ifdef INET6
		case AF_INET6:
			sc->sc_if.if_flags|= IFF_UP;
			error = carp_set_addr6(sc, satosin6(ifa->ifa_addr));
			break;
#endif /* INET6 */
		default:
			error = EAFNOSUPPORT;
			break;
		}
		break;

	case SIOCSIFFLAGS:
		if ((error = ifioctl_common(ifp, cmd, data)) != 0)
			break;
		if (sc->sc_state != INIT && !(ifr->ifr_flags & IFF_UP)) {
			/* Going down: stop all timers first. */
			callout_stop(&sc->sc_ad_tmo);
			callout_stop(&sc->sc_md_tmo);
			callout_stop(&sc->sc_md6_tmo);
			if (sc->sc_state == MASTER) {
				/* we need the interface up to bow out */
				sc->sc_if.if_flags |= IFF_UP;
				sc->sc_bow_out = 1;
				carp_send_ad(sc);
			}
			sc->sc_if.if_flags &= ~IFF_UP;
			carp_set_state(sc, INIT);
			carp_setrun(sc, 0);
		} else if (sc->sc_state == INIT && (ifr->ifr_flags & IFF_UP)) {
			/* Coming up from INIT. */
			sc->sc_if.if_flags |= IFF_UP;
			carp_setrun(sc, 0);
		}
		break;

	case SIOCSVH:
		/*
		 * Set virtual host parameters from a struct carpreq copied in
		 * from ifr_data.  Requires authorization.
		 */
		if (l == NULL)
			break;
		if ((error = kauth_authorize_network(l->l_cred,
		    KAUTH_NETWORK_INTERFACE,
		    KAUTH_REQ_NETWORK_INTERFACE_SETPRIV, ifp, (void *)cmd,
		    NULL)) != 0)
			break;
		if ((error = copyin(ifr->ifr_data, &carpr, sizeof carpr)))
			break;
		/*
		 * NOTE(review): this 1 is overwritten with 0 by the successful
		 * carp_set_ifp() assignment below, so the "error > 0" test
		 * further down cannot fire from it - confirm intent.
		 */
		error = 1;
		/* A non-empty device name must name an existing interface. */
		if (carpr.carpr_carpdev[0] != '\0' &&
		    (cdev = ifunit(carpr.carpr_carpdev)) == NULL)
			return (EINVAL);
		if ((error = carp_set_ifp(sc, cdev)))
			return (error);
		/* A state change is only honoured once we have left INIT. */
		if (sc->sc_state != INIT && carpr.carpr_state != sc->sc_state) {
			switch (carpr.carpr_state) {
			case BACKUP:
				callout_stop(&sc->sc_ad_tmo);
				carp_set_state(sc, BACKUP);
				carp_setrun(sc, 0);
				carp_setroute(sc, RTM_DELETE);
				break;
			case MASTER:
				carp_master_down(sc);
				break;
			default:
				break;
			}
		}
		if (carpr.carpr_vhid > 0) {
			if (carpr.carpr_vhid > 255) {
				error = EINVAL;
				break;
			}
			if (sc->sc_carpdev) {
				/* the vhid must be unique on the carp device */
				struct carp_if *cif;
				cif = (struct carp_if *)sc->sc_carpdev->if_carp;
				TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list)
					if (vr != sc &&
					    vr->sc_vhid == carpr.carpr_vhid)
						return (EINVAL);
			}
			sc->sc_vhid = carpr.carpr_vhid;
			/* the link-layer address is derived from the vhid */
			carp_set_enaddr(sc);
			carp_set_state(sc, INIT);
			error--;
		}
		if (carpr.carpr_advbase > 0 || carpr.carpr_advskew > 0) {
			if (carpr.carpr_advskew > 254) {
				error = EINVAL;
				break;
			}
			if (carpr.carpr_advbase > 255) {
				error = EINVAL;
				break;
			}
			sc->sc_advbase = carpr.carpr_advbase;
			sc->sc_advskew = carpr.carpr_advskew;
			error--;
		}
		/* The key is always taken from the request. */
		memcpy(sc->sc_key, carpr.carpr_key, sizeof(sc->sc_key));
		if (error > 0)
			error = EINVAL;
		else {
			error = 0;
			carp_setrun(sc, 0);
		}
		break;

	case SIOCGVH:
		/* Report the virtual host parameters through ifr_data. */
		memset(&carpr, 0, sizeof(carpr));
		if (sc->sc_carpdev != NULL)
			strlcpy(carpr.carpr_carpdev, sc->sc_carpdev->if_xname,
			    IFNAMSIZ);
		carpr.carpr_state = sc->sc_state;
		carpr.carpr_vhid = sc->sc_vhid;
		carpr.carpr_advbase = sc->sc_advbase;
		carpr.carpr_advskew = sc->sc_advskew;

		/*
		 * The key is only disclosed when the authorization check
		 * passes; otherwise it stays zeroed.  A failed check is not
		 * reported: error is overwritten by the copyout() result.
		 */
		if ((l != NULL) && (error = kauth_authorize_network(l->l_cred,
		    KAUTH_NETWORK_INTERFACE,
		    KAUTH_REQ_NETWORK_INTERFACE_SETPRIV, ifp, (void *)cmd,
		    NULL)) == 0)
			memcpy(carpr.carpr_key, sc->sc_key,
			    sizeof(carpr.carpr_key));
		error = copyout(&carpr, ifr->ifr_data, sizeof(carpr));
		break;

	case SIOCADDMULTI:
		error = carp_ether_addmulti(sc, ifr);
		break;

	case SIOCDELMULTI:
		error = carp_ether_delmulti(sc, ifr);
		break;

	case SIOCSIFCAP:
		/* ENETRESET from the common handler is not an error here. */
		if ((error = ifioctl_common(ifp, cmd, data)) == ENETRESET)
			error = 0;
		break;

	default:
		error = ether_ioctl(ifp, cmd, data);
	}

	/* Recompute the HMAC state on every exit that reaches this point. */
	carp_hmac_prepare(sc);
	return (error);
}
2183 
2184 
/*
 * Start-output routine of the carp interface.  It is never supposed to
 * be called and does no work; DEBUG kernels print a notice if it is.
 */
static void
carp_start(struct ifnet *ifp)
{
#if defined(DEBUG)
	printf("%s: start called\n", ifp->if_xname);
#endif /* DEBUG */
}
2195 
2196 int
2197 carp_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *sa,
2198     const struct rtentry *rt)
2199 {
2200 	struct carp_softc *sc = ((struct carp_softc *)ifp->if_softc);
2201 	KASSERT(KERNEL_LOCKED_P());
2202 
2203 	if (sc->sc_carpdev != NULL && sc->sc_state == MASTER) {
2204 		return if_output_lock(sc->sc_carpdev, ifp, m, sa, rt);
2205 	} else {
2206 		m_freem(m);
2207 		return (ENETUNREACH);
2208 	}
2209 }
2210 
2211 static void
2212 carp_set_state(struct carp_softc *sc, int state)
2213 {
2214 	static const char *carp_states[] = { CARP_STATES };
2215 	int link_state;
2216 
2217 	if (sc->sc_state == state)
2218 		return;
2219 
2220 	CARP_LOG(sc, ("state transition from: %s -> to: %s", carp_states[sc->sc_state], carp_states[state]));
2221 
2222 	sc->sc_state = state;
2223 	switch (state) {
2224 	case BACKUP:
2225 		link_state = LINK_STATE_DOWN;
2226 		break;
2227 	case MASTER:
2228 		link_state = LINK_STATE_UP;
2229 		break;
2230 	default:
2231 		link_state = LINK_STATE_UNKNOWN;
2232 		break;
2233 	}
2234 	/*
2235 	 * The lock is needed to serialize a call of
2236 	 * if_link_state_change_softint from here and a call from softint.
2237 	 */
2238 	KERNEL_LOCK(1, NULL);
2239 	if_link_state_change_softint(&sc->sc_if, link_state);
2240 	KERNEL_UNLOCK_ONE(NULL);
2241 }
2242 
2243 void
2244 carp_carpdev_state(void *v)
2245 {
2246 	struct carp_if *cif;
2247 	struct carp_softc *sc;
2248 	struct ifnet *ifp = v;
2249 
2250 	if (ifp->if_type == IFT_CARP)
2251 		return;
2252 
2253 	cif = (struct carp_if *)ifp->if_carp;
2254 
2255 	TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) {
2256 		int suppressed = sc->sc_suppress;
2257 
2258 		if (sc->sc_carpdev->if_link_state == LINK_STATE_DOWN ||
2259 		    !(sc->sc_carpdev->if_flags & IFF_UP)) {
2260 			sc->sc_if.if_flags &= ~IFF_RUNNING;
2261 			callout_stop(&sc->sc_ad_tmo);
2262 			callout_stop(&sc->sc_md_tmo);
2263 			callout_stop(&sc->sc_md6_tmo);
2264 			carp_set_state(sc, INIT);
2265 			sc->sc_suppress = 1;
2266 			carp_setrun(sc, 0);
2267 			if (!suppressed) {
2268 				carp_suppress_preempt++;
2269 				if (carp_suppress_preempt == 1)
2270 					carp_send_ad_all();
2271 			}
2272 		} else {
2273 			carp_set_state(sc, INIT);
2274 			sc->sc_suppress = 0;
2275 			carp_setrun(sc, 0);
2276 			if (suppressed)
2277 				carp_suppress_preempt--;
2278 		}
2279 	}
2280 }
2281 
2282 static int
2283 carp_ether_addmulti(struct carp_softc *sc, struct ifreq *ifr)
2284 {
2285 	const struct sockaddr *sa = ifreq_getaddr(SIOCADDMULTI, ifr);
2286 	struct ifnet *ifp;
2287 	struct carp_mc_entry *mc;
2288 	u_int8_t addrlo[ETHER_ADDR_LEN], addrhi[ETHER_ADDR_LEN];
2289 	int error;
2290 
2291 	ifp = sc->sc_carpdev;
2292 	if (ifp == NULL)
2293 		return (EINVAL);
2294 
2295 	error = ether_addmulti(sa, &sc->sc_ac);
2296 	if (error != ENETRESET)
2297 		return (error);
2298 
2299 	/*
2300 	 * This is new multicast address.  We have to tell parent
2301 	 * about it.  Also, remember this multicast address so that
2302 	 * we can delete them on unconfigure.
2303 	 */
2304 	mc = malloc(sizeof(struct carp_mc_entry), M_DEVBUF, M_NOWAIT);
2305 	if (mc == NULL) {
2306 		error = ENOMEM;
2307 		goto alloc_failed;
2308 	}
2309 
2310 	/*
2311 	 * As ether_addmulti() returns ENETRESET, following two
2312 	 * statement shouldn't fail.
2313 	 */
2314 	(void)ether_multiaddr(sa, addrlo, addrhi);
2315 
2316 	ETHER_LOCK(&sc->sc_ac);
2317 	mc->mc_enm = ether_lookup_multi(addrlo, addrhi, &sc->sc_ac);
2318 	ETHER_UNLOCK(&sc->sc_ac);
2319 
2320 	memcpy(&mc->mc_addr, sa, sa->sa_len);
2321 	LIST_INSERT_HEAD(&sc->carp_mc_listhead, mc, mc_entries);
2322 
2323 	error = if_mcast_op(ifp, SIOCADDMULTI, sa);
2324 	if (error != 0)
2325 		goto ioctl_failed;
2326 
2327 	return (error);
2328 
2329  ioctl_failed:
2330 	LIST_REMOVE(mc, mc_entries);
2331 	free(mc, M_DEVBUF);
2332  alloc_failed:
2333 	(void)ether_delmulti(sa, &sc->sc_ac);
2334 
2335 	return (error);
2336 }
2337 
2338 static int
2339 carp_ether_delmulti(struct carp_softc *sc, struct ifreq *ifr)
2340 {
2341 	const struct sockaddr *sa = ifreq_getaddr(SIOCDELMULTI, ifr);
2342 	struct ifnet *ifp;
2343 	struct ether_multi *enm;
2344 	struct carp_mc_entry *mc;
2345 	u_int8_t addrlo[ETHER_ADDR_LEN], addrhi[ETHER_ADDR_LEN];
2346 	int error;
2347 
2348 	ifp = sc->sc_carpdev;
2349 	if (ifp == NULL)
2350 		return (EINVAL);
2351 
2352 	/*
2353 	 * Find a key to lookup carp_mc_entry.  We have to do this
2354 	 * before calling ether_delmulti for obvious reason.
2355 	 */
2356 	if ((error = ether_multiaddr(sa, addrlo, addrhi)) != 0)
2357 		return (error);
2358 
2359 	ETHER_LOCK(&sc->sc_ac);
2360 	enm = ether_lookup_multi(addrlo, addrhi, &sc->sc_ac);
2361 	ETHER_UNLOCK(&sc->sc_ac);
2362 	if (enm == NULL)
2363 		return (EINVAL);
2364 
2365 	LIST_FOREACH(mc, &sc->carp_mc_listhead, mc_entries)
2366 		if (mc->mc_enm == enm)
2367 			break;
2368 
2369 	/* We won't delete entries we didn't add */
2370 	if (mc == NULL)
2371 		return (EINVAL);
2372 
2373 	error = ether_delmulti(sa, &sc->sc_ac);
2374 	if (error != ENETRESET)
2375 		return (error);
2376 
2377 	/* We no longer use this multicast address.  Tell parent so. */
2378 	error = if_mcast_op(ifp, SIOCDELMULTI, sa);
2379 	if (error == 0) {
2380 		/* And forget about this address. */
2381 		LIST_REMOVE(mc, mc_entries);
2382 		free(mc, M_DEVBUF);
2383 	} else
2384 		(void)ether_addmulti(sa, &sc->sc_ac);
2385 	return (error);
2386 }
2387 
2388 /*
2389  * Delete any multicast address we have asked to add from parent
2390  * interface.  Called when the carp is being unconfigured.
2391  */
2392 static void
2393 carp_ether_purgemulti(struct carp_softc *sc)
2394 {
2395 	struct ifnet *ifp = sc->sc_carpdev;		/* Parent. */
2396 	struct carp_mc_entry *mc;
2397 
2398 	if (ifp == NULL)
2399 		return;
2400 
2401 	while ((mc = LIST_FIRST(&sc->carp_mc_listhead)) != NULL) {
2402 		(void)if_mcast_op(ifp, SIOCDELMULTI, sstosa(&mc->mc_addr));
2403 		LIST_REMOVE(mc, mc_entries);
2404 		free(mc, M_DEVBUF);
2405 	}
2406 }
2407 
2408 static int
2409 sysctl_net_inet_carp_stats(SYSCTLFN_ARGS)
2410 {
2411 
2412 	return (NETSTAT_SYSCTL(carpstat_percpu, CARP_NSTATS));
2413 }
2414 
2415 void
2416 carp_init(void)
2417 {
2418 
2419 	sysctl_net_inet_carp_setup(NULL);
2420 #ifdef MBUFTRACE
2421 	MOWNER_ATTACH(&carp_proto_mowner_rx);
2422 	MOWNER_ATTACH(&carp_proto_mowner_tx);
2423 	MOWNER_ATTACH(&carp_proto6_mowner_rx);
2424 	MOWNER_ATTACH(&carp_proto6_mowner_tx);
2425 #endif
2426 
2427 	carp_wqinput = wqinput_create("carp", _carp_proto_input);
2428 #ifdef INET6
2429 	carp6_wqinput = wqinput_create("carp6", _carp6_proto_input);
2430 #endif
2431 }
2432 
2433 static void
2434 sysctl_net_inet_carp_setup(struct sysctllog **clog)
2435 {
2436 
2437 	sysctl_createv(clog, 0, NULL, NULL,
2438 		       CTLFLAG_PERMANENT,
2439 		       CTLTYPE_NODE, "inet", NULL,
2440 		       NULL, 0, NULL, 0,
2441 		       CTL_NET, PF_INET, CTL_EOL);
2442 	sysctl_createv(clog, 0, NULL, NULL,
2443 		       CTLFLAG_PERMANENT,
2444 		       CTLTYPE_NODE, "carp",
2445 		       SYSCTL_DESCR("CARP related settings"),
2446 		       NULL, 0, NULL, 0,
2447 		       CTL_NET, PF_INET, IPPROTO_CARP, CTL_EOL);
2448 
2449 	sysctl_createv(clog, 0, NULL, NULL,
2450 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2451 		       CTLTYPE_INT, "preempt",
2452 		       SYSCTL_DESCR("Enable CARP Preempt"),
2453 		       NULL, 0, &carp_opts[CARPCTL_PREEMPT], 0,
2454 		       CTL_NET, PF_INET, IPPROTO_CARP,
2455 		       CTL_CREATE, CTL_EOL);
2456 	sysctl_createv(clog, 0, NULL, NULL,
2457 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2458 		       CTLTYPE_INT, "arpbalance",
2459 		       SYSCTL_DESCR("Enable ARP balancing"),
2460 		       NULL, 0, &carp_opts[CARPCTL_ARPBALANCE], 0,
2461 		       CTL_NET, PF_INET, IPPROTO_CARP,
2462 		       CTL_CREATE, CTL_EOL);
2463 	sysctl_createv(clog, 0, NULL, NULL,
2464 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2465 		       CTLTYPE_INT, "allow",
2466 		       SYSCTL_DESCR("Enable CARP"),
2467 		       NULL, 0, &carp_opts[CARPCTL_ALLOW], 0,
2468 		       CTL_NET, PF_INET, IPPROTO_CARP,
2469 		       CTL_CREATE, CTL_EOL);
2470 	sysctl_createv(clog, 0, NULL, NULL,
2471 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2472 		       CTLTYPE_INT, "log",
2473 		       SYSCTL_DESCR("CARP logging"),
2474 		       NULL, 0, &carp_opts[CARPCTL_LOG], 0,
2475 		       CTL_NET, PF_INET, IPPROTO_CARP,
2476 		       CTL_CREATE, CTL_EOL);
2477 	sysctl_createv(clog, 0, NULL, NULL,
2478 		       CTLFLAG_PERMANENT,
2479 		       CTLTYPE_STRUCT, "stats",
2480 		       SYSCTL_DESCR("CARP statistics"),
2481 		       sysctl_net_inet_carp_stats, 0, NULL, 0,
2482 		       CTL_NET, PF_INET, IPPROTO_CARP, CARPCTL_STATS,
2483 		       CTL_EOL);
2484 }
2485