/*
 * Copyright (c) 2002 Michael Shalayeff. All rights reserved.
 * Copyright (c) 2003 Ryan McBride. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 * THE POSSIBILITY OF SUCH DAMAGE.
 */
/*
 * $FreeBSD: src/sys/netinet/ip_carp.c,v 1.48 2007/02/02 09:39:09 glebius Exp $
 */

#include "opt_carp.h"
#include "opt_inet.h"
#include "opt_inet6.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/in_cksum.h>
#include <sys/limits.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/time.h>
#include <sys/proc.h>
#include <sys/priv.h>
#include <sys/sockio.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/thread.h>

#include <machine/stdarg.h>
#include <crypto/sha1.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_dl.h>
#include <net/if_types.h>
#include <net/route.h>
#include <net/if_clone.h>
#include <net/ifq_var.h>

#ifdef INET
#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
#include <netinet/if_ether.h>
#endif

#ifdef INET6
#include <netinet/icmp6.h>
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
#include <netinet6/scope6_var.h>
#include <netinet6/nd6.h>
#endif

#include <netinet/ip_carp.h>

#define	CARP_IFNAME		"carp"
/* True only when both IFF_UP and IFF_RUNNING are set on the interface */
#define	CARP_IS_RUNNING(ifp)	\
	(((ifp)->if_flags & (IFF_UP | IFF_RUNNING)) == (IFF_UP | IFF_RUNNING))

/*
 * One IPv4 virtual address of a carp interface.  Entries are linked on
 * carp_softc.sc_vha_list through vha_link.
 */
struct carp_vhaddr {
	uint32_t		vha_flags;	/* CARP_VHAF_ */
	const struct in_ifaddr	*vha_ia;	/* carp address */
	/*
	 * Entries whose vha_iaback is NULL are skipped by carp_send_arp()
	 * and carp_find_addr().
	 */
	const struct in_ifaddr	*vha_iaback;	/* backing address */
	TAILQ_ENTRY(carp_vhaddr) vha_link;
};
TAILQ_HEAD(carp_vhaddr_list, carp_vhaddr);

/*
 * Per carp(4) interface state.  The embedded ifnet (sc_if) is the carp
 * pseudo interface itself; ifp->if_softc points back at this structure.
 */
struct carp_softc {
	struct ifnet		 sc_if;
	struct ifnet		*sc_carpdev;	/* parent interface */
	struct carp_vhaddr_list	 sc_vha_list;	/* virtual addr list */

	const struct in_ifaddr	*sc_ia;		/* primary iface address v4 */
	struct ip_moptions	 sc_imo;	/* passed to ip_output() */

#ifdef INET6
	struct in6_ifaddr	*sc_ia6;	/* primary iface address v6 */
	struct ip6_moptions	 sc_im6o;	/* passed to ip6_output() */
#endif /* INET6 */
	TAILQ_ENTRY(carp_softc)	 sc_list;	/* link on carp_if.vhif_vrs */

	enum { INIT = 0, BACKUP, MASTER }
				 sc_state;
	int			 sc_dead;	/* set by carp_clone_destroy() */

	int			 sc_suppress;

	/* consecutive advertisement send failures, see carp_send_ad() */
	int			 sc_sendad_errors;
#define	CARP_SENDAD_MAX_ERRORS	3
	/* successful sends counted after the error limit was reached */
	int			 sc_sendad_success;
#define	CARP_SENDAD_MIN_SUCCESS	3

	int			 sc_vhid;	/* -1 until configured */
	int			 sc_advskew;
	int			 sc_naddrs;	/* actually used IPv4 vha */
	int			 sc_naddrs6;
	int			 sc_advbase;	/* seconds */
	/* non-zero: carp_prepare_ad() seeds sc_counter randomly */
	int			 sc_init_counter;
	uint64_t		 sc_counter;	/* advertisement counter */

	/* authentication */
#define	CARP_HMAC_PAD	64
	unsigned char		 sc_key[CARP_KEY_LEN];
	/* left holding the outer pad by carp_hmac_prepare() */
	unsigned char		 sc_pad[CARP_HMAC_PAD];
	/* precomputed first part of the inner hash */
	SHA1_CTX		 sc_sha1;

	struct callout		 sc_ad_tmo;	/* advertisement timeout */
	struct callout		 sc_md_tmo;	/* master down timeout (armed for AF_INET) */
	struct callout		 sc_md6_tmo;	/* master down timeout (armed for AF_INET6) */

	LIST_ENTRY(carp_softc)	 sc_next;	/* Interface clue */
};

/* Hung off a parent interface's if_carp; lists the carp softcs using it */
struct carp_if {
	TAILQ_HEAD(, carp_softc) vhif_vrs;
};

enum	{ CARP_COUNT_MASTER, CARP_COUNT_RUNNING };

SYSCTL_DECL(_net_inet_carp);

/* Tunables indexed by CARPCTL_*; exported below as net.inet.carp.* */
static int carp_opts[CARPCTL_MAXID] = { 0, 1, 0, 1, 0, 0 }; /* XXX for now */
SYSCTL_INT(_net_inet_carp, CARPCTL_ALLOW, allow, CTLFLAG_RW,
    &carp_opts[CARPCTL_ALLOW], 0, "Accept incoming CARP packets");
SYSCTL_INT(_net_inet_carp, CARPCTL_PREEMPT, preempt, CTLFLAG_RW,
    &carp_opts[CARPCTL_PREEMPT], 0, "high-priority backup preemption mode");
SYSCTL_INT(_net_inet_carp, CARPCTL_LOG, log, CTLFLAG_RW,
    &carp_opts[CARPCTL_LOG], 0, "log bad carp packets");
SYSCTL_INT(_net_inet_carp, CARPCTL_ARPBALANCE, arpbalance, CTLFLAG_RW,
    &carp_opts[CARPCTL_ARPBALANCE], 0, "balance arp responses");

/*
 * Count of interfaces currently suppressing preemption; adjusted by
 * carp_send_ad() as sends fail and recover.
 */
static int carp_suppress_preempt = 0;
SYSCTL_INT(_net_inet_carp, OID_AUTO, suppress_preempt, CTLFLAG_RD,
    &carp_suppress_preempt, 0, "Preemption is suppressed");

static struct carpstats carpstats;
SYSCTL_STRUCT(_net_inet_carp, CARPCTL_STATS, stats, CTLFLAG_RW, 163 &carpstats, carpstats, 164 "CARP statistics (struct carpstats, netinet/ip_carp.h)"); 165 166 #define CARP_LOG(...) do { \ 167 if (carp_opts[CARPCTL_LOG] > 0) \ 168 log(LOG_INFO, __VA_ARGS__); \ 169 } while (0) 170 171 #define CARP_DEBUG(...) do { \ 172 if (carp_opts[CARPCTL_LOG] > 1) \ 173 log(LOG_DEBUG, __VA_ARGS__); \ 174 } while (0) 175 176 static struct lwkt_token carp_tok = LWKT_TOKEN_INITIALIZER(carp_token); 177 178 static void carp_hmac_prepare(struct carp_softc *); 179 static void carp_hmac_generate(struct carp_softc *, uint32_t *, 180 unsigned char *); 181 static int carp_hmac_verify(struct carp_softc *, uint32_t *, 182 unsigned char *); 183 static void carp_setroute(struct carp_softc *, int); 184 static void carp_input_c(struct mbuf *, struct carp_header *, sa_family_t); 185 static int carp_clone_create(struct if_clone *, int, caddr_t); 186 static int carp_clone_destroy(struct ifnet *); 187 static void carp_detach(struct carp_softc *, int); 188 static int carp_prepare_ad(struct mbuf *, struct carp_softc *, 189 struct carp_header *); 190 static void carp_send_ad_all(void); 191 static void carp_send_ad_timeout(void *); 192 static void carp_send_ad(struct carp_softc *); 193 static void carp_send_arp(struct carp_softc *); 194 static void carp_master_down_timeout(void *); 195 static void carp_master_down(struct carp_softc *); 196 static int carp_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *); 197 static int carp_looutput(struct ifnet *, struct mbuf *, struct sockaddr *, 198 struct rtentry *); 199 static void carp_start(struct ifnet *); 200 static void carp_setrun(struct carp_softc *, sa_family_t); 201 static void carp_set_state(struct carp_softc *, int); 202 203 static void carp_multicast_cleanup(struct carp_softc *); 204 static void carp_add_addr(struct carp_softc *, struct ifaddr *); 205 static void carp_del_addr(struct carp_softc *, struct ifaddr *); 206 static void 
carp_config_addr(struct carp_softc *, struct ifaddr *); 207 static void carp_link_addrs(struct carp_softc *, struct ifnet *, 208 struct ifaddr *); 209 static void carp_unlink_addrs(struct carp_softc *, struct ifnet *, 210 struct ifaddr *); 211 212 static int carp_get_vhaddr(struct carp_softc *, struct ifdrv *); 213 static int carp_config_vhaddr(struct carp_softc *, struct carp_vhaddr *); 214 static int carp_activate_vhaddr(struct carp_softc *, struct carp_vhaddr *, 215 struct ifnet *, const struct in_ifaddr *, int); 216 static void carp_deactivate_vhaddr(struct carp_softc *, 217 struct carp_vhaddr *); 218 219 static void carp_sc_state(struct carp_softc *); 220 #ifdef INET6 221 static void carp_send_na(struct carp_softc *); 222 static int carp_set_addr6(struct carp_softc *, struct sockaddr_in6 *); 223 static int carp_del_addr6(struct carp_softc *, struct sockaddr_in6 *); 224 static void carp_multicast6_cleanup(struct carp_softc *); 225 #endif 226 static void carp_stop(struct carp_softc *, int); 227 static void carp_reset(struct carp_softc *, int); 228 229 static void carp_ifaddr(void *, struct ifnet *, enum ifaddr_event, 230 struct ifaddr *); 231 static void carp_ifdetach(void *, struct ifnet *); 232 233 static MALLOC_DEFINE(M_CARP, "CARP", "CARP interfaces"); 234 235 static LIST_HEAD(, carp_softc) carpif_list; 236 237 static struct if_clone carp_cloner = 238 IF_CLONE_INITIALIZER(CARP_IFNAME, carp_clone_create, carp_clone_destroy, 239 0, IF_MAXUNIT); 240 241 static eventhandler_tag carp_ifdetach_event; 242 static eventhandler_tag carp_ifaddr_event; 243 244 static __inline void 245 carp_insert_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha_new) 246 { 247 struct carp_vhaddr *vha; 248 u_long new_addr, addr; 249 250 KKASSERT((vha_new->vha_flags & CARP_VHAF_ONLIST) == 0); 251 252 /* 253 * Virtual address list is sorted; smaller one first 254 */ 255 new_addr = ntohl(vha_new->vha_ia->ia_addr.sin_addr.s_addr); 256 257 TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) { 
258 addr = ntohl(vha->vha_ia->ia_addr.sin_addr.s_addr); 259 260 if (addr > new_addr) 261 break; 262 } 263 if (vha == NULL) 264 TAILQ_INSERT_TAIL(&sc->sc_vha_list, vha_new, vha_link); 265 else 266 TAILQ_INSERT_BEFORE(vha, vha_new, vha_link); 267 vha_new->vha_flags |= CARP_VHAF_ONLIST; 268 } 269 270 static __inline void 271 carp_remove_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha) 272 { 273 KKASSERT(vha->vha_flags & CARP_VHAF_ONLIST); 274 vha->vha_flags &= ~CARP_VHAF_ONLIST; 275 TAILQ_REMOVE(&sc->sc_vha_list, vha, vha_link); 276 } 277 278 static void 279 carp_hmac_prepare(struct carp_softc *sc) 280 { 281 uint8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT; 282 uint8_t vhid = sc->sc_vhid & 0xff; 283 int i; 284 #ifdef INET6 285 struct ifaddr_container *ifac; 286 struct in6_addr in6; 287 #endif 288 #ifdef INET 289 struct carp_vhaddr *vha; 290 #endif 291 292 /* XXX: possible race here */ 293 294 /* compute ipad from key */ 295 bzero(sc->sc_pad, sizeof(sc->sc_pad)); 296 bcopy(sc->sc_key, sc->sc_pad, sizeof(sc->sc_key)); 297 for (i = 0; i < sizeof(sc->sc_pad); i++) 298 sc->sc_pad[i] ^= 0x36; 299 300 /* precompute first part of inner hash */ 301 SHA1Init(&sc->sc_sha1); 302 SHA1Update(&sc->sc_sha1, sc->sc_pad, sizeof(sc->sc_pad)); 303 SHA1Update(&sc->sc_sha1, (void *)&version, sizeof(version)); 304 SHA1Update(&sc->sc_sha1, (void *)&type, sizeof(type)); 305 SHA1Update(&sc->sc_sha1, (void *)&vhid, sizeof(vhid)); 306 #ifdef INET 307 TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) { 308 SHA1Update(&sc->sc_sha1, 309 (const uint8_t *)&vha->vha_ia->ia_addr.sin_addr, 310 sizeof(struct in_addr)); 311 } 312 #endif /* INET */ 313 #ifdef INET6 314 TAILQ_FOREACH(ifac, &sc->sc_if.if_addrheads[mycpuid], ifa_link) { 315 struct ifaddr *ifa = ifac->ifa; 316 317 if (ifa->ifa_addr->sa_family == AF_INET6) { 318 in6 = ifatoia6(ifa)->ia_addr.sin6_addr; 319 in6_clearscope(&in6); 320 SHA1Update(&sc->sc_sha1, (void *)&in6, sizeof(in6)); 321 } 322 } 323 #endif /* INET6 */ 324 325 /* 
convert ipad to opad */ 326 for (i = 0; i < sizeof(sc->sc_pad); i++) 327 sc->sc_pad[i] ^= 0x36 ^ 0x5c; 328 } 329 330 static void 331 carp_hmac_generate(struct carp_softc *sc, uint32_t counter[2], 332 unsigned char md[20]) 333 { 334 SHA1_CTX sha1ctx; 335 336 /* fetch first half of inner hash */ 337 bcopy(&sc->sc_sha1, &sha1ctx, sizeof(sha1ctx)); 338 339 SHA1Update(&sha1ctx, (void *)counter, sizeof(sc->sc_counter)); 340 SHA1Final(md, &sha1ctx); 341 342 /* outer hash */ 343 SHA1Init(&sha1ctx); 344 SHA1Update(&sha1ctx, sc->sc_pad, sizeof(sc->sc_pad)); 345 SHA1Update(&sha1ctx, md, 20); 346 SHA1Final(md, &sha1ctx); 347 } 348 349 static int 350 carp_hmac_verify(struct carp_softc *sc, uint32_t counter[2], 351 unsigned char md[20]) 352 { 353 unsigned char md2[20]; 354 355 carp_hmac_generate(sc, counter, md2); 356 return (bcmp(md, md2, sizeof(md2))); 357 } 358 359 static void 360 carp_setroute(struct carp_softc *sc, int cmd) 361 { 362 #ifdef INET6 363 struct ifaddr_container *ifac; 364 365 TAILQ_FOREACH(ifac, &sc->sc_if.if_addrheads[mycpuid], ifa_link) { 366 struct ifaddr *ifa = ifac->ifa; 367 368 if (ifa->ifa_addr->sa_family == AF_INET6) { 369 if (cmd == RTM_ADD) 370 in6_ifaddloop(ifa); 371 else 372 in6_ifremloop(ifa); 373 } 374 } 375 #endif /* INET6 */ 376 } 377 378 static int 379 carp_clone_create(struct if_clone *ifc, int unit, caddr_t param __unused) 380 { 381 struct carp_softc *sc; 382 struct ifnet *ifp; 383 384 sc = kmalloc(sizeof(*sc), M_CARP, M_WAITOK | M_ZERO); 385 ifp = &sc->sc_if; 386 387 sc->sc_suppress = 0; 388 sc->sc_advbase = CARP_DFLTINTV; 389 sc->sc_vhid = -1; /* required setting */ 390 sc->sc_advskew = 0; 391 sc->sc_init_counter = 1; 392 sc->sc_naddrs = 0; 393 sc->sc_naddrs6 = 0; 394 395 TAILQ_INIT(&sc->sc_vha_list); 396 397 #ifdef INET6 398 sc->sc_im6o.im6o_multicast_hlim = CARP_DFLTTL; 399 #endif 400 401 callout_init(&sc->sc_ad_tmo); 402 callout_init(&sc->sc_md_tmo); 403 callout_init(&sc->sc_md6_tmo); 404 405 ifp->if_softc = sc; 406 if_initname(ifp, 
CARP_IFNAME, unit); 407 ifp->if_mtu = ETHERMTU; 408 ifp->if_flags = IFF_LOOPBACK; 409 ifp->if_ioctl = carp_ioctl; 410 ifp->if_output = carp_looutput; 411 ifp->if_start = carp_start; 412 ifp->if_type = IFT_CARP; 413 ifq_set_maxlen(&ifp->if_snd, ifqmaxlen); 414 ifp->if_hdrlen = 0; 415 if_attach(ifp, NULL); 416 bpfattach(ifp, DLT_NULL, sizeof(u_int)); 417 418 carp_gettok(); 419 LIST_INSERT_HEAD(&carpif_list, sc, sc_next); 420 carp_reltok(); 421 422 return (0); 423 } 424 425 static int 426 carp_clone_destroy(struct ifnet *ifp) 427 { 428 struct carp_softc *sc = ifp->if_softc; 429 430 carp_gettok(); 431 432 sc->sc_dead = 1; 433 carp_detach(sc, 1); 434 LIST_REMOVE(sc, sc_next); 435 436 carp_reltok(); 437 438 bpfdetach(ifp); 439 if_detach(ifp); 440 441 KASSERT(sc->sc_naddrs == 0, ("certain inet address is still active\n")); 442 kfree(sc, M_CARP); 443 444 return 0; 445 } 446 447 static void 448 carp_detach(struct carp_softc *sc, int detach) 449 { 450 struct carp_if *cif; 451 452 carp_reset(sc, detach); 453 454 carp_multicast_cleanup(sc); 455 #ifdef INET6 456 carp_multicast6_cleanup(sc); 457 #endif 458 459 if (!sc->sc_dead && detach) { 460 struct carp_vhaddr *vha; 461 462 TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) 463 carp_deactivate_vhaddr(sc, vha); 464 KKASSERT(sc->sc_naddrs == 0); 465 } 466 467 if (sc->sc_carpdev != NULL) { 468 cif = sc->sc_carpdev->if_carp; 469 TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list); 470 if (TAILQ_EMPTY(&cif->vhif_vrs)) { 471 ifpromisc(sc->sc_carpdev, 0); 472 sc->sc_carpdev->if_carp = NULL; 473 kfree(cif, M_CARP); 474 } 475 sc->sc_carpdev = NULL; 476 sc->sc_ia = NULL; 477 } 478 } 479 480 /* Detach an interface from the carp. 
*/ 481 static void 482 carp_ifdetach(void *arg __unused, struct ifnet *ifp) 483 { 484 struct carp_if *cif = ifp->if_carp; 485 struct carp_softc *sc; 486 487 carp_gettok(); 488 489 while (ifp->if_carp && 490 (sc = TAILQ_FIRST(&cif->vhif_vrs)) != NULL) 491 carp_detach(sc, 1); 492 493 carp_reltok(); 494 } 495 496 /* 497 * process input packet. 498 * we have rearranged checks order compared to the rfc, 499 * but it seems more efficient this way or not possible otherwise. 500 */ 501 int 502 carp_input(struct mbuf **mp, int *offp, int proto) 503 { 504 struct mbuf *m = *mp; 505 struct ip *ip = mtod(m, struct ip *); 506 struct carp_header *ch; 507 int len, iphlen; 508 509 carp_gettok(); 510 511 iphlen = *offp; 512 *mp = NULL; 513 514 carpstats.carps_ipackets++; 515 516 if (!carp_opts[CARPCTL_ALLOW]) { 517 m_freem(m); 518 goto back; 519 } 520 521 /* Check if received on a valid carp interface */ 522 if (m->m_pkthdr.rcvif->if_carp == NULL) { 523 carpstats.carps_badif++; 524 CARP_LOG("carp_input: packet received on non-carp " 525 "interface: %s\n", 526 m->m_pkthdr.rcvif->if_xname); 527 m_freem(m); 528 goto back; 529 } 530 531 /* Verify that the IP TTL is CARP_DFLTTL. 
*/ 532 if (ip->ip_ttl != CARP_DFLTTL) { 533 carpstats.carps_badttl++; 534 CARP_LOG("carp_input: received ttl %d != %d on %s\n", 535 ip->ip_ttl, CARP_DFLTTL, 536 m->m_pkthdr.rcvif->if_xname); 537 m_freem(m); 538 goto back; 539 } 540 541 /* Minimal CARP packet size */ 542 len = iphlen + sizeof(*ch); 543 544 /* 545 * Verify that the received packet length is 546 * not less than the CARP header 547 */ 548 if (m->m_pkthdr.len < len) { 549 carpstats.carps_badlen++; 550 CARP_LOG("packet too short %d on %s\n", m->m_pkthdr.len, 551 m->m_pkthdr.rcvif->if_xname); 552 m_freem(m); 553 goto back; 554 } 555 556 /* Make sure that CARP header is contiguous */ 557 if (len > m->m_len) { 558 m = m_pullup(m, len); 559 if (m == NULL) { 560 carpstats.carps_hdrops++; 561 CARP_LOG("carp_input: m_pullup failed\n"); 562 goto back; 563 } 564 ip = mtod(m, struct ip *); 565 } 566 ch = (struct carp_header *)((uint8_t *)ip + iphlen); 567 568 /* Verify the CARP checksum */ 569 if (in_cksum_skip(m, len, iphlen)) { 570 carpstats.carps_badsum++; 571 CARP_LOG("carp_input: checksum failed on %s\n", 572 m->m_pkthdr.rcvif->if_xname); 573 m_freem(m); 574 goto back; 575 } 576 carp_input_c(m, ch, AF_INET); 577 578 back: 579 carp_reltok(); 580 return(IPPROTO_DONE); 581 } 582 583 #ifdef INET6 584 int 585 carp6_input(struct mbuf **mp, int *offp, int proto) 586 { 587 struct mbuf *m = *mp; 588 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); 589 struct carp_header *ch; 590 u_int len; 591 592 carp_gettok(); 593 594 carpstats.carps_ipackets6++; 595 596 if (!carp_opts[CARPCTL_ALLOW]) { 597 m_freem(m); 598 goto back; 599 } 600 601 /* check if received on a valid carp interface */ 602 if (m->m_pkthdr.rcvif->if_carp == NULL) { 603 carpstats.carps_badif++; 604 CARP_LOG("carp6_input: packet received on non-carp " 605 "interface: %s\n", 606 m->m_pkthdr.rcvif->if_xname); 607 m_freem(m); 608 goto back; 609 } 610 611 /* verify that the IP TTL is 255 */ 612 if (ip6->ip6_hlim != CARP_DFLTTL) { 613 carpstats.carps_badttl++; 
614 CARP_LOG("carp6_input: received ttl %d != 255 on %s\n", 615 ip6->ip6_hlim, 616 m->m_pkthdr.rcvif->if_xname); 617 m_freem(m); 618 goto back; 619 } 620 621 /* verify that we have a complete carp packet */ 622 len = m->m_len; 623 IP6_EXTHDR_GET(ch, struct carp_header *, m, *offp, sizeof(*ch)); 624 if (ch == NULL) { 625 carpstats.carps_badlen++; 626 CARP_LOG("carp6_input: packet size %u too small\n", len); 627 goto back; 628 } 629 630 /* verify the CARP checksum */ 631 if (in_cksum_range(m, 0, *offp, sizeof(*ch))) { 632 carpstats.carps_badsum++; 633 CARP_LOG("carp6_input: checksum failed, on %s\n", 634 m->m_pkthdr.rcvif->if_xname); 635 m_freem(m); 636 goto back; 637 } 638 639 carp_input_c(m, ch, AF_INET6); 640 back: 641 carp_reltok(); 642 return (IPPROTO_DONE); 643 } 644 #endif /* INET6 */ 645 646 static void 647 carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af) 648 { 649 struct ifnet *ifp = m->m_pkthdr.rcvif; 650 struct ifnet *cifp; 651 struct carp_softc *sc; 652 uint64_t tmp_counter; 653 struct timeval sc_tv, ch_tv; 654 655 /* verify that the VHID is valid on the receiving interface */ 656 TAILQ_FOREACH(sc, &((struct carp_if *)ifp->if_carp)->vhif_vrs, sc_list) 657 if (sc->sc_vhid == ch->carp_vhid) 658 break; 659 660 if (!sc || !CARP_IS_RUNNING(&sc->sc_if)) { 661 carpstats.carps_badvhid++; 662 m_freem(m); 663 return; 664 } 665 cifp = &sc->sc_if; 666 667 getmicrotime(&cifp->if_lastchange); 668 cifp->if_ipackets++; 669 cifp->if_ibytes += m->m_pkthdr.len; 670 671 if (cifp->if_bpf) { 672 struct ip *ip = mtod(m, struct ip *); 673 674 /* BPF wants net byte order */ 675 ip->ip_len = htons(ip->ip_len + (ip->ip_hl << 2)); 676 ip->ip_off = htons(ip->ip_off); 677 bpf_mtap(cifp->if_bpf, m); 678 } 679 680 /* verify the CARP version. 
*/ 681 if (ch->carp_version != CARP_VERSION) { 682 carpstats.carps_badver++; 683 cifp->if_ierrors++; 684 CARP_LOG("%s; invalid version %d\n", cifp->if_xname, 685 ch->carp_version); 686 m_freem(m); 687 return; 688 } 689 690 /* verify the hash */ 691 if (carp_hmac_verify(sc, ch->carp_counter, ch->carp_md)) { 692 carpstats.carps_badauth++; 693 cifp->if_ierrors++; 694 CARP_LOG("%s: incorrect hash\n", cifp->if_xname); 695 m_freem(m); 696 return; 697 } 698 699 tmp_counter = ntohl(ch->carp_counter[0]); 700 tmp_counter = tmp_counter<<32; 701 tmp_counter += ntohl(ch->carp_counter[1]); 702 703 /* XXX Replay protection goes here */ 704 705 sc->sc_init_counter = 0; 706 sc->sc_counter = tmp_counter; 707 708 sc_tv.tv_sec = sc->sc_advbase; 709 if (carp_suppress_preempt && sc->sc_advskew < 240) 710 sc_tv.tv_usec = 240 * 1000000 / 256; 711 else 712 sc_tv.tv_usec = sc->sc_advskew * 1000000 / 256; 713 ch_tv.tv_sec = ch->carp_advbase; 714 ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256; 715 716 switch (sc->sc_state) { 717 case INIT: 718 break; 719 720 case MASTER: 721 /* 722 * If we receive an advertisement from a master who's going to 723 * be more frequent than us, go into BACKUP state. 724 */ 725 if (timevalcmp(&sc_tv, &ch_tv, >) || 726 timevalcmp(&sc_tv, &ch_tv, ==)) { 727 callout_stop(&sc->sc_ad_tmo); 728 CARP_DEBUG("%s: MASTER -> BACKUP " 729 "(more frequent advertisement received)\n", 730 cifp->if_xname); 731 carp_set_state(sc, BACKUP); 732 carp_setrun(sc, 0); 733 carp_setroute(sc, RTM_DELETE); 734 } 735 break; 736 737 case BACKUP: 738 /* 739 * If we're pre-empting masters who advertise slower than us, 740 * and this one claims to be slower, treat him as down. 
741 */ 742 if (carp_opts[CARPCTL_PREEMPT] && 743 timevalcmp(&sc_tv, &ch_tv, <)) { 744 CARP_DEBUG("%s: BACKUP -> MASTER " 745 "(preempting a slower master)\n", cifp->if_xname); 746 carp_master_down(sc); 747 break; 748 } 749 750 /* 751 * If the master is going to advertise at such a low frequency 752 * that he's guaranteed to time out, we'd might as well just 753 * treat him as timed out now. 754 */ 755 sc_tv.tv_sec = sc->sc_advbase * 3; 756 if (timevalcmp(&sc_tv, &ch_tv, <)) { 757 CARP_DEBUG("%s: BACKUP -> MASTER (master timed out)\n", 758 cifp->if_xname); 759 carp_master_down(sc); 760 break; 761 } 762 763 /* 764 * Otherwise, we reset the counter and wait for the next 765 * advertisement. 766 */ 767 carp_setrun(sc, af); 768 break; 769 } 770 m_freem(m); 771 } 772 773 static int 774 carp_prepare_ad(struct mbuf *m, struct carp_softc *sc, struct carp_header *ch) 775 { 776 struct ifnet *cifp = &sc->sc_if; 777 struct m_tag *mtag; 778 779 if (sc->sc_init_counter) { 780 /* this could also be seconds since unix epoch */ 781 sc->sc_counter = karc4random(); 782 sc->sc_counter = sc->sc_counter << 32; 783 sc->sc_counter += karc4random(); 784 } else { 785 sc->sc_counter++; 786 } 787 788 ch->carp_counter[0] = htonl((sc->sc_counter >> 32) & 0xffffffff); 789 ch->carp_counter[1] = htonl(sc->sc_counter & 0xffffffff); 790 791 carp_hmac_generate(sc, ch->carp_counter, ch->carp_md); 792 793 /* Tag packet for carp_output */ 794 mtag = m_tag_get(PACKET_TAG_CARP, sizeof(struct ifnet *), MB_DONTWAIT); 795 if (mtag == NULL) { 796 m_freem(m); 797 cifp->if_oerrors++; 798 return ENOMEM; 799 } 800 bcopy(&cifp, (caddr_t)(mtag + 1), sizeof(struct ifnet *)); 801 m_tag_prepend(m, mtag); 802 803 return 0; 804 } 805 806 static void 807 carp_send_ad_all(void) 808 { 809 struct carp_softc *sc; 810 811 LIST_FOREACH(sc, &carpif_list, sc_next) { 812 if (sc->sc_carpdev == NULL) 813 continue; 814 815 if (CARP_IS_RUNNING(&sc->sc_if) && sc->sc_state == MASTER) 816 carp_send_ad(sc); 817 } 818 } 819 820 static void 
821 carp_send_ad_timeout(void *xsc) 822 { 823 carp_send_ad(xsc); 824 } 825 826 static void 827 carp_send_ad(struct carp_softc *sc) 828 { 829 struct ifnet *cifp = &sc->sc_if; 830 struct carp_header ch; 831 struct timeval tv; 832 struct carp_header *ch_ptr; 833 struct mbuf *m; 834 int len, advbase, advskew; 835 836 if (!CARP_IS_RUNNING(cifp)) { 837 /* Bow out */ 838 advbase = 255; 839 advskew = 255; 840 } else { 841 advbase = sc->sc_advbase; 842 if (!carp_suppress_preempt || sc->sc_advskew > 240) 843 advskew = sc->sc_advskew; 844 else 845 advskew = 240; 846 tv.tv_sec = advbase; 847 tv.tv_usec = advskew * 1000000 / 256; 848 } 849 850 ch.carp_version = CARP_VERSION; 851 ch.carp_type = CARP_ADVERTISEMENT; 852 ch.carp_vhid = sc->sc_vhid; 853 ch.carp_advbase = advbase; 854 ch.carp_advskew = advskew; 855 ch.carp_authlen = 7; /* XXX DEFINE */ 856 ch.carp_pad1 = 0; /* must be zero */ 857 ch.carp_cksum = 0; 858 859 #ifdef INET 860 if (sc->sc_ia != NULL) { 861 struct ip *ip; 862 863 MGETHDR(m, MB_DONTWAIT, MT_HEADER); 864 if (m == NULL) { 865 cifp->if_oerrors++; 866 carpstats.carps_onomem++; 867 /* XXX maybe less ? 
*/ 868 if (advbase != 255 || advskew != 255) 869 callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv), 870 carp_send_ad_timeout, sc); 871 return; 872 } 873 len = sizeof(*ip) + sizeof(ch); 874 m->m_pkthdr.len = len; 875 m->m_pkthdr.rcvif = NULL; 876 m->m_len = len; 877 MH_ALIGN(m, m->m_len); 878 m->m_flags |= M_MCAST; 879 ip = mtod(m, struct ip *); 880 ip->ip_v = IPVERSION; 881 ip->ip_hl = sizeof(*ip) >> 2; 882 ip->ip_tos = IPTOS_LOWDELAY; 883 ip->ip_len = len; 884 ip->ip_id = ip_newid(); 885 ip->ip_off = IP_DF; 886 ip->ip_ttl = CARP_DFLTTL; 887 ip->ip_p = IPPROTO_CARP; 888 ip->ip_sum = 0; 889 ip->ip_src = sc->sc_ia->ia_addr.sin_addr; 890 ip->ip_dst.s_addr = htonl(INADDR_CARP_GROUP); 891 892 ch_ptr = (struct carp_header *)(&ip[1]); 893 bcopy(&ch, ch_ptr, sizeof(ch)); 894 if (carp_prepare_ad(m, sc, ch_ptr)) 895 return; 896 ch_ptr->carp_cksum = in_cksum_skip(m, len, sizeof(*ip)); 897 898 getmicrotime(&cifp->if_lastchange); 899 cifp->if_opackets++; 900 cifp->if_obytes += len; 901 carpstats.carps_opackets++; 902 903 if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL)) { 904 cifp->if_oerrors++; 905 if (sc->sc_sendad_errors < INT_MAX) 906 sc->sc_sendad_errors++; 907 if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) { 908 carp_suppress_preempt++; 909 if (carp_suppress_preempt == 1) { 910 carp_send_ad_all(); 911 } 912 } 913 sc->sc_sendad_success = 0; 914 } else { 915 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) { 916 if (++sc->sc_sendad_success >= 917 CARP_SENDAD_MIN_SUCCESS) { 918 carp_suppress_preempt--; 919 sc->sc_sendad_errors = 0; 920 } 921 } else { 922 sc->sc_sendad_errors = 0; 923 } 924 } 925 } 926 #endif /* INET */ 927 #ifdef INET6 928 if (sc->sc_ia6) { 929 struct ip6_hdr *ip6; 930 931 MGETHDR(m, MB_DONTWAIT, MT_HEADER); 932 if (m == NULL) { 933 cifp->if_oerrors++; 934 carpstats.carps_onomem++; 935 /* XXX maybe less ? 
*/ 936 if (advbase != 255 || advskew != 255) 937 callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv), 938 carp_send_ad_timeout, sc); 939 return; 940 } 941 len = sizeof(*ip6) + sizeof(ch); 942 m->m_pkthdr.len = len; 943 m->m_pkthdr.rcvif = NULL; 944 m->m_len = len; 945 MH_ALIGN(m, m->m_len); 946 m->m_flags |= M_MCAST; 947 ip6 = mtod(m, struct ip6_hdr *); 948 bzero(ip6, sizeof(*ip6)); 949 ip6->ip6_vfc |= IPV6_VERSION; 950 ip6->ip6_hlim = CARP_DFLTTL; 951 ip6->ip6_nxt = IPPROTO_CARP; 952 bcopy(&sc->sc_ia6->ia_addr.sin6_addr, &ip6->ip6_src, 953 sizeof(struct in6_addr)); 954 /* set the multicast destination */ 955 956 ip6->ip6_dst.s6_addr16[0] = htons(0xff02); 957 ip6->ip6_dst.s6_addr8[15] = 0x12; 958 if (in6_setscope(&ip6->ip6_dst, sc->sc_carpdev, NULL) != 0) { 959 cifp->if_oerrors++; 960 m_freem(m); 961 CARP_LOG("%s: in6_setscope failed\n", __func__); 962 return; 963 } 964 965 ch_ptr = (struct carp_header *)(&ip6[1]); 966 bcopy(&ch, ch_ptr, sizeof(ch)); 967 if (carp_prepare_ad(m, sc, ch_ptr)) 968 return; 969 ch_ptr->carp_cksum = in_cksum_skip(m, len, sizeof(*ip6)); 970 971 getmicrotime(&cifp->if_lastchange); 972 cifp->if_opackets++; 973 cifp->if_obytes += len; 974 carpstats.carps_opackets6++; 975 976 if (ip6_output(m, NULL, NULL, 0, &sc->sc_im6o, NULL, NULL)) { 977 cifp->if_oerrors++; 978 if (sc->sc_sendad_errors < INT_MAX) 979 sc->sc_sendad_errors++; 980 if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) { 981 carp_suppress_preempt++; 982 if (carp_suppress_preempt == 1) { 983 carp_send_ad_all(); 984 } 985 } 986 sc->sc_sendad_success = 0; 987 } else { 988 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) { 989 if (++sc->sc_sendad_success >= 990 CARP_SENDAD_MIN_SUCCESS) { 991 carp_suppress_preempt--; 992 sc->sc_sendad_errors = 0; 993 } 994 } else { 995 sc->sc_sendad_errors = 0; 996 } 997 } 998 } 999 #endif /* INET6 */ 1000 1001 if (advbase != 255 || advskew != 255) 1002 callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv), 1003 carp_send_ad_timeout, sc); 1004 } 1005 1006 /* 
1007 * Broadcast a gratuitous ARP request containing 1008 * the virtual router MAC address for each IP address 1009 * associated with the virtual router. 1010 */ 1011 static void 1012 carp_send_arp(struct carp_softc *sc) 1013 { 1014 const struct carp_vhaddr *vha; 1015 1016 TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) { 1017 if (vha->vha_iaback == NULL) 1018 continue; 1019 1020 arp_iainit(sc->sc_carpdev, &vha->vha_ia->ia_addr.sin_addr, 1021 IF_LLADDR(&sc->sc_if)); 1022 } 1023 } 1024 1025 #ifdef INET6 1026 static void 1027 carp_send_na(struct carp_softc *sc) 1028 { 1029 struct ifaddr_container *ifac; 1030 struct in6_addr *in6; 1031 static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT; 1032 1033 TAILQ_FOREACH(ifac, &sc->sc_if.if_addrheads[mycpuid], ifa_link) { 1034 struct ifaddr *ifa = ifac->ifa; 1035 1036 if (ifa->ifa_addr->sa_family != AF_INET6) 1037 continue; 1038 1039 in6 = &ifatoia6(ifa)->ia_addr.sin6_addr; 1040 nd6_na_output(sc->sc_carpdev, &mcast, in6, 1041 ND_NA_FLAG_OVERRIDE, 1, NULL); 1042 DELAY(1000); /* XXX */ 1043 } 1044 } 1045 #endif /* INET6 */ 1046 1047 static __inline const struct carp_vhaddr * 1048 carp_find_addr(const struct carp_softc *sc, const struct in_addr *addr) 1049 { 1050 struct carp_vhaddr *vha; 1051 1052 TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) { 1053 if (vha->vha_iaback == NULL) 1054 continue; 1055 1056 if (vha->vha_ia->ia_addr.sin_addr.s_addr == addr->s_addr) 1057 return vha; 1058 } 1059 return NULL; 1060 } 1061 1062 static int 1063 carp_iamatch_balance(const struct carp_if *cif, const struct in_addr *itaddr, 1064 const struct in_addr *isaddr, uint8_t **enaddr) 1065 { 1066 const struct carp_softc *vh; 1067 int index, count = 0; 1068 1069 /* 1070 * XXX proof of concept implementation. 1071 * We use the source ip to decide which virtual host should 1072 * handle the request. If we're master of that virtual host, 1073 * then we respond, otherwise, just drop the arp packet on 1074 * the floor. 
1075 */ 1076 1077 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1078 if (!CARP_IS_RUNNING(&vh->sc_if)) 1079 continue; 1080 1081 if (carp_find_addr(vh, itaddr) != NULL) 1082 count++; 1083 } 1084 if (count == 0) 1085 return 0; 1086 1087 /* this should be a hash, like pf_hash() */ 1088 index = ntohl(isaddr->s_addr) % count; 1089 count = 0; 1090 1091 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1092 if (!CARP_IS_RUNNING(&vh->sc_if)) 1093 continue; 1094 1095 if (carp_find_addr(vh, itaddr) == NULL) 1096 continue; 1097 1098 if (count == index) { 1099 if (vh->sc_state == MASTER) { 1100 *enaddr = IF_LLADDR(&vh->sc_if); 1101 return 1; 1102 } else { 1103 return 0; 1104 } 1105 } 1106 count++; 1107 } 1108 return 0; 1109 } 1110 1111 int 1112 carp_iamatch(const void *v, const struct in_addr *itaddr, 1113 const struct in_addr *isaddr, uint8_t **enaddr) 1114 { 1115 const struct carp_if *cif = v; 1116 const struct carp_softc *vh; 1117 1118 ASSERT_LWKT_TOKEN_HELD(&carp_tok); 1119 1120 if (carp_opts[CARPCTL_ARPBALANCE]) 1121 return carp_iamatch_balance(cif, itaddr, isaddr, enaddr); 1122 1123 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1124 if (!CARP_IS_RUNNING(&vh->sc_if) || vh->sc_state != MASTER) 1125 continue; 1126 1127 if (carp_find_addr(vh, itaddr) != NULL) { 1128 *enaddr = IF_LLADDR(&vh->sc_if); 1129 return 1; 1130 } 1131 } 1132 return 0; 1133 } 1134 1135 #ifdef INET6 1136 struct ifaddr * 1137 carp_iamatch6(void *v, struct in6_addr *taddr) 1138 { 1139 struct carp_if *cif = v; 1140 struct carp_softc *vh; 1141 1142 ASSERT_LWKT_TOKEN_HELD(&carp_tok); 1143 1144 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1145 struct ifaddr_container *ifac; 1146 1147 TAILQ_FOREACH(ifac, &vh->sc_if.if_addrheads[mycpuid], 1148 ifa_link) { 1149 struct ifaddr *ifa = ifac->ifa; 1150 1151 if (IN6_ARE_ADDR_EQUAL(taddr, 1152 &ifatoia6(ifa)->ia_addr.sin6_addr) && 1153 CARP_IS_RUNNING(&vh->sc_if) && 1154 vh->sc_state == MASTER) { 1155 return (ifa); 1156 } 1157 } 1158 } 1159 return (NULL); 1160 } 1161 1162 
void * 1163 carp_macmatch6(void *v, struct mbuf *m, const struct in6_addr *taddr) 1164 { 1165 struct m_tag *mtag; 1166 struct carp_if *cif = v; 1167 struct carp_softc *sc; 1168 1169 ASSERT_LWKT_TOKEN_HELD(&carp_tok); 1170 1171 TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) { 1172 struct ifaddr_container *ifac; 1173 1174 TAILQ_FOREACH(ifac, &sc->sc_if.if_addrheads[mycpuid], 1175 ifa_link) { 1176 struct ifaddr *ifa = ifac->ifa; 1177 1178 if (IN6_ARE_ADDR_EQUAL(taddr, 1179 &ifatoia6(ifa)->ia_addr.sin6_addr) && 1180 CARP_IS_RUNNING(&sc->sc_if)) { 1181 struct ifnet *ifp = &sc->sc_if; 1182 1183 mtag = m_tag_get(PACKET_TAG_CARP, 1184 sizeof(struct ifnet *), MB_DONTWAIT); 1185 if (mtag == NULL) { 1186 /* better a bit than nothing */ 1187 return (IF_LLADDR(ifp)); 1188 } 1189 bcopy(&ifp, (caddr_t)(mtag + 1), 1190 sizeof(struct ifnet *)); 1191 m_tag_prepend(m, mtag); 1192 1193 return (IF_LLADDR(ifp)); 1194 } 1195 } 1196 } 1197 return (NULL); 1198 } 1199 #endif 1200 1201 int 1202 carp_forus(const void *v, const void *dhost) 1203 { 1204 const struct carp_if *cif = v; 1205 const struct carp_softc *vh; 1206 const uint8_t *ena = dhost; 1207 1208 ASSERT_LWKT_TOKEN_HELD(&carp_tok); 1209 1210 if (ena[0] || ena[1] || ena[2] != 0x5e || ena[3] || ena[4] != 1) 1211 return 0; 1212 1213 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1214 const struct ifnet *cifp = &vh->sc_if; 1215 1216 if (CARP_IS_RUNNING(cifp) && vh->sc_state == MASTER && 1217 !bcmp(dhost, IF_LLADDR(cifp), ETHER_ADDR_LEN)) 1218 return 1; 1219 } 1220 return 0; 1221 } 1222 1223 static void 1224 carp_master_down_timeout(void *xsc) 1225 { 1226 struct carp_softc *sc = xsc; 1227 1228 CARP_DEBUG("%s: BACKUP -> MASTER (master timed out)\n", 1229 sc->sc_if.if_xname); 1230 carp_master_down(sc); 1231 } 1232 1233 static void 1234 carp_master_down(struct carp_softc *sc) 1235 { 1236 switch (sc->sc_state) { 1237 case INIT: 1238 kprintf("%s: master_down event in INIT state\n", 1239 sc->sc_if.if_xname); 1240 break; 1241 1242 case MASTER: 1243 
break; 1244 1245 case BACKUP: 1246 carp_set_state(sc, MASTER); 1247 carp_send_ad(sc); 1248 carp_send_arp(sc); 1249 #ifdef INET6 1250 carp_send_na(sc); 1251 #endif /* INET6 */ 1252 carp_setrun(sc, 0); 1253 carp_setroute(sc, RTM_ADD); 1254 break; 1255 } 1256 } 1257 1258 /* 1259 * When in backup state, af indicates whether to reset the master down timer 1260 * for v4 or v6. If it's set to zero, reset the ones which are already pending. 1261 */ 1262 static void 1263 carp_setrun(struct carp_softc *sc, sa_family_t af) 1264 { 1265 struct ifnet *cifp = &sc->sc_if; 1266 struct timeval tv; 1267 1268 if (sc->sc_carpdev == NULL) { 1269 carp_set_state(sc, INIT); 1270 return; 1271 } 1272 1273 if ((cifp->if_flags & IFF_RUNNING) && sc->sc_vhid > 0 && 1274 (sc->sc_naddrs || sc->sc_naddrs6)) { 1275 /* Nothing */ 1276 } else { 1277 carp_setroute(sc, RTM_DELETE); 1278 return; 1279 } 1280 1281 switch (sc->sc_state) { 1282 case INIT: 1283 if (carp_opts[CARPCTL_PREEMPT] && !carp_suppress_preempt) { 1284 carp_send_ad(sc); 1285 carp_send_arp(sc); 1286 #ifdef INET6 1287 carp_send_na(sc); 1288 #endif /* INET6 */ 1289 CARP_DEBUG("%s: INIT -> MASTER (preempting)\n", 1290 cifp->if_xname); 1291 carp_set_state(sc, MASTER); 1292 carp_setroute(sc, RTM_ADD); 1293 } else { 1294 CARP_DEBUG("%s: INIT -> BACKUP\n", cifp->if_xname); 1295 carp_set_state(sc, BACKUP); 1296 carp_setroute(sc, RTM_DELETE); 1297 carp_setrun(sc, 0); 1298 } 1299 break; 1300 1301 case BACKUP: 1302 callout_stop(&sc->sc_ad_tmo); 1303 tv.tv_sec = 3 * sc->sc_advbase; 1304 tv.tv_usec = sc->sc_advskew * 1000000 / 256; 1305 switch (af) { 1306 #ifdef INET 1307 case AF_INET: 1308 callout_reset(&sc->sc_md_tmo, tvtohz_high(&tv), 1309 carp_master_down_timeout, sc); 1310 break; 1311 #endif /* INET */ 1312 #ifdef INET6 1313 case AF_INET6: 1314 callout_reset(&sc->sc_md6_tmo, tvtohz_high(&tv), 1315 carp_master_down_timeout, sc); 1316 break; 1317 #endif /* INET6 */ 1318 default: 1319 if (sc->sc_naddrs) 1320 callout_reset(&sc->sc_md_tmo, 
tvtohz_high(&tv), 1321 carp_master_down_timeout, sc); 1322 if (sc->sc_naddrs6) 1323 callout_reset(&sc->sc_md6_tmo, tvtohz_high(&tv), 1324 carp_master_down_timeout, sc); 1325 break; 1326 } 1327 break; 1328 1329 case MASTER: 1330 tv.tv_sec = sc->sc_advbase; 1331 tv.tv_usec = sc->sc_advskew * 1000000 / 256; 1332 callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv), 1333 carp_send_ad_timeout, sc); 1334 break; 1335 } 1336 } 1337 1338 static void 1339 carp_multicast_cleanup(struct carp_softc *sc) 1340 { 1341 struct ip_moptions *imo = &sc->sc_imo; 1342 1343 if (imo->imo_num_memberships == 0) 1344 return; 1345 KKASSERT(imo->imo_num_memberships == 1); 1346 1347 in_delmulti(imo->imo_membership[0]); 1348 imo->imo_membership[0] = NULL; 1349 imo->imo_num_memberships = 0; 1350 imo->imo_multicast_ifp = NULL; 1351 } 1352 1353 #ifdef INET6 1354 static void 1355 carp_multicast6_cleanup(struct carp_softc *sc) 1356 { 1357 struct ip6_moptions *im6o = &sc->sc_im6o; 1358 1359 while (!LIST_EMPTY(&im6o->im6o_memberships)) { 1360 struct in6_multi_mship *imm = 1361 LIST_FIRST(&im6o->im6o_memberships); 1362 1363 LIST_REMOVE(imm, i6mm_chain); 1364 in6_leavegroup(imm); 1365 } 1366 im6o->im6o_multicast_ifp = NULL; 1367 } 1368 #endif 1369 1370 static int 1371 carp_get_vhaddr(struct carp_softc *sc, struct ifdrv *ifd) 1372 { 1373 const struct carp_vhaddr *vha; 1374 struct ifcarpvhaddr *carpa, *carpa0; 1375 int count, len, error; 1376 1377 count = 0; 1378 TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) 1379 ++count; 1380 1381 if (ifd->ifd_len == 0) { 1382 ifd->ifd_len = count * sizeof(*carpa); 1383 return 0; 1384 } else if (count == 0 || ifd->ifd_len < sizeof(*carpa)) { 1385 ifd->ifd_len = 0; 1386 return 0; 1387 } 1388 len = min(ifd->ifd_len, sizeof(*carpa) * count); 1389 KKASSERT(len >= sizeof(*carpa)); 1390 1391 carpa0 = carpa = kmalloc(len, M_TEMP, M_WAITOK | M_NULLOK | M_ZERO); 1392 if (carpa == NULL) 1393 return ENOMEM; 1394 1395 count = 0; 1396 TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) { 1397 if 
(len < sizeof(*carpa)) 1398 break; 1399 1400 carpa->carpa_flags = vha->vha_flags; 1401 carpa->carpa_addr.sin_family = AF_INET; 1402 carpa->carpa_addr.sin_addr = vha->vha_ia->ia_addr.sin_addr; 1403 1404 carpa->carpa_baddr.sin_family = AF_INET; 1405 if (vha->vha_iaback == NULL) { 1406 carpa->carpa_baddr.sin_addr.s_addr = INADDR_ANY; 1407 } else { 1408 carpa->carpa_baddr.sin_addr = 1409 vha->vha_iaback->ia_addr.sin_addr; 1410 } 1411 1412 ++carpa; 1413 ++count; 1414 len -= sizeof(*carpa); 1415 } 1416 ifd->ifd_len = sizeof(*carpa) * count; 1417 KKASSERT(ifd->ifd_len > 0); 1418 1419 error = copyout(carpa0, ifd->ifd_data, ifd->ifd_len); 1420 kfree(carpa0, M_TEMP); 1421 return error; 1422 } 1423 1424 static int 1425 carp_config_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha) 1426 { 1427 struct ifnet *ifp; 1428 struct in_ifaddr *ia_if; 1429 struct in_ifaddr_container *iac; 1430 const struct sockaddr_in *sin; 1431 u_long iaddr; 1432 int own; 1433 1434 KKASSERT(vha->vha_ia != NULL); 1435 1436 sin = &vha->vha_ia->ia_addr; 1437 iaddr = ntohl(sin->sin_addr.s_addr); 1438 1439 ia_if = NULL; 1440 own = 0; 1441 TAILQ_FOREACH(iac, &in_ifaddrheads[mycpuid], ia_link) { 1442 struct in_ifaddr *ia = iac->ia; 1443 1444 if ((ia->ia_flags & IFA_ROUTE) == 0) 1445 continue; 1446 1447 if (ia->ia_ifp->if_type == IFT_CARP) 1448 continue; 1449 1450 /* and, yeah, we need a multicast-capable iface too */ 1451 if ((ia->ia_ifp->if_flags & IFF_MULTICAST) == 0) 1452 continue; 1453 1454 if ((iaddr & ia->ia_subnetmask) == ia->ia_subnet) { 1455 if (sin->sin_addr.s_addr == 1456 ia->ia_addr.sin_addr.s_addr) 1457 own = 1; 1458 if (ia_if == NULL) 1459 ia_if = ia; 1460 else if (sc->sc_carpdev != NULL && 1461 sc->sc_carpdev == ia->ia_ifp) 1462 ia_if = ia; 1463 } 1464 } 1465 1466 carp_deactivate_vhaddr(sc, vha); 1467 if (!ia_if) 1468 return ENOENT; 1469 1470 ifp = ia_if->ia_ifp; 1471 1472 /* XXX Don't allow parent iface to be changed */ 1473 if (sc->sc_carpdev != NULL && sc->sc_carpdev != ifp) 1474 return 
EEXIST; 1475 1476 return carp_activate_vhaddr(sc, vha, ifp, ia_if, own); 1477 } 1478 1479 static void 1480 carp_add_addr(struct carp_softc *sc, struct ifaddr *carp_ifa) 1481 { 1482 struct carp_vhaddr *vha_new; 1483 struct in_ifaddr *carp_ia; 1484 #ifdef INVARIANTS 1485 struct carp_vhaddr *vha; 1486 #endif 1487 1488 KKASSERT(carp_ifa->ifa_addr->sa_family == AF_INET); 1489 carp_ia = ifatoia(carp_ifa); 1490 1491 #ifdef INVARIANTS 1492 TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) 1493 KKASSERT(vha->vha_ia != NULL && vha->vha_ia != carp_ia); 1494 #endif 1495 1496 vha_new = kmalloc(sizeof(*vha_new), M_CARP, M_WAITOK | M_ZERO); 1497 vha_new->vha_ia = carp_ia; 1498 carp_insert_vhaddr(sc, vha_new); 1499 1500 if (carp_config_vhaddr(sc, vha_new) != 0) { 1501 /* 1502 * If the above configuration fails, it may only mean 1503 * that the new address is problematic. However, the 1504 * carp(4) interface may already have several working 1505 * addresses. Since the expected behaviour of 1506 * SIOC[AS]IFADDR is to put the NIC into working state, 1507 * we try starting the state machine manually here with 1508 * the hope that the carp(4)'s previously working 1509 * addresses still could be brought up. 
1510 */ 1511 carp_hmac_prepare(sc); 1512 carp_set_state(sc, INIT); 1513 carp_setrun(sc, 0); 1514 } 1515 } 1516 1517 static void 1518 carp_del_addr(struct carp_softc *sc, struct ifaddr *carp_ifa) 1519 { 1520 struct carp_vhaddr *vha; 1521 struct in_ifaddr *carp_ia; 1522 1523 KKASSERT(carp_ifa->ifa_addr->sa_family == AF_INET); 1524 carp_ia = ifatoia(carp_ifa); 1525 1526 TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) { 1527 KKASSERT(vha->vha_ia != NULL); 1528 if (vha->vha_ia == carp_ia) 1529 break; 1530 } 1531 KASSERT(vha != NULL, ("no corresponding vhaddr %p\n", carp_ifa)); 1532 1533 /* 1534 * Remove the vhaddr from the list before deactivating 1535 * the vhaddr, so that the HMAC could be correctly 1536 * updated in carp_deactivate_vhaddr() 1537 */ 1538 carp_remove_vhaddr(sc, vha); 1539 1540 carp_deactivate_vhaddr(sc, vha); 1541 kfree(vha, M_CARP); 1542 } 1543 1544 static void 1545 carp_config_addr(struct carp_softc *sc, struct ifaddr *carp_ifa) 1546 { 1547 struct carp_vhaddr *vha; 1548 struct in_ifaddr *carp_ia; 1549 1550 KKASSERT(carp_ifa->ifa_addr->sa_family == AF_INET); 1551 carp_ia = ifatoia(carp_ifa); 1552 1553 TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) { 1554 KKASSERT(vha->vha_ia != NULL); 1555 if (vha->vha_ia == carp_ia) 1556 break; 1557 } 1558 KASSERT(vha != NULL, ("no corresponding vhaddr %p\n", carp_ifa)); 1559 1560 /* Remove then reinsert, to keep the vhaddr list sorted */ 1561 carp_remove_vhaddr(sc, vha); 1562 carp_insert_vhaddr(sc, vha); 1563 1564 if (carp_config_vhaddr(sc, vha) != 0) { 1565 /* See the comment in carp_add_addr() */ 1566 carp_hmac_prepare(sc); 1567 carp_set_state(sc, INIT); 1568 carp_setrun(sc, 0); 1569 } 1570 } 1571 1572 #ifdef INET6 1573 static int 1574 carp_set_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6) 1575 { 1576 struct ifnet *ifp; 1577 struct carp_if *cif; 1578 struct in6_ifaddr *ia, *ia_if; 1579 struct ip6_moptions *im6o = &sc->sc_im6o; 1580 struct in6_multi_mship *imm; 1581 struct in6_addr in6; 1582 int own, 
error; 1583 1584 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { 1585 carp_setrun(sc, 0); 1586 return (0); 1587 } 1588 1589 /* we have to do it by hands to check we won't match on us */ 1590 ia_if = NULL; own = 0; 1591 for (ia = in6_ifaddr; ia; ia = ia->ia_next) { 1592 int i; 1593 1594 for (i = 0; i < 4; i++) { 1595 if ((sin6->sin6_addr.s6_addr32[i] & 1596 ia->ia_prefixmask.sin6_addr.s6_addr32[i]) != 1597 (ia->ia_addr.sin6_addr.s6_addr32[i] & 1598 ia->ia_prefixmask.sin6_addr.s6_addr32[i])) 1599 break; 1600 } 1601 /* and, yeah, we need a multicast-capable iface too */ 1602 if (ia->ia_ifp != &sc->sc_if && 1603 (ia->ia_ifp->if_flags & IFF_MULTICAST) && 1604 (i == 4)) { 1605 if (!ia_if) 1606 ia_if = ia; 1607 if (IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr, 1608 &ia->ia_addr.sin6_addr)) 1609 own++; 1610 } 1611 } 1612 1613 if (!ia_if) 1614 return (EADDRNOTAVAIL); 1615 ia = ia_if; 1616 ifp = ia->ia_ifp; 1617 1618 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0 || 1619 (im6o->im6o_multicast_ifp && im6o->im6o_multicast_ifp != ifp)) 1620 return (EADDRNOTAVAIL); 1621 1622 if (!sc->sc_naddrs6) { 1623 im6o->im6o_multicast_ifp = ifp; 1624 1625 /* join CARP multicast address */ 1626 bzero(&in6, sizeof(in6)); 1627 in6.s6_addr16[0] = htons(0xff02); 1628 in6.s6_addr8[15] = 0x12; 1629 if (in6_setscope(&in6, ifp, NULL) != 0) 1630 goto cleanup; 1631 if ((imm = in6_joingroup(ifp, &in6, &error)) == NULL) 1632 goto cleanup; 1633 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain); 1634 1635 /* join solicited multicast address */ 1636 bzero(&in6, sizeof(in6)); 1637 in6.s6_addr16[0] = htons(0xff02); 1638 in6.s6_addr32[1] = 0; 1639 in6.s6_addr32[2] = htonl(1); 1640 in6.s6_addr32[3] = sin6->sin6_addr.s6_addr32[3]; 1641 in6.s6_addr8[12] = 0xff; 1642 if (in6_setscope(&in6, ifp, NULL) != 0) 1643 goto cleanup; 1644 if ((imm = in6_joingroup(ifp, &in6, &error)) == NULL) 1645 goto cleanup; 1646 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain); 1647 } 1648 1649 if (!ifp->if_carp) { 
1650 cif = kmalloc(sizeof(*cif), M_CARP, M_WAITOK | M_ZERO); 1651 1652 if ((error = ifpromisc(ifp, 1))) { 1653 kfree(cif, M_CARP); 1654 goto cleanup; 1655 } 1656 1657 TAILQ_INIT(&cif->vhif_vrs); 1658 ifp->if_carp = cif; 1659 } else { 1660 struct carp_softc *vr; 1661 1662 cif = ifp->if_carp; 1663 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) { 1664 if (vr != sc && vr->sc_vhid == sc->sc_vhid) { 1665 error = EINVAL; 1666 goto cleanup; 1667 } 1668 } 1669 } 1670 sc->sc_ia6 = ia; 1671 sc->sc_carpdev = ifp; 1672 1673 { /* XXX prevent endless loop if already in queue */ 1674 struct carp_softc *vr, *after = NULL; 1675 int myself = 0; 1676 cif = ifp->if_carp; 1677 1678 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) { 1679 if (vr == sc) 1680 myself = 1; 1681 if (vr->sc_vhid < sc->sc_vhid) 1682 after = vr; 1683 } 1684 1685 if (!myself) { 1686 /* We're trying to keep things in order */ 1687 if (after == NULL) 1688 TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list); 1689 else 1690 TAILQ_INSERT_AFTER(&cif->vhif_vrs, after, sc, sc_list); 1691 } 1692 } 1693 1694 sc->sc_naddrs6++; 1695 if (own) 1696 sc->sc_advskew = 0; 1697 carp_sc_state(sc); 1698 carp_setrun(sc, 0); 1699 1700 return (0); 1701 1702 cleanup: 1703 /* clean up multicast memberships */ 1704 if (!sc->sc_naddrs6) { 1705 while (!LIST_EMPTY(&im6o->im6o_memberships)) { 1706 imm = LIST_FIRST(&im6o->im6o_memberships); 1707 LIST_REMOVE(imm, i6mm_chain); 1708 in6_leavegroup(imm); 1709 } 1710 } 1711 return (error); 1712 } 1713 1714 static int 1715 carp_del_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6) 1716 { 1717 int error = 0; 1718 1719 if (!--sc->sc_naddrs6) { 1720 struct carp_if *cif = sc->sc_carpdev->if_carp; 1721 struct ip6_moptions *im6o = &sc->sc_im6o; 1722 1723 callout_stop(&sc->sc_ad_tmo); 1724 sc->sc_vhid = -1; 1725 while (!LIST_EMPTY(&im6o->im6o_memberships)) { 1726 struct in6_multi_mship *imm = 1727 LIST_FIRST(&im6o->im6o_memberships); 1728 1729 LIST_REMOVE(imm, i6mm_chain); 1730 in6_leavegroup(imm); 1731 } 1732 
im6o->im6o_multicast_ifp = NULL; 1733 TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list); 1734 if (TAILQ_EMPTY(&cif->vhif_vrs)) { 1735 sc->sc_carpdev->if_carp = NULL; 1736 kfree(cif, M_IFADDR); 1737 } 1738 } 1739 return (error); 1740 } 1741 #endif /* INET6 */ 1742 1743 static int 1744 carp_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr, struct ucred *cr) 1745 { 1746 struct carp_softc *sc = ifp->if_softc, *vr; 1747 struct carpreq carpr; 1748 struct ifaddr *ifa; 1749 struct ifreq *ifr; 1750 struct ifaliasreq *ifra; 1751 struct ifdrv *ifd; 1752 char devname[IFNAMSIZ]; 1753 int error = 0; 1754 1755 carp_gettok(); 1756 1757 ifa = (struct ifaddr *)addr; 1758 ifra = (struct ifaliasreq *)addr; 1759 ifr = (struct ifreq *)addr; 1760 ifd = (struct ifdrv *)addr; 1761 1762 switch (cmd) { 1763 case SIOCSIFADDR: 1764 switch (ifa->ifa_addr->sa_family) { 1765 #ifdef INET 1766 case AF_INET: 1767 ifp->if_flags |= IFF_UP | IFF_RUNNING; 1768 break; 1769 #endif /* INET */ 1770 #ifdef INET6 1771 case AF_INET6: 1772 ifp->if_flags |= IFF_UP | IFF_RUNNING; 1773 error = carp_set_addr6(sc, satosin6(ifa->ifa_addr)); 1774 break; 1775 #endif /* INET6 */ 1776 default: 1777 error = EAFNOSUPPORT; 1778 break; 1779 } 1780 break; 1781 1782 case SIOCAIFADDR: 1783 switch (ifa->ifa_addr->sa_family) { 1784 #ifdef INET 1785 case AF_INET: 1786 panic("SIOCAIFADDR should never be seen\n"); 1787 #endif /* INET */ 1788 #ifdef INET6 1789 case AF_INET6: 1790 ifp->if_flags |= IFF_UP | IFF_RUNNING; 1791 error = carp_set_addr6(sc, satosin6(&ifra->ifra_addr)); 1792 break; 1793 #endif /* INET6 */ 1794 default: 1795 error = EAFNOSUPPORT; 1796 break; 1797 } 1798 break; 1799 1800 case SIOCDIFADDR: 1801 switch (ifa->ifa_addr->sa_family) { 1802 #ifdef INET 1803 case AF_INET: 1804 panic("SIOCDIFADDR should never be seen\n"); 1805 #endif /* INET */ 1806 #ifdef INET6 1807 case AF_INET6: 1808 error = carp_del_addr6(sc, satosin6(&ifra->ifra_addr)); 1809 break; 1810 #endif /* INET6 */ 1811 default: 1812 error = EAFNOSUPPORT; 1813 
break; 1814 } 1815 break; 1816 1817 case SIOCSIFFLAGS: 1818 if (ifp->if_flags & IFF_UP) { 1819 if ((ifp->if_flags & IFF_RUNNING) == 0) { 1820 ifp->if_flags |= IFF_RUNNING; 1821 carp_set_state(sc, INIT); 1822 carp_setrun(sc, 0); 1823 } 1824 } else if (ifp->if_flags & IFF_RUNNING) { 1825 carp_stop(sc, 0); 1826 } 1827 break; 1828 1829 case SIOCSVH: 1830 error = priv_check_cred(cr, PRIV_ROOT, NULL_CRED_OKAY); 1831 if (error) 1832 break; 1833 error = copyin(ifr->ifr_data, &carpr, sizeof(carpr)); 1834 if (error) 1835 break; 1836 1837 error = 1; 1838 if ((ifp->if_flags & IFF_RUNNING) && 1839 sc->sc_state != INIT && carpr.carpr_state != sc->sc_state) { 1840 switch (carpr.carpr_state) { 1841 case BACKUP: 1842 callout_stop(&sc->sc_ad_tmo); 1843 carp_set_state(sc, BACKUP); 1844 carp_setrun(sc, 0); 1845 carp_setroute(sc, RTM_DELETE); 1846 break; 1847 1848 case MASTER: 1849 carp_master_down(sc); 1850 break; 1851 1852 default: 1853 break; 1854 } 1855 } 1856 if (carpr.carpr_vhid > 0) { 1857 if (carpr.carpr_vhid > 255) { 1858 error = EINVAL; 1859 break; 1860 } 1861 if (sc->sc_carpdev) { 1862 struct carp_if *cif = sc->sc_carpdev->if_carp; 1863 1864 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) { 1865 if (vr != sc && 1866 vr->sc_vhid == carpr.carpr_vhid) { 1867 carp_reltok(); 1868 return EEXIST; 1869 } 1870 } 1871 } 1872 sc->sc_vhid = carpr.carpr_vhid; 1873 IF_LLADDR(ifp)[0] = 0; 1874 IF_LLADDR(ifp)[1] = 0; 1875 IF_LLADDR(ifp)[2] = 0x5e; 1876 IF_LLADDR(ifp)[3] = 0; 1877 IF_LLADDR(ifp)[4] = 1; 1878 IF_LLADDR(ifp)[5] = sc->sc_vhid; 1879 error--; 1880 } 1881 if (carpr.carpr_advbase > 0 || carpr.carpr_advskew > 0) { 1882 if (carpr.carpr_advskew >= 255) { 1883 error = EINVAL; 1884 break; 1885 } 1886 if (carpr.carpr_advbase > 255) { 1887 error = EINVAL; 1888 break; 1889 } 1890 sc->sc_advbase = carpr.carpr_advbase; 1891 sc->sc_advskew = carpr.carpr_advskew; 1892 error--; 1893 } 1894 bcopy(carpr.carpr_key, sc->sc_key, sizeof(sc->sc_key)); 1895 if (error > 0) { 1896 error = EINVAL; 1897 } else 
{ 1898 error = 0; 1899 carp_setrun(sc, 0); 1900 } 1901 break; 1902 1903 case SIOCGVH: 1904 bzero(&carpr, sizeof(carpr)); 1905 carpr.carpr_state = sc->sc_state; 1906 carpr.carpr_vhid = sc->sc_vhid; 1907 carpr.carpr_advbase = sc->sc_advbase; 1908 carpr.carpr_advskew = sc->sc_advskew; 1909 error = priv_check_cred(cr, PRIV_ROOT, NULL_CRED_OKAY); 1910 if (error == 0) { 1911 bcopy(sc->sc_key, carpr.carpr_key, 1912 sizeof(carpr.carpr_key)); 1913 } 1914 1915 error = copyout(&carpr, ifr->ifr_data, sizeof(carpr)); 1916 break; 1917 1918 case SIOCGDRVSPEC: 1919 switch (ifd->ifd_cmd) { 1920 case CARPGDEVNAME: 1921 if (ifd->ifd_len != sizeof(devname)) 1922 error = EINVAL; 1923 break; 1924 1925 case CARPGVHADDR: 1926 break; 1927 1928 default: 1929 error = EINVAL; 1930 break; 1931 } 1932 if (error) 1933 break; 1934 1935 switch (ifd->ifd_cmd) { 1936 case CARPGVHADDR: 1937 error = carp_get_vhaddr(sc, ifd); 1938 break; 1939 1940 case CARPGDEVNAME: 1941 bzero(devname, sizeof(devname)); 1942 if (sc->sc_carpdev != NULL) { 1943 strlcpy(devname, sc->sc_carpdev->if_xname, 1944 sizeof(devname)); 1945 } 1946 error = copyout(devname, ifd->ifd_data, 1947 sizeof(devname)); 1948 break; 1949 } 1950 break; 1951 1952 default: 1953 error = EINVAL; 1954 break; 1955 } 1956 carp_hmac_prepare(sc); 1957 1958 carp_reltok(); 1959 return error; 1960 } 1961 1962 /* 1963 * XXX: this is looutput. We should eventually use it from there. 1964 */ 1965 static int 1966 carp_looutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, 1967 struct rtentry *rt) 1968 { 1969 uint32_t af; 1970 1971 M_ASSERTPKTHDR(m); /* check if we have the packet header */ 1972 1973 if (rt && rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) { 1974 m_freem(m); 1975 return (rt->rt_flags & RTF_BLACKHOLE ? 0 : 1976 rt->rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH); 1977 } 1978 1979 ifp->if_opackets++; 1980 ifp->if_obytes += m->m_pkthdr.len; 1981 1982 /* BPF writes need to be handled specially. 
*/ 1983 if (dst->sa_family == AF_UNSPEC) { 1984 bcopy(dst->sa_data, &af, sizeof(af)); 1985 dst->sa_family = af; 1986 } 1987 1988 #if 1 /* XXX */ 1989 switch (dst->sa_family) { 1990 case AF_INET: 1991 case AF_INET6: 1992 case AF_IPX: 1993 break; 1994 1995 default: 1996 m_freem(m); 1997 return (EAFNOSUPPORT); 1998 } 1999 #endif 2000 return (if_simloop(ifp, m, dst->sa_family, 0)); 2001 } 2002 2003 /* 2004 * Start output on carp interface. This function should never be called. 2005 */ 2006 static void 2007 carp_start(struct ifnet *ifp) 2008 { 2009 #ifdef DEBUG 2010 kprintf("%s: start called\n", ifp->if_xname); 2011 #endif 2012 } 2013 2014 int 2015 carp_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa, 2016 struct rtentry *rt) 2017 { 2018 struct m_tag *mtag; 2019 struct carp_softc *sc; 2020 struct ifnet *carp_ifp; 2021 struct ether_header *eh; 2022 2023 ASSERT_LWKT_TOKEN_HELD(&carp_tok); 2024 2025 if (!sa) 2026 return (0); 2027 2028 switch (sa->sa_family) { 2029 #ifdef INET 2030 case AF_INET: 2031 break; 2032 #endif /* INET */ 2033 #ifdef INET6 2034 case AF_INET6: 2035 break; 2036 #endif /* INET6 */ 2037 default: 2038 return (0); 2039 } 2040 2041 mtag = m_tag_find(m, PACKET_TAG_CARP, NULL); 2042 if (mtag == NULL) 2043 return (0); 2044 2045 bcopy(mtag + 1, &carp_ifp, sizeof(struct ifnet *)); 2046 sc = carp_ifp->if_softc; 2047 2048 /* Set the source MAC address to Virtual Router MAC Address */ 2049 switch (ifp->if_type) { 2050 case IFT_ETHER: 2051 case IFT_L2VLAN: 2052 eh = mtod(m, struct ether_header *); 2053 eh->ether_shost[0] = 0; 2054 eh->ether_shost[1] = 0; 2055 eh->ether_shost[2] = 0x5e; 2056 eh->ether_shost[3] = 0; 2057 eh->ether_shost[4] = 1; 2058 eh->ether_shost[5] = sc->sc_vhid; 2059 break; 2060 2061 default: 2062 if_printf(ifp, "carp is not supported for this " 2063 "interface type\n"); 2064 return (EOPNOTSUPP); 2065 } 2066 return (0); 2067 } 2068 2069 static void 2070 carp_set_state(struct carp_softc *sc, int state) 2071 { 2072 struct ifnet *cifp 
= &sc->sc_if; 2073 2074 if (sc->sc_state == state) 2075 return; 2076 sc->sc_state = state; 2077 2078 switch (sc->sc_state) { 2079 case BACKUP: 2080 cifp->if_link_state = LINK_STATE_DOWN; 2081 break; 2082 2083 case MASTER: 2084 cifp->if_link_state = LINK_STATE_UP; 2085 break; 2086 2087 default: 2088 cifp->if_link_state = LINK_STATE_UNKNOWN; 2089 break; 2090 } 2091 rt_ifmsg(cifp); 2092 } 2093 2094 void 2095 carp_group_demote_adj(struct ifnet *ifp, int adj) 2096 { 2097 struct ifg_list *ifgl; 2098 int *dm; 2099 2100 carp_gettok(); 2101 2102 TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) { 2103 if (!strcmp(ifgl->ifgl_group->ifg_group, IFG_ALL)) 2104 continue; 2105 dm = &ifgl->ifgl_group->ifg_carp_demoted; 2106 2107 if (*dm + adj >= 0) 2108 *dm += adj; 2109 else 2110 *dm = 0; 2111 2112 if (adj > 0 && *dm == 1) 2113 carp_send_ad_all(); 2114 CARP_LOG("%s demoted group %s to %d", ifp->if_xname, 2115 ifgl->ifgl_group->ifg_group, *dm); 2116 } 2117 2118 carp_reltok(); 2119 } 2120 2121 void 2122 carp_carpdev_state(void *v) 2123 { 2124 struct carp_if *cif = v; 2125 struct carp_softc *sc; 2126 2127 carp_gettok(); 2128 2129 TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) 2130 carp_sc_state(sc); 2131 2132 carp_reltok(); 2133 } 2134 2135 static void 2136 carp_sc_state(struct carp_softc *sc) 2137 { 2138 if (!(sc->sc_carpdev->if_flags & IFF_UP)) { 2139 callout_stop(&sc->sc_ad_tmo); 2140 callout_stop(&sc->sc_md_tmo); 2141 callout_stop(&sc->sc_md6_tmo); 2142 carp_set_state(sc, INIT); 2143 carp_setrun(sc, 0); 2144 if (!sc->sc_suppress) { 2145 carp_suppress_preempt++; 2146 if (carp_suppress_preempt == 1) 2147 carp_send_ad_all(); 2148 } 2149 sc->sc_suppress = 1; 2150 } else { 2151 carp_set_state(sc, INIT); 2152 carp_setrun(sc, 0); 2153 if (sc->sc_suppress) 2154 carp_suppress_preempt--; 2155 sc->sc_suppress = 0; 2156 } 2157 } 2158 2159 static void 2160 carp_stop(struct carp_softc *sc, int detach) 2161 { 2162 sc->sc_if.if_flags &= ~IFF_RUNNING; 2163 2164 callout_stop(&sc->sc_ad_tmo); 2165 
callout_stop(&sc->sc_md_tmo); 2166 callout_stop(&sc->sc_md6_tmo); 2167 2168 if (!detach && sc->sc_state == MASTER) 2169 carp_send_ad(sc); 2170 2171 if (sc->sc_suppress) 2172 carp_suppress_preempt--; 2173 sc->sc_suppress = 0; 2174 2175 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) 2176 carp_suppress_preempt--; 2177 sc->sc_sendad_errors = 0; 2178 sc->sc_sendad_success = 0; 2179 2180 carp_set_state(sc, INIT); 2181 carp_setrun(sc, 0); 2182 } 2183 2184 static void 2185 carp_reset(struct carp_softc *sc, int detach) 2186 { 2187 struct ifnet *cifp = &sc->sc_if; 2188 2189 carp_stop(sc, detach); 2190 if (!sc->sc_dead && (cifp->if_flags & IFF_UP)) 2191 cifp->if_flags |= IFF_RUNNING; 2192 } 2193 2194 static int 2195 carp_activate_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha, 2196 struct ifnet *ifp, const struct in_ifaddr *ia_if, int own) 2197 { 2198 struct ip_moptions *imo = &sc->sc_imo; 2199 struct carp_if *cif; 2200 struct carp_softc *vr, *after = NULL; 2201 int onlist, error; 2202 #ifdef INVARIANTS 2203 int assert_onlist; 2204 #endif 2205 2206 KKASSERT(vha->vha_ia != NULL); 2207 2208 KASSERT(ia_if != NULL, ("NULL backing address\n")); 2209 KASSERT(vha->vha_iaback == NULL, ("%p is already activated\n", vha)); 2210 KASSERT((vha->vha_flags & CARP_VHAF_OWNER) == 0, 2211 ("inactive vhaddr %p is the address owner\n", vha)); 2212 2213 KASSERT(sc->sc_carpdev == NULL || sc->sc_carpdev == ifp, 2214 ("%s is already on %s\n", sc->sc_if.if_xname, 2215 sc->sc_carpdev->if_xname)); 2216 2217 KASSERT(imo->imo_multicast_ifp == NULL || 2218 imo->imo_multicast_ifp == ifp, 2219 ("%s didn't leave mcast group on %s\n", 2220 sc->sc_if.if_xname, imo->imo_multicast_ifp->if_xname)); 2221 2222 if (imo->imo_num_memberships == 0) { 2223 struct in_addr addr; 2224 2225 addr.s_addr = htonl(INADDR_CARP_GROUP); 2226 if ((imo->imo_membership[0] = in_addmulti(&addr, ifp)) == NULL) 2227 return ENOBUFS; 2228 imo->imo_num_memberships++; 2229 imo->imo_multicast_ifp = ifp; 2230 imo->imo_multicast_ttl 
= CARP_DFLTTL; 2231 imo->imo_multicast_loop = 0; 2232 } 2233 2234 if (!ifp->if_carp) { 2235 KASSERT(sc->sc_carpdev == NULL, 2236 ("%s is already on %s\n", sc->sc_if.if_xname, 2237 sc->sc_carpdev->if_xname)); 2238 2239 cif = kmalloc(sizeof(*cif), M_CARP, M_WAITOK | M_ZERO); 2240 2241 error = ifpromisc(ifp, 1); 2242 if (error) { 2243 kfree(cif, M_CARP); 2244 goto cleanup; 2245 } 2246 2247 TAILQ_INIT(&cif->vhif_vrs); 2248 ifp->if_carp = cif; 2249 } else { 2250 cif = ifp->if_carp; 2251 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) { 2252 if (vr != sc && vr->sc_vhid == sc->sc_vhid) { 2253 error = EINVAL; 2254 goto cleanup; 2255 } 2256 } 2257 } 2258 2259 #ifdef INVARIANTS 2260 if (sc->sc_carpdev != NULL) 2261 assert_onlist = 1; 2262 else 2263 assert_onlist = 0; 2264 #endif 2265 sc->sc_ia = ia_if; 2266 sc->sc_carpdev = ifp; 2267 2268 cif = ifp->if_carp; 2269 onlist = 0; 2270 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) { 2271 if (vr == sc) 2272 onlist = 1; 2273 if (vr->sc_vhid < sc->sc_vhid) 2274 after = vr; 2275 } 2276 2277 #ifdef INVARIANTS 2278 if (assert_onlist) { 2279 KASSERT(onlist, ("%s is not on %s carp list\n", 2280 sc->sc_if.if_xname, ifp->if_xname)); 2281 } else { 2282 KASSERT(!onlist, ("%s is already on %s carp list\n", 2283 sc->sc_if.if_xname, ifp->if_xname)); 2284 } 2285 #endif 2286 2287 if (!onlist) { 2288 /* We're trying to keep things in order */ 2289 if (after == NULL) 2290 TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list); 2291 else 2292 TAILQ_INSERT_AFTER(&cif->vhif_vrs, after, sc, sc_list); 2293 } 2294 2295 vha->vha_iaback = ia_if; 2296 sc->sc_naddrs++; 2297 2298 if (own) { 2299 vha->vha_flags |= CARP_VHAF_OWNER; 2300 2301 /* XXX save user configured advskew? 
*/ 2302 sc->sc_advskew = 0; 2303 } 2304 2305 carp_hmac_prepare(sc); 2306 carp_set_state(sc, INIT); 2307 carp_setrun(sc, 0); 2308 return 0; 2309 cleanup: 2310 carp_multicast_cleanup(sc); 2311 return error; 2312 } 2313 2314 static void 2315 carp_deactivate_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha) 2316 { 2317 KKASSERT(vha->vha_ia != NULL); 2318 2319 carp_hmac_prepare(sc); 2320 2321 if (vha->vha_iaback == NULL) { 2322 KASSERT((vha->vha_flags & CARP_VHAF_OWNER) == 0, 2323 ("inactive vhaddr %p is the address owner\n", vha)); 2324 return; 2325 } 2326 2327 vha->vha_flags &= ~CARP_VHAF_OWNER; 2328 2329 KKASSERT(sc->sc_naddrs > 0); 2330 vha->vha_iaback = NULL; 2331 sc->sc_naddrs--; 2332 if (!sc->sc_naddrs) { 2333 if (sc->sc_naddrs6) { 2334 carp_multicast_cleanup(sc); 2335 sc->sc_ia = NULL; 2336 } else { 2337 carp_detach(sc, 0); 2338 } 2339 } 2340 } 2341 2342 static void 2343 carp_link_addrs(struct carp_softc *sc, struct ifnet *ifp, struct ifaddr *ifa_if) 2344 { 2345 struct carp_vhaddr *vha; 2346 struct in_ifaddr *ia_if; 2347 2348 KKASSERT(ifa_if->ifa_addr->sa_family == AF_INET); 2349 ia_if = ifatoia(ifa_if); 2350 2351 if ((ia_if->ia_flags & IFA_ROUTE) == 0) 2352 return; 2353 2354 /* 2355 * Test each inactive vhaddr against the newly added address. 2356 * If the newly added address could be the backing address, 2357 * then activate the matching vhaddr. 
2358 */ 2359 TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) { 2360 const struct in_ifaddr *ia; 2361 u_long iaddr; 2362 int own; 2363 2364 if (vha->vha_iaback != NULL) 2365 continue; 2366 2367 ia = vha->vha_ia; 2368 iaddr = ntohl(ia->ia_addr.sin_addr.s_addr); 2369 2370 if ((iaddr & ia_if->ia_subnetmask) != ia_if->ia_subnet) 2371 continue; 2372 2373 own = 0; 2374 if (ia->ia_addr.sin_addr.s_addr == 2375 ia_if->ia_addr.sin_addr.s_addr) 2376 own = 1; 2377 2378 carp_activate_vhaddr(sc, vha, ifp, ia_if, own); 2379 } 2380 } 2381 2382 static void 2383 carp_unlink_addrs(struct carp_softc *sc, struct ifnet *ifp, 2384 struct ifaddr *ifa_if) 2385 { 2386 struct carp_vhaddr *vha; 2387 struct in_ifaddr *ia_if; 2388 2389 KKASSERT(ifa_if->ifa_addr->sa_family == AF_INET); 2390 ia_if = ifatoia(ifa_if); 2391 2392 /* 2393 * Ad src address is deleted; set it to NULL. 2394 * Following loop will try pick up a new ad src address 2395 * if one of the vhaddr could retain its backing address. 2396 */ 2397 if (sc->sc_ia == ia_if) 2398 sc->sc_ia = NULL; 2399 2400 /* 2401 * Test each active vhaddr against the deleted address. 2402 * If the deleted address is vhaddr address's backing 2403 * address, then deactivate the vhaddr. 
2404 */ 2405 TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) { 2406 if (vha->vha_iaback == NULL) 2407 continue; 2408 2409 if (vha->vha_iaback == ia_if) 2410 carp_deactivate_vhaddr(sc, vha); 2411 else if (sc->sc_ia == NULL) 2412 sc->sc_ia = vha->vha_iaback; 2413 } 2414 } 2415 2416 static void 2417 carp_update_addrs(struct carp_softc *sc) 2418 { 2419 struct carp_vhaddr *vha; 2420 2421 KKASSERT(sc->sc_carpdev == NULL); 2422 2423 TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) 2424 carp_config_vhaddr(sc, vha); 2425 } 2426 2427 static void 2428 carp_ifaddr(void *arg __unused, struct ifnet *ifp, 2429 enum ifaddr_event event, struct ifaddr *ifa) 2430 { 2431 struct carp_softc *sc; 2432 2433 carp_gettok(); 2434 2435 if (ifa->ifa_addr->sa_family != AF_INET) 2436 goto back; 2437 2438 if (ifp->if_type == IFT_CARP) { 2439 /* 2440 * Address is changed on carp(4) interface 2441 */ 2442 switch (event) { 2443 case IFADDR_EVENT_ADD: 2444 carp_add_addr(ifp->if_softc, ifa); 2445 break; 2446 2447 case IFADDR_EVENT_CHANGE: 2448 carp_config_addr(ifp->if_softc, ifa); 2449 break; 2450 2451 case IFADDR_EVENT_DELETE: 2452 carp_del_addr(ifp->if_softc, ifa); 2453 break; 2454 } 2455 goto back; 2456 } 2457 2458 /* 2459 * Address is changed on non-carp(4) interface 2460 */ 2461 if ((ifp->if_flags & IFF_MULTICAST) == 0) 2462 goto back; 2463 2464 LIST_FOREACH(sc, &carpif_list, sc_next) { 2465 if (sc->sc_carpdev != NULL && sc->sc_carpdev != ifp) { 2466 /* Not the parent iface; skip */ 2467 continue; 2468 } 2469 2470 switch (event) { 2471 case IFADDR_EVENT_ADD: 2472 carp_link_addrs(sc, ifp, ifa); 2473 break; 2474 2475 case IFADDR_EVENT_DELETE: 2476 if (sc->sc_carpdev != NULL) { 2477 carp_unlink_addrs(sc, ifp, ifa); 2478 if (sc->sc_carpdev == NULL) { 2479 /* 2480 * We no longer have the parent 2481 * interface, however, certain 2482 * virtual addresses, which are 2483 * not used because they can't 2484 * match the previous parent 2485 * interface's addresses, may now 2486 * match different interface's 
2487 * addresses. 2488 */ 2489 carp_update_addrs(sc); 2490 } 2491 } else { 2492 /* 2493 * The carp(4) interface didn't have a 2494 * parent iface, so it is not possible 2495 * that it will contain any address to 2496 * be unlinked. 2497 */ 2498 } 2499 break; 2500 2501 case IFADDR_EVENT_CHANGE: 2502 if (sc->sc_carpdev == NULL) { 2503 /* 2504 * The carp(4) interface didn't have a 2505 * parent iface, so it is not possible 2506 * that it will contain any address to 2507 * be updated. 2508 */ 2509 carp_link_addrs(sc, ifp, ifa); 2510 } else { 2511 /* 2512 * First try breaking tie with the old 2513 * address. Then see whether we could 2514 * link certain vhaddr to the new address. 2515 * If that fails, i.e. carpdev is NULL, 2516 * we try a global update. 2517 * 2518 * NOTE: The above order is critical. 2519 */ 2520 carp_unlink_addrs(sc, ifp, ifa); 2521 carp_link_addrs(sc, ifp, ifa); 2522 if (sc->sc_carpdev == NULL) { 2523 /* 2524 * See the comment in the above 2525 * IFADDR_EVENT_DELETE block. 
2526 */ 2527 carp_update_addrs(sc); 2528 } 2529 } 2530 break; 2531 } 2532 } 2533 2534 back: 2535 carp_reltok(); 2536 } 2537 2538 void 2539 carp_gettok(void) 2540 { 2541 lwkt_gettoken(&carp_tok); 2542 } 2543 2544 void 2545 carp_reltok(void) 2546 { 2547 lwkt_reltoken(&carp_tok); 2548 } 2549 2550 static int 2551 carp_modevent(module_t mod, int type, void *data) 2552 { 2553 switch (type) { 2554 case MOD_LOAD: 2555 LIST_INIT(&carpif_list); 2556 carp_ifdetach_event = 2557 EVENTHANDLER_REGISTER(ifnet_detach_event, carp_ifdetach, NULL, 2558 EVENTHANDLER_PRI_ANY); 2559 carp_ifaddr_event = 2560 EVENTHANDLER_REGISTER(ifaddr_event, carp_ifaddr, NULL, 2561 EVENTHANDLER_PRI_ANY); 2562 if_clone_attach(&carp_cloner); 2563 break; 2564 2565 case MOD_UNLOAD: 2566 EVENTHANDLER_DEREGISTER(ifnet_detach_event, 2567 carp_ifdetach_event); 2568 EVENTHANDLER_DEREGISTER(ifaddr_event, 2569 carp_ifaddr_event); 2570 if_clone_detach(&carp_cloner); 2571 break; 2572 2573 default: 2574 return (EINVAL); 2575 } 2576 return (0); 2577 } 2578 2579 static moduledata_t carp_mod = { 2580 "carp", 2581 carp_modevent, 2582 0 2583 }; 2584 DECLARE_MODULE(carp, carp_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); 2585