/*	$OpenBSD: ip_carp.c,v 1.232 2014/07/12 18:44:23 tedu Exp $	*/

/*
 * Copyright (c) 2002 Michael Shalayeff. All rights reserved.
 * Copyright (c) 2003 Ryan McBride. All rights reserved.
 * Copyright (c) 2006-2008 Marco Pfatschbacher. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 * THE POSSIBILITY OF SUCH DAMAGE.
28 */ 29 30 /* 31 * TODO: 32 * - iface reconfigure 33 * - support for hardware checksum calculations; 34 * 35 */ 36 37 #include "ether.h" 38 39 #include <sys/param.h> 40 #include <sys/systm.h> 41 #include <sys/mbuf.h> 42 #include <sys/socket.h> 43 #include <sys/socketvar.h> 44 #include <sys/timeout.h> 45 #include <sys/ioctl.h> 46 #include <sys/errno.h> 47 #include <sys/device.h> 48 #include <sys/kernel.h> 49 #include <sys/sysctl.h> 50 #include <sys/syslog.h> 51 52 #include <net/if.h> 53 #include <net/if_types.h> 54 #include <net/route.h> 55 #include <net/netisr.h> 56 57 /* for arc4random() */ 58 #include <dev/rndvar.h> 59 60 #include <crypto/sha1.h> 61 62 #ifdef INET 63 #include <netinet/in.h> 64 #include <netinet/in_systm.h> 65 #include <netinet/in_var.h> 66 #include <netinet/ip.h> 67 #include <netinet/ip_var.h> 68 #include <netinet/if_ether.h> 69 #include <netinet/ip_ipsp.h> 70 71 #include <net/if_enc.h> 72 #include <net/if_dl.h> 73 #endif 74 75 #ifdef INET6 76 #include <netinet6/in6_var.h> 77 #include <netinet/icmp6.h> 78 #include <netinet/ip6.h> 79 #include <netinet6/ip6_var.h> 80 #include <netinet6/nd6.h> 81 #include <netinet6/in6_ifattach.h> 82 #endif 83 84 #include "bpfilter.h" 85 #if NBPFILTER > 0 86 #include <net/bpf.h> 87 #endif 88 89 #include <netinet/ip_carp.h> 90 91 struct carp_mc_entry { 92 LIST_ENTRY(carp_mc_entry) mc_entries; 93 union { 94 struct ether_multi *mcu_enm; 95 } mc_u; 96 struct sockaddr_storage mc_addr; 97 }; 98 #define mc_enm mc_u.mcu_enm 99 100 enum { HMAC_ORIG=0, HMAC_NOV6LL=1, HMAC_MAX=2 }; 101 102 struct carp_vhost_entry { 103 LIST_ENTRY(carp_vhost_entry) vhost_entries; 104 struct carp_softc *parent_sc; 105 int vhe_leader; 106 int vhid; 107 int advskew; 108 enum { INIT = 0, BACKUP, MASTER } state; 109 struct timeout ad_tmo; /* advertisement timeout */ 110 struct timeout md_tmo; /* master down timeout */ 111 struct timeout md6_tmo; /* master down timeout */ 112 113 u_int64_t vhe_replay_cookie; 114 115 /* authentication */ 116 #define 
CARP_HMAC_PAD 64 117 unsigned char vhe_pad[CARP_HMAC_PAD]; 118 SHA1_CTX vhe_sha1[HMAC_MAX]; 119 120 u_int8_t vhe_enaddr[ETHER_ADDR_LEN]; 121 struct sockaddr_dl vhe_sdl; /* for IPv6 ndp balancing */ 122 }; 123 124 struct carp_softc { 125 struct arpcom sc_ac; 126 #define sc_if sc_ac.ac_if 127 #define sc_carpdev sc_ac.ac_if.if_carpdev 128 void *ah_cookie; 129 void *lh_cookie; 130 struct ip_moptions sc_imo; 131 #ifdef INET6 132 struct ip6_moptions sc_im6o; 133 #endif /* INET6 */ 134 TAILQ_ENTRY(carp_softc) sc_list; 135 136 int sc_suppress; 137 int sc_bow_out; 138 int sc_demote_cnt; 139 140 int sc_sendad_errors; 141 #define CARP_SENDAD_MAX_ERRORS(sc) (3 * (sc)->sc_vhe_count) 142 int sc_sendad_success; 143 #define CARP_SENDAD_MIN_SUCCESS(sc) (3 * (sc)->sc_vhe_count) 144 145 char sc_curlladdr[ETHER_ADDR_LEN]; 146 147 LIST_HEAD(__carp_vhosthead, carp_vhost_entry) carp_vhosts; 148 int sc_vhe_count; 149 u_int8_t sc_vhids[CARP_MAXNODES]; 150 u_int8_t sc_advskews[CARP_MAXNODES]; 151 u_int8_t sc_balancing; 152 153 int sc_naddrs; 154 int sc_naddrs6; 155 int sc_advbase; /* seconds */ 156 157 /* authentication */ 158 unsigned char sc_key[CARP_KEY_LEN]; 159 160 u_int32_t sc_hashkey[2]; 161 u_int32_t sc_lsmask; /* load sharing mask */ 162 int sc_lscount; /* # load sharing interfaces (max 32) */ 163 int sc_delayed_arp; /* delayed ARP request countdown */ 164 int sc_realmac; /* using real mac */ 165 166 struct in_addr sc_peer; 167 168 LIST_HEAD(__carp_mchead, carp_mc_entry) carp_mc_listhead; 169 struct carp_vhost_entry *cur_vhe; /* current active vhe */ 170 }; 171 172 int carp_opts[CARPCTL_MAXID] = { 0, 1, 0, LOG_CRIT }; /* XXX for now */ 173 struct carpstats carpstats; 174 175 struct carp_if { 176 TAILQ_HEAD(, carp_softc) vhif_vrs; 177 int vhif_nvrs; 178 179 struct ifnet *vhif_ifp; 180 }; 181 182 #define CARP_LOG(l, sc, s) \ 183 do { \ 184 if (carp_opts[CARPCTL_LOG] >= l) { \ 185 if (sc) \ 186 log(l, "%s: ", \ 187 (sc)->sc_if.if_xname); \ 188 else \ 189 log(l, "carp: "); \ 190 addlog 
s; \ 191 addlog("\n"); \ 192 } \ 193 } while (0) 194 195 void carp_hmac_prepare(struct carp_softc *); 196 void carp_hmac_prepare_ctx(struct carp_vhost_entry *, u_int8_t); 197 void carp_hmac_generate(struct carp_vhost_entry *, u_int32_t *, 198 unsigned char *, u_int8_t); 199 int carp_hmac_verify(struct carp_vhost_entry *, u_int32_t *, 200 unsigned char *); 201 void carp_proto_input_c(struct mbuf *, struct carp_header *, int, 202 sa_family_t); 203 void carpattach(int); 204 void carpdetach(struct carp_softc *); 205 int carp_prepare_ad(struct mbuf *, struct carp_vhost_entry *, 206 struct carp_header *); 207 void carp_send_ad_all(void); 208 void carp_vhe_send_ad_all(struct carp_softc *); 209 void carp_send_ad(void *); 210 void carp_send_arp(struct carp_softc *); 211 void carp_master_down(void *); 212 int carp_ioctl(struct ifnet *, u_long, caddr_t); 213 int carp_vhids_ioctl(struct carp_softc *, struct carpreq *); 214 int carp_check_dup_vhids(struct carp_softc *, struct carp_if *, 215 struct carpreq *); 216 void carp_ifgroup_ioctl(struct ifnet *, u_long, caddr_t); 217 void carp_ifgattr_ioctl(struct ifnet *, u_long, caddr_t); 218 void carp_start(struct ifnet *); 219 void carp_setrun_all(struct carp_softc *, sa_family_t); 220 void carp_setrun(struct carp_vhost_entry *, sa_family_t); 221 void carp_set_state_all(struct carp_softc *, int); 222 void carp_set_state(struct carp_vhost_entry *, int); 223 void carp_multicast_cleanup(struct carp_softc *); 224 int carp_set_ifp(struct carp_softc *, struct ifnet *); 225 void carp_set_enaddr(struct carp_softc *); 226 void carp_set_vhe_enaddr(struct carp_vhost_entry *); 227 void carp_addr_updated(void *); 228 u_int32_t carp_hash(struct carp_softc *, u_char *); 229 int carp_set_addr(struct carp_softc *, struct sockaddr_in *); 230 int carp_join_multicast(struct carp_softc *); 231 #ifdef INET6 232 void carp_send_na(struct carp_softc *); 233 int carp_set_addr6(struct carp_softc *, struct sockaddr_in6 *); 234 int carp_join_multicast6(struct 
carp_softc *); 235 #endif 236 int carp_clone_create(struct if_clone *, int); 237 int carp_clone_destroy(struct ifnet *); 238 int carp_ether_addmulti(struct carp_softc *, struct ifreq *); 239 int carp_ether_delmulti(struct carp_softc *, struct ifreq *); 240 void carp_ether_purgemulti(struct carp_softc *); 241 int carp_group_demote_count(struct carp_softc *); 242 void carp_update_lsmask(struct carp_softc *); 243 int carp_new_vhost(struct carp_softc *, int, int); 244 void carp_destroy_vhosts(struct carp_softc *); 245 void carp_del_all_timeouts(struct carp_softc *); 246 247 struct if_clone carp_cloner = 248 IF_CLONE_INITIALIZER("carp", carp_clone_create, carp_clone_destroy); 249 250 #define carp_cksum(_m, _l) ((u_int16_t)in_cksum((_m), (_l))) 251 #define CARP_IFQ_PRIO 6 252 253 void 254 carp_hmac_prepare(struct carp_softc *sc) 255 { 256 struct carp_vhost_entry *vhe; 257 u_int8_t i; 258 259 LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) { 260 for (i = 0; i < HMAC_MAX; i++) { 261 carp_hmac_prepare_ctx(vhe, i); 262 } 263 } 264 } 265 266 void 267 carp_hmac_prepare_ctx(struct carp_vhost_entry *vhe, u_int8_t ctx) 268 { 269 struct carp_softc *sc = vhe->parent_sc; 270 271 u_int8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT; 272 u_int8_t vhid = vhe->vhid & 0xff; 273 SHA1_CTX sha1ctx; 274 u_int32_t kmd[5]; 275 struct ifaddr *ifa; 276 int i, found; 277 struct in_addr last, cur, in; 278 #ifdef INET6 279 struct in6_addr last6, cur6, in6; 280 #endif /* INET6 */ 281 282 /* compute ipad from key */ 283 memset(vhe->vhe_pad, 0, sizeof(vhe->vhe_pad)); 284 bcopy(sc->sc_key, vhe->vhe_pad, sizeof(sc->sc_key)); 285 for (i = 0; i < sizeof(vhe->vhe_pad); i++) 286 vhe->vhe_pad[i] ^= 0x36; 287 288 /* precompute first part of inner hash */ 289 SHA1Init(&vhe->vhe_sha1[ctx]); 290 SHA1Update(&vhe->vhe_sha1[ctx], vhe->vhe_pad, sizeof(vhe->vhe_pad)); 291 SHA1Update(&vhe->vhe_sha1[ctx], (void *)&version, sizeof(version)); 292 SHA1Update(&vhe->vhe_sha1[ctx], (void *)&type, sizeof(type)); 293 
294 /* generate a key for the arpbalance hash, before the vhid is hashed */ 295 if (vhe->vhe_leader) { 296 bcopy(&vhe->vhe_sha1[ctx], &sha1ctx, sizeof(sha1ctx)); 297 SHA1Final((unsigned char *)kmd, &sha1ctx); 298 sc->sc_hashkey[0] = kmd[0] ^ kmd[1]; 299 sc->sc_hashkey[1] = kmd[2] ^ kmd[3]; 300 } 301 302 /* the rest of the precomputation */ 303 if (!sc->sc_realmac && vhe->vhe_leader && 304 memcmp(sc->sc_ac.ac_enaddr, vhe->vhe_enaddr, ETHER_ADDR_LEN) != 0) 305 SHA1Update(&vhe->vhe_sha1[ctx], sc->sc_ac.ac_enaddr, 306 ETHER_ADDR_LEN); 307 308 SHA1Update(&vhe->vhe_sha1[ctx], (void *)&vhid, sizeof(vhid)); 309 310 /* Hash the addresses from smallest to largest, not interface order */ 311 #ifdef INET 312 cur.s_addr = 0; 313 do { 314 found = 0; 315 last = cur; 316 cur.s_addr = 0xffffffff; 317 TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) { 318 if (ifa->ifa_addr->sa_family != AF_INET) 319 continue; 320 in.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr; 321 if (ntohl(in.s_addr) > ntohl(last.s_addr) && 322 ntohl(in.s_addr) < ntohl(cur.s_addr)) { 323 cur.s_addr = in.s_addr; 324 found++; 325 } 326 } 327 if (found) 328 SHA1Update(&vhe->vhe_sha1[ctx], 329 (void *)&cur, sizeof(cur)); 330 } while (found); 331 #endif /* INET */ 332 #ifdef INET6 333 memset(&cur6, 0x00, sizeof(cur6)); 334 do { 335 found = 0; 336 last6 = cur6; 337 memset(&cur6, 0xff, sizeof(cur6)); 338 TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) { 339 if (ifa->ifa_addr->sa_family != AF_INET6) 340 continue; 341 in6 = ifatoia6(ifa)->ia_addr.sin6_addr; 342 if (IN6_IS_SCOPE_EMBED(&in6)) { 343 if (ctx == HMAC_NOV6LL) 344 continue; 345 in6.s6_addr16[1] = 0; 346 } 347 if (memcmp(&in6, &last6, sizeof(in6)) > 0 && 348 memcmp(&in6, &cur6, sizeof(in6)) < 0) { 349 cur6 = in6; 350 found++; 351 } 352 } 353 if (found) 354 SHA1Update(&vhe->vhe_sha1[ctx], 355 (void *)&cur6, sizeof(cur6)); 356 } while (found); 357 #endif /* INET6 */ 358 359 /* convert ipad to opad */ 360 for (i = 0; i < sizeof(vhe->vhe_pad); i++) 361 
vhe->vhe_pad[i] ^= 0x36 ^ 0x5c; 362 } 363 364 void 365 carp_hmac_generate(struct carp_vhost_entry *vhe, u_int32_t counter[2], 366 unsigned char md[20], u_int8_t ctx) 367 { 368 SHA1_CTX sha1ctx; 369 370 /* fetch first half of inner hash */ 371 bcopy(&vhe->vhe_sha1[ctx], &sha1ctx, sizeof(sha1ctx)); 372 373 SHA1Update(&sha1ctx, (void *)counter, sizeof(vhe->vhe_replay_cookie)); 374 SHA1Final(md, &sha1ctx); 375 376 /* outer hash */ 377 SHA1Init(&sha1ctx); 378 SHA1Update(&sha1ctx, vhe->vhe_pad, sizeof(vhe->vhe_pad)); 379 SHA1Update(&sha1ctx, md, 20); 380 SHA1Final(md, &sha1ctx); 381 } 382 383 int 384 carp_hmac_verify(struct carp_vhost_entry *vhe, u_int32_t counter[2], 385 unsigned char md[20]) 386 { 387 unsigned char md2[20]; 388 u_int8_t i; 389 390 for (i = 0; i < HMAC_MAX; i++) { 391 carp_hmac_generate(vhe, counter, md2, i); 392 if (!timingsafe_bcmp(md, md2, sizeof(md2))) 393 return (0); 394 } 395 return (1); 396 } 397 398 /* 399 * process input packet. 400 * we have rearranged checks order compared to the rfc, 401 * but it seems more efficient this way or not possible otherwise. 402 */ 403 void 404 carp_proto_input(struct mbuf *m, ...) 405 { 406 struct ip *ip = mtod(m, struct ip *); 407 struct ifnet *ifp = m->m_pkthdr.rcvif; 408 struct carp_softc *sc = NULL; 409 struct carp_header *ch; 410 int iplen, len, hlen, ismulti; 411 va_list ap; 412 413 va_start(ap, m); 414 hlen = va_arg(ap, int); 415 va_end(ap); 416 417 carpstats.carps_ipackets++; 418 419 if (!carp_opts[CARPCTL_ALLOW]) { 420 m_freem(m); 421 return; 422 } 423 424 ismulti = IN_MULTICAST(ip->ip_dst.s_addr); 425 426 /* check if received on a valid carp interface */ 427 if (!((ifp->if_type == IFT_CARP && ismulti) || 428 (ifp->if_type != IFT_CARP && !ismulti && ifp->if_carp != NULL))) { 429 carpstats.carps_badif++; 430 CARP_LOG(LOG_INFO, sc, 431 ("packet received on non-carp interface: %s", 432 ifp->if_xname)); 433 m_freem(m); 434 return; 435 } 436 437 /* verify that the IP TTL is 255. 
*/ 438 if (ip->ip_ttl != CARP_DFLTTL) { 439 carpstats.carps_badttl++; 440 CARP_LOG(LOG_NOTICE, sc, ("received ttl %d != %d on %s", 441 ip->ip_ttl, CARP_DFLTTL, ifp->if_xname)); 442 m_freem(m); 443 return; 444 } 445 446 /* 447 * verify that the received packet length is 448 * equal to the CARP header 449 */ 450 iplen = ip->ip_hl << 2; 451 len = iplen + sizeof(*ch); 452 if (len > m->m_pkthdr.len) { 453 carpstats.carps_badlen++; 454 CARP_LOG(LOG_INFO, sc, ("packet too short %d on %s", 455 m->m_pkthdr.len, ifp->if_xname)); 456 m_freem(m); 457 return; 458 } 459 460 if ((m = m_pullup(m, len)) == NULL) { 461 carpstats.carps_hdrops++; 462 return; 463 } 464 ip = mtod(m, struct ip *); 465 ch = (struct carp_header *)(mtod(m, caddr_t) + iplen); 466 467 /* verify the CARP checksum */ 468 m->m_data += iplen; 469 if (carp_cksum(m, len - iplen)) { 470 carpstats.carps_badsum++; 471 CARP_LOG(LOG_INFO, sc, ("checksum failed on %s", 472 ifp->if_xname)); 473 m_freem(m); 474 return; 475 } 476 m->m_data -= iplen; 477 478 carp_proto_input_c(m, ch, ismulti, AF_INET); 479 } 480 481 #ifdef INET6 482 int 483 carp6_proto_input(struct mbuf **mp, int *offp, int proto) 484 { 485 struct mbuf *m = *mp; 486 struct ifnet *ifp = m->m_pkthdr.rcvif; 487 struct carp_softc *sc = NULL; 488 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); 489 struct carp_header *ch; 490 u_int len; 491 492 carpstats.carps_ipackets6++; 493 494 if (!carp_opts[CARPCTL_ALLOW]) { 495 m_freem(m); 496 return (IPPROTO_DONE); 497 } 498 499 /* check if received on a valid carp interface */ 500 if (ifp->if_type != IFT_CARP) { 501 carpstats.carps_badif++; 502 CARP_LOG(LOG_INFO, sc, ("packet received on non-carp interface: %s", 503 ifp->if_xname)); 504 m_freem(m); 505 return (IPPROTO_DONE); 506 } 507 508 /* verify that the IP TTL is 255 */ 509 if (ip6->ip6_hlim != CARP_DFLTTL) { 510 carpstats.carps_badttl++; 511 CARP_LOG(LOG_NOTICE, sc, ("received ttl %d != %d on %s", 512 ip6->ip6_hlim, CARP_DFLTTL, ifp->if_xname)); 513 m_freem(m); 514 
return (IPPROTO_DONE); 515 } 516 517 /* verify that we have a complete carp packet */ 518 len = m->m_len; 519 if ((m = m_pullup(m, *offp + sizeof(*ch))) == NULL) { 520 carpstats.carps_badlen++; 521 CARP_LOG(LOG_INFO, sc, ("packet size %u too small", len)); 522 return (IPPROTO_DONE); 523 } 524 ch = (struct carp_header *)(mtod(m, caddr_t) + *offp); 525 526 /* verify the CARP checksum */ 527 m->m_data += *offp; 528 if (carp_cksum(m, sizeof(*ch))) { 529 carpstats.carps_badsum++; 530 CARP_LOG(LOG_INFO, sc, ("checksum failed, on %s", 531 ifp->if_xname)); 532 m_freem(m); 533 return (IPPROTO_DONE); 534 } 535 m->m_data -= *offp; 536 537 carp_proto_input_c(m, ch, 1, AF_INET6); 538 return (IPPROTO_DONE); 539 } 540 #endif /* INET6 */ 541 542 void 543 carp_proto_input_c(struct mbuf *m, struct carp_header *ch, int ismulti, 544 sa_family_t af) 545 { 546 struct ifnet *ifp = m->m_pkthdr.rcvif; 547 struct carp_softc *sc; 548 struct carp_vhost_entry *vhe; 549 struct timeval sc_tv, ch_tv; 550 struct carp_if *cif; 551 552 if (ifp->if_type == IFT_CARP) 553 cif = (struct carp_if *)ifp->if_carpdev->if_carp; 554 else 555 cif = (struct carp_if *)ifp->if_carp; 556 557 TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) { 558 if (af == AF_INET && 559 ismulti != IN_MULTICAST(sc->sc_peer.s_addr)) 560 continue; 561 LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) { 562 if (vhe->vhid == ch->carp_vhid) 563 goto found; 564 } 565 } 566 found: 567 568 if (!sc || (sc->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) != 569 (IFF_UP|IFF_RUNNING)) { 570 carpstats.carps_badvhid++; 571 m_freem(m); 572 return; 573 } 574 575 getmicrotime(&sc->sc_if.if_lastchange); 576 sc->sc_if.if_ipackets++; 577 sc->sc_if.if_ibytes += m->m_pkthdr.len; 578 579 /* verify the CARP version. 
*/ 580 if (ch->carp_version != CARP_VERSION) { 581 carpstats.carps_badver++; 582 sc->sc_if.if_ierrors++; 583 CARP_LOG(LOG_NOTICE, sc, ("invalid version %d != %d", 584 ch->carp_version, CARP_VERSION)); 585 m_freem(m); 586 return; 587 } 588 589 /* verify the hash */ 590 if (carp_hmac_verify(vhe, ch->carp_counter, ch->carp_md)) { 591 carpstats.carps_badauth++; 592 sc->sc_if.if_ierrors++; 593 CARP_LOG(LOG_INFO, sc, ("incorrect hash")); 594 m_freem(m); 595 return; 596 } 597 598 if (!memcmp(&vhe->vhe_replay_cookie, ch->carp_counter, 599 sizeof(ch->carp_counter))) { 600 /* Do not log duplicates from non simplex interfaces */ 601 if (sc->sc_carpdev->if_flags & IFF_SIMPLEX) { 602 carpstats.carps_badauth++; 603 sc->sc_if.if_ierrors++; 604 CARP_LOG(LOG_WARNING, sc, 605 ("replay or network loop detected")); 606 } 607 m_freem(m); 608 return; 609 } 610 611 sc_tv.tv_sec = sc->sc_advbase; 612 sc_tv.tv_usec = vhe->advskew * 1000000 / 256; 613 ch_tv.tv_sec = ch->carp_advbase; 614 ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256; 615 616 switch (vhe->state) { 617 case INIT: 618 break; 619 case MASTER: 620 /* 621 * If we receive an advertisement from a master who's going to 622 * be more frequent than us, and whose demote count is not higher 623 * than ours, go into BACKUP state. If his demote count is lower, 624 * also go into BACKUP. 625 */ 626 if (((timercmp(&sc_tv, &ch_tv, >) || 627 timercmp(&sc_tv, &ch_tv, ==)) && 628 (ch->carp_demote <= carp_group_demote_count(sc))) || 629 ch->carp_demote < carp_group_demote_count(sc)) { 630 timeout_del(&vhe->ad_tmo); 631 carp_set_state(vhe, BACKUP); 632 carp_setrun(vhe, 0); 633 } 634 break; 635 case BACKUP: 636 /* 637 * If we're pre-empting masters who advertise slower than us, 638 * and do not have a better demote count, treat them as down. 
639 * 640 */ 641 if (carp_opts[CARPCTL_PREEMPT] && 642 timercmp(&sc_tv, &ch_tv, <) && 643 ch->carp_demote >= carp_group_demote_count(sc)) { 644 carp_master_down(vhe); 645 break; 646 } 647 648 /* 649 * Take over masters advertising with a higher demote count, 650 * regardless of CARPCTL_PREEMPT. 651 */ 652 if (ch->carp_demote > carp_group_demote_count(sc)) { 653 carp_master_down(vhe); 654 break; 655 } 656 657 /* 658 * If the master is going to advertise at such a low frequency 659 * that he's guaranteed to time out, we'd might as well just 660 * treat him as timed out now. 661 */ 662 sc_tv.tv_sec = sc->sc_advbase * 3; 663 if (sc->sc_advbase && timercmp(&sc_tv, &ch_tv, <)) { 664 carp_master_down(vhe); 665 break; 666 } 667 668 /* 669 * Otherwise, we reset the counter and wait for the next 670 * advertisement. 671 */ 672 carp_setrun(vhe, af); 673 break; 674 } 675 676 m_freem(m); 677 return; 678 } 679 680 int 681 carp_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp, 682 size_t newlen) 683 { 684 /* All sysctl names at this level are terminal. */ 685 if (namelen != 1) 686 return (ENOTDIR); 687 688 switch (name[0]) { 689 case CARPCTL_STATS: 690 if (newp != NULL) 691 return (EPERM); 692 return (sysctl_struct(oldp, oldlenp, newp, newlen, 693 &carpstats, sizeof(carpstats))); 694 default: 695 if (name[0] <= 0 || name[0] >= CARPCTL_MAXID) 696 return (ENOPROTOOPT); 697 return sysctl_int(oldp, oldlenp, newp, newlen, 698 &carp_opts[name[0]]); 699 } 700 } 701 702 /* 703 * Interface side of the CARP implementation. 
704 */ 705 706 /* ARGSUSED */ 707 void 708 carpattach(int n) 709 { 710 struct ifg_group *ifg; 711 712 if ((ifg = if_creategroup("carp")) != NULL) 713 ifg->ifg_refcnt++; /* keep around even if empty */ 714 if_clone_attach(&carp_cloner); 715 } 716 717 int 718 carp_clone_create(ifc, unit) 719 struct if_clone *ifc; 720 int unit; 721 { 722 struct carp_softc *sc; 723 struct ifnet *ifp; 724 725 sc = malloc(sizeof(*sc), M_DEVBUF, M_NOWAIT|M_ZERO); 726 if (!sc) 727 return (ENOMEM); 728 729 LIST_INIT(&sc->carp_vhosts); 730 sc->sc_vhe_count = 0; 731 if (carp_new_vhost(sc, 0, 0)) { 732 free(sc, M_DEVBUF, 0); 733 return (ENOMEM); 734 } 735 736 sc->sc_suppress = 0; 737 sc->sc_advbase = CARP_DFLTINTV; 738 sc->sc_naddrs = sc->sc_naddrs6 = 0; 739 #ifdef INET6 740 sc->sc_im6o.im6o_multicast_hlim = CARP_DFLTTL; 741 #endif /* INET6 */ 742 sc->sc_imo.imo_membership = (struct in_multi **)malloc( 743 (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_IPMOPTS, 744 M_WAITOK|M_ZERO); 745 sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS; 746 747 LIST_INIT(&sc->carp_mc_listhead); 748 ifp = &sc->sc_if; 749 ifp->if_softc = sc; 750 snprintf(ifp->if_xname, sizeof ifp->if_xname, "%s%d", ifc->ifc_name, 751 unit); 752 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 753 ifp->if_ioctl = carp_ioctl; 754 ifp->if_start = carp_start; 755 ifp->if_output = carp_output; 756 ifp->if_type = IFT_CARP; 757 ifp->if_addrlen = ETHER_ADDR_LEN; 758 ifp->if_hdrlen = ETHER_HDR_LEN; 759 ifp->if_mtu = ETHERMTU; 760 IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN); 761 IFQ_SET_READY(&ifp->if_snd); 762 if_attach(ifp); 763 764 if_alloc_sadl(ifp); 765 LIST_INIT(&sc->sc_ac.ac_multiaddrs); 766 #if NBPFILTER > 0 767 bpfattach(&ifp->if_bpf, ifp, DLT_EN10MB, ETHER_HDR_LEN); 768 #endif 769 770 /* Hook carp_addr_updated to cope with address and route changes. 
*/ 771 sc->ah_cookie = hook_establish(sc->sc_if.if_addrhooks, 0, 772 carp_addr_updated, sc); 773 carp_set_state_all(sc, INIT); 774 775 return (0); 776 } 777 778 int 779 carp_new_vhost(struct carp_softc *sc, int vhid, int advskew) 780 { 781 struct carp_vhost_entry *vhe, *vhe0; 782 783 vhe = malloc(sizeof(*vhe), M_DEVBUF, M_NOWAIT | M_ZERO); 784 if (vhe == NULL) 785 return (ENOMEM); 786 787 vhe->parent_sc = sc; 788 vhe->vhid = vhid; 789 vhe->advskew = advskew; 790 timeout_set(&vhe->ad_tmo, carp_send_ad, vhe); 791 timeout_set(&vhe->md_tmo, carp_master_down, vhe); 792 timeout_set(&vhe->md6_tmo, carp_master_down, vhe); 793 794 /* mark the first vhe as leader */ 795 if (LIST_EMPTY(&sc->carp_vhosts)) { 796 vhe->vhe_leader = 1; 797 LIST_INSERT_HEAD(&sc->carp_vhosts, vhe, vhost_entries); 798 sc->sc_vhe_count = 1; 799 return (0); 800 } 801 802 LIST_FOREACH(vhe0, &sc->carp_vhosts, vhost_entries) 803 if (LIST_NEXT(vhe0, vhost_entries) == NULL) 804 break; 805 LIST_INSERT_AFTER(vhe0, vhe, vhost_entries); 806 sc->sc_vhe_count++; 807 808 return (0); 809 } 810 811 int 812 carp_clone_destroy(struct ifnet *ifp) 813 { 814 struct carp_softc *sc = ifp->if_softc; 815 816 carpdetach(sc); 817 ether_ifdetach(ifp); 818 if_detach(ifp); 819 carp_destroy_vhosts(ifp->if_softc); 820 free(sc->sc_imo.imo_membership, M_IPMOPTS, 0); 821 free(sc, M_DEVBUF, 0); 822 823 return (0); 824 } 825 826 void 827 carp_del_all_timeouts(struct carp_softc *sc) 828 { 829 struct carp_vhost_entry *vhe; 830 831 LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) { 832 timeout_del(&vhe->ad_tmo); 833 timeout_del(&vhe->md_tmo); 834 timeout_del(&vhe->md6_tmo); 835 } 836 } 837 838 void 839 carpdetach(struct carp_softc *sc) 840 { 841 struct carp_if *cif; 842 int s; 843 844 carp_del_all_timeouts(sc); 845 846 if (sc->sc_demote_cnt) 847 carp_group_demote_adj(&sc->sc_if, -sc->sc_demote_cnt, "detach"); 848 sc->sc_suppress = 0; 849 sc->sc_sendad_errors = 0; 850 851 carp_set_state_all(sc, INIT); 852 sc->sc_if.if_flags &= ~IFF_UP; 
853 carp_setrun_all(sc, 0); 854 carp_multicast_cleanup(sc); 855 856 s = splnet(); 857 if (sc->ah_cookie != NULL) 858 hook_disestablish(sc->sc_if.if_addrhooks, sc->ah_cookie); 859 if (sc->sc_carpdev != NULL) { 860 if (sc->lh_cookie != NULL) 861 hook_disestablish(sc->sc_carpdev->if_linkstatehooks, 862 sc->lh_cookie); 863 cif = (struct carp_if *)sc->sc_carpdev->if_carp; 864 TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list); 865 if (!--cif->vhif_nvrs) { 866 ifpromisc(sc->sc_carpdev, 0); 867 sc->sc_carpdev->if_carp = NULL; 868 free(cif, M_IFADDR, 0); 869 } 870 } 871 sc->sc_carpdev = NULL; 872 splx(s); 873 } 874 875 /* Detach an interface from the carp. */ 876 void 877 carp_ifdetach(struct ifnet *ifp) 878 { 879 struct carp_softc *sc, *nextsc; 880 struct carp_if *cif = (struct carp_if *)ifp->if_carp; 881 882 for (sc = TAILQ_FIRST(&cif->vhif_vrs); sc; sc = nextsc) { 883 nextsc = TAILQ_NEXT(sc, sc_list); 884 carpdetach(sc); 885 } 886 } 887 888 void 889 carp_destroy_vhosts(struct carp_softc *sc) 890 { 891 /* XXX bow out? */ 892 struct carp_vhost_entry *vhe, *nvhe; 893 894 for (vhe = LIST_FIRST(&sc->carp_vhosts); vhe != NULL; vhe = nvhe) { 895 nvhe = LIST_NEXT(vhe, vhost_entries); 896 free(vhe, M_DEVBUF, 0); 897 } 898 LIST_INIT(&sc->carp_vhosts); 899 sc->sc_vhe_count = 0; 900 } 901 902 int 903 carp_prepare_ad(struct mbuf *m, struct carp_vhost_entry *vhe, 904 struct carp_header *ch) 905 { 906 if (!vhe->vhe_replay_cookie) { 907 arc4random_buf(&vhe->vhe_replay_cookie, 908 sizeof(vhe->vhe_replay_cookie)); 909 } 910 911 bcopy(&vhe->vhe_replay_cookie, ch->carp_counter, 912 sizeof(ch->carp_counter)); 913 914 /* 915 * For the time being, do not include the IPv6 linklayer addresses 916 * in the HMAC. 
917 */ 918 carp_hmac_generate(vhe, ch->carp_counter, ch->carp_md, HMAC_NOV6LL); 919 920 return (0); 921 } 922 923 void 924 carp_send_ad_all(void) 925 { 926 struct ifnet *ifp; 927 struct carp_if *cif; 928 struct carp_softc *vh; 929 930 TAILQ_FOREACH(ifp, &ifnet, if_list) { 931 if (ifp->if_carp == NULL || ifp->if_type == IFT_CARP) 932 continue; 933 934 cif = (struct carp_if *)ifp->if_carp; 935 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 936 if ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) == 937 (IFF_UP|IFF_RUNNING)) { 938 carp_vhe_send_ad_all(vh); 939 } 940 } 941 } 942 } 943 944 void 945 carp_vhe_send_ad_all(struct carp_softc *sc) 946 { 947 struct carp_vhost_entry *vhe; 948 949 LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) { 950 if (vhe->state == MASTER) 951 carp_send_ad(vhe); 952 } 953 } 954 955 void 956 carp_send_ad(void *v) 957 { 958 struct carp_header ch; 959 struct timeval tv; 960 struct carp_vhost_entry *vhe = v; 961 struct carp_softc *sc = vhe->parent_sc; 962 struct carp_header *ch_ptr; 963 964 struct mbuf *m; 965 int error, len, advbase, advskew, s; 966 struct ifaddr *ifa; 967 struct sockaddr sa; 968 969 if (sc->sc_carpdev == NULL) { 970 sc->sc_if.if_oerrors++; 971 return; 972 } 973 974 s = splsoftnet(); 975 976 /* bow out if we've gone to backup (the carp interface is going down) */ 977 if (sc->sc_bow_out) { 978 advbase = 255; 979 advskew = 255; 980 } else { 981 advbase = sc->sc_advbase; 982 advskew = vhe->advskew; 983 tv.tv_sec = advbase; 984 if (advbase == 0 && advskew == 0) 985 tv.tv_usec = 1 * 1000000 / 256; 986 else 987 tv.tv_usec = advskew * 1000000 / 256; 988 } 989 990 ch.carp_version = CARP_VERSION; 991 ch.carp_type = CARP_ADVERTISEMENT; 992 ch.carp_vhid = vhe->vhid; 993 ch.carp_demote = carp_group_demote_count(sc) & 0xff; 994 ch.carp_advbase = advbase; 995 ch.carp_advskew = advskew; 996 ch.carp_authlen = 7; /* XXX DEFINE */ 997 ch.carp_cksum = 0; 998 999 sc->cur_vhe = vhe; /* we need the vhe later on the output path */ 1000 1001 #ifdef INET 
1002 if (sc->sc_naddrs) { 1003 struct ip *ip; 1004 1005 MGETHDR(m, M_DONTWAIT, MT_HEADER); 1006 if (m == NULL) { 1007 sc->sc_if.if_oerrors++; 1008 carpstats.carps_onomem++; 1009 /* XXX maybe less ? */ 1010 goto retry_later; 1011 } 1012 len = sizeof(*ip) + sizeof(ch); 1013 m->m_pkthdr.len = len; 1014 m->m_pkthdr.rcvif = NULL; 1015 m->m_pkthdr.ph_rtableid = sc->sc_if.if_rdomain; 1016 m->m_pkthdr.pf.prio = CARP_IFQ_PRIO; 1017 m->m_len = len; 1018 MH_ALIGN(m, m->m_len); 1019 ip = mtod(m, struct ip *); 1020 ip->ip_v = IPVERSION; 1021 ip->ip_hl = sizeof(*ip) >> 2; 1022 ip->ip_tos = IPTOS_LOWDELAY; 1023 ip->ip_len = htons(len); 1024 ip->ip_id = htons(ip_randomid()); 1025 ip->ip_off = htons(IP_DF); 1026 ip->ip_ttl = CARP_DFLTTL; 1027 ip->ip_p = IPPROTO_CARP; 1028 ip->ip_sum = 0; 1029 1030 memset(&sa, 0, sizeof(sa)); 1031 sa.sa_family = AF_INET; 1032 ifa = ifaof_ifpforaddr(&sa, sc->sc_carpdev); 1033 if (ifa == NULL) 1034 ip->ip_src.s_addr = 0; 1035 else 1036 ip->ip_src.s_addr = 1037 ifatoia(ifa)->ia_addr.sin_addr.s_addr; 1038 ip->ip_dst.s_addr = sc->sc_peer.s_addr; 1039 if (IN_MULTICAST(ip->ip_dst.s_addr)) 1040 m->m_flags |= M_MCAST; 1041 1042 ch_ptr = (struct carp_header *)(ip + 1); 1043 bcopy(&ch, ch_ptr, sizeof(ch)); 1044 if (carp_prepare_ad(m, vhe, ch_ptr)) 1045 goto retry_later; 1046 1047 m->m_data += sizeof(*ip); 1048 ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip)); 1049 m->m_data -= sizeof(*ip); 1050 1051 getmicrotime(&sc->sc_if.if_lastchange); 1052 sc->sc_if.if_opackets++; 1053 sc->sc_if.if_obytes += len; 1054 carpstats.carps_opackets++; 1055 1056 error = ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, 1057 NULL, 0); 1058 if (error) { 1059 if (error == ENOBUFS) 1060 carpstats.carps_onomem++; 1061 else 1062 CARP_LOG(LOG_WARNING, sc, 1063 ("ip_output failed: %d", error)); 1064 sc->sc_if.if_oerrors++; 1065 if (sc->sc_sendad_errors < INT_MAX) 1066 sc->sc_sendad_errors++; 1067 if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS(sc)) 1068 
carp_group_demote_adj(&sc->sc_if, 1, 1069 "> snderrors"); 1070 sc->sc_sendad_success = 0; 1071 } else { 1072 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS(sc)) { 1073 if (++sc->sc_sendad_success >= 1074 CARP_SENDAD_MIN_SUCCESS(sc)) { 1075 carp_group_demote_adj(&sc->sc_if, -1, 1076 "< snderrors"); 1077 sc->sc_sendad_errors = 0; 1078 } 1079 } else 1080 sc->sc_sendad_errors = 0; 1081 } 1082 if (vhe->vhe_leader) { 1083 if (sc->sc_delayed_arp > 0) 1084 sc->sc_delayed_arp--; 1085 if (sc->sc_delayed_arp == 0) { 1086 carp_send_arp(sc); 1087 sc->sc_delayed_arp = -1; 1088 } 1089 } 1090 } 1091 #endif /* INET */ 1092 #ifdef INET6 1093 if (sc->sc_naddrs6) { 1094 struct ip6_hdr *ip6; 1095 1096 MGETHDR(m, M_DONTWAIT, MT_HEADER); 1097 if (m == NULL) { 1098 sc->sc_if.if_oerrors++; 1099 carpstats.carps_onomem++; 1100 /* XXX maybe less ? */ 1101 goto retry_later; 1102 } 1103 len = sizeof(*ip6) + sizeof(ch); 1104 m->m_pkthdr.len = len; 1105 m->m_pkthdr.rcvif = NULL; 1106 m->m_pkthdr.pf.prio = CARP_IFQ_PRIO; 1107 m->m_pkthdr.ph_rtableid = sc->sc_if.if_rdomain; 1108 m->m_len = len; 1109 MH_ALIGN(m, m->m_len); 1110 m->m_flags |= M_MCAST; 1111 ip6 = mtod(m, struct ip6_hdr *); 1112 memset(ip6, 0, sizeof(*ip6)); 1113 ip6->ip6_vfc |= IPV6_VERSION; 1114 ip6->ip6_hlim = CARP_DFLTTL; 1115 ip6->ip6_nxt = IPPROTO_CARP; 1116 1117 /* set the source address */ 1118 memset(&sa, 0, sizeof(sa)); 1119 sa.sa_family = AF_INET6; 1120 ifa = ifaof_ifpforaddr(&sa, sc->sc_carpdev); 1121 if (ifa == NULL) /* This should never happen with IPv6 */ 1122 memset(&ip6->ip6_src, 0, sizeof(struct in6_addr)); 1123 else 1124 bcopy(ifatoia6(ifa)->ia_addr.sin6_addr.s6_addr, 1125 &ip6->ip6_src, sizeof(struct in6_addr)); 1126 /* set the multicast destination */ 1127 1128 ip6->ip6_dst.s6_addr16[0] = htons(0xff02); 1129 ip6->ip6_dst.s6_addr16[1] = htons(sc->sc_carpdev->if_index); 1130 ip6->ip6_dst.s6_addr8[15] = 0x12; 1131 1132 ch_ptr = (struct carp_header *)(ip6 + 1); 1133 bcopy(&ch, ch_ptr, sizeof(ch)); 1134 if 
(carp_prepare_ad(m, vhe, ch_ptr)) 1135 goto retry_later; 1136 1137 m->m_data += sizeof(*ip6); 1138 ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip6)); 1139 m->m_data -= sizeof(*ip6); 1140 1141 getmicrotime(&sc->sc_if.if_lastchange); 1142 sc->sc_if.if_opackets++; 1143 sc->sc_if.if_obytes += len; 1144 carpstats.carps_opackets6++; 1145 1146 error = ip6_output(m, NULL, NULL, 0, &sc->sc_im6o, NULL, NULL); 1147 if (error) { 1148 if (error == ENOBUFS) 1149 carpstats.carps_onomem++; 1150 else 1151 CARP_LOG(LOG_WARNING, sc, 1152 ("ip6_output failed: %d", error)); 1153 sc->sc_if.if_oerrors++; 1154 if (sc->sc_sendad_errors < INT_MAX) 1155 sc->sc_sendad_errors++; 1156 if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS(sc)) 1157 carp_group_demote_adj(&sc->sc_if, 1, 1158 "> snd6errors"); 1159 sc->sc_sendad_success = 0; 1160 } else { 1161 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS(sc)) { 1162 if (++sc->sc_sendad_success >= 1163 CARP_SENDAD_MIN_SUCCESS(sc)) { 1164 carp_group_demote_adj(&sc->sc_if, -1, 1165 "< snd6errors"); 1166 sc->sc_sendad_errors = 0; 1167 } 1168 } else 1169 sc->sc_sendad_errors = 0; 1170 } 1171 } 1172 #endif /* INET6 */ 1173 1174 retry_later: 1175 sc->cur_vhe = NULL; 1176 splx(s); 1177 if (advbase != 255 || advskew != 255) 1178 timeout_add(&vhe->ad_tmo, tvtohz(&tv)); 1179 } 1180 1181 /* 1182 * Broadcast a gratuitous ARP request containing 1183 * the virtual router MAC address for each IP address 1184 * associated with the virtual router. 
1185 */ 1186 void 1187 carp_send_arp(struct carp_softc *sc) 1188 { 1189 struct ifaddr *ifa; 1190 in_addr_t in; 1191 int s = splsoftnet(); 1192 1193 TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) { 1194 1195 if (ifa->ifa_addr->sa_family != AF_INET) 1196 continue; 1197 1198 in = ifatoia(ifa)->ia_addr.sin_addr.s_addr; 1199 arprequest(&sc->sc_if, &in, &in, sc->sc_ac.ac_enaddr); 1200 DELAY(1000); /* XXX */ 1201 } 1202 splx(s); 1203 } 1204 1205 #ifdef INET6 1206 void 1207 carp_send_na(struct carp_softc *sc) 1208 { 1209 struct ifaddr *ifa; 1210 struct in6_addr *in6; 1211 static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT; 1212 int s = splsoftnet(); 1213 1214 TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) { 1215 1216 if (ifa->ifa_addr->sa_family != AF_INET6) 1217 continue; 1218 1219 in6 = &ifatoia6(ifa)->ia_addr.sin6_addr; 1220 nd6_na_output(&sc->sc_if, &mcast, in6, 1221 ND_NA_FLAG_OVERRIDE | 1222 (ip6_forwarding ? ND_NA_FLAG_ROUTER : 0), 1, NULL); 1223 DELAY(1000); /* XXX */ 1224 } 1225 splx(s); 1226 } 1227 #endif /* INET6 */ 1228 1229 /* 1230 * Based on bridge_hash() in if_bridge.c 1231 */ 1232 #define mix(a,b,c) \ 1233 do { \ 1234 a -= b; a -= c; a ^= (c >> 13); \ 1235 b -= c; b -= a; b ^= (a << 8); \ 1236 c -= a; c -= b; c ^= (b >> 13); \ 1237 a -= b; a -= c; a ^= (c >> 12); \ 1238 b -= c; b -= a; b ^= (a << 16); \ 1239 c -= a; c -= b; c ^= (b >> 5); \ 1240 a -= b; a -= c; a ^= (c >> 3); \ 1241 b -= c; b -= a; b ^= (a << 10); \ 1242 c -= a; c -= b; c ^= (b >> 15); \ 1243 } while (0) 1244 1245 u_int32_t 1246 carp_hash(struct carp_softc *sc, u_char *src) 1247 { 1248 u_int32_t a = 0x9e3779b9, b = sc->sc_hashkey[0], c = sc->sc_hashkey[1]; 1249 1250 c += sc->sc_key[3] << 24; 1251 c += sc->sc_key[2] << 16; 1252 c += sc->sc_key[1] << 8; 1253 c += sc->sc_key[0]; 1254 b += src[5] << 8; 1255 b += src[4]; 1256 a += src[3] << 24; 1257 a += src[2] << 16; 1258 a += src[1] << 8; 1259 a += src[0]; 1260 1261 mix(a, b, c); 1262 return (c); 1263 } 1264 1265 void 
1266 carp_update_lsmask(struct carp_softc *sc) 1267 { 1268 struct carp_vhost_entry *vhe; 1269 int count; 1270 1271 if (!sc->sc_balancing) 1272 return; 1273 1274 sc->sc_lsmask = 0; 1275 count = 0; 1276 1277 LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) { 1278 if (vhe->state == MASTER && count < sizeof(sc->sc_lsmask) * 8) 1279 sc->sc_lsmask |= 1 << count; 1280 count++; 1281 } 1282 sc->sc_lscount = count; 1283 CARP_LOG(LOG_DEBUG, sc, ("carp_update_lsmask: %x", sc->sc_lsmask)); 1284 } 1285 1286 int 1287 carp_iamatch(struct in_ifaddr *ia, u_char *src, u_int8_t **sha, 1288 u_int8_t **ether_shost) 1289 { 1290 struct carp_softc *sc = ia->ia_ifp->if_softc; 1291 struct carp_vhost_entry *vhe = LIST_FIRST(&sc->carp_vhosts); 1292 1293 if (sc->sc_balancing == CARP_BAL_ARP) { 1294 int lshash; 1295 /* 1296 * We use the source MAC address to decide which virtual host 1297 * should handle the request. If we're master of that virtual 1298 * host, then we respond, otherwise, just drop the arp packet 1299 * on the floor. 
1300 */ 1301 1302 if (sc->sc_lscount == 0) /* just to be safe */ 1303 return (0); 1304 lshash = carp_hash(sc, src) % sc->sc_lscount; 1305 if ((1 << lshash) & sc->sc_lsmask) { 1306 int i = 0; 1307 LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) { 1308 if (i++ == lshash) 1309 break; 1310 } 1311 if (vhe == NULL) 1312 return (0); 1313 *sha = vhe->vhe_enaddr; 1314 return (1); 1315 } 1316 } else if (sc->sc_balancing == CARP_BAL_IPSTEALTH || 1317 sc->sc_balancing == CARP_BAL_IP) { 1318 if (vhe->state == MASTER) { 1319 *ether_shost = ((struct arpcom *)sc->sc_carpdev)-> 1320 ac_enaddr; 1321 return (1); 1322 } 1323 } else { 1324 if (vhe->state == MASTER) 1325 return (1); 1326 } 1327 1328 return (0); 1329 } 1330 1331 #ifdef INET6 1332 int 1333 carp_iamatch6(struct ifnet *ifp, u_char *src, struct sockaddr_dl **sdl) 1334 { 1335 struct carp_softc *sc = ifp->if_softc; 1336 struct carp_vhost_entry *vhe = LIST_FIRST(&sc->carp_vhosts); 1337 1338 if (sc->sc_balancing == CARP_BAL_ARP) { 1339 int lshash; 1340 /* 1341 * We use the source MAC address to decide which virtual host 1342 * should handle the request. If we're master of that virtual 1343 * host, then we respond, otherwise, just drop the ndp packet 1344 * on the floor. 
1345 */ 1346 1347 /* can happen if optional src lladdr is not provided */ 1348 if (src == NULL) 1349 return (0); 1350 if (sc->sc_lscount == 0) /* just to be safe */ 1351 return (0); 1352 lshash = carp_hash(sc, src) % sc->sc_lscount; 1353 if ((1 << lshash) & sc->sc_lsmask) { 1354 int i = 0; 1355 LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) { 1356 if (i++ == lshash) 1357 break; 1358 } 1359 if (vhe == NULL) 1360 return (0); 1361 *sdl = &vhe->vhe_sdl; 1362 return (1); 1363 } 1364 } else { 1365 if (vhe->state == MASTER) 1366 return (1); 1367 } 1368 1369 return (0); 1370 } 1371 #endif /* INET6 */ 1372 1373 struct ifnet * 1374 carp_ourether(void *v, u_int8_t *ena) 1375 { 1376 struct carp_if *cif = (struct carp_if *)v; 1377 struct carp_softc *vh; 1378 1379 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1380 struct carp_vhost_entry *vhe; 1381 if ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) != 1382 (IFF_UP|IFF_RUNNING)) 1383 continue; 1384 if (vh->sc_balancing == CARP_BAL_ARP) { 1385 LIST_FOREACH(vhe, &vh->carp_vhosts, vhost_entries) 1386 if (vhe->state == MASTER && 1387 !memcmp(ena, vhe->vhe_enaddr, 1388 ETHER_ADDR_LEN)) 1389 return (&vh->sc_if); 1390 } else { 1391 vhe = LIST_FIRST(&vh->carp_vhosts); 1392 if ((vhe->state == MASTER || 1393 vh->sc_balancing >= CARP_BAL_IP) && 1394 !memcmp(ena, vh->sc_ac.ac_enaddr, ETHER_ADDR_LEN)) 1395 return (&vh->sc_if); 1396 } 1397 } 1398 return (NULL); 1399 } 1400 1401 u_char * 1402 carp_get_srclladdr(struct ifnet *ifp, u_char *esrc) 1403 { 1404 struct carp_softc *sc = ifp->if_softc; 1405 1406 if (sc->sc_balancing != CARP_BAL_IPSTEALTH && 1407 sc->sc_balancing != CARP_BAL_IP && sc->cur_vhe) { 1408 if (sc->cur_vhe->vhe_leader) 1409 return (sc->sc_ac.ac_enaddr); 1410 else 1411 return (sc->cur_vhe->vhe_enaddr); 1412 } 1413 return (esrc); 1414 } 1415 1416 int 1417 carp_our_mcastaddr(struct ifnet *ifp, u_int8_t *d_enaddr) 1418 { 1419 struct carp_softc *sc = ifp->if_softc; 1420 1421 if (sc->sc_balancing != CARP_BAL_IP) 1422 return (0); 1423 
1424 return (!memcmp(sc->sc_ac.ac_enaddr, d_enaddr, ETHER_ADDR_LEN)); 1425 } 1426 1427 1428 int 1429 carp_input(struct ifnet *ifp0, struct ether_header *eh0, struct mbuf *m) 1430 { 1431 struct ether_header eh; 1432 struct carp_if *cif = (struct carp_if *)ifp0->if_carp; 1433 struct ifnet *ifp; 1434 1435 memcpy(&eh, eh0, sizeof(eh)); 1436 1437 if ((ifp = carp_ourether(cif, eh0->ether_dhost))) 1438 ; 1439 else if (m->m_flags & (M_BCAST|M_MCAST)) { 1440 struct carp_softc *vh; 1441 struct mbuf *m0; 1442 1443 /* 1444 * XXX Should really check the list of multicast addresses 1445 * for each CARP interface _before_ copying. 1446 */ 1447 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1448 if (!(vh->sc_if.if_flags & IFF_UP)) 1449 continue; 1450 m0 = m_copym2(m, 0, M_COPYALL, M_DONTWAIT); 1451 if (m0 == NULL) 1452 continue; 1453 m0->m_pkthdr.rcvif = &vh->sc_if; 1454 #if NBPFILTER > 0 1455 if (vh->sc_if.if_bpf) 1456 bpf_mtap_hdr(vh->sc_if.if_bpf, (char *)&eh, 1457 ETHER_HDR_LEN, m0, BPF_DIRECTION_IN, NULL); 1458 #endif 1459 vh->sc_if.if_ipackets++; 1460 ether_input(&vh->sc_if, &eh, m0); 1461 } 1462 return (1); 1463 } 1464 1465 if (ifp == NULL) 1466 return (1); 1467 1468 m->m_pkthdr.rcvif = ifp; 1469 1470 #if NBPFILTER > 0 1471 if (ifp->if_bpf) 1472 bpf_mtap_hdr(ifp->if_bpf, (char *)&eh, ETHER_HDR_LEN, m, 1473 BPF_DIRECTION_IN, NULL); 1474 #endif 1475 ifp->if_ipackets++; 1476 ether_input(ifp, &eh, m); 1477 1478 return (0); 1479 } 1480 1481 int 1482 carp_lsdrop(struct mbuf *m, sa_family_t af, u_int32_t *src, u_int32_t *dst) 1483 { 1484 struct carp_softc *sc = m->m_pkthdr.rcvif->if_softc; 1485 int match; 1486 u_int32_t fold; 1487 1488 if (sc->sc_balancing < CARP_BAL_IP) 1489 return (0); 1490 /* 1491 * Never drop carp advertisements. 1492 * XXX Bad idea to pass all broadcast / multicast traffic? 
1493 */ 1494 if (m->m_flags & (M_BCAST|M_MCAST)) 1495 return (0); 1496 1497 fold = src[0] ^ dst[0]; 1498 #ifdef INET6 1499 if (af == AF_INET6) { 1500 int i; 1501 for (i = 1; i < 4; i++) 1502 fold ^= src[i] ^ dst[i]; 1503 } 1504 #endif 1505 if (sc->sc_lscount == 0) /* just to be safe */ 1506 return (1); 1507 match = (1 << (ntohl(fold) % sc->sc_lscount)) & sc->sc_lsmask; 1508 1509 return (!match); 1510 } 1511 1512 void 1513 carp_master_down(void *v) 1514 { 1515 struct carp_vhost_entry *vhe = v; 1516 struct carp_softc *sc = vhe->parent_sc; 1517 1518 switch (vhe->state) { 1519 case INIT: 1520 printf("%s: master_down event in INIT state\n", 1521 sc->sc_if.if_xname); 1522 break; 1523 case MASTER: 1524 break; 1525 case BACKUP: 1526 carp_set_state(vhe, MASTER); 1527 carp_send_ad(vhe); 1528 if (sc->sc_balancing == CARP_BAL_NONE && vhe->vhe_leader) { 1529 carp_send_arp(sc); 1530 /* Schedule a delayed ARP to deal w/ some L3 switches */ 1531 sc->sc_delayed_arp = 2; 1532 #ifdef INET6 1533 carp_send_na(sc); 1534 #endif /* INET6 */ 1535 } 1536 carp_setrun(vhe, 0); 1537 carpstats.carps_preempt++; 1538 break; 1539 } 1540 } 1541 1542 void 1543 carp_setrun_all(struct carp_softc *sc, sa_family_t af) 1544 { 1545 struct carp_vhost_entry *vhe; 1546 LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) { 1547 carp_setrun(vhe, af); 1548 } 1549 } 1550 1551 /* 1552 * When in backup state, af indicates whether to reset the master down timer 1553 * for v4 or v6. If it's set to zero, reset the ones which are already pending. 
1554 */ 1555 void 1556 carp_setrun(struct carp_vhost_entry *vhe, sa_family_t af) 1557 { 1558 struct timeval tv; 1559 struct carp_softc *sc = vhe->parent_sc; 1560 1561 if (sc->sc_carpdev == NULL) { 1562 sc->sc_if.if_flags &= ~IFF_RUNNING; 1563 carp_set_state_all(sc, INIT); 1564 return; 1565 } 1566 1567 if (memcmp(((struct arpcom *)sc->sc_carpdev)->ac_enaddr, 1568 sc->sc_ac.ac_enaddr, ETHER_ADDR_LEN) == 0) 1569 sc->sc_realmac = 1; 1570 else 1571 sc->sc_realmac = 0; 1572 1573 if (sc->sc_if.if_flags & IFF_UP && vhe->vhid > 0 && 1574 (sc->sc_naddrs || sc->sc_naddrs6) && !sc->sc_suppress) { 1575 sc->sc_if.if_flags |= IFF_RUNNING; 1576 } else { 1577 sc->sc_if.if_flags &= ~IFF_RUNNING; 1578 return; 1579 } 1580 1581 switch (vhe->state) { 1582 case INIT: 1583 carp_set_state(vhe, BACKUP); 1584 carp_setrun(vhe, 0); 1585 break; 1586 case BACKUP: 1587 timeout_del(&vhe->ad_tmo); 1588 tv.tv_sec = 3 * sc->sc_advbase; 1589 if (sc->sc_advbase == 0 && vhe->advskew == 0) 1590 tv.tv_usec = 3 * 1000000 / 256; 1591 else if (sc->sc_advbase == 0) 1592 tv.tv_usec = 3 * vhe->advskew * 1000000 / 256; 1593 else 1594 tv.tv_usec = vhe->advskew * 1000000 / 256; 1595 if (vhe->vhe_leader) 1596 sc->sc_delayed_arp = -1; 1597 switch (af) { 1598 #ifdef INET 1599 case AF_INET: 1600 timeout_add(&vhe->md_tmo, tvtohz(&tv)); 1601 break; 1602 #endif /* INET */ 1603 #ifdef INET6 1604 case AF_INET6: 1605 timeout_add(&vhe->md6_tmo, tvtohz(&tv)); 1606 break; 1607 #endif /* INET6 */ 1608 default: 1609 if (sc->sc_naddrs) 1610 timeout_add(&vhe->md_tmo, tvtohz(&tv)); 1611 if (sc->sc_naddrs6) 1612 timeout_add(&vhe->md6_tmo, tvtohz(&tv)); 1613 break; 1614 } 1615 break; 1616 case MASTER: 1617 tv.tv_sec = sc->sc_advbase; 1618 if (sc->sc_advbase == 0 && vhe->advskew == 0) 1619 tv.tv_usec = 1 * 1000000 / 256; 1620 else 1621 tv.tv_usec = vhe->advskew * 1000000 / 256; 1622 timeout_add(&vhe->ad_tmo, tvtohz(&tv)); 1623 break; 1624 } 1625 } 1626 1627 void 1628 carp_multicast_cleanup(struct carp_softc *sc) 1629 { 1630 struct 
ip_moptions *imo = &sc->sc_imo; 1631 #ifdef INET6 1632 struct ip6_moptions *im6o = &sc->sc_im6o; 1633 #endif 1634 u_int16_t n = imo->imo_num_memberships; 1635 1636 /* Clean up our own multicast memberships */ 1637 while (n-- > 0) { 1638 if (imo->imo_membership[n] != NULL) { 1639 in_delmulti(imo->imo_membership[n]); 1640 imo->imo_membership[n] = NULL; 1641 } 1642 } 1643 imo->imo_num_memberships = 0; 1644 imo->imo_multicast_ifp = NULL; 1645 1646 #ifdef INET6 1647 while (!LIST_EMPTY(&im6o->im6o_memberships)) { 1648 struct in6_multi_mship *imm = 1649 LIST_FIRST(&im6o->im6o_memberships); 1650 1651 LIST_REMOVE(imm, i6mm_chain); 1652 in6_leavegroup(imm); 1653 } 1654 im6o->im6o_multicast_ifp = NULL; 1655 #endif 1656 1657 /* And any other multicast memberships */ 1658 carp_ether_purgemulti(sc); 1659 } 1660 1661 int 1662 carp_set_ifp(struct carp_softc *sc, struct ifnet *ifp) 1663 { 1664 struct carp_if *cif, *ncif = NULL; 1665 struct carp_softc *vr, *after = NULL; 1666 int myself = 0, error = 0; 1667 int s; 1668 1669 if (ifp == sc->sc_carpdev) 1670 return (0); 1671 1672 if (ifp != NULL) { 1673 if ((ifp->if_flags & IFF_MULTICAST) == 0) 1674 return (EADDRNOTAVAIL); 1675 1676 if (ifp->if_type == IFT_CARP) 1677 return (EINVAL); 1678 1679 if (ifp->if_carp == NULL) { 1680 ncif = malloc(sizeof(*cif), M_IFADDR, M_NOWAIT|M_ZERO); 1681 if (ncif == NULL) 1682 return (ENOBUFS); 1683 if ((error = ifpromisc(ifp, 1))) { 1684 free(ncif, M_IFADDR, 0); 1685 return (error); 1686 } 1687 1688 ncif->vhif_ifp = ifp; 1689 TAILQ_INIT(&ncif->vhif_vrs); 1690 } else { 1691 cif = (struct carp_if *)ifp->if_carp; 1692 if (carp_check_dup_vhids(sc, cif, NULL)) 1693 return (EINVAL); 1694 } 1695 1696 /* detach from old interface */ 1697 if (sc->sc_carpdev != NULL) 1698 carpdetach(sc); 1699 1700 /* attach carp interface to physical interface */ 1701 if (ncif != NULL) 1702 ifp->if_carp = (caddr_t)ncif; 1703 sc->sc_carpdev = ifp; 1704 sc->sc_if.if_capabilities = ifp->if_capabilities & 1705 IFCAP_CSUM_MASK; 1706 
cif = (struct carp_if *)ifp->if_carp; 1707 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) { 1708 if (vr == sc) 1709 myself = 1; 1710 if (LIST_FIRST(&vr->carp_vhosts)->vhid < 1711 LIST_FIRST(&sc->carp_vhosts)->vhid) 1712 after = vr; 1713 } 1714 1715 if (!myself) { 1716 /* We're trying to keep things in order */ 1717 if (after == NULL) { 1718 TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list); 1719 } else { 1720 TAILQ_INSERT_AFTER(&cif->vhif_vrs, after, 1721 sc, sc_list); 1722 } 1723 cif->vhif_nvrs++; 1724 } 1725 if (sc->sc_naddrs || sc->sc_naddrs6) 1726 sc->sc_if.if_flags |= IFF_UP; 1727 carp_set_enaddr(sc); 1728 s = splnet(); 1729 sc->lh_cookie = hook_establish(ifp->if_linkstatehooks, 1, 1730 carp_carpdev_state, ifp); 1731 carp_carpdev_state(ifp); 1732 splx(s); 1733 } else { 1734 carpdetach(sc); 1735 sc->sc_if.if_flags &= ~(IFF_UP|IFF_RUNNING); 1736 } 1737 return (0); 1738 } 1739 1740 void 1741 carp_set_vhe_enaddr(struct carp_vhost_entry *vhe) 1742 { 1743 struct carp_softc *sc = vhe->parent_sc; 1744 1745 if (vhe->vhid != 0 && sc->sc_carpdev) { 1746 if (vhe->vhe_leader && sc->sc_balancing == CARP_BAL_IP) 1747 vhe->vhe_enaddr[0] = 1; 1748 else 1749 vhe->vhe_enaddr[0] = 0; 1750 vhe->vhe_enaddr[1] = 0; 1751 vhe->vhe_enaddr[2] = 0x5e; 1752 vhe->vhe_enaddr[3] = 0; 1753 vhe->vhe_enaddr[4] = 1; 1754 vhe->vhe_enaddr[5] = vhe->vhid; 1755 1756 vhe->vhe_sdl.sdl_family = AF_LINK; 1757 vhe->vhe_sdl.sdl_alen = ETHER_ADDR_LEN; 1758 bcopy(vhe->vhe_enaddr, vhe->vhe_sdl.sdl_data, ETHER_ADDR_LEN); 1759 } else 1760 memset(vhe->vhe_enaddr, 0, ETHER_ADDR_LEN); 1761 } 1762 1763 void 1764 carp_set_enaddr(struct carp_softc *sc) 1765 { 1766 struct carp_vhost_entry *vhe; 1767 1768 LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) 1769 carp_set_vhe_enaddr(vhe); 1770 1771 vhe = LIST_FIRST(&sc->carp_vhosts); 1772 1773 /* 1774 * Use the carp lladdr if the running one isn't manually set. 1775 * Only compare static parts of the lladdr. 
1776 */ 1777 if ((memcmp(sc->sc_ac.ac_enaddr + 1, vhe->vhe_enaddr + 1, 1778 ETHER_ADDR_LEN - 2) == 0) || 1779 (!sc->sc_ac.ac_enaddr[0] && !sc->sc_ac.ac_enaddr[1] && 1780 !sc->sc_ac.ac_enaddr[2] && !sc->sc_ac.ac_enaddr[3] && 1781 !sc->sc_ac.ac_enaddr[4] && !sc->sc_ac.ac_enaddr[5])) 1782 bcopy(vhe->vhe_enaddr, sc->sc_ac.ac_enaddr, ETHER_ADDR_LEN); 1783 1784 /* Make sure the enaddr has changed before further twiddling. */ 1785 if (memcmp(sc->sc_ac.ac_enaddr, sc->sc_curlladdr, ETHER_ADDR_LEN) != 0) { 1786 bcopy(sc->sc_ac.ac_enaddr, LLADDR(sc->sc_if.if_sadl), 1787 ETHER_ADDR_LEN); 1788 bcopy(sc->sc_ac.ac_enaddr, sc->sc_curlladdr, ETHER_ADDR_LEN); 1789 #ifdef INET6 1790 /* 1791 * (re)attach a link-local address which matches 1792 * our new MAC address. 1793 */ 1794 in6_ifattach_linklocal(&sc->sc_if, NULL); 1795 #endif 1796 carp_set_state_all(sc, INIT); 1797 carp_setrun_all(sc, 0); 1798 } 1799 } 1800 1801 void 1802 carp_addr_updated(void *v) 1803 { 1804 struct carp_softc *sc = (struct carp_softc *) v; 1805 struct ifaddr *ifa; 1806 int new_naddrs = 0, new_naddrs6 = 0; 1807 1808 TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) { 1809 if (ifa->ifa_addr->sa_family == AF_INET) 1810 new_naddrs++; 1811 #ifdef INET6 1812 else if (ifa->ifa_addr->sa_family == AF_INET6 && 1813 !IN6_IS_ADDR_LINKLOCAL(&ifatoia6(ifa)->ia_addr.sin6_addr)) 1814 new_naddrs6++; 1815 #endif /* INET6 */ 1816 } 1817 1818 /* We received address changes from if_addrhooks callback */ 1819 if (new_naddrs != sc->sc_naddrs || new_naddrs6 != sc->sc_naddrs6) { 1820 struct in_addr mc_addr; 1821 struct in_multi *inm; 1822 1823 sc->sc_naddrs = new_naddrs; 1824 sc->sc_naddrs6 = new_naddrs6; 1825 1826 /* Re-establish multicast membership removed by in_control */ 1827 if (IN_MULTICAST(sc->sc_peer.s_addr)) { 1828 mc_addr.s_addr = sc->sc_peer.s_addr; 1829 IN_LOOKUP_MULTI(mc_addr, &sc->sc_if, inm); 1830 if (inm == NULL) { 1831 struct in_multi **imm = 1832 sc->sc_imo.imo_membership; 1833 u_int16_t maxmem = 1834 
sc->sc_imo.imo_max_memberships; 1835 1836 memset(&sc->sc_imo, 0, sizeof(sc->sc_imo)); 1837 sc->sc_imo.imo_membership = imm; 1838 sc->sc_imo.imo_max_memberships = maxmem; 1839 1840 if (sc->sc_carpdev != NULL && sc->sc_naddrs > 0) 1841 carp_join_multicast(sc); 1842 } 1843 } 1844 1845 if (sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0) { 1846 sc->sc_if.if_flags &= ~IFF_UP; 1847 carp_set_state_all(sc, INIT); 1848 } else 1849 carp_hmac_prepare(sc); 1850 } 1851 1852 carp_setrun_all(sc, 0); 1853 } 1854 1855 int 1856 carp_set_addr(struct carp_softc *sc, struct sockaddr_in *sin) 1857 { 1858 struct ifnet *ifp = NULL; 1859 struct in_addr *in = &sin->sin_addr; 1860 struct ifaddr *ifa; 1861 struct in_ifaddr *ia; 1862 int error = 0; 1863 1864 /* XXX is this necessary? */ 1865 if (in->s_addr == INADDR_ANY) { 1866 if (!(sc->sc_if.if_flags & IFF_UP)) 1867 carp_set_state_all(sc, INIT); 1868 if (sc->sc_naddrs) 1869 sc->sc_if.if_flags |= IFF_UP; 1870 carp_setrun_all(sc, 0); 1871 return (0); 1872 } 1873 1874 /* we have to do this by hand to ensure we don't match on ourselves */ 1875 TAILQ_FOREACH(ifp, &ifnet, if_list) { 1876 /* and, yeah, we need a multicast-capable iface too */ 1877 if ((ifp->if_type == IFT_CARP) || 1878 (ifp->if_flags & IFF_MULTICAST) == 0 || 1879 (ifp->if_rdomain != sc->sc_if.if_rdomain)) 1880 continue; 1881 1882 TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) { 1883 if (ifa->ifa_addr->sa_family != AF_INET) 1884 continue; 1885 1886 ia = ifatoia(ifa); 1887 if ((in->s_addr & ia->ia_netmask) == ia->ia_net) 1888 goto found; 1889 } 1890 } 1891 1892 found: 1893 if (ifp == NULL) 1894 ifp = sc->sc_carpdev; 1895 1896 if (sc->sc_carpdev != NULL && ifp != sc->sc_carpdev) 1897 return (EADDRNOTAVAIL); 1898 1899 if ((error = carp_set_ifp(sc, ifp))) 1900 return (error); 1901 1902 if (sc->sc_carpdev == NULL) 1903 return (EADDRNOTAVAIL); 1904 1905 if (sc->sc_naddrs == 0 && (error = carp_join_multicast(sc)) != 0) 1906 return (error); 1907 1908 if (sc->sc_carpdev != NULL) 1909 
sc->sc_if.if_flags |= IFF_UP; 1910 1911 carp_set_state_all(sc, INIT); 1912 1913 return (0); 1914 } 1915 1916 int 1917 carp_join_multicast(struct carp_softc *sc) 1918 { 1919 struct ip_moptions *imo = &sc->sc_imo; 1920 struct in_multi *imm; 1921 struct in_addr addr; 1922 1923 if (!IN_MULTICAST(sc->sc_peer.s_addr)) 1924 return (0); 1925 1926 addr.s_addr = sc->sc_peer.s_addr; 1927 if ((imm = in_addmulti(&addr, &sc->sc_if)) == NULL) 1928 return (ENOBUFS); 1929 1930 imo->imo_membership[0] = imm; 1931 imo->imo_num_memberships = 1; 1932 imo->imo_multicast_ifp = &sc->sc_if; 1933 imo->imo_multicast_ttl = CARP_DFLTTL; 1934 imo->imo_multicast_loop = 0; 1935 return (0); 1936 } 1937 1938 1939 #ifdef INET6 1940 int 1941 carp_set_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6) 1942 { 1943 struct ifnet *ifp = sc->sc_carpdev; 1944 struct ifaddr *ifa; 1945 struct in6_ifaddr *ia6; 1946 int i, error = 0; 1947 1948 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { 1949 if (!(sc->sc_if.if_flags & IFF_UP)) 1950 carp_set_state_all(sc, INIT); 1951 if (sc->sc_naddrs6) 1952 sc->sc_if.if_flags |= IFF_UP; 1953 carp_setrun_all(sc, 0); 1954 return (0); 1955 } 1956 1957 /* we have to do this by hand to ensure we don't match on ourselves */ 1958 TAILQ_FOREACH(ifp, &ifnet, if_list) { 1959 /* and, yeah, we need a multicast-capable iface too */ 1960 if ((ifp->if_type == IFT_CARP) || 1961 (ifp->if_flags & IFF_MULTICAST) == 0 || 1962 (ifp->if_rdomain != sc->sc_if.if_rdomain)) 1963 continue; 1964 1965 TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) { 1966 if (ifa->ifa_addr->sa_family != AF_INET6) 1967 continue; 1968 1969 ia6 = ifatoia6(ifa); 1970 for (i = 0; i < 4; i++) { 1971 if ((sin6->sin6_addr.s6_addr32[i] & 1972 ia6->ia_prefixmask.sin6_addr.s6_addr32[i]) != 1973 (ia6->ia_addr.sin6_addr.s6_addr32[i] & 1974 ia6->ia_prefixmask.sin6_addr.s6_addr32[i])) 1975 break; 1976 } 1977 1978 if (i == 4) 1979 goto found; 1980 } 1981 } 1982 1983 found: 1984 if (ifp == NULL) 1985 ifp = sc->sc_carpdev; 1986 
1987 if (sc->sc_carpdev != NULL && ifp != sc->sc_carpdev) 1988 return (EADDRNOTAVAIL); 1989 1990 if ((error = carp_set_ifp(sc, ifp))) 1991 return (error); 1992 1993 if (sc->sc_carpdev == NULL) 1994 return (EADDRNOTAVAIL); 1995 1996 if (sc->sc_naddrs6 == 0 && (error = carp_join_multicast6(sc)) != 0) 1997 return (error); 1998 1999 if (sc->sc_carpdev != NULL) 2000 sc->sc_if.if_flags |= IFF_UP; 2001 2002 carp_set_state_all(sc, INIT); 2003 2004 return (0); 2005 } 2006 2007 int 2008 carp_join_multicast6(struct carp_softc *sc) 2009 { 2010 struct in6_multi_mship *imm, *imm2; 2011 struct ip6_moptions *im6o = &sc->sc_im6o; 2012 struct sockaddr_in6 addr6; 2013 int error; 2014 2015 /* Join IPv6 CARP multicast group */ 2016 memset(&addr6, 0, sizeof(addr6)); 2017 addr6.sin6_family = AF_INET6; 2018 addr6.sin6_len = sizeof(addr6); 2019 addr6.sin6_addr.s6_addr16[0] = htons(0xff02); 2020 addr6.sin6_addr.s6_addr16[1] = htons(sc->sc_if.if_index); 2021 addr6.sin6_addr.s6_addr8[15] = 0x12; 2022 if ((imm = in6_joingroup(&sc->sc_if, 2023 &addr6.sin6_addr, &error)) == NULL) { 2024 return (error); 2025 } 2026 /* join solicited multicast address */ 2027 memset(&addr6.sin6_addr, 0, sizeof(addr6.sin6_addr)); 2028 addr6.sin6_addr.s6_addr16[0] = htons(0xff02); 2029 addr6.sin6_addr.s6_addr16[1] = htons(sc->sc_if.if_index); 2030 addr6.sin6_addr.s6_addr32[1] = 0; 2031 addr6.sin6_addr.s6_addr32[2] = htonl(1); 2032 addr6.sin6_addr.s6_addr32[3] = 0; 2033 addr6.sin6_addr.s6_addr8[12] = 0xff; 2034 if ((imm2 = in6_joingroup(&sc->sc_if, 2035 &addr6.sin6_addr, &error)) == NULL) { 2036 in6_leavegroup(imm); 2037 return (error); 2038 } 2039 2040 /* apply v6 multicast membership */ 2041 im6o->im6o_multicast_ifp = &sc->sc_if; 2042 if (imm) 2043 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, 2044 i6mm_chain); 2045 if (imm2) 2046 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm2, 2047 i6mm_chain); 2048 2049 return (0); 2050 } 2051 2052 #endif /* INET6 */ 2053 2054 int 2055 carp_ioctl(struct ifnet *ifp, u_long cmd, 
caddr_t addr) 2056 { 2057 struct proc *p = curproc; /* XXX */ 2058 struct carp_softc *sc = ifp->if_softc; 2059 struct carp_vhost_entry *vhe; 2060 struct carpreq carpr; 2061 struct ifaddr *ifa = (struct ifaddr *)addr; 2062 struct ifreq *ifr = (struct ifreq *)addr; 2063 struct ifnet *cdev = NULL; 2064 int i, error = 0; 2065 2066 switch (cmd) { 2067 case SIOCSIFADDR: 2068 switch (ifa->ifa_addr->sa_family) { 2069 #ifdef INET 2070 case AF_INET: 2071 sc->sc_if.if_flags |= IFF_UP; 2072 /* 2073 * emulate arp_ifinit() without doing a gratuitous arp 2074 * request so that the routes are setup correctly. 2075 */ 2076 ifa->ifa_rtrequest = arp_rtrequest; 2077 2078 error = carp_set_addr(sc, satosin(ifa->ifa_addr)); 2079 break; 2080 #endif /* INET */ 2081 #ifdef INET6 2082 case AF_INET6: 2083 sc->sc_if.if_flags |= IFF_UP; 2084 error = carp_set_addr6(sc, satosin6(ifa->ifa_addr)); 2085 break; 2086 #endif /* INET6 */ 2087 default: 2088 error = EAFNOSUPPORT; 2089 break; 2090 } 2091 break; 2092 2093 case SIOCSIFFLAGS: 2094 vhe = LIST_FIRST(&sc->carp_vhosts); 2095 if (vhe->state != INIT && !(ifr->ifr_flags & IFF_UP)) { 2096 carp_del_all_timeouts(sc); 2097 2098 /* we need the interface up to bow out */ 2099 sc->sc_if.if_flags |= IFF_UP; 2100 sc->sc_bow_out = 1; 2101 carp_vhe_send_ad_all(sc); 2102 sc->sc_bow_out = 0; 2103 2104 sc->sc_if.if_flags &= ~IFF_UP; 2105 carp_set_state_all(sc, INIT); 2106 carp_setrun_all(sc, 0); 2107 } else if (vhe->state == INIT && (ifr->ifr_flags & IFF_UP)) { 2108 sc->sc_if.if_flags |= IFF_UP; 2109 carp_setrun_all(sc, 0); 2110 } 2111 break; 2112 2113 case SIOCSVH: 2114 vhe = LIST_FIRST(&sc->carp_vhosts); 2115 if ((error = suser(p, 0)) != 0) 2116 break; 2117 if ((error = copyin(ifr->ifr_data, &carpr, sizeof carpr))) 2118 break; 2119 error = 1; 2120 if (carpr.carpr_carpdev[0] != '\0' && 2121 (cdev = ifunit(carpr.carpr_carpdev)) == NULL) 2122 return (EINVAL); 2123 if (carpr.carpr_peer.s_addr == 0) 2124 sc->sc_peer.s_addr = INADDR_CARP_GROUP; 2125 else 2126 
sc->sc_peer.s_addr = carpr.carpr_peer.s_addr; 2127 if ((error = carp_set_ifp(sc, cdev))) 2128 return (error); 2129 if (vhe->state != INIT && carpr.carpr_state != vhe->state) { 2130 switch (carpr.carpr_state) { 2131 case BACKUP: 2132 timeout_del(&vhe->ad_tmo); 2133 carp_set_state_all(sc, BACKUP); 2134 carp_setrun_all(sc, 0); 2135 break; 2136 case MASTER: 2137 LIST_FOREACH(vhe, &sc->carp_vhosts, 2138 vhost_entries) 2139 carp_master_down(vhe); 2140 break; 2141 default: 2142 break; 2143 } 2144 } 2145 if ((error = carp_vhids_ioctl(sc, &carpr))) 2146 return (error); 2147 if (carpr.carpr_advbase >= 0) { 2148 if (carpr.carpr_advbase > 255) { 2149 error = EINVAL; 2150 break; 2151 } 2152 sc->sc_advbase = carpr.carpr_advbase; 2153 error--; 2154 } 2155 if (memcmp(sc->sc_advskews, carpr.carpr_advskews, 2156 sizeof(sc->sc_advskews))) { 2157 i = 0; 2158 LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) 2159 vhe->advskew = carpr.carpr_advskews[i++]; 2160 bcopy(carpr.carpr_advskews, sc->sc_advskews, 2161 sizeof(sc->sc_advskews)); 2162 } 2163 if (sc->sc_balancing != carpr.carpr_balancing) { 2164 if (carpr.carpr_balancing > CARP_BAL_MAXID) { 2165 error = EINVAL; 2166 break; 2167 } 2168 sc->sc_balancing = carpr.carpr_balancing; 2169 carp_set_enaddr(sc); 2170 carp_update_lsmask(sc); 2171 } 2172 bcopy(carpr.carpr_key, sc->sc_key, sizeof(sc->sc_key)); 2173 if (error > 0) 2174 error = EINVAL; 2175 else { 2176 error = 0; 2177 carp_hmac_prepare(sc); 2178 carp_setrun_all(sc, 0); 2179 } 2180 break; 2181 2182 case SIOCGVH: 2183 memset(&carpr, 0, sizeof(carpr)); 2184 if (sc->sc_carpdev != NULL) 2185 strlcpy(carpr.carpr_carpdev, sc->sc_carpdev->if_xname, 2186 IFNAMSIZ); 2187 i = 0; 2188 LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) { 2189 carpr.carpr_vhids[i] = vhe->vhid; 2190 carpr.carpr_advskews[i] = vhe->advskew; 2191 carpr.carpr_states[i] = vhe->state; 2192 i++; 2193 } 2194 carpr.carpr_advbase = sc->sc_advbase; 2195 carpr.carpr_balancing = sc->sc_balancing; 2196 if (suser(p, 0) == 0) 
2197 bcopy(sc->sc_key, carpr.carpr_key, 2198 sizeof(carpr.carpr_key)); 2199 carpr.carpr_peer.s_addr = sc->sc_peer.s_addr; 2200 error = copyout(&carpr, ifr->ifr_data, sizeof(carpr)); 2201 break; 2202 2203 case SIOCADDMULTI: 2204 error = carp_ether_addmulti(sc, ifr); 2205 break; 2206 2207 case SIOCDELMULTI: 2208 error = carp_ether_delmulti(sc, ifr); 2209 break; 2210 case SIOCAIFGROUP: 2211 case SIOCDIFGROUP: 2212 if (sc->sc_demote_cnt) 2213 carp_ifgroup_ioctl(ifp, cmd, addr); 2214 break; 2215 case SIOCSIFGATTR: 2216 carp_ifgattr_ioctl(ifp, cmd, addr); 2217 break; 2218 default: 2219 error = ENOTTY; 2220 } 2221 2222 if (memcmp(sc->sc_ac.ac_enaddr, sc->sc_curlladdr, ETHER_ADDR_LEN) != 0) 2223 carp_set_enaddr(sc); 2224 return (error); 2225 } 2226 2227 int 2228 carp_check_dup_vhids(struct carp_softc *sc, struct carp_if *cif, 2229 struct carpreq *carpr) 2230 { 2231 struct carp_softc *vr; 2232 struct carp_vhost_entry *vhe, *vhe0; 2233 int i; 2234 2235 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) { 2236 if (vr == sc) 2237 continue; 2238 LIST_FOREACH(vhe, &vr->carp_vhosts, vhost_entries) { 2239 if (carpr) { 2240 for (i = 0; carpr->carpr_vhids[i]; i++) { 2241 if (vhe->vhid == carpr->carpr_vhids[i]) 2242 return (EINVAL); 2243 } 2244 } 2245 LIST_FOREACH(vhe0, &sc->carp_vhosts, vhost_entries) { 2246 if (vhe->vhid == vhe0->vhid) 2247 return (EINVAL); 2248 } 2249 } 2250 } 2251 return (0); 2252 } 2253 2254 int 2255 carp_vhids_ioctl(struct carp_softc *sc, struct carpreq *carpr) 2256 { 2257 int i, j; 2258 u_int8_t taken_vhids[256]; 2259 2260 if (carpr->carpr_vhids[0] == 0 || 2261 !memcmp(sc->sc_vhids, carpr->carpr_vhids, sizeof(sc->sc_vhids))) 2262 return (0); 2263 2264 memset(taken_vhids, 0, sizeof(taken_vhids)); 2265 for (i = 0; carpr->carpr_vhids[i]; i++) { 2266 if (taken_vhids[carpr->carpr_vhids[i]]) 2267 return (EINVAL); 2268 taken_vhids[carpr->carpr_vhids[i]] = 1; 2269 2270 if (sc->sc_carpdev) { 2271 struct carp_if *cif; 2272 cif = (struct carp_if *)sc->sc_carpdev->if_carp; 2273 
if (carp_check_dup_vhids(sc, cif, carpr)) 2274 return (EINVAL); 2275 } 2276 if (carpr->carpr_advskews[i] >= 255) 2277 return (EINVAL); 2278 } 2279 /* set sane balancing defaults */ 2280 if (i <= 1) 2281 carpr->carpr_balancing = CARP_BAL_NONE; 2282 else if (carpr->carpr_balancing == CARP_BAL_NONE && 2283 sc->sc_balancing == CARP_BAL_NONE) 2284 carpr->carpr_balancing = CARP_BAL_IP; 2285 2286 /* destroy all */ 2287 carp_del_all_timeouts(sc); 2288 carp_destroy_vhosts(sc); 2289 memset(sc->sc_vhids, 0, sizeof(sc->sc_vhids)); 2290 2291 /* sort vhosts list by vhid */ 2292 for (j = 1; j <= 255; j++) { 2293 for (i = 0; carpr->carpr_vhids[i]; i++) { 2294 if (carpr->carpr_vhids[i] != j) 2295 continue; 2296 if (carp_new_vhost(sc, carpr->carpr_vhids[i], 2297 carpr->carpr_advskews[i])) 2298 return (ENOMEM); 2299 sc->sc_vhids[i] = carpr->carpr_vhids[i]; 2300 sc->sc_advskews[i] = carpr->carpr_advskews[i]; 2301 } 2302 } 2303 carp_set_enaddr(sc); 2304 carp_set_state_all(sc, INIT); 2305 return (0); 2306 } 2307 2308 void 2309 carp_ifgroup_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr) 2310 { 2311 struct ifgroupreq *ifgr = (struct ifgroupreq *)addr; 2312 struct ifg_list *ifgl; 2313 int *dm, adj; 2314 2315 if (!strcmp(ifgr->ifgr_group, IFG_ALL)) 2316 return; 2317 adj = ((struct carp_softc *)ifp->if_softc)->sc_demote_cnt; 2318 if (cmd == SIOCDIFGROUP) 2319 adj = adj * -1; 2320 2321 TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) 2322 if (!strcmp(ifgl->ifgl_group->ifg_group, ifgr->ifgr_group)) { 2323 dm = &ifgl->ifgl_group->ifg_carp_demoted; 2324 if (*dm + adj >= 0) 2325 *dm += adj; 2326 else 2327 *dm = 0; 2328 } 2329 } 2330 2331 void 2332 carp_ifgattr_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr) 2333 { 2334 struct ifgroupreq *ifgr = (struct ifgroupreq *)addr; 2335 struct carp_softc *sc = ifp->if_softc; 2336 2337 if (ifgr->ifgr_attrib.ifg_carp_demoted > 0 && (sc->sc_if.if_flags & 2338 (IFF_UP|IFF_RUNNING)) == (IFF_UP|IFF_RUNNING)) 2339 carp_vhe_send_ad_all(sc); 2340 } 2341 2342 
/* 2343 * Start output on carp interface. This function should never be called. 2344 */ 2345 void 2346 carp_start(struct ifnet *ifp) 2347 { 2348 #ifdef DEBUG 2349 printf("%s: start called\n", ifp->if_xname); 2350 #endif 2351 } 2352 2353 int 2354 carp_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa, 2355 struct rtentry *rt) 2356 { 2357 struct carp_softc *sc = ((struct carp_softc *)ifp->if_softc); 2358 struct carp_vhost_entry *vhe; 2359 2360 vhe = sc->cur_vhe ? sc->cur_vhe : LIST_FIRST(&sc->carp_vhosts); 2361 2362 if (sc->sc_carpdev != NULL && 2363 (sc->sc_balancing || vhe->state == MASTER)) 2364 return (sc->sc_carpdev->if_output(ifp, m, sa, rt)); 2365 else { 2366 m_freem(m); 2367 return (ENETUNREACH); 2368 } 2369 } 2370 2371 void 2372 carp_set_state_all(struct carp_softc *sc, int state) 2373 { 2374 struct carp_vhost_entry *vhe; 2375 2376 LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) 2377 carp_set_state(vhe, state); 2378 } 2379 2380 void 2381 carp_set_state(struct carp_vhost_entry *vhe, int state) 2382 { 2383 struct carp_softc *sc = vhe->parent_sc; 2384 static const char *carp_states[] = { CARP_STATES }; 2385 int loglevel; 2386 2387 if (vhe->state == state) 2388 return; 2389 if (vhe->state == INIT || state == INIT) 2390 loglevel = LOG_WARNING; 2391 else 2392 loglevel = LOG_CRIT; 2393 2394 if (sc->sc_vhe_count > 1) 2395 CARP_LOG(loglevel, sc, 2396 ("state transition (vhid %d): %s -> %s", vhe->vhid, 2397 carp_states[vhe->state], carp_states[state])); 2398 else 2399 CARP_LOG(loglevel, sc, 2400 ("state transition: %s -> %s", 2401 carp_states[vhe->state], carp_states[state])); 2402 2403 vhe->state = state; 2404 carp_update_lsmask(sc); 2405 2406 /* only the master vhe creates link state messages */ 2407 if (!vhe->vhe_leader) 2408 return; 2409 2410 switch (state) { 2411 case BACKUP: 2412 sc->sc_if.if_link_state = LINK_STATE_DOWN; 2413 break; 2414 case MASTER: 2415 sc->sc_if.if_link_state = LINK_STATE_UP; 2416 break; 2417 default: 2418 
sc->sc_if.if_link_state = LINK_STATE_INVALID; 2419 break; 2420 } 2421 if_link_state_change(&sc->sc_if); 2422 } 2423 2424 void 2425 carp_group_demote_adj(struct ifnet *ifp, int adj, char *reason) 2426 { 2427 struct ifg_list *ifgl; 2428 int *dm; 2429 struct carp_softc *nil = NULL; 2430 2431 if (ifp->if_type == IFT_CARP) { 2432 dm = &((struct carp_softc *)ifp->if_softc)->sc_demote_cnt; 2433 if (*dm + adj >= 0) 2434 *dm += adj; 2435 else 2436 *dm = 0; 2437 } 2438 2439 TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) { 2440 if (!strcmp(ifgl->ifgl_group->ifg_group, IFG_ALL)) 2441 continue; 2442 dm = &ifgl->ifgl_group->ifg_carp_demoted; 2443 2444 if (*dm + adj >= 0) 2445 *dm += adj; 2446 else 2447 *dm = 0; 2448 2449 if (adj > 0 && *dm == 1) 2450 carp_send_ad_all(); 2451 CARP_LOG(LOG_ERR, nil, 2452 ("%s demoted group %s by %d to %d (%s)", 2453 ifp->if_xname, ifgl->ifgl_group->ifg_group, 2454 adj, *dm, reason)); 2455 } 2456 } 2457 2458 int 2459 carp_group_demote_count(struct carp_softc *sc) 2460 { 2461 struct ifg_list *ifgl; 2462 int count = 0; 2463 2464 TAILQ_FOREACH(ifgl, &sc->sc_if.if_groups, ifgl_next) 2465 count += ifgl->ifgl_group->ifg_carp_demoted; 2466 2467 if (count == 0 && sc->sc_demote_cnt) 2468 count = sc->sc_demote_cnt; 2469 2470 return (count > 255 ? 
255 : count); 2471 } 2472 2473 void 2474 carp_carpdev_state(void *v) 2475 { 2476 struct carp_if *cif; 2477 struct carp_softc *sc; 2478 struct ifnet *ifp = v; 2479 2480 if (ifp->if_type == IFT_CARP) 2481 return; 2482 2483 cif = (struct carp_if *)ifp->if_carp; 2484 2485 TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) { 2486 int suppressed = sc->sc_suppress; 2487 2488 if (sc->sc_carpdev->if_link_state == LINK_STATE_DOWN || 2489 !(sc->sc_carpdev->if_flags & IFF_UP)) { 2490 sc->sc_if.if_flags &= ~IFF_RUNNING; 2491 carp_del_all_timeouts(sc); 2492 carp_set_state_all(sc, INIT); 2493 sc->sc_suppress = 1; 2494 carp_setrun_all(sc, 0); 2495 if (!suppressed) 2496 carp_group_demote_adj(&sc->sc_if, 1, "carpdev"); 2497 } else if (suppressed) { 2498 carp_set_state_all(sc, INIT); 2499 sc->sc_suppress = 0; 2500 carp_setrun_all(sc, 0); 2501 carp_group_demote_adj(&sc->sc_if, -1, "carpdev"); 2502 } 2503 } 2504 } 2505 2506 int 2507 carp_ether_addmulti(struct carp_softc *sc, struct ifreq *ifr) 2508 { 2509 struct ifnet *ifp; 2510 struct carp_mc_entry *mc; 2511 u_int8_t addrlo[ETHER_ADDR_LEN], addrhi[ETHER_ADDR_LEN]; 2512 int error; 2513 2514 ifp = sc->sc_carpdev; 2515 if (ifp == NULL) 2516 return (EINVAL); 2517 2518 error = ether_addmulti(ifr, (struct arpcom *)&sc->sc_ac); 2519 if (error != ENETRESET) 2520 return (error); 2521 2522 /* 2523 * This is new multicast address. We have to tell parent 2524 * about it. Also, remember this multicast address so that 2525 * we can delete them on unconfigure. 2526 */ 2527 mc = malloc(sizeof(struct carp_mc_entry), M_DEVBUF, M_NOWAIT); 2528 if (mc == NULL) { 2529 error = ENOMEM; 2530 goto alloc_failed; 2531 } 2532 2533 /* 2534 * As ether_addmulti() returns ENETRESET, following two 2535 * statement shouldn't fail. 
2536 */ 2537 (void)ether_multiaddr(&ifr->ifr_addr, addrlo, addrhi); 2538 ETHER_LOOKUP_MULTI(addrlo, addrhi, &sc->sc_ac, mc->mc_enm); 2539 memcpy(&mc->mc_addr, &ifr->ifr_addr, ifr->ifr_addr.sa_len); 2540 LIST_INSERT_HEAD(&sc->carp_mc_listhead, mc, mc_entries); 2541 2542 error = (*ifp->if_ioctl)(ifp, SIOCADDMULTI, (caddr_t)ifr); 2543 if (error != 0) 2544 goto ioctl_failed; 2545 2546 return (error); 2547 2548 ioctl_failed: 2549 LIST_REMOVE(mc, mc_entries); 2550 free(mc, M_DEVBUF, 0); 2551 alloc_failed: 2552 (void)ether_delmulti(ifr, (struct arpcom *)&sc->sc_ac); 2553 2554 return (error); 2555 } 2556 2557 int 2558 carp_ether_delmulti(struct carp_softc *sc, struct ifreq *ifr) 2559 { 2560 struct ifnet *ifp; 2561 struct ether_multi *enm; 2562 struct carp_mc_entry *mc; 2563 u_int8_t addrlo[ETHER_ADDR_LEN], addrhi[ETHER_ADDR_LEN]; 2564 int error; 2565 2566 ifp = sc->sc_carpdev; 2567 if (ifp == NULL) 2568 return (EINVAL); 2569 2570 /* 2571 * Find a key to lookup carp_mc_entry. We have to do this 2572 * before calling ether_delmulti for obvious reason. 2573 */ 2574 if ((error = ether_multiaddr(&ifr->ifr_addr, addrlo, addrhi)) != 0) 2575 return (error); 2576 ETHER_LOOKUP_MULTI(addrlo, addrhi, &sc->sc_ac, enm); 2577 if (enm == NULL) 2578 return (EINVAL); 2579 2580 LIST_FOREACH(mc, &sc->carp_mc_listhead, mc_entries) 2581 if (mc->mc_enm == enm) 2582 break; 2583 2584 /* We won't delete entries we didn't add */ 2585 if (mc == NULL) 2586 return (EINVAL); 2587 2588 error = ether_delmulti(ifr, (struct arpcom *)&sc->sc_ac); 2589 if (error != ENETRESET) 2590 return (error); 2591 2592 /* We no longer use this multicast address. Tell parent so. */ 2593 error = (*ifp->if_ioctl)(ifp, SIOCDELMULTI, (caddr_t)ifr); 2594 if (error == 0) { 2595 /* And forget about this address. 
*/ 2596 LIST_REMOVE(mc, mc_entries); 2597 free(mc, M_DEVBUF, 0); 2598 } else 2599 (void)ether_addmulti(ifr, (struct arpcom *)&sc->sc_ac); 2600 return (error); 2601 } 2602 2603 /* 2604 * Delete any multicast address we have asked to add from parent 2605 * interface. Called when the carp is being unconfigured. 2606 */ 2607 void 2608 carp_ether_purgemulti(struct carp_softc *sc) 2609 { 2610 struct ifnet *ifp = sc->sc_carpdev; /* Parent. */ 2611 struct carp_mc_entry *mc; 2612 union { 2613 struct ifreq ifreq; 2614 struct { 2615 char ifr_name[IFNAMSIZ]; 2616 struct sockaddr_storage ifr_ss; 2617 } ifreq_storage; 2618 } u; 2619 struct ifreq *ifr = &u.ifreq; 2620 2621 if (ifp == NULL) 2622 return; 2623 2624 memcpy(ifr->ifr_name, ifp->if_xname, IFNAMSIZ); 2625 while ((mc = LIST_FIRST(&sc->carp_mc_listhead)) != NULL) { 2626 memcpy(&ifr->ifr_addr, &mc->mc_addr, mc->mc_addr.ss_len); 2627 (void)(*ifp->if_ioctl)(ifp, SIOCDELMULTI, (caddr_t)ifr); 2628 LIST_REMOVE(mc, mc_entries); 2629 free(mc, M_DEVBUF, 0); 2630 } 2631 } 2632