1 /* $OpenBSD: ip_carp.c,v 1.293 2016/07/25 16:44:04 benno Exp $ */ 2 3 /* 4 * Copyright (c) 2002 Michael Shalayeff. All rights reserved. 5 * Copyright (c) 2003 Ryan McBride. All rights reserved. 6 * Copyright (c) 2006-2008 Marco Pfatschbacher. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, 21 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 22 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 23 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 25 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 26 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 27 * THE POSSIBILITY OF SUCH DAMAGE. 
28 */ 29 30 /* 31 * TODO: 32 * - iface reconfigure 33 * - support for hardware checksum calculations; 34 * 35 */ 36 37 #include "ether.h" 38 39 #include <sys/param.h> 40 #include <sys/systm.h> 41 #include <sys/mbuf.h> 42 #include <sys/socket.h> 43 #include <sys/socketvar.h> 44 #include <sys/timeout.h> 45 #include <sys/ioctl.h> 46 #include <sys/errno.h> 47 #include <sys/device.h> 48 #include <sys/kernel.h> 49 #include <sys/sysctl.h> 50 #include <sys/syslog.h> 51 #include <sys/refcnt.h> 52 53 #include <net/if.h> 54 #include <net/if_var.h> 55 #include <net/if_types.h> 56 #include <net/netisr.h> 57 58 #include <crypto/sha1.h> 59 60 #include <netinet/in.h> 61 #include <netinet/in_var.h> 62 #include <netinet/ip.h> 63 #include <netinet/ip_var.h> 64 #include <netinet/if_ether.h> 65 #include <netinet/ip_ipsp.h> 66 67 #include <net/if_dl.h> 68 69 #ifdef INET6 70 #include <netinet6/in6_var.h> 71 #include <netinet/icmp6.h> 72 #include <netinet/ip6.h> 73 #include <netinet6/ip6_var.h> 74 #include <netinet6/nd6.h> 75 #include <netinet6/in6_ifattach.h> 76 #endif 77 78 #include "bpfilter.h" 79 #if NBPFILTER > 0 80 #include <net/bpf.h> 81 #endif 82 83 #include "vlan.h" 84 #if NVLAN > 0 85 #include <net/if_vlan_var.h> 86 #endif 87 88 #include <netinet/ip_carp.h> 89 90 struct carp_mc_entry { 91 LIST_ENTRY(carp_mc_entry) mc_entries; 92 union { 93 struct ether_multi *mcu_enm; 94 } mc_u; 95 struct sockaddr_storage mc_addr; 96 }; 97 #define mc_enm mc_u.mcu_enm 98 99 enum { HMAC_ORIG=0, HMAC_NOV6LL=1, HMAC_MAX=2 }; 100 101 struct carp_vhost_entry { 102 SRPL_ENTRY(carp_vhost_entry) vhost_entries; 103 struct refcnt vhost_refcnt; 104 105 struct carp_softc *parent_sc; 106 int vhe_leader; 107 int vhid; 108 int advskew; 109 enum { INIT = 0, BACKUP, MASTER } state; 110 struct timeout ad_tmo; /* advertisement timeout */ 111 struct timeout md_tmo; /* master down timeout */ 112 struct timeout md6_tmo; /* master down timeout */ 113 114 u_int64_t vhe_replay_cookie; 115 116 /* authentication */ 117 
#define CARP_HMAC_PAD 64 118 unsigned char vhe_pad[CARP_HMAC_PAD]; 119 SHA1_CTX vhe_sha1[HMAC_MAX]; 120 121 u_int8_t vhe_enaddr[ETHER_ADDR_LEN]; 122 }; 123 124 void carp_vh_ref(void *, void *); 125 void carp_vh_unref(void *, void *); 126 127 struct srpl_rc carp_vh_rc = 128 SRPL_RC_INITIALIZER(carp_vh_ref, carp_vh_unref, NULL); 129 130 struct carp_softc { 131 struct arpcom sc_ac; 132 #define sc_if sc_ac.ac_if 133 #define sc_carpdev sc_ac.ac_if.if_carpdev 134 void *ah_cookie; 135 void *lh_cookie; 136 struct ip_moptions sc_imo; 137 #ifdef INET6 138 struct ip6_moptions sc_im6o; 139 #endif /* INET6 */ 140 141 SRPL_ENTRY(carp_softc) sc_list; 142 struct refcnt sc_refcnt; 143 144 int sc_suppress; 145 int sc_bow_out; 146 int sc_demote_cnt; 147 148 int sc_sendad_errors; 149 #define CARP_SENDAD_MAX_ERRORS(sc) (3 * (sc)->sc_vhe_count) 150 int sc_sendad_success; 151 #define CARP_SENDAD_MIN_SUCCESS(sc) (3 * (sc)->sc_vhe_count) 152 153 char sc_curlladdr[ETHER_ADDR_LEN]; 154 155 SRPL_HEAD(, carp_vhost_entry) carp_vhosts; 156 int sc_vhe_count; 157 u_int8_t sc_vhids[CARP_MAXNODES]; 158 u_int8_t sc_advskews[CARP_MAXNODES]; 159 u_int8_t sc_balancing; 160 161 int sc_naddrs; 162 int sc_naddrs6; 163 int sc_advbase; /* seconds */ 164 165 /* authentication */ 166 unsigned char sc_key[CARP_KEY_LEN]; 167 168 u_int32_t sc_hashkey[2]; 169 u_int32_t sc_lsmask; /* load sharing mask */ 170 int sc_lscount; /* # load sharing interfaces (max 32) */ 171 int sc_delayed_arp; /* delayed ARP request countdown */ 172 int sc_realmac; /* using real mac */ 173 174 struct in_addr sc_peer; 175 176 LIST_HEAD(__carp_mchead, carp_mc_entry) carp_mc_listhead; 177 struct carp_vhost_entry *cur_vhe; /* current active vhe */ 178 }; 179 180 void carp_sc_ref(void *, void *); 181 void carp_sc_unref(void *, void *); 182 183 struct srpl_rc carp_sc_rc = 184 SRPL_RC_INITIALIZER(carp_sc_ref, carp_sc_unref, NULL); 185 186 int carp_opts[CARPCTL_MAXID] = { 0, 1, 0, LOG_CRIT }; /* XXX for now */ 187 struct carpstats carpstats; 188 
189 int carp_send_all_recur = 0; 190 191 struct carp_if { 192 struct srpl vhif_vrs; 193 }; 194 195 #define CARP_LOG(l, sc, s) \ 196 do { \ 197 if (carp_opts[CARPCTL_LOG] >= l) { \ 198 if (sc) \ 199 log(l, "%s: ", \ 200 (sc)->sc_if.if_xname); \ 201 else \ 202 log(l, "carp: "); \ 203 addlog s; \ 204 addlog("\n"); \ 205 } \ 206 } while (0) 207 208 void carp_hmac_prepare(struct carp_softc *); 209 void carp_hmac_prepare_ctx(struct carp_vhost_entry *, u_int8_t); 210 void carp_hmac_generate(struct carp_vhost_entry *, u_int32_t *, 211 unsigned char *, u_int8_t); 212 int carp_hmac_verify(struct carp_vhost_entry *, u_int32_t *, 213 unsigned char *); 214 int carp_input(struct ifnet *, struct mbuf *, void *); 215 void carp_proto_input_c(struct ifnet *, struct mbuf *, 216 struct carp_header *, int, sa_family_t); 217 void carp_proto_input_if(struct ifnet *, struct mbuf *, int); 218 void carpattach(int); 219 void carpdetach(struct carp_softc *); 220 int carp_prepare_ad(struct mbuf *, struct carp_vhost_entry *, 221 struct carp_header *); 222 void carp_send_ad_all(void); 223 void carp_vhe_send_ad_all(struct carp_softc *); 224 void carp_send_ad(void *); 225 void carp_send_arp(struct carp_softc *); 226 void carp_master_down(void *); 227 int carp_ioctl(struct ifnet *, u_long, caddr_t); 228 int carp_vhids_ioctl(struct carp_softc *, struct carpreq *); 229 int carp_check_dup_vhids(struct carp_softc *, struct carp_if *, 230 struct carpreq *); 231 void carp_ifgroup_ioctl(struct ifnet *, u_long, caddr_t); 232 void carp_ifgattr_ioctl(struct ifnet *, u_long, caddr_t); 233 void carp_start(struct ifnet *); 234 void carp_setrun_all(struct carp_softc *, sa_family_t); 235 void carp_setrun(struct carp_vhost_entry *, sa_family_t); 236 void carp_set_state_all(struct carp_softc *, int); 237 void carp_set_state(struct carp_vhost_entry *, int); 238 void carp_multicast_cleanup(struct carp_softc *); 239 int carp_set_ifp(struct carp_softc *, struct ifnet *); 240 void carp_set_enaddr(struct carp_softc *); 
241 void carp_set_vhe_enaddr(struct carp_vhost_entry *); 242 void carp_addr_updated(void *); 243 int carp_set_addr(struct carp_softc *, struct sockaddr_in *); 244 int carp_join_multicast(struct carp_softc *); 245 #ifdef INET6 246 void carp_send_na(struct carp_softc *); 247 int carp_set_addr6(struct carp_softc *, struct sockaddr_in6 *); 248 int carp_join_multicast6(struct carp_softc *); 249 #endif 250 int carp_clone_create(struct if_clone *, int); 251 int carp_clone_destroy(struct ifnet *); 252 int carp_ether_addmulti(struct carp_softc *, struct ifreq *); 253 int carp_ether_delmulti(struct carp_softc *, struct ifreq *); 254 void carp_ether_purgemulti(struct carp_softc *); 255 int carp_group_demote_count(struct carp_softc *); 256 void carp_update_lsmask(struct carp_softc *); 257 int carp_new_vhost(struct carp_softc *, int, int); 258 void carp_destroy_vhosts(struct carp_softc *); 259 void carp_del_all_timeouts(struct carp_softc *); 260 261 struct if_clone carp_cloner = 262 IF_CLONE_INITIALIZER("carp", carp_clone_create, carp_clone_destroy); 263 264 #define carp_cksum(_m, _l) ((u_int16_t)in_cksum((_m), (_l))) 265 #define CARP_IFQ_PRIO 6 266 267 void 268 carp_hmac_prepare(struct carp_softc *sc) 269 { 270 struct carp_vhost_entry *vhe; 271 u_int8_t i; 272 273 KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */ 274 275 SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) { 276 for (i = 0; i < HMAC_MAX; i++) { 277 carp_hmac_prepare_ctx(vhe, i); 278 } 279 } 280 } 281 282 void 283 carp_hmac_prepare_ctx(struct carp_vhost_entry *vhe, u_int8_t ctx) 284 { 285 struct carp_softc *sc = vhe->parent_sc; 286 287 u_int8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT; 288 u_int8_t vhid = vhe->vhid & 0xff; 289 SHA1_CTX sha1ctx; 290 u_int32_t kmd[5]; 291 struct ifaddr *ifa; 292 int i, found; 293 struct in_addr last, cur, in; 294 #ifdef INET6 295 struct in6_addr last6, cur6, in6; 296 #endif /* INET6 */ 297 298 /* compute ipad from key */ 299 memset(vhe->vhe_pad, 0, 
sizeof(vhe->vhe_pad)); 300 bcopy(sc->sc_key, vhe->vhe_pad, sizeof(sc->sc_key)); 301 for (i = 0; i < sizeof(vhe->vhe_pad); i++) 302 vhe->vhe_pad[i] ^= 0x36; 303 304 /* precompute first part of inner hash */ 305 SHA1Init(&vhe->vhe_sha1[ctx]); 306 SHA1Update(&vhe->vhe_sha1[ctx], vhe->vhe_pad, sizeof(vhe->vhe_pad)); 307 SHA1Update(&vhe->vhe_sha1[ctx], (void *)&version, sizeof(version)); 308 SHA1Update(&vhe->vhe_sha1[ctx], (void *)&type, sizeof(type)); 309 310 /* generate a key for the arpbalance hash, before the vhid is hashed */ 311 if (vhe->vhe_leader) { 312 bcopy(&vhe->vhe_sha1[ctx], &sha1ctx, sizeof(sha1ctx)); 313 SHA1Final((unsigned char *)kmd, &sha1ctx); 314 sc->sc_hashkey[0] = kmd[0] ^ kmd[1]; 315 sc->sc_hashkey[1] = kmd[2] ^ kmd[3]; 316 } 317 318 /* the rest of the precomputation */ 319 if (!sc->sc_realmac && vhe->vhe_leader && 320 memcmp(sc->sc_ac.ac_enaddr, vhe->vhe_enaddr, ETHER_ADDR_LEN) != 0) 321 SHA1Update(&vhe->vhe_sha1[ctx], sc->sc_ac.ac_enaddr, 322 ETHER_ADDR_LEN); 323 324 SHA1Update(&vhe->vhe_sha1[ctx], (void *)&vhid, sizeof(vhid)); 325 326 /* Hash the addresses from smallest to largest, not interface order */ 327 cur.s_addr = 0; 328 do { 329 found = 0; 330 last = cur; 331 cur.s_addr = 0xffffffff; 332 TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) { 333 if (ifa->ifa_addr->sa_family != AF_INET) 334 continue; 335 in.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr; 336 if (ntohl(in.s_addr) > ntohl(last.s_addr) && 337 ntohl(in.s_addr) < ntohl(cur.s_addr)) { 338 cur.s_addr = in.s_addr; 339 found++; 340 } 341 } 342 if (found) 343 SHA1Update(&vhe->vhe_sha1[ctx], 344 (void *)&cur, sizeof(cur)); 345 } while (found); 346 #ifdef INET6 347 memset(&cur6, 0x00, sizeof(cur6)); 348 do { 349 found = 0; 350 last6 = cur6; 351 memset(&cur6, 0xff, sizeof(cur6)); 352 TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) { 353 if (ifa->ifa_addr->sa_family != AF_INET6) 354 continue; 355 in6 = ifatoia6(ifa)->ia_addr.sin6_addr; 356 if (IN6_IS_SCOPE_EMBED(&in6)) { 357 if 
(ctx == HMAC_NOV6LL) 358 continue; 359 in6.s6_addr16[1] = 0; 360 } 361 if (memcmp(&in6, &last6, sizeof(in6)) > 0 && 362 memcmp(&in6, &cur6, sizeof(in6)) < 0) { 363 cur6 = in6; 364 found++; 365 } 366 } 367 if (found) 368 SHA1Update(&vhe->vhe_sha1[ctx], 369 (void *)&cur6, sizeof(cur6)); 370 } while (found); 371 #endif /* INET6 */ 372 373 /* convert ipad to opad */ 374 for (i = 0; i < sizeof(vhe->vhe_pad); i++) 375 vhe->vhe_pad[i] ^= 0x36 ^ 0x5c; 376 } 377 378 void 379 carp_hmac_generate(struct carp_vhost_entry *vhe, u_int32_t counter[2], 380 unsigned char md[20], u_int8_t ctx) 381 { 382 SHA1_CTX sha1ctx; 383 384 /* fetch first half of inner hash */ 385 bcopy(&vhe->vhe_sha1[ctx], &sha1ctx, sizeof(sha1ctx)); 386 387 SHA1Update(&sha1ctx, (void *)counter, sizeof(vhe->vhe_replay_cookie)); 388 SHA1Final(md, &sha1ctx); 389 390 /* outer hash */ 391 SHA1Init(&sha1ctx); 392 SHA1Update(&sha1ctx, vhe->vhe_pad, sizeof(vhe->vhe_pad)); 393 SHA1Update(&sha1ctx, md, 20); 394 SHA1Final(md, &sha1ctx); 395 } 396 397 int 398 carp_hmac_verify(struct carp_vhost_entry *vhe, u_int32_t counter[2], 399 unsigned char md[20]) 400 { 401 unsigned char md2[20]; 402 u_int8_t i; 403 404 for (i = 0; i < HMAC_MAX; i++) { 405 carp_hmac_generate(vhe, counter, md2, i); 406 if (!timingsafe_bcmp(md, md2, sizeof(md2))) 407 return (0); 408 } 409 return (1); 410 } 411 412 void 413 carp_proto_input(struct mbuf *m, ...) 414 { 415 struct ifnet *ifp; 416 int hlen; 417 va_list ap; 418 419 va_start(ap, m); 420 hlen = va_arg(ap, int); 421 va_end(ap); 422 423 ifp = if_get(m->m_pkthdr.ph_ifidx); 424 if (ifp == NULL) { 425 m_freem(m); 426 return; 427 } 428 429 carp_proto_input_if(ifp, m, hlen); 430 if_put(ifp); 431 } 432 433 /* 434 * process input packet. 435 * we have rearranged checks order compared to the rfc, 436 * but it seems more efficient this way or not possible otherwise. 
437 */ 438 void 439 carp_proto_input_if(struct ifnet *ifp, struct mbuf *m, int hlen) 440 { 441 struct ip *ip = mtod(m, struct ip *); 442 struct carp_softc *sc = NULL; 443 struct carp_header *ch; 444 int iplen, len, ismulti; 445 446 carpstats.carps_ipackets++; 447 448 if (!carp_opts[CARPCTL_ALLOW]) { 449 m_freem(m); 450 return; 451 } 452 453 ismulti = IN_MULTICAST(ip->ip_dst.s_addr); 454 455 /* check if received on a valid carp interface */ 456 if (!((ifp->if_type == IFT_CARP && ismulti) || 457 (ifp->if_type != IFT_CARP && !ismulti && ifp->if_carp != NULL))) { 458 carpstats.carps_badif++; 459 CARP_LOG(LOG_INFO, sc, 460 ("packet received on non-carp interface: %s", 461 ifp->if_xname)); 462 m_freem(m); 463 return; 464 } 465 466 /* verify that the IP TTL is 255. */ 467 if (ip->ip_ttl != CARP_DFLTTL) { 468 carpstats.carps_badttl++; 469 CARP_LOG(LOG_NOTICE, sc, ("received ttl %d != %d on %s", 470 ip->ip_ttl, CARP_DFLTTL, ifp->if_xname)); 471 m_freem(m); 472 return; 473 } 474 475 /* 476 * verify that the received packet length is 477 * equal to the CARP header 478 */ 479 iplen = ip->ip_hl << 2; 480 len = iplen + sizeof(*ch); 481 if (len > m->m_pkthdr.len) { 482 carpstats.carps_badlen++; 483 CARP_LOG(LOG_INFO, sc, ("packet too short %d on %s", 484 m->m_pkthdr.len, ifp->if_xname)); 485 m_freem(m); 486 return; 487 } 488 489 if ((m = m_pullup(m, len)) == NULL) { 490 carpstats.carps_hdrops++; 491 return; 492 } 493 ip = mtod(m, struct ip *); 494 ch = (struct carp_header *)(mtod(m, caddr_t) + iplen); 495 496 /* verify the CARP checksum */ 497 m->m_data += iplen; 498 if (carp_cksum(m, len - iplen)) { 499 carpstats.carps_badsum++; 500 CARP_LOG(LOG_INFO, sc, ("checksum failed on %s", 501 ifp->if_xname)); 502 m_freem(m); 503 return; 504 } 505 m->m_data -= iplen; 506 507 carp_proto_input_c(ifp, m, ch, ismulti, AF_INET); 508 } 509 510 #ifdef INET6 511 int carp6_proto_input_if(struct ifnet *, struct mbuf *, int *); 512 513 int 514 carp6_proto_input(struct mbuf **mp, int *offp, int 
proto) 515 { 516 struct mbuf *m = *mp; 517 struct ifnet *ifp; 518 int rv; 519 520 ifp = if_get(m->m_pkthdr.ph_ifidx); 521 if (ifp == NULL) { 522 m_freem(m); 523 return (IPPROTO_DONE); 524 } 525 526 rv = carp6_proto_input_if(ifp, m, offp); 527 if_put(ifp); 528 529 return (rv); 530 } 531 532 int 533 carp6_proto_input_if(struct ifnet *ifp, struct mbuf *m, int *offp) 534 { 535 struct carp_softc *sc = NULL; 536 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); 537 struct carp_header *ch; 538 u_int len; 539 540 carpstats.carps_ipackets6++; 541 542 if (!carp_opts[CARPCTL_ALLOW]) { 543 m_freem(m); 544 return (IPPROTO_DONE); 545 } 546 547 /* check if received on a valid carp interface */ 548 if (ifp->if_type != IFT_CARP) { 549 carpstats.carps_badif++; 550 CARP_LOG(LOG_INFO, sc, ("packet received on non-carp interface: %s", 551 ifp->if_xname)); 552 m_freem(m); 553 return (IPPROTO_DONE); 554 } 555 556 /* verify that the IP TTL is 255 */ 557 if (ip6->ip6_hlim != CARP_DFLTTL) { 558 carpstats.carps_badttl++; 559 CARP_LOG(LOG_NOTICE, sc, ("received ttl %d != %d on %s", 560 ip6->ip6_hlim, CARP_DFLTTL, ifp->if_xname)); 561 m_freem(m); 562 return (IPPROTO_DONE); 563 } 564 565 /* verify that we have a complete carp packet */ 566 len = m->m_len; 567 if ((m = m_pullup(m, *offp + sizeof(*ch))) == NULL) { 568 carpstats.carps_badlen++; 569 CARP_LOG(LOG_INFO, sc, ("packet size %u too small", len)); 570 return (IPPROTO_DONE); 571 } 572 ch = (struct carp_header *)(mtod(m, caddr_t) + *offp); 573 574 /* verify the CARP checksum */ 575 m->m_data += *offp; 576 if (carp_cksum(m, sizeof(*ch))) { 577 carpstats.carps_badsum++; 578 CARP_LOG(LOG_INFO, sc, ("checksum failed, on %s", 579 ifp->if_xname)); 580 m_freem(m); 581 return (IPPROTO_DONE); 582 } 583 m->m_data -= *offp; 584 585 carp_proto_input_c(ifp, m, ch, 1, AF_INET6); 586 return (IPPROTO_DONE); 587 } 588 #endif /* INET6 */ 589 590 void 591 carp_proto_input_c(struct ifnet *ifp, struct mbuf *m, struct carp_header *ch, 592 int ismulti, sa_family_t 
af) 593 { 594 struct carp_softc *sc; 595 struct carp_vhost_entry *vhe; 596 struct timeval sc_tv, ch_tv; 597 struct carp_if *cif; 598 599 if (ifp->if_type == IFT_CARP) 600 cif = (struct carp_if *)ifp->if_carpdev->if_carp; 601 else 602 cif = (struct carp_if *)ifp->if_carp; 603 604 KERNEL_ASSERT_LOCKED(); /* touching vhif_vrs + carp_vhosts */ 605 SRPL_FOREACH_LOCKED(sc, &cif->vhif_vrs, sc_list) { 606 if (af == AF_INET && 607 ismulti != IN_MULTICAST(sc->sc_peer.s_addr)) 608 continue; 609 SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) { 610 if (vhe->vhid == ch->carp_vhid) 611 goto found; 612 } 613 } 614 found: 615 616 if (!sc || (sc->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) != 617 (IFF_UP|IFF_RUNNING)) { 618 carpstats.carps_badvhid++; 619 m_freem(m); 620 return; 621 } 622 623 getmicrotime(&sc->sc_if.if_lastchange); 624 sc->sc_if.if_ipackets++; 625 sc->sc_if.if_ibytes += m->m_pkthdr.len; 626 627 /* verify the CARP version. */ 628 if (ch->carp_version != CARP_VERSION) { 629 carpstats.carps_badver++; 630 sc->sc_if.if_ierrors++; 631 CARP_LOG(LOG_NOTICE, sc, ("invalid version %d != %d", 632 ch->carp_version, CARP_VERSION)); 633 m_freem(m); 634 return; 635 } 636 637 /* verify the hash */ 638 if (carp_hmac_verify(vhe, ch->carp_counter, ch->carp_md)) { 639 carpstats.carps_badauth++; 640 sc->sc_if.if_ierrors++; 641 CARP_LOG(LOG_INFO, sc, ("incorrect hash")); 642 m_freem(m); 643 return; 644 } 645 646 if (!memcmp(&vhe->vhe_replay_cookie, ch->carp_counter, 647 sizeof(ch->carp_counter))) { 648 /* Do not log duplicates from non simplex interfaces */ 649 if (sc->sc_carpdev->if_flags & IFF_SIMPLEX) { 650 carpstats.carps_badauth++; 651 sc->sc_if.if_ierrors++; 652 CARP_LOG(LOG_WARNING, sc, 653 ("replay or network loop detected")); 654 } 655 m_freem(m); 656 return; 657 } 658 659 sc_tv.tv_sec = sc->sc_advbase; 660 sc_tv.tv_usec = vhe->advskew * 1000000 / 256; 661 ch_tv.tv_sec = ch->carp_advbase; 662 ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256; 663 664 switch (vhe->state) { 
665 case INIT: 666 break; 667 case MASTER: 668 /* 669 * If we receive an advertisement from a master who's going to 670 * be more frequent than us, and whose demote count is not higher 671 * than ours, go into BACKUP state. If his demote count is lower, 672 * also go into BACKUP. 673 */ 674 if (((timercmp(&sc_tv, &ch_tv, >) || 675 timercmp(&sc_tv, &ch_tv, ==)) && 676 (ch->carp_demote <= carp_group_demote_count(sc))) || 677 ch->carp_demote < carp_group_demote_count(sc)) { 678 timeout_del(&vhe->ad_tmo); 679 carp_set_state(vhe, BACKUP); 680 carp_setrun(vhe, 0); 681 } 682 break; 683 case BACKUP: 684 /* 685 * If we're pre-empting masters who advertise slower than us, 686 * and do not have a better demote count, treat them as down. 687 * 688 */ 689 if (carp_opts[CARPCTL_PREEMPT] && 690 timercmp(&sc_tv, &ch_tv, <) && 691 ch->carp_demote >= carp_group_demote_count(sc)) { 692 carp_master_down(vhe); 693 break; 694 } 695 696 /* 697 * Take over masters advertising with a higher demote count, 698 * regardless of CARPCTL_PREEMPT. 699 */ 700 if (ch->carp_demote > carp_group_demote_count(sc)) { 701 carp_master_down(vhe); 702 break; 703 } 704 705 /* 706 * If the master is going to advertise at such a low frequency 707 * that he's guaranteed to time out, we'd might as well just 708 * treat him as timed out now. 709 */ 710 sc_tv.tv_sec = sc->sc_advbase * 3; 711 if (sc->sc_advbase && timercmp(&sc_tv, &ch_tv, <)) { 712 carp_master_down(vhe); 713 break; 714 } 715 716 /* 717 * Otherwise, we reset the counter and wait for the next 718 * advertisement. 719 */ 720 carp_setrun(vhe, af); 721 break; 722 } 723 724 m_freem(m); 725 return; 726 } 727 728 int 729 carp_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp, 730 size_t newlen) 731 { 732 /* All sysctl names at this level are terminal. 
*/ 733 if (namelen != 1) 734 return (ENOTDIR); 735 736 switch (name[0]) { 737 case CARPCTL_STATS: 738 if (newp != NULL) 739 return (EPERM); 740 return (sysctl_struct(oldp, oldlenp, newp, newlen, 741 &carpstats, sizeof(carpstats))); 742 default: 743 if (name[0] <= 0 || name[0] >= CARPCTL_MAXID) 744 return (ENOPROTOOPT); 745 return sysctl_int(oldp, oldlenp, newp, newlen, 746 &carp_opts[name[0]]); 747 } 748 } 749 750 /* 751 * Interface side of the CARP implementation. 752 */ 753 754 /* ARGSUSED */ 755 void 756 carpattach(int n) 757 { 758 struct ifg_group *ifg; 759 760 if ((ifg = if_creategroup("carp")) != NULL) 761 ifg->ifg_refcnt++; /* keep around even if empty */ 762 if_clone_attach(&carp_cloner); 763 } 764 765 int 766 carp_clone_create(struct if_clone *ifc, int unit) 767 { 768 struct carp_softc *sc; 769 struct ifnet *ifp; 770 771 sc = malloc(sizeof(*sc), M_DEVBUF, M_NOWAIT|M_ZERO); 772 if (!sc) 773 return (ENOMEM); 774 775 refcnt_init(&sc->sc_refcnt); 776 777 SRPL_INIT(&sc->carp_vhosts); 778 sc->sc_vhe_count = 0; 779 if (carp_new_vhost(sc, 0, 0)) { 780 free(sc, M_DEVBUF, sizeof(*sc)); 781 return (ENOMEM); 782 } 783 784 sc->sc_suppress = 0; 785 sc->sc_advbase = CARP_DFLTINTV; 786 sc->sc_naddrs = sc->sc_naddrs6 = 0; 787 #ifdef INET6 788 sc->sc_im6o.im6o_hlim = CARP_DFLTTL; 789 #endif /* INET6 */ 790 sc->sc_imo.imo_membership = (struct in_multi **)malloc( 791 (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_IPMOPTS, 792 M_WAITOK|M_ZERO); 793 sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS; 794 795 LIST_INIT(&sc->carp_mc_listhead); 796 ifp = &sc->sc_if; 797 ifp->if_softc = sc; 798 snprintf(ifp->if_xname, sizeof ifp->if_xname, "%s%d", ifc->ifc_name, 799 unit); 800 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 801 ifp->if_ioctl = carp_ioctl; 802 ifp->if_start = carp_start; 803 IFQ_SET_MAXLEN(&ifp->if_snd, 1); 804 if_attach(ifp); 805 ether_ifattach(ifp); 806 ifp->if_type = IFT_CARP; 807 ifp->if_sadl->sdl_type = IFT_CARP; 808 ifp->if_output = 
carp_output; 809 ifp->if_priority = IF_CARP_DEFAULT_PRIORITY; 810 ifp->if_link_state = LINK_STATE_INVALID; 811 812 /* Hook carp_addr_updated to cope with address and route changes. */ 813 sc->ah_cookie = hook_establish(sc->sc_if.if_addrhooks, 0, 814 carp_addr_updated, sc); 815 816 return (0); 817 } 818 819 int 820 carp_new_vhost(struct carp_softc *sc, int vhid, int advskew) 821 { 822 struct carp_vhost_entry *vhe, *vhe0; 823 824 vhe = malloc(sizeof(*vhe), M_DEVBUF, M_NOWAIT | M_ZERO); 825 if (vhe == NULL) 826 return (ENOMEM); 827 828 refcnt_init(&vhe->vhost_refcnt); 829 carp_sc_ref(NULL, sc); /* give a sc ref to the vhe */ 830 vhe->parent_sc = sc; 831 vhe->vhid = vhid; 832 vhe->advskew = advskew; 833 vhe->state = INIT; 834 timeout_set(&vhe->ad_tmo, carp_send_ad, vhe); 835 timeout_set(&vhe->md_tmo, carp_master_down, vhe); 836 timeout_set(&vhe->md6_tmo, carp_master_down, vhe); 837 838 KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */ 839 840 /* mark the first vhe as leader */ 841 if (SRPL_EMPTY_LOCKED(&sc->carp_vhosts)) { 842 vhe->vhe_leader = 1; 843 SRPL_INSERT_HEAD_LOCKED(&carp_vh_rc, &sc->carp_vhosts, 844 vhe, vhost_entries); 845 sc->sc_vhe_count = 1; 846 return (0); 847 } 848 849 SRPL_FOREACH_LOCKED(vhe0, &sc->carp_vhosts, vhost_entries) { 850 if (SRPL_NEXT_LOCKED(vhe0, vhost_entries) == NULL) 851 break; 852 } 853 854 SRPL_INSERT_AFTER_LOCKED(&carp_vh_rc, vhe0, vhe, vhost_entries); 855 sc->sc_vhe_count++; 856 857 return (0); 858 } 859 860 int 861 carp_clone_destroy(struct ifnet *ifp) 862 { 863 struct carp_softc *sc = ifp->if_softc; 864 865 carpdetach(sc); 866 ether_ifdetach(ifp); 867 if_detach(ifp); 868 carp_destroy_vhosts(ifp->if_softc); 869 refcnt_finalize(&sc->sc_refcnt, "carpdtor"); 870 free(sc->sc_imo.imo_membership, M_IPMOPTS, 0); 871 free(sc, M_DEVBUF, sizeof(*sc)); 872 return (0); 873 } 874 875 void 876 carp_del_all_timeouts(struct carp_softc *sc) 877 { 878 struct carp_vhost_entry *vhe; 879 880 KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */ 881 
SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) { 882 timeout_del(&vhe->ad_tmo); 883 timeout_del(&vhe->md_tmo); 884 timeout_del(&vhe->md6_tmo); 885 } 886 } 887 888 void 889 carpdetach(struct carp_softc *sc) 890 { 891 struct ifnet *ifp0; 892 struct carp_if *cif; 893 int s; 894 895 carp_del_all_timeouts(sc); 896 897 if (sc->sc_demote_cnt) 898 carp_group_demote_adj(&sc->sc_if, -sc->sc_demote_cnt, "detach"); 899 sc->sc_suppress = 0; 900 sc->sc_sendad_errors = 0; 901 902 carp_set_state_all(sc, INIT); 903 sc->sc_if.if_flags &= ~IFF_UP; 904 carp_setrun_all(sc, 0); 905 carp_multicast_cleanup(sc); 906 907 if (sc->ah_cookie != NULL) 908 hook_disestablish(sc->sc_if.if_addrhooks, sc->ah_cookie); 909 910 ifp0 = sc->sc_carpdev; 911 if (ifp0 == NULL) 912 return; 913 914 KERNEL_ASSERT_LOCKED(); /* touching vhif_vrs */ 915 916 cif = (struct carp_if *)ifp0->if_carp; 917 918 /* Restore previous input handler. */ 919 if_ih_remove(ifp0, carp_input, cif); 920 921 s = splnet(); 922 if (sc->lh_cookie != NULL) 923 hook_disestablish(ifp0->if_linkstatehooks, sc->lh_cookie); 924 925 SRPL_REMOVE_LOCKED(&carp_sc_rc, &cif->vhif_vrs, sc, 926 carp_softc, sc_list); 927 if (SRPL_EMPTY_LOCKED(&cif->vhif_vrs)) { 928 ifpromisc(ifp0, 0); 929 ifp0->if_carp = NULL; 930 free(cif, M_IFADDR, sizeof(*cif)); 931 } 932 sc->sc_carpdev = NULL; 933 splx(s); 934 } 935 936 /* Detach an interface from the carp. */ 937 void 938 carp_ifdetach(struct ifnet *ifp0) 939 { 940 struct carp_softc *sc, *nextsc; 941 struct carp_if *cif = (struct carp_if *)ifp0->if_carp; 942 943 KERNEL_ASSERT_LOCKED(); /* touching vhif_vrs */ 944 945 for (sc = SRPL_FIRST_LOCKED(&cif->vhif_vrs); sc != NULL; sc = nextsc) { 946 nextsc = SRPL_NEXT_LOCKED(sc, sc_list); 947 948 carpdetach(sc); /* this can free cif */ 949 } 950 } 951 952 void 953 carp_destroy_vhosts(struct carp_softc *sc) 954 { 955 /* XXX bow out? 
*/ 956 struct carp_vhost_entry *vhe; 957 958 KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */ 959 960 while ((vhe = SRPL_FIRST_LOCKED(&sc->carp_vhosts)) != NULL) { 961 SRPL_REMOVE_LOCKED(&carp_vh_rc, &sc->carp_vhosts, vhe, 962 carp_vhost_entry, vhost_entries); 963 carp_vh_unref(NULL, vhe); /* drop last ref */ 964 } 965 sc->sc_vhe_count = 0; 966 } 967 968 int 969 carp_prepare_ad(struct mbuf *m, struct carp_vhost_entry *vhe, 970 struct carp_header *ch) 971 { 972 if (!vhe->vhe_replay_cookie) { 973 arc4random_buf(&vhe->vhe_replay_cookie, 974 sizeof(vhe->vhe_replay_cookie)); 975 } 976 977 bcopy(&vhe->vhe_replay_cookie, ch->carp_counter, 978 sizeof(ch->carp_counter)); 979 980 /* 981 * For the time being, do not include the IPv6 linklayer addresses 982 * in the HMAC. 983 */ 984 carp_hmac_generate(vhe, ch->carp_counter, ch->carp_md, HMAC_NOV6LL); 985 986 return (0); 987 } 988 989 void 990 carp_send_ad_all(void) 991 { 992 struct ifnet *ifp0; 993 struct carp_if *cif; 994 struct carp_softc *vh; 995 996 KERNEL_ASSERT_LOCKED(); /* touching vhif_vrs */ 997 998 if (carp_send_all_recur > 0) 999 return; 1000 ++carp_send_all_recur; 1001 TAILQ_FOREACH(ifp0, &ifnet, if_list) { 1002 if (ifp0->if_carp == NULL || ifp0->if_type == IFT_CARP) 1003 continue; 1004 1005 cif = (struct carp_if *)ifp0->if_carp; 1006 SRPL_FOREACH_LOCKED(vh, &cif->vhif_vrs, sc_list) { 1007 if ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) == 1008 (IFF_UP|IFF_RUNNING)) { 1009 carp_vhe_send_ad_all(vh); 1010 } 1011 } 1012 } 1013 --carp_send_all_recur; 1014 } 1015 1016 void 1017 carp_vhe_send_ad_all(struct carp_softc *sc) 1018 { 1019 struct carp_vhost_entry *vhe; 1020 1021 KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */ 1022 1023 SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) { 1024 if (vhe->state == MASTER) 1025 carp_send_ad(vhe); 1026 } 1027 } 1028 1029 void 1030 carp_send_ad(void *v) 1031 { 1032 struct carp_header ch; 1033 struct timeval tv; 1034 struct carp_vhost_entry *vhe = v; 1035 struct carp_softc 
*sc = vhe->parent_sc; 1036 struct carp_header *ch_ptr; 1037 1038 struct mbuf *m; 1039 int error, len, advbase, advskew, s; 1040 struct ifaddr *ifa; 1041 struct sockaddr sa; 1042 1043 if (sc->sc_carpdev == NULL) { 1044 sc->sc_if.if_oerrors++; 1045 return; 1046 } 1047 1048 s = splsoftnet(); 1049 1050 /* bow out if we've gone to backup (the carp interface is going down) */ 1051 if (sc->sc_bow_out) { 1052 advbase = 255; 1053 advskew = 255; 1054 } else { 1055 advbase = sc->sc_advbase; 1056 advskew = vhe->advskew; 1057 tv.tv_sec = advbase; 1058 if (advbase == 0 && advskew == 0) 1059 tv.tv_usec = 1 * 1000000 / 256; 1060 else 1061 tv.tv_usec = advskew * 1000000 / 256; 1062 } 1063 1064 ch.carp_version = CARP_VERSION; 1065 ch.carp_type = CARP_ADVERTISEMENT; 1066 ch.carp_vhid = vhe->vhid; 1067 ch.carp_demote = carp_group_demote_count(sc) & 0xff; 1068 ch.carp_advbase = advbase; 1069 ch.carp_advskew = advskew; 1070 ch.carp_authlen = 7; /* XXX DEFINE */ 1071 ch.carp_cksum = 0; 1072 1073 sc->cur_vhe = vhe; /* we need the vhe later on the output path */ 1074 1075 if (sc->sc_naddrs) { 1076 struct ip *ip; 1077 1078 MGETHDR(m, M_DONTWAIT, MT_HEADER); 1079 if (m == NULL) { 1080 sc->sc_if.if_oerrors++; 1081 carpstats.carps_onomem++; 1082 /* XXX maybe less ? */ 1083 goto retry_later; 1084 } 1085 len = sizeof(*ip) + sizeof(ch); 1086 m->m_pkthdr.len = len; 1087 m->m_pkthdr.ph_ifidx = 0; 1088 m->m_pkthdr.ph_rtableid = sc->sc_if.if_rdomain; 1089 m->m_pkthdr.pf.prio = CARP_IFQ_PRIO; 1090 m->m_len = len; 1091 MH_ALIGN(m, m->m_len); 1092 ip = mtod(m, struct ip *); 1093 ip->ip_v = IPVERSION; 1094 ip->ip_hl = sizeof(*ip) >> 2; 1095 ip->ip_tos = IPTOS_LOWDELAY; 1096 ip->ip_len = htons(len); 1097 ip->ip_id = htons(ip_randomid()); 1098 ip->ip_off = htons(IP_DF); 1099 ip->ip_ttl = CARP_DFLTTL; 1100 ip->ip_p = IPPROTO_CARP; 1101 ip->ip_sum = 0; 1102 1103 memset(&sa, 0, sizeof(sa)); 1104 sa.sa_family = AF_INET; 1105 /* Prefer addresses on the parent interface as source for AD. 
*/ 1106 ifa = ifaof_ifpforaddr(&sa, sc->sc_carpdev); 1107 if (ifa == NULL) 1108 ifa = ifaof_ifpforaddr(&sa, &sc->sc_if); 1109 KASSERT(ifa != NULL); 1110 ip->ip_src.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr; 1111 ip->ip_dst.s_addr = sc->sc_peer.s_addr; 1112 if (IN_MULTICAST(ip->ip_dst.s_addr)) 1113 m->m_flags |= M_MCAST; 1114 1115 ch_ptr = (struct carp_header *)(ip + 1); 1116 bcopy(&ch, ch_ptr, sizeof(ch)); 1117 if (carp_prepare_ad(m, vhe, ch_ptr)) 1118 goto retry_later; 1119 1120 m->m_data += sizeof(*ip); 1121 ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip)); 1122 m->m_data -= sizeof(*ip); 1123 1124 getmicrotime(&sc->sc_if.if_lastchange); 1125 sc->sc_if.if_opackets++; 1126 sc->sc_if.if_obytes += len; 1127 carpstats.carps_opackets++; 1128 1129 error = ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, 1130 NULL, 0); 1131 if (error) { 1132 if (error == ENOBUFS) 1133 carpstats.carps_onomem++; 1134 else 1135 CARP_LOG(LOG_WARNING, sc, 1136 ("ip_output failed: %d", error)); 1137 sc->sc_if.if_oerrors++; 1138 if (sc->sc_sendad_errors < INT_MAX) 1139 sc->sc_sendad_errors++; 1140 if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS(sc)) 1141 carp_group_demote_adj(&sc->sc_if, 1, 1142 "> snderrors"); 1143 sc->sc_sendad_success = 0; 1144 } else { 1145 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS(sc)) { 1146 if (++sc->sc_sendad_success >= 1147 CARP_SENDAD_MIN_SUCCESS(sc)) { 1148 carp_group_demote_adj(&sc->sc_if, -1, 1149 "< snderrors"); 1150 sc->sc_sendad_errors = 0; 1151 } 1152 } else 1153 sc->sc_sendad_errors = 0; 1154 } 1155 if (vhe->vhe_leader) { 1156 if (sc->sc_delayed_arp > 0) 1157 sc->sc_delayed_arp--; 1158 if (sc->sc_delayed_arp == 0) { 1159 carp_send_arp(sc); 1160 sc->sc_delayed_arp = -1; 1161 } 1162 } 1163 } 1164 #ifdef INET6 1165 if (sc->sc_naddrs6) { 1166 struct ip6_hdr *ip6; 1167 1168 MGETHDR(m, M_DONTWAIT, MT_HEADER); 1169 if (m == NULL) { 1170 sc->sc_if.if_oerrors++; 1171 carpstats.carps_onomem++; 1172 /* XXX maybe less ? 
*/ 1173 goto retry_later; 1174 } 1175 len = sizeof(*ip6) + sizeof(ch); 1176 m->m_pkthdr.len = len; 1177 m->m_pkthdr.ph_ifidx = 0; 1178 m->m_pkthdr.pf.prio = CARP_IFQ_PRIO; 1179 m->m_pkthdr.ph_rtableid = sc->sc_if.if_rdomain; 1180 m->m_len = len; 1181 MH_ALIGN(m, m->m_len); 1182 m->m_flags |= M_MCAST; 1183 ip6 = mtod(m, struct ip6_hdr *); 1184 memset(ip6, 0, sizeof(*ip6)); 1185 ip6->ip6_vfc |= IPV6_VERSION; 1186 ip6->ip6_hlim = CARP_DFLTTL; 1187 ip6->ip6_nxt = IPPROTO_CARP; 1188 1189 /* set the source address */ 1190 memset(&sa, 0, sizeof(sa)); 1191 sa.sa_family = AF_INET6; 1192 /* Prefer addresses on the parent interface as source for AD. */ 1193 ifa = ifaof_ifpforaddr(&sa, sc->sc_carpdev); 1194 if (ifa == NULL) 1195 ifa = ifaof_ifpforaddr(&sa, &sc->sc_if); 1196 KASSERT(ifa != NULL); 1197 bcopy(ifatoia6(ifa)->ia_addr.sin6_addr.s6_addr, 1198 &ip6->ip6_src, sizeof(struct in6_addr)); 1199 /* set the multicast destination */ 1200 1201 ip6->ip6_dst.s6_addr16[0] = htons(0xff02); 1202 ip6->ip6_dst.s6_addr16[1] = htons(sc->sc_carpdev->if_index); 1203 ip6->ip6_dst.s6_addr8[15] = 0x12; 1204 1205 ch_ptr = (struct carp_header *)(ip6 + 1); 1206 bcopy(&ch, ch_ptr, sizeof(ch)); 1207 if (carp_prepare_ad(m, vhe, ch_ptr)) 1208 goto retry_later; 1209 1210 m->m_data += sizeof(*ip6); 1211 ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip6)); 1212 m->m_data -= sizeof(*ip6); 1213 1214 getmicrotime(&sc->sc_if.if_lastchange); 1215 sc->sc_if.if_opackets++; 1216 sc->sc_if.if_obytes += len; 1217 carpstats.carps_opackets6++; 1218 1219 error = ip6_output(m, NULL, NULL, 0, &sc->sc_im6o, NULL); 1220 if (error) { 1221 if (error == ENOBUFS) 1222 carpstats.carps_onomem++; 1223 else 1224 CARP_LOG(LOG_WARNING, sc, 1225 ("ip6_output failed: %d", error)); 1226 sc->sc_if.if_oerrors++; 1227 if (sc->sc_sendad_errors < INT_MAX) 1228 sc->sc_sendad_errors++; 1229 if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS(sc)) 1230 carp_group_demote_adj(&sc->sc_if, 1, 1231 "> snd6errors"); 1232 sc->sc_sendad_success 
= 0; 1233 } else { 1234 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS(sc)) { 1235 if (++sc->sc_sendad_success >= 1236 CARP_SENDAD_MIN_SUCCESS(sc)) { 1237 carp_group_demote_adj(&sc->sc_if, -1, 1238 "< snd6errors"); 1239 sc->sc_sendad_errors = 0; 1240 } 1241 } else 1242 sc->sc_sendad_errors = 0; 1243 } 1244 } 1245 #endif /* INET6 */ 1246 1247 retry_later: 1248 sc->cur_vhe = NULL; 1249 splx(s); 1250 if (advbase != 255 || advskew != 255) 1251 timeout_add(&vhe->ad_tmo, tvtohz(&tv)); 1252 } 1253 1254 /* 1255 * Broadcast a gratuitous ARP request containing 1256 * the virtual router MAC address for each IP address 1257 * associated with the virtual router. 1258 */ 1259 void 1260 carp_send_arp(struct carp_softc *sc) 1261 { 1262 struct ifaddr *ifa; 1263 in_addr_t in; 1264 int s = splsoftnet(); 1265 1266 TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) { 1267 1268 if (ifa->ifa_addr->sa_family != AF_INET) 1269 continue; 1270 1271 in = ifatoia(ifa)->ia_addr.sin_addr.s_addr; 1272 arprequest(&sc->sc_if, &in, &in, sc->sc_ac.ac_enaddr); 1273 DELAY(1000); /* XXX */ 1274 } 1275 splx(s); 1276 } 1277 1278 #ifdef INET6 1279 void 1280 carp_send_na(struct carp_softc *sc) 1281 { 1282 struct ifaddr *ifa; 1283 struct in6_addr *in6; 1284 static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT; 1285 int s = splsoftnet(); 1286 1287 TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) { 1288 1289 if (ifa->ifa_addr->sa_family != AF_INET6) 1290 continue; 1291 1292 in6 = &ifatoia6(ifa)->ia_addr.sin6_addr; 1293 nd6_na_output(&sc->sc_if, &mcast, in6, 1294 ND_NA_FLAG_OVERRIDE | 1295 (ip6_forwarding ? 
ND_NA_FLAG_ROUTER : 0), 1, NULL); 1296 DELAY(1000); /* XXX */ 1297 } 1298 splx(s); 1299 } 1300 #endif /* INET6 */ 1301 1302 void 1303 carp_update_lsmask(struct carp_softc *sc) 1304 { 1305 struct carp_vhost_entry *vhe; 1306 int count; 1307 1308 if (sc->sc_balancing == CARP_BAL_NONE) 1309 return; 1310 1311 sc->sc_lsmask = 0; 1312 count = 0; 1313 1314 KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */ 1315 SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) { 1316 if (vhe->state == MASTER && count < sizeof(sc->sc_lsmask) * 8) 1317 sc->sc_lsmask |= 1 << count; 1318 count++; 1319 } 1320 sc->sc_lscount = count; 1321 CARP_LOG(LOG_DEBUG, sc, ("carp_update_lsmask: %x", sc->sc_lsmask)); 1322 } 1323 1324 int 1325 carp_iamatch(struct ifnet *ifp) 1326 { 1327 struct carp_softc *sc = ifp->if_softc; 1328 struct carp_vhost_entry *vhe; 1329 struct srp_ref sr; 1330 int match = 0; 1331 1332 vhe = SRPL_ENTER(&sr, &sc->carp_vhosts); /* head */ 1333 if (vhe->state == MASTER) 1334 match = 1; 1335 SRPL_LEAVE(&sr); 1336 1337 return (match); 1338 } 1339 1340 #ifdef INET6 1341 int 1342 carp_iamatch6(struct ifnet *ifp) 1343 { 1344 struct carp_softc *sc = ifp->if_softc; 1345 struct carp_vhost_entry *vhe = SRPL_FIRST_LOCKED(&sc->carp_vhosts); 1346 1347 KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */ 1348 1349 if (vhe->state == MASTER) 1350 return (1); 1351 1352 return (0); 1353 } 1354 #endif /* INET6 */ 1355 1356 struct ifnet * 1357 carp_ourether(void *v, u_int8_t *ena) 1358 { 1359 struct carp_if *cif = (struct carp_if *)v; 1360 struct carp_softc *vh; 1361 1362 KERNEL_ASSERT_LOCKED(); /* touching vhif_vrs + carp_vhosts */ 1363 1364 SRPL_FOREACH_LOCKED(vh, &cif->vhif_vrs, sc_list) { 1365 struct carp_vhost_entry *vhe; 1366 if ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) != 1367 (IFF_UP|IFF_RUNNING)) 1368 continue; 1369 vhe = SRPL_FIRST_LOCKED(&vh->carp_vhosts); 1370 if ((vhe->state == MASTER || vh->sc_balancing >= CARP_BAL_IP) && 1371 !memcmp(ena, vh->sc_ac.ac_enaddr, ETHER_ADDR_LEN)) 
1372 return (&vh->sc_if); 1373 } 1374 return (NULL); 1375 } 1376 1377 int 1378 carp_vhe_match(struct carp_softc *sc, uint8_t *ena) 1379 { 1380 struct carp_vhost_entry *vhe; 1381 struct srp_ref sr; 1382 int match = 0; 1383 1384 vhe = SRPL_ENTER(&sr, &sc->carp_vhosts); /* head */ 1385 match = (vhe->state == MASTER || sc->sc_balancing >= CARP_BAL_IP) && 1386 !memcmp(ena, sc->sc_ac.ac_enaddr, ETHER_ADDR_LEN); 1387 SRPL_LEAVE(&sr); 1388 1389 return (match); 1390 } 1391 1392 int 1393 carp_input(struct ifnet *ifp0, struct mbuf *m, void *cookie) 1394 { 1395 struct ether_header *eh; 1396 struct mbuf_list ml = MBUF_LIST_INITIALIZER(); 1397 struct carp_if *cif; 1398 struct carp_softc *sc; 1399 struct srp_ref sr; 1400 1401 #if NVLAN > 0 1402 /* 1403 * If the underlying interface removed the VLAN header itself, 1404 * it's not for us. 1405 */ 1406 if (ISSET(m->m_flags, M_VLANTAG)) 1407 return (0); 1408 #endif 1409 1410 eh = mtod(m, struct ether_header *); 1411 cif = (struct carp_if *)cookie; 1412 KASSERT(cif == (struct carp_if *)ifp0->if_carp); 1413 1414 SRPL_FOREACH(sc, &sr, &cif->vhif_vrs, sc_list) { 1415 if ((sc->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) != 1416 (IFF_UP|IFF_RUNNING)) 1417 continue; 1418 1419 if (carp_vhe_match(sc, eh->ether_dhost)) 1420 break; 1421 } 1422 1423 if (sc == NULL) { 1424 SRPL_LEAVE(&sr); 1425 1426 if (!ETHER_IS_MULTICAST(eh->ether_dhost)) 1427 return (0); 1428 1429 /* 1430 * XXX Should really check the list of multicast addresses 1431 * for each CARP interface _before_ copying. 1432 */ 1433 SRPL_FOREACH(sc, &sr, &cif->vhif_vrs, sc_list) { 1434 struct mbuf *m0; 1435 1436 if (!(sc->sc_if.if_flags & IFF_UP)) 1437 continue; 1438 1439 m0 = m_dup_pkt(m, ETHER_ALIGN, M_DONTWAIT); 1440 if (m0 == NULL) 1441 continue; 1442 1443 ml_init(&ml); 1444 ml_enqueue(&ml, m0); 1445 1446 if_input(&sc->sc_if, &ml); 1447 } 1448 SRPL_LEAVE(&sr); 1449 1450 return (0); 1451 } 1452 1453 /* 1454 * Clear mcast if received on a carp IP balanced address. 
1455 */ 1456 if (sc->sc_balancing == CARP_BAL_IP && 1457 ETHER_IS_MULTICAST(eh->ether_dhost)) 1458 *(eh->ether_dhost) &= ~0x01; 1459 1460 ml_enqueue(&ml, m); 1461 if_input(&sc->sc_if, &ml); 1462 SRPL_LEAVE(&sr); 1463 1464 return (1); 1465 } 1466 1467 int 1468 carp_lsdrop(struct mbuf *m, sa_family_t af, u_int32_t *src, u_int32_t *dst) 1469 { 1470 struct ifnet *ifp; 1471 struct carp_softc *sc; 1472 int match = 1; 1473 u_int32_t fold; 1474 1475 ifp = if_get(m->m_pkthdr.ph_ifidx); 1476 KASSERT(ifp != NULL); 1477 1478 sc = ifp->if_softc; 1479 if (sc->sc_balancing == CARP_BAL_NONE) 1480 goto done; 1481 /* 1482 * Never drop carp advertisements. 1483 * XXX Bad idea to pass all broadcast / multicast traffic? 1484 */ 1485 if (m->m_flags & (M_BCAST|M_MCAST)) 1486 goto done; 1487 1488 fold = src[0] ^ dst[0]; 1489 #ifdef INET6 1490 if (af == AF_INET6) { 1491 int i; 1492 for (i = 1; i < 4; i++) 1493 fold ^= src[i] ^ dst[i]; 1494 } 1495 #endif 1496 if (sc->sc_lscount == 0) /* just to be safe */ 1497 match = 0; 1498 else 1499 match = (1 << (ntohl(fold) % sc->sc_lscount)) & sc->sc_lsmask; 1500 1501 done: 1502 if_put(ifp); 1503 return (!match); 1504 } 1505 1506 void 1507 carp_master_down(void *v) 1508 { 1509 struct carp_vhost_entry *vhe = v; 1510 struct carp_softc *sc = vhe->parent_sc; 1511 1512 switch (vhe->state) { 1513 case INIT: 1514 printf("%s: master_down event in INIT state\n", 1515 sc->sc_if.if_xname); 1516 break; 1517 case MASTER: 1518 break; 1519 case BACKUP: 1520 carp_set_state(vhe, MASTER); 1521 carp_send_ad(vhe); 1522 if (sc->sc_balancing == CARP_BAL_NONE && vhe->vhe_leader) { 1523 carp_send_arp(sc); 1524 /* Schedule a delayed ARP to deal w/ some L3 switches */ 1525 sc->sc_delayed_arp = 2; 1526 #ifdef INET6 1527 carp_send_na(sc); 1528 #endif /* INET6 */ 1529 } 1530 carp_setrun(vhe, 0); 1531 carpstats.carps_preempt++; 1532 break; 1533 } 1534 } 1535 1536 void 1537 carp_setrun_all(struct carp_softc *sc, sa_family_t af) 1538 { 1539 struct carp_vhost_entry *vhe; 1540 1541 
KERNEL_ASSERT_LOCKED(); /* touching carp_vhost */ 1542 SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) { 1543 carp_setrun(vhe, af); 1544 } 1545 } 1546 1547 /* 1548 * When in backup state, af indicates whether to reset the master down timer 1549 * for v4 or v6. If it's set to zero, reset the ones which are already pending. 1550 */ 1551 void 1552 carp_setrun(struct carp_vhost_entry *vhe, sa_family_t af) 1553 { 1554 struct timeval tv; 1555 struct carp_softc *sc = vhe->parent_sc; 1556 1557 if (sc->sc_carpdev == NULL) { 1558 sc->sc_if.if_flags &= ~IFF_RUNNING; 1559 carp_set_state_all(sc, INIT); 1560 return; 1561 } 1562 1563 if (memcmp(((struct arpcom *)sc->sc_carpdev)->ac_enaddr, 1564 sc->sc_ac.ac_enaddr, ETHER_ADDR_LEN) == 0) 1565 sc->sc_realmac = 1; 1566 else 1567 sc->sc_realmac = 0; 1568 1569 if (sc->sc_if.if_flags & IFF_UP && vhe->vhid > 0 && 1570 (sc->sc_naddrs || sc->sc_naddrs6) && !sc->sc_suppress) { 1571 sc->sc_if.if_flags |= IFF_RUNNING; 1572 } else { 1573 sc->sc_if.if_flags &= ~IFF_RUNNING; 1574 return; 1575 } 1576 1577 switch (vhe->state) { 1578 case INIT: 1579 carp_set_state(vhe, BACKUP); 1580 carp_setrun(vhe, 0); 1581 break; 1582 case BACKUP: 1583 timeout_del(&vhe->ad_tmo); 1584 tv.tv_sec = 3 * sc->sc_advbase; 1585 if (sc->sc_advbase == 0 && vhe->advskew == 0) 1586 tv.tv_usec = 3 * 1000000 / 256; 1587 else if (sc->sc_advbase == 0) 1588 tv.tv_usec = 3 * vhe->advskew * 1000000 / 256; 1589 else 1590 tv.tv_usec = vhe->advskew * 1000000 / 256; 1591 if (vhe->vhe_leader) 1592 sc->sc_delayed_arp = -1; 1593 switch (af) { 1594 case AF_INET: 1595 timeout_add(&vhe->md_tmo, tvtohz(&tv)); 1596 break; 1597 #ifdef INET6 1598 case AF_INET6: 1599 timeout_add(&vhe->md6_tmo, tvtohz(&tv)); 1600 break; 1601 #endif /* INET6 */ 1602 default: 1603 if (sc->sc_naddrs) 1604 timeout_add(&vhe->md_tmo, tvtohz(&tv)); 1605 if (sc->sc_naddrs6) 1606 timeout_add(&vhe->md6_tmo, tvtohz(&tv)); 1607 break; 1608 } 1609 break; 1610 case MASTER: 1611 tv.tv_sec = sc->sc_advbase; 1612 if 
(sc->sc_advbase == 0 && vhe->advskew == 0) 1613 tv.tv_usec = 1 * 1000000 / 256; 1614 else 1615 tv.tv_usec = vhe->advskew * 1000000 / 256; 1616 timeout_add(&vhe->ad_tmo, tvtohz(&tv)); 1617 break; 1618 } 1619 } 1620 1621 void 1622 carp_multicast_cleanup(struct carp_softc *sc) 1623 { 1624 struct ip_moptions *imo = &sc->sc_imo; 1625 #ifdef INET6 1626 struct ip6_moptions *im6o = &sc->sc_im6o; 1627 #endif 1628 u_int16_t n = imo->imo_num_memberships; 1629 1630 /* Clean up our own multicast memberships */ 1631 while (n-- > 0) { 1632 if (imo->imo_membership[n] != NULL) { 1633 in_delmulti(imo->imo_membership[n]); 1634 imo->imo_membership[n] = NULL; 1635 } 1636 } 1637 imo->imo_num_memberships = 0; 1638 imo->imo_ifidx = 0; 1639 1640 #ifdef INET6 1641 while (!LIST_EMPTY(&im6o->im6o_memberships)) { 1642 struct in6_multi_mship *imm = 1643 LIST_FIRST(&im6o->im6o_memberships); 1644 1645 LIST_REMOVE(imm, i6mm_chain); 1646 in6_leavegroup(imm); 1647 } 1648 im6o->im6o_ifidx = 0; 1649 #endif 1650 1651 /* And any other multicast memberships */ 1652 carp_ether_purgemulti(sc); 1653 } 1654 1655 int 1656 carp_set_ifp(struct carp_softc *sc, struct ifnet *ifp0) 1657 { 1658 struct carp_if *cif, *ncif = NULL; 1659 struct carp_softc *vr, *last = NULL, *after = NULL; 1660 int myself = 0, error = 0; 1661 int s; 1662 1663 KASSERT(ifp0 != sc->sc_carpdev); 1664 KERNEL_ASSERT_LOCKED(); /* touching vhif_vrs */ 1665 1666 if ((ifp0->if_flags & IFF_MULTICAST) == 0) 1667 return (EADDRNOTAVAIL); 1668 1669 if (ifp0->if_type == IFT_CARP) 1670 return (EINVAL); 1671 1672 if (ifp0->if_carp == NULL) { 1673 ncif = malloc(sizeof(*cif), M_IFADDR, M_NOWAIT|M_ZERO); 1674 if (ncif == NULL) 1675 return (ENOBUFS); 1676 if ((error = ifpromisc(ifp0, 1))) { 1677 free(ncif, M_IFADDR, sizeof(*ncif)); 1678 return (error); 1679 } 1680 1681 SRPL_INIT(&ncif->vhif_vrs); 1682 } else { 1683 cif = (struct carp_if *)ifp0->if_carp; 1684 if (carp_check_dup_vhids(sc, cif, NULL)) 1685 return (EINVAL); 1686 } 1687 1688 /* detach from old 
interface */ 1689 if (sc->sc_carpdev != NULL) 1690 carpdetach(sc); 1691 1692 /* attach carp interface to physical interface */ 1693 if (ncif != NULL) 1694 ifp0->if_carp = (caddr_t)ncif; 1695 sc->sc_carpdev = ifp0; 1696 sc->sc_if.if_capabilities = ifp0->if_capabilities & 1697 IFCAP_CSUM_MASK; 1698 cif = (struct carp_if *)ifp0->if_carp; 1699 SRPL_FOREACH_LOCKED(vr, &cif->vhif_vrs, sc_list) { 1700 struct carp_vhost_entry *vrhead, *schead; 1701 last = vr; 1702 1703 if (vr == sc) 1704 myself = 1; 1705 1706 vrhead = SRPL_FIRST_LOCKED(&vr->carp_vhosts); 1707 schead = SRPL_FIRST_LOCKED(&sc->carp_vhosts); 1708 if (vrhead->vhid < schead->vhid) 1709 after = vr; 1710 } 1711 1712 if (!myself) { 1713 /* We're trying to keep things in order */ 1714 if (last == NULL) { 1715 SRPL_INSERT_HEAD_LOCKED(&carp_sc_rc, &cif->vhif_vrs, 1716 sc, sc_list); 1717 } else if (after == NULL) { 1718 SRPL_INSERT_AFTER_LOCKED(&carp_sc_rc, last, 1719 sc, sc_list); 1720 } else { 1721 SRPL_INSERT_AFTER_LOCKED(&carp_sc_rc, after, 1722 sc, sc_list); 1723 } 1724 } 1725 if (sc->sc_naddrs || sc->sc_naddrs6) 1726 sc->sc_if.if_flags |= IFF_UP; 1727 carp_set_enaddr(sc); 1728 1729 sc->lh_cookie = hook_establish(ifp0->if_linkstatehooks, 1, 1730 carp_carpdev_state, ifp0); 1731 1732 /* Change input handler of the physical interface. 
*/ 1733 if_ih_insert(ifp0, carp_input, cif); 1734 1735 s = splnet(); 1736 carp_carpdev_state(ifp0); 1737 splx(s); 1738 1739 return (0); 1740 } 1741 1742 void 1743 carp_set_vhe_enaddr(struct carp_vhost_entry *vhe) 1744 { 1745 struct carp_softc *sc = vhe->parent_sc; 1746 1747 if (vhe->vhid != 0 && sc->sc_carpdev) { 1748 if (vhe->vhe_leader && sc->sc_balancing == CARP_BAL_IP) 1749 vhe->vhe_enaddr[0] = 1; 1750 else 1751 vhe->vhe_enaddr[0] = 0; 1752 vhe->vhe_enaddr[1] = 0; 1753 vhe->vhe_enaddr[2] = 0x5e; 1754 vhe->vhe_enaddr[3] = 0; 1755 vhe->vhe_enaddr[4] = 1; 1756 vhe->vhe_enaddr[5] = vhe->vhid; 1757 } else 1758 memset(vhe->vhe_enaddr, 0, ETHER_ADDR_LEN); 1759 } 1760 1761 void 1762 carp_set_enaddr(struct carp_softc *sc) 1763 { 1764 struct carp_vhost_entry *vhe; 1765 1766 KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */ 1767 SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) 1768 carp_set_vhe_enaddr(vhe); 1769 1770 vhe = SRPL_FIRST_LOCKED(&sc->carp_vhosts); 1771 1772 /* 1773 * Use the carp lladdr if the running one isn't manually set. 1774 * Only compare static parts of the lladdr. 1775 */ 1776 if ((memcmp(sc->sc_ac.ac_enaddr + 1, vhe->vhe_enaddr + 1, 1777 ETHER_ADDR_LEN - 2) == 0) || 1778 (!sc->sc_ac.ac_enaddr[0] && !sc->sc_ac.ac_enaddr[1] && 1779 !sc->sc_ac.ac_enaddr[2] && !sc->sc_ac.ac_enaddr[3] && 1780 !sc->sc_ac.ac_enaddr[4] && !sc->sc_ac.ac_enaddr[5])) 1781 bcopy(vhe->vhe_enaddr, sc->sc_ac.ac_enaddr, ETHER_ADDR_LEN); 1782 1783 /* Make sure the enaddr has changed before further twiddling. */ 1784 if (memcmp(sc->sc_ac.ac_enaddr, sc->sc_curlladdr, ETHER_ADDR_LEN) != 0) { 1785 bcopy(sc->sc_ac.ac_enaddr, LLADDR(sc->sc_if.if_sadl), 1786 ETHER_ADDR_LEN); 1787 bcopy(sc->sc_ac.ac_enaddr, sc->sc_curlladdr, ETHER_ADDR_LEN); 1788 #ifdef INET6 1789 /* 1790 * (re)attach a link-local address which matches 1791 * our new MAC address. 
1792 */ 1793 if (sc->sc_naddrs6) 1794 in6_ifattach_linklocal(&sc->sc_if, NULL); 1795 #endif 1796 carp_set_state_all(sc, INIT); 1797 carp_setrun_all(sc, 0); 1798 } 1799 } 1800 1801 void 1802 carp_addr_updated(void *v) 1803 { 1804 struct carp_softc *sc = (struct carp_softc *) v; 1805 struct ifaddr *ifa; 1806 int new_naddrs = 0, new_naddrs6 = 0; 1807 1808 TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) { 1809 if (ifa->ifa_addr->sa_family == AF_INET) 1810 new_naddrs++; 1811 #ifdef INET6 1812 else if (ifa->ifa_addr->sa_family == AF_INET6) 1813 new_naddrs6++; 1814 #endif /* INET6 */ 1815 } 1816 1817 /* We received address changes from if_addrhooks callback */ 1818 if (new_naddrs != sc->sc_naddrs || new_naddrs6 != sc->sc_naddrs6) { 1819 1820 sc->sc_naddrs = new_naddrs; 1821 sc->sc_naddrs6 = new_naddrs6; 1822 1823 /* Re-establish multicast membership removed by in_control */ 1824 if (IN_MULTICAST(sc->sc_peer.s_addr)) { 1825 if (!in_hasmulti(&sc->sc_peer, &sc->sc_if)) { 1826 struct in_multi **imm = 1827 sc->sc_imo.imo_membership; 1828 u_int16_t maxmem = 1829 sc->sc_imo.imo_max_memberships; 1830 1831 memset(&sc->sc_imo, 0, sizeof(sc->sc_imo)); 1832 sc->sc_imo.imo_membership = imm; 1833 sc->sc_imo.imo_max_memberships = maxmem; 1834 1835 if (sc->sc_carpdev != NULL && sc->sc_naddrs > 0) 1836 carp_join_multicast(sc); 1837 } 1838 } 1839 1840 if (sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0) { 1841 sc->sc_if.if_flags &= ~IFF_UP; 1842 carp_set_state_all(sc, INIT); 1843 } else 1844 carp_hmac_prepare(sc); 1845 } 1846 1847 carp_setrun_all(sc, 0); 1848 } 1849 1850 int 1851 carp_set_addr(struct carp_softc *sc, struct sockaddr_in *sin) 1852 { 1853 struct in_addr *in = &sin->sin_addr; 1854 int error; 1855 1856 KASSERT(sc->sc_carpdev != NULL); 1857 1858 /* XXX is this necessary? 
*/ 1859 if (in->s_addr == INADDR_ANY) { 1860 carp_setrun_all(sc, 0); 1861 return (0); 1862 } 1863 1864 if (sc->sc_naddrs == 0 && (error = carp_join_multicast(sc)) != 0) 1865 return (error); 1866 1867 carp_set_state_all(sc, INIT); 1868 1869 return (0); 1870 } 1871 1872 int 1873 carp_join_multicast(struct carp_softc *sc) 1874 { 1875 struct ip_moptions *imo = &sc->sc_imo; 1876 struct in_multi *imm; 1877 struct in_addr addr; 1878 1879 if (!IN_MULTICAST(sc->sc_peer.s_addr)) 1880 return (0); 1881 1882 addr.s_addr = sc->sc_peer.s_addr; 1883 if ((imm = in_addmulti(&addr, &sc->sc_if)) == NULL) 1884 return (ENOBUFS); 1885 1886 imo->imo_membership[0] = imm; 1887 imo->imo_num_memberships = 1; 1888 imo->imo_ifidx = sc->sc_if.if_index; 1889 imo->imo_ttl = CARP_DFLTTL; 1890 imo->imo_loop = 0; 1891 return (0); 1892 } 1893 1894 1895 #ifdef INET6 1896 int 1897 carp_set_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6) 1898 { 1899 int error; 1900 1901 KASSERT(sc->sc_carpdev != NULL); 1902 1903 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { 1904 carp_setrun_all(sc, 0); 1905 return (0); 1906 } 1907 1908 if (sc->sc_naddrs6 == 0 && (error = carp_join_multicast6(sc)) != 0) 1909 return (error); 1910 1911 carp_set_state_all(sc, INIT); 1912 1913 return (0); 1914 } 1915 1916 int 1917 carp_join_multicast6(struct carp_softc *sc) 1918 { 1919 struct in6_multi_mship *imm, *imm2; 1920 struct ip6_moptions *im6o = &sc->sc_im6o; 1921 struct sockaddr_in6 addr6; 1922 int error; 1923 1924 /* Join IPv6 CARP multicast group */ 1925 memset(&addr6, 0, sizeof(addr6)); 1926 addr6.sin6_family = AF_INET6; 1927 addr6.sin6_len = sizeof(addr6); 1928 addr6.sin6_addr.s6_addr16[0] = htons(0xff02); 1929 addr6.sin6_addr.s6_addr16[1] = htons(sc->sc_if.if_index); 1930 addr6.sin6_addr.s6_addr8[15] = 0x12; 1931 if ((imm = in6_joingroup(&sc->sc_if, 1932 &addr6.sin6_addr, &error)) == NULL) { 1933 return (error); 1934 } 1935 /* join solicited multicast address */ 1936 memset(&addr6.sin6_addr, 0, 
/*
 * ioctl entry point of the carp interface.
 *
 * ifp   the carp interface
 * cmd   ioctl request
 * addr  request argument; viewed as struct ifaddr for SIOCSIFADDR and as
 *       struct ifreq for the other requests handled here
 *
 * Returns 0 or an errno value; unknown requests yield ENOTTY.  After the
 * request has been handled (on the paths that do not return early) the
 * link-layer address is re-derived if ac_enaddr no longer matches
 * sc_curlladdr.
 */
int
carp_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr)
{
	struct proc *p = curproc;	/* XXX */
	struct carp_softc *sc = ifp->if_softc;
	struct carp_vhost_entry *vhe;
	struct carpreq carpr;
	struct ifaddr *ifa = (struct ifaddr *)addr;
	struct ifreq *ifr = (struct ifreq *)addr;
	struct ifnet *ifp0 = sc->sc_carpdev;
	int i, error = 0;

	switch (cmd) {
	case SIOCSIFADDR:
		/* an address can only be set once a parent device exists */
		if (ifp0 == NULL)
			return (EINVAL);

		switch (ifa->ifa_addr->sa_family) {
		case AF_INET:
			sc->sc_if.if_flags |= IFF_UP;
			error = carp_set_addr(sc, satosin(ifa->ifa_addr));
			break;
#ifdef INET6
		case AF_INET6:
			sc->sc_if.if_flags |= IFF_UP;
			error = carp_set_addr6(sc, satosin6(ifa->ifa_addr));
			break;
#endif /* INET6 */
		default:
			error = EAFNOSUPPORT;
			break;
		}
		break;

	case SIOCSIFFLAGS:
		KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
		vhe = SRPL_FIRST_LOCKED(&sc->carp_vhosts);
		if (vhe->state != INIT && !(ifr->ifr_flags & IFF_UP)) {
			/* going down: announce it, then fall back to INIT */
			carp_del_all_timeouts(sc);

			/* we need the interface up to bow out */
			sc->sc_if.if_flags |= IFF_UP;
			sc->sc_bow_out = 1;
			carp_vhe_send_ad_all(sc);
			sc->sc_bow_out = 0;

			sc->sc_if.if_flags &= ~IFF_UP;
			carp_set_state_all(sc, INIT);
			carp_setrun_all(sc, 0);
		} else if (vhe->state == INIT && (ifr->ifr_flags & IFF_UP)) {
			/* coming up from INIT */
			sc->sc_if.if_flags |= IFF_UP;
			carp_setrun_all(sc, 0);
		}
		break;

	case SIOCSVH:
		/*
		 * Apply a struct carpreq from userland (superuser only).
		 * Settings are applied one after another, so a failure
		 * further down leaves the earlier ones in effect.
		 */
		KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
		vhe = SRPL_FIRST_LOCKED(&sc->carp_vhosts);
		if ((error = suser(p, 0)) != 0)
			break;
		if ((error = copyin(ifr->ifr_data, &carpr, sizeof carpr)))
			break;
		/*
		 * NOTE(review): error is used as a "nothing valid seen"
		 * marker here, but both assignments from carp_set_ifp()
		 * and carp_vhids_ioctl() below reset it to 0 on success,
		 * so the "error > 0" test at the end of this case looks
		 * unreachable - confirm the intent.
		 */
		error = 1;
		/* an empty device name keeps the current parent */
		if (carpr.carpr_carpdev[0] != '\0' &&
		    (ifp0 = ifunit(carpr.carpr_carpdev)) == NULL)
			return (EINVAL);
		/* a zero peer selects the default carp multicast group */
		if (carpr.carpr_peer.s_addr == 0)
			sc->sc_peer.s_addr = INADDR_CARP_GROUP;
		else
			sc->sc_peer.s_addr = carpr.carpr_peer.s_addr;
		if (ifp0 != sc->sc_carpdev) {
			if ((error = carp_set_ifp(sc, ifp0)))
				return (error);
		}
		/* forced state change, only honoured outside INIT */
		if (vhe->state != INIT && carpr.carpr_state != vhe->state) {
			switch (carpr.carpr_state) {
			case BACKUP:
				timeout_del(&vhe->ad_tmo);
				carp_set_state_all(sc, BACKUP);
				carp_setrun_all(sc, 0);
				break;
			case MASTER:
				KERNEL_ASSERT_LOCKED();
				/* touching carp_vhosts */
				SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts,
				    vhost_entries)
					carp_master_down(vhe);
				break;
			default:
				break;
			}
		}
		/* may destroy and rebuild the whole vhost list */
		if ((error = carp_vhids_ioctl(sc, &carpr)))
			return (error);
		/* a negative advbase means "leave unchanged" */
		if (carpr.carpr_advbase >= 0) {
			if (carpr.carpr_advbase > 255) {
				error = EINVAL;
				break;
			}
			sc->sc_advbase = carpr.carpr_advbase;
			error--;
		}
		if (memcmp(sc->sc_advskews, carpr.carpr_advskews,
		    sizeof(sc->sc_advskews))) {
			/*
			 * NOTE(review): skews are handed out in list order,
			 * while carp_vhids_ioctl() sorts the list by vhid;
			 * verify the pairing for unsorted vhid requests.
			 */
			i = 0;
			KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
			SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts,
			    vhost_entries)
				vhe->advskew = carpr.carpr_advskews[i++];
			bcopy(carpr.carpr_advskews, sc->sc_advskews,
			    sizeof(sc->sc_advskews));
		}
		if (sc->sc_balancing != carpr.carpr_balancing) {
			if (carpr.carpr_balancing > CARP_BAL_MAXID) {
				error = EINVAL;
				break;
			}
			sc->sc_balancing = carpr.carpr_balancing;
			/* the balancing mode influences lladdr and mask */
			carp_set_enaddr(sc);
			carp_update_lsmask(sc);
		}
		bcopy(carpr.carpr_key, sc->sc_key, sizeof(sc->sc_key));
		if (error > 0)
			error = EINVAL;
		else {
			error = 0;
			carp_hmac_prepare(sc);
			carp_setrun_all(sc, 0);
		}
		break;

	case SIOCGVH:
		/* report the current configuration back to userland */
		memset(&carpr, 0, sizeof(carpr));
		if (ifp0 != NULL)
			strlcpy(carpr.carpr_carpdev, ifp0->if_xname, IFNAMSIZ);
		i = 0;
		KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
		SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) {
			carpr.carpr_vhids[i] = vhe->vhid;
			carpr.carpr_advskews[i] = vhe->advskew;
			carpr.carpr_states[i] = vhe->state;
			i++;
		}
		carpr.carpr_advbase = sc->sc_advbase;
		carpr.carpr_balancing = sc->sc_balancing;
		/* the key is only disclosed to the superuser */
		if (suser(p, 0) == 0)
			bcopy(sc->sc_key, carpr.carpr_key,
			    sizeof(carpr.carpr_key));
		carpr.carpr_peer.s_addr = sc->sc_peer.s_addr;
		error = copyout(&carpr, ifr->ifr_data, sizeof(carpr));
		break;

	case SIOCADDMULTI:
		error = carp_ether_addmulti(sc, ifr);
		break;

	case SIOCDELMULTI:
		error = carp_ether_delmulti(sc, ifr);
		break;
	case SIOCAIFGROUP:
	case SIOCDIFGROUP:
		/* group demotion only matters while we are demoted */
		if (sc->sc_demote_cnt)
			carp_ifgroup_ioctl(ifp, cmd, addr);
		break;
	case SIOCSIFGATTR:
		carp_ifgattr_ioctl(ifp, cmd, addr);
		break;
	default:
		error = ENOTTY;
	}

	/* pick up a link-layer address changed behind our back */
	if (memcmp(sc->sc_ac.ac_enaddr, sc->sc_curlladdr, ETHER_ADDR_LEN) != 0)
		carp_set_enaddr(sc);
	return (error);
}
vhif_vrs + carp_vhosts */ 2148 2149 SRPL_FOREACH_LOCKED(vr, &cif->vhif_vrs, sc_list) { 2150 if (vr == sc) 2151 continue; 2152 SRPL_FOREACH_LOCKED(vhe, &vr->carp_vhosts, vhost_entries) { 2153 if (carpr) { 2154 for (i = 0; carpr->carpr_vhids[i]; i++) { 2155 if (vhe->vhid == carpr->carpr_vhids[i]) 2156 return (EINVAL); 2157 } 2158 } 2159 SRPL_FOREACH_LOCKED(vhe0, &sc->carp_vhosts, 2160 vhost_entries) { 2161 if (vhe->vhid == vhe0->vhid) 2162 return (EINVAL); 2163 } 2164 } 2165 } 2166 return (0); 2167 } 2168 2169 int 2170 carp_vhids_ioctl(struct carp_softc *sc, struct carpreq *carpr) 2171 { 2172 int i, j; 2173 u_int8_t taken_vhids[256]; 2174 2175 if (carpr->carpr_vhids[0] == 0 || 2176 !memcmp(sc->sc_vhids, carpr->carpr_vhids, sizeof(sc->sc_vhids))) 2177 return (0); 2178 2179 memset(taken_vhids, 0, sizeof(taken_vhids)); 2180 for (i = 0; carpr->carpr_vhids[i]; i++) { 2181 if (taken_vhids[carpr->carpr_vhids[i]]) 2182 return (EINVAL); 2183 taken_vhids[carpr->carpr_vhids[i]] = 1; 2184 2185 if (sc->sc_carpdev) { 2186 struct carp_if *cif; 2187 cif = (struct carp_if *)sc->sc_carpdev->if_carp; 2188 if (carp_check_dup_vhids(sc, cif, carpr)) 2189 return (EINVAL); 2190 } 2191 if (carpr->carpr_advskews[i] >= 255) 2192 return (EINVAL); 2193 } 2194 /* set sane balancing defaults */ 2195 if (i <= 1) 2196 carpr->carpr_balancing = CARP_BAL_NONE; 2197 else if (carpr->carpr_balancing == CARP_BAL_NONE && 2198 sc->sc_balancing == CARP_BAL_NONE) 2199 carpr->carpr_balancing = CARP_BAL_IP; 2200 2201 /* destroy all */ 2202 carp_del_all_timeouts(sc); 2203 carp_destroy_vhosts(sc); 2204 memset(sc->sc_vhids, 0, sizeof(sc->sc_vhids)); 2205 2206 /* sort vhosts list by vhid */ 2207 for (j = 1; j <= 255; j++) { 2208 for (i = 0; carpr->carpr_vhids[i]; i++) { 2209 if (carpr->carpr_vhids[i] != j) 2210 continue; 2211 if (carp_new_vhost(sc, carpr->carpr_vhids[i], 2212 carpr->carpr_advskews[i])) 2213 return (ENOMEM); 2214 sc->sc_vhids[i] = carpr->carpr_vhids[i]; 2215 sc->sc_advskews[i] = 
carpr->carpr_advskews[i]; 2216 } 2217 } 2218 carp_set_enaddr(sc); 2219 carp_set_state_all(sc, INIT); 2220 return (0); 2221 } 2222 2223 void 2224 carp_ifgroup_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr) 2225 { 2226 struct ifgroupreq *ifgr = (struct ifgroupreq *)addr; 2227 struct ifg_list *ifgl; 2228 int *dm, adj; 2229 2230 if (!strcmp(ifgr->ifgr_group, IFG_ALL)) 2231 return; 2232 adj = ((struct carp_softc *)ifp->if_softc)->sc_demote_cnt; 2233 if (cmd == SIOCDIFGROUP) 2234 adj = adj * -1; 2235 2236 TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) 2237 if (!strcmp(ifgl->ifgl_group->ifg_group, ifgr->ifgr_group)) { 2238 dm = &ifgl->ifgl_group->ifg_carp_demoted; 2239 if (*dm + adj >= 0) 2240 *dm += adj; 2241 else 2242 *dm = 0; 2243 } 2244 } 2245 2246 void 2247 carp_ifgattr_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr) 2248 { 2249 struct ifgroupreq *ifgr = (struct ifgroupreq *)addr; 2250 struct carp_softc *sc = ifp->if_softc; 2251 2252 if (ifgr->ifgr_attrib.ifg_carp_demoted > 0 && (sc->sc_if.if_flags & 2253 (IFF_UP|IFF_RUNNING)) == (IFF_UP|IFF_RUNNING)) 2254 carp_vhe_send_ad_all(sc); 2255 } 2256 2257 void 2258 carp_start(struct ifnet *ifp) 2259 { 2260 struct carp_softc *sc = ifp->if_softc; 2261 struct mbuf *m; 2262 2263 for (;;) { 2264 IFQ_DEQUEUE(&ifp->if_snd, m); 2265 if (m == NULL) 2266 break; 2267 2268 #if NBPFILTER > 0 2269 if (ifp->if_bpf) 2270 bpf_mtap_ether(ifp->if_bpf, m, BPF_DIRECTION_OUT); 2271 #endif /* NBPFILTER > 0 */ 2272 2273 if ((ifp->if_carpdev->if_flags & (IFF_UP|IFF_RUNNING)) != 2274 (IFF_UP|IFF_RUNNING)) { 2275 ifp->if_oerrors++; 2276 m_freem(m); 2277 continue; 2278 } 2279 2280 /* 2281 * Do not leak the multicast address when sending 2282 * advertisements in 'ip' and 'ip-stealth' balacing 2283 * modes. 
2284 */ 2285 if (sc->sc_balancing == CARP_BAL_IP || 2286 sc->sc_balancing == CARP_BAL_IPSTEALTH) { 2287 struct ether_header *eh; 2288 uint8_t *esrc; 2289 2290 eh = mtod(m, struct ether_header *); 2291 esrc = ((struct arpcom*)ifp->if_carpdev)->ac_enaddr;; 2292 memcpy(eh->ether_shost, esrc, sizeof(eh->ether_shost)); 2293 } 2294 2295 if (if_enqueue(ifp->if_carpdev, m)) { 2296 ifp->if_oerrors++; 2297 continue; 2298 } 2299 ifp->if_opackets++; 2300 } 2301 } 2302 2303 int 2304 carp_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa, 2305 struct rtentry *rt) 2306 { 2307 struct carp_softc *sc = ((struct carp_softc *)ifp->if_softc); 2308 struct carp_vhost_entry *vhe; 2309 struct srp_ref sr; 2310 int ismaster; 2311 2312 KASSERT(sc->sc_carpdev != NULL); 2313 2314 if (sc->cur_vhe == NULL) { 2315 vhe = SRPL_ENTER(&sr, &sc->carp_vhosts); /* head */ 2316 ismaster = (vhe->state == MASTER); 2317 SRPL_LEAVE(&sr); 2318 } else { 2319 ismaster = (sc->cur_vhe->state == MASTER); 2320 } 2321 2322 if ((sc->sc_balancing == CARP_BAL_NONE && !ismaster)) { 2323 m_freem(m); 2324 return (ENETUNREACH); 2325 } 2326 2327 return (ether_output(ifp, m, sa, rt)); 2328 } 2329 2330 void 2331 carp_set_state_all(struct carp_softc *sc, int state) 2332 { 2333 struct carp_vhost_entry *vhe; 2334 2335 KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */ 2336 2337 SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) { 2338 if (vhe->state == state) 2339 continue; 2340 2341 carp_set_state(vhe, state); 2342 } 2343 } 2344 2345 void 2346 carp_set_state(struct carp_vhost_entry *vhe, int state) 2347 { 2348 struct carp_softc *sc = vhe->parent_sc; 2349 static const char *carp_states[] = { CARP_STATES }; 2350 int loglevel; 2351 2352 KASSERT(vhe->state != state); 2353 2354 if (vhe->state == INIT || state == INIT) 2355 loglevel = LOG_WARNING; 2356 else 2357 loglevel = LOG_CRIT; 2358 2359 if (sc->sc_vhe_count > 1) 2360 CARP_LOG(loglevel, sc, 2361 ("state transition (vhid %d): %s -> %s", vhe->vhid, 2362 
carp_states[vhe->state], carp_states[state])); 2363 else 2364 CARP_LOG(loglevel, sc, 2365 ("state transition: %s -> %s", 2366 carp_states[vhe->state], carp_states[state])); 2367 2368 vhe->state = state; 2369 carp_update_lsmask(sc); 2370 2371 /* only the master vhe creates link state messages */ 2372 if (!vhe->vhe_leader) 2373 return; 2374 2375 switch (state) { 2376 case BACKUP: 2377 sc->sc_if.if_link_state = LINK_STATE_DOWN; 2378 break; 2379 case MASTER: 2380 sc->sc_if.if_link_state = LINK_STATE_UP; 2381 break; 2382 default: 2383 sc->sc_if.if_link_state = LINK_STATE_INVALID; 2384 break; 2385 } 2386 if_link_state_change(&sc->sc_if); 2387 } 2388 2389 void 2390 carp_group_demote_adj(struct ifnet *ifp, int adj, char *reason) 2391 { 2392 struct ifg_list *ifgl; 2393 int *dm, need_ad; 2394 struct carp_softc *nil = NULL; 2395 2396 if (ifp->if_type == IFT_CARP) { 2397 dm = &((struct carp_softc *)ifp->if_softc)->sc_demote_cnt; 2398 if (*dm + adj >= 0) 2399 *dm += adj; 2400 else 2401 *dm = 0; 2402 } 2403 2404 need_ad = 0; 2405 TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) { 2406 if (!strcmp(ifgl->ifgl_group->ifg_group, IFG_ALL)) 2407 continue; 2408 dm = &ifgl->ifgl_group->ifg_carp_demoted; 2409 2410 if (*dm + adj >= 0) 2411 *dm += adj; 2412 else 2413 *dm = 0; 2414 2415 if (adj > 0 && *dm == 1) 2416 need_ad = 1; 2417 CARP_LOG(LOG_ERR, nil, 2418 ("%s demoted group %s by %d to %d (%s)", 2419 ifp->if_xname, ifgl->ifgl_group->ifg_group, 2420 adj, *dm, reason)); 2421 } 2422 if (need_ad) 2423 carp_send_ad_all(); 2424 } 2425 2426 int 2427 carp_group_demote_count(struct carp_softc *sc) 2428 { 2429 struct ifg_list *ifgl; 2430 int count = 0; 2431 2432 TAILQ_FOREACH(ifgl, &sc->sc_if.if_groups, ifgl_next) 2433 count += ifgl->ifgl_group->ifg_carp_demoted; 2434 2435 if (count == 0 && sc->sc_demote_cnt) 2436 count = sc->sc_demote_cnt; 2437 2438 return (count > 255 ? 
255 : count); 2439 } 2440 2441 void 2442 carp_carpdev_state(void *v) 2443 { 2444 struct carp_if *cif; 2445 struct carp_softc *sc; 2446 struct ifnet *ifp0 = v; 2447 2448 if (ifp0->if_type == IFT_CARP) 2449 return; 2450 2451 cif = (struct carp_if *)ifp0->if_carp; 2452 2453 KERNEL_ASSERT_LOCKED(); /* touching vhif_vrs */ 2454 2455 SRPL_FOREACH_LOCKED(sc, &cif->vhif_vrs, sc_list) { 2456 int suppressed = sc->sc_suppress; 2457 2458 if (sc->sc_carpdev->if_link_state == LINK_STATE_DOWN || 2459 !(sc->sc_carpdev->if_flags & IFF_UP)) { 2460 sc->sc_if.if_flags &= ~IFF_RUNNING; 2461 carp_del_all_timeouts(sc); 2462 carp_set_state_all(sc, INIT); 2463 sc->sc_suppress = 1; 2464 carp_setrun_all(sc, 0); 2465 if (!suppressed) 2466 carp_group_demote_adj(&sc->sc_if, 1, "carpdev"); 2467 } else if (suppressed) { 2468 carp_set_state_all(sc, INIT); 2469 sc->sc_suppress = 0; 2470 carp_setrun_all(sc, 0); 2471 carp_group_demote_adj(&sc->sc_if, -1, "carpdev"); 2472 } 2473 } 2474 } 2475 2476 int 2477 carp_ether_addmulti(struct carp_softc *sc, struct ifreq *ifr) 2478 { 2479 struct ifnet *ifp0; 2480 struct carp_mc_entry *mc; 2481 u_int8_t addrlo[ETHER_ADDR_LEN], addrhi[ETHER_ADDR_LEN]; 2482 int error; 2483 2484 ifp0 = sc->sc_carpdev; 2485 if (ifp0 == NULL) 2486 return (EINVAL); 2487 2488 error = ether_addmulti(ifr, (struct arpcom *)&sc->sc_ac); 2489 if (error != ENETRESET) 2490 return (error); 2491 2492 /* 2493 * This is new multicast address. We have to tell parent 2494 * about it. Also, remember this multicast address so that 2495 * we can delete them on unconfigure. 2496 */ 2497 mc = malloc(sizeof(*mc), M_DEVBUF, M_NOWAIT); 2498 if (mc == NULL) { 2499 error = ENOMEM; 2500 goto alloc_failed; 2501 } 2502 2503 /* 2504 * As ether_addmulti() returns ENETRESET, following two 2505 * statement shouldn't fail. 
2506 */ 2507 (void)ether_multiaddr(&ifr->ifr_addr, addrlo, addrhi); 2508 ETHER_LOOKUP_MULTI(addrlo, addrhi, &sc->sc_ac, mc->mc_enm); 2509 memcpy(&mc->mc_addr, &ifr->ifr_addr, ifr->ifr_addr.sa_len); 2510 LIST_INSERT_HEAD(&sc->carp_mc_listhead, mc, mc_entries); 2511 2512 error = (*ifp0->if_ioctl)(ifp0, SIOCADDMULTI, (caddr_t)ifr); 2513 if (error != 0) 2514 goto ioctl_failed; 2515 2516 return (error); 2517 2518 ioctl_failed: 2519 LIST_REMOVE(mc, mc_entries); 2520 free(mc, M_DEVBUF, sizeof(*mc)); 2521 alloc_failed: 2522 (void)ether_delmulti(ifr, (struct arpcom *)&sc->sc_ac); 2523 2524 return (error); 2525 } 2526 2527 int 2528 carp_ether_delmulti(struct carp_softc *sc, struct ifreq *ifr) 2529 { 2530 struct ifnet *ifp0; 2531 struct ether_multi *enm; 2532 struct carp_mc_entry *mc; 2533 u_int8_t addrlo[ETHER_ADDR_LEN], addrhi[ETHER_ADDR_LEN]; 2534 int error; 2535 2536 ifp0 = sc->sc_carpdev; 2537 if (ifp0 == NULL) 2538 return (EINVAL); 2539 2540 /* 2541 * Find a key to lookup carp_mc_entry. We have to do this 2542 * before calling ether_delmulti for obvious reason. 2543 */ 2544 if ((error = ether_multiaddr(&ifr->ifr_addr, addrlo, addrhi)) != 0) 2545 return (error); 2546 ETHER_LOOKUP_MULTI(addrlo, addrhi, &sc->sc_ac, enm); 2547 if (enm == NULL) 2548 return (EINVAL); 2549 2550 LIST_FOREACH(mc, &sc->carp_mc_listhead, mc_entries) 2551 if (mc->mc_enm == enm) 2552 break; 2553 2554 /* We won't delete entries we didn't add */ 2555 if (mc == NULL) 2556 return (EINVAL); 2557 2558 error = ether_delmulti(ifr, (struct arpcom *)&sc->sc_ac); 2559 if (error != ENETRESET) 2560 return (error); 2561 2562 /* We no longer use this multicast address. Tell parent so. */ 2563 error = (*ifp0->if_ioctl)(ifp0, SIOCDELMULTI, (caddr_t)ifr); 2564 if (error == 0) { 2565 /* And forget about this address. 
*/ 2566 LIST_REMOVE(mc, mc_entries); 2567 free(mc, M_DEVBUF, sizeof(*mc)); 2568 } else 2569 (void)ether_addmulti(ifr, (struct arpcom *)&sc->sc_ac); 2570 return (error); 2571 } 2572 2573 /* 2574 * Delete any multicast address we have asked to add from parent 2575 * interface. Called when the carp is being unconfigured. 2576 */ 2577 void 2578 carp_ether_purgemulti(struct carp_softc *sc) 2579 { 2580 struct ifnet *ifp0 = sc->sc_carpdev; /* Parent. */ 2581 struct carp_mc_entry *mc; 2582 union { 2583 struct ifreq ifreq; 2584 struct { 2585 char ifr_name[IFNAMSIZ]; 2586 struct sockaddr_storage ifr_ss; 2587 } ifreq_storage; 2588 } u; 2589 struct ifreq *ifr = &u.ifreq; 2590 2591 if (ifp0 == NULL) 2592 return; 2593 2594 memcpy(ifr->ifr_name, ifp0->if_xname, IFNAMSIZ); 2595 while ((mc = LIST_FIRST(&sc->carp_mc_listhead)) != NULL) { 2596 memcpy(&ifr->ifr_addr, &mc->mc_addr, mc->mc_addr.ss_len); 2597 (void)(*ifp0->if_ioctl)(ifp0, SIOCDELMULTI, (caddr_t)ifr); 2598 LIST_REMOVE(mc, mc_entries); 2599 free(mc, M_DEVBUF, sizeof(*mc)); 2600 } 2601 } 2602 2603 void 2604 carp_vh_ref(void *null, void *v) 2605 { 2606 struct carp_vhost_entry *vhe = v; 2607 2608 refcnt_take(&vhe->vhost_refcnt); 2609 } 2610 2611 void 2612 carp_vh_unref(void *null, void *v) 2613 { 2614 struct carp_vhost_entry *vhe = v; 2615 2616 if (refcnt_rele(&vhe->vhost_refcnt)) { 2617 carp_sc_unref(NULL, vhe->parent_sc); 2618 free(vhe, M_DEVBUF, sizeof(*vhe)); 2619 } 2620 } 2621 2622 void 2623 carp_sc_ref(void *null, void *s) 2624 { 2625 struct carp_softc *sc = s; 2626 2627 refcnt_take(&sc->sc_refcnt); 2628 } 2629 2630 void 2631 carp_sc_unref(void *null, void *s) 2632 { 2633 struct carp_softc *sc = s; 2634 2635 refcnt_rele_wake(&sc->sc_refcnt); 2636 } 2637