1 /* $OpenBSD: ip_carp.c,v 1.342 2019/11/08 07:51:41 dlg Exp $ */ 2 3 /* 4 * Copyright (c) 2002 Michael Shalayeff. All rights reserved. 5 * Copyright (c) 2003 Ryan McBride. All rights reserved. 6 * Copyright (c) 2006-2008 Marco Pfatschbacher. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, 21 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 22 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 23 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 25 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 26 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 27 * THE POSSIBILITY OF SUCH DAMAGE. 28 */ 29 30 /* 31 * TODO: 32 * - iface reconfigure 33 * - support for hardware checksum calculations; 34 * 35 */ 36 37 #include "ether.h" 38 39 #include <sys/param.h> 40 #include <sys/systm.h> 41 #include <sys/mbuf.h> 42 #include <sys/socket.h> 43 #include <sys/socketvar.h> 44 #include <sys/timeout.h> 45 #include <sys/ioctl.h> 46 #include <sys/errno.h> 47 #include <sys/device.h> 48 #include <sys/kernel.h> 49 #include <sys/sysctl.h> 50 #include <sys/syslog.h> 51 #include <sys/refcnt.h> 52 53 #include <net/if.h> 54 #include <net/if_var.h> 55 #include <net/if_types.h> 56 #include <net/netisr.h> 57 58 #include <crypto/sha1.h> 59 60 #include <netinet/in.h> 61 #include <netinet/in_var.h> 62 #include <netinet/ip.h> 63 #include <netinet/ip_var.h> 64 #include <netinet/if_ether.h> 65 #include <netinet/ip_ipsp.h> 66 67 #include <net/if_dl.h> 68 69 #ifdef INET6 70 #include <netinet6/in6_var.h> 71 #include <netinet/icmp6.h> 72 #include <netinet/ip6.h> 73 #include <netinet6/ip6_var.h> 74 #include <netinet6/nd6.h> 75 #include <netinet6/in6_ifattach.h> 76 #endif 77 78 #include "bpfilter.h" 79 #if NBPFILTER > 0 80 #include <net/bpf.h> 81 #endif 82 83 #include "vlan.h" 84 #if NVLAN > 0 85 #include <net/if_vlan_var.h> 86 #endif 87 88 #include <netinet/ip_carp.h> 89 90 struct carp_mc_entry { 91 LIST_ENTRY(carp_mc_entry) mc_entries; 92 union { 93 struct ether_multi *mcu_enm; 94 } mc_u; 95 struct sockaddr_storage mc_addr; 96 }; 97 #define mc_enm mc_u.mcu_enm 98 99 enum { HMAC_ORIG=0, HMAC_NOV6LL=1, HMAC_MAX=2 }; 100 101 struct carp_vhost_entry { 102 SRPL_ENTRY(carp_vhost_entry) vhost_entries; 103 struct refcnt vhost_refcnt; 104 105 struct carp_softc *parent_sc; 106 int vhe_leader; 107 int vhid; 108 int advskew; 109 enum { INIT = 0, BACKUP, MASTER } state; 110 struct timeout ad_tmo; /* advertisement timeout */ 111 struct timeout md_tmo; /* master down timeout */ 112 struct timeout md6_tmo; /* master down timeout */ 113 114 u_int64_t vhe_replay_cookie; 115 116 /* authentication */ 117 #define CARP_HMAC_PAD 64 118 unsigned char vhe_pad[CARP_HMAC_PAD]; 119 SHA1_CTX vhe_sha1[HMAC_MAX]; 120 121 u_int8_t vhe_enaddr[ETHER_ADDR_LEN]; 122 }; 123 124 void carp_vh_ref(void *, void *); 125 void carp_vh_unref(void *, void *); 126 127 struct srpl_rc carp_vh_rc = 128 SRPL_RC_INITIALIZER(carp_vh_ref, carp_vh_unref, NULL); 129 130 struct carp_softc { 131 struct arpcom sc_ac; 132 #define sc_if sc_ac.ac_if 133 #define sc_carpdev sc_ac.ac_if.if_carpdev 134 struct task sc_atask; 135 struct task sc_ltask; 136 struct task sc_dtask; 137 struct ip_moptions sc_imo; 138 #ifdef INET6 139 struct ip6_moptions sc_im6o; 140 #endif /* INET6 */ 141 142 SRPL_ENTRY(carp_softc) sc_list; 143 struct refcnt sc_refcnt; 144 145 int sc_suppress; 146 int sc_bow_out; 147 int sc_demote_cnt; 148 149 int sc_sendad_errors; 150 #define CARP_SENDAD_MAX_ERRORS(sc) (3 * (sc)->sc_vhe_count) 151 int sc_sendad_success; 152 #define CARP_SENDAD_MIN_SUCCESS(sc) (3 * (sc)->sc_vhe_count) 153 154 char sc_curlladdr[ETHER_ADDR_LEN]; 155 156 SRPL_HEAD(, carp_vhost_entry) carp_vhosts; 157 int sc_vhe_count; 158 u_int8_t sc_vhids[CARP_MAXNODES]; 159 u_int8_t sc_advskews[CARP_MAXNODES]; 160 u_int8_t sc_balancing; 161 162 int sc_naddrs; 163 int sc_naddrs6; 164 int sc_advbase; /* seconds */ 165 166 /* authentication */ 167 unsigned char sc_key[CARP_KEY_LEN]; 168 169 u_int32_t sc_hashkey[2]; 170 u_int32_t sc_lsmask; /* load sharing mask */ 171 int sc_lscount; /* # load sharing interfaces (max 32) */ 172 int sc_delayed_arp; /* delayed ARP request countdown */ 173 int sc_realmac; /* using real mac */ 174 175 struct in_addr sc_peer; 176 177 LIST_HEAD(__carp_mchead, carp_mc_entry) carp_mc_listhead; 178 struct carp_vhost_entry *cur_vhe; /* current active vhe */ 179 }; 180 181 void carp_sc_ref(void *, void *); 182 void carp_sc_unref(void *, void *); 183 184 struct srpl_rc carp_sc_rc = 185 SRPL_RC_INITIALIZER(carp_sc_ref, carp_sc_unref, NULL); 186 187 int carp_opts[CARPCTL_MAXID] = { 0, 1, 0, LOG_CRIT }; /* XXX for now */ 188 struct cpumem *carpcounters; 189 190 int carp_send_all_recur = 0; 191 192 #define CARP_LOG(l, sc, s) \ 193 do { \ 194 if (carp_opts[CARPCTL_LOG] >= l) { \ 195 if (sc) \ 196 log(l, "%s: ", \ 197 (sc)->sc_if.if_xname); \ 198 else \ 199 log(l, "carp: "); \ 200 addlog s; \ 201 addlog("\n"); \ 202 } \ 203 } while (0) 204 205 void carp_hmac_prepare(struct carp_softc *); 206 void carp_hmac_prepare_ctx(struct carp_vhost_entry *, u_int8_t); 207 void carp_hmac_generate(struct carp_vhost_entry *, u_int32_t *, 208 unsigned char *, u_int8_t); 209 int carp_hmac_verify(struct carp_vhost_entry *, u_int32_t *, 210 unsigned char *); 211 int carp_input(struct ifnet *, struct mbuf *, void *); 212 void carp_proto_input_c(struct ifnet *, struct mbuf *, 213 struct carp_header *, int, sa_family_t); 214 int carp_proto_input_if(struct ifnet *, struct mbuf **, int *, int); 215 #ifdef INET6 216 int carp6_proto_input_if(struct ifnet *, struct mbuf **, int *, int); 217 #endif 218 void carpattach(int); 219 void carpdetach(void *); 220 void carp_prepare_ad(struct mbuf *, struct carp_vhost_entry *, 221 struct carp_header *); 222 void carp_send_ad_all(void); 223 void carp_vhe_send_ad_all(struct carp_softc *); 224 void carp_timer_ad(void *); 225 void carp_send_ad(struct carp_vhost_entry *); 226 void carp_send_arp(struct carp_softc *); 227 void carp_timer_down(void *); 228 void carp_master_down(struct carp_vhost_entry *); 229 int carp_ioctl(struct ifnet *, u_long, caddr_t); 230 int carp_vhids_ioctl(struct carp_softc *, struct carpreq *); 231 int carp_check_dup_vhids(struct carp_softc *, struct srpl *, 232 struct carpreq *); 233 void carp_ifgroup_ioctl(struct ifnet *, u_long, caddr_t); 234 void carp_ifgattr_ioctl(struct ifnet *, u_long, caddr_t); 235 void carp_start(struct ifnet *); 236 void carp_setrun_all(struct carp_softc *, sa_family_t); 237 void carp_setrun(struct carp_vhost_entry *, sa_family_t); 238 void carp_set_state_all(struct carp_softc *, int); 239 void carp_set_state(struct carp_vhost_entry *, int); 240 void carp_multicast_cleanup(struct carp_softc *); 241 int carp_set_ifp(struct carp_softc *, struct ifnet *); 242 void carp_set_enaddr(struct carp_softc *); 243 void carp_set_vhe_enaddr(struct carp_vhost_entry *); 244 void carp_addr_updated(void *); 245 int carp_set_addr(struct carp_softc *, struct sockaddr_in *); 246 int carp_join_multicast(struct carp_softc *); 247 #ifdef INET6 248 void carp_send_na(struct carp_softc *); 249 int carp_set_addr6(struct carp_softc *, struct sockaddr_in6 *); 250 int carp_join_multicast6(struct carp_softc *); 251 #endif 252 int carp_clone_create(struct if_clone *, int); 253 int carp_clone_destroy(struct ifnet *); 254 int carp_ether_addmulti(struct carp_softc *, struct ifreq *); 255 int carp_ether_delmulti(struct carp_softc *, struct ifreq *); 256 void carp_ether_purgemulti(struct carp_softc *); 257 int carp_group_demote_count(struct carp_softc *); 258 void carp_update_lsmask(struct carp_softc *); 259 int carp_new_vhost(struct carp_softc *, int, int); 260 void carp_destroy_vhosts(struct carp_softc *); 261 void carp_del_all_timeouts(struct carp_softc *); 262 int carp_vhe_match(struct carp_softc *, uint8_t *); 263 264 struct if_clone carp_cloner = 265 IF_CLONE_INITIALIZER("carp", carp_clone_create, carp_clone_destroy); 266 267 #define carp_cksum(_m, _l) ((u_int16_t)in_cksum((_m), (_l))) 268 #define CARP_IFQ_PRIO 6 269 270 void 271 carp_hmac_prepare(struct carp_softc *sc) 272 { 273 struct carp_vhost_entry *vhe; 274 u_int8_t i; 275 276 KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */ 277 278 SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) { 279 for (i = 0; i < HMAC_MAX; i++) { 280 carp_hmac_prepare_ctx(vhe, i); 281 } 282 } 283 } 284 285 void 286 carp_hmac_prepare_ctx(struct carp_vhost_entry *vhe, u_int8_t ctx) 287 { 288 struct carp_softc *sc = vhe->parent_sc; 289 290 u_int8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT; 291 u_int8_t vhid = vhe->vhid & 0xff; 292 SHA1_CTX sha1ctx; 293 u_int32_t kmd[5]; 294 struct ifaddr *ifa; 295 int i, found; 296 struct in_addr last, cur, in; 297 #ifdef INET6 298 struct in6_addr last6, cur6, in6; 299 #endif /* INET6 */ 300 301 /* compute ipad from key */ 302 memset(vhe->vhe_pad, 0, sizeof(vhe->vhe_pad)); 303 bcopy(sc->sc_key, vhe->vhe_pad, sizeof(sc->sc_key)); 304 for (i = 0; i < sizeof(vhe->vhe_pad); i++) 305 vhe->vhe_pad[i] ^= 0x36; 306 307 /* precompute first part of inner hash */ 308 SHA1Init(&vhe->vhe_sha1[ctx]); 309 SHA1Update(&vhe->vhe_sha1[ctx], vhe->vhe_pad, sizeof(vhe->vhe_pad)); 310 SHA1Update(&vhe->vhe_sha1[ctx], (void *)&version, sizeof(version)); 311 SHA1Update(&vhe->vhe_sha1[ctx], (void *)&type, sizeof(type)); 312 313 /* generate a key for the arpbalance hash, before the vhid is hashed */ 314 if (vhe->vhe_leader) { 315 bcopy(&vhe->vhe_sha1[ctx], &sha1ctx, sizeof(sha1ctx)); 316 SHA1Final((unsigned char *)kmd, &sha1ctx); 317 sc->sc_hashkey[0] = kmd[0] ^ kmd[1]; 318 sc->sc_hashkey[1] = kmd[2] ^ kmd[3]; 319 } 320 321 /* the rest of the precomputation */ 322 if (!sc->sc_realmac && vhe->vhe_leader && 323 memcmp(sc->sc_ac.ac_enaddr, vhe->vhe_enaddr, ETHER_ADDR_LEN) != 0) 324 SHA1Update(&vhe->vhe_sha1[ctx], sc->sc_ac.ac_enaddr, 325 ETHER_ADDR_LEN); 326 327 SHA1Update(&vhe->vhe_sha1[ctx], (void *)&vhid, sizeof(vhid)); 328 329 /* Hash the addresses from smallest to largest, not interface order */ 330 cur.s_addr = 0; 331 do { 332 found = 0; 333 last = cur; 334 cur.s_addr = 0xffffffff; 335 TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) { 336 if (ifa->ifa_addr->sa_family != AF_INET) 337 continue; 338 in.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr; 339 if (ntohl(in.s_addr) > ntohl(last.s_addr) && 340 ntohl(in.s_addr) < ntohl(cur.s_addr)) { 341 cur.s_addr = in.s_addr; 342 found++; 343 } 344 } 345 if (found) 346 SHA1Update(&vhe->vhe_sha1[ctx], 347 (void *)&cur, sizeof(cur)); 348 } while (found); 349 #ifdef INET6 350 memset(&cur6, 0x00, sizeof(cur6)); 351 do { 352 found = 0; 353 last6 = cur6; 354 memset(&cur6, 0xff, sizeof(cur6)); 355 TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) { 356 if (ifa->ifa_addr->sa_family != AF_INET6) 357 continue; 358 in6 = ifatoia6(ifa)->ia_addr.sin6_addr; 359 if (IN6_IS_SCOPE_EMBED(&in6)) { 360 if (ctx == HMAC_NOV6LL) 361 continue; 362 in6.s6_addr16[1] = 0; 363 } 364 if (memcmp(&in6, &last6, sizeof(in6)) > 0 && 365 memcmp(&in6, &cur6, sizeof(in6)) < 0) { 366 cur6 = in6; 367 found++; 368 } 369 } 370 if (found) 371 SHA1Update(&vhe->vhe_sha1[ctx], 372 (void *)&cur6, sizeof(cur6)); 373 } while (found); 374 #endif /* INET6 */ 375 376 /* convert ipad to opad */ 377 for (i = 0; i < sizeof(vhe->vhe_pad); i++) 378 vhe->vhe_pad[i] ^= 0x36 ^ 0x5c; 379 } 380 381 void 382 carp_hmac_generate(struct carp_vhost_entry *vhe, u_int32_t counter[2], 383 unsigned char md[20], u_int8_t ctx) 384 { 385 SHA1_CTX sha1ctx; 386 387 /* fetch first half of inner hash */ 388 bcopy(&vhe->vhe_sha1[ctx], &sha1ctx, sizeof(sha1ctx)); 389 390 SHA1Update(&sha1ctx, (void *)counter, sizeof(vhe->vhe_replay_cookie)); 391 SHA1Final(md, &sha1ctx); 392 393 /* outer hash */ 394 SHA1Init(&sha1ctx); 395 SHA1Update(&sha1ctx, vhe->vhe_pad, sizeof(vhe->vhe_pad)); 396 SHA1Update(&sha1ctx, md, 20); 397 SHA1Final(md, &sha1ctx); 398 } 399 400 int 401 carp_hmac_verify(struct carp_vhost_entry *vhe, u_int32_t counter[2], 402 unsigned char md[20]) 403 { 404 unsigned char md2[20]; 405 u_int8_t i; 406 407 for (i = 0; i < HMAC_MAX; i++) { 408 carp_hmac_generate(vhe, counter, md2, i); 409 if (!timingsafe_bcmp(md, md2, sizeof(md2))) 410 return (0); 411 } 412 return (1); 413 } 414 415 int 416 carp_proto_input(struct mbuf **mp, int *offp, int proto, int af) 417 { 418 struct ifnet *ifp; 419 420 ifp = if_get((*mp)->m_pkthdr.ph_ifidx); 421 if (ifp == NULL) { 422 m_freemp(mp); 423 return IPPROTO_DONE; 424 } 425 426 proto = carp_proto_input_if(ifp, mp, offp, proto); 427 if_put(ifp); 428 return proto; 429 } 430 431 /* 432 * process input packet. 433 * we have rearranged checks order compared to the rfc, 434 * but it seems more efficient this way or not possible otherwise. 435 */ 436 int 437 carp_proto_input_if(struct ifnet *ifp, struct mbuf **mp, int *offp, int proto) 438 { 439 struct mbuf *m = *mp; 440 struct ip *ip = mtod(m, struct ip *); 441 struct carp_softc *sc = NULL; 442 struct carp_header *ch; 443 int iplen, len, ismulti; 444 445 carpstat_inc(carps_ipackets); 446 447 if (!carp_opts[CARPCTL_ALLOW]) { 448 m_freem(m); 449 return IPPROTO_DONE; 450 } 451 452 ismulti = IN_MULTICAST(ip->ip_dst.s_addr); 453 454 /* check if received on a valid carp interface */ 455 switch (ifp->if_type) { 456 case IFT_CARP: 457 break; 458 case IFT_ETHER: 459 if (ismulti || !SRPL_EMPTY_LOCKED(&ifp->if_carp)) 460 break; 461 /* FALLTHROUGH */ 462 default: 463 carpstat_inc(carps_badif); 464 CARP_LOG(LOG_INFO, sc, 465 ("packet received on non-carp interface: %s", 466 ifp->if_xname)); 467 m_freem(m); 468 return IPPROTO_DONE; 469 } 470 471 /* verify that the IP TTL is 255. */ 472 if (ip->ip_ttl != CARP_DFLTTL) { 473 carpstat_inc(carps_badttl); 474 CARP_LOG(LOG_NOTICE, sc, ("received ttl %d != %d on %s", 475 ip->ip_ttl, CARP_DFLTTL, ifp->if_xname)); 476 m_freem(m); 477 return IPPROTO_DONE; 478 } 479 480 /* 481 * verify that the received packet length is 482 * equal to the CARP header 483 */ 484 iplen = ip->ip_hl << 2; 485 len = iplen + sizeof(*ch); 486 if (len > m->m_pkthdr.len) { 487 carpstat_inc(carps_badlen); 488 CARP_LOG(LOG_INFO, sc, ("packet too short %d on %s", 489 m->m_pkthdr.len, ifp->if_xname)); 490 m_freem(m); 491 return IPPROTO_DONE; 492 } 493 494 if ((m = *mp = m_pullup(m, len)) == NULL) { 495 carpstat_inc(carps_hdrops); 496 return IPPROTO_DONE; 497 } 498 ip = mtod(m, struct ip *); 499 ch = (struct carp_header *)(mtod(m, caddr_t) + iplen); 500 501 /* verify the CARP checksum */ 502 m->m_data += iplen; 503 if (carp_cksum(m, len - iplen)) { 504 carpstat_inc(carps_badsum); 505 CARP_LOG(LOG_INFO, sc, ("checksum failed on %s", 506 ifp->if_xname)); 507 m_freem(m); 508 return IPPROTO_DONE; 509 } 510 m->m_data -= iplen; 511 512 KERNEL_LOCK(); 513 carp_proto_input_c(ifp, m, ch, ismulti, AF_INET); 514 KERNEL_UNLOCK(); 515 return IPPROTO_DONE; 516 } 517 518 #ifdef INET6 519 int 520 carp6_proto_input(struct mbuf **mp, int *offp, int proto, int af) 521 { 522 struct ifnet *ifp; 523 524 ifp = if_get((*mp)->m_pkthdr.ph_ifidx); 525 if (ifp == NULL) { 526 m_freemp(mp); 527 return IPPROTO_DONE; 528 } 529 530 proto = carp6_proto_input_if(ifp, mp, offp, proto); 531 if_put(ifp); 532 return proto; 533 } 534 535 int 536 carp6_proto_input_if(struct ifnet *ifp, struct mbuf **mp, int *offp, int proto) 537 { 538 struct mbuf *m = *mp; 539 struct carp_softc *sc = NULL; 540 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); 541 struct carp_header *ch; 542 u_int len; 543 544 carpstat_inc(carps_ipackets6); 545 546 if (!carp_opts[CARPCTL_ALLOW]) { 547 m_freem(m); 548 return IPPROTO_DONE; 549 } 550 551 /* check if received on a valid carp interface */ 552 if (ifp->if_type != IFT_CARP) { 553 carpstat_inc(carps_badif); 554 CARP_LOG(LOG_INFO, sc, ("packet received on non-carp interface: %s", 555 ifp->if_xname)); 556 m_freem(m); 557 return IPPROTO_DONE; 558 } 559 560 /* verify that the IP TTL is 255 */ 561 if (ip6->ip6_hlim != CARP_DFLTTL) { 562 carpstat_inc(carps_badttl); 563 CARP_LOG(LOG_NOTICE, sc, ("received ttl %d != %d on %s", 564 ip6->ip6_hlim, CARP_DFLTTL, ifp->if_xname)); 565 m_freem(m); 566 return IPPROTO_DONE; 567 } 568 569 /* verify that we have a complete carp packet */ 570 len = m->m_len; 571 if ((m = *mp = m_pullup(m, *offp + sizeof(*ch))) == NULL) { 572 carpstat_inc(carps_badlen); 573 CARP_LOG(LOG_INFO, sc, ("packet size %u too small", len)); 574 return IPPROTO_DONE; 575 } 576 ch = (struct carp_header *)(mtod(m, caddr_t) + *offp); 577 578 /* verify the CARP checksum */ 579 m->m_data += *offp; 580 if (carp_cksum(m, sizeof(*ch))) { 581 carpstat_inc(carps_badsum); 582 CARP_LOG(LOG_INFO, sc, ("checksum failed, on %s", 583 ifp->if_xname)); 584 m_freem(m); 585 return IPPROTO_DONE; 586 } 587 m->m_data -= *offp; 588 589 KERNEL_LOCK(); 590 carp_proto_input_c(ifp, m, ch, 1, AF_INET6); 591 KERNEL_UNLOCK(); 592 return IPPROTO_DONE; 593 } 594 #endif /* INET6 */ 595 596 void 597 carp_proto_input_c(struct ifnet *ifp, struct mbuf *m, struct carp_header *ch, 598 int ismulti, sa_family_t af) 599 { 600 struct carp_softc *sc; 601 struct carp_vhost_entry *vhe; 602 struct timeval sc_tv, ch_tv; 603 struct srpl *cif; 604 605 KERNEL_ASSERT_LOCKED(); /* touching if_carp + carp_vhosts */ 606 607 if (ifp->if_type == IFT_CARP) { 608 /* 609 * If the parent of this carp(4) got destroyed while 610 * `m' was being processed, silently drop it. 611 */ 612 if (ifp->if_carpdev == NULL) { 613 m_freem(m); 614 return; 615 } 616 cif = &ifp->if_carpdev->if_carp; 617 } else 618 cif = &ifp->if_carp; 619 620 SRPL_FOREACH_LOCKED(sc, cif, sc_list) { 621 if (af == AF_INET && 622 ismulti != IN_MULTICAST(sc->sc_peer.s_addr)) 623 continue; 624 SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) { 625 if (vhe->vhid == ch->carp_vhid) 626 goto found; 627 } 628 } 629 found: 630 631 if (!sc || (sc->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) != 632 (IFF_UP|IFF_RUNNING)) { 633 carpstat_inc(carps_badvhid); 634 m_freem(m); 635 return; 636 } 637 638 getmicrotime(&sc->sc_if.if_lastchange); 639 sc->sc_if.if_ipackets++; 640 sc->sc_if.if_ibytes += m->m_pkthdr.len; 641 642 /* verify the CARP version. */ 643 if (ch->carp_version != CARP_VERSION) { 644 carpstat_inc(carps_badver); 645 sc->sc_if.if_ierrors++; 646 CARP_LOG(LOG_NOTICE, sc, ("invalid version %d != %d", 647 ch->carp_version, CARP_VERSION)); 648 m_freem(m); 649 return; 650 } 651 652 /* verify the hash */ 653 if (carp_hmac_verify(vhe, ch->carp_counter, ch->carp_md)) { 654 carpstat_inc(carps_badauth); 655 sc->sc_if.if_ierrors++; 656 CARP_LOG(LOG_INFO, sc, ("incorrect hash")); 657 m_freem(m); 658 return; 659 } 660 661 if (!memcmp(&vhe->vhe_replay_cookie, ch->carp_counter, 662 sizeof(ch->carp_counter))) { 663 /* Do not log duplicates from non simplex interfaces */ 664 if (sc->sc_carpdev->if_flags & IFF_SIMPLEX) { 665 carpstat_inc(carps_badauth); 666 sc->sc_if.if_ierrors++; 667 CARP_LOG(LOG_WARNING, sc, 668 ("replay or network loop detected")); 669 } 670 m_freem(m); 671 return; 672 } 673 674 sc_tv.tv_sec = sc->sc_advbase; 675 sc_tv.tv_usec = vhe->advskew * 1000000 / 256; 676 ch_tv.tv_sec = ch->carp_advbase; 677 ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256; 678 679 switch (vhe->state) { 680 case INIT: 681 break; 682 case MASTER: 683 /* 684 * If we receive an advertisement from a master who's going to 685 * be more frequent than us, and whose demote count is not higher 686 * than ours, go into BACKUP state. If his demote count is lower, 687 * also go into BACKUP. 688 */ 689 if (((timercmp(&sc_tv, &ch_tv, >) || 690 timercmp(&sc_tv, &ch_tv, ==)) && 691 (ch->carp_demote <= carp_group_demote_count(sc))) || 692 ch->carp_demote < carp_group_demote_count(sc)) { 693 timeout_del(&vhe->ad_tmo); 694 carp_set_state(vhe, BACKUP); 695 carp_setrun(vhe, 0); 696 } 697 break; 698 case BACKUP: 699 /* 700 * If we're pre-empting masters who advertise slower than us, 701 * and do not have a better demote count, treat them as down. 702 * 703 */ 704 if (carp_opts[CARPCTL_PREEMPT] && 705 timercmp(&sc_tv, &ch_tv, <) && 706 ch->carp_demote >= carp_group_demote_count(sc)) { 707 carp_master_down(vhe); 708 break; 709 } 710 711 /* 712 * Take over masters advertising with a higher demote count, 713 * regardless of CARPCTL_PREEMPT. 714 */ 715 if (ch->carp_demote > carp_group_demote_count(sc)) { 716 carp_master_down(vhe); 717 break; 718 } 719 720 /* 721 * If the master is going to advertise at such a low frequency 722 * that he's guaranteed to time out, we'd might as well just 723 * treat him as timed out now. 724 */ 725 sc_tv.tv_sec = sc->sc_advbase * 3; 726 if (sc->sc_advbase && timercmp(&sc_tv, &ch_tv, <)) { 727 carp_master_down(vhe); 728 break; 729 } 730 731 /* 732 * Otherwise, we reset the counter and wait for the next 733 * advertisement. 734 */ 735 carp_setrun(vhe, af); 736 break; 737 } 738 739 m_freem(m); 740 return; 741 } 742 743 int 744 carp_sysctl_carpstat(void *oldp, size_t *oldlenp, void *newp) 745 { 746 struct carpstats carpstat; 747 748 CTASSERT(sizeof(carpstat) == (carps_ncounters * sizeof(uint64_t))); 749 memset(&carpstat, 0, sizeof carpstat); 750 counters_read(carpcounters, (uint64_t *)&carpstat, carps_ncounters); 751 return (sysctl_rdstruct(oldp, oldlenp, newp, 752 &carpstat, sizeof(carpstat))); 753 } 754 755 int 756 carp_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp, 757 size_t newlen) 758 { 759 int error; 760 761 /* All sysctl names at this level are terminal. */ 762 if (namelen != 1) 763 return (ENOTDIR); 764 765 switch (name[0]) { 766 case CARPCTL_STATS: 767 return (carp_sysctl_carpstat(oldp, oldlenp, newp)); 768 default: 769 if (name[0] <= 0 || name[0] >= CARPCTL_MAXID) 770 return (ENOPROTOOPT); 771 NET_LOCK(); 772 error = sysctl_int(oldp, oldlenp, newp, newlen, 773 &carp_opts[name[0]]); 774 NET_UNLOCK(); 775 return (error); 776 } 777 } 778 779 /* 780 * Interface side of the CARP implementation. 781 */ 782 783 /* ARGSUSED */ 784 void 785 carpattach(int n) 786 { 787 struct ifg_group *ifg; 788 789 if ((ifg = if_creategroup("carp")) != NULL) 790 ifg->ifg_refcnt++; /* keep around even if empty */ 791 if_clone_attach(&carp_cloner); 792 carpcounters = counters_alloc(carps_ncounters); 793 } 794 795 int 796 carp_clone_create(struct if_clone *ifc, int unit) 797 { 798 struct carp_softc *sc; 799 struct ifnet *ifp; 800 801 sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO); 802 refcnt_init(&sc->sc_refcnt); 803 804 SRPL_INIT(&sc->carp_vhosts); 805 sc->sc_vhe_count = 0; 806 if (carp_new_vhost(sc, 0, 0)) { 807 free(sc, M_DEVBUF, sizeof(*sc)); 808 return (ENOMEM); 809 } 810 811 task_set(&sc->sc_atask, carp_addr_updated, sc); 812 task_set(&sc->sc_ltask, carp_carpdev_state, sc); 813 task_set(&sc->sc_dtask, carpdetach, sc); 814 815 sc->sc_suppress = 0; 816 sc->sc_advbase = CARP_DFLTINTV; 817 sc->sc_naddrs = sc->sc_naddrs6 = 0; 818 #ifdef INET6 819 sc->sc_im6o.im6o_hlim = CARP_DFLTTL; 820 #endif /* INET6 */ 821 sc->sc_imo.imo_membership = mallocarray(IP_MIN_MEMBERSHIPS, 822 sizeof(struct in_multi *), M_IPMOPTS, M_WAITOK|M_ZERO); 823 sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS; 824 825 LIST_INIT(&sc->carp_mc_listhead); 826 ifp = &sc->sc_if; 827 ifp->if_softc = sc; 828 snprintf(ifp->if_xname, sizeof ifp->if_xname, "%s%d", ifc->ifc_name, 829 unit); 830 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 831 ifp->if_ioctl = carp_ioctl; 832 ifp->if_start = carp_start; 833 ifp->if_xflags = IFXF_CLONED; 834 IFQ_SET_MAXLEN(&ifp->if_snd, 1); 835 if_counters_alloc(ifp); 836 if_attach(ifp); 837 ether_ifattach(ifp); 838 ifp->if_type = IFT_CARP; 839 ifp->if_sadl->sdl_type = IFT_CARP; 840 ifp->if_output = carp_output; 841 ifp->if_priority = IF_CARP_DEFAULT_PRIORITY; 842 ifp->if_link_state = LINK_STATE_INVALID; 843 844 /* Hook carp_addr_updated to cope with address and route changes. */ 845 if_addrhook_add(&sc->sc_if, &sc->sc_atask); 846 847 return (0); 848 } 849 850 int 851 carp_new_vhost(struct carp_softc *sc, int vhid, int advskew) 852 { 853 struct carp_vhost_entry *vhe, *vhe0; 854 855 vhe = malloc(sizeof(*vhe), M_DEVBUF, M_NOWAIT | M_ZERO); 856 if (vhe == NULL) 857 return (ENOMEM); 858 859 refcnt_init(&vhe->vhost_refcnt); 860 carp_sc_ref(NULL, sc); /* give a sc ref to the vhe */ 861 vhe->parent_sc = sc; 862 vhe->vhid = vhid; 863 vhe->advskew = advskew; 864 vhe->state = INIT; 865 timeout_set_proc(&vhe->ad_tmo, carp_timer_ad, vhe); 866 timeout_set_proc(&vhe->md_tmo, carp_timer_down, vhe); 867 timeout_set_proc(&vhe->md6_tmo, carp_timer_down, vhe); 868 869 KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */ 870 871 /* mark the first vhe as leader */ 872 if (SRPL_EMPTY_LOCKED(&sc->carp_vhosts)) { 873 vhe->vhe_leader = 1; 874 SRPL_INSERT_HEAD_LOCKED(&carp_vh_rc, &sc->carp_vhosts, 875 vhe, vhost_entries); 876 sc->sc_vhe_count = 1; 877 return (0); 878 } 879 880 SRPL_FOREACH_LOCKED(vhe0, &sc->carp_vhosts, vhost_entries) { 881 if (SRPL_NEXT_LOCKED(vhe0, vhost_entries) == NULL) 882 break; 883 } 884 885 SRPL_INSERT_AFTER_LOCKED(&carp_vh_rc, vhe0, vhe, vhost_entries); 886 sc->sc_vhe_count++; 887 888 return (0); 889 } 890 891 int 892 carp_clone_destroy(struct ifnet *ifp) 893 { 894 struct carp_softc *sc = ifp->if_softc; 895 896 if_addrhook_del(&sc->sc_if, &sc->sc_atask); 897 898 NET_LOCK(); 899 carpdetach(sc); 900 NET_UNLOCK(); 901 902 ether_ifdetach(ifp); 903 if_detach(ifp); 904 carp_destroy_vhosts(ifp->if_softc); 905 refcnt_finalize(&sc->sc_refcnt, "carpdtor"); 906 free(sc->sc_imo.imo_membership, M_IPMOPTS, 907 sc->sc_imo.imo_max_memberships * sizeof(struct in_multi *)); 908 free(sc, M_DEVBUF, sizeof(*sc)); 909 return (0); 910 } 911 912 void 913 carp_del_all_timeouts(struct carp_softc *sc) 914 { 915 struct carp_vhost_entry *vhe; 916 917 KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */ 918 SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) { 919 timeout_del(&vhe->ad_tmo); 920 timeout_del(&vhe->md_tmo); 921 timeout_del(&vhe->md6_tmo); 922 } 923 } 924 925 void 926 carpdetach(void *arg) 927 { 928 struct carp_softc *sc = arg; 929 struct ifnet *ifp0; 930 struct srpl *cif; 931 932 carp_del_all_timeouts(sc); 933 934 if (sc->sc_demote_cnt) 935 carp_group_demote_adj(&sc->sc_if, -sc->sc_demote_cnt, "detach"); 936 sc->sc_suppress = 0; 937 sc->sc_sendad_errors = 0; 938 939 carp_set_state_all(sc, INIT); 940 sc->sc_if.if_flags &= ~IFF_UP; 941 carp_setrun_all(sc, 0); 942 carp_multicast_cleanup(sc); 943 944 ifp0 = sc->sc_carpdev; 945 if (ifp0 == NULL) 946 return; 947 948 KERNEL_ASSERT_LOCKED(); /* touching if_carp */ 949 950 cif = &ifp0->if_carp; 951 952 /* Restore previous input handler. */ 953 if_ih_remove(ifp0, carp_input, NULL); 954 955 SRPL_REMOVE_LOCKED(&carp_sc_rc, cif, sc, carp_softc, sc_list); 956 sc->sc_carpdev = NULL; 957 958 if_linkstatehook_del(ifp0, &sc->sc_ltask); 959 if_detachhook_del(ifp0, &sc->sc_dtask); 960 ifpromisc(ifp0, 0); 961 } 962 963 void 964 carp_destroy_vhosts(struct carp_softc *sc) 965 { 966 /* XXX bow out? */ 967 struct carp_vhost_entry *vhe; 968 969 KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */ 970 971 while ((vhe = SRPL_FIRST_LOCKED(&sc->carp_vhosts)) != NULL) { 972 SRPL_REMOVE_LOCKED(&carp_vh_rc, &sc->carp_vhosts, vhe, 973 carp_vhost_entry, vhost_entries); 974 carp_vh_unref(NULL, vhe); /* drop last ref */ 975 } 976 sc->sc_vhe_count = 0; 977 } 978 979 void 980 carp_prepare_ad(struct mbuf *m, struct carp_vhost_entry *vhe, 981 struct carp_header *ch) 982 { 983 if (!vhe->vhe_replay_cookie) { 984 arc4random_buf(&vhe->vhe_replay_cookie, 985 sizeof(vhe->vhe_replay_cookie)); 986 } 987 988 bcopy(&vhe->vhe_replay_cookie, ch->carp_counter, 989 sizeof(ch->carp_counter)); 990 991 /* 992 * For the time being, do not include the IPv6 linklayer addresses 993 * in the HMAC. 994 */ 995 carp_hmac_generate(vhe, ch->carp_counter, ch->carp_md, HMAC_NOV6LL); 996 } 997 998 void 999 carp_send_ad_all(void) 1000 { 1001 struct ifnet *ifp0; 1002 struct srpl *cif; 1003 struct carp_softc *vh; 1004 1005 KERNEL_ASSERT_LOCKED(); /* touching if_carp */ 1006 1007 if (carp_send_all_recur > 0) 1008 return; 1009 ++carp_send_all_recur; 1010 TAILQ_FOREACH(ifp0, &ifnet, if_list) { 1011 if (ifp0->if_type != IFT_ETHER) 1012 continue; 1013 1014 cif = &ifp0->if_carp; 1015 SRPL_FOREACH_LOCKED(vh, cif, sc_list) { 1016 if ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) == 1017 (IFF_UP|IFF_RUNNING)) { 1018 carp_vhe_send_ad_all(vh); 1019 } 1020 } 1021 } 1022 --carp_send_all_recur; 1023 } 1024 1025 void 1026 carp_vhe_send_ad_all(struct carp_softc *sc) 1027 { 1028 struct carp_vhost_entry *vhe; 1029 1030 KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */ 1031 1032 SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) { 1033 if (vhe->state == MASTER) 1034 carp_send_ad(vhe); 1035 } 1036 } 1037 1038 void 1039 carp_timer_ad(void *v) 1040 { 1041 NET_LOCK(); 1042 carp_send_ad(v); 1043 NET_UNLOCK(); 1044 } 1045 1046 void 1047 carp_send_ad(struct carp_vhost_entry *vhe) 1048 { 1049 struct carp_header ch; 1050 struct timeval tv; 1051 struct carp_softc *sc = vhe->parent_sc; 1052 struct carp_header *ch_ptr; 1053 struct mbuf *m; 1054 int error, len, advbase, advskew; 1055 struct ifaddr *ifa; 1056 struct sockaddr sa; 1057 1058 NET_ASSERT_LOCKED(); 1059 1060 if (sc->sc_carpdev == NULL) { 1061 sc->sc_if.if_oerrors++; 1062 return; 1063 } 1064 1065 /* bow out if we've gone to backup (the carp interface is going down) */ 1066 if (sc->sc_bow_out) { 1067 advbase = 255; 1068 advskew = 255; 1069 } else { 1070 advbase = sc->sc_advbase; 1071 advskew = vhe->advskew; 1072 tv.tv_sec = advbase; 1073 if (advbase == 0 && advskew == 0) 1074 tv.tv_usec = 1 * 1000000 / 256; 1075 else 1076 tv.tv_usec = advskew * 1000000 / 256; 1077 } 1078 1079 ch.carp_version = CARP_VERSION; 1080 ch.carp_type = CARP_ADVERTISEMENT; 1081 ch.carp_vhid = vhe->vhid; 1082 ch.carp_demote = carp_group_demote_count(sc) & 0xff; 1083 ch.carp_advbase = advbase; 1084 ch.carp_advskew = advskew; 1085 ch.carp_authlen = 7; /* XXX DEFINE */ 1086 ch.carp_cksum = 0; 1087 1088 sc->cur_vhe = vhe; /* we need the vhe later on the output path */ 1089 1090 if (sc->sc_naddrs) { 1091 struct ip *ip; 1092 1093 MGETHDR(m, M_DONTWAIT, MT_HEADER); 1094 if (m == NULL) { 1095 sc->sc_if.if_oerrors++; 1096 carpstat_inc(carps_onomem); 1097 /* XXX maybe less ? */ 1098 goto retry_later; 1099 } 1100 len = sizeof(*ip) + sizeof(ch); 1101 m->m_pkthdr.pf.prio = CARP_IFQ_PRIO; 1102 m->m_pkthdr.ph_rtableid = sc->sc_if.if_rdomain; 1103 m->m_pkthdr.len = len; 1104 m->m_len = len; 1105 m_align(m, len); 1106 ip = mtod(m, struct ip *); 1107 ip->ip_v = IPVERSION; 1108 ip->ip_hl = sizeof(*ip) >> 2; 1109 ip->ip_tos = IPTOS_LOWDELAY; 1110 ip->ip_len = htons(len); 1111 ip->ip_id = htons(ip_randomid()); 1112 ip->ip_off = htons(IP_DF); 1113 ip->ip_ttl = CARP_DFLTTL; 1114 ip->ip_p = IPPROTO_CARP; 1115 ip->ip_sum = 0; 1116 1117 memset(&sa, 0, sizeof(sa)); 1118 sa.sa_family = AF_INET; 1119 /* Prefer addresses on the parent interface as source for AD. */ 1120 ifa = ifaof_ifpforaddr(&sa, sc->sc_carpdev); 1121 if (ifa == NULL) 1122 ifa = ifaof_ifpforaddr(&sa, &sc->sc_if); 1123 KASSERT(ifa != NULL); 1124 ip->ip_src.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr; 1125 ip->ip_dst.s_addr = sc->sc_peer.s_addr; 1126 if (IN_MULTICAST(ip->ip_dst.s_addr)) 1127 m->m_flags |= M_MCAST; 1128 1129 ch_ptr = (struct carp_header *)(ip + 1); 1130 bcopy(&ch, ch_ptr, sizeof(ch)); 1131 carp_prepare_ad(m, vhe, ch_ptr); 1132 1133 m->m_data += sizeof(*ip); 1134 ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip)); 1135 m->m_data -= sizeof(*ip); 1136 1137 getmicrotime(&sc->sc_if.if_lastchange); 1138 sc->sc_if.if_opackets++; 1139 sc->sc_if.if_obytes += len; 1140 carpstat_inc(carps_opackets); 1141 1142 error = ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, 1143 NULL, 0); 1144 if (error) { 1145 if (error == ENOBUFS) 1146 carpstat_inc(carps_onomem); 1147 else 1148 CARP_LOG(LOG_WARNING, sc, 1149 ("ip_output failed: %d", error)); 1150 sc->sc_if.if_oerrors++; 1151 if (sc->sc_sendad_errors < INT_MAX) 1152 sc->sc_sendad_errors++; 1153 if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS(sc)) 1154 carp_group_demote_adj(&sc->sc_if, 1, 1155 "> snderrors"); 1156 sc->sc_sendad_success = 0; 1157 } else { 1158 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS(sc)) { 1159 if (++sc->sc_sendad_success >= 1160 CARP_SENDAD_MIN_SUCCESS(sc)) { 1161 carp_group_demote_adj(&sc->sc_if, -1, 1162 "< snderrors"); 1163 sc->sc_sendad_errors = 0; 1164 } 1165 } else 1166 sc->sc_sendad_errors = 0; 1167 } 1168 if (vhe->vhe_leader) { 1169 if (sc->sc_delayed_arp > 0) 1170 sc->sc_delayed_arp--; 1171 if (sc->sc_delayed_arp == 0) { 1172 carp_send_arp(sc); 1173 sc->sc_delayed_arp = -1; 1174 } 1175 } 1176 } 1177 #ifdef INET6 1178 if (sc->sc_naddrs6) { 1179 struct ip6_hdr *ip6; 1180 1181 MGETHDR(m, M_DONTWAIT, MT_HEADER); 1182 if (m == NULL) { 1183 sc->sc_if.if_oerrors++; 1184 carpstat_inc(carps_onomem); 1185 /* XXX maybe less ? */ 1186 goto retry_later; 1187 } 1188 len = sizeof(*ip6) + sizeof(ch); 1189 m->m_pkthdr.pf.prio = CARP_IFQ_PRIO; 1190 m->m_pkthdr.ph_rtableid = sc->sc_if.if_rdomain; 1191 m->m_pkthdr.len = len; 1192 m->m_len = len; 1193 m_align(m, len); 1194 m->m_flags |= M_MCAST; 1195 ip6 = mtod(m, struct ip6_hdr *); 1196 memset(ip6, 0, sizeof(*ip6)); 1197 ip6->ip6_vfc |= IPV6_VERSION; 1198 ip6->ip6_hlim = CARP_DFLTTL; 1199 ip6->ip6_nxt = IPPROTO_CARP; 1200 1201 /* set the source address */ 1202 memset(&sa, 0, sizeof(sa)); 1203 sa.sa_family = AF_INET6; 1204 /* Prefer addresses on the parent interface as source for AD. */ 1205 ifa = ifaof_ifpforaddr(&sa, sc->sc_carpdev); 1206 if (ifa == NULL) 1207 ifa = ifaof_ifpforaddr(&sa, &sc->sc_if); 1208 KASSERT(ifa != NULL); 1209 bcopy(ifatoia6(ifa)->ia_addr.sin6_addr.s6_addr, 1210 &ip6->ip6_src, sizeof(struct in6_addr)); 1211 /* set the multicast destination */ 1212 1213 ip6->ip6_dst.s6_addr16[0] = htons(0xff02); 1214 ip6->ip6_dst.s6_addr16[1] = htons(sc->sc_carpdev->if_index); 1215 ip6->ip6_dst.s6_addr8[15] = 0x12; 1216 1217 ch_ptr = (struct carp_header *)(ip6 + 1); 1218 bcopy(&ch, ch_ptr, sizeof(ch)); 1219 carp_prepare_ad(m, vhe, ch_ptr); 1220 1221 m->m_data += sizeof(*ip6); 1222 ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip6)); 1223 m->m_data -= sizeof(*ip6); 1224 1225 getmicrotime(&sc->sc_if.if_lastchange); 1226 sc->sc_if.if_opackets++; 1227 sc->sc_if.if_obytes += len; 1228 carpstat_inc(carps_opackets6); 1229 1230 error = ip6_output(m, NULL, NULL, 0, &sc->sc_im6o, NULL); 1231 if (error) { 1232 if (error == ENOBUFS) 1233 carpstat_inc(carps_onomem); 1234 else 1235 CARP_LOG(LOG_WARNING, sc, 1236 ("ip6_output failed: %d", error)); 1237 sc->sc_if.if_oerrors++; 1238 if (sc->sc_sendad_errors < INT_MAX) 1239 sc->sc_sendad_errors++; 1240 if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS(sc)) 1241 carp_group_demote_adj(&sc->sc_if, 1, 1242 "> snd6errors"); 1243 sc->sc_sendad_success = 0; 1244 } else { 1245 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS(sc)) { 1246 if (++sc->sc_sendad_success >= 1247 CARP_SENDAD_MIN_SUCCESS(sc)) { 1248 carp_group_demote_adj(&sc->sc_if, -1, 1249 "< snd6errors"); 1250 sc->sc_sendad_errors = 0; 1251 } 1252 } else 1253 sc->sc_sendad_errors = 0; 1254 } 1255 } 1256 #endif /* INET6 */ 1257 1258 retry_later: 1259 sc->cur_vhe = NULL; 1260 if (advbase != 255 || advskew != 255) 1261 timeout_add_tv(&vhe->ad_tmo, &tv); 1262 } 1263 1264 /* 1265 * Broadcast a gratuitous ARP request containing 1266 * the virtual router MAC address for each IP address 1267 * associated with the virtual router. 1268 */ 1269 void 1270 carp_send_arp(struct carp_softc *sc) 1271 { 1272 struct ifaddr *ifa; 1273 in_addr_t in; 1274 1275 TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) { 1276 1277 if (ifa->ifa_addr->sa_family != AF_INET) 1278 continue; 1279 1280 in = ifatoia(ifa)->ia_addr.sin_addr.s_addr; 1281 arprequest(&sc->sc_if, &in, &in, sc->sc_ac.ac_enaddr); 1282 } 1283 } 1284 1285 #ifdef INET6 1286 void 1287 carp_send_na(struct carp_softc *sc) 1288 { 1289 struct ifaddr *ifa; 1290 struct in6_addr *in6; 1291 static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT; 1292 1293 TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) { 1294 1295 if (ifa->ifa_addr->sa_family != AF_INET6) 1296 continue; 1297 1298 in6 = &ifatoia6(ifa)->ia_addr.sin6_addr; 1299 nd6_na_output(&sc->sc_if, &mcast, in6, 1300 ND_NA_FLAG_OVERRIDE | 1301 (ip6_forwarding ? ND_NA_FLAG_ROUTER : 0), 1, NULL); 1302 } 1303 } 1304 #endif /* INET6 */ 1305 1306 void 1307 carp_update_lsmask(struct carp_softc *sc) 1308 { 1309 struct carp_vhost_entry *vhe; 1310 int count; 1311 1312 if (sc->sc_balancing == CARP_BAL_NONE) 1313 return; 1314 1315 sc->sc_lsmask = 0; 1316 count = 0; 1317 1318 KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */ 1319 SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) { 1320 if (vhe->state == MASTER && count < sizeof(sc->sc_lsmask) * 8) 1321 sc->sc_lsmask |= 1 << count; 1322 count++; 1323 } 1324 sc->sc_lscount = count; 1325 CARP_LOG(LOG_DEBUG, sc, ("carp_update_lsmask: %x", sc->sc_lsmask)); 1326 } 1327 1328 int 1329 carp_iamatch(struct ifnet *ifp) 1330 { 1331 struct carp_softc *sc = ifp->if_softc; 1332 struct carp_vhost_entry *vhe; 1333 struct srp_ref sr; 1334 int match = 0; 1335 1336 vhe = SRPL_FIRST(&sr, &sc->carp_vhosts); 1337 if (vhe->state == MASTER) 1338 match = 1; 1339 SRPL_LEAVE(&sr); 1340 1341 return (match); 1342 } 1343 1344 int 1345 carp_ourether(struct ifnet *ifp, uint8_t *ena) 1346 { 1347 struct srpl *cif = &ifp->if_carp; 1348 struct carp_softc *sc; 1349 struct srp_ref sr; 1350 int match = 0; 1351 1352 KASSERT(ifp->if_type == IFT_ETHER); 1353 1354 SRPL_FOREACH(sc, &sr, cif, sc_list) { 1355 if ((sc->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) != 1356 (IFF_UP|IFF_RUNNING)) 1357 continue; 1358 if (carp_vhe_match(sc, ena)) { 1359 match = 1; 1360 break; 1361 } 1362 } 1363 SRPL_LEAVE(&sr); 1364 1365 return (match); 1366 } 1367 1368 int 1369 carp_vhe_match(struct carp_softc *sc, uint8_t *ena) 1370 { 1371 struct carp_vhost_entry *vhe; 1372 struct srp_ref sr; 1373 int match = 0; 1374 1375 vhe = SRPL_FIRST(&sr, &sc->carp_vhosts); 1376 match = (vhe->state == MASTER || sc->sc_balancing >= CARP_BAL_IP) && 1377 !memcmp(ena, sc->sc_ac.ac_enaddr, ETHER_ADDR_LEN); 1378 SRPL_LEAVE(&sr); 1379 1380 return (match); 1381 } 1382 1383 int 1384 carp_input(struct ifnet *ifp0, struct mbuf *m, void *cookie) 1385 { 1386 struct ether_header *eh; 1387 struct srpl *cif; 1388 struct carp_softc *sc; 1389 struct srp_ref sr; 1390 1391 #if NVLAN > 0 1392 /* 1393 * If the underlying interface removed the VLAN header itself, 1394 * it's not for us. 1395 */ 1396 if (ISSET(m->m_flags, M_VLANTAG)) 1397 return (0); 1398 #endif 1399 1400 eh = mtod(m, struct ether_header *); 1401 cif = &ifp0->if_carp; 1402 1403 SRPL_FOREACH(sc, &sr, cif, sc_list) { 1404 if ((sc->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) != 1405 (IFF_UP|IFF_RUNNING)) 1406 continue; 1407 1408 if (carp_vhe_match(sc, eh->ether_dhost)) { 1409 /* 1410 * These packets look like layer 2 multicast but they 1411 * are unicast at layer 3. With help of the tag the 1412 * mbuf's M_MCAST flag can be removed by carp_lsdrop() 1413 * after we have passed layer 2. 1414 */ 1415 if (sc->sc_balancing == CARP_BAL_IP) { 1416 struct m_tag *mtag; 1417 mtag = m_tag_get(PACKET_TAG_CARP_BAL_IP, 0, 1418 M_NOWAIT); 1419 if (mtag == NULL) { 1420 m_freem(m); 1421 goto out; 1422 } 1423 m_tag_prepend(m, mtag); 1424 } 1425 break; 1426 } 1427 } 1428 1429 if (sc == NULL) { 1430 SRPL_LEAVE(&sr); 1431 1432 if (!ETHER_IS_MULTICAST(eh->ether_dhost)) 1433 return (0); 1434 1435 /* 1436 * XXX Should really check the list of multicast addresses 1437 * for each CARP interface _before_ copying. 1438 */ 1439 SRPL_FOREACH(sc, &sr, cif, sc_list) { 1440 struct mbuf *m0; 1441 1442 if (!(sc->sc_if.if_flags & IFF_UP)) 1443 continue; 1444 1445 m0 = m_dup_pkt(m, ETHER_ALIGN, M_DONTWAIT); 1446 if (m0 == NULL) 1447 continue; 1448 1449 if_vinput(&sc->sc_if, m0); 1450 } 1451 SRPL_LEAVE(&sr); 1452 1453 return (0); 1454 } 1455 1456 if_vinput(&sc->sc_if, m); 1457 out: 1458 SRPL_LEAVE(&sr); 1459 1460 return (1); 1461 } 1462 1463 int 1464 carp_lsdrop(struct ifnet *ifp, struct mbuf *m, sa_family_t af, u_int32_t *src, 1465 u_int32_t *dst, int drop) 1466 { 1467 struct carp_softc *sc; 1468 u_int32_t fold; 1469 struct m_tag *mtag; 1470 1471 if (ifp->if_type != IFT_CARP) 1472 return 0; 1473 sc = ifp->if_softc; 1474 if (sc->sc_balancing == CARP_BAL_NONE) 1475 return 0; 1476 1477 /* 1478 * Remove M_MCAST flag from mbuf of balancing ip traffic, since the fact 1479 * that it is layer 2 multicast does not implicate that it is also layer 1480 * 3 multicast. 1481 */ 1482 if (m->m_flags & M_MCAST && 1483 (mtag = m_tag_find(m, PACKET_TAG_CARP_BAL_IP, NULL))) { 1484 m_tag_delete(m, mtag); 1485 m->m_flags &= ~M_MCAST; 1486 } 1487 1488 /* 1489 * Return without making a drop decision. This allows to clear the 1490 * M_MCAST flag and do nothing else. 1491 */ 1492 if (!drop) 1493 return 0; 1494 1495 /* 1496 * Never drop carp advertisements. 1497 * XXX Bad idea to pass all broadcast / multicast traffic? 1498 */ 1499 if (m->m_flags & (M_BCAST|M_MCAST)) 1500 return 0; 1501 1502 fold = src[0] ^ dst[0]; 1503 #ifdef INET6 1504 if (af == AF_INET6) { 1505 int i; 1506 for (i = 1; i < 4; i++) 1507 fold ^= src[i] ^ dst[i]; 1508 } 1509 #endif 1510 if (sc->sc_lscount == 0) /* just to be safe */ 1511 return 1; 1512 1513 return ((1 << (ntohl(fold) % sc->sc_lscount)) & sc->sc_lsmask) == 0; 1514 } 1515 1516 void 1517 carp_timer_down(void *v) 1518 { 1519 NET_LOCK(); 1520 carp_master_down(v); 1521 NET_UNLOCK(); 1522 } 1523 1524 void 1525 carp_master_down(struct carp_vhost_entry *vhe) 1526 { 1527 struct carp_softc *sc = vhe->parent_sc; 1528 1529 NET_ASSERT_LOCKED(); 1530 1531 switch (vhe->state) { 1532 case INIT: 1533 printf("%s: master_down event in INIT state\n", 1534 sc->sc_if.if_xname); 1535 break; 1536 case MASTER: 1537 break; 1538 case BACKUP: 1539 carp_set_state(vhe, MASTER); 1540 carp_send_ad(vhe); 1541 if (sc->sc_balancing == CARP_BAL_NONE && vhe->vhe_leader) { 1542 carp_send_arp(sc); 1543 /* Schedule a delayed ARP to deal w/ some L3 switches */ 1544 sc->sc_delayed_arp = 2; 1545 #ifdef INET6 1546 carp_send_na(sc); 1547 #endif /* INET6 */ 1548 } 1549 carp_setrun(vhe, 0); 1550 carpstat_inc(carps_preempt); 1551 break; 1552 } 1553 } 1554 1555 void 1556 carp_setrun_all(struct carp_softc *sc, sa_family_t af) 1557 { 1558 struct carp_vhost_entry *vhe; 1559 1560 KERNEL_ASSERT_LOCKED(); /* touching carp_vhost */ 1561 SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) { 1562 carp_setrun(vhe, af); 1563 } 1564 } 1565 1566 /* 1567 * When in backup state, af indicates whether to reset the master down timer 1568 * for v4 or v6. If it's set to zero, reset the ones which are already pending. 1569 */ 1570 void 1571 carp_setrun(struct carp_vhost_entry *vhe, sa_family_t af) 1572 { 1573 struct timeval tv; 1574 struct carp_softc *sc = vhe->parent_sc; 1575 1576 if (sc->sc_carpdev == NULL) { 1577 sc->sc_if.if_flags &= ~IFF_RUNNING; 1578 carp_set_state_all(sc, INIT); 1579 return; 1580 } 1581 1582 if (memcmp(((struct arpcom *)sc->sc_carpdev)->ac_enaddr, 1583 sc->sc_ac.ac_enaddr, ETHER_ADDR_LEN) == 0) 1584 sc->sc_realmac = 1; 1585 else 1586 sc->sc_realmac = 0; 1587 1588 if (sc->sc_if.if_flags & IFF_UP && vhe->vhid > 0 && 1589 (sc->sc_naddrs || sc->sc_naddrs6) && !sc->sc_suppress) { 1590 sc->sc_if.if_flags |= IFF_RUNNING; 1591 } else { 1592 sc->sc_if.if_flags &= ~IFF_RUNNING; 1593 return; 1594 } 1595 1596 switch (vhe->state) { 1597 case INIT: 1598 carp_set_state(vhe, BACKUP); 1599 carp_setrun(vhe, 0); 1600 break; 1601 case BACKUP: 1602 timeout_del(&vhe->ad_tmo); 1603 tv.tv_sec = 3 * sc->sc_advbase; 1604 if (sc->sc_advbase == 0 && vhe->advskew == 0) 1605 tv.tv_usec = 3 * 1000000 / 256; 1606 else if (sc->sc_advbase == 0) 1607 tv.tv_usec = 3 * vhe->advskew * 1000000 / 256; 1608 else 1609 tv.tv_usec = vhe->advskew * 1000000 / 256; 1610 if (vhe->vhe_leader) 1611 sc->sc_delayed_arp = -1; 1612 switch (af) { 1613 case AF_INET: 1614 timeout_add_tv(&vhe->md_tmo, &tv); 1615 break; 1616 #ifdef INET6 1617 case AF_INET6: 1618 timeout_add_tv(&vhe->md6_tmo, &tv); 1619 break; 1620 #endif /* INET6 */ 1621 default: 1622 if (sc->sc_naddrs) 1623 timeout_add_tv(&vhe->md_tmo, &tv); 1624 if (sc->sc_naddrs6) 1625 timeout_add_tv(&vhe->md6_tmo, &tv); 1626 break; 1627 } 1628 break; 1629 case MASTER: 1630 tv.tv_sec = sc->sc_advbase; 1631 if (sc->sc_advbase == 0 && vhe->advskew == 0) 1632 tv.tv_usec = 1 * 1000000 / 256; 1633 else 1634 tv.tv_usec = vhe->advskew * 1000000 / 256; 1635 timeout_add_tv(&vhe->ad_tmo, &tv); 1636 break; 1637 } 1638 } 1639 1640 void 1641 carp_multicast_cleanup(struct carp_softc *sc) 1642 { 1643 struct ip_moptions *imo = &sc->sc_imo; 1644 #ifdef INET6 1645 struct ip6_moptions *im6o = &sc->sc_im6o; 1646 #endif 1647 u_int16_t n = imo->imo_num_memberships; 1648 1649 /* Clean up our own multicast memberships */ 1650 while (n-- > 0) { 1651 if (imo->imo_membership[n] != NULL) { 1652 in_delmulti(imo->imo_membership[n]); 1653 imo->imo_membership[n] = NULL; 1654 } 1655 } 1656 imo->imo_num_memberships = 0; 1657 imo->imo_ifidx = 0; 1658 1659 #ifdef INET6 1660 while (!LIST_EMPTY(&im6o->im6o_memberships)) { 1661 struct in6_multi_mship *imm = 1662 LIST_FIRST(&im6o->im6o_memberships); 1663 1664 LIST_REMOVE(imm, i6mm_chain); 1665 in6_leavegroup(imm); 1666 } 1667 im6o->im6o_ifidx = 0; 1668 #endif 1669 1670 /* And any other multicast memberships */ 1671 carp_ether_purgemulti(sc); 1672 } 1673 1674 int 1675 carp_set_ifp(struct carp_softc *sc, struct ifnet *ifp0) 1676 { 1677 struct srpl *cif; 1678 struct carp_softc *vr, *last = NULL, *after = NULL; 1679 int myself = 0, error = 0; 1680 1681 KASSERT(ifp0 != sc->sc_carpdev); 1682 KERNEL_ASSERT_LOCKED(); /* touching if_carp */ 1683 1684 if ((ifp0->if_flags & IFF_MULTICAST) == 0) 1685 return (EADDRNOTAVAIL); 1686 1687 if (ifp0->if_type != IFT_ETHER) 1688 return (EINVAL); 1689 1690 cif = &ifp0->if_carp; 1691 if (carp_check_dup_vhids(sc, cif, NULL)) 1692 return (EINVAL); 1693 1694 if ((error = ifpromisc(ifp0, 1))) 1695 return (error); 1696 1697 /* detach from old interface */ 1698 if (sc->sc_carpdev != NULL) 1699 carpdetach(sc); 1700 1701 /* attach carp interface to physical interface */ 1702 if_detachhook_add(ifp0, &sc->sc_dtask); 1703 if_linkstatehook_add(ifp0, &sc->sc_ltask); 1704 1705 sc->sc_carpdev = ifp0; 1706 sc->sc_if.if_capabilities = ifp0->if_capabilities & 1707 IFCAP_CSUM_MASK; 1708 1709 SRPL_FOREACH_LOCKED(vr, cif, sc_list) { 1710 struct carp_vhost_entry *vrhead, *schead; 1711 last = vr; 1712 1713 if (vr == sc) 1714 myself = 1; 1715 1716 vrhead = SRPL_FIRST_LOCKED(&vr->carp_vhosts); 1717 schead = SRPL_FIRST_LOCKED(&sc->carp_vhosts); 1718 if (vrhead->vhid < schead->vhid) 1719 after = vr; 1720 } 1721 1722 if (!myself) { 1723 /* We're trying to keep things in order */ 1724 if (last == NULL) { 1725 SRPL_INSERT_HEAD_LOCKED(&carp_sc_rc, cif, 1726 sc, sc_list); 1727 } else if (after == NULL) { 1728 SRPL_INSERT_AFTER_LOCKED(&carp_sc_rc, last, 1729 sc, sc_list); 1730 } else { 1731 SRPL_INSERT_AFTER_LOCKED(&carp_sc_rc, after, 1732 sc, sc_list); 1733 } 1734 } 1735 if (sc->sc_naddrs || sc->sc_naddrs6) 1736 sc->sc_if.if_flags |= IFF_UP; 1737 carp_set_enaddr(sc); 1738 1739 /* Change input handler of the physical interface. */ 1740 if_ih_insert(ifp0, carp_input, NULL); 1741 1742 carp_carpdev_state(sc); 1743 1744 return (0); 1745 } 1746 1747 void 1748 carp_set_vhe_enaddr(struct carp_vhost_entry *vhe) 1749 { 1750 struct carp_softc *sc = vhe->parent_sc; 1751 1752 if (vhe->vhid != 0 && sc->sc_carpdev) { 1753 if (vhe->vhe_leader && sc->sc_balancing == CARP_BAL_IP) 1754 vhe->vhe_enaddr[0] = 1; 1755 else 1756 vhe->vhe_enaddr[0] = 0; 1757 vhe->vhe_enaddr[1] = 0; 1758 vhe->vhe_enaddr[2] = 0x5e; 1759 vhe->vhe_enaddr[3] = 0; 1760 vhe->vhe_enaddr[4] = 1; 1761 vhe->vhe_enaddr[5] = vhe->vhid; 1762 } else 1763 memset(vhe->vhe_enaddr, 0, ETHER_ADDR_LEN); 1764 } 1765 1766 void 1767 carp_set_enaddr(struct carp_softc *sc) 1768 { 1769 struct carp_vhost_entry *vhe; 1770 1771 KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */ 1772 SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) 1773 carp_set_vhe_enaddr(vhe); 1774 1775 vhe = SRPL_FIRST_LOCKED(&sc->carp_vhosts); 1776 1777 /* 1778 * Use the carp lladdr if the running one isn't manually set. 1779 * Only compare static parts of the lladdr. 1780 */ 1781 if ((memcmp(sc->sc_ac.ac_enaddr + 1, vhe->vhe_enaddr + 1, 1782 ETHER_ADDR_LEN - 2) == 0) || 1783 (!sc->sc_ac.ac_enaddr[0] && !sc->sc_ac.ac_enaddr[1] && 1784 !sc->sc_ac.ac_enaddr[2] && !sc->sc_ac.ac_enaddr[3] && 1785 !sc->sc_ac.ac_enaddr[4] && !sc->sc_ac.ac_enaddr[5])) 1786 bcopy(vhe->vhe_enaddr, sc->sc_ac.ac_enaddr, ETHER_ADDR_LEN); 1787 1788 /* Make sure the enaddr has changed before further twiddling. */ 1789 if (memcmp(sc->sc_ac.ac_enaddr, sc->sc_curlladdr, ETHER_ADDR_LEN) != 0) { 1790 bcopy(sc->sc_ac.ac_enaddr, LLADDR(sc->sc_if.if_sadl), 1791 ETHER_ADDR_LEN); 1792 bcopy(sc->sc_ac.ac_enaddr, sc->sc_curlladdr, ETHER_ADDR_LEN); 1793 #ifdef INET6 1794 /* 1795 * (re)attach a link-local address which matches 1796 * our new MAC address. 1797 */ 1798 if (sc->sc_naddrs6) 1799 in6_ifattach_linklocal(&sc->sc_if, NULL); 1800 #endif 1801 carp_set_state_all(sc, INIT); 1802 carp_setrun_all(sc, 0); 1803 } 1804 } 1805 1806 void 1807 carp_addr_updated(void *v) 1808 { 1809 struct carp_softc *sc = (struct carp_softc *) v; 1810 struct ifaddr *ifa; 1811 int new_naddrs = 0, new_naddrs6 = 0; 1812 1813 TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) { 1814 if (ifa->ifa_addr->sa_family == AF_INET) 1815 new_naddrs++; 1816 #ifdef INET6 1817 else if (ifa->ifa_addr->sa_family == AF_INET6) 1818 new_naddrs6++; 1819 #endif /* INET6 */ 1820 } 1821 1822 /* We received address changes from if_addrhooks callback */ 1823 if (new_naddrs != sc->sc_naddrs || new_naddrs6 != sc->sc_naddrs6) { 1824 1825 sc->sc_naddrs = new_naddrs; 1826 sc->sc_naddrs6 = new_naddrs6; 1827 1828 /* Re-establish multicast membership removed by in_control */ 1829 if (IN_MULTICAST(sc->sc_peer.s_addr)) { 1830 if (!in_hasmulti(&sc->sc_peer, &sc->sc_if)) { 1831 struct in_multi **imm = 1832 sc->sc_imo.imo_membership; 1833 u_int16_t maxmem = 1834 sc->sc_imo.imo_max_memberships; 1835 1836 memset(&sc->sc_imo, 0, sizeof(sc->sc_imo)); 1837 sc->sc_imo.imo_membership = imm; 1838 sc->sc_imo.imo_max_memberships = maxmem; 1839 1840 if (sc->sc_carpdev != NULL && sc->sc_naddrs > 0) 1841 carp_join_multicast(sc); 1842 } 1843 } 1844 1845 if (sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0) { 1846 sc->sc_if.if_flags &= ~IFF_UP; 1847 carp_set_state_all(sc, INIT); 1848 } else 1849 carp_hmac_prepare(sc); 1850 } 1851 1852 carp_setrun_all(sc, 0); 1853 } 1854 1855 int 1856 carp_set_addr(struct carp_softc *sc, struct sockaddr_in *sin) 1857 { 1858 struct in_addr *in = &sin->sin_addr; 1859 int error; 1860 1861 KASSERT(sc->sc_carpdev != NULL); 1862 1863 /* XXX is this necessary? */ 1864 if (in->s_addr == INADDR_ANY) { 1865 carp_setrun_all(sc, 0); 1866 return (0); 1867 } 1868 1869 if (sc->sc_naddrs == 0 && (error = carp_join_multicast(sc)) != 0) 1870 return (error); 1871 1872 carp_set_state_all(sc, INIT); 1873 1874 return (0); 1875 } 1876 1877 int 1878 carp_join_multicast(struct carp_softc *sc) 1879 { 1880 struct ip_moptions *imo = &sc->sc_imo; 1881 struct in_multi *imm; 1882 struct in_addr addr; 1883 1884 if (!IN_MULTICAST(sc->sc_peer.s_addr)) 1885 return (0); 1886 1887 addr.s_addr = sc->sc_peer.s_addr; 1888 if ((imm = in_addmulti(&addr, &sc->sc_if)) == NULL) 1889 return (ENOBUFS); 1890 1891 imo->imo_membership[0] = imm; 1892 imo->imo_num_memberships = 1; 1893 imo->imo_ifidx = sc->sc_if.if_index; 1894 imo->imo_ttl = CARP_DFLTTL; 1895 imo->imo_loop = 0; 1896 return (0); 1897 } 1898 1899 1900 #ifdef INET6 1901 int 1902 carp_set_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6) 1903 { 1904 int error; 1905 1906 KASSERT(sc->sc_carpdev != NULL); 1907 1908 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { 1909 carp_setrun_all(sc, 0); 1910 return (0); 1911 } 1912 1913 if (sc->sc_naddrs6 == 0 && (error = carp_join_multicast6(sc)) != 0) 1914 return (error); 1915 1916 carp_set_state_all(sc, INIT); 1917 1918 return (0); 1919 } 1920 1921 int 1922 carp_join_multicast6(struct carp_softc *sc) 1923 { 1924 struct in6_multi_mship *imm, *imm2; 1925 struct ip6_moptions *im6o = &sc->sc_im6o; 1926 struct sockaddr_in6 addr6; 1927 int error; 1928 1929 /* Join IPv6 CARP multicast group */ 1930 memset(&addr6, 0, sizeof(addr6)); 1931 addr6.sin6_family = AF_INET6; 1932 addr6.sin6_len = sizeof(addr6); 1933 addr6.sin6_addr.s6_addr16[0] = htons(0xff02); 1934 addr6.sin6_addr.s6_addr16[1] = htons(sc->sc_if.if_index); 1935 addr6.sin6_addr.s6_addr8[15] = 0x12; 1936 if ((imm = in6_joingroup(&sc->sc_if, 1937 &addr6.sin6_addr, &error)) == NULL) { 1938 return (error); 1939 } 1940 /* join solicited multicast address */ 1941 memset(&addr6.sin6_addr, 0, sizeof(addr6.sin6_addr)); 1942 addr6.sin6_addr.s6_addr16[0] = htons(0xff02); 1943 addr6.sin6_addr.s6_addr16[1] = htons(sc->sc_if.if_index); 1944 addr6.sin6_addr.s6_addr32[1] = 0; 1945 addr6.sin6_addr.s6_addr32[2] = htonl(1); 1946 addr6.sin6_addr.s6_addr32[3] = 0; 1947 addr6.sin6_addr.s6_addr8[12] = 0xff; 1948 if ((imm2 = in6_joingroup(&sc->sc_if, 1949 &addr6.sin6_addr, &error)) == NULL) { 1950 in6_leavegroup(imm); 1951 return (error); 1952 } 1953 1954 /* apply v6 multicast membership */ 1955 im6o->im6o_ifidx = sc->sc_if.if_index; 1956 if (imm) 1957 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, 1958 i6mm_chain); 1959 if (imm2) 1960 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm2, 1961 i6mm_chain); 1962 1963 return (0); 1964 } 1965 1966 #endif /* INET6 */ 1967 1968 int 1969 carp_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr) 1970 { 1971 struct proc *p = curproc; /* XXX */ 1972 struct carp_softc *sc = ifp->if_softc; 1973 struct carp_vhost_entry *vhe; 1974 struct carpreq carpr; 1975 struct ifaddr *ifa = (struct ifaddr *)addr; 1976 struct ifreq *ifr = (struct ifreq *)addr; 1977 struct ifnet *ifp0 = sc->sc_carpdev; 1978 int i, error = 0; 1979 1980 switch (cmd) { 1981 case SIOCSIFADDR: 1982 if (ifp0 == NULL) 1983 return (EINVAL); 1984 1985 switch (ifa->ifa_addr->sa_family) { 1986 case AF_INET: 1987 sc->sc_if.if_flags |= IFF_UP; 1988 error = carp_set_addr(sc, satosin(ifa->ifa_addr)); 1989 break; 1990 #ifdef INET6 1991 case AF_INET6: 1992 sc->sc_if.if_flags |= IFF_UP; 1993 error = carp_set_addr6(sc, satosin6(ifa->ifa_addr)); 1994 break; 1995 #endif /* INET6 */ 1996 default: 1997 error = EAFNOSUPPORT; 1998 break; 1999 } 2000 break; 2001 2002 case SIOCSIFFLAGS: 2003 KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */ 2004 vhe = SRPL_FIRST_LOCKED(&sc->carp_vhosts); 2005 if (vhe->state != INIT && !(ifr->ifr_flags & IFF_UP)) { 2006 carp_del_all_timeouts(sc); 2007 2008 /* we need the interface up to bow out */ 2009 sc->sc_if.if_flags |= IFF_UP; 2010 sc->sc_bow_out = 1; 2011 carp_vhe_send_ad_all(sc); 2012 sc->sc_bow_out = 0; 2013 2014 sc->sc_if.if_flags &= ~IFF_UP; 2015 carp_set_state_all(sc, INIT); 2016 carp_setrun_all(sc, 0); 2017 } else if (vhe->state == INIT && (ifr->ifr_flags & IFF_UP)) { 2018 sc->sc_if.if_flags |= IFF_UP; 2019 carp_setrun_all(sc, 0); 2020 } 2021 break; 2022 2023 case SIOCSVH: 2024 KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */ 2025 vhe = SRPL_FIRST_LOCKED(&sc->carp_vhosts); 2026 if ((error = suser(p)) != 0) 2027 break; 2028 if ((error = copyin(ifr->ifr_data, &carpr, sizeof carpr))) 2029 break; 2030 error = 1; 2031 if (carpr.carpr_carpdev[0] != '\0' && 2032 (ifp0 = ifunit(carpr.carpr_carpdev)) == NULL) 2033 return (EINVAL); 2034 if (carpr.carpr_peer.s_addr == 0) 2035 sc->sc_peer.s_addr = INADDR_CARP_GROUP; 2036 else 2037 sc->sc_peer.s_addr = carpr.carpr_peer.s_addr; 2038 if (ifp0 != sc->sc_carpdev) { 2039 if ((error = carp_set_ifp(sc, ifp0))) 2040 return (error); 2041 } 2042 if (vhe->state != INIT && carpr.carpr_state != vhe->state) { 2043 switch (carpr.carpr_state) { 2044 case BACKUP: 2045 timeout_del(&vhe->ad_tmo); 2046 carp_set_state_all(sc, BACKUP); 2047 carp_setrun_all(sc, 0); 2048 break; 2049 case MASTER: 2050 KERNEL_ASSERT_LOCKED(); 2051 /* touching carp_vhosts */ 2052 SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, 2053 vhost_entries) 2054 carp_master_down(vhe); 2055 break; 2056 default: 2057 break; 2058 } 2059 } 2060 if ((error = carp_vhids_ioctl(sc, &carpr))) 2061 return (error); 2062 if (carpr.carpr_advbase >= 0) { 2063 if (carpr.carpr_advbase > 255) { 2064 error = EINVAL; 2065 break; 2066 } 2067 sc->sc_advbase = carpr.carpr_advbase; 2068 error--; 2069 } 2070 if (memcmp(sc->sc_advskews, carpr.carpr_advskews, 2071 sizeof(sc->sc_advskews))) { 2072 i = 0; 2073 KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */ 2074 SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, 2075 vhost_entries) 2076 vhe->advskew = carpr.carpr_advskews[i++]; 2077 bcopy(carpr.carpr_advskews, sc->sc_advskews, 2078 sizeof(sc->sc_advskews)); 2079 } 2080 if (sc->sc_balancing != carpr.carpr_balancing) { 2081 if (carpr.carpr_balancing > CARP_BAL_MAXID) { 2082 error = EINVAL; 2083 break; 2084 } 2085 sc->sc_balancing = carpr.carpr_balancing; 2086 carp_set_enaddr(sc); 2087 carp_update_lsmask(sc); 2088 } 2089 bcopy(carpr.carpr_key, sc->sc_key, sizeof(sc->sc_key)); 2090 if (error > 0) 2091 error = EINVAL; 2092 else { 2093 error = 0; 2094 carp_hmac_prepare(sc); 2095 carp_setrun_all(sc, 0); 2096 } 2097 break; 2098 2099 case SIOCGVH: 2100 memset(&carpr, 0, sizeof(carpr)); 2101 if (ifp0 != NULL) 2102 strlcpy(carpr.carpr_carpdev, ifp0->if_xname, IFNAMSIZ); 2103 i = 0; 2104 KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */ 2105 SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) { 2106 carpr.carpr_vhids[i] = vhe->vhid; 2107 carpr.carpr_advskews[i] = vhe->advskew; 2108 carpr.carpr_states[i] = vhe->state; 2109 i++; 2110 } 2111 carpr.carpr_advbase = sc->sc_advbase; 2112 carpr.carpr_balancing = sc->sc_balancing; 2113 if (suser(p) == 0) 2114 bcopy(sc->sc_key, carpr.carpr_key, 2115 sizeof(carpr.carpr_key)); 2116 carpr.carpr_peer.s_addr = sc->sc_peer.s_addr; 2117 error = copyout(&carpr, ifr->ifr_data, sizeof(carpr)); 2118 break; 2119 2120 case SIOCADDMULTI: 2121 error = carp_ether_addmulti(sc, ifr); 2122 break; 2123 2124 case SIOCDELMULTI: 2125 error = carp_ether_delmulti(sc, ifr); 2126 break; 2127 case SIOCAIFGROUP: 2128 case SIOCDIFGROUP: 2129 if (sc->sc_demote_cnt) 2130 carp_ifgroup_ioctl(ifp, cmd, addr); 2131 break; 2132 case SIOCSIFGATTR: 2133 carp_ifgattr_ioctl(ifp, cmd, addr); 2134 break; 2135 default: 2136 error = ENOTTY; 2137 } 2138 2139 if (memcmp(sc->sc_ac.ac_enaddr, sc->sc_curlladdr, ETHER_ADDR_LEN) != 0) 2140 carp_set_enaddr(sc); 2141 return (error); 2142 } 2143 2144 int 2145 carp_check_dup_vhids(struct carp_softc *sc, struct srpl *cif, 2146 struct carpreq *carpr) 2147 { 2148 struct carp_softc *vr; 2149 struct carp_vhost_entry *vhe, *vhe0; 2150 int i; 2151 2152 KERNEL_ASSERT_LOCKED(); /* touching if_carp + carp_vhosts */ 2153 2154 SRPL_FOREACH_LOCKED(vr, cif, sc_list) { 2155 if (vr == sc) 2156 continue; 2157 SRPL_FOREACH_LOCKED(vhe, &vr->carp_vhosts, vhost_entries) { 2158 if (carpr) { 2159 for (i = 0; carpr->carpr_vhids[i]; i++) { 2160 if (vhe->vhid == carpr->carpr_vhids[i]) 2161 return (EINVAL); 2162 } 2163 } 2164 SRPL_FOREACH_LOCKED(vhe0, &sc->carp_vhosts, 2165 vhost_entries) { 2166 if (vhe->vhid == vhe0->vhid) 2167 return (EINVAL); 2168 } 2169 } 2170 } 2171 return (0); 2172 } 2173 2174 int 2175 carp_vhids_ioctl(struct carp_softc *sc, struct carpreq *carpr) 2176 { 2177 int i, j; 2178 u_int8_t taken_vhids[256]; 2179 2180 if (carpr->carpr_vhids[0] == 0 || 2181 !memcmp(sc->sc_vhids, carpr->carpr_vhids, sizeof(sc->sc_vhids))) 2182 return (0); 2183 2184 memset(taken_vhids, 0, sizeof(taken_vhids)); 2185 for (i = 0; carpr->carpr_vhids[i]; i++) { 2186 if (taken_vhids[carpr->carpr_vhids[i]]) 2187 return (EINVAL); 2188 taken_vhids[carpr->carpr_vhids[i]] = 1; 2189 2190 if (sc->sc_carpdev) { 2191 struct srpl *cif; 2192 cif = &sc->sc_carpdev->if_carp; 2193 if (carp_check_dup_vhids(sc, cif, carpr)) 2194 return (EINVAL); 2195 } 2196 if (carpr->carpr_advskews[i] >= 255) 2197 return (EINVAL); 2198 } 2199 /* set sane balancing defaults */ 2200 if (i <= 1) 2201 carpr->carpr_balancing = CARP_BAL_NONE; 2202 else if (carpr->carpr_balancing == CARP_BAL_NONE && 2203 sc->sc_balancing == CARP_BAL_NONE) 2204 carpr->carpr_balancing = CARP_BAL_IP; 2205 2206 /* destroy all */ 2207 carp_del_all_timeouts(sc); 2208 carp_destroy_vhosts(sc); 2209 memset(sc->sc_vhids, 0, sizeof(sc->sc_vhids)); 2210 2211 /* sort vhosts list by vhid */ 2212 for (j = 1; j <= 255; j++) { 2213 for (i = 0; carpr->carpr_vhids[i]; i++) { 2214 if (carpr->carpr_vhids[i] != j) 2215 continue; 2216 if (carp_new_vhost(sc, carpr->carpr_vhids[i], 2217 carpr->carpr_advskews[i])) 2218 return (ENOMEM); 2219 sc->sc_vhids[i] = carpr->carpr_vhids[i]; 2220 sc->sc_advskews[i] = carpr->carpr_advskews[i]; 2221 } 2222 } 2223 carp_set_enaddr(sc); 2224 carp_set_state_all(sc, INIT); 2225 return (0); 2226 } 2227 2228 void 2229 carp_ifgroup_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr) 2230 { 2231 struct ifgroupreq *ifgr = (struct ifgroupreq *)addr; 2232 struct ifg_list *ifgl; 2233 int *dm, adj; 2234 2235 if (!strcmp(ifgr->ifgr_group, IFG_ALL)) 2236 return; 2237 adj = ((struct carp_softc *)ifp->if_softc)->sc_demote_cnt; 2238 if (cmd == SIOCDIFGROUP) 2239 adj = adj * -1; 2240 2241 TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) 2242 if (!strcmp(ifgl->ifgl_group->ifg_group, ifgr->ifgr_group)) { 2243 dm = &ifgl->ifgl_group->ifg_carp_demoted; 2244 if (*dm + adj >= 0) 2245 *dm += adj; 2246 else 2247 *dm = 0; 2248 } 2249 } 2250 2251 void 2252 carp_ifgattr_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr) 2253 { 2254 struct ifgroupreq *ifgr = (struct ifgroupreq *)addr; 2255 struct carp_softc *sc = ifp->if_softc; 2256 2257 if (ifgr->ifgr_attrib.ifg_carp_demoted > 0 && (sc->sc_if.if_flags & 2258 (IFF_UP|IFF_RUNNING)) == (IFF_UP|IFF_RUNNING)) 2259 carp_vhe_send_ad_all(sc); 2260 } 2261 2262 void 2263 carp_start(struct ifnet *ifp) 2264 { 2265 struct carp_softc *sc = ifp->if_softc; 2266 struct mbuf *m; 2267 2268 for (;;) { 2269 IFQ_DEQUEUE(&ifp->if_snd, m); 2270 if (m == NULL) 2271 break; 2272 2273 #if NBPFILTER > 0 2274 if (ifp->if_bpf) 2275 bpf_mtap_ether(ifp->if_bpf, m, BPF_DIRECTION_OUT); 2276 #endif /* NBPFILTER > 0 */ 2277 2278 if ((ifp->if_carpdev->if_flags & (IFF_UP|IFF_RUNNING)) != 2279 (IFF_UP|IFF_RUNNING)) { 2280 ifp->if_oerrors++; 2281 m_freem(m); 2282 continue; 2283 } 2284 2285 /* 2286 * Do not leak the multicast address when sending 2287 * advertisements in 'ip' and 'ip-stealth' balacing 2288 * modes. 2289 */ 2290 if (sc->sc_balancing == CARP_BAL_IP || 2291 sc->sc_balancing == CARP_BAL_IPSTEALTH) { 2292 struct ether_header *eh; 2293 uint8_t *esrc; 2294 2295 eh = mtod(m, struct ether_header *); 2296 esrc = ((struct arpcom*)ifp->if_carpdev)->ac_enaddr; 2297 memcpy(eh->ether_shost, esrc, sizeof(eh->ether_shost)); 2298 } 2299 2300 if (if_enqueue(ifp->if_carpdev, m)) { 2301 ifp->if_oerrors++; 2302 continue; 2303 } 2304 ifp->if_opackets++; 2305 } 2306 } 2307 2308 int 2309 carp_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa, 2310 struct rtentry *rt) 2311 { 2312 struct carp_softc *sc = ((struct carp_softc *)ifp->if_softc); 2313 struct carp_vhost_entry *vhe; 2314 struct srp_ref sr; 2315 int ismaster; 2316 2317 /* 2318 * If the parent of this carp(4) got destroyed while 2319 * `m' was being processed, silently drop it. 2320 */ 2321 if (sc->sc_carpdev == NULL) { 2322 m_freem(m); 2323 return (0); 2324 } 2325 2326 if (sc->cur_vhe == NULL) { 2327 vhe = SRPL_FIRST(&sr, &sc->carp_vhosts); 2328 ismaster = (vhe->state == MASTER); 2329 SRPL_LEAVE(&sr); 2330 } else { 2331 ismaster = (sc->cur_vhe->state == MASTER); 2332 } 2333 2334 if ((sc->sc_balancing == CARP_BAL_NONE && !ismaster)) { 2335 m_freem(m); 2336 return (ENETUNREACH); 2337 } 2338 2339 return (ether_output(ifp, m, sa, rt)); 2340 } 2341 2342 void 2343 carp_set_state_all(struct carp_softc *sc, int state) 2344 { 2345 struct carp_vhost_entry *vhe; 2346 2347 KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */ 2348 2349 SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) { 2350 if (vhe->state == state) 2351 continue; 2352 2353 carp_set_state(vhe, state); 2354 } 2355 } 2356 2357 void 2358 carp_set_state(struct carp_vhost_entry *vhe, int state) 2359 { 2360 struct carp_softc *sc = vhe->parent_sc; 2361 static const char *carp_states[] = { CARP_STATES }; 2362 int loglevel; 2363 struct carp_vhost_entry *vhe0; 2364 2365 KASSERT(vhe->state != state); 2366 2367 if (vhe->state == INIT || state == INIT) 2368 loglevel = LOG_WARNING; 2369 else 2370 loglevel = LOG_CRIT; 2371 2372 if (sc->sc_vhe_count > 1) 2373 CARP_LOG(loglevel, sc, 2374 ("state transition (vhid %d): %s -> %s", vhe->vhid, 2375 carp_states[vhe->state], carp_states[state])); 2376 else 2377 CARP_LOG(loglevel, sc, 2378 ("state transition: %s -> %s", 2379 carp_states[vhe->state], carp_states[state])); 2380 2381 vhe->state = state; 2382 carp_update_lsmask(sc); 2383 2384 KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */ 2385 2386 sc->sc_if.if_link_state = LINK_STATE_INVALID; 2387 SRPL_FOREACH_LOCKED(vhe0, &sc->carp_vhosts, vhost_entries) { 2388 /* 2389 * Link must be up if at least one vhe is in state MASTER to 2390 * bring or keep route up. 2391 */ 2392 if (vhe0->state == MASTER) { 2393 sc->sc_if.if_link_state = LINK_STATE_UP; 2394 break; 2395 } else if (vhe0->state == BACKUP) { 2396 sc->sc_if.if_link_state = LINK_STATE_DOWN; 2397 } 2398 } 2399 if_link_state_change(&sc->sc_if); 2400 } 2401 2402 void 2403 carp_group_demote_adj(struct ifnet *ifp, int adj, char *reason) 2404 { 2405 struct ifg_list *ifgl; 2406 int *dm, need_ad; 2407 struct carp_softc *nil = NULL; 2408 2409 if (ifp->if_type == IFT_CARP) { 2410 dm = &((struct carp_softc *)ifp->if_softc)->sc_demote_cnt; 2411 if (*dm + adj >= 0) 2412 *dm += adj; 2413 else 2414 *dm = 0; 2415 } 2416 2417 need_ad = 0; 2418 TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) { 2419 if (!strcmp(ifgl->ifgl_group->ifg_group, IFG_ALL)) 2420 continue; 2421 dm = &ifgl->ifgl_group->ifg_carp_demoted; 2422 2423 if (*dm + adj >= 0) 2424 *dm += adj; 2425 else 2426 *dm = 0; 2427 2428 if (adj > 0 && *dm == 1) 2429 need_ad = 1; 2430 CARP_LOG(LOG_ERR, nil, 2431 ("%s demoted group %s by %d to %d (%s)", 2432 ifp->if_xname, ifgl->ifgl_group->ifg_group, 2433 adj, *dm, reason)); 2434 } 2435 if (need_ad) 2436 carp_send_ad_all(); 2437 } 2438 2439 int 2440 carp_group_demote_count(struct carp_softc *sc) 2441 { 2442 struct ifg_list *ifgl; 2443 int count = 0; 2444 2445 TAILQ_FOREACH(ifgl, &sc->sc_if.if_groups, ifgl_next) 2446 count += ifgl->ifgl_group->ifg_carp_demoted; 2447 2448 if (count == 0 && sc->sc_demote_cnt) 2449 count = sc->sc_demote_cnt; 2450 2451 return (count > 255 ? 255 : count); 2452 } 2453 2454 void 2455 carp_carpdev_state(void *v) 2456 { 2457 struct carp_softc *sc = v; 2458 struct ifnet *ifp0 = sc->sc_carpdev; 2459 int suppressed = sc->sc_suppress; 2460 2461 if (ifp0->if_link_state == LINK_STATE_DOWN || 2462 !(ifp0->if_flags & IFF_UP)) { 2463 sc->sc_if.if_flags &= ~IFF_RUNNING; 2464 carp_del_all_timeouts(sc); 2465 carp_set_state_all(sc, INIT); 2466 sc->sc_suppress = 1; 2467 carp_setrun_all(sc, 0); 2468 if (!suppressed) 2469 carp_group_demote_adj(&sc->sc_if, 1, "carpdev"); 2470 } else if (suppressed) { 2471 carp_set_state_all(sc, INIT); 2472 sc->sc_suppress = 0; 2473 carp_setrun_all(sc, 0); 2474 carp_group_demote_adj(&sc->sc_if, -1, "carpdev"); 2475 } 2476 } 2477 2478 int 2479 carp_ether_addmulti(struct carp_softc *sc, struct ifreq *ifr) 2480 { 2481 struct ifnet *ifp0; 2482 struct carp_mc_entry *mc; 2483 u_int8_t addrlo[ETHER_ADDR_LEN], addrhi[ETHER_ADDR_LEN]; 2484 int error; 2485 2486 ifp0 = sc->sc_carpdev; 2487 if (ifp0 == NULL) 2488 return (EINVAL); 2489 2490 error = ether_addmulti(ifr, (struct arpcom *)&sc->sc_ac); 2491 if (error != ENETRESET) 2492 return (error); 2493 2494 /* 2495 * This is new multicast address. We have to tell parent 2496 * about it. Also, remember this multicast address so that 2497 * we can delete them on unconfigure. 2498 */ 2499 mc = malloc(sizeof(*mc), M_DEVBUF, M_NOWAIT); 2500 if (mc == NULL) { 2501 error = ENOMEM; 2502 goto alloc_failed; 2503 } 2504 2505 /* 2506 * As ether_addmulti() returns ENETRESET, following two 2507 * statement shouldn't fail. 2508 */ 2509 (void)ether_multiaddr(&ifr->ifr_addr, addrlo, addrhi); 2510 ETHER_LOOKUP_MULTI(addrlo, addrhi, &sc->sc_ac, mc->mc_enm); 2511 memcpy(&mc->mc_addr, &ifr->ifr_addr, ifr->ifr_addr.sa_len); 2512 LIST_INSERT_HEAD(&sc->carp_mc_listhead, mc, mc_entries); 2513 2514 error = (*ifp0->if_ioctl)(ifp0, SIOCADDMULTI, (caddr_t)ifr); 2515 if (error != 0) 2516 goto ioctl_failed; 2517 2518 return (error); 2519 2520 ioctl_failed: 2521 LIST_REMOVE(mc, mc_entries); 2522 free(mc, M_DEVBUF, sizeof(*mc)); 2523 alloc_failed: 2524 (void)ether_delmulti(ifr, (struct arpcom *)&sc->sc_ac); 2525 2526 return (error); 2527 } 2528 2529 int 2530 carp_ether_delmulti(struct carp_softc *sc, struct ifreq *ifr) 2531 { 2532 struct ifnet *ifp0; 2533 struct ether_multi *enm; 2534 struct carp_mc_entry *mc; 2535 u_int8_t addrlo[ETHER_ADDR_LEN], addrhi[ETHER_ADDR_LEN]; 2536 int error; 2537 2538 ifp0 = sc->sc_carpdev; 2539 if (ifp0 == NULL) 2540 return (EINVAL); 2541 2542 /* 2543 * Find a key to lookup carp_mc_entry. We have to do this 2544 * before calling ether_delmulti for obvious reason. 2545 */ 2546 if ((error = ether_multiaddr(&ifr->ifr_addr, addrlo, addrhi)) != 0) 2547 return (error); 2548 ETHER_LOOKUP_MULTI(addrlo, addrhi, &sc->sc_ac, enm); 2549 if (enm == NULL) 2550 return (EINVAL); 2551 2552 LIST_FOREACH(mc, &sc->carp_mc_listhead, mc_entries) 2553 if (mc->mc_enm == enm) 2554 break; 2555 2556 /* We won't delete entries we didn't add */ 2557 if (mc == NULL) 2558 return (EINVAL); 2559 2560 error = ether_delmulti(ifr, (struct arpcom *)&sc->sc_ac); 2561 if (error != ENETRESET) 2562 return (error); 2563 2564 /* We no longer use this multicast address. Tell parent so. */ 2565 error = (*ifp0->if_ioctl)(ifp0, SIOCDELMULTI, (caddr_t)ifr); 2566 if (error == 0) { 2567 /* And forget about this address. */ 2568 LIST_REMOVE(mc, mc_entries); 2569 free(mc, M_DEVBUF, sizeof(*mc)); 2570 } else 2571 (void)ether_addmulti(ifr, (struct arpcom *)&sc->sc_ac); 2572 return (error); 2573 } 2574 2575 /* 2576 * Delete any multicast address we have asked to add from parent 2577 * interface. Called when the carp is being unconfigured. 2578 */ 2579 void 2580 carp_ether_purgemulti(struct carp_softc *sc) 2581 { 2582 struct ifnet *ifp0 = sc->sc_carpdev; /* Parent. */ 2583 struct carp_mc_entry *mc; 2584 union { 2585 struct ifreq ifreq; 2586 struct { 2587 char ifr_name[IFNAMSIZ]; 2588 struct sockaddr_storage ifr_ss; 2589 } ifreq_storage; 2590 } u; 2591 struct ifreq *ifr = &u.ifreq; 2592 2593 if (ifp0 == NULL) 2594 return; 2595 2596 memcpy(ifr->ifr_name, ifp0->if_xname, IFNAMSIZ); 2597 while ((mc = LIST_FIRST(&sc->carp_mc_listhead)) != NULL) { 2598 memcpy(&ifr->ifr_addr, &mc->mc_addr, mc->mc_addr.ss_len); 2599 (void)(*ifp0->if_ioctl)(ifp0, SIOCDELMULTI, (caddr_t)ifr); 2600 LIST_REMOVE(mc, mc_entries); 2601 free(mc, M_DEVBUF, sizeof(*mc)); 2602 } 2603 } 2604 2605 void 2606 carp_vh_ref(void *null, void *v) 2607 { 2608 struct carp_vhost_entry *vhe = v; 2609 2610 refcnt_take(&vhe->vhost_refcnt); 2611 } 2612 2613 void 2614 carp_vh_unref(void *null, void *v) 2615 { 2616 struct carp_vhost_entry *vhe = v; 2617 2618 if (refcnt_rele(&vhe->vhost_refcnt)) { 2619 carp_sc_unref(NULL, vhe->parent_sc); 2620 free(vhe, M_DEVBUF, sizeof(*vhe)); 2621 } 2622 } 2623 2624 void 2625 carp_sc_ref(void *null, void *s) 2626 { 2627 struct carp_softc *sc = s; 2628 2629 refcnt_take(&sc->sc_refcnt); 2630 } 2631 2632 void 2633 carp_sc_unref(void *null, void *s) 2634 { 2635 struct carp_softc *sc = s; 2636 2637 refcnt_rele_wake(&sc->sc_refcnt); 2638 } 2639