1 /* $OpenBSD: ip_carp.c,v 1.357 2023/05/16 14:32:54 jan Exp $ */ 2 3 /* 4 * Copyright (c) 2002 Michael Shalayeff. All rights reserved. 5 * Copyright (c) 2003 Ryan McBride. All rights reserved. 6 * Copyright (c) 2006-2008 Marco Pfatschbacher. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, 21 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 22 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 23 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 25 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 26 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 27 * THE POSSIBILITY OF SUCH DAMAGE. 28 */ 29 30 /* 31 * TODO: 32 * - iface reconfigure 33 * - support for hardware checksum calculations; 34 * 35 */ 36 37 #include "ether.h" 38 39 #include <sys/param.h> 40 #include <sys/systm.h> 41 #include <sys/mbuf.h> 42 #include <sys/socket.h> 43 #include <sys/socketvar.h> 44 #include <sys/timeout.h> 45 #include <sys/ioctl.h> 46 #include <sys/errno.h> 47 #include <sys/device.h> 48 #include <sys/kernel.h> 49 #include <sys/sysctl.h> 50 #include <sys/syslog.h> 51 #include <sys/refcnt.h> 52 53 #include <net/if.h> 54 #include <net/if_var.h> 55 #include <net/if_types.h> 56 #include <net/netisr.h> 57 58 #include <crypto/sha1.h> 59 60 #include <netinet/in.h> 61 #include <netinet/in_var.h> 62 #include <netinet/ip.h> 63 #include <netinet/ip_var.h> 64 #include <netinet/if_ether.h> 65 #include <netinet/ip_ipsp.h> 66 67 #include <net/if_dl.h> 68 69 #ifdef INET6 70 #include <netinet6/in6_var.h> 71 #include <netinet/icmp6.h> 72 #include <netinet/ip6.h> 73 #include <netinet6/ip6_var.h> 74 #include <netinet6/nd6.h> 75 #include <netinet6/in6_ifattach.h> 76 #endif 77 78 #include "bpfilter.h" 79 #if NBPFILTER > 0 80 #include <net/bpf.h> 81 #endif 82 83 #include "vlan.h" 84 #if NVLAN > 0 85 #include <net/if_vlan_var.h> 86 #endif 87 88 #include <netinet/ip_carp.h> 89 90 struct carp_mc_entry { 91 LIST_ENTRY(carp_mc_entry) mc_entries; 92 union { 93 struct ether_multi *mcu_enm; 94 } mc_u; 95 struct sockaddr_storage mc_addr; 96 }; 97 #define mc_enm mc_u.mcu_enm 98 99 enum { HMAC_ORIG=0, HMAC_NOV6LL=1, HMAC_MAX=2 }; 100 101 struct carp_vhost_entry { 102 SRPL_ENTRY(carp_vhost_entry) vhost_entries; 103 struct refcnt vhost_refcnt; 104 105 struct carp_softc *parent_sc; 106 int vhe_leader; 107 int vhid; 108 int advskew; 109 enum { INIT = 0, BACKUP, MASTER } state; 110 struct timeout ad_tmo; /* advertisement timeout */ 111 struct timeout md_tmo; /* master down timeout */ 112 struct timeout md6_tmo; /* master down timeout */ 113 114 u_int64_t vhe_replay_cookie; 115 116 /* authentication */ 117 #define CARP_HMAC_PAD 64 118 unsigned char vhe_pad[CARP_HMAC_PAD]; 119 SHA1_CTX vhe_sha1[HMAC_MAX]; 120 121 u_int8_t vhe_enaddr[ETHER_ADDR_LEN]; 122 }; 123 124 void carp_vh_ref(void *, void *); 125 void carp_vh_unref(void *, void *); 126 127 struct srpl_rc carp_vh_rc = 128 SRPL_RC_INITIALIZER(carp_vh_ref, carp_vh_unref, NULL); 129 130 struct carp_softc { 131 struct arpcom sc_ac; 132 #define sc_if sc_ac.ac_if 133 #define sc_carpdevidx sc_ac.ac_if.if_carpdevidx 134 struct task sc_atask; 135 struct task sc_ltask; 136 struct task sc_dtask; 137 struct ip_moptions sc_imo; 138 #ifdef INET6 139 struct ip6_moptions sc_im6o; 140 #endif /* INET6 */ 141 142 SRPL_ENTRY(carp_softc) sc_list; 143 struct refcnt sc_refcnt; 144 145 int sc_suppress; 146 int sc_bow_out; 147 int sc_demote_cnt; 148 149 int sc_sendad_errors; 150 #define CARP_SENDAD_MAX_ERRORS(sc) (3 * (sc)->sc_vhe_count) 151 int sc_sendad_success; 152 #define CARP_SENDAD_MIN_SUCCESS(sc) (3 * (sc)->sc_vhe_count) 153 154 char sc_curlladdr[ETHER_ADDR_LEN]; 155 156 SRPL_HEAD(, carp_vhost_entry) carp_vhosts; 157 int sc_vhe_count; 158 u_int8_t sc_vhids[CARP_MAXNODES]; 159 u_int8_t sc_advskews[CARP_MAXNODES]; 160 u_int8_t sc_balancing; 161 162 int sc_naddrs; 163 int sc_naddrs6; 164 int sc_advbase; /* seconds */ 165 166 /* authentication */ 167 unsigned char sc_key[CARP_KEY_LEN]; 168 169 u_int32_t sc_hashkey[2]; 170 u_int32_t sc_lsmask; /* load sharing mask */ 171 int sc_lscount; /* # load sharing interfaces (max 32) */ 172 int sc_delayed_arp; /* delayed ARP request countdown */ 173 int sc_realmac; /* using real mac */ 174 175 struct in_addr sc_peer; 176 177 LIST_HEAD(__carp_mchead, carp_mc_entry) carp_mc_listhead; 178 struct carp_vhost_entry *cur_vhe; /* current active vhe */ 179 }; 180 181 void carp_sc_ref(void *, void *); 182 void carp_sc_unref(void *, void *); 183 184 struct srpl_rc carp_sc_rc = 185 SRPL_RC_INITIALIZER(carp_sc_ref, carp_sc_unref, NULL); 186 187 int carp_opts[CARPCTL_MAXID] = { 0, 1, 0, LOG_CRIT }; /* XXX for now */ 188 struct cpumem *carpcounters; 189 190 int carp_send_all_recur = 0; 191 192 #define CARP_LOG(l, sc, s) \ 193 do { \ 194 if (carp_opts[CARPCTL_LOG] >= l) { \ 195 if (sc) \ 196 log(l, "%s: ", \ 197 (sc)->sc_if.if_xname); \ 198 else \ 199 log(l, "carp: "); \ 200 addlog s; \ 201 addlog("\n"); \ 202 } \ 203 } while (0) 204 205 void carp_hmac_prepare(struct carp_softc *); 206 void carp_hmac_prepare_ctx(struct carp_vhost_entry *, u_int8_t); 207 void carp_hmac_generate(struct carp_vhost_entry *, u_int32_t *, 208 unsigned char *, u_int8_t); 209 int carp_hmac_verify(struct carp_vhost_entry *, u_int32_t *, 210 unsigned char *); 211 void carp_proto_input_c(struct ifnet *, struct mbuf *, 212 struct carp_header *, int, sa_family_t); 213 int carp_proto_input_if(struct ifnet *, struct mbuf **, int *, int); 214 #ifdef INET6 215 int carp6_proto_input_if(struct ifnet *, struct mbuf **, int *, int); 216 #endif 217 void carpattach(int); 218 void carpdetach(void *); 219 void carp_prepare_ad(struct mbuf *, struct carp_vhost_entry *, 220 struct carp_header *); 221 void carp_send_ad_all(void); 222 void carp_vhe_send_ad_all(struct carp_softc *); 223 void carp_timer_ad(void *); 224 void carp_send_ad(struct carp_vhost_entry *); 225 void carp_send_arp(struct carp_softc *); 226 void carp_timer_down(void *); 227 void carp_master_down(struct carp_vhost_entry *); 228 int carp_ioctl(struct ifnet *, u_long, caddr_t); 229 int carp_vhids_ioctl(struct carp_softc *, struct carpreq *); 230 int carp_check_dup_vhids(struct carp_softc *, struct srpl *, 231 struct carpreq *); 232 void carp_ifgroup_ioctl(struct ifnet *, u_long, caddr_t); 233 void carp_ifgattr_ioctl(struct ifnet *, u_long, caddr_t); 234 void carp_start(struct ifnet *); 235 int carp_enqueue(struct ifnet *, struct mbuf *); 236 void carp_transmit(struct carp_softc *, struct ifnet *, struct mbuf *); 237 void carp_setrun_all(struct carp_softc *, sa_family_t); 238 void carp_setrun(struct carp_vhost_entry *, sa_family_t); 239 void carp_set_state_all(struct carp_softc *, int); 240 void carp_set_state(struct carp_vhost_entry *, int); 241 void carp_multicast_cleanup(struct carp_softc *); 242 int carp_set_ifp(struct carp_softc *, struct ifnet *); 243 void carp_set_enaddr(struct carp_softc *); 244 void carp_set_vhe_enaddr(struct carp_vhost_entry *); 245 void carp_addr_updated(void *); 246 int carp_set_addr(struct carp_softc *, struct sockaddr_in *); 247 int carp_join_multicast(struct carp_softc *); 248 #ifdef INET6 249 void carp_send_na(struct carp_softc *); 250 int carp_set_addr6(struct carp_softc *, struct sockaddr_in6 *); 251 int carp_join_multicast6(struct carp_softc *); 252 #endif 253 int carp_clone_create(struct if_clone *, int); 254 int carp_clone_destroy(struct ifnet *); 255 int carp_ether_addmulti(struct carp_softc *, struct ifreq *); 256 int carp_ether_delmulti(struct carp_softc *, struct ifreq *); 257 void carp_ether_purgemulti(struct carp_softc *); 258 int carp_group_demote_count(struct carp_softc *); 259 void carp_update_lsmask(struct carp_softc *); 260 int carp_new_vhost(struct carp_softc *, int, int); 261 void carp_destroy_vhosts(struct carp_softc *); 262 void carp_del_all_timeouts(struct carp_softc *); 263 int carp_vhe_match(struct carp_softc *, uint64_t); 264 265 struct if_clone carp_cloner = 266 IF_CLONE_INITIALIZER("carp", carp_clone_create, carp_clone_destroy); 267 268 #define carp_cksum(_m, _l) ((u_int16_t)in_cksum((_m), (_l))) 269 #define CARP_IFQ_PRIO 6 270 271 void 272 carp_hmac_prepare(struct carp_softc *sc) 273 { 274 struct carp_vhost_entry *vhe; 275 u_int8_t i; 276 277 KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */ 278 279 SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) { 280 for (i = 0; i < HMAC_MAX; i++) { 281 carp_hmac_prepare_ctx(vhe, i); 282 } 283 } 284 } 285 286 void 287 carp_hmac_prepare_ctx(struct carp_vhost_entry *vhe, u_int8_t ctx) 288 { 289 struct carp_softc *sc = vhe->parent_sc; 290 291 u_int8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT; 292 u_int8_t vhid = vhe->vhid & 0xff; 293 SHA1_CTX sha1ctx; 294 u_int32_t kmd[5]; 295 struct ifaddr *ifa; 296 int i, found; 297 struct in_addr last, cur, in; 298 #ifdef INET6 299 struct in6_addr last6, cur6, in6; 300 #endif /* INET6 */ 301 302 /* compute ipad from key */ 303 memset(vhe->vhe_pad, 0, sizeof(vhe->vhe_pad)); 304 bcopy(sc->sc_key, vhe->vhe_pad, sizeof(sc->sc_key)); 305 for (i = 0; i < sizeof(vhe->vhe_pad); i++) 306 vhe->vhe_pad[i] ^= 0x36; 307 308 /* precompute first part of inner hash */ 309 SHA1Init(&vhe->vhe_sha1[ctx]); 310 SHA1Update(&vhe->vhe_sha1[ctx], vhe->vhe_pad, sizeof(vhe->vhe_pad)); 311 SHA1Update(&vhe->vhe_sha1[ctx], (void *)&version, sizeof(version)); 312 SHA1Update(&vhe->vhe_sha1[ctx], (void *)&type, sizeof(type)); 313 314 /* generate a key for the arpbalance hash, before the vhid is hashed */ 315 if (vhe->vhe_leader) { 316 bcopy(&vhe->vhe_sha1[ctx], &sha1ctx, sizeof(sha1ctx)); 317 SHA1Final((unsigned char *)kmd, &sha1ctx); 318 sc->sc_hashkey[0] = kmd[0] ^ kmd[1]; 319 sc->sc_hashkey[1] = kmd[2] ^ kmd[3]; 320 } 321 322 /* the rest of the precomputation */ 323 if (!sc->sc_realmac && vhe->vhe_leader && 324 memcmp(sc->sc_ac.ac_enaddr, vhe->vhe_enaddr, ETHER_ADDR_LEN) != 0) 325 SHA1Update(&vhe->vhe_sha1[ctx], sc->sc_ac.ac_enaddr, 326 ETHER_ADDR_LEN); 327 328 SHA1Update(&vhe->vhe_sha1[ctx], (void *)&vhid, sizeof(vhid)); 329 330 /* Hash the addresses from smallest to largest, not interface order */ 331 cur.s_addr = 0; 332 do { 333 found = 0; 334 last = cur; 335 cur.s_addr = 0xffffffff; 336 TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) { 337 if (ifa->ifa_addr->sa_family != AF_INET) 338 continue; 339 in.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr; 340 if (ntohl(in.s_addr) > ntohl(last.s_addr) && 341 ntohl(in.s_addr) < ntohl(cur.s_addr)) { 342 cur.s_addr = in.s_addr; 343 found++; 344 } 345 } 346 if (found) 347 SHA1Update(&vhe->vhe_sha1[ctx], 348 (void *)&cur, sizeof(cur)); 349 } while (found); 350 #ifdef INET6 351 memset(&cur6, 0x00, sizeof(cur6)); 352 do { 353 found = 0; 354 last6 = cur6; 355 memset(&cur6, 0xff, sizeof(cur6)); 356 TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) { 357 if (ifa->ifa_addr->sa_family != AF_INET6) 358 continue; 359 in6 = ifatoia6(ifa)->ia_addr.sin6_addr; 360 if (IN6_IS_SCOPE_EMBED(&in6)) { 361 if (ctx == HMAC_NOV6LL) 362 continue; 363 in6.s6_addr16[1] = 0; 364 } 365 if (memcmp(&in6, &last6, sizeof(in6)) > 0 && 366 memcmp(&in6, &cur6, sizeof(in6)) < 0) { 367 cur6 = in6; 368 found++; 369 } 370 } 371 if (found) 372 SHA1Update(&vhe->vhe_sha1[ctx], 373 (void *)&cur6, sizeof(cur6)); 374 } while (found); 375 #endif /* INET6 */ 376 377 /* convert ipad to opad */ 378 for (i = 0; i < sizeof(vhe->vhe_pad); i++) 379 vhe->vhe_pad[i] ^= 0x36 ^ 0x5c; 380 } 381 382 void 383 carp_hmac_generate(struct carp_vhost_entry *vhe, u_int32_t counter[2], 384 unsigned char md[20], u_int8_t ctx) 385 { 386 SHA1_CTX sha1ctx; 387 388 /* fetch first half of inner hash */ 389 bcopy(&vhe->vhe_sha1[ctx], &sha1ctx, sizeof(sha1ctx)); 390 391 SHA1Update(&sha1ctx, (void *)counter, sizeof(vhe->vhe_replay_cookie)); 392 SHA1Final(md, &sha1ctx); 393 394 /* outer hash */ 395 SHA1Init(&sha1ctx); 396 SHA1Update(&sha1ctx, vhe->vhe_pad, sizeof(vhe->vhe_pad)); 397 SHA1Update(&sha1ctx, md, 20); 398 SHA1Final(md, &sha1ctx); 399 } 400 401 int 402 carp_hmac_verify(struct carp_vhost_entry *vhe, u_int32_t counter[2], 403 unsigned char md[20]) 404 { 405 unsigned char md2[20]; 406 u_int8_t i; 407 408 for (i = 0; i < HMAC_MAX; i++) { 409 carp_hmac_generate(vhe, counter, md2, i); 410 if (!timingsafe_bcmp(md, md2, sizeof(md2))) 411 return (0); 412 } 413 return (1); 414 } 415 416 int 417 carp_proto_input(struct mbuf **mp, int *offp, int proto, int af) 418 { 419 struct ifnet *ifp; 420 421 ifp = if_get((*mp)->m_pkthdr.ph_ifidx); 422 if (ifp == NULL) { 423 m_freemp(mp); 424 return IPPROTO_DONE; 425 } 426 427 proto = carp_proto_input_if(ifp, mp, offp, proto); 428 if_put(ifp); 429 return proto; 430 } 431 432 /* 433 * process input packet. 434 * we have rearranged checks order compared to the rfc, 435 * but it seems more efficient this way or not possible otherwise. 436 */ 437 int 438 carp_proto_input_if(struct ifnet *ifp, struct mbuf **mp, int *offp, int proto) 439 { 440 struct mbuf *m = *mp; 441 struct ip *ip = mtod(m, struct ip *); 442 struct carp_softc *sc = NULL; 443 struct carp_header *ch; 444 int iplen, len, ismulti; 445 446 carpstat_inc(carps_ipackets); 447 448 if (!carp_opts[CARPCTL_ALLOW]) { 449 m_freem(m); 450 return IPPROTO_DONE; 451 } 452 453 ismulti = IN_MULTICAST(ip->ip_dst.s_addr); 454 455 /* check if received on a valid carp interface */ 456 switch (ifp->if_type) { 457 case IFT_CARP: 458 break; 459 case IFT_ETHER: 460 if (ismulti || !SRPL_EMPTY_LOCKED(&ifp->if_carp)) 461 break; 462 /* FALLTHROUGH */ 463 default: 464 carpstat_inc(carps_badif); 465 CARP_LOG(LOG_INFO, sc, 466 ("packet received on non-carp interface: %s", 467 ifp->if_xname)); 468 m_freem(m); 469 return IPPROTO_DONE; 470 } 471 472 /* verify that the IP TTL is 255. */ 473 if (ip->ip_ttl != CARP_DFLTTL) { 474 carpstat_inc(carps_badttl); 475 CARP_LOG(LOG_NOTICE, sc, ("received ttl %d != %d on %s", 476 ip->ip_ttl, CARP_DFLTTL, ifp->if_xname)); 477 m_freem(m); 478 return IPPROTO_DONE; 479 } 480 481 /* 482 * verify that the received packet length is 483 * equal to the CARP header 484 */ 485 iplen = ip->ip_hl << 2; 486 len = iplen + sizeof(*ch); 487 if (len > m->m_pkthdr.len) { 488 carpstat_inc(carps_badlen); 489 CARP_LOG(LOG_INFO, sc, ("packet too short %d on %s", 490 m->m_pkthdr.len, ifp->if_xname)); 491 m_freem(m); 492 return IPPROTO_DONE; 493 } 494 495 if ((m = *mp = m_pullup(m, len)) == NULL) { 496 carpstat_inc(carps_hdrops); 497 return IPPROTO_DONE; 498 } 499 ip = mtod(m, struct ip *); 500 ch = (struct carp_header *)(mtod(m, caddr_t) + iplen); 501 502 /* verify the CARP checksum */ 503 m->m_data += iplen; 504 if (carp_cksum(m, len - iplen)) { 505 carpstat_inc(carps_badsum); 506 CARP_LOG(LOG_INFO, sc, ("checksum failed on %s", 507 ifp->if_xname)); 508 m_freem(m); 509 return IPPROTO_DONE; 510 } 511 m->m_data -= iplen; 512 513 KERNEL_LOCK(); 514 carp_proto_input_c(ifp, m, ch, ismulti, AF_INET); 515 KERNEL_UNLOCK(); 516 return IPPROTO_DONE; 517 } 518 519 #ifdef INET6 520 int 521 carp6_proto_input(struct mbuf **mp, int *offp, int proto, int af) 522 { 523 struct ifnet *ifp; 524 525 ifp = if_get((*mp)->m_pkthdr.ph_ifidx); 526 if (ifp == NULL) { 527 m_freemp(mp); 528 return IPPROTO_DONE; 529 } 530 531 proto = carp6_proto_input_if(ifp, mp, offp, proto); 532 if_put(ifp); 533 return proto; 534 } 535 536 int 537 carp6_proto_input_if(struct ifnet *ifp, struct mbuf **mp, int *offp, int proto) 538 { 539 struct mbuf *m = *mp; 540 struct carp_softc *sc = NULL; 541 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); 542 struct carp_header *ch; 543 u_int len; 544 545 carpstat_inc(carps_ipackets6); 546 547 if (!carp_opts[CARPCTL_ALLOW]) { 548 m_freem(m); 549 return IPPROTO_DONE; 550 } 551 552 /* check if received on a valid carp interface */ 553 if (ifp->if_type != IFT_CARP) { 554 carpstat_inc(carps_badif); 555 CARP_LOG(LOG_INFO, sc, ("packet received on non-carp interface: %s", 556 ifp->if_xname)); 557 m_freem(m); 558 return IPPROTO_DONE; 559 } 560 561 /* verify that the IP TTL is 255 */ 562 if (ip6->ip6_hlim != CARP_DFLTTL) { 563 carpstat_inc(carps_badttl); 564 CARP_LOG(LOG_NOTICE, sc, ("received ttl %d != %d on %s", 565 ip6->ip6_hlim, CARP_DFLTTL, ifp->if_xname)); 566 m_freem(m); 567 return IPPROTO_DONE; 568 } 569 570 /* verify that we have a complete carp packet */ 571 len = m->m_len; 572 if ((m = *mp = m_pullup(m, *offp + sizeof(*ch))) == NULL) { 573 carpstat_inc(carps_badlen); 574 CARP_LOG(LOG_INFO, sc, ("packet size %u too small", len)); 575 return IPPROTO_DONE; 576 } 577 ch = (struct carp_header *)(mtod(m, caddr_t) + *offp); 578 579 /* verify the CARP checksum */ 580 m->m_data += *offp; 581 if (carp_cksum(m, sizeof(*ch))) { 582 carpstat_inc(carps_badsum); 583 CARP_LOG(LOG_INFO, sc, ("checksum failed, on %s", 584 ifp->if_xname)); 585 m_freem(m); 586 return IPPROTO_DONE; 587 } 588 m->m_data -= *offp; 589 590 KERNEL_LOCK(); 591 carp_proto_input_c(ifp, m, ch, 1, AF_INET6); 592 KERNEL_UNLOCK(); 593 return IPPROTO_DONE; 594 } 595 #endif /* INET6 */ 596 597 void 598 carp_proto_input_c(struct ifnet *ifp, struct mbuf *m, struct carp_header *ch, 599 int ismulti, sa_family_t af) 600 { 601 struct carp_softc *sc; 602 struct ifnet *ifp0; 603 struct carp_vhost_entry *vhe; 604 struct timeval sc_tv, ch_tv; 605 struct srpl *cif; 606 607 KERNEL_ASSERT_LOCKED(); /* touching if_carp + carp_vhosts */ 608 609 ifp0 = if_get(ifp->if_carpdevidx); 610 611 if (ifp->if_type == IFT_CARP) { 612 /* 613 * If the parent of this carp(4) got destroyed while 614 * `m' was being processed, silently drop it. 615 */ 616 if (ifp0 == NULL) 617 goto rele; 618 cif = &ifp0->if_carp; 619 } else 620 cif = &ifp->if_carp; 621 622 SRPL_FOREACH_LOCKED(sc, cif, sc_list) { 623 if (af == AF_INET && 624 ismulti != IN_MULTICAST(sc->sc_peer.s_addr)) 625 continue; 626 SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) { 627 if (vhe->vhid == ch->carp_vhid) 628 goto found; 629 } 630 } 631 found: 632 633 if (!sc || (sc->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) != 634 (IFF_UP|IFF_RUNNING)) { 635 carpstat_inc(carps_badvhid); 636 goto rele; 637 } 638 639 getmicrotime(&sc->sc_if.if_lastchange); 640 641 /* verify the CARP version. */ 642 if (ch->carp_version != CARP_VERSION) { 643 carpstat_inc(carps_badver); 644 sc->sc_if.if_ierrors++; 645 CARP_LOG(LOG_NOTICE, sc, ("invalid version %d != %d", 646 ch->carp_version, CARP_VERSION)); 647 goto rele; 648 } 649 650 /* verify the hash */ 651 if (carp_hmac_verify(vhe, ch->carp_counter, ch->carp_md)) { 652 carpstat_inc(carps_badauth); 653 sc->sc_if.if_ierrors++; 654 CARP_LOG(LOG_INFO, sc, ("incorrect hash")); 655 goto rele; 656 } 657 658 if (!memcmp(&vhe->vhe_replay_cookie, ch->carp_counter, 659 sizeof(ch->carp_counter))) { 660 struct ifnet *ifp2; 661 662 ifp2 = if_get(sc->sc_carpdevidx); 663 /* Do not log duplicates from non simplex interfaces */ 664 if (ifp2 && ifp2->if_flags & IFF_SIMPLEX) { 665 carpstat_inc(carps_badauth); 666 sc->sc_if.if_ierrors++; 667 CARP_LOG(LOG_WARNING, sc, 668 ("replay or network loop detected")); 669 } 670 if_put(ifp2); 671 goto rele; 672 } 673 674 sc_tv.tv_sec = sc->sc_advbase; 675 sc_tv.tv_usec = vhe->advskew * 1000000 / 256; 676 ch_tv.tv_sec = ch->carp_advbase; 677 ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256; 678 679 switch (vhe->state) { 680 case INIT: 681 break; 682 case MASTER: 683 /* 684 * If we receive an advertisement from a master who's going to 685 * be more frequent than us, and whose demote count is not higher 686 * than ours, go into BACKUP state. If his demote count is lower, 687 * also go into BACKUP. 688 */ 689 if (((timercmp(&sc_tv, &ch_tv, >) || 690 timercmp(&sc_tv, &ch_tv, ==)) && 691 (ch->carp_demote <= carp_group_demote_count(sc))) || 692 ch->carp_demote < carp_group_demote_count(sc)) { 693 timeout_del(&vhe->ad_tmo); 694 carp_set_state(vhe, BACKUP); 695 carp_setrun(vhe, 0); 696 } 697 break; 698 case BACKUP: 699 /* 700 * If we're pre-empting masters who advertise slower than us, 701 * and do not have a better demote count, treat them as down. 702 * 703 */ 704 if (carp_opts[CARPCTL_PREEMPT] && 705 timercmp(&sc_tv, &ch_tv, <) && 706 ch->carp_demote >= carp_group_demote_count(sc)) { 707 carp_master_down(vhe); 708 break; 709 } 710 711 /* 712 * Take over masters advertising with a higher demote count, 713 * regardless of CARPCTL_PREEMPT. 714 */ 715 if (ch->carp_demote > carp_group_demote_count(sc)) { 716 carp_master_down(vhe); 717 break; 718 } 719 720 /* 721 * If the master is going to advertise at such a low frequency 722 * that he's guaranteed to time out, we'd might as well just 723 * treat him as timed out now. 724 */ 725 sc_tv.tv_sec = sc->sc_advbase * 3; 726 if (sc->sc_advbase && timercmp(&sc_tv, &ch_tv, <)) { 727 carp_master_down(vhe); 728 break; 729 } 730 731 /* 732 * Otherwise, we reset the counter and wait for the next 733 * advertisement. 734 */ 735 carp_setrun(vhe, af); 736 break; 737 } 738 739 rele: 740 if_put(ifp0); 741 m_freem(m); 742 return; 743 } 744 745 int 746 carp_sysctl_carpstat(void *oldp, size_t *oldlenp, void *newp) 747 { 748 struct carpstats carpstat; 749 750 CTASSERT(sizeof(carpstat) == (carps_ncounters * sizeof(uint64_t))); 751 memset(&carpstat, 0, sizeof carpstat); 752 counters_read(carpcounters, (uint64_t *)&carpstat, carps_ncounters); 753 return (sysctl_rdstruct(oldp, oldlenp, newp, 754 &carpstat, sizeof(carpstat))); 755 } 756 757 int 758 carp_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp, 759 size_t newlen) 760 { 761 int error; 762 763 /* All sysctl names at this level are terminal. */ 764 if (namelen != 1) 765 return (ENOTDIR); 766 767 switch (name[0]) { 768 case CARPCTL_STATS: 769 return (carp_sysctl_carpstat(oldp, oldlenp, newp)); 770 default: 771 if (name[0] <= 0 || name[0] >= CARPCTL_MAXID) 772 return (ENOPROTOOPT); 773 NET_LOCK(); 774 error = sysctl_int(oldp, oldlenp, newp, newlen, 775 &carp_opts[name[0]]); 776 NET_UNLOCK(); 777 return (error); 778 } 779 } 780 781 /* 782 * Interface side of the CARP implementation. 783 */ 784 785 void 786 carpattach(int n) 787 { 788 if_creategroup("carp"); /* keep around even if empty */ 789 if_clone_attach(&carp_cloner); 790 carpcounters = counters_alloc(carps_ncounters); 791 } 792 793 int 794 carp_clone_create(struct if_clone *ifc, int unit) 795 { 796 struct carp_softc *sc; 797 struct ifnet *ifp; 798 799 sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO); 800 refcnt_init(&sc->sc_refcnt); 801 802 SRPL_INIT(&sc->carp_vhosts); 803 sc->sc_vhe_count = 0; 804 if (carp_new_vhost(sc, 0, 0)) { 805 free(sc, M_DEVBUF, sizeof(*sc)); 806 return (ENOMEM); 807 } 808 809 task_set(&sc->sc_atask, carp_addr_updated, sc); 810 task_set(&sc->sc_ltask, carp_carpdev_state, sc); 811 task_set(&sc->sc_dtask, carpdetach, sc); 812 813 sc->sc_suppress = 0; 814 sc->sc_advbase = CARP_DFLTINTV; 815 sc->sc_naddrs = sc->sc_naddrs6 = 0; 816 #ifdef INET6 817 sc->sc_im6o.im6o_hlim = CARP_DFLTTL; 818 #endif /* INET6 */ 819 sc->sc_imo.imo_membership = mallocarray(IP_MIN_MEMBERSHIPS, 820 sizeof(struct in_multi *), M_IPMOPTS, M_WAITOK|M_ZERO); 821 sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS; 822 823 LIST_INIT(&sc->carp_mc_listhead); 824 ifp = &sc->sc_if; 825 ifp->if_softc = sc; 826 snprintf(ifp->if_xname, sizeof ifp->if_xname, "%s%d", ifc->ifc_name, 827 unit); 828 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 829 ifp->if_ioctl = carp_ioctl; 830 ifp->if_start = carp_start; 831 ifp->if_enqueue = carp_enqueue; 832 ifp->if_xflags = IFXF_CLONED; 833 if_counters_alloc(ifp); 834 if_attach(ifp); 835 ether_ifattach(ifp); 836 ifp->if_type = IFT_CARP; 837 ifp->if_sadl->sdl_type = IFT_CARP; 838 ifp->if_output = carp_output; 839 ifp->if_priority = IF_CARP_DEFAULT_PRIORITY; 840 ifp->if_link_state = LINK_STATE_INVALID; 841 842 /* Hook carp_addr_updated to cope with address and route changes. */ 843 if_addrhook_add(&sc->sc_if, &sc->sc_atask); 844 845 return (0); 846 } 847 848 int 849 carp_new_vhost(struct carp_softc *sc, int vhid, int advskew) 850 { 851 struct carp_vhost_entry *vhe, *vhe0; 852 853 vhe = malloc(sizeof(*vhe), M_DEVBUF, M_NOWAIT | M_ZERO); 854 if (vhe == NULL) 855 return (ENOMEM); 856 857 refcnt_init(&vhe->vhost_refcnt); 858 carp_sc_ref(NULL, sc); /* give a sc ref to the vhe */ 859 vhe->parent_sc = sc; 860 vhe->vhid = vhid; 861 vhe->advskew = advskew; 862 vhe->state = INIT; 863 timeout_set_proc(&vhe->ad_tmo, carp_timer_ad, vhe); 864 timeout_set_proc(&vhe->md_tmo, carp_timer_down, vhe); 865 timeout_set_proc(&vhe->md6_tmo, carp_timer_down, vhe); 866 867 KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */ 868 869 /* mark the first vhe as leader */ 870 if (SRPL_EMPTY_LOCKED(&sc->carp_vhosts)) { 871 vhe->vhe_leader = 1; 872 SRPL_INSERT_HEAD_LOCKED(&carp_vh_rc, &sc->carp_vhosts, 873 vhe, vhost_entries); 874 sc->sc_vhe_count = 1; 875 return (0); 876 } 877 878 SRPL_FOREACH_LOCKED(vhe0, &sc->carp_vhosts, vhost_entries) { 879 if (SRPL_NEXT_LOCKED(vhe0, vhost_entries) == NULL) 880 break; 881 } 882 883 SRPL_INSERT_AFTER_LOCKED(&carp_vh_rc, vhe0, vhe, vhost_entries); 884 sc->sc_vhe_count++; 885 886 return (0); 887 } 888 889 int 890 carp_clone_destroy(struct ifnet *ifp) 891 { 892 struct carp_softc *sc = ifp->if_softc; 893 894 if_addrhook_del(&sc->sc_if, &sc->sc_atask); 895 896 NET_LOCK(); 897 carpdetach(sc); 898 NET_UNLOCK(); 899 900 ether_ifdetach(ifp); 901 if_detach(ifp); 902 carp_destroy_vhosts(ifp->if_softc); 903 refcnt_finalize(&sc->sc_refcnt, "carpdtor"); 904 free(sc->sc_imo.imo_membership, M_IPMOPTS, 905 sc->sc_imo.imo_max_memberships * sizeof(struct in_multi *)); 906 free(sc, M_DEVBUF, sizeof(*sc)); 907 return (0); 908 } 909 910 void 911 carp_del_all_timeouts(struct carp_softc *sc) 912 { 913 struct carp_vhost_entry *vhe; 914 915 KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */ 916 SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) { 917 timeout_del(&vhe->ad_tmo); 918 timeout_del(&vhe->md_tmo); 919 timeout_del(&vhe->md6_tmo); 920 } 921 } 922 923 void 924 carpdetach(void *arg) 925 { 926 struct carp_softc *sc = arg; 927 struct ifnet *ifp0; 928 struct srpl *cif; 929 930 carp_del_all_timeouts(sc); 931 932 if (sc->sc_demote_cnt) 933 carp_group_demote_adj(&sc->sc_if, -sc->sc_demote_cnt, "detach"); 934 sc->sc_suppress = 0; 935 sc->sc_sendad_errors = 0; 936 937 carp_set_state_all(sc, INIT); 938 sc->sc_if.if_flags &= ~IFF_UP; 939 carp_setrun_all(sc, 0); 940 carp_multicast_cleanup(sc); 941 942 ifp0 = if_get(sc->sc_carpdevidx); 943 if (ifp0 == NULL) 944 return; 945 946 KERNEL_ASSERT_LOCKED(); /* touching if_carp */ 947 948 cif = &ifp0->if_carp; 949 950 SRPL_REMOVE_LOCKED(&carp_sc_rc, cif, sc, carp_softc, sc_list); 951 sc->sc_carpdevidx = 0; 952 953 if_linkstatehook_del(ifp0, &sc->sc_ltask); 954 if_detachhook_del(ifp0, &sc->sc_dtask); 955 ifpromisc(ifp0, 0); 956 if_put(ifp0); 957 } 958 959 void 960 carp_destroy_vhosts(struct carp_softc *sc) 961 { 962 /* XXX bow out? */ 963 struct carp_vhost_entry *vhe; 964 965 KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */ 966 967 while ((vhe = SRPL_FIRST_LOCKED(&sc->carp_vhosts)) != NULL) { 968 SRPL_REMOVE_LOCKED(&carp_vh_rc, &sc->carp_vhosts, vhe, 969 carp_vhost_entry, vhost_entries); 970 carp_vh_unref(NULL, vhe); /* drop last ref */ 971 } 972 sc->sc_vhe_count = 0; 973 } 974 975 void 976 carp_prepare_ad(struct mbuf *m, struct carp_vhost_entry *vhe, 977 struct carp_header *ch) 978 { 979 if (!vhe->vhe_replay_cookie) { 980 arc4random_buf(&vhe->vhe_replay_cookie, 981 sizeof(vhe->vhe_replay_cookie)); 982 } 983 984 bcopy(&vhe->vhe_replay_cookie, ch->carp_counter, 985 sizeof(ch->carp_counter)); 986 987 /* 988 * For the time being, do not include the IPv6 linklayer addresses 989 * in the HMAC. 990 */ 991 carp_hmac_generate(vhe, ch->carp_counter, ch->carp_md, HMAC_NOV6LL); 992 } 993 994 void 995 carp_send_ad_all(void) 996 { 997 struct ifnet *ifp0; 998 struct srpl *cif; 999 struct carp_softc *vh; 1000 1001 KERNEL_ASSERT_LOCKED(); /* touching if_carp */ 1002 1003 if (carp_send_all_recur > 0) 1004 return; 1005 ++carp_send_all_recur; 1006 TAILQ_FOREACH(ifp0, &ifnetlist, if_list) { 1007 if (ifp0->if_type != IFT_ETHER) 1008 continue; 1009 1010 cif = &ifp0->if_carp; 1011 SRPL_FOREACH_LOCKED(vh, cif, sc_list) { 1012 if ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) == 1013 (IFF_UP|IFF_RUNNING)) { 1014 carp_vhe_send_ad_all(vh); 1015 } 1016 } 1017 } 1018 --carp_send_all_recur; 1019 } 1020 1021 void 1022 carp_vhe_send_ad_all(struct carp_softc *sc) 1023 { 1024 struct carp_vhost_entry *vhe; 1025 1026 KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */ 1027 1028 SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) { 1029 if (vhe->state == MASTER) 1030 carp_send_ad(vhe); 1031 } 1032 } 1033 1034 void 1035 carp_timer_ad(void *v) 1036 { 1037 NET_LOCK(); 1038 carp_send_ad(v); 1039 NET_UNLOCK(); 1040 } 1041 1042 void 1043 carp_send_ad(struct carp_vhost_entry *vhe) 1044 { 1045 struct carp_header ch; 1046 struct timeval tv; 1047 struct carp_softc *sc = vhe->parent_sc; 1048 struct carp_header *ch_ptr; 1049 struct mbuf *m; 1050 int error, len, advbase, advskew; 1051 struct ifnet *ifp; 1052 struct ifaddr *ifa; 1053 struct sockaddr sa; 1054 1055 NET_ASSERT_LOCKED(); 1056 1057 if ((ifp = if_get(sc->sc_carpdevidx)) == NULL) { 1058 sc->sc_if.if_oerrors++; 1059 return; 1060 } 1061 1062 /* bow out if we've gone to backup (the carp interface is going down) */ 1063 if (sc->sc_bow_out) { 1064 advbase = 255; 1065 advskew = 255; 1066 } else { 1067 advbase = sc->sc_advbase; 1068 advskew = vhe->advskew; 1069 tv.tv_sec = advbase; 1070 if (advbase == 0 && advskew == 0) 1071 tv.tv_usec = 1 * 1000000 / 256; 1072 else 1073 tv.tv_usec = advskew * 1000000 / 256; 1074 } 1075 1076 ch.carp_version = CARP_VERSION; 1077 ch.carp_type = CARP_ADVERTISEMENT; 1078 ch.carp_vhid = vhe->vhid; 1079 ch.carp_demote = carp_group_demote_count(sc) & 0xff; 1080 ch.carp_advbase = advbase; 1081 ch.carp_advskew = advskew; 1082 ch.carp_authlen = 7; /* XXX DEFINE */ 1083 ch.carp_cksum = 0; 1084 1085 sc->cur_vhe = vhe; /* we need the vhe later on the output path */ 1086 1087 if (sc->sc_naddrs) { 1088 struct ip *ip; 1089 1090 MGETHDR(m, M_DONTWAIT, MT_HEADER); 1091 if (m == NULL) { 1092 sc->sc_if.if_oerrors++; 1093 carpstat_inc(carps_onomem); 1094 /* XXX maybe less ? */ 1095 goto retry_later; 1096 } 1097 len = sizeof(*ip) + sizeof(ch); 1098 m->m_pkthdr.pf.prio = CARP_IFQ_PRIO; 1099 m->m_pkthdr.ph_rtableid = sc->sc_if.if_rdomain; 1100 m->m_pkthdr.len = len; 1101 m->m_len = len; 1102 m_align(m, len); 1103 ip = mtod(m, struct ip *); 1104 ip->ip_v = IPVERSION; 1105 ip->ip_hl = sizeof(*ip) >> 2; 1106 ip->ip_tos = IPTOS_LOWDELAY; 1107 ip->ip_len = htons(len); 1108 ip->ip_id = htons(ip_randomid()); 1109 ip->ip_off = htons(IP_DF); 1110 ip->ip_ttl = CARP_DFLTTL; 1111 ip->ip_p = IPPROTO_CARP; 1112 ip->ip_sum = 0; 1113 1114 memset(&sa, 0, sizeof(sa)); 1115 sa.sa_family = AF_INET; 1116 /* Prefer addresses on the parent interface as source for AD. */ 1117 ifa = ifaof_ifpforaddr(&sa, ifp); 1118 if (ifa == NULL) 1119 ifa = ifaof_ifpforaddr(&sa, &sc->sc_if); 1120 KASSERT(ifa != NULL); 1121 ip->ip_src.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr; 1122 ip->ip_dst.s_addr = sc->sc_peer.s_addr; 1123 if (IN_MULTICAST(ip->ip_dst.s_addr)) 1124 m->m_flags |= M_MCAST; 1125 1126 ch_ptr = (struct carp_header *)(ip + 1); 1127 bcopy(&ch, ch_ptr, sizeof(ch)); 1128 carp_prepare_ad(m, vhe, ch_ptr); 1129 1130 m->m_data += sizeof(*ip); 1131 ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip)); 1132 m->m_data -= sizeof(*ip); 1133 1134 getmicrotime(&sc->sc_if.if_lastchange); 1135 carpstat_inc(carps_opackets); 1136 1137 error = ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, 1138 NULL, 0); 1139 if (error && 1140 /* when unicast, the peer's down is not our fault */ 1141 !(!IN_MULTICAST(sc->sc_peer.s_addr) && error == EHOSTDOWN)){ 1142 if (error == ENOBUFS) 1143 carpstat_inc(carps_onomem); 1144 else 1145 CARP_LOG(LOG_WARNING, sc, 1146 ("ip_output failed: %d", error)); 1147 sc->sc_if.if_oerrors++; 1148 if (sc->sc_sendad_errors < INT_MAX) 1149 sc->sc_sendad_errors++; 1150 if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS(sc)) 1151 carp_group_demote_adj(&sc->sc_if, 1, 1152 "> snderrors"); 1153 sc->sc_sendad_success = 0; 1154 } else { 1155 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS(sc)) { 1156 if (++sc->sc_sendad_success >= 1157 CARP_SENDAD_MIN_SUCCESS(sc)) { 1158 carp_group_demote_adj(&sc->sc_if, -1, 1159 "< snderrors"); 1160 sc->sc_sendad_errors = 0; 1161 } 1162 } else 1163 sc->sc_sendad_errors = 0; 1164 } 1165 if (vhe->vhe_leader) { 1166 if (sc->sc_delayed_arp > 0) 1167 sc->sc_delayed_arp--; 1168 if (sc->sc_delayed_arp == 0) { 1169 carp_send_arp(sc); 1170 sc->sc_delayed_arp = -1; 1171 } 1172 } 1173 } 1174 #ifdef INET6 1175 if (sc->sc_naddrs6) { 1176 struct ip6_hdr *ip6; 1177 1178 MGETHDR(m, M_DONTWAIT, MT_HEADER); 1179 if (m == NULL) { 1180 sc->sc_if.if_oerrors++; 1181 carpstat_inc(carps_onomem); 1182 /* XXX maybe less ? */ 1183 goto retry_later; 1184 } 1185 len = sizeof(*ip6) + sizeof(ch); 1186 m->m_pkthdr.pf.prio = CARP_IFQ_PRIO; 1187 m->m_pkthdr.ph_rtableid = sc->sc_if.if_rdomain; 1188 m->m_pkthdr.len = len; 1189 m->m_len = len; 1190 m_align(m, len); 1191 m->m_flags |= M_MCAST; 1192 ip6 = mtod(m, struct ip6_hdr *); 1193 memset(ip6, 0, sizeof(*ip6)); 1194 ip6->ip6_vfc |= IPV6_VERSION; 1195 ip6->ip6_hlim = CARP_DFLTTL; 1196 ip6->ip6_nxt = IPPROTO_CARP; 1197 1198 /* set the source address */ 1199 memset(&sa, 0, sizeof(sa)); 1200 sa.sa_family = AF_INET6; 1201 /* Prefer addresses on the parent interface as source for AD. */ 1202 ifa = ifaof_ifpforaddr(&sa, ifp); 1203 if (ifa == NULL) 1204 ifa = ifaof_ifpforaddr(&sa, &sc->sc_if); 1205 KASSERT(ifa != NULL); 1206 bcopy(ifatoia6(ifa)->ia_addr.sin6_addr.s6_addr, 1207 &ip6->ip6_src, sizeof(struct in6_addr)); 1208 /* set the multicast destination */ 1209 1210 ip6->ip6_dst.s6_addr16[0] = htons(0xff02); 1211 ip6->ip6_dst.s6_addr16[1] = htons(ifp->if_index); 1212 ip6->ip6_dst.s6_addr8[15] = 0x12; 1213 1214 ch_ptr = (struct carp_header *)(ip6 + 1); 1215 bcopy(&ch, ch_ptr, sizeof(ch)); 1216 carp_prepare_ad(m, vhe, ch_ptr); 1217 1218 m->m_data += sizeof(*ip6); 1219 ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip6)); 1220 m->m_data -= sizeof(*ip6); 1221 1222 getmicrotime(&sc->sc_if.if_lastchange); 1223 carpstat_inc(carps_opackets6); 1224 1225 error = ip6_output(m, NULL, NULL, 0, &sc->sc_im6o, NULL); 1226 if (error) { 1227 if (error == ENOBUFS) 1228 carpstat_inc(carps_onomem); 1229 else 1230 CARP_LOG(LOG_WARNING, sc, 1231 ("ip6_output failed: %d", error)); 1232 sc->sc_if.if_oerrors++; 1233 if (sc->sc_sendad_errors < INT_MAX) 1234 sc->sc_sendad_errors++; 1235 if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS(sc)) 1236 carp_group_demote_adj(&sc->sc_if, 1, 1237 "> snd6errors"); 1238 sc->sc_sendad_success = 0; 1239 } else { 1240 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS(sc)) { 1241 if (++sc->sc_sendad_success >= 1242 CARP_SENDAD_MIN_SUCCESS(sc)) { 1243 carp_group_demote_adj(&sc->sc_if, -1, 1244 "< snd6errors"); 1245 sc->sc_sendad_errors = 0; 1246 } 1247 } else 1248 sc->sc_sendad_errors = 0; 1249 } 1250 } 1251 #endif /* INET6 */ 1252 1253 retry_later: 1254 sc->cur_vhe = NULL; 1255 if (advbase != 255 || advskew != 255) 1256 timeout_add_tv(&vhe->ad_tmo, &tv); 1257 if_put(ifp); 1258 } 1259 1260 /* 1261 * Broadcast a gratuitous ARP request containing 1262 * the virtual router MAC address for each IP address 1263 * associated with the virtual router. 1264 */ 1265 void 1266 carp_send_arp(struct carp_softc *sc) 1267 { 1268 struct ifaddr *ifa; 1269 in_addr_t in; 1270 1271 TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) { 1272 1273 if (ifa->ifa_addr->sa_family != AF_INET) 1274 continue; 1275 1276 in = ifatoia(ifa)->ia_addr.sin_addr.s_addr; 1277 arprequest(&sc->sc_if, &in, &in, sc->sc_ac.ac_enaddr); 1278 } 1279 } 1280 1281 #ifdef INET6 1282 void 1283 carp_send_na(struct carp_softc *sc) 1284 { 1285 struct ifaddr *ifa; 1286 struct in6_addr *in6; 1287 static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT; 1288 1289 TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) { 1290 1291 if (ifa->ifa_addr->sa_family != AF_INET6) 1292 continue; 1293 1294 in6 = &ifatoia6(ifa)->ia_addr.sin6_addr; 1295 nd6_na_output(&sc->sc_if, &mcast, in6, 1296 ND_NA_FLAG_OVERRIDE | 1297 (ip6_forwarding ? ND_NA_FLAG_ROUTER : 0), 1, NULL); 1298 } 1299 } 1300 #endif /* INET6 */ 1301 1302 void 1303 carp_update_lsmask(struct carp_softc *sc) 1304 { 1305 struct carp_vhost_entry *vhe; 1306 int count; 1307 1308 if (sc->sc_balancing == CARP_BAL_NONE) 1309 return; 1310 1311 sc->sc_lsmask = 0; 1312 count = 0; 1313 1314 KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */ 1315 SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) { 1316 if (vhe->state == MASTER && count < sizeof(sc->sc_lsmask) * 8) 1317 sc->sc_lsmask |= 1 << count; 1318 count++; 1319 } 1320 sc->sc_lscount = count; 1321 CARP_LOG(LOG_DEBUG, sc, ("carp_update_lsmask: %x", sc->sc_lsmask)); 1322 } 1323 1324 int 1325 carp_iamatch(struct ifnet *ifp) 1326 { 1327 struct carp_softc *sc = ifp->if_softc; 1328 struct carp_vhost_entry *vhe; 1329 struct srp_ref sr; 1330 int match = 0; 1331 1332 vhe = SRPL_FIRST(&sr, &sc->carp_vhosts); 1333 if (vhe->state == MASTER) 1334 match = 1; 1335 SRPL_LEAVE(&sr); 1336 1337 return (match); 1338 } 1339 1340 int 1341 carp_ourether(struct ifnet *ifp, uint8_t *ena) 1342 { 1343 struct srpl *cif = &ifp->if_carp; 1344 struct carp_softc *sc; 1345 struct srp_ref sr; 1346 int match = 0; 1347 uint64_t dst = ether_addr_to_e64((struct ether_addr *)ena); 1348 1349 KASSERT(ifp->if_type == IFT_ETHER); 1350 1351 SRPL_FOREACH(sc, &sr, cif, sc_list) { 1352 if ((sc->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) != 1353 (IFF_UP|IFF_RUNNING)) 1354 continue; 1355 if (carp_vhe_match(sc, dst)) { 1356 match = 1; 1357 break; 1358 } 1359 } 1360 SRPL_LEAVE(&sr); 1361 1362 return (match); 1363 } 1364 1365 int 1366 carp_vhe_match(struct carp_softc *sc, uint64_t dst) 1367 { 1368 struct carp_vhost_entry *vhe; 1369 struct srp_ref sr; 1370 int active = 0; 1371 1372 vhe = SRPL_FIRST(&sr, &sc->carp_vhosts); 1373 active = (vhe->state == MASTER || sc->sc_balancing >= CARP_BAL_IP); 1374 SRPL_LEAVE(&sr); 1375 1376 return (active && (dst == 1377 ether_addr_to_e64((struct ether_addr *)sc->sc_ac.ac_enaddr))); 1378 } 1379 1380 struct mbuf * 1381 carp_input(struct ifnet *ifp0, struct mbuf *m, uint64_t dst) 1382 { 1383 struct srpl *cif; 1384 struct carp_softc *sc; 1385 struct srp_ref sr; 1386 1387 cif = &ifp0->if_carp; 1388 1389 SRPL_FOREACH(sc, &sr, cif, sc_list) { 1390 if ((sc->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) != 1391 (IFF_UP|IFF_RUNNING)) 1392 continue; 1393 1394 if (carp_vhe_match(sc, dst)) { 1395 /* 1396 * These packets look like layer 2 multicast but they 1397 * are unicast at layer 3. With help of the tag the 1398 * mbuf's M_MCAST flag can be removed by carp_lsdrop() 1399 * after we have passed layer 2. 1400 */ 1401 if (sc->sc_balancing == CARP_BAL_IP) { 1402 struct m_tag *mtag; 1403 mtag = m_tag_get(PACKET_TAG_CARP_BAL_IP, 0, 1404 M_NOWAIT); 1405 if (mtag == NULL) { 1406 m_freem(m); 1407 goto out; 1408 } 1409 m_tag_prepend(m, mtag); 1410 } 1411 break; 1412 } 1413 } 1414 1415 if (sc == NULL) { 1416 SRPL_LEAVE(&sr); 1417 1418 if (!ETH64_IS_MULTICAST(dst)) 1419 return (m); 1420 1421 /* 1422 * XXX Should really check the list of multicast addresses 1423 * for each CARP interface _before_ copying. 1424 */ 1425 SRPL_FOREACH(sc, &sr, cif, sc_list) { 1426 struct mbuf *m0; 1427 1428 if (!(sc->sc_if.if_flags & IFF_UP)) 1429 continue; 1430 1431 m0 = m_dup_pkt(m, ETHER_ALIGN, M_DONTWAIT); 1432 if (m0 == NULL) 1433 continue; 1434 1435 if_vinput(&sc->sc_if, m0); 1436 } 1437 SRPL_LEAVE(&sr); 1438 1439 return (m); 1440 } 1441 1442 if_vinput(&sc->sc_if, m); 1443 out: 1444 SRPL_LEAVE(&sr); 1445 1446 return (NULL); 1447 } 1448 1449 int 1450 carp_lsdrop(struct ifnet *ifp, struct mbuf *m, sa_family_t af, u_int32_t *src, 1451 u_int32_t *dst, int drop) 1452 { 1453 struct carp_softc *sc; 1454 u_int32_t fold; 1455 struct m_tag *mtag; 1456 1457 if (ifp->if_type != IFT_CARP) 1458 return 0; 1459 sc = ifp->if_softc; 1460 if (sc->sc_balancing == CARP_BAL_NONE) 1461 return 0; 1462 1463 /* 1464 * Remove M_MCAST flag from mbuf of balancing ip traffic, since the fact 1465 * that it is layer 2 multicast does not implicate that it is also layer 1466 * 3 multicast. 1467 */ 1468 if (m->m_flags & M_MCAST && 1469 (mtag = m_tag_find(m, PACKET_TAG_CARP_BAL_IP, NULL))) { 1470 m_tag_delete(m, mtag); 1471 m->m_flags &= ~M_MCAST; 1472 } 1473 1474 /* 1475 * Return without making a drop decision. This allows to clear the 1476 * M_MCAST flag and do nothing else. 1477 */ 1478 if (!drop) 1479 return 0; 1480 1481 /* 1482 * Never drop carp advertisements. 1483 * XXX Bad idea to pass all broadcast / multicast traffic? 1484 */ 1485 if (m->m_flags & (M_BCAST|M_MCAST)) 1486 return 0; 1487 1488 fold = src[0] ^ dst[0]; 1489 #ifdef INET6 1490 if (af == AF_INET6) { 1491 int i; 1492 for (i = 1; i < 4; i++) 1493 fold ^= src[i] ^ dst[i]; 1494 } 1495 #endif 1496 if (sc->sc_lscount == 0) /* just to be safe */ 1497 return 1; 1498 1499 return ((1 << (ntohl(fold) % sc->sc_lscount)) & sc->sc_lsmask) == 0; 1500 } 1501 1502 void 1503 carp_timer_down(void *v) 1504 { 1505 NET_LOCK(); 1506 carp_master_down(v); 1507 NET_UNLOCK(); 1508 } 1509 1510 void 1511 carp_master_down(struct carp_vhost_entry *vhe) 1512 { 1513 struct carp_softc *sc = vhe->parent_sc; 1514 1515 NET_ASSERT_LOCKED(); 1516 1517 switch (vhe->state) { 1518 case INIT: 1519 printf("%s: master_down event in INIT state\n", 1520 sc->sc_if.if_xname); 1521 break; 1522 case MASTER: 1523 break; 1524 case BACKUP: 1525 carp_set_state(vhe, MASTER); 1526 carp_send_ad(vhe); 1527 if (sc->sc_balancing == CARP_BAL_NONE && vhe->vhe_leader) { 1528 carp_send_arp(sc); 1529 /* Schedule a delayed ARP to deal w/ some L3 switches */ 1530 sc->sc_delayed_arp = 2; 1531 #ifdef INET6 1532 carp_send_na(sc); 1533 #endif /* INET6 */ 1534 } 1535 carp_setrun(vhe, 0); 1536 carpstat_inc(carps_preempt); 1537 break; 1538 } 1539 } 1540 1541 void 1542 carp_setrun_all(struct carp_softc *sc, sa_family_t af) 1543 { 1544 struct carp_vhost_entry *vhe; 1545 1546 KERNEL_ASSERT_LOCKED(); /* touching carp_vhost */ 1547 SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) { 1548 carp_setrun(vhe, af); 1549 } 1550 } 1551 1552 /* 1553 * When in backup state, af indicates whether to reset the master down timer 1554 * for v4 or v6. If it's set to zero, reset the ones which are already pending. 1555 */ 1556 void 1557 carp_setrun(struct carp_vhost_entry *vhe, sa_family_t af) 1558 { 1559 struct ifnet *ifp; 1560 struct timeval tv; 1561 struct carp_softc *sc = vhe->parent_sc; 1562 1563 if ((ifp = if_get(sc->sc_carpdevidx)) == NULL) { 1564 sc->sc_if.if_flags &= ~IFF_RUNNING; 1565 carp_set_state_all(sc, INIT); 1566 return; 1567 } 1568 1569 if (memcmp(((struct arpcom *)ifp)->ac_enaddr, 1570 sc->sc_ac.ac_enaddr, ETHER_ADDR_LEN) == 0) 1571 sc->sc_realmac = 1; 1572 else 1573 sc->sc_realmac = 0; 1574 1575 if_put(ifp); 1576 1577 if (sc->sc_if.if_flags & IFF_UP && vhe->vhid > 0 && 1578 (sc->sc_naddrs || sc->sc_naddrs6) && !sc->sc_suppress) { 1579 sc->sc_if.if_flags |= IFF_RUNNING; 1580 } else { 1581 sc->sc_if.if_flags &= ~IFF_RUNNING; 1582 return; 1583 } 1584 1585 switch (vhe->state) { 1586 case INIT: 1587 carp_set_state(vhe, BACKUP); 1588 carp_setrun(vhe, 0); 1589 break; 1590 case BACKUP: 1591 timeout_del(&vhe->ad_tmo); 1592 tv.tv_sec = 3 * sc->sc_advbase; 1593 if (sc->sc_advbase == 0 && vhe->advskew == 0) 1594 tv.tv_usec = 3 * 1000000 / 256; 1595 else if (sc->sc_advbase == 0) 1596 tv.tv_usec = 3 * vhe->advskew * 1000000 / 256; 1597 else 1598 tv.tv_usec = vhe->advskew * 1000000 / 256; 1599 if (vhe->vhe_leader) 1600 sc->sc_delayed_arp = -1; 1601 switch (af) { 1602 case AF_INET: 1603 timeout_add_tv(&vhe->md_tmo, &tv); 1604 break; 1605 #ifdef INET6 1606 case AF_INET6: 1607 timeout_add_tv(&vhe->md6_tmo, &tv); 1608 break; 1609 #endif /* INET6 */ 1610 default: 1611 if (sc->sc_naddrs) 1612 timeout_add_tv(&vhe->md_tmo, &tv); 1613 if (sc->sc_naddrs6) 1614 timeout_add_tv(&vhe->md6_tmo, &tv); 1615 break; 1616 } 1617 break; 1618 case MASTER: 1619 tv.tv_sec = sc->sc_advbase; 1620 if (sc->sc_advbase == 0 && vhe->advskew == 0) 1621 tv.tv_usec = 1 * 1000000 / 256; 1622 else 1623 tv.tv_usec = vhe->advskew * 1000000 / 256; 1624 timeout_add_tv(&vhe->ad_tmo, &tv); 1625 break; 1626 } 1627 } 1628 1629 void 1630 carp_multicast_cleanup(struct carp_softc *sc) 1631 { 1632 struct ip_moptions *imo = &sc->sc_imo; 1633 #ifdef INET6 1634 struct ip6_moptions *im6o = &sc->sc_im6o; 1635 #endif 1636 u_int16_t n = imo->imo_num_memberships; 1637 1638 /* Clean up our own multicast memberships */ 1639 while (n-- > 0) { 1640 if (imo->imo_membership[n] != NULL) { 1641 in_delmulti(imo->imo_membership[n]); 1642 imo->imo_membership[n] = NULL; 1643 } 1644 } 1645 imo->imo_num_memberships = 0; 1646 imo->imo_ifidx = 0; 1647 1648 #ifdef INET6 1649 while (!LIST_EMPTY(&im6o->im6o_memberships)) { 1650 struct in6_multi_mship *imm = 1651 LIST_FIRST(&im6o->im6o_memberships); 1652 1653 LIST_REMOVE(imm, i6mm_chain); 1654 in6_leavegroup(imm); 1655 } 1656 im6o->im6o_ifidx = 0; 1657 #endif 1658 1659 /* And any other multicast memberships */ 1660 carp_ether_purgemulti(sc); 1661 } 1662 1663 int 1664 carp_set_ifp(struct carp_softc *sc, struct ifnet *ifp0) 1665 { 1666 struct srpl *cif; 1667 struct carp_softc *vr, *last = NULL, *after = NULL; 1668 int myself = 0, error = 0; 1669 1670 KASSERT(ifp0->if_index != sc->sc_carpdevidx); 1671 KERNEL_ASSERT_LOCKED(); /* touching if_carp */ 1672 1673 if ((ifp0->if_flags & IFF_MULTICAST) == 0) 1674 return (EADDRNOTAVAIL); 1675 1676 if (ifp0->if_type != IFT_ETHER) 1677 return (EINVAL); 1678 1679 cif = &ifp0->if_carp; 1680 if (carp_check_dup_vhids(sc, cif, NULL)) 1681 return (EINVAL); 1682 1683 if ((error = ifpromisc(ifp0, 1))) 1684 return (error); 1685 1686 /* detach from old interface */ 1687 if (sc->sc_carpdevidx != 0) 1688 carpdetach(sc); 1689 1690 /* attach carp interface to physical interface */ 1691 if_detachhook_add(ifp0, &sc->sc_dtask); 1692 if_linkstatehook_add(ifp0, &sc->sc_ltask); 1693 1694 sc->sc_carpdevidx = ifp0->if_index; 1695 sc->sc_if.if_capabilities = ifp0->if_capabilities & 1696 (IFCAP_CSUM_MASK | IFCAP_TSOv4 | IFCAP_TSOv6); 1697 1698 SRPL_FOREACH_LOCKED(vr, cif, sc_list) { 1699 struct carp_vhost_entry *vrhead, *schead; 1700 last = vr; 1701 1702 if (vr == sc) 1703 myself = 1; 1704 1705 vrhead = SRPL_FIRST_LOCKED(&vr->carp_vhosts); 1706 schead = SRPL_FIRST_LOCKED(&sc->carp_vhosts); 1707 if (vrhead->vhid < schead->vhid) 1708 after = vr; 1709 } 1710 1711 if (!myself) { 1712 /* We're trying to keep things in order */ 1713 if (last == NULL) { 1714 SRPL_INSERT_HEAD_LOCKED(&carp_sc_rc, cif, 1715 sc, sc_list); 1716 } else if (after == NULL) { 1717 SRPL_INSERT_AFTER_LOCKED(&carp_sc_rc, last, 1718 sc, sc_list); 1719 } else { 1720 SRPL_INSERT_AFTER_LOCKED(&carp_sc_rc, after, 1721 sc, sc_list); 1722 } 1723 } 1724 if (sc->sc_naddrs || sc->sc_naddrs6) 1725 sc->sc_if.if_flags |= IFF_UP; 1726 carp_set_enaddr(sc); 1727 1728 carp_carpdev_state(sc); 1729 1730 return (0); 1731 } 1732 1733 void 1734 carp_set_vhe_enaddr(struct carp_vhost_entry *vhe) 1735 { 1736 struct carp_softc *sc = vhe->parent_sc; 1737 1738 if (vhe->vhid != 0 && sc->sc_carpdevidx != 0) { 1739 if (vhe->vhe_leader && sc->sc_balancing == CARP_BAL_IP) 1740 vhe->vhe_enaddr[0] = 1; 1741 else 1742 vhe->vhe_enaddr[0] = 0; 1743 vhe->vhe_enaddr[1] = 0; 1744 vhe->vhe_enaddr[2] = 0x5e; 1745 vhe->vhe_enaddr[3] = 0; 1746 vhe->vhe_enaddr[4] = 1; 1747 vhe->vhe_enaddr[5] = vhe->vhid; 1748 } else 1749 memset(vhe->vhe_enaddr, 0, ETHER_ADDR_LEN); 1750 } 1751 1752 void 1753 carp_set_enaddr(struct carp_softc *sc) 1754 { 1755 struct carp_vhost_entry *vhe; 1756 1757 KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */ 1758 SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) 1759 carp_set_vhe_enaddr(vhe); 1760 1761 vhe = SRPL_FIRST_LOCKED(&sc->carp_vhosts); 1762 1763 /* 1764 * Use the carp lladdr if the running one isn't manually set. 1765 * Only compare static parts of the lladdr. 1766 */ 1767 if ((memcmp(sc->sc_ac.ac_enaddr + 1, vhe->vhe_enaddr + 1, 1768 ETHER_ADDR_LEN - 2) == 0) || 1769 (!sc->sc_ac.ac_enaddr[0] && !sc->sc_ac.ac_enaddr[1] && 1770 !sc->sc_ac.ac_enaddr[2] && !sc->sc_ac.ac_enaddr[3] && 1771 !sc->sc_ac.ac_enaddr[4] && !sc->sc_ac.ac_enaddr[5])) 1772 bcopy(vhe->vhe_enaddr, sc->sc_ac.ac_enaddr, ETHER_ADDR_LEN); 1773 1774 /* Make sure the enaddr has changed before further twiddling. */ 1775 if (memcmp(sc->sc_ac.ac_enaddr, sc->sc_curlladdr, ETHER_ADDR_LEN) != 0) { 1776 bcopy(sc->sc_ac.ac_enaddr, LLADDR(sc->sc_if.if_sadl), 1777 ETHER_ADDR_LEN); 1778 bcopy(sc->sc_ac.ac_enaddr, sc->sc_curlladdr, ETHER_ADDR_LEN); 1779 #ifdef INET6 1780 /* 1781 * (re)attach a link-local address which matches 1782 * our new MAC address. 1783 */ 1784 if (sc->sc_naddrs6) 1785 in6_ifattach_linklocal(&sc->sc_if, NULL); 1786 #endif 1787 carp_set_state_all(sc, INIT); 1788 carp_setrun_all(sc, 0); 1789 } 1790 } 1791 1792 void 1793 carp_addr_updated(void *v) 1794 { 1795 struct carp_softc *sc = (struct carp_softc *) v; 1796 struct ifaddr *ifa; 1797 int new_naddrs = 0, new_naddrs6 = 0; 1798 1799 TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) { 1800 if (ifa->ifa_addr->sa_family == AF_INET) 1801 new_naddrs++; 1802 #ifdef INET6 1803 else if (ifa->ifa_addr->sa_family == AF_INET6) 1804 new_naddrs6++; 1805 #endif /* INET6 */ 1806 } 1807 1808 /* We received address changes from if_addrhooks callback */ 1809 if (new_naddrs != sc->sc_naddrs || new_naddrs6 != sc->sc_naddrs6) { 1810 1811 sc->sc_naddrs = new_naddrs; 1812 sc->sc_naddrs6 = new_naddrs6; 1813 1814 /* Re-establish multicast membership removed by in_control */ 1815 if (IN_MULTICAST(sc->sc_peer.s_addr)) { 1816 if (!in_hasmulti(&sc->sc_peer, &sc->sc_if)) { 1817 struct in_multi **imm = 1818 sc->sc_imo.imo_membership; 1819 u_int16_t maxmem = 1820 sc->sc_imo.imo_max_memberships; 1821 1822 memset(&sc->sc_imo, 0, sizeof(sc->sc_imo)); 1823 sc->sc_imo.imo_membership = imm; 1824 sc->sc_imo.imo_max_memberships = maxmem; 1825 1826 if (sc->sc_carpdevidx != 0 && 1827 sc->sc_naddrs > 0) 1828 carp_join_multicast(sc); 1829 } 1830 } 1831 1832 if (sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0) { 1833 sc->sc_if.if_flags &= ~IFF_UP; 1834 carp_set_state_all(sc, INIT); 1835 } else 1836 carp_hmac_prepare(sc); 1837 } 1838 1839 carp_setrun_all(sc, 0); 1840 } 1841 1842 int 1843 carp_set_addr(struct carp_softc *sc, struct sockaddr_in *sin) 1844 { 1845 struct in_addr *in = &sin->sin_addr; 1846 int error; 1847 1848 KASSERT(sc->sc_carpdevidx != 0); 1849 1850 /* XXX is this necessary? */ 1851 if (in->s_addr == INADDR_ANY) { 1852 carp_setrun_all(sc, 0); 1853 return (0); 1854 } 1855 1856 if (sc->sc_naddrs == 0 && (error = carp_join_multicast(sc)) != 0) 1857 return (error); 1858 1859 carp_set_state_all(sc, INIT); 1860 1861 return (0); 1862 } 1863 1864 int 1865 carp_join_multicast(struct carp_softc *sc) 1866 { 1867 struct ip_moptions *imo = &sc->sc_imo; 1868 struct in_multi *imm; 1869 struct in_addr addr; 1870 1871 if (!IN_MULTICAST(sc->sc_peer.s_addr)) 1872 return (0); 1873 1874 addr.s_addr = sc->sc_peer.s_addr; 1875 if ((imm = in_addmulti(&addr, &sc->sc_if)) == NULL) 1876 return (ENOBUFS); 1877 1878 imo->imo_membership[0] = imm; 1879 imo->imo_num_memberships = 1; 1880 imo->imo_ifidx = sc->sc_if.if_index; 1881 imo->imo_ttl = CARP_DFLTTL; 1882 imo->imo_loop = 0; 1883 return (0); 1884 } 1885 1886 1887 #ifdef INET6 1888 int 1889 carp_set_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6) 1890 { 1891 int error; 1892 1893 KASSERT(sc->sc_carpdevidx != 0); 1894 1895 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { 1896 carp_setrun_all(sc, 0); 1897 return (0); 1898 } 1899 1900 if (sc->sc_naddrs6 == 0 && (error = carp_join_multicast6(sc)) != 0) 1901 return (error); 1902 1903 carp_set_state_all(sc, INIT); 1904 1905 return (0); 1906 } 1907 1908 int 1909 carp_join_multicast6(struct carp_softc *sc) 1910 { 1911 struct in6_multi_mship *imm, *imm2; 1912 struct ip6_moptions *im6o = &sc->sc_im6o; 1913 struct sockaddr_in6 addr6; 1914 int error; 1915 1916 /* Join IPv6 CARP multicast group */ 1917 memset(&addr6, 0, sizeof(addr6)); 1918 addr6.sin6_family = AF_INET6; 1919 addr6.sin6_len = sizeof(addr6); 1920 addr6.sin6_addr.s6_addr16[0] = htons(0xff02); 1921 addr6.sin6_addr.s6_addr16[1] = htons(sc->sc_if.if_index); 1922 addr6.sin6_addr.s6_addr8[15] = 0x12; 1923 if ((imm = in6_joingroup(&sc->sc_if, 1924 &addr6.sin6_addr, &error)) == NULL) { 1925 return (error); 1926 } 1927 /* join solicited multicast address */ 1928 memset(&addr6.sin6_addr, 0, sizeof(addr6.sin6_addr)); 1929 addr6.sin6_addr.s6_addr16[0] = htons(0xff02); 1930 addr6.sin6_addr.s6_addr16[1] = htons(sc->sc_if.if_index); 1931 addr6.sin6_addr.s6_addr32[1] = 0; 1932 addr6.sin6_addr.s6_addr32[2] = htonl(1); 1933 addr6.sin6_addr.s6_addr32[3] = 0; 1934 addr6.sin6_addr.s6_addr8[12] = 0xff; 1935 if ((imm2 = in6_joingroup(&sc->sc_if, 1936 &addr6.sin6_addr, &error)) == NULL) { 1937 in6_leavegroup(imm); 1938 return (error); 1939 } 1940 1941 /* apply v6 multicast membership */ 1942 im6o->im6o_ifidx = sc->sc_if.if_index; 1943 if (imm) 1944 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, 1945 i6mm_chain); 1946 if (imm2) 1947 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm2, 1948 i6mm_chain); 1949 1950 return (0); 1951 } 1952 1953 #endif /* INET6 */ 1954 1955 int 1956 carp_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr) 1957 { 1958 struct proc *p = curproc; /* XXX */ 1959 struct carp_softc *sc = ifp->if_softc; 1960 struct carp_vhost_entry *vhe; 1961 struct carpreq carpr; 1962 struct ifaddr *ifa = (struct ifaddr *)addr; 1963 struct ifreq *ifr = (struct ifreq *)addr; 1964 struct ifnet *ifp0 = NULL; 1965 int i, error = 0; 1966 1967 switch (cmd) { 1968 case SIOCSIFADDR: 1969 if (sc->sc_carpdevidx == 0) 1970 return (EINVAL); 1971 1972 switch (ifa->ifa_addr->sa_family) { 1973 case AF_INET: 1974 sc->sc_if.if_flags |= IFF_UP; 1975 error = carp_set_addr(sc, satosin(ifa->ifa_addr)); 1976 break; 1977 #ifdef INET6 1978 case AF_INET6: 1979 sc->sc_if.if_flags |= IFF_UP; 1980 error = carp_set_addr6(sc, satosin6(ifa->ifa_addr)); 1981 break; 1982 #endif /* INET6 */ 1983 default: 1984 error = EAFNOSUPPORT; 1985 break; 1986 } 1987 break; 1988 1989 case SIOCSIFFLAGS: 1990 KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */ 1991 vhe = SRPL_FIRST_LOCKED(&sc->carp_vhosts); 1992 if (vhe->state != INIT && !(ifr->ifr_flags & IFF_UP)) { 1993 carp_del_all_timeouts(sc); 1994 1995 /* we need the interface up to bow out */ 1996 sc->sc_if.if_flags |= IFF_UP; 1997 sc->sc_bow_out = 1; 1998 carp_vhe_send_ad_all(sc); 1999 sc->sc_bow_out = 0; 2000 2001 sc->sc_if.if_flags &= ~IFF_UP; 2002 carp_set_state_all(sc, INIT); 2003 carp_setrun_all(sc, 0); 2004 } else if (vhe->state == INIT && (ifr->ifr_flags & IFF_UP)) { 2005 sc->sc_if.if_flags |= IFF_UP; 2006 carp_setrun_all(sc, 0); 2007 } 2008 break; 2009 2010 case SIOCSVH: 2011 KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */ 2012 vhe = SRPL_FIRST_LOCKED(&sc->carp_vhosts); 2013 if ((error = suser(p)) != 0) 2014 break; 2015 if ((error = copyin(ifr->ifr_data, &carpr, sizeof carpr))) 2016 break; 2017 error = 1; 2018 if (carpr.carpr_carpdev[0] != '\0' && 2019 (ifp0 = if_unit(carpr.carpr_carpdev)) == NULL) 2020 return (EINVAL); 2021 if (carpr.carpr_peer.s_addr == 0) 2022 sc->sc_peer.s_addr = INADDR_CARP_GROUP; 2023 else 2024 sc->sc_peer.s_addr = carpr.carpr_peer.s_addr; 2025 if (ifp0 != NULL && ifp0->if_index != sc->sc_carpdevidx) { 2026 if ((error = carp_set_ifp(sc, ifp0))) { 2027 if_put(ifp0); 2028 return (error); 2029 } 2030 } 2031 if_put(ifp0); 2032 if (vhe->state != INIT && carpr.carpr_state != vhe->state) { 2033 switch (carpr.carpr_state) { 2034 case BACKUP: 2035 timeout_del(&vhe->ad_tmo); 2036 carp_set_state_all(sc, BACKUP); 2037 carp_setrun_all(sc, 0); 2038 break; 2039 case MASTER: 2040 KERNEL_ASSERT_LOCKED(); 2041 /* touching carp_vhosts */ 2042 SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, 2043 vhost_entries) 2044 carp_master_down(vhe); 2045 break; 2046 default: 2047 break; 2048 } 2049 } 2050 if ((error = carp_vhids_ioctl(sc, &carpr))) 2051 return (error); 2052 if (carpr.carpr_advbase >= 0) { 2053 if (carpr.carpr_advbase > 255) { 2054 error = EINVAL; 2055 break; 2056 } 2057 sc->sc_advbase = carpr.carpr_advbase; 2058 error--; 2059 } 2060 if (memcmp(sc->sc_advskews, carpr.carpr_advskews, 2061 sizeof(sc->sc_advskews))) { 2062 i = 0; 2063 KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */ 2064 SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, 2065 vhost_entries) 2066 vhe->advskew = carpr.carpr_advskews[i++]; 2067 bcopy(carpr.carpr_advskews, sc->sc_advskews, 2068 sizeof(sc->sc_advskews)); 2069 } 2070 if (sc->sc_balancing != carpr.carpr_balancing) { 2071 if (carpr.carpr_balancing > CARP_BAL_MAXID) { 2072 error = EINVAL; 2073 break; 2074 } 2075 sc->sc_balancing = carpr.carpr_balancing; 2076 carp_set_enaddr(sc); 2077 carp_update_lsmask(sc); 2078 } 2079 bcopy(carpr.carpr_key, sc->sc_key, sizeof(sc->sc_key)); 2080 if (error > 0) 2081 error = EINVAL; 2082 else { 2083 error = 0; 2084 carp_hmac_prepare(sc); 2085 carp_setrun_all(sc, 0); 2086 } 2087 break; 2088 2089 case SIOCGVH: 2090 memset(&carpr, 0, sizeof(carpr)); 2091 if ((ifp0 = if_get(sc->sc_carpdevidx)) != NULL) 2092 strlcpy(carpr.carpr_carpdev, ifp0->if_xname, IFNAMSIZ); 2093 if_put(ifp0); 2094 i = 0; 2095 KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */ 2096 SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) { 2097 carpr.carpr_vhids[i] = vhe->vhid; 2098 carpr.carpr_advskews[i] = vhe->advskew; 2099 carpr.carpr_states[i] = vhe->state; 2100 i++; 2101 } 2102 carpr.carpr_advbase = sc->sc_advbase; 2103 carpr.carpr_balancing = sc->sc_balancing; 2104 if (suser(p) == 0) 2105 bcopy(sc->sc_key, carpr.carpr_key, 2106 sizeof(carpr.carpr_key)); 2107 carpr.carpr_peer.s_addr = sc->sc_peer.s_addr; 2108 error = copyout(&carpr, ifr->ifr_data, sizeof(carpr)); 2109 break; 2110 2111 case SIOCADDMULTI: 2112 error = carp_ether_addmulti(sc, ifr); 2113 break; 2114 2115 case SIOCDELMULTI: 2116 error = carp_ether_delmulti(sc, ifr); 2117 break; 2118 case SIOCAIFGROUP: 2119 case SIOCDIFGROUP: 2120 if (sc->sc_demote_cnt) 2121 carp_ifgroup_ioctl(ifp, cmd, addr); 2122 break; 2123 case SIOCSIFGATTR: 2124 carp_ifgattr_ioctl(ifp, cmd, addr); 2125 break; 2126 default: 2127 error = ENOTTY; 2128 } 2129 2130 if (memcmp(sc->sc_ac.ac_enaddr, sc->sc_curlladdr, ETHER_ADDR_LEN) != 0) 2131 carp_set_enaddr(sc); 2132 return (error); 2133 } 2134 2135 int 2136 carp_check_dup_vhids(struct carp_softc *sc, struct srpl *cif, 2137 struct carpreq *carpr) 2138 { 2139 struct carp_softc *vr; 2140 struct carp_vhost_entry *vhe, *vhe0; 2141 int i; 2142 2143 KERNEL_ASSERT_LOCKED(); /* touching if_carp + carp_vhosts */ 2144 2145 SRPL_FOREACH_LOCKED(vr, cif, sc_list) { 2146 if (vr == sc) 2147 continue; 2148 SRPL_FOREACH_LOCKED(vhe, &vr->carp_vhosts, vhost_entries) { 2149 if (carpr) { 2150 for (i = 0; carpr->carpr_vhids[i]; i++) { 2151 if (vhe->vhid == carpr->carpr_vhids[i]) 2152 return (EINVAL); 2153 } 2154 } 2155 SRPL_FOREACH_LOCKED(vhe0, &sc->carp_vhosts, 2156 vhost_entries) { 2157 if (vhe->vhid == vhe0->vhid) 2158 return (EINVAL); 2159 } 2160 } 2161 } 2162 return (0); 2163 } 2164 2165 int 2166 carp_vhids_ioctl(struct carp_softc *sc, struct carpreq *carpr) 2167 { 2168 int i, j; 2169 u_int8_t taken_vhids[256]; 2170 2171 if (carpr->carpr_vhids[0] == 0 || 2172 !memcmp(sc->sc_vhids, carpr->carpr_vhids, sizeof(sc->sc_vhids))) 2173 return (0); 2174 2175 memset(taken_vhids, 0, sizeof(taken_vhids)); 2176 for (i = 0; carpr->carpr_vhids[i]; i++) { 2177 struct ifnet *ifp; 2178 2179 if (taken_vhids[carpr->carpr_vhids[i]]) 2180 return (EINVAL); 2181 taken_vhids[carpr->carpr_vhids[i]] = 1; 2182 2183 if ((ifp = if_get(sc->sc_carpdevidx)) != NULL) { 2184 struct srpl *cif; 2185 cif = &ifp->if_carp; 2186 if (carp_check_dup_vhids(sc, cif, carpr)) { 2187 if_put(ifp); 2188 return (EINVAL); 2189 } 2190 } 2191 if_put(ifp); 2192 if (carpr->carpr_advskews[i] >= 255) 2193 return (EINVAL); 2194 } 2195 /* set sane balancing defaults */ 2196 if (i <= 1) 2197 carpr->carpr_balancing = CARP_BAL_NONE; 2198 else if (carpr->carpr_balancing == CARP_BAL_NONE && 2199 sc->sc_balancing == CARP_BAL_NONE) 2200 carpr->carpr_balancing = CARP_BAL_IP; 2201 2202 /* destroy all */ 2203 carp_del_all_timeouts(sc); 2204 carp_destroy_vhosts(sc); 2205 memset(sc->sc_vhids, 0, sizeof(sc->sc_vhids)); 2206 2207 /* sort vhosts list by vhid */ 2208 for (j = 1; j <= 255; j++) { 2209 for (i = 0; carpr->carpr_vhids[i]; i++) { 2210 if (carpr->carpr_vhids[i] != j) 2211 continue; 2212 if (carp_new_vhost(sc, carpr->carpr_vhids[i], 2213 carpr->carpr_advskews[i])) 2214 return (ENOMEM); 2215 sc->sc_vhids[i] = carpr->carpr_vhids[i]; 2216 sc->sc_advskews[i] = carpr->carpr_advskews[i]; 2217 } 2218 } 2219 carp_set_enaddr(sc); 2220 carp_set_state_all(sc, INIT); 2221 return (0); 2222 } 2223 2224 void 2225 carp_ifgroup_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr) 2226 { 2227 struct ifgroupreq *ifgr = (struct ifgroupreq *)addr; 2228 struct ifg_list *ifgl; 2229 int *dm, adj; 2230 2231 if (!strcmp(ifgr->ifgr_group, IFG_ALL)) 2232 return; 2233 adj = ((struct carp_softc *)ifp->if_softc)->sc_demote_cnt; 2234 if (cmd == SIOCDIFGROUP) 2235 adj = adj * -1; 2236 2237 TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) 2238 if (!strcmp(ifgl->ifgl_group->ifg_group, ifgr->ifgr_group)) { 2239 dm = &ifgl->ifgl_group->ifg_carp_demoted; 2240 if (*dm + adj >= 0) 2241 *dm += adj; 2242 else 2243 *dm = 0; 2244 } 2245 } 2246 2247 void 2248 carp_ifgattr_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr) 2249 { 2250 struct ifgroupreq *ifgr = (struct ifgroupreq *)addr; 2251 struct carp_softc *sc = ifp->if_softc; 2252 2253 if (ifgr->ifgr_attrib.ifg_carp_demoted > 0 && (sc->sc_if.if_flags & 2254 (IFF_UP|IFF_RUNNING)) == (IFF_UP|IFF_RUNNING)) 2255 carp_vhe_send_ad_all(sc); 2256 } 2257 2258 void 2259 carp_start(struct ifnet *ifp) 2260 { 2261 struct carp_softc *sc = ifp->if_softc; 2262 struct ifnet *ifp0; 2263 struct mbuf *m; 2264 2265 if ((ifp0 = if_get(sc->sc_carpdevidx)) == NULL) { 2266 ifq_purge(&ifp->if_snd); 2267 return; 2268 } 2269 2270 while ((m = ifq_dequeue(&ifp->if_snd)) != NULL) 2271 carp_transmit(sc, ifp0, m); 2272 if_put(ifp0); 2273 } 2274 2275 void 2276 carp_transmit(struct carp_softc *sc, struct ifnet *ifp0, struct mbuf *m) 2277 { 2278 struct ifnet *ifp = &sc->sc_if; 2279 2280 #if NBPFILTER > 0 2281 { 2282 caddr_t if_bpf = ifp->if_bpf; 2283 if (if_bpf) 2284 bpf_mtap_ether(if_bpf, m, BPF_DIRECTION_OUT); 2285 } 2286 #endif /* NBPFILTER > 0 */ 2287 2288 if (!ISSET(ifp0->if_flags, IFF_RUNNING)) { 2289 counters_inc(ifp->if_counters, ifc_oerrors); 2290 m_freem(m); 2291 return; 2292 } 2293 2294 /* 2295 * Do not leak the multicast address when sending 2296 * advertisements in 'ip' and 'ip-stealth' balancing 2297 * modes. 2298 */ 2299 if (sc->sc_balancing == CARP_BAL_IP || 2300 sc->sc_balancing == CARP_BAL_IPSTEALTH) { 2301 struct ether_header *eh = mtod(m, struct ether_header *); 2302 memcpy(eh->ether_shost, sc->sc_ac.ac_enaddr, 2303 sizeof(eh->ether_shost)); 2304 } 2305 2306 if (if_enqueue(ifp0, m)) 2307 counters_inc(ifp->if_counters, ifc_oerrors); 2308 } 2309 2310 int 2311 carp_enqueue(struct ifnet *ifp, struct mbuf *m) 2312 { 2313 struct carp_softc *sc = ifp->if_softc; 2314 struct ifnet *ifp0; 2315 2316 /* no ifq_is_priq, cos hfsc on carp doesn't make sense */ 2317 2318 /* 2319 * If the parent of this carp(4) got destroyed while 2320 * `m' was being processed, silently drop it. 2321 */ 2322 if ((ifp0 = if_get(sc->sc_carpdevidx)) == NULL) { 2323 m_freem(m); 2324 return (0); 2325 } 2326 2327 counters_pkt(ifp->if_counters, 2328 ifc_opackets, ifc_obytes, m->m_pkthdr.len); 2329 carp_transmit(sc, ifp0, m); 2330 if_put(ifp0); 2331 2332 return (0); 2333 } 2334 2335 int 2336 carp_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa, 2337 struct rtentry *rt) 2338 { 2339 struct carp_softc *sc = ((struct carp_softc *)ifp->if_softc); 2340 struct carp_vhost_entry *vhe; 2341 struct srp_ref sr; 2342 int ismaster; 2343 2344 if (sc->cur_vhe == NULL) { 2345 vhe = SRPL_FIRST(&sr, &sc->carp_vhosts); 2346 ismaster = (vhe->state == MASTER); 2347 SRPL_LEAVE(&sr); 2348 } else { 2349 ismaster = (sc->cur_vhe->state == MASTER); 2350 } 2351 2352 if ((sc->sc_balancing == CARP_BAL_NONE && !ismaster)) { 2353 m_freem(m); 2354 return (ENETUNREACH); 2355 } 2356 2357 return (ether_output(ifp, m, sa, rt)); 2358 } 2359 2360 void 2361 carp_set_state_all(struct carp_softc *sc, int state) 2362 { 2363 struct carp_vhost_entry *vhe; 2364 2365 KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */ 2366 2367 SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) { 2368 if (vhe->state == state) 2369 continue; 2370 2371 carp_set_state(vhe, state); 2372 } 2373 } 2374 2375 void 2376 carp_set_state(struct carp_vhost_entry *vhe, int state) 2377 { 2378 struct carp_softc *sc = vhe->parent_sc; 2379 static const char *carp_states[] = { CARP_STATES }; 2380 int loglevel; 2381 struct carp_vhost_entry *vhe0; 2382 2383 KASSERT(vhe->state != state); 2384 2385 if (vhe->state == INIT || state == INIT) 2386 loglevel = LOG_WARNING; 2387 else 2388 loglevel = LOG_CRIT; 2389 2390 if (sc->sc_vhe_count > 1) 2391 CARP_LOG(loglevel, sc, 2392 ("state transition (vhid %d): %s -> %s", vhe->vhid, 2393 carp_states[vhe->state], carp_states[state])); 2394 else 2395 CARP_LOG(loglevel, sc, 2396 ("state transition: %s -> %s", 2397 carp_states[vhe->state], carp_states[state])); 2398 2399 vhe->state = state; 2400 carp_update_lsmask(sc); 2401 2402 KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */ 2403 2404 sc->sc_if.if_link_state = LINK_STATE_INVALID; 2405 SRPL_FOREACH_LOCKED(vhe0, &sc->carp_vhosts, vhost_entries) { 2406 /* 2407 * Link must be up if at least one vhe is in state MASTER to 2408 * bring or keep route up. 2409 */ 2410 if (vhe0->state == MASTER) { 2411 sc->sc_if.if_link_state = LINK_STATE_UP; 2412 break; 2413 } else if (vhe0->state == BACKUP) { 2414 sc->sc_if.if_link_state = LINK_STATE_DOWN; 2415 } 2416 } 2417 if_link_state_change(&sc->sc_if); 2418 } 2419 2420 void 2421 carp_group_demote_adj(struct ifnet *ifp, int adj, char *reason) 2422 { 2423 struct ifg_list *ifgl; 2424 int *dm, need_ad; 2425 struct carp_softc *nil = NULL; 2426 2427 if (ifp->if_type == IFT_CARP) { 2428 dm = &((struct carp_softc *)ifp->if_softc)->sc_demote_cnt; 2429 if (*dm + adj >= 0) 2430 *dm += adj; 2431 else 2432 *dm = 0; 2433 } 2434 2435 need_ad = 0; 2436 TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) { 2437 if (!strcmp(ifgl->ifgl_group->ifg_group, IFG_ALL)) 2438 continue; 2439 dm = &ifgl->ifgl_group->ifg_carp_demoted; 2440 2441 if (*dm + adj >= 0) 2442 *dm += adj; 2443 else 2444 *dm = 0; 2445 2446 if (adj > 0 && *dm == 1) 2447 need_ad = 1; 2448 CARP_LOG(LOG_ERR, nil, 2449 ("%s demoted group %s by %d to %d (%s)", 2450 ifp->if_xname, ifgl->ifgl_group->ifg_group, 2451 adj, *dm, reason)); 2452 } 2453 if (need_ad) 2454 carp_send_ad_all(); 2455 } 2456 2457 int 2458 carp_group_demote_count(struct carp_softc *sc) 2459 { 2460 struct ifg_list *ifgl; 2461 int count = 0; 2462 2463 TAILQ_FOREACH(ifgl, &sc->sc_if.if_groups, ifgl_next) 2464 count += ifgl->ifgl_group->ifg_carp_demoted; 2465 2466 if (count == 0 && sc->sc_demote_cnt) 2467 count = sc->sc_demote_cnt; 2468 2469 return (count > 255 ? 255 : count); 2470 } 2471 2472 void 2473 carp_carpdev_state(void *v) 2474 { 2475 struct carp_softc *sc = v; 2476 struct ifnet *ifp0; 2477 int suppressed = sc->sc_suppress; 2478 2479 if ((ifp0 = if_get(sc->sc_carpdevidx)) == NULL) 2480 return; 2481 2482 if (ifp0->if_link_state == LINK_STATE_DOWN || 2483 !(ifp0->if_flags & IFF_UP)) { 2484 sc->sc_if.if_flags &= ~IFF_RUNNING; 2485 carp_del_all_timeouts(sc); 2486 carp_set_state_all(sc, INIT); 2487 sc->sc_suppress = 1; 2488 carp_setrun_all(sc, 0); 2489 if (!suppressed) 2490 carp_group_demote_adj(&sc->sc_if, 1, "carpdev"); 2491 } else if (suppressed) { 2492 carp_set_state_all(sc, INIT); 2493 sc->sc_suppress = 0; 2494 carp_setrun_all(sc, 0); 2495 carp_group_demote_adj(&sc->sc_if, -1, "carpdev"); 2496 } 2497 2498 if_put(ifp0); 2499 } 2500 2501 int 2502 carp_ether_addmulti(struct carp_softc *sc, struct ifreq *ifr) 2503 { 2504 struct ifnet *ifp0; 2505 struct carp_mc_entry *mc; 2506 u_int8_t addrlo[ETHER_ADDR_LEN], addrhi[ETHER_ADDR_LEN]; 2507 int error; 2508 2509 ifp0 = if_get(sc->sc_carpdevidx); 2510 if (ifp0 == NULL) 2511 return (EINVAL); 2512 2513 error = ether_addmulti(ifr, (struct arpcom *)&sc->sc_ac); 2514 if (error != ENETRESET) { 2515 if_put(ifp0); 2516 return (error); 2517 } 2518 2519 /* 2520 * This is new multicast address. We have to tell parent 2521 * about it. Also, remember this multicast address so that 2522 * we can delete them on unconfigure. 2523 */ 2524 mc = malloc(sizeof(*mc), M_DEVBUF, M_NOWAIT); 2525 if (mc == NULL) { 2526 error = ENOMEM; 2527 goto alloc_failed; 2528 } 2529 2530 /* 2531 * As ether_addmulti() returns ENETRESET, following two 2532 * statement shouldn't fail. 2533 */ 2534 (void)ether_multiaddr(&ifr->ifr_addr, addrlo, addrhi); 2535 ETHER_LOOKUP_MULTI(addrlo, addrhi, &sc->sc_ac, mc->mc_enm); 2536 memcpy(&mc->mc_addr, &ifr->ifr_addr, ifr->ifr_addr.sa_len); 2537 LIST_INSERT_HEAD(&sc->carp_mc_listhead, mc, mc_entries); 2538 2539 error = (*ifp0->if_ioctl)(ifp0, SIOCADDMULTI, (caddr_t)ifr); 2540 if (error != 0) 2541 goto ioctl_failed; 2542 2543 if_put(ifp0); 2544 2545 return (error); 2546 2547 ioctl_failed: 2548 LIST_REMOVE(mc, mc_entries); 2549 free(mc, M_DEVBUF, sizeof(*mc)); 2550 alloc_failed: 2551 (void)ether_delmulti(ifr, (struct arpcom *)&sc->sc_ac); 2552 if_put(ifp0); 2553 2554 return (error); 2555 } 2556 2557 int 2558 carp_ether_delmulti(struct carp_softc *sc, struct ifreq *ifr) 2559 { 2560 struct ifnet *ifp0; 2561 struct ether_multi *enm; 2562 struct carp_mc_entry *mc; 2563 u_int8_t addrlo[ETHER_ADDR_LEN], addrhi[ETHER_ADDR_LEN]; 2564 int error; 2565 2566 ifp0 = if_get(sc->sc_carpdevidx); 2567 if (ifp0 == NULL) 2568 return (EINVAL); 2569 2570 /* 2571 * Find a key to lookup carp_mc_entry. We have to do this 2572 * before calling ether_delmulti for obvious reason. 2573 */ 2574 if ((error = ether_multiaddr(&ifr->ifr_addr, addrlo, addrhi)) != 0) 2575 goto rele; 2576 ETHER_LOOKUP_MULTI(addrlo, addrhi, &sc->sc_ac, enm); 2577 if (enm == NULL) { 2578 error = EINVAL; 2579 goto rele; 2580 } 2581 2582 LIST_FOREACH(mc, &sc->carp_mc_listhead, mc_entries) 2583 if (mc->mc_enm == enm) 2584 break; 2585 2586 /* We won't delete entries we didn't add */ 2587 if (mc == NULL) { 2588 error = EINVAL; 2589 goto rele; 2590 } 2591 2592 error = ether_delmulti(ifr, (struct arpcom *)&sc->sc_ac); 2593 if (error != ENETRESET) 2594 goto rele; 2595 2596 /* We no longer use this multicast address. Tell parent so. */ 2597 error = (*ifp0->if_ioctl)(ifp0, SIOCDELMULTI, (caddr_t)ifr); 2598 if (error == 0) { 2599 /* And forget about this address. */ 2600 LIST_REMOVE(mc, mc_entries); 2601 free(mc, M_DEVBUF, sizeof(*mc)); 2602 } else 2603 (void)ether_addmulti(ifr, (struct arpcom *)&sc->sc_ac); 2604 rele: 2605 if_put(ifp0); 2606 return (error); 2607 } 2608 2609 /* 2610 * Delete any multicast address we have asked to add from parent 2611 * interface. Called when the carp is being unconfigured. 2612 */ 2613 void 2614 carp_ether_purgemulti(struct carp_softc *sc) 2615 { 2616 struct ifnet *ifp0; /* Parent. */ 2617 struct carp_mc_entry *mc; 2618 union { 2619 struct ifreq ifreq; 2620 struct { 2621 char ifr_name[IFNAMSIZ]; 2622 struct sockaddr_storage ifr_ss; 2623 } ifreq_storage; 2624 } u; 2625 struct ifreq *ifr = &u.ifreq; 2626 2627 if ((ifp0 = if_get(sc->sc_carpdevidx)) == NULL) 2628 return; 2629 2630 memcpy(ifr->ifr_name, ifp0->if_xname, IFNAMSIZ); 2631 while ((mc = LIST_FIRST(&sc->carp_mc_listhead)) != NULL) { 2632 memcpy(&ifr->ifr_addr, &mc->mc_addr, mc->mc_addr.ss_len); 2633 (void)(*ifp0->if_ioctl)(ifp0, SIOCDELMULTI, (caddr_t)ifr); 2634 LIST_REMOVE(mc, mc_entries); 2635 free(mc, M_DEVBUF, sizeof(*mc)); 2636 } 2637 2638 if_put(ifp0); 2639 } 2640 2641 void 2642 carp_vh_ref(void *null, void *v) 2643 { 2644 struct carp_vhost_entry *vhe = v; 2645 2646 refcnt_take(&vhe->vhost_refcnt); 2647 } 2648 2649 void 2650 carp_vh_unref(void *null, void *v) 2651 { 2652 struct carp_vhost_entry *vhe = v; 2653 2654 if (refcnt_rele(&vhe->vhost_refcnt)) { 2655 carp_sc_unref(NULL, vhe->parent_sc); 2656 free(vhe, M_DEVBUF, sizeof(*vhe)); 2657 } 2658 } 2659 2660 void 2661 carp_sc_ref(void *null, void *s) 2662 { 2663 struct carp_softc *sc = s; 2664 2665 refcnt_take(&sc->sc_refcnt); 2666 } 2667 2668 void 2669 carp_sc_unref(void *null, void *s) 2670 { 2671 struct carp_softc *sc = s; 2672 2673 refcnt_rele_wake(&sc->sc_refcnt); 2674 } 2675