/*	$OpenBSD: ip_carp.c,v 1.349 2020/07/28 16:44:34 yasuoka Exp $	*/

/*
 * Copyright (c) 2002 Michael Shalayeff. All rights reserved.
 * Copyright (c) 2003 Ryan McBride. All rights reserved.
 * Copyright (c) 2006-2008 Marco Pfatschbacher. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 * THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * TODO:
 *	- iface reconfigure
 *	- support for hardware checksum calculations;
 *
 */

#include "ether.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/timeout.h>
#include <sys/ioctl.h>
#include <sys/errno.h>
#include <sys/device.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/refcnt.h>

#include <net/if.h>
#include <net/if_var.h>
#include <net/if_types.h>
#include <net/netisr.h>

#include <crypto/sha1.h>

#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
#include <netinet/if_ether.h>
#include <netinet/ip_ipsp.h>

#include <net/if_dl.h>

#ifdef INET6
#include <netinet6/in6_var.h>
#include <netinet/icmp6.h>
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
#include <netinet6/nd6.h>
#include <netinet6/in6_ifattach.h>
#endif

#include "bpfilter.h"
#if NBPFILTER > 0
#include <net/bpf.h>
#endif

#include "vlan.h"
#if NVLAN > 0
#include <net/if_vlan_var.h>
#endif

#include <netinet/ip_carp.h>

/*
 * One link-layer multicast membership recorded for a carp interface.
 * Entries are linked on carp_softc.carp_mc_listhead.
 */
struct carp_mc_entry {
	LIST_ENTRY(carp_mc_entry)	mc_entries;
	union {
		struct ether_multi	*mcu_enm;
	} mc_u;
	struct sockaddr_storage		mc_addr;
};
#define	mc_enm	mc_u.mcu_enm

/*
 * HMAC variants kept per vhost entry (index into vhe_sha1[]).
 * carp_hmac_prepare_ctx() leaves scope-embedded IPv6 addresses out of the
 * HMAC_NOV6LL variant; the HMAC_ORIG variant includes them with the embedded
 * scope cleared.  HMAC_MAX is the number of variants.
 */
enum { HMAC_ORIG=0, HMAC_NOV6LL=1, HMAC_MAX=2 };

/*
 * Per-vhid state of a carp interface.  A carp_softc owns a list of these
 * (carp_softc.carp_vhosts); each entry holds a reference on its parent.
 */
struct carp_vhost_entry {
	SRPL_ENTRY(carp_vhost_entry) vhost_entries;
	struct refcnt vhost_refcnt;

	struct carp_softc *parent_sc;
	int vhe_leader;		/* set on the first vhe added to a softc */
	int vhid;
	int advskew;		/* used as advskew/256 seconds in timers */
	enum { INIT = 0, BACKUP, MASTER } state;
	struct timeout ad_tmo;	/* advertisement timeout */
	struct timeout md_tmo;	/* master down timeout */
	/* NOTE(review): presumably the IPv6 counterpart of md_tmo -- confirm */
	struct timeout md6_tmo;	/* master down timeout */

	/* value sent in carp_counter; also compared to detect our own ads */
	u_int64_t vhe_replay_cookie;

	/* authentication */
#define CARP_HMAC_PAD	64
	/* key pad; holds the HMAC opad once carp_hmac_prepare_ctx() returns */
	unsigned char vhe_pad[CARP_HMAC_PAD];
	/* precomputed inner-hash state, one per HMAC variant */
	SHA1_CTX vhe_sha1[HMAC_MAX];

	u_int8_t vhe_enaddr[ETHER_ADDR_LEN];
};

void	carp_vh_ref(void *, void *);
void	carp_vh_unref(void *, void *);

/* reference callbacks used by the SRP list of vhost entries */
struct srpl_rc carp_vh_rc =
    SRPL_RC_INITIALIZER(carp_vh_ref, carp_vh_unref, NULL);

/*
 * Software state of one carp(4) interface.
 */
struct carp_softc {
	struct arpcom sc_ac;
#define	sc_if		sc_ac.ac_if
#define	sc_carpdevidx	sc_ac.ac_if.if_carpdevidx
	struct task sc_atask;	/* runs carp_addr_updated() */
	struct task sc_ltask;	/* runs carp_carpdev_state() */
	struct task sc_dtask;	/* runs carpdetach() */
	struct ip_moptions sc_imo;
#ifdef INET6
	struct ip6_moptions sc_im6o;
#endif /* INET6 */

	SRPL_ENTRY(carp_softc) sc_list;	/* linkage on the parent's if_carp */
	struct refcnt sc_refcnt;

	int sc_suppress;
	int sc_bow_out;		/* advertise advbase/advskew of 255 */
	int sc_demote_cnt;

	/* advertisement send failure/success accounting, see carp_send_ad() */
	int sc_sendad_errors;
#define CARP_SENDAD_MAX_ERRORS(sc) (3 * (sc)->sc_vhe_count)
	int sc_sendad_success;
#define CARP_SENDAD_MIN_SUCCESS(sc) (3 * (sc)->sc_vhe_count)

	char sc_curlladdr[ETHER_ADDR_LEN];

	SRPL_HEAD(, carp_vhost_entry) carp_vhosts;
	int sc_vhe_count;	/* number of entries on carp_vhosts */
	u_int8_t sc_vhids[CARP_MAXNODES];
	u_int8_t sc_advskews[CARP_MAXNODES];
	u_int8_t sc_balancing;

	int sc_naddrs;		/* non-zero: send IPv4 advertisements */
	int sc_naddrs6;		/* non-zero: send IPv6 advertisements */
	int sc_advbase;		/* seconds */

	/* authentication */
	unsigned char sc_key[CARP_KEY_LEN];

	u_int32_t sc_hashkey[2];
	u_int32_t sc_lsmask;	/* load sharing mask */
	int sc_lscount;		/* # load sharing interfaces (max 32) */
	int sc_delayed_arp;	/* delayed ARP request countdown */
	int sc_realmac;		/* using real mac */

	struct in_addr sc_peer;	/* destination of IPv4 advertisements */

	LIST_HEAD(__carp_mchead, carp_mc_entry)	carp_mc_listhead;
	struct carp_vhost_entry *cur_vhe; /* current active vhe */
};

void	carp_sc_ref(void *, void *);
void	carp_sc_unref(void *, void *);

/* reference callbacks used by the SRP list of carp interfaces */
struct srpl_rc carp_sc_rc =
    SRPL_RC_INITIALIZER(carp_sc_ref, carp_sc_unref, NULL);

/* sysctl-tunable options, indexed by CARPCTL_* (see carp_sysctl()) */
int carp_opts[CARPCTL_MAXID] = { 0, 1, 0, LOG_CRIT };	/* XXX for now */
struct cpumem *carpcounters;

/* recursion guard for carp_send_ad_all() */
int	carp_send_all_recur = 0;

/*
 * Log `s' (a parenthesized addlog() argument list) at priority `l' when
 * carp_opts[CARPCTL_LOG] is at least `l'.  The message is prefixed with the
 * interface name of `sc', or with "carp: " when `sc' is NULL.
 */
#define	CARP_LOG(l, sc, s)						\
	do {								\
		if (carp_opts[CARPCTL_LOG] >= l) {			\
			if (sc)						\
				log(l, "%s: ",				\
				    (sc)->sc_if.if_xname);		\
			else						\
				log(l, "carp: ");			\
			addlog s;					\
			addlog("\n");					\
		}							\
	} while (0)

void	carp_hmac_prepare(struct carp_softc *);
void	carp_hmac_prepare_ctx(struct carp_vhost_entry *, u_int8_t);
void	carp_hmac_generate(struct carp_vhost_entry *, u_int32_t *,
	    unsigned char *, u_int8_t);
int	carp_hmac_verify(struct carp_vhost_entry *, u_int32_t *,
	    unsigned char *);
void	carp_proto_input_c(struct ifnet *, struct mbuf *,
	    struct carp_header *, int, sa_family_t);
int	carp_proto_input_if(struct ifnet *, struct mbuf **, int *, int);
#ifdef INET6
int	carp6_proto_input_if(struct ifnet *, struct mbuf **, int *, int);
#endif
void	carpattach(int);
void	carpdetach(void *);
void	carp_prepare_ad(struct mbuf *, struct carp_vhost_entry *,
	    struct carp_header *);
void	carp_send_ad_all(void);
void	carp_vhe_send_ad_all(struct carp_softc *);
void	carp_timer_ad(void *);
void	carp_send_ad(struct carp_vhost_entry *);
void	carp_send_arp(struct carp_softc *);
void	carp_timer_down(void *);
void	carp_master_down(struct carp_vhost_entry *);
int	carp_ioctl(struct ifnet *, u_long, caddr_t);
int	carp_vhids_ioctl(struct carp_softc *, struct carpreq *);
int	carp_check_dup_vhids(struct carp_softc *, struct srpl *,
	    struct carpreq *);
void	carp_ifgroup_ioctl(struct ifnet *, u_long, caddr_t);
void	carp_ifgattr_ioctl(struct ifnet *, u_long, caddr_t);
void	carp_start(struct ifnet *);
int	carp_enqueue(struct ifnet *, struct mbuf *);
void	carp_transmit(struct carp_softc *, struct ifnet *, struct mbuf *);
void	carp_setrun_all(struct carp_softc *, sa_family_t);
void	carp_setrun(struct carp_vhost_entry *, sa_family_t);
void	carp_set_state_all(struct carp_softc *, int);
void	carp_set_state(struct carp_vhost_entry *, int);
void	carp_multicast_cleanup(struct carp_softc *);
int	carp_set_ifp(struct carp_softc *, struct ifnet *);
void	carp_set_enaddr(struct carp_softc *);
void	carp_set_vhe_enaddr(struct carp_vhost_entry *);
void	carp_addr_updated(void *);
int	carp_set_addr(struct carp_softc *, struct sockaddr_in *);
int	carp_join_multicast(struct carp_softc *);
#ifdef INET6
void	carp_send_na(struct carp_softc *);
int	carp_set_addr6(struct carp_softc *, struct sockaddr_in6 *);
int	carp_join_multicast6(struct carp_softc *);
#endif
int	carp_clone_create(struct if_clone *, int);
int	carp_clone_destroy(struct ifnet *);
int	carp_ether_addmulti(struct carp_softc *, struct ifreq *);
int	carp_ether_delmulti(struct carp_softc *, struct ifreq *);
void	carp_ether_purgemulti(struct carp_softc *);
int	carp_group_demote_count(struct carp_softc *);
void	carp_update_lsmask(struct carp_softc *);
int	carp_new_vhost(struct carp_softc *, int, int);
void	carp_destroy_vhosts(struct carp_softc *);
void	carp_del_all_timeouts(struct carp_softc *);
int	carp_vhe_match(struct carp_softc *, uint8_t *);

struct if_clone carp_cloner =
    IF_CLONE_INITIALIZER("carp", carp_clone_create, carp_clone_destroy);

/* Internet checksum over the first _l bytes of mbuf chain _m */
#define carp_cksum(_m, _l)	((u_int16_t)in_cksum((_m), (_l)))
/* packet priority assigned to outgoing advertisements */
#define CARP_IFQ_PRIO	6

/*
 * Recompute every HMAC variant for every vhost entry of `sc'.
 * Must be called with the kernel lock held.
 */
void
carp_hmac_prepare(struct carp_softc *sc)
{
	struct carp_vhost_entry *vhe;
	u_int8_t i;

	KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */

	SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) {
		for (i = 0; i < HMAC_MAX; i++) {
			carp_hmac_prepare_ctx(vhe, i);
		}
	}
}

/*
 * Precompute the inner HMAC-SHA1 state of variant `ctx' for `vhe'.
 *
 * The inner hash covers, in this order: the key XORed with the ipad, the
 * CARP version, the advertisement type, optionally the interface's link
 * layer address, the vhid, every IPv4 address of the carp interface in
 * ascending order and (with INET6) every IPv6 address in ascending order.
 * The order matters: the digest is compared against the one carried in
 * received advertisements.
 *
 * On return vhe->vhe_sha1[ctx] holds the partial hash and vhe->vhe_pad holds
 * the opad used by carp_hmac_generate().  For the leader vhe the function
 * also derives sc_hashkey[] from the hash state taken before the vhid is
 * mixed in.
 */
void
carp_hmac_prepare_ctx(struct carp_vhost_entry *vhe, u_int8_t ctx)
{
	struct carp_softc *sc = vhe->parent_sc;

	u_int8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT;
	u_int8_t vhid = vhe->vhid & 0xff;
	SHA1_CTX sha1ctx;
	u_int32_t kmd[5];
	struct ifaddr *ifa;
	int i, found;
	struct in_addr last, cur, in;
#ifdef INET6
	struct in6_addr last6, cur6, in6;
#endif /* INET6 */

	/* compute ipad from key */
	memset(vhe->vhe_pad, 0, sizeof(vhe->vhe_pad));
	bcopy(sc->sc_key, vhe->vhe_pad, sizeof(sc->sc_key));
	for (i = 0; i < sizeof(vhe->vhe_pad); i++)
		vhe->vhe_pad[i] ^= 0x36;

	/* precompute first part of inner hash */
	SHA1Init(&vhe->vhe_sha1[ctx]);
	SHA1Update(&vhe->vhe_sha1[ctx], vhe->vhe_pad, sizeof(vhe->vhe_pad));
	SHA1Update(&vhe->vhe_sha1[ctx], (void *)&version, sizeof(version));
	SHA1Update(&vhe->vhe_sha1[ctx], (void *)&type, sizeof(type));

	/* generate a key for the arpbalance hash, before the vhid is hashed */
	if (vhe->vhe_leader) {
		/* finalize a copy so the running context stays usable */
		bcopy(&vhe->vhe_sha1[ctx], &sha1ctx, sizeof(sha1ctx));
		SHA1Final((unsigned char *)kmd, &sha1ctx);
		sc->sc_hashkey[0] = kmd[0] ^ kmd[1];
		sc->sc_hashkey[1] = kmd[2] ^ kmd[3];
	}

	/* the rest of the precomputation */
	/*
	 * The interface lladdr is only hashed for the leader, when not using
	 * the real MAC and when it differs from the vhe's own address.
	 */
	if (!sc->sc_realmac && vhe->vhe_leader &&
	    memcmp(sc->sc_ac.ac_enaddr, vhe->vhe_enaddr, ETHER_ADDR_LEN) != 0)
		SHA1Update(&vhe->vhe_sha1[ctx], sc->sc_ac.ac_enaddr,
		    ETHER_ADDR_LEN);

	SHA1Update(&vhe->vhe_sha1[ctx], (void *)&vhid, sizeof(vhid));

	/* Hash the addresses from smallest to largest, not interface order */
	/*
	 * Each pass picks the smallest address strictly greater than the one
	 * hashed in the previous pass (`last') and strictly below 0xffffffff;
	 * the loop ends when no such address is left.
	 */
	cur.s_addr = 0;
	do {
		found = 0;
		last = cur;
		cur.s_addr = 0xffffffff;
		TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) {
			if (ifa->ifa_addr->sa_family != AF_INET)
				continue;
			in.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr;
			if (ntohl(in.s_addr) > ntohl(last.s_addr) &&
			    ntohl(in.s_addr) < ntohl(cur.s_addr)) {
				cur.s_addr = in.s_addr;
				found++;
			}
		}
		if (found)
			SHA1Update(&vhe->vhe_sha1[ctx],
			    (void *)&cur, sizeof(cur));
	} while (found);
#ifdef INET6
	/* same selection scheme for IPv6, ordered by memcmp() */
	memset(&cur6, 0x00, sizeof(cur6));
	do {
		found = 0;
		last6 = cur6;
		memset(&cur6, 0xff, sizeof(cur6));
		TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) {
			if (ifa->ifa_addr->sa_family != AF_INET6)
				continue;
			in6 = ifatoia6(ifa)->ia_addr.sin6_addr;
			if (IN6_IS_SCOPE_EMBED(&in6)) {
				/* left out of the HMAC_NOV6LL variant */
				if (ctx == HMAC_NOV6LL)
					continue;
				/* clear the embedded scope before hashing */
				in6.s6_addr16[1] = 0;
			}
			if (memcmp(&in6, &last6, sizeof(in6)) > 0 &&
			    memcmp(&in6, &cur6, sizeof(in6)) < 0) {
				cur6 = in6;
				found++;
			}
		}
		if (found)
			SHA1Update(&vhe->vhe_sha1[ctx],
			    (void *)&cur6, sizeof(cur6));
	} while (found);
#endif /* INET6 */

	/* convert ipad to opad */
	for (i = 0; i < sizeof(vhe->vhe_pad); i++)
		vhe->vhe_pad[i] ^= 0x36 ^ 0x5c;
}

/*
 * Compute the HMAC of `counter' for variant `ctx' of `vhe' into `md'
 * (20 bytes).  Uses the state prepared by carp_hmac_prepare_ctx(): the
 * precomputed inner context is copied, the counter appended, and the result
 * hashed again together with the opad in vhe_pad.
 */
void
carp_hmac_generate(struct carp_vhost_entry *vhe, u_int32_t counter[2],
    unsigned char md[20], u_int8_t ctx)
{
	SHA1_CTX sha1ctx;

	/* fetch first half of inner hash */
	bcopy(&vhe->vhe_sha1[ctx], &sha1ctx, sizeof(sha1ctx));

	SHA1Update(&sha1ctx, (void *)counter, sizeof(vhe->vhe_replay_cookie));
	SHA1Final(md, &sha1ctx);

	/* outer hash */
	SHA1Init(&sha1ctx);
	SHA1Update(&sha1ctx, vhe->vhe_pad, sizeof(vhe->vhe_pad));
	SHA1Update(&sha1ctx, md, 20);
	SHA1Final(md, &sha1ctx);
}

/*
 * Check the digest `md' received with `counter' against every HMAC variant
 * of `vhe'.
 *
 * Returns 0 when one variant matches and 1 when none does; note that a
 * non-zero return means the verification FAILED.
 */
int
carp_hmac_verify(struct carp_vhost_entry *vhe, u_int32_t counter[2],
    unsigned char md[20])
{
	unsigned char md2[20];
	u_int8_t i;

	for (i = 0; i < HMAC_MAX; i++) {
		carp_hmac_generate(vhe, counter, md2, i);
		if (!timingsafe_bcmp(md, md2, sizeof(md2)))
			return (0);
	}
	return (1);
}

/*
 * IPv4 protocol input entry point for CARP.  Resolves the receiving
 * interface from the packet header and hands the packet to
 * carp_proto_input_if().  The packet is freed here when that interface no
 * longer exists.  The `af' argument is not used.
 */
int
carp_proto_input(struct mbuf **mp, int *offp, int proto, int af)
{
	struct ifnet *ifp;

	ifp = if_get((*mp)->m_pkthdr.ph_ifidx);
	if (ifp == NULL) {
		m_freemp(mp);
		return IPPROTO_DONE;
	}

	proto = carp_proto_input_if(ifp, mp, offp, proto);
	if_put(ifp);
	return proto;
}

/*
 * process input packet.
 * we have rearranged checks order compared to the rfc,
 * but it seems more efficient this way or not possible otherwise.
 *
 * Validates an IPv4 CARP packet received on `ifp' (interface type, TTL,
 * length, checksum) and passes it to carp_proto_input_c() under the kernel
 * lock.  The mbuf is always consumed and IPPROTO_DONE is always returned.
 */
int
carp_proto_input_if(struct ifnet *ifp, struct mbuf **mp, int *offp, int proto)
{
	struct mbuf *m = *mp;
	struct ip *ip = mtod(m, struct ip *);
	/* stays NULL here, so CARP_LOG uses the generic "carp: " prefix */
	struct carp_softc *sc = NULL;
	struct carp_header *ch;
	int iplen, len, ismulti;

	carpstat_inc(carps_ipackets);

	if (!carp_opts[CARPCTL_ALLOW]) {
		m_freem(m);
		return IPPROTO_DONE;
	}

	ismulti = IN_MULTICAST(ip->ip_dst.s_addr);

	/* check if received on a valid carp interface */
	switch (ifp->if_type) {
	case IFT_CARP:
		break;
	case IFT_ETHER:
		/* multicast, or the interface has carp children attached */
		if (ismulti || !SRPL_EMPTY_LOCKED(&ifp->if_carp))
			break;
		/* FALLTHROUGH */
	default:
		carpstat_inc(carps_badif);
		CARP_LOG(LOG_INFO, sc,
		    ("packet received on non-carp interface: %s",
		     ifp->if_xname));
		m_freem(m);
		return IPPROTO_DONE;
	}

	/* verify that the IP TTL is 255.  */
	if (ip->ip_ttl != CARP_DFLTTL) {
		carpstat_inc(carps_badttl);
		CARP_LOG(LOG_NOTICE, sc, ("received ttl %d != %d on %s",
		    ip->ip_ttl, CARP_DFLTTL, ifp->if_xname));
		m_freem(m);
		return IPPROTO_DONE;
	}

	/*
	 * verify that the received packet is long enough to hold
	 * the IP header followed by a complete CARP header
	 */
	iplen = ip->ip_hl << 2;
	len = iplen + sizeof(*ch);
	if (len > m->m_pkthdr.len) {
		carpstat_inc(carps_badlen);
		CARP_LOG(LOG_INFO, sc, ("packet too short %d on %s",
		    m->m_pkthdr.len, ifp->if_xname));
		m_freem(m);
		return IPPROTO_DONE;
	}

	/* m_pullup() may replace the mbuf; *mp is updated as well */
	if ((m = *mp = m_pullup(m, len)) == NULL) {
		carpstat_inc(carps_hdrops);
		return IPPROTO_DONE;
	}
	ip = mtod(m, struct ip *);
	ch = (struct carp_header *)(mtod(m, caddr_t) + iplen);

	/* verify the CARP checksum */
	/* temporarily skip the IP header so the sum starts at the CARP header */
	m->m_data += iplen;
	if (carp_cksum(m, len - iplen)) {
		carpstat_inc(carps_badsum);
		CARP_LOG(LOG_INFO, sc, ("checksum failed on %s",
		    ifp->if_xname));
		/* m_data is not restored on this path; the mbuf is freed */
		m_freem(m);
		return IPPROTO_DONE;
	}
	m->m_data -= iplen;

	KERNEL_LOCK();
	carp_proto_input_c(ifp, m, ch, ismulti, AF_INET);
	KERNEL_UNLOCK();
	return IPPROTO_DONE;
}

#ifdef INET6
/*
 * IPv6 protocol input entry point for CARP.  Resolves the receiving
 * interface from the packet header and hands the packet to
 * carp6_proto_input_if().  The packet is freed here when that interface no
 * longer exists.  The `af' argument is not used.
 */
int
carp6_proto_input(struct mbuf **mp, int *offp, int proto, int af)
{
	struct ifnet *ifp;

	ifp = if_get((*mp)->m_pkthdr.ph_ifidx);
	if (ifp == NULL) {
		m_freemp(mp);
		return IPPROTO_DONE;
	}

	proto = carp6_proto_input_if(ifp, mp, offp, proto);
	if_put(ifp);
	return proto;
}

/*
 * Validates an IPv6 CARP packet received on `ifp' (interface type, hop
 * limit, length, checksum) and passes it to carp_proto_input_c() under the
 * kernel lock.  `*offp' is the offset of the CARP header in the packet.
 * The mbuf is always consumed and IPPROTO_DONE is always returned.
 */
int
carp6_proto_input_if(struct ifnet *ifp, struct mbuf **mp, int *offp, int proto)
{
	struct mbuf *m = *mp;
	/* stays NULL here, so CARP_LOG uses the generic "carp: " prefix */
	struct carp_softc *sc = NULL;
	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
	struct carp_header *ch;
	u_int len;

	carpstat_inc(carps_ipackets6);

	if (!carp_opts[CARPCTL_ALLOW]) {
		m_freem(m);
		return IPPROTO_DONE;
	}

	/* check if received on a valid carp interface */
	if (ifp->if_type != IFT_CARP) {
		carpstat_inc(carps_badif);
		CARP_LOG(LOG_INFO, sc, ("packet received on non-carp interface: %s",
		    ifp->if_xname));
		m_freem(m);
		return IPPROTO_DONE;
	}

	/* verify that the IP TTL is 255 */
	if (ip6->ip6_hlim != CARP_DFLTTL) {
		carpstat_inc(carps_badttl);
		CARP_LOG(LOG_NOTICE, sc, ("received ttl %d != %d on %s",
		    ip6->ip6_hlim, CARP_DFLTTL, ifp->if_xname));
		m_freem(m);
		return IPPROTO_DONE;
	}

	/* verify that we have a complete carp packet */
	/* remembered for the log message only: length of the first mbuf */
	len = m->m_len;
	if ((m = *mp = m_pullup(m, *offp + sizeof(*ch))) == NULL) {
		carpstat_inc(carps_badlen);
		CARP_LOG(LOG_INFO, sc, ("packet size %u too small", len));
		return IPPROTO_DONE;
	}
	ch = (struct carp_header *)(mtod(m, caddr_t) + *offp);

	/* verify the CARP checksum */
	/* temporarily skip to the CARP header so the sum starts there */
	m->m_data += *offp;
	if (carp_cksum(m, sizeof(*ch))) {
		carpstat_inc(carps_badsum);
		CARP_LOG(LOG_INFO, sc, ("checksum failed, on %s",
		    ifp->if_xname));
		/* m_data is not restored on this path; the mbuf is freed */
		m_freem(m);
		return IPPROTO_DONE;
	}
	m->m_data -= *offp;

	KERNEL_LOCK();
	/* IPv6 advertisements are always treated as multicast here */
	carp_proto_input_c(ifp, m, ch, 1, AF_INET6);
	KERNEL_UNLOCK();
	return IPPROTO_DONE;
}
#endif /* INET6 */

/*
 * Common part of CARP input processing for both address families.
 *
 * Finds the carp interface and vhost entry matching the advertised vhid,
 * verifies version and HMAC, drops advertisements carrying our own replay
 * cookie, and then runs the state machine for that vhost entry based on the
 * advertised interval (advbase + advskew/256 seconds) and demotion counter.
 *
 * Must be called with the kernel lock held.  Always frees `m'.
 */
void
carp_proto_input_c(struct ifnet *ifp, struct mbuf *m, struct carp_header *ch,
    int ismulti, sa_family_t af)
{
	struct carp_softc *sc;
	struct ifnet *ifp0;
	struct carp_vhost_entry *vhe;
	struct timeval sc_tv, ch_tv;
	struct srpl *cif;

	KERNEL_ASSERT_LOCKED(); /* touching if_carp + carp_vhosts */

	ifp0 = if_get(ifp->if_carpdevidx);

	if (ifp->if_type == IFT_CARP) {
		/*
		 * If the parent of this carp(4) got destroyed while
		 * `m' was being processed, silently drop it.
		 */
		if (ifp0 == NULL)
			goto rele;
		cif = &ifp0->if_carp;
	} else
		cif = &ifp->if_carp;

	/*
	 * Look for the vhost entry with the advertised vhid.  If the loops
	 * run to completion `sc' is NULL, which is what the check after the
	 * label relies on.
	 */
	SRPL_FOREACH_LOCKED(sc, cif, sc_list) {
		/* IPv4: multicast/unicast must agree with the configured peer */
		if (af == AF_INET &&
		    ismulti != IN_MULTICAST(sc->sc_peer.s_addr))
			continue;
		SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) {
			if (vhe->vhid == ch->carp_vhid)
				goto found;
		}
	}
 found:

	if (!sc || (sc->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) !=
	    (IFF_UP|IFF_RUNNING)) {
		carpstat_inc(carps_badvhid);
		goto rele;
	}

	getmicrotime(&sc->sc_if.if_lastchange);

	/* verify the CARP version. */
	if (ch->carp_version != CARP_VERSION) {
		carpstat_inc(carps_badver);
		sc->sc_if.if_ierrors++;
		CARP_LOG(LOG_NOTICE, sc, ("invalid version %d != %d",
		    ch->carp_version, CARP_VERSION));
		goto rele;
	}

	/* verify the hash */
	/* carp_hmac_verify() returns non-zero when no variant matches */
	if (carp_hmac_verify(vhe, ch->carp_counter, ch->carp_md)) {
		carpstat_inc(carps_badauth);
		sc->sc_if.if_ierrors++;
		CARP_LOG(LOG_INFO, sc, ("incorrect hash"));
		goto rele;
	}

	/* the counter equals our own replay cookie: drop the packet */
	if (!memcmp(&vhe->vhe_replay_cookie, ch->carp_counter,
	    sizeof(ch->carp_counter))) {
		struct ifnet *ifp2;

		ifp2 = if_get(sc->sc_carpdevidx);
		/* Do not log duplicates from non simplex interfaces */
		if (ifp2 && ifp2->if_flags & IFF_SIMPLEX) {
			carpstat_inc(carps_badauth);
			sc->sc_if.if_ierrors++;
			CARP_LOG(LOG_WARNING, sc,
			    ("replay or network loop detected"));
		}
		if_put(ifp2);
		goto rele;
	}

	/* our own and the sender's advertisement interval, skew in 1/256 s */
	sc_tv.tv_sec = sc->sc_advbase;
	sc_tv.tv_usec = vhe->advskew * 1000000 / 256;
	ch_tv.tv_sec = ch->carp_advbase;
	ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256;

	switch (vhe->state) {
	case INIT:
		break;
	case MASTER:
		/*
		 * If we receive an advertisement from a master who's going to
		 * be more frequent than us, and whose demote count is not higher
		 * than ours, go into BACKUP state. If his demote count is lower,
		 * also go into BACKUP.
		 */
		if (((timercmp(&sc_tv, &ch_tv, >) ||
		    timercmp(&sc_tv, &ch_tv, ==)) &&
		    (ch->carp_demote <= carp_group_demote_count(sc))) ||
		    ch->carp_demote < carp_group_demote_count(sc)) {
			timeout_del(&vhe->ad_tmo);
			carp_set_state(vhe, BACKUP);
			carp_setrun(vhe, 0);
		}
		break;
	case BACKUP:
		/*
		 * If we're pre-empting masters who advertise slower than us,
		 * and do not have a better demote count, treat them as down.
		 *
		 */
		if (carp_opts[CARPCTL_PREEMPT] &&
		    timercmp(&sc_tv, &ch_tv, <) &&
		    ch->carp_demote >= carp_group_demote_count(sc)) {
			carp_master_down(vhe);
			break;
		}

		/*
		 * Take over masters advertising with a higher demote count,
		 * regardless of CARPCTL_PREEMPT.
		 */
		if (ch->carp_demote > carp_group_demote_count(sc)) {
			carp_master_down(vhe);
			break;
		}

		/*
		 * If the master is going to advertise at such a low frequency
		 * that he's guaranteed to time out, we might as well just
		 * treat him as timed out now.
		 */
		sc_tv.tv_sec = sc->sc_advbase * 3;
		if (sc->sc_advbase && timercmp(&sc_tv, &ch_tv, <)) {
			carp_master_down(vhe);
			break;
		}

		/*
		 * Otherwise, we reset the counter and wait for the next
		 * advertisement.
		 */
		carp_setrun(vhe, af);
		break;
	}

rele:
	/* ifp0 may be NULL here; the packet is consumed on every path */
	if_put(ifp0);
	m_freem(m);
	return;
}

/*
 * Read-only sysctl handler exporting a snapshot of the CARP counters as a
 * struct carpstats.
 */
int
carp_sysctl_carpstat(void *oldp, size_t *oldlenp, void *newp)
{
	struct carpstats carpstat;

	/* struct carpstats must be exactly carps_ncounters 64-bit counters */
	CTASSERT(sizeof(carpstat) == (carps_ncounters * sizeof(uint64_t)));
	memset(&carpstat, 0, sizeof carpstat);
	counters_read(carpcounters, (uint64_t *)&carpstat, carps_ncounters);
	return (sysctl_rdstruct(oldp, oldlenp, newp,
	    &carpstat, sizeof(carpstat)));
}

/*
 * sysctl handler for the CARP node.  CARPCTL_STATS returns the statistics;
 * every other valid name reads or writes the matching carp_opts[] integer
 * under the net lock.
 *
 * Returns ENOTDIR if `namelen' is not 1, ENOPROTOOPT for an out-of-range
 * name, otherwise the result of the underlying sysctl helper.
 */
int
carp_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
    size_t newlen)
{
	int error;

	/* All sysctl names at this level are terminal. */
	if (namelen != 1)
		return (ENOTDIR);

	switch (name[0]) {
	case CARPCTL_STATS:
		return (carp_sysctl_carpstat(oldp, oldlenp, newp));
	default:
		if (name[0] <= 0 || name[0] >= CARPCTL_MAXID)
			return (ENOPROTOOPT);
		NET_LOCK();
		error = sysctl_int(oldp, oldlenp, newp, newlen,
		    &carp_opts[name[0]]);
		NET_UNLOCK();
		return (error);
	}
}

/*
 * Interface side of the CARP implementation.
 */

/*
 * Pseudo-device attach routine: creates the "carp" interface group,
 * registers the interface cloner and allocates the statistics counters.
 * The argument is not used.
 */
/* ARGSUSED */
void
carpattach(int n)
{
	struct ifg_group	*ifg;

	if ((ifg = if_creategroup("carp")) != NULL)
		ifg->ifg_refcnt++;	/* keep around even if empty */
	if_clone_attach(&carp_cloner);
	carpcounters = counters_alloc(carps_ncounters);
}

/*
 * Cloner callback creating interface carp<unit>.
 *
 * Allocates the softc with an initial vhost entry (vhid 0, advskew 0), sets
 * up the hook tasks and defaults, and attaches the interface as an Ethernet
 * interface whose type is then changed to IFT_CARP.
 *
 * Returns 0 on success or ENOMEM if the vhost entry cannot be allocated.
 */
int
carp_clone_create(struct if_clone *ifc, int unit)
{
	struct carp_softc *sc;
	struct ifnet *ifp;

	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
	refcnt_init(&sc->sc_refcnt);

	SRPL_INIT(&sc->carp_vhosts);
	sc->sc_vhe_count = 0;
	if (carp_new_vhost(sc, 0, 0)) {
		free(sc, M_DEVBUF, sizeof(*sc));
		return (ENOMEM);
	}

	task_set(&sc->sc_atask, carp_addr_updated, sc);
	task_set(&sc->sc_ltask, carp_carpdev_state, sc);
	task_set(&sc->sc_dtask, carpdetach, sc);

	sc->sc_suppress = 0;
	sc->sc_advbase = CARP_DFLTINTV;
	sc->sc_naddrs = sc->sc_naddrs6 = 0;
#ifdef INET6
	sc->sc_im6o.im6o_hlim = CARP_DFLTTL;
#endif /* INET6 */
	sc->sc_imo.imo_membership = mallocarray(IP_MIN_MEMBERSHIPS,
	    sizeof(struct in_multi *), M_IPMOPTS, M_WAITOK|M_ZERO);
	sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS;

	LIST_INIT(&sc->carp_mc_listhead);
	ifp = &sc->sc_if;
	ifp->if_softc = sc;
	snprintf(ifp->if_xname, sizeof ifp->if_xname, "%s%d", ifc->ifc_name,
	    unit);
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_ioctl = carp_ioctl;
	ifp->if_start = carp_start;
	ifp->if_enqueue = carp_enqueue;
	ifp->if_xflags = IFXF_CLONED;
	if_counters_alloc(ifp);
	if_attach(ifp);
	ether_ifattach(ifp);
	/* set after ether_ifattach(): type, output routine and link state */
	ifp->if_type = IFT_CARP;
	ifp->if_sadl->sdl_type = IFT_CARP;
	ifp->if_output = carp_output;
	ifp->if_priority = IF_CARP_DEFAULT_PRIORITY;
	ifp->if_link_state = LINK_STATE_INVALID;

	/* Hook carp_addr_updated to cope with address and route changes. */
	if_addrhook_add(&sc->sc_if, &sc->sc_atask);

	return (0);
}

/*
 * Allocate a vhost entry with the given vhid and advskew and append it to
 * the vhost list of `sc'.  The first entry added to an empty list is marked
 * as leader.  The new entry takes a reference on `sc'.
 *
 * Must be called with the kernel lock held.
 * Returns 0 on success or ENOMEM if the allocation fails.
 */
int
carp_new_vhost(struct carp_softc *sc, int vhid, int advskew)
{
	struct carp_vhost_entry *vhe, *vhe0;

	vhe = malloc(sizeof(*vhe), M_DEVBUF, M_NOWAIT | M_ZERO);
	if (vhe == NULL)
		return (ENOMEM);

	refcnt_init(&vhe->vhost_refcnt);
	carp_sc_ref(NULL, sc); /* give a sc ref to the vhe */
	vhe->parent_sc = sc;
	vhe->vhid = vhid;
	vhe->advskew = advskew;
	vhe->state = INIT;
	timeout_set_proc(&vhe->ad_tmo, carp_timer_ad, vhe);
	timeout_set_proc(&vhe->md_tmo, carp_timer_down, vhe);
	timeout_set_proc(&vhe->md6_tmo, carp_timer_down, vhe);

	KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */

	/* mark the first vhe as leader */
	if (SRPL_EMPTY_LOCKED(&sc->carp_vhosts)) {
		vhe->vhe_leader = 1;
		SRPL_INSERT_HEAD_LOCKED(&carp_vh_rc, &sc->carp_vhosts,
		    vhe, vhost_entries);
		sc->sc_vhe_count = 1;
		return (0);
	}

	/* walk to the last entry so the new one is appended at the tail */
	SRPL_FOREACH_LOCKED(vhe0, &sc->carp_vhosts, vhost_entries) {
		if (SRPL_NEXT_LOCKED(vhe0, vhost_entries) == NULL)
			break;
	}

	SRPL_INSERT_AFTER_LOCKED(&carp_vh_rc, vhe0, vhe, vhost_entries);
	sc->sc_vhe_count++;

	return (0);
}

/*
 * Cloner callback destroying a carp interface: removes the address hook,
 * detaches from the parent interface, detaches the interface itself,
 * releases all vhost entries, waits for the remaining softc references to
 * go away and frees the softc.  Always returns 0.
 */
int
carp_clone_destroy(struct ifnet *ifp)
{
	struct carp_softc *sc = ifp->if_softc;

	if_addrhook_del(&sc->sc_if, &sc->sc_atask);

	NET_LOCK();
	carpdetach(sc);
	NET_UNLOCK();

	ether_ifdetach(ifp);
	if_detach(ifp);
	carp_destroy_vhosts(ifp->if_softc);
	refcnt_finalize(&sc->sc_refcnt, "carpdtor");
	free(sc->sc_imo.imo_membership, M_IPMOPTS,
	    sc->sc_imo.imo_max_memberships * sizeof(struct in_multi *));
	free(sc, M_DEVBUF, sizeof(*sc));
	return (0);
}

/*
 * Cancel the advertisement and master-down timeouts of every vhost entry of
 * `sc'.  Must be called with the kernel lock held.
 */
void
carp_del_all_timeouts(struct carp_softc *sc)
{
	struct carp_vhost_entry *vhe;

	KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
	SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) {
		timeout_del(&vhe->ad_tmo);
		timeout_del(&vhe->md_tmo);
		timeout_del(&vhe->md6_tmo);
	}
}

/*
 * Detach the carp interface `arg' (a struct carp_softc *) from its parent.
 *
 * Stops all timers, undoes this interface's demotion, puts every vhost
 * entry into INIT, marks the interface down and drops its multicast state.
 * If a parent interface is still attached, the softc is also removed from
 * the parent's carp list and the hooks and promiscuous mode taken on the
 * parent are released.  Also used as the body of sc_dtask.
 */
void
carpdetach(void *arg)
{
	struct carp_softc *sc = arg;
	struct ifnet *ifp0;
	struct srpl *cif;

	carp_del_all_timeouts(sc);

	if (sc->sc_demote_cnt)
		carp_group_demote_adj(&sc->sc_if, -sc->sc_demote_cnt, "detach");
	sc->sc_suppress = 0;
	sc->sc_sendad_errors = 0;

	carp_set_state_all(sc, INIT);
	sc->sc_if.if_flags &= ~IFF_UP;
	carp_setrun_all(sc, 0);
	carp_multicast_cleanup(sc);

	/* nothing more to undo when there is no parent interface */
	ifp0 = if_get(sc->sc_carpdevidx);
	if (ifp0 == NULL)
		return;

	KERNEL_ASSERT_LOCKED(); /* touching if_carp */

	cif = &ifp0->if_carp;

	SRPL_REMOVE_LOCKED(&carp_sc_rc, cif, sc, carp_softc, sc_list);
	sc->sc_carpdevidx = 0;

	if_linkstatehook_del(ifp0, &sc->sc_ltask);
	if_detachhook_del(ifp0, &sc->sc_dtask);
	ifpromisc(ifp0, 0);
	if_put(ifp0);
}

/*
 * Remove every vhost entry from `sc' and drop the list's reference on each.
 * Must be called with the kernel lock held.
 */
void
carp_destroy_vhosts(struct carp_softc *sc)
{
	/* XXX bow out? */
	struct carp_vhost_entry *vhe;

	KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */

	while ((vhe = SRPL_FIRST_LOCKED(&sc->carp_vhosts)) != NULL) {
		SRPL_REMOVE_LOCKED(&carp_vh_rc, &sc->carp_vhosts, vhe,
		    carp_vhost_entry, vhost_entries);
		carp_vh_unref(NULL, vhe); /* drop last ref */
	}
	sc->sc_vhe_count = 0;
}

/*
 * Fill in the authentication fields of the advertisement header `ch' for
 * `vhe': the replay cookie (generated randomly while it is still zero) is
 * copied into carp_counter and the HMAC is stored in carp_md.  The mbuf
 * argument is not used.
 */
void
carp_prepare_ad(struct mbuf *m, struct carp_vhost_entry *vhe,
    struct carp_header *ch)
{
	if (!vhe->vhe_replay_cookie) {
		arc4random_buf(&vhe->vhe_replay_cookie,
		    sizeof(vhe->vhe_replay_cookie));
	}

	bcopy(&vhe->vhe_replay_cookie, ch->carp_counter,
	    sizeof(ch->carp_counter));

	/*
	 * For the time being, do not include the IPv6 linklayer addresses
	 * in the HMAC.
	 */
	carp_hmac_generate(vhe, ch->carp_counter, ch->carp_md, HMAC_NOV6LL);
}

/*
 * Send an advertisement for every MASTER vhost entry of every carp
 * interface that is up and running and attached to an Ethernet interface.
 * Nested calls return immediately (guarded by carp_send_all_recur).
 * Must be called with the kernel lock held.
 */
void
carp_send_ad_all(void)
{
	struct ifnet *ifp0;
	struct srpl *cif;
	struct carp_softc *vh;

	KERNEL_ASSERT_LOCKED(); /* touching if_carp */

	if (carp_send_all_recur > 0)
		return;
	++carp_send_all_recur;
	TAILQ_FOREACH(ifp0, &ifnet, if_list) {
		if (ifp0->if_type != IFT_ETHER)
			continue;

		cif = &ifp0->if_carp;
		SRPL_FOREACH_LOCKED(vh, cif, sc_list) {
			if ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) ==
			    (IFF_UP|IFF_RUNNING)) {
				carp_vhe_send_ad_all(vh);
			}
		}
	}
	--carp_send_all_recur;
}

/*
 * Send an advertisement for each vhost entry of `sc' that is in MASTER
 * state.  Must be called with the kernel lock held.
 */
void
carp_vhe_send_ad_all(struct carp_softc *sc)
{
	struct carp_vhost_entry *vhe;

	KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */

	SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) {
		if (vhe->state == MASTER)
			carp_send_ad(vhe);
	}
}

/*
 * Advertisement timeout handler (ad_tmo); `v' is the vhost entry.
 * Takes the net lock around carp_send_ad().
 */
void
carp_timer_ad(void *v)
{
	NET_LOCK();
	carp_send_ad(v);
	NET_UNLOCK();
}

/*
 * Build and send one advertisement for `vhe', over IPv4 if the interface
 * has IPv4 addresses configured (sc_naddrs) and over IPv6 if it has IPv6
 * addresses configured (sc_naddrs6).
 *
 * Send failures are counted; when the error count reaches
 * CARP_SENDAD_MAX_ERRORS the interface is demoted, and after
 * CARP_SENDAD_MIN_SUCCESS successful sends in a row the demotion is undone.
 * Unless the interface is bowing out, the advertisement timeout is re-armed
 * to advbase + advskew/256 seconds.
 *
 * Must be called with the net lock held.
 */
void
carp_send_ad(struct carp_vhost_entry *vhe)
{
	struct carp_header ch;
	struct timeval tv;
	struct carp_softc *sc = vhe->parent_sc;
	struct carp_header *ch_ptr;
	struct mbuf *m;
	int error, len, advbase, advskew;
	struct ifnet *ifp;
	struct ifaddr *ifa;
	struct sockaddr sa;

	NET_ASSERT_LOCKED();

	if ((ifp = if_get(sc->sc_carpdevidx)) == NULL) {
		sc->sc_if.if_oerrors++;
		return;
	}

	/* bow out if we've gone to backup (the carp interface is going down) */
	if (sc->sc_bow_out) {
		/* tv is left unset; it is not used when both values are 255 */
		advbase = 255;
		advskew = 255;
	} else {
		advbase = sc->sc_advbase;
		advskew = vhe->advskew;
		tv.tv_sec = advbase;
		/* never arm a zero-length timeout: use 1/256 s instead */
		if (advbase == 0 && advskew == 0)
			tv.tv_usec = 1 * 1000000 / 256;
		else
			tv.tv_usec = advskew * 1000000 / 256;
	}

	/* header template; copied into each outgoing packet below */
	ch.carp_version = CARP_VERSION;
	ch.carp_type = CARP_ADVERTISEMENT;
	ch.carp_vhid = vhe->vhid;
	ch.carp_demote = carp_group_demote_count(sc) & 0xff;
	ch.carp_advbase = advbase;
	ch.carp_advskew = advskew;
	ch.carp_authlen = 7;	/* XXX DEFINE */
	ch.carp_cksum = 0;

	sc->cur_vhe = vhe; /* we need the vhe later on the output path */

	if (sc->sc_naddrs) {
		struct ip *ip;

		MGETHDR(m, M_DONTWAIT, MT_HEADER);
		if (m == NULL) {
			sc->sc_if.if_oerrors++;
			carpstat_inc(carps_onomem);
			/* XXX maybe less ? */
			goto retry_later;
		}
		len = sizeof(*ip) + sizeof(ch);
		m->m_pkthdr.pf.prio = CARP_IFQ_PRIO;
		m->m_pkthdr.ph_rtableid = sc->sc_if.if_rdomain;
		m->m_pkthdr.len = len;
		m->m_len = len;
		m_align(m, len);
		ip = mtod(m, struct ip *);
		ip->ip_v = IPVERSION;
		ip->ip_hl = sizeof(*ip) >> 2;
		ip->ip_tos = IPTOS_LOWDELAY;
		ip->ip_len = htons(len);
		ip->ip_id = htons(ip_randomid());
		ip->ip_off = htons(IP_DF);
		ip->ip_ttl = CARP_DFLTTL;
		ip->ip_p = IPPROTO_CARP;
		ip->ip_sum = 0;

		memset(&sa, 0, sizeof(sa));
		sa.sa_family = AF_INET;
		/* Prefer addresses on the parent interface as source for AD. */
		ifa = ifaof_ifpforaddr(&sa, ifp);
		if (ifa == NULL)
			ifa = ifaof_ifpforaddr(&sa, &sc->sc_if);
		KASSERT(ifa != NULL);
		ip->ip_src.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr;
		ip->ip_dst.s_addr = sc->sc_peer.s_addr;
		if (IN_MULTICAST(ip->ip_dst.s_addr))
			m->m_flags |= M_MCAST;

		ch_ptr = (struct carp_header *)(ip + 1);
		bcopy(&ch, ch_ptr, sizeof(ch));
		carp_prepare_ad(m, vhe, ch_ptr);

		/* checksum covers the CARP header only: skip the IP header */
		m->m_data += sizeof(*ip);
		ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip));
		m->m_data -= sizeof(*ip);

		getmicrotime(&sc->sc_if.if_lastchange);
		carpstat_inc(carps_opackets);

		error = ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo,
		    NULL, 0);
		if (error &&
		    /* when unicast, the peer's down is not our fault */
		    !(!IN_MULTICAST(sc->sc_peer.s_addr) && error == EHOSTDOWN)){
			if (error == ENOBUFS)
				carpstat_inc(carps_onomem);
			else
				CARP_LOG(LOG_WARNING, sc,
				    ("ip_output failed: %d", error));
			sc->sc_if.if_oerrors++;
			if (sc->sc_sendad_errors < INT_MAX)
				sc->sc_sendad_errors++;
			/* demote exactly once, when the threshold is reached */
			if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS(sc))
				carp_group_demote_adj(&sc->sc_if, 1,
				    "> snderrors");
			sc->sc_sendad_success = 0;
		} else {
			if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS(sc)) {
				/* demoted: recover after enough successes */
				if (++sc->sc_sendad_success >=
				    CARP_SENDAD_MIN_SUCCESS(sc)) {
					carp_group_demote_adj(&sc->sc_if, -1,
					    "< snderrors");
					sc->sc_sendad_errors = 0;
				}
			} else
				sc->sc_sendad_errors = 0;
		}
		if (vhe->vhe_leader) {
			/* count down, send the ARP at 0, then park at -1 */
			if (sc->sc_delayed_arp > 0)
				sc->sc_delayed_arp--;
			if (sc->sc_delayed_arp == 0) {
				carp_send_arp(sc);
				sc->sc_delayed_arp = -1;
			}
		}
	}
#ifdef INET6
	if (sc->sc_naddrs6) {
		struct ip6_hdr *ip6;

		MGETHDR(m, M_DONTWAIT, MT_HEADER);
		if (m == NULL) {
			sc->sc_if.if_oerrors++;
			carpstat_inc(carps_onomem);
			/* XXX maybe less ? */
			goto retry_later;
		}
		len = sizeof(*ip6) + sizeof(ch);
		m->m_pkthdr.pf.prio = CARP_IFQ_PRIO;
		m->m_pkthdr.ph_rtableid = sc->sc_if.if_rdomain;
		m->m_pkthdr.len = len;
		m->m_len = len;
		m_align(m, len);
		m->m_flags |= M_MCAST;
		ip6 = mtod(m, struct ip6_hdr *);
		memset(ip6, 0, sizeof(*ip6));
		ip6->ip6_vfc |= IPV6_VERSION;
		ip6->ip6_hlim = CARP_DFLTTL;
		ip6->ip6_nxt = IPPROTO_CARP;

		/* set the source address */
		memset(&sa, 0, sizeof(sa));
		sa.sa_family = AF_INET6;
		/* Prefer addresses on the parent interface as source for AD. */
		ifa = ifaof_ifpforaddr(&sa, ifp);
		if (ifa == NULL)
			ifa = ifaof_ifpforaddr(&sa, &sc->sc_if);
		KASSERT(ifa != NULL);
		bcopy(ifatoia6(ifa)->ia_addr.sin6_addr.s6_addr,
		    &ip6->ip6_src, sizeof(struct in6_addr));
		/* set the multicast destination */

		/* ff02::12 with the parent's interface index embedded */
		ip6->ip6_dst.s6_addr16[0] = htons(0xff02);
		ip6->ip6_dst.s6_addr16[1] = htons(ifp->if_index);
		ip6->ip6_dst.s6_addr8[15] = 0x12;

		ch_ptr = (struct carp_header *)(ip6 + 1);
		bcopy(&ch, ch_ptr, sizeof(ch));
		carp_prepare_ad(m, vhe, ch_ptr);

		/* checksum covers the CARP header only: skip the IPv6 header */
		m->m_data += sizeof(*ip6);
		ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip6));
		m->m_data -= sizeof(*ip6);

		getmicrotime(&sc->sc_if.if_lastchange);
		carpstat_inc(carps_opackets6);

		error = ip6_output(m, NULL, NULL, 0, &sc->sc_im6o, NULL);
		if (error) {
			if (error == ENOBUFS)
				carpstat_inc(carps_onomem);
			else
				CARP_LOG(LOG_WARNING, sc,
				    ("ip6_output failed: %d", error));
			sc->sc_if.if_oerrors++;
			if (sc->sc_sendad_errors < INT_MAX)
				sc->sc_sendad_errors++;
			/* demote exactly once, when the threshold is reached */
			if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS(sc))
				carp_group_demote_adj(&sc->sc_if, 1,
				    "> snd6errors");
			sc->sc_sendad_success = 0;
		} else {
			if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS(sc)) {
				/* demoted: recover after enough successes */
				if (++sc->sc_sendad_success >=
				    CARP_SENDAD_MIN_SUCCESS(sc)) {
					carp_group_demote_adj(&sc->sc_if, -1,
					    "< snd6errors");
					sc->sc_sendad_errors = 0;
				}
			} else
				sc->sc_sendad_errors = 0;
		}
	}
#endif /* INET6 */

retry_later:
	sc->cur_vhe = NULL;
	/* no re-arm when both values are 255 (set when bowing out) */
	if (advbase != 255 || advskew != 255)
		timeout_add_tv(&vhe->ad_tmo, &tv);
	if_put(ifp);
}

/*
 * Broadcast a gratuitous ARP request containing
 * the virtual router MAC address for each IP address
 * associated with the virtual router.
1268 */ 1269 void 1270 carp_send_arp(struct carp_softc *sc) 1271 { 1272 struct ifaddr *ifa; 1273 in_addr_t in; 1274 1275 TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) { 1276 1277 if (ifa->ifa_addr->sa_family != AF_INET) 1278 continue; 1279 1280 in = ifatoia(ifa)->ia_addr.sin_addr.s_addr; 1281 arprequest(&sc->sc_if, &in, &in, sc->sc_ac.ac_enaddr); 1282 } 1283 } 1284 1285 #ifdef INET6 1286 void 1287 carp_send_na(struct carp_softc *sc) 1288 { 1289 struct ifaddr *ifa; 1290 struct in6_addr *in6; 1291 static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT; 1292 1293 TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) { 1294 1295 if (ifa->ifa_addr->sa_family != AF_INET6) 1296 continue; 1297 1298 in6 = &ifatoia6(ifa)->ia_addr.sin6_addr; 1299 nd6_na_output(&sc->sc_if, &mcast, in6, 1300 ND_NA_FLAG_OVERRIDE | 1301 (ip6_forwarding ? ND_NA_FLAG_ROUTER : 0), 1, NULL); 1302 } 1303 } 1304 #endif /* INET6 */ 1305 1306 void 1307 carp_update_lsmask(struct carp_softc *sc) 1308 { 1309 struct carp_vhost_entry *vhe; 1310 int count; 1311 1312 if (sc->sc_balancing == CARP_BAL_NONE) 1313 return; 1314 1315 sc->sc_lsmask = 0; 1316 count = 0; 1317 1318 KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */ 1319 SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) { 1320 if (vhe->state == MASTER && count < sizeof(sc->sc_lsmask) * 8) 1321 sc->sc_lsmask |= 1 << count; 1322 count++; 1323 } 1324 sc->sc_lscount = count; 1325 CARP_LOG(LOG_DEBUG, sc, ("carp_update_lsmask: %x", sc->sc_lsmask)); 1326 } 1327 1328 int 1329 carp_iamatch(struct ifnet *ifp) 1330 { 1331 struct carp_softc *sc = ifp->if_softc; 1332 struct carp_vhost_entry *vhe; 1333 struct srp_ref sr; 1334 int match = 0; 1335 1336 vhe = SRPL_FIRST(&sr, &sc->carp_vhosts); 1337 if (vhe->state == MASTER) 1338 match = 1; 1339 SRPL_LEAVE(&sr); 1340 1341 return (match); 1342 } 1343 1344 int 1345 carp_ourether(struct ifnet *ifp, uint8_t *ena) 1346 { 1347 struct srpl *cif = &ifp->if_carp; 1348 struct carp_softc *sc; 1349 struct srp_ref 
sr; 1350 int match = 0; 1351 1352 KASSERT(ifp->if_type == IFT_ETHER); 1353 1354 SRPL_FOREACH(sc, &sr, cif, sc_list) { 1355 if ((sc->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) != 1356 (IFF_UP|IFF_RUNNING)) 1357 continue; 1358 if (carp_vhe_match(sc, ena)) { 1359 match = 1; 1360 break; 1361 } 1362 } 1363 SRPL_LEAVE(&sr); 1364 1365 return (match); 1366 } 1367 1368 int 1369 carp_vhe_match(struct carp_softc *sc, uint8_t *ena) 1370 { 1371 struct carp_vhost_entry *vhe; 1372 struct srp_ref sr; 1373 int match = 0; 1374 1375 vhe = SRPL_FIRST(&sr, &sc->carp_vhosts); 1376 match = (vhe->state == MASTER || sc->sc_balancing >= CARP_BAL_IP) && 1377 !memcmp(ena, sc->sc_ac.ac_enaddr, ETHER_ADDR_LEN); 1378 SRPL_LEAVE(&sr); 1379 1380 return (match); 1381 } 1382 1383 struct mbuf * 1384 carp_input(struct ifnet *ifp0, struct mbuf *m) 1385 { 1386 struct ether_header *eh; 1387 struct srpl *cif; 1388 struct carp_softc *sc; 1389 struct srp_ref sr; 1390 1391 eh = mtod(m, struct ether_header *); 1392 cif = &ifp0->if_carp; 1393 1394 SRPL_FOREACH(sc, &sr, cif, sc_list) { 1395 if ((sc->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) != 1396 (IFF_UP|IFF_RUNNING)) 1397 continue; 1398 1399 if (carp_vhe_match(sc, eh->ether_dhost)) { 1400 /* 1401 * These packets look like layer 2 multicast but they 1402 * are unicast at layer 3. With help of the tag the 1403 * mbuf's M_MCAST flag can be removed by carp_lsdrop() 1404 * after we have passed layer 2. 1405 */ 1406 if (sc->sc_balancing == CARP_BAL_IP) { 1407 struct m_tag *mtag; 1408 mtag = m_tag_get(PACKET_TAG_CARP_BAL_IP, 0, 1409 M_NOWAIT); 1410 if (mtag == NULL) { 1411 m_freem(m); 1412 goto out; 1413 } 1414 m_tag_prepend(m, mtag); 1415 } 1416 break; 1417 } 1418 } 1419 1420 if (sc == NULL) { 1421 SRPL_LEAVE(&sr); 1422 1423 if (!ETHER_IS_MULTICAST(eh->ether_dhost)) 1424 return (m); 1425 1426 /* 1427 * XXX Should really check the list of multicast addresses 1428 * for each CARP interface _before_ copying. 
1429 */ 1430 SRPL_FOREACH(sc, &sr, cif, sc_list) { 1431 struct mbuf *m0; 1432 1433 if (!(sc->sc_if.if_flags & IFF_UP)) 1434 continue; 1435 1436 m0 = m_dup_pkt(m, ETHER_ALIGN, M_DONTWAIT); 1437 if (m0 == NULL) 1438 continue; 1439 1440 if_vinput(&sc->sc_if, m0); 1441 } 1442 SRPL_LEAVE(&sr); 1443 1444 return (m); 1445 } 1446 1447 if_vinput(&sc->sc_if, m); 1448 out: 1449 SRPL_LEAVE(&sr); 1450 1451 return (NULL); 1452 } 1453 1454 int 1455 carp_lsdrop(struct ifnet *ifp, struct mbuf *m, sa_family_t af, u_int32_t *src, 1456 u_int32_t *dst, int drop) 1457 { 1458 struct carp_softc *sc; 1459 u_int32_t fold; 1460 struct m_tag *mtag; 1461 1462 if (ifp->if_type != IFT_CARP) 1463 return 0; 1464 sc = ifp->if_softc; 1465 if (sc->sc_balancing == CARP_BAL_NONE) 1466 return 0; 1467 1468 /* 1469 * Remove M_MCAST flag from mbuf of balancing ip traffic, since the fact 1470 * that it is layer 2 multicast does not implicate that it is also layer 1471 * 3 multicast. 1472 */ 1473 if (m->m_flags & M_MCAST && 1474 (mtag = m_tag_find(m, PACKET_TAG_CARP_BAL_IP, NULL))) { 1475 m_tag_delete(m, mtag); 1476 m->m_flags &= ~M_MCAST; 1477 } 1478 1479 /* 1480 * Return without making a drop decision. This allows to clear the 1481 * M_MCAST flag and do nothing else. 1482 */ 1483 if (!drop) 1484 return 0; 1485 1486 /* 1487 * Never drop carp advertisements. 1488 * XXX Bad idea to pass all broadcast / multicast traffic? 
1489 */ 1490 if (m->m_flags & (M_BCAST|M_MCAST)) 1491 return 0; 1492 1493 fold = src[0] ^ dst[0]; 1494 #ifdef INET6 1495 if (af == AF_INET6) { 1496 int i; 1497 for (i = 1; i < 4; i++) 1498 fold ^= src[i] ^ dst[i]; 1499 } 1500 #endif 1501 if (sc->sc_lscount == 0) /* just to be safe */ 1502 return 1; 1503 1504 return ((1 << (ntohl(fold) % sc->sc_lscount)) & sc->sc_lsmask) == 0; 1505 } 1506 1507 void 1508 carp_timer_down(void *v) 1509 { 1510 NET_LOCK(); 1511 carp_master_down(v); 1512 NET_UNLOCK(); 1513 } 1514 1515 void 1516 carp_master_down(struct carp_vhost_entry *vhe) 1517 { 1518 struct carp_softc *sc = vhe->parent_sc; 1519 1520 NET_ASSERT_LOCKED(); 1521 1522 switch (vhe->state) { 1523 case INIT: 1524 printf("%s: master_down event in INIT state\n", 1525 sc->sc_if.if_xname); 1526 break; 1527 case MASTER: 1528 break; 1529 case BACKUP: 1530 carp_set_state(vhe, MASTER); 1531 carp_send_ad(vhe); 1532 if (sc->sc_balancing == CARP_BAL_NONE && vhe->vhe_leader) { 1533 carp_send_arp(sc); 1534 /* Schedule a delayed ARP to deal w/ some L3 switches */ 1535 sc->sc_delayed_arp = 2; 1536 #ifdef INET6 1537 carp_send_na(sc); 1538 #endif /* INET6 */ 1539 } 1540 carp_setrun(vhe, 0); 1541 carpstat_inc(carps_preempt); 1542 break; 1543 } 1544 } 1545 1546 void 1547 carp_setrun_all(struct carp_softc *sc, sa_family_t af) 1548 { 1549 struct carp_vhost_entry *vhe; 1550 1551 KERNEL_ASSERT_LOCKED(); /* touching carp_vhost */ 1552 SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) { 1553 carp_setrun(vhe, af); 1554 } 1555 } 1556 1557 /* 1558 * When in backup state, af indicates whether to reset the master down timer 1559 * for v4 or v6. If it's set to zero, reset the ones which are already pending. 
1560 */ 1561 void 1562 carp_setrun(struct carp_vhost_entry *vhe, sa_family_t af) 1563 { 1564 struct ifnet *ifp; 1565 struct timeval tv; 1566 struct carp_softc *sc = vhe->parent_sc; 1567 1568 if ((ifp = if_get(sc->sc_carpdevidx)) == NULL) { 1569 sc->sc_if.if_flags &= ~IFF_RUNNING; 1570 carp_set_state_all(sc, INIT); 1571 return; 1572 } 1573 1574 if (memcmp(((struct arpcom *)ifp)->ac_enaddr, 1575 sc->sc_ac.ac_enaddr, ETHER_ADDR_LEN) == 0) 1576 sc->sc_realmac = 1; 1577 else 1578 sc->sc_realmac = 0; 1579 1580 if_put(ifp); 1581 1582 if (sc->sc_if.if_flags & IFF_UP && vhe->vhid > 0 && 1583 (sc->sc_naddrs || sc->sc_naddrs6) && !sc->sc_suppress) { 1584 sc->sc_if.if_flags |= IFF_RUNNING; 1585 } else { 1586 sc->sc_if.if_flags &= ~IFF_RUNNING; 1587 return; 1588 } 1589 1590 switch (vhe->state) { 1591 case INIT: 1592 carp_set_state(vhe, BACKUP); 1593 carp_setrun(vhe, 0); 1594 break; 1595 case BACKUP: 1596 timeout_del(&vhe->ad_tmo); 1597 tv.tv_sec = 3 * sc->sc_advbase; 1598 if (sc->sc_advbase == 0 && vhe->advskew == 0) 1599 tv.tv_usec = 3 * 1000000 / 256; 1600 else if (sc->sc_advbase == 0) 1601 tv.tv_usec = 3 * vhe->advskew * 1000000 / 256; 1602 else 1603 tv.tv_usec = vhe->advskew * 1000000 / 256; 1604 if (vhe->vhe_leader) 1605 sc->sc_delayed_arp = -1; 1606 switch (af) { 1607 case AF_INET: 1608 timeout_add_tv(&vhe->md_tmo, &tv); 1609 break; 1610 #ifdef INET6 1611 case AF_INET6: 1612 timeout_add_tv(&vhe->md6_tmo, &tv); 1613 break; 1614 #endif /* INET6 */ 1615 default: 1616 if (sc->sc_naddrs) 1617 timeout_add_tv(&vhe->md_tmo, &tv); 1618 if (sc->sc_naddrs6) 1619 timeout_add_tv(&vhe->md6_tmo, &tv); 1620 break; 1621 } 1622 break; 1623 case MASTER: 1624 tv.tv_sec = sc->sc_advbase; 1625 if (sc->sc_advbase == 0 && vhe->advskew == 0) 1626 tv.tv_usec = 1 * 1000000 / 256; 1627 else 1628 tv.tv_usec = vhe->advskew * 1000000 / 256; 1629 timeout_add_tv(&vhe->ad_tmo, &tv); 1630 break; 1631 } 1632 } 1633 1634 void 1635 carp_multicast_cleanup(struct carp_softc *sc) 1636 { 1637 struct ip_moptions 
*imo = &sc->sc_imo; 1638 #ifdef INET6 1639 struct ip6_moptions *im6o = &sc->sc_im6o; 1640 #endif 1641 u_int16_t n = imo->imo_num_memberships; 1642 1643 /* Clean up our own multicast memberships */ 1644 while (n-- > 0) { 1645 if (imo->imo_membership[n] != NULL) { 1646 in_delmulti(imo->imo_membership[n]); 1647 imo->imo_membership[n] = NULL; 1648 } 1649 } 1650 imo->imo_num_memberships = 0; 1651 imo->imo_ifidx = 0; 1652 1653 #ifdef INET6 1654 while (!LIST_EMPTY(&im6o->im6o_memberships)) { 1655 struct in6_multi_mship *imm = 1656 LIST_FIRST(&im6o->im6o_memberships); 1657 1658 LIST_REMOVE(imm, i6mm_chain); 1659 in6_leavegroup(imm); 1660 } 1661 im6o->im6o_ifidx = 0; 1662 #endif 1663 1664 /* And any other multicast memberships */ 1665 carp_ether_purgemulti(sc); 1666 } 1667 1668 int 1669 carp_set_ifp(struct carp_softc *sc, struct ifnet *ifp0) 1670 { 1671 struct srpl *cif; 1672 struct carp_softc *vr, *last = NULL, *after = NULL; 1673 int myself = 0, error = 0; 1674 1675 KASSERT(ifp0->if_index != sc->sc_carpdevidx); 1676 KERNEL_ASSERT_LOCKED(); /* touching if_carp */ 1677 1678 if ((ifp0->if_flags & IFF_MULTICAST) == 0) 1679 return (EADDRNOTAVAIL); 1680 1681 if (ifp0->if_type != IFT_ETHER) 1682 return (EINVAL); 1683 1684 cif = &ifp0->if_carp; 1685 if (carp_check_dup_vhids(sc, cif, NULL)) 1686 return (EINVAL); 1687 1688 if ((error = ifpromisc(ifp0, 1))) 1689 return (error); 1690 1691 /* detach from old interface */ 1692 if (sc->sc_carpdevidx != 0) 1693 carpdetach(sc); 1694 1695 /* attach carp interface to physical interface */ 1696 if_detachhook_add(ifp0, &sc->sc_dtask); 1697 if_linkstatehook_add(ifp0, &sc->sc_ltask); 1698 1699 sc->sc_carpdevidx = ifp0->if_index; 1700 sc->sc_if.if_capabilities = ifp0->if_capabilities & 1701 IFCAP_CSUM_MASK; 1702 1703 SRPL_FOREACH_LOCKED(vr, cif, sc_list) { 1704 struct carp_vhost_entry *vrhead, *schead; 1705 last = vr; 1706 1707 if (vr == sc) 1708 myself = 1; 1709 1710 vrhead = SRPL_FIRST_LOCKED(&vr->carp_vhosts); 1711 schead = 
SRPL_FIRST_LOCKED(&sc->carp_vhosts); 1712 if (vrhead->vhid < schead->vhid) 1713 after = vr; 1714 } 1715 1716 if (!myself) { 1717 /* We're trying to keep things in order */ 1718 if (last == NULL) { 1719 SRPL_INSERT_HEAD_LOCKED(&carp_sc_rc, cif, 1720 sc, sc_list); 1721 } else if (after == NULL) { 1722 SRPL_INSERT_AFTER_LOCKED(&carp_sc_rc, last, 1723 sc, sc_list); 1724 } else { 1725 SRPL_INSERT_AFTER_LOCKED(&carp_sc_rc, after, 1726 sc, sc_list); 1727 } 1728 } 1729 if (sc->sc_naddrs || sc->sc_naddrs6) 1730 sc->sc_if.if_flags |= IFF_UP; 1731 carp_set_enaddr(sc); 1732 1733 carp_carpdev_state(sc); 1734 1735 return (0); 1736 } 1737 1738 void 1739 carp_set_vhe_enaddr(struct carp_vhost_entry *vhe) 1740 { 1741 struct carp_softc *sc = vhe->parent_sc; 1742 1743 if (vhe->vhid != 0 && sc->sc_carpdevidx != 0) { 1744 if (vhe->vhe_leader && sc->sc_balancing == CARP_BAL_IP) 1745 vhe->vhe_enaddr[0] = 1; 1746 else 1747 vhe->vhe_enaddr[0] = 0; 1748 vhe->vhe_enaddr[1] = 0; 1749 vhe->vhe_enaddr[2] = 0x5e; 1750 vhe->vhe_enaddr[3] = 0; 1751 vhe->vhe_enaddr[4] = 1; 1752 vhe->vhe_enaddr[5] = vhe->vhid; 1753 } else 1754 memset(vhe->vhe_enaddr, 0, ETHER_ADDR_LEN); 1755 } 1756 1757 void 1758 carp_set_enaddr(struct carp_softc *sc) 1759 { 1760 struct carp_vhost_entry *vhe; 1761 1762 KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */ 1763 SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) 1764 carp_set_vhe_enaddr(vhe); 1765 1766 vhe = SRPL_FIRST_LOCKED(&sc->carp_vhosts); 1767 1768 /* 1769 * Use the carp lladdr if the running one isn't manually set. 1770 * Only compare static parts of the lladdr. 
1771 */ 1772 if ((memcmp(sc->sc_ac.ac_enaddr + 1, vhe->vhe_enaddr + 1, 1773 ETHER_ADDR_LEN - 2) == 0) || 1774 (!sc->sc_ac.ac_enaddr[0] && !sc->sc_ac.ac_enaddr[1] && 1775 !sc->sc_ac.ac_enaddr[2] && !sc->sc_ac.ac_enaddr[3] && 1776 !sc->sc_ac.ac_enaddr[4] && !sc->sc_ac.ac_enaddr[5])) 1777 bcopy(vhe->vhe_enaddr, sc->sc_ac.ac_enaddr, ETHER_ADDR_LEN); 1778 1779 /* Make sure the enaddr has changed before further twiddling. */ 1780 if (memcmp(sc->sc_ac.ac_enaddr, sc->sc_curlladdr, ETHER_ADDR_LEN) != 0) { 1781 bcopy(sc->sc_ac.ac_enaddr, LLADDR(sc->sc_if.if_sadl), 1782 ETHER_ADDR_LEN); 1783 bcopy(sc->sc_ac.ac_enaddr, sc->sc_curlladdr, ETHER_ADDR_LEN); 1784 #ifdef INET6 1785 /* 1786 * (re)attach a link-local address which matches 1787 * our new MAC address. 1788 */ 1789 if (sc->sc_naddrs6) 1790 in6_ifattach_linklocal(&sc->sc_if, NULL); 1791 #endif 1792 carp_set_state_all(sc, INIT); 1793 carp_setrun_all(sc, 0); 1794 } 1795 } 1796 1797 void 1798 carp_addr_updated(void *v) 1799 { 1800 struct carp_softc *sc = (struct carp_softc *) v; 1801 struct ifaddr *ifa; 1802 int new_naddrs = 0, new_naddrs6 = 0; 1803 1804 TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) { 1805 if (ifa->ifa_addr->sa_family == AF_INET) 1806 new_naddrs++; 1807 #ifdef INET6 1808 else if (ifa->ifa_addr->sa_family == AF_INET6) 1809 new_naddrs6++; 1810 #endif /* INET6 */ 1811 } 1812 1813 /* We received address changes from if_addrhooks callback */ 1814 if (new_naddrs != sc->sc_naddrs || new_naddrs6 != sc->sc_naddrs6) { 1815 1816 sc->sc_naddrs = new_naddrs; 1817 sc->sc_naddrs6 = new_naddrs6; 1818 1819 /* Re-establish multicast membership removed by in_control */ 1820 if (IN_MULTICAST(sc->sc_peer.s_addr)) { 1821 if (!in_hasmulti(&sc->sc_peer, &sc->sc_if)) { 1822 struct in_multi **imm = 1823 sc->sc_imo.imo_membership; 1824 u_int16_t maxmem = 1825 sc->sc_imo.imo_max_memberships; 1826 1827 memset(&sc->sc_imo, 0, sizeof(sc->sc_imo)); 1828 sc->sc_imo.imo_membership = imm; 1829 sc->sc_imo.imo_max_memberships = maxmem; 
1830 1831 if (sc->sc_carpdevidx != 0 && 1832 sc->sc_naddrs > 0) 1833 carp_join_multicast(sc); 1834 } 1835 } 1836 1837 if (sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0) { 1838 sc->sc_if.if_flags &= ~IFF_UP; 1839 carp_set_state_all(sc, INIT); 1840 } else 1841 carp_hmac_prepare(sc); 1842 } 1843 1844 carp_setrun_all(sc, 0); 1845 } 1846 1847 int 1848 carp_set_addr(struct carp_softc *sc, struct sockaddr_in *sin) 1849 { 1850 struct in_addr *in = &sin->sin_addr; 1851 int error; 1852 1853 KASSERT(sc->sc_carpdevidx != 0); 1854 1855 /* XXX is this necessary? */ 1856 if (in->s_addr == INADDR_ANY) { 1857 carp_setrun_all(sc, 0); 1858 return (0); 1859 } 1860 1861 if (sc->sc_naddrs == 0 && (error = carp_join_multicast(sc)) != 0) 1862 return (error); 1863 1864 carp_set_state_all(sc, INIT); 1865 1866 return (0); 1867 } 1868 1869 int 1870 carp_join_multicast(struct carp_softc *sc) 1871 { 1872 struct ip_moptions *imo = &sc->sc_imo; 1873 struct in_multi *imm; 1874 struct in_addr addr; 1875 1876 if (!IN_MULTICAST(sc->sc_peer.s_addr)) 1877 return (0); 1878 1879 addr.s_addr = sc->sc_peer.s_addr; 1880 if ((imm = in_addmulti(&addr, &sc->sc_if)) == NULL) 1881 return (ENOBUFS); 1882 1883 imo->imo_membership[0] = imm; 1884 imo->imo_num_memberships = 1; 1885 imo->imo_ifidx = sc->sc_if.if_index; 1886 imo->imo_ttl = CARP_DFLTTL; 1887 imo->imo_loop = 0; 1888 return (0); 1889 } 1890 1891 1892 #ifdef INET6 1893 int 1894 carp_set_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6) 1895 { 1896 int error; 1897 1898 KASSERT(sc->sc_carpdevidx != 0); 1899 1900 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { 1901 carp_setrun_all(sc, 0); 1902 return (0); 1903 } 1904 1905 if (sc->sc_naddrs6 == 0 && (error = carp_join_multicast6(sc)) != 0) 1906 return (error); 1907 1908 carp_set_state_all(sc, INIT); 1909 1910 return (0); 1911 } 1912 1913 int 1914 carp_join_multicast6(struct carp_softc *sc) 1915 { 1916 struct in6_multi_mship *imm, *imm2; 1917 struct ip6_moptions *im6o = &sc->sc_im6o; 1918 struct sockaddr_in6 
addr6; 1919 int error; 1920 1921 /* Join IPv6 CARP multicast group */ 1922 memset(&addr6, 0, sizeof(addr6)); 1923 addr6.sin6_family = AF_INET6; 1924 addr6.sin6_len = sizeof(addr6); 1925 addr6.sin6_addr.s6_addr16[0] = htons(0xff02); 1926 addr6.sin6_addr.s6_addr16[1] = htons(sc->sc_if.if_index); 1927 addr6.sin6_addr.s6_addr8[15] = 0x12; 1928 if ((imm = in6_joingroup(&sc->sc_if, 1929 &addr6.sin6_addr, &error)) == NULL) { 1930 return (error); 1931 } 1932 /* join solicited multicast address */ 1933 memset(&addr6.sin6_addr, 0, sizeof(addr6.sin6_addr)); 1934 addr6.sin6_addr.s6_addr16[0] = htons(0xff02); 1935 addr6.sin6_addr.s6_addr16[1] = htons(sc->sc_if.if_index); 1936 addr6.sin6_addr.s6_addr32[1] = 0; 1937 addr6.sin6_addr.s6_addr32[2] = htonl(1); 1938 addr6.sin6_addr.s6_addr32[3] = 0; 1939 addr6.sin6_addr.s6_addr8[12] = 0xff; 1940 if ((imm2 = in6_joingroup(&sc->sc_if, 1941 &addr6.sin6_addr, &error)) == NULL) { 1942 in6_leavegroup(imm); 1943 return (error); 1944 } 1945 1946 /* apply v6 multicast membership */ 1947 im6o->im6o_ifidx = sc->sc_if.if_index; 1948 if (imm) 1949 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, 1950 i6mm_chain); 1951 if (imm2) 1952 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm2, 1953 i6mm_chain); 1954 1955 return (0); 1956 } 1957 1958 #endif /* INET6 */ 1959 1960 int 1961 carp_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr) 1962 { 1963 struct proc *p = curproc; /* XXX */ 1964 struct carp_softc *sc = ifp->if_softc; 1965 struct carp_vhost_entry *vhe; 1966 struct carpreq carpr; 1967 struct ifaddr *ifa = (struct ifaddr *)addr; 1968 struct ifreq *ifr = (struct ifreq *)addr; 1969 struct ifnet *ifp0 = NULL; 1970 int i, error = 0; 1971 1972 switch (cmd) { 1973 case SIOCSIFADDR: 1974 if (sc->sc_carpdevidx == 0) 1975 return (EINVAL); 1976 1977 switch (ifa->ifa_addr->sa_family) { 1978 case AF_INET: 1979 sc->sc_if.if_flags |= IFF_UP; 1980 error = carp_set_addr(sc, satosin(ifa->ifa_addr)); 1981 break; 1982 #ifdef INET6 1983 case AF_INET6: 1984 
sc->sc_if.if_flags |= IFF_UP; 1985 error = carp_set_addr6(sc, satosin6(ifa->ifa_addr)); 1986 break; 1987 #endif /* INET6 */ 1988 default: 1989 error = EAFNOSUPPORT; 1990 break; 1991 } 1992 break; 1993 1994 case SIOCSIFFLAGS: 1995 KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */ 1996 vhe = SRPL_FIRST_LOCKED(&sc->carp_vhosts); 1997 if (vhe->state != INIT && !(ifr->ifr_flags & IFF_UP)) { 1998 carp_del_all_timeouts(sc); 1999 2000 /* we need the interface up to bow out */ 2001 sc->sc_if.if_flags |= IFF_UP; 2002 sc->sc_bow_out = 1; 2003 carp_vhe_send_ad_all(sc); 2004 sc->sc_bow_out = 0; 2005 2006 sc->sc_if.if_flags &= ~IFF_UP; 2007 carp_set_state_all(sc, INIT); 2008 carp_setrun_all(sc, 0); 2009 } else if (vhe->state == INIT && (ifr->ifr_flags & IFF_UP)) { 2010 sc->sc_if.if_flags |= IFF_UP; 2011 carp_setrun_all(sc, 0); 2012 } 2013 break; 2014 2015 case SIOCSVH: 2016 KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */ 2017 vhe = SRPL_FIRST_LOCKED(&sc->carp_vhosts); 2018 if ((error = suser(p)) != 0) 2019 break; 2020 if ((error = copyin(ifr->ifr_data, &carpr, sizeof carpr))) 2021 break; 2022 error = 1; 2023 if (carpr.carpr_carpdev[0] != '\0' && 2024 (ifp0 = ifunit(carpr.carpr_carpdev)) == NULL) 2025 return (EINVAL); 2026 if (carpr.carpr_peer.s_addr == 0) 2027 sc->sc_peer.s_addr = INADDR_CARP_GROUP; 2028 else 2029 sc->sc_peer.s_addr = carpr.carpr_peer.s_addr; 2030 if (ifp0 != NULL && ifp0->if_index != sc->sc_carpdevidx) { 2031 if ((error = carp_set_ifp(sc, ifp0))) 2032 return (error); 2033 } 2034 if (vhe->state != INIT && carpr.carpr_state != vhe->state) { 2035 switch (carpr.carpr_state) { 2036 case BACKUP: 2037 timeout_del(&vhe->ad_tmo); 2038 carp_set_state_all(sc, BACKUP); 2039 carp_setrun_all(sc, 0); 2040 break; 2041 case MASTER: 2042 KERNEL_ASSERT_LOCKED(); 2043 /* touching carp_vhosts */ 2044 SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, 2045 vhost_entries) 2046 carp_master_down(vhe); 2047 break; 2048 default: 2049 break; 2050 } 2051 } 2052 if ((error = 
carp_vhids_ioctl(sc, &carpr))) 2053 return (error); 2054 if (carpr.carpr_advbase >= 0) { 2055 if (carpr.carpr_advbase > 255) { 2056 error = EINVAL; 2057 break; 2058 } 2059 sc->sc_advbase = carpr.carpr_advbase; 2060 error--; 2061 } 2062 if (memcmp(sc->sc_advskews, carpr.carpr_advskews, 2063 sizeof(sc->sc_advskews))) { 2064 i = 0; 2065 KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */ 2066 SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, 2067 vhost_entries) 2068 vhe->advskew = carpr.carpr_advskews[i++]; 2069 bcopy(carpr.carpr_advskews, sc->sc_advskews, 2070 sizeof(sc->sc_advskews)); 2071 } 2072 if (sc->sc_balancing != carpr.carpr_balancing) { 2073 if (carpr.carpr_balancing > CARP_BAL_MAXID) { 2074 error = EINVAL; 2075 break; 2076 } 2077 sc->sc_balancing = carpr.carpr_balancing; 2078 carp_set_enaddr(sc); 2079 carp_update_lsmask(sc); 2080 } 2081 bcopy(carpr.carpr_key, sc->sc_key, sizeof(sc->sc_key)); 2082 if (error > 0) 2083 error = EINVAL; 2084 else { 2085 error = 0; 2086 carp_hmac_prepare(sc); 2087 carp_setrun_all(sc, 0); 2088 } 2089 break; 2090 2091 case SIOCGVH: 2092 memset(&carpr, 0, sizeof(carpr)); 2093 if ((ifp0 = if_get(sc->sc_carpdevidx)) != NULL) 2094 strlcpy(carpr.carpr_carpdev, ifp0->if_xname, IFNAMSIZ); 2095 if_put(ifp0); 2096 i = 0; 2097 KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */ 2098 SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) { 2099 carpr.carpr_vhids[i] = vhe->vhid; 2100 carpr.carpr_advskews[i] = vhe->advskew; 2101 carpr.carpr_states[i] = vhe->state; 2102 i++; 2103 } 2104 carpr.carpr_advbase = sc->sc_advbase; 2105 carpr.carpr_balancing = sc->sc_balancing; 2106 if (suser(p) == 0) 2107 bcopy(sc->sc_key, carpr.carpr_key, 2108 sizeof(carpr.carpr_key)); 2109 carpr.carpr_peer.s_addr = sc->sc_peer.s_addr; 2110 error = copyout(&carpr, ifr->ifr_data, sizeof(carpr)); 2111 break; 2112 2113 case SIOCADDMULTI: 2114 error = carp_ether_addmulti(sc, ifr); 2115 break; 2116 2117 case SIOCDELMULTI: 2118 error = carp_ether_delmulti(sc, ifr); 2119 break; 
2120 case SIOCAIFGROUP: 2121 case SIOCDIFGROUP: 2122 if (sc->sc_demote_cnt) 2123 carp_ifgroup_ioctl(ifp, cmd, addr); 2124 break; 2125 case SIOCSIFGATTR: 2126 carp_ifgattr_ioctl(ifp, cmd, addr); 2127 break; 2128 default: 2129 error = ENOTTY; 2130 } 2131 2132 if (memcmp(sc->sc_ac.ac_enaddr, sc->sc_curlladdr, ETHER_ADDR_LEN) != 0) 2133 carp_set_enaddr(sc); 2134 return (error); 2135 } 2136 2137 int 2138 carp_check_dup_vhids(struct carp_softc *sc, struct srpl *cif, 2139 struct carpreq *carpr) 2140 { 2141 struct carp_softc *vr; 2142 struct carp_vhost_entry *vhe, *vhe0; 2143 int i; 2144 2145 KERNEL_ASSERT_LOCKED(); /* touching if_carp + carp_vhosts */ 2146 2147 SRPL_FOREACH_LOCKED(vr, cif, sc_list) { 2148 if (vr == sc) 2149 continue; 2150 SRPL_FOREACH_LOCKED(vhe, &vr->carp_vhosts, vhost_entries) { 2151 if (carpr) { 2152 for (i = 0; carpr->carpr_vhids[i]; i++) { 2153 if (vhe->vhid == carpr->carpr_vhids[i]) 2154 return (EINVAL); 2155 } 2156 } 2157 SRPL_FOREACH_LOCKED(vhe0, &sc->carp_vhosts, 2158 vhost_entries) { 2159 if (vhe->vhid == vhe0->vhid) 2160 return (EINVAL); 2161 } 2162 } 2163 } 2164 return (0); 2165 } 2166 2167 int 2168 carp_vhids_ioctl(struct carp_softc *sc, struct carpreq *carpr) 2169 { 2170 int i, j; 2171 u_int8_t taken_vhids[256]; 2172 2173 if (carpr->carpr_vhids[0] == 0 || 2174 !memcmp(sc->sc_vhids, carpr->carpr_vhids, sizeof(sc->sc_vhids))) 2175 return (0); 2176 2177 memset(taken_vhids, 0, sizeof(taken_vhids)); 2178 for (i = 0; carpr->carpr_vhids[i]; i++) { 2179 struct ifnet *ifp; 2180 2181 if (taken_vhids[carpr->carpr_vhids[i]]) 2182 return (EINVAL); 2183 taken_vhids[carpr->carpr_vhids[i]] = 1; 2184 2185 if ((ifp = if_get(sc->sc_carpdevidx)) != NULL) { 2186 struct srpl *cif; 2187 cif = &ifp->if_carp; 2188 if (carp_check_dup_vhids(sc, cif, carpr)) { 2189 if_put(ifp); 2190 return (EINVAL); 2191 } 2192 } 2193 if_put(ifp); 2194 if (carpr->carpr_advskews[i] >= 255) 2195 return (EINVAL); 2196 } 2197 /* set sane balancing defaults */ 2198 if (i <= 1) 2199 
carpr->carpr_balancing = CARP_BAL_NONE; 2200 else if (carpr->carpr_balancing == CARP_BAL_NONE && 2201 sc->sc_balancing == CARP_BAL_NONE) 2202 carpr->carpr_balancing = CARP_BAL_IP; 2203 2204 /* destroy all */ 2205 carp_del_all_timeouts(sc); 2206 carp_destroy_vhosts(sc); 2207 memset(sc->sc_vhids, 0, sizeof(sc->sc_vhids)); 2208 2209 /* sort vhosts list by vhid */ 2210 for (j = 1; j <= 255; j++) { 2211 for (i = 0; carpr->carpr_vhids[i]; i++) { 2212 if (carpr->carpr_vhids[i] != j) 2213 continue; 2214 if (carp_new_vhost(sc, carpr->carpr_vhids[i], 2215 carpr->carpr_advskews[i])) 2216 return (ENOMEM); 2217 sc->sc_vhids[i] = carpr->carpr_vhids[i]; 2218 sc->sc_advskews[i] = carpr->carpr_advskews[i]; 2219 } 2220 } 2221 carp_set_enaddr(sc); 2222 carp_set_state_all(sc, INIT); 2223 return (0); 2224 } 2225 2226 void 2227 carp_ifgroup_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr) 2228 { 2229 struct ifgroupreq *ifgr = (struct ifgroupreq *)addr; 2230 struct ifg_list *ifgl; 2231 int *dm, adj; 2232 2233 if (!strcmp(ifgr->ifgr_group, IFG_ALL)) 2234 return; 2235 adj = ((struct carp_softc *)ifp->if_softc)->sc_demote_cnt; 2236 if (cmd == SIOCDIFGROUP) 2237 adj = adj * -1; 2238 2239 TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) 2240 if (!strcmp(ifgl->ifgl_group->ifg_group, ifgr->ifgr_group)) { 2241 dm = &ifgl->ifgl_group->ifg_carp_demoted; 2242 if (*dm + adj >= 0) 2243 *dm += adj; 2244 else 2245 *dm = 0; 2246 } 2247 } 2248 2249 void 2250 carp_ifgattr_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr) 2251 { 2252 struct ifgroupreq *ifgr = (struct ifgroupreq *)addr; 2253 struct carp_softc *sc = ifp->if_softc; 2254 2255 if (ifgr->ifgr_attrib.ifg_carp_demoted > 0 && (sc->sc_if.if_flags & 2256 (IFF_UP|IFF_RUNNING)) == (IFF_UP|IFF_RUNNING)) 2257 carp_vhe_send_ad_all(sc); 2258 } 2259 2260 void 2261 carp_start(struct ifnet *ifp) 2262 { 2263 struct carp_softc *sc = ifp->if_softc; 2264 struct ifnet *ifp0; 2265 struct mbuf *m; 2266 2267 if ((ifp0 = if_get(sc->sc_carpdevidx)) == NULL) { 2268 
ifq_purge(&ifp->if_snd); 2269 return; 2270 } 2271 2272 while ((m = ifq_dequeue(&ifp->if_snd)) != NULL) 2273 carp_transmit(sc, ifp0, m); 2274 if_put(ifp0); 2275 } 2276 2277 void 2278 carp_transmit(struct carp_softc *sc, struct ifnet *ifp0, struct mbuf *m) 2279 { 2280 struct ifnet *ifp = &sc->sc_if; 2281 2282 #if NBPFILTER > 0 2283 { 2284 caddr_t if_bpf = ifp->if_bpf; 2285 if (if_bpf) { 2286 if (bpf_mtap_ether(if_bpf, m, BPF_DIRECTION_OUT)) 2287 m_freem(m); 2288 } 2289 } 2290 #endif /* NBPFILTER > 0 */ 2291 2292 if (!ISSET(ifp0->if_flags, IFF_RUNNING)) { 2293 counters_inc(ifp->if_counters, ifc_oerrors); 2294 m_freem(m); 2295 return; 2296 } 2297 2298 /* 2299 * Do not leak the multicast address when sending 2300 * advertisements in 'ip' and 'ip-stealth' balacing 2301 * modes. 2302 */ 2303 if (sc->sc_balancing == CARP_BAL_IP || 2304 sc->sc_balancing == CARP_BAL_IPSTEALTH) { 2305 struct ether_header *eh = mtod(m, struct ether_header *); 2306 memcpy(eh->ether_shost, sc->sc_ac.ac_enaddr, 2307 sizeof(eh->ether_shost)); 2308 } 2309 2310 if (if_enqueue(ifp0, m)) 2311 counters_inc(ifp->if_counters, ifc_oerrors); 2312 } 2313 2314 int 2315 carp_enqueue(struct ifnet *ifp, struct mbuf *m) 2316 { 2317 struct carp_softc *sc = ifp->if_softc; 2318 struct ifnet *ifp0; 2319 2320 /* no ifq_is_priq, cos hfsc on carp doesn't make sense */ 2321 2322 /* 2323 * If the parent of this carp(4) got destroyed while 2324 * `m' was being processed, silently drop it. 
2325 */ 2326 if ((ifp0 = if_get(sc->sc_carpdevidx)) == NULL) { 2327 m_freem(m); 2328 return (0); 2329 } 2330 2331 counters_pkt(ifp->if_counters, 2332 ifc_opackets, ifc_obytes, m->m_pkthdr.len); 2333 carp_transmit(sc, ifp0, m); 2334 if_put(ifp0); 2335 2336 return (0); 2337 } 2338 2339 int 2340 carp_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa, 2341 struct rtentry *rt) 2342 { 2343 struct carp_softc *sc = ((struct carp_softc *)ifp->if_softc); 2344 struct carp_vhost_entry *vhe; 2345 struct srp_ref sr; 2346 int ismaster; 2347 2348 if (sc->cur_vhe == NULL) { 2349 vhe = SRPL_FIRST(&sr, &sc->carp_vhosts); 2350 ismaster = (vhe->state == MASTER); 2351 SRPL_LEAVE(&sr); 2352 } else { 2353 ismaster = (sc->cur_vhe->state == MASTER); 2354 } 2355 2356 if ((sc->sc_balancing == CARP_BAL_NONE && !ismaster)) { 2357 m_freem(m); 2358 return (ENETUNREACH); 2359 } 2360 2361 return (ether_output(ifp, m, sa, rt)); 2362 } 2363 2364 void 2365 carp_set_state_all(struct carp_softc *sc, int state) 2366 { 2367 struct carp_vhost_entry *vhe; 2368 2369 KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */ 2370 2371 SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) { 2372 if (vhe->state == state) 2373 continue; 2374 2375 carp_set_state(vhe, state); 2376 } 2377 } 2378 2379 void 2380 carp_set_state(struct carp_vhost_entry *vhe, int state) 2381 { 2382 struct carp_softc *sc = vhe->parent_sc; 2383 static const char *carp_states[] = { CARP_STATES }; 2384 int loglevel; 2385 struct carp_vhost_entry *vhe0; 2386 2387 KASSERT(vhe->state != state); 2388 2389 if (vhe->state == INIT || state == INIT) 2390 loglevel = LOG_WARNING; 2391 else 2392 loglevel = LOG_CRIT; 2393 2394 if (sc->sc_vhe_count > 1) 2395 CARP_LOG(loglevel, sc, 2396 ("state transition (vhid %d): %s -> %s", vhe->vhid, 2397 carp_states[vhe->state], carp_states[state])); 2398 else 2399 CARP_LOG(loglevel, sc, 2400 ("state transition: %s -> %s", 2401 carp_states[vhe->state], carp_states[state])); 2402 2403 vhe->state = state; 2404 
carp_update_lsmask(sc); 2405 2406 KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */ 2407 2408 sc->sc_if.if_link_state = LINK_STATE_INVALID; 2409 SRPL_FOREACH_LOCKED(vhe0, &sc->carp_vhosts, vhost_entries) { 2410 /* 2411 * Link must be up if at least one vhe is in state MASTER to 2412 * bring or keep route up. 2413 */ 2414 if (vhe0->state == MASTER) { 2415 sc->sc_if.if_link_state = LINK_STATE_UP; 2416 break; 2417 } else if (vhe0->state == BACKUP) { 2418 sc->sc_if.if_link_state = LINK_STATE_DOWN; 2419 } 2420 } 2421 if_link_state_change(&sc->sc_if); 2422 } 2423 2424 void 2425 carp_group_demote_adj(struct ifnet *ifp, int adj, char *reason) 2426 { 2427 struct ifg_list *ifgl; 2428 int *dm, need_ad; 2429 struct carp_softc *nil = NULL; 2430 2431 if (ifp->if_type == IFT_CARP) { 2432 dm = &((struct carp_softc *)ifp->if_softc)->sc_demote_cnt; 2433 if (*dm + adj >= 0) 2434 *dm += adj; 2435 else 2436 *dm = 0; 2437 } 2438 2439 need_ad = 0; 2440 TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) { 2441 if (!strcmp(ifgl->ifgl_group->ifg_group, IFG_ALL)) 2442 continue; 2443 dm = &ifgl->ifgl_group->ifg_carp_demoted; 2444 2445 if (*dm + adj >= 0) 2446 *dm += adj; 2447 else 2448 *dm = 0; 2449 2450 if (adj > 0 && *dm == 1) 2451 need_ad = 1; 2452 CARP_LOG(LOG_ERR, nil, 2453 ("%s demoted group %s by %d to %d (%s)", 2454 ifp->if_xname, ifgl->ifgl_group->ifg_group, 2455 adj, *dm, reason)); 2456 } 2457 if (need_ad) 2458 carp_send_ad_all(); 2459 } 2460 2461 int 2462 carp_group_demote_count(struct carp_softc *sc) 2463 { 2464 struct ifg_list *ifgl; 2465 int count = 0; 2466 2467 TAILQ_FOREACH(ifgl, &sc->sc_if.if_groups, ifgl_next) 2468 count += ifgl->ifgl_group->ifg_carp_demoted; 2469 2470 if (count == 0 && sc->sc_demote_cnt) 2471 count = sc->sc_demote_cnt; 2472 2473 return (count > 255 ? 
255 : count); 2474 } 2475 2476 void 2477 carp_carpdev_state(void *v) 2478 { 2479 struct carp_softc *sc = v; 2480 struct ifnet *ifp0; 2481 int suppressed = sc->sc_suppress; 2482 2483 if ((ifp0 = if_get(sc->sc_carpdevidx)) == NULL) 2484 return; 2485 2486 if (ifp0->if_link_state == LINK_STATE_DOWN || 2487 !(ifp0->if_flags & IFF_UP)) { 2488 sc->sc_if.if_flags &= ~IFF_RUNNING; 2489 carp_del_all_timeouts(sc); 2490 carp_set_state_all(sc, INIT); 2491 sc->sc_suppress = 1; 2492 carp_setrun_all(sc, 0); 2493 if (!suppressed) 2494 carp_group_demote_adj(&sc->sc_if, 1, "carpdev"); 2495 } else if (suppressed) { 2496 carp_set_state_all(sc, INIT); 2497 sc->sc_suppress = 0; 2498 carp_setrun_all(sc, 0); 2499 carp_group_demote_adj(&sc->sc_if, -1, "carpdev"); 2500 } 2501 2502 if_put(ifp0); 2503 } 2504 2505 int 2506 carp_ether_addmulti(struct carp_softc *sc, struct ifreq *ifr) 2507 { 2508 struct ifnet *ifp0; 2509 struct carp_mc_entry *mc; 2510 u_int8_t addrlo[ETHER_ADDR_LEN], addrhi[ETHER_ADDR_LEN]; 2511 int error; 2512 2513 ifp0 = if_get(sc->sc_carpdevidx); 2514 if (ifp0 == NULL) 2515 return (EINVAL); 2516 2517 error = ether_addmulti(ifr, (struct arpcom *)&sc->sc_ac); 2518 if (error != ENETRESET) { 2519 if_put(ifp0); 2520 return (error); 2521 } 2522 2523 /* 2524 * This is new multicast address. We have to tell parent 2525 * about it. Also, remember this multicast address so that 2526 * we can delete them on unconfigure. 2527 */ 2528 mc = malloc(sizeof(*mc), M_DEVBUF, M_NOWAIT); 2529 if (mc == NULL) { 2530 error = ENOMEM; 2531 goto alloc_failed; 2532 } 2533 2534 /* 2535 * As ether_addmulti() returns ENETRESET, following two 2536 * statement shouldn't fail. 
2537 */ 2538 (void)ether_multiaddr(&ifr->ifr_addr, addrlo, addrhi); 2539 ETHER_LOOKUP_MULTI(addrlo, addrhi, &sc->sc_ac, mc->mc_enm); 2540 memcpy(&mc->mc_addr, &ifr->ifr_addr, ifr->ifr_addr.sa_len); 2541 LIST_INSERT_HEAD(&sc->carp_mc_listhead, mc, mc_entries); 2542 2543 error = (*ifp0->if_ioctl)(ifp0, SIOCADDMULTI, (caddr_t)ifr); 2544 if (error != 0) 2545 goto ioctl_failed; 2546 2547 if_put(ifp0); 2548 2549 return (error); 2550 2551 ioctl_failed: 2552 LIST_REMOVE(mc, mc_entries); 2553 free(mc, M_DEVBUF, sizeof(*mc)); 2554 alloc_failed: 2555 (void)ether_delmulti(ifr, (struct arpcom *)&sc->sc_ac); 2556 if_put(ifp0); 2557 2558 return (error); 2559 } 2560 2561 int 2562 carp_ether_delmulti(struct carp_softc *sc, struct ifreq *ifr) 2563 { 2564 struct ifnet *ifp0; 2565 struct ether_multi *enm; 2566 struct carp_mc_entry *mc; 2567 u_int8_t addrlo[ETHER_ADDR_LEN], addrhi[ETHER_ADDR_LEN]; 2568 int error; 2569 2570 ifp0 = if_get(sc->sc_carpdevidx); 2571 if (ifp0 == NULL) 2572 return (EINVAL); 2573 2574 /* 2575 * Find a key to lookup carp_mc_entry. We have to do this 2576 * before calling ether_delmulti for obvious reason. 2577 */ 2578 if ((error = ether_multiaddr(&ifr->ifr_addr, addrlo, addrhi)) != 0) 2579 goto rele; 2580 ETHER_LOOKUP_MULTI(addrlo, addrhi, &sc->sc_ac, enm); 2581 if (enm == NULL) { 2582 error = EINVAL; 2583 goto rele; 2584 } 2585 2586 LIST_FOREACH(mc, &sc->carp_mc_listhead, mc_entries) 2587 if (mc->mc_enm == enm) 2588 break; 2589 2590 /* We won't delete entries we didn't add */ 2591 if (mc == NULL) { 2592 error = EINVAL; 2593 goto rele; 2594 } 2595 2596 error = ether_delmulti(ifr, (struct arpcom *)&sc->sc_ac); 2597 if (error != ENETRESET) 2598 goto rele; 2599 2600 /* We no longer use this multicast address. Tell parent so. */ 2601 error = (*ifp0->if_ioctl)(ifp0, SIOCDELMULTI, (caddr_t)ifr); 2602 if (error == 0) { 2603 /* And forget about this address. 
*/ 2604 LIST_REMOVE(mc, mc_entries); 2605 free(mc, M_DEVBUF, sizeof(*mc)); 2606 } else 2607 (void)ether_addmulti(ifr, (struct arpcom *)&sc->sc_ac); 2608 rele: 2609 if_put(ifp0); 2610 return (error); 2611 } 2612 2613 /* 2614 * Delete any multicast address we have asked to add from parent 2615 * interface. Called when the carp is being unconfigured. 2616 */ 2617 void 2618 carp_ether_purgemulti(struct carp_softc *sc) 2619 { 2620 struct ifnet *ifp0; /* Parent. */ 2621 struct carp_mc_entry *mc; 2622 union { 2623 struct ifreq ifreq; 2624 struct { 2625 char ifr_name[IFNAMSIZ]; 2626 struct sockaddr_storage ifr_ss; 2627 } ifreq_storage; 2628 } u; 2629 struct ifreq *ifr = &u.ifreq; 2630 2631 if ((ifp0 = if_get(sc->sc_carpdevidx)) == NULL) 2632 return; 2633 2634 memcpy(ifr->ifr_name, ifp0->if_xname, IFNAMSIZ); 2635 while ((mc = LIST_FIRST(&sc->carp_mc_listhead)) != NULL) { 2636 memcpy(&ifr->ifr_addr, &mc->mc_addr, mc->mc_addr.ss_len); 2637 (void)(*ifp0->if_ioctl)(ifp0, SIOCDELMULTI, (caddr_t)ifr); 2638 LIST_REMOVE(mc, mc_entries); 2639 free(mc, M_DEVBUF, sizeof(*mc)); 2640 } 2641 2642 if_put(ifp0); 2643 } 2644 2645 void 2646 carp_vh_ref(void *null, void *v) 2647 { 2648 struct carp_vhost_entry *vhe = v; 2649 2650 refcnt_take(&vhe->vhost_refcnt); 2651 } 2652 2653 void 2654 carp_vh_unref(void *null, void *v) 2655 { 2656 struct carp_vhost_entry *vhe = v; 2657 2658 if (refcnt_rele(&vhe->vhost_refcnt)) { 2659 carp_sc_unref(NULL, vhe->parent_sc); 2660 free(vhe, M_DEVBUF, sizeof(*vhe)); 2661 } 2662 } 2663 2664 void 2665 carp_sc_ref(void *null, void *s) 2666 { 2667 struct carp_softc *sc = s; 2668 2669 refcnt_take(&sc->sc_refcnt); 2670 } 2671 2672 void 2673 carp_sc_unref(void *null, void *s) 2674 { 2675 struct carp_softc *sc = s; 2676 2677 refcnt_rele_wake(&sc->sc_refcnt); 2678 } 2679