1 /* $OpenBSD: ip_carp.c,v 1.169 2008/10/28 23:07:12 mpf Exp $ */ 2 3 /* 4 * Copyright (c) 2002 Michael Shalayeff. All rights reserved. 5 * Copyright (c) 2003 Ryan McBride. All rights reserved. 6 * Copyright (c) 2006-2008 Marco Pfatschbacher. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, 21 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 22 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 23 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 25 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 26 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 27 * THE POSSIBILITY OF SUCH DAMAGE. 28 */ 29 30 /* 31 * TODO: 32 * - iface reconfigure 33 * - support for hardware checksum calculations; 34 * 35 */ 36 37 #include "ether.h" 38 39 #include <sys/param.h> 40 #include <sys/proc.h> 41 #include <sys/systm.h> 42 #include <sys/mbuf.h> 43 #include <sys/socket.h> 44 #include <sys/socketvar.h> 45 #include <sys/ioctl.h> 46 #include <sys/errno.h> 47 #include <sys/device.h> 48 #include <sys/kernel.h> 49 #include <sys/sysctl.h> 50 #include <sys/syslog.h> 51 52 #include <machine/cpu.h> 53 54 #include <net/if.h> 55 #include <net/if_types.h> 56 #include <net/if_llc.h> 57 #include <net/route.h> 58 #include <net/netisr.h> 59 60 /* for arc4random() */ 61 #include <dev/rndvar.h> 62 63 #if NFDDI > 0 64 #include <net/if_fddi.h> 65 #endif 66 67 #include <crypto/sha1.h> 68 69 #ifdef INET 70 #include <netinet/in.h> 71 #include <netinet/in_systm.h> 72 #include <netinet/in_var.h> 73 #include <netinet/ip.h> 74 #include <netinet/ip_var.h> 75 #include <netinet/if_ether.h> 76 #include <netinet/ip_ipsp.h> 77 78 #include <net/if_enc.h> 79 #include <net/if_dl.h> 80 #endif 81 82 #ifdef INET6 83 #include <netinet/icmp6.h> 84 #include <netinet/ip6.h> 85 #include <netinet6/ip6_var.h> 86 #include <netinet6/nd6.h> 87 #include <netinet6/in6_ifattach.h> 88 #endif 89 90 #include "bpfilter.h" 91 #if NBPFILTER > 0 92 #include <net/bpf.h> 93 #endif 94 95 #include <netinet/ip_carp.h> 96 97 struct carp_mc_entry { 98 LIST_ENTRY(carp_mc_entry) mc_entries; 99 union { 100 struct ether_multi *mcu_enm; 101 } mc_u; 102 struct sockaddr_storage mc_addr; 103 }; 104 #define mc_enm mc_u.mcu_enm 105 106 enum { HMAC_ORIG=0, HMAC_NOV6LL=1, HMAC_MAX=2 }; 107 108 struct carp_vhost_entry { 109 LIST_ENTRY(carp_vhost_entry) vhost_entries; 110 struct carp_softc *parent_sc; 111 int vhe_leader; 112 int vhid; 113 int advskew; 114 enum { INIT = 0, BACKUP, MASTER } state; 115 struct timeout ad_tmo; /* advertisement timeout */ 116 struct timeout md_tmo; /* master down timeout */ 117 struct timeout md6_tmo; /* master down timeout */ 118 119 u_int64_t vhe_replay_cookie; 120 121 /* authentication */ 122 #define CARP_HMAC_PAD 64 123 unsigned char vhe_pad[CARP_HMAC_PAD]; 124 SHA1_CTX vhe_sha1[HMAC_MAX]; 125 126 u_int8_t vhe_enaddr[ETHER_ADDR_LEN]; 127 struct sockaddr_dl vhe_sdl; /* for IPv6 ndp balancing */ 128 }; 129 130 struct carp_softc { 131 struct arpcom sc_ac; 132 #define sc_if sc_ac.ac_if 133 #define sc_carpdev sc_ac.ac_if.if_carpdev 134 void *ah_cookie; 135 void *lh_cookie; 136 struct ip_moptions sc_imo; 137 #ifdef INET6 138 struct ip6_moptions sc_im6o; 139 #endif /* INET6 */ 140 TAILQ_ENTRY(carp_softc) sc_list; 141 142 int sc_suppress; 143 int sc_bow_out; 144 145 int sc_sendad_errors; 146 #define CARP_SENDAD_MAX_ERRORS(sc) (3 * (sc)->sc_vhe_count) 147 int sc_sendad_success; 148 #define CARP_SENDAD_MIN_SUCCESS(sc) (3 * (sc)->sc_vhe_count) 149 150 char sc_curlladdr[ETHER_ADDR_LEN]; 151 152 LIST_HEAD(__carp_vhosthead, carp_vhost_entry) carp_vhosts; 153 int sc_vhe_count; 154 u_int8_t sc_vhids[CARP_MAXNODES]; 155 u_int8_t sc_advskews[CARP_MAXNODES]; 156 u_int8_t sc_balancing; 157 158 int sc_naddrs; 159 int sc_naddrs6; 160 int sc_advbase; /* seconds */ 161 162 /* authentication */ 163 unsigned char sc_key[CARP_KEY_LEN]; 164 165 u_int32_t sc_hashkey[2]; 166 u_int32_t sc_lsmask; /* load sharing mask */ 167 int sc_lscount; /* # load sharing interfaces (max 32) */ 168 int sc_delayed_arp; /* delayed ARP request countdown */ 169 170 struct in_addr sc_peer; 171 172 LIST_HEAD(__carp_mchead, carp_mc_entry) carp_mc_listhead; 173 struct carp_vhost_entry *cur_vhe; /* current active vhe */ 174 }; 175 176 int carp_opts[CARPCTL_MAXID] = { 0, 1, 0, LOG_CRIT }; /* XXX for now */ 177 struct carpstats carpstats; 178 179 struct carp_if { 180 TAILQ_HEAD(, carp_softc) vhif_vrs; 181 int vhif_nvrs; 182 183 struct ifnet *vhif_ifp; 184 }; 185 186 #define CARP_LOG(l, sc, s) \ 187 do { \ 188 if (carp_opts[CARPCTL_LOG] >= l) { \ 189 if (sc) \ 190 log(l, "%s: ", \ 191 (sc)->sc_if.if_xname); \ 192 else \ 193 log(l, "carp: "); \ 194 addlog s; \ 195 addlog("\n"); \ 196 } \ 197 } while (0) 198 199 void carp_hmac_prepare(struct carp_softc *); 200 void carp_hmac_prepare_ctx(struct carp_vhost_entry *, u_int8_t); 201 void carp_hmac_generate(struct carp_vhost_entry *, u_int32_t *, 202 unsigned char *, u_int8_t); 203 int carp_hmac_verify(struct carp_vhost_entry *, u_int32_t *, 204 unsigned char *); 205 void carp_setroute(struct carp_softc *, int); 206 void carp_proto_input_c(struct mbuf *, struct carp_header *, int, 207 sa_family_t); 208 void carpattach(int); 209 void carpdetach(struct carp_softc *); 210 int carp_prepare_ad(struct mbuf *, struct carp_vhost_entry *, 211 struct carp_header *); 212 void carp_send_ad_all(void); 213 void carp_vhe_send_ad_all(struct carp_softc *); 214 void carp_send_ad(void *); 215 void carp_send_arp(struct carp_softc *); 216 void carp_master_down(void *); 217 int carp_ioctl(struct ifnet *, u_long, caddr_t); 218 int carp_vhids_ioctl(struct carp_softc *, struct carpreq *); 219 int carp_check_dup_vhids(struct carp_softc *, struct carp_if *, 220 struct carpreq *); 221 void carp_ifgroup_ioctl(struct ifnet *, u_long, caddr_t); 222 void carp_ifgattr_ioctl(struct ifnet *, u_long, caddr_t); 223 void carp_start(struct ifnet *); 224 void carp_setrun_all(struct carp_softc *, sa_family_t); 225 void carp_setrun(struct carp_vhost_entry *, sa_family_t); 226 void carp_set_state_all(struct carp_softc *, int); 227 void carp_set_state(struct carp_vhost_entry *, int); 228 void carp_multicast_cleanup(struct carp_softc *); 229 int carp_set_ifp(struct carp_softc *, struct ifnet *); 230 void carp_set_enaddr(struct carp_softc *); 231 void carp_set_vhe_enaddr(struct carp_vhost_entry *); 232 void carp_addr_updated(void *); 233 u_int32_t carp_hash(struct carp_softc *, u_char *); 234 int carp_set_addr(struct carp_softc *, struct sockaddr_in *); 235 int carp_join_multicast(struct carp_softc *); 236 #ifdef INET6 237 void carp_send_na(struct carp_softc *); 238 int carp_set_addr6(struct carp_softc *, struct sockaddr_in6 *); 239 int carp_join_multicast6(struct carp_softc *); 240 #endif 241 int carp_clone_create(struct if_clone *, int); 242 int carp_clone_destroy(struct ifnet *); 243 int carp_ether_addmulti(struct carp_softc *, struct ifreq *); 244 int carp_ether_delmulti(struct carp_softc *, struct ifreq *); 245 void carp_ether_purgemulti(struct carp_softc *); 246 int carp_group_demote_count(struct carp_softc *); 247 void carp_update_lsmask(struct carp_softc *); 248 int carp_new_vhost(struct carp_softc *, int, int); 249 void carp_destroy_vhosts(struct carp_softc *); 250 void carp_del_all_timeouts(struct carp_softc *); 251 252 struct if_clone carp_cloner = 253 IF_CLONE_INITIALIZER("carp", carp_clone_create, carp_clone_destroy); 254 255 #define carp_cksum(_m, _l) ((u_int16_t)in_cksum((_m), (_l))) 256 257 void 258 carp_hmac_prepare(struct carp_softc *sc) 259 { 260 struct carp_vhost_entry *vhe; 261 u_int8_t i; 262 263 LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) { 264 for (i = 0; i < HMAC_MAX; i++) { 265 carp_hmac_prepare_ctx(vhe, i); 266 } 267 } 268 } 269 270 void 271 carp_hmac_prepare_ctx(struct carp_vhost_entry *vhe, u_int8_t ctx) 272 { 273 struct carp_softc *sc = vhe->parent_sc; 274 275 u_int8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT; 276 u_int8_t vhid = vhe->vhid & 0xff; 277 SHA1_CTX sha1ctx; 278 u_int32_t kmd[5]; 279 struct ifaddr *ifa; 280 int i, found; 281 struct in_addr last, cur, in; 282 #ifdef INET6 283 struct in6_addr last6, cur6, in6; 284 #endif /* INET6 */ 285 286 /* compute ipad from key */ 287 bzero(vhe->vhe_pad, sizeof(vhe->vhe_pad)); 288 bcopy(sc->sc_key, vhe->vhe_pad, sizeof(sc->sc_key)); 289 for (i = 0; i < sizeof(vhe->vhe_pad); i++) 290 vhe->vhe_pad[i] ^= 0x36; 291 292 /* precompute first part of inner hash */ 293 SHA1Init(&vhe->vhe_sha1[ctx]); 294 SHA1Update(&vhe->vhe_sha1[ctx], vhe->vhe_pad, sizeof(vhe->vhe_pad)); 295 SHA1Update(&vhe->vhe_sha1[ctx], (void *)&version, sizeof(version)); 296 SHA1Update(&vhe->vhe_sha1[ctx], (void *)&type, sizeof(type)); 297 298 /* generate a key for the arpbalance hash, before the vhid is hashed */ 299 if (vhe->vhe_leader) { 300 bcopy(&vhe->vhe_sha1[ctx], &sha1ctx, sizeof(sha1ctx)); 301 SHA1Final((unsigned char *)kmd, &sha1ctx); 302 sc->sc_hashkey[0] = kmd[0] ^ kmd[1]; 303 sc->sc_hashkey[1] = kmd[2] ^ kmd[3]; 304 } 305 306 /* the rest of the precomputation */ 307 if (vhe->vhe_leader && bcmp(sc->sc_ac.ac_enaddr, vhe->vhe_enaddr, 308 ETHER_ADDR_LEN) != 0) 309 SHA1Update(&vhe->vhe_sha1[ctx], sc->sc_ac.ac_enaddr, 310 ETHER_ADDR_LEN); 311 312 SHA1Update(&vhe->vhe_sha1[ctx], (void *)&vhid, sizeof(vhid)); 313 314 /* Hash the addresses from smallest to largest, not interface order */ 315 #ifdef INET 316 cur.s_addr = 0; 317 do { 318 found = 0; 319 last = cur; 320 cur.s_addr = 0xffffffff; 321 TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) { 322 in.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr; 323 if (ifa->ifa_addr->sa_family == AF_INET && 324 ntohl(in.s_addr) > ntohl(last.s_addr) && 325 ntohl(in.s_addr) < ntohl(cur.s_addr)) { 326 cur.s_addr = in.s_addr; 327 found++; 328 } 329 } 330 if (found) 331 SHA1Update(&vhe->vhe_sha1[ctx], 332 (void *)&cur, sizeof(cur)); 333 } while (found); 334 #endif /* INET */ 335 #ifdef INET6 336 memset(&cur6, 0x00, sizeof(cur6)); 337 do { 338 found = 0; 339 last6 = cur6; 340 memset(&cur6, 0xff, sizeof(cur6)); 341 TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) { 342 in6 = ifatoia6(ifa)->ia_addr.sin6_addr; 343 if (IN6_IS_SCOPE_EMBED(&in6)) { 344 if (ctx == HMAC_NOV6LL) 345 continue; 346 in6.s6_addr16[1] = 0; 347 } 348 if (ifa->ifa_addr->sa_family == AF_INET6 && 349 memcmp(&in6, &last6, sizeof(in6)) > 0 && 350 memcmp(&in6, &cur6, sizeof(in6)) < 0) { 351 cur6 = in6; 352 found++; 353 } 354 } 355 if (found) 356 SHA1Update(&vhe->vhe_sha1[ctx], 357 (void *)&cur6, sizeof(cur6)); 358 } while (found); 359 #endif /* INET6 */ 360 361 /* convert ipad to opad */ 362 for (i = 0; i < sizeof(vhe->vhe_pad); i++) 363 vhe->vhe_pad[i] ^= 0x36 ^ 0x5c; 364 } 365 366 void 367 carp_hmac_generate(struct carp_vhost_entry *vhe, u_int32_t counter[2], 368 unsigned char md[20], u_int8_t ctx) 369 { 370 SHA1_CTX sha1ctx; 371 372 /* fetch first half of inner hash */ 373 bcopy(&vhe->vhe_sha1[ctx], &sha1ctx, sizeof(sha1ctx)); 374 375 SHA1Update(&sha1ctx, (void *)counter, sizeof(vhe->vhe_replay_cookie)); 376 SHA1Final(md, &sha1ctx); 377 378 /* outer hash */ 379 SHA1Init(&sha1ctx); 380 SHA1Update(&sha1ctx, vhe->vhe_pad, sizeof(vhe->vhe_pad)); 381 SHA1Update(&sha1ctx, md, 20); 382 SHA1Final(md, &sha1ctx); 383 } 384 385 int 386 carp_hmac_verify(struct carp_vhost_entry *vhe, u_int32_t counter[2], 387 unsigned char md[20]) 388 { 389 unsigned char md2[20]; 390 u_int8_t i; 391 392 for (i = 0; i < HMAC_MAX; i++) { 393 carp_hmac_generate(vhe, counter, md2, i); 394 if (!bcmp(md, md2, sizeof(md2))) 395 return (0); 396 } 397 return (1); 398 } 399 400 void 401 carp_setroute(struct carp_softc *sc, int cmd) 402 { 403 struct ifaddr *ifa; 404 int s; 405 406 /* XXX this mess needs fixing */ 407 408 s = splsoftnet(); 409 TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) { 410 switch (ifa->ifa_addr->sa_family) { 411 case AF_INET: { 412 int error; 413 struct sockaddr sa; 414 struct rtentry *rt; 415 struct radix_node_head *rnh; 416 struct radix_node *rn; 417 struct rt_addrinfo info; 418 int hr_otherif, nr_ourif; 419 struct sockaddr_rtlabel sa_rl; 420 const char *label; 421 422 /* Remove the existing host route, if any */ 423 bzero(&info, sizeof(info)); 424 info.rti_info[RTAX_DST] = ifa->ifa_addr; 425 info.rti_flags = RTF_HOST; 426 error = rtrequest1(RTM_DELETE, &info, RTP_CONNECTED, 427 NULL, 0); 428 rt_missmsg(RTM_DELETE, &info, info.rti_flags, NULL, 429 error, 0); 430 431 /* Check for our address on another interface */ 432 /* XXX cries for proper API */ 433 rnh = rt_gettable(ifa->ifa_addr->sa_family, 0); 434 rn = rnh->rnh_matchaddr(ifa->ifa_addr, rnh); 435 rt = (struct rtentry *)rn; 436 hr_otherif = (rt && rt->rt_ifp != &sc->sc_if && 437 rt->rt_flags & (RTF_CLONING|RTF_CLONED)); 438 439 /* Check for a network route on our interface */ 440 bcopy(ifa->ifa_addr, &sa, sizeof(sa)); 441 satosin(&sa)->sin_addr.s_addr = satosin(ifa->ifa_netmask 442 )->sin_addr.s_addr & satosin(&sa)->sin_addr.s_addr; 443 rt = (struct rtentry *)rt_lookup(&sa, 444 ifa->ifa_netmask, 0); 445 nr_ourif = (rt && rt->rt_ifp == &sc->sc_if); 446 447 /* Restore the route label */ 448 bzero(&sa_rl, sizeof(sa_rl)); 449 if (rt && rt->rt_labelid) { 450 sa_rl.sr_len = sizeof(sa_rl); 451 sa_rl.sr_family = AF_UNSPEC; 452 label = rtlabel_id2name(rt->rt_labelid); 453 if (label != NULL) 454 strlcpy(sa_rl.sr_label, label, 455 sizeof(sa_rl.sr_label)); 456 } 457 458 switch (cmd) { 459 case RTM_ADD: 460 if (hr_otherif) { 461 ifa->ifa_rtrequest = NULL; 462 ifa->ifa_flags &= ~RTF_CLONING; 463 bzero(&info, sizeof(info)); 464 info.rti_info[RTAX_DST] = ifa->ifa_addr; 465 info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr; 466 info.rti_flags = RTF_UP | RTF_HOST; 467 error = rtrequest1(RTM_ADD, &info, 468 RTP_CONNECTED, NULL, 0); 469 rt_missmsg(RTM_ADD, &info, info.rti_flags, 470 &sc->sc_if, error, 0); 471 } 472 if (!hr_otherif || nr_ourif || !rt) { 473 if (nr_ourif && !(rt->rt_flags & 474 RTF_CLONING)) { 475 bzero(&info, sizeof(info)); 476 info.rti_info[RTAX_DST] = &sa; 477 info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask; 478 error = rtrequest1(RTM_DELETE, &info, RTP_CONNECTED, NULL, 0); 479 rt_missmsg(RTM_DELETE, &info, info.rti_flags, NULL, 480 error, 0); 481 } 482 483 ifa->ifa_rtrequest = arp_rtrequest; 484 ifa->ifa_flags |= RTF_CLONING; 485 486 bzero(&info, sizeof(info)); 487 info.rti_info[RTAX_DST] = &sa; 488 info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr; 489 info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask; 490 info.rti_info[RTAX_LABEL] = 491 (struct sockaddr *)&sa_rl; 492 error = rtrequest1(RTM_ADD, &info, RTP_CONNECTED, NULL, 0); 493 if (error == 0) 494 ifa->ifa_flags |= IFA_ROUTE; 495 rt_missmsg(RTM_ADD, &info, info.rti_flags, 496 &sc->sc_if, error, 0); 497 } 498 break; 499 case RTM_DELETE: 500 break; 501 default: 502 break; 503 } 504 break; 505 } 506 507 #ifdef INET6 508 case AF_INET6: 509 if (sc->sc_balancing >= CARP_BAL_IP) 510 continue; 511 if (cmd == RTM_ADD) 512 in6_ifaddloop(ifa); 513 else 514 in6_ifremloop(ifa); 515 break; 516 #endif /* INET6 */ 517 default: 518 break; 519 } 520 } 521 splx(s); 522 } 523 524 /* 525 * process input packet. 526 * we have rearranged checks order compared to the rfc, 527 * but it seems more efficient this way or not possible otherwise. 528 */ 529 void 530 carp_proto_input(struct mbuf *m, ...) 531 { 532 struct ip *ip = mtod(m, struct ip *); 533 struct ifnet *ifp = m->m_pkthdr.rcvif; 534 struct carp_softc *sc = NULL; 535 struct carp_header *ch; 536 int iplen, len, hlen, ismulti; 537 va_list ap; 538 539 va_start(ap, m); 540 hlen = va_arg(ap, int); 541 va_end(ap); 542 543 carpstats.carps_ipackets++; 544 545 if (!carp_opts[CARPCTL_ALLOW]) { 546 m_freem(m); 547 return; 548 } 549 550 ismulti = IN_MULTICAST(ip->ip_dst.s_addr); 551 552 /* check if received on a valid carp interface */ 553 if (!((ifp->if_type == IFT_CARP && ismulti) || 554 (ifp->if_type != IFT_CARP && !ismulti && ifp->if_carp != NULL))) { 555 carpstats.carps_badif++; 556 CARP_LOG(LOG_INFO, sc, ("packet received on non-carp interface: %s", 557 m->m_pkthdr.rcvif->if_xname)); 558 m_freem(m); 559 return; 560 } 561 562 /* verify that the IP TTL is 255. */ 563 if (ip->ip_ttl != CARP_DFLTTL) { 564 carpstats.carps_badttl++; 565 CARP_LOG(LOG_NOTICE, sc, ("received ttl %d != %d on %s", ip->ip_ttl, 566 CARP_DFLTTL, m->m_pkthdr.rcvif->if_xname)); 567 m_freem(m); 568 return; 569 } 570 571 /* 572 * verify that the received packet length is 573 * equal to the CARP header 574 */ 575 iplen = ip->ip_hl << 2; 576 len = iplen + sizeof(*ch); 577 if (len > m->m_pkthdr.len) { 578 carpstats.carps_badlen++; 579 CARP_LOG(LOG_INFO, sc, ("packet too short %d on %s", m->m_pkthdr.len, 580 m->m_pkthdr.rcvif->if_xname)); 581 m_freem(m); 582 return; 583 } 584 585 if ((m = m_pullup2(m, len)) == NULL) { 586 carpstats.carps_hdrops++; 587 return; 588 } 589 ip = mtod(m, struct ip *); 590 ch = (void *)ip + iplen; 591 592 /* verify the CARP checksum */ 593 m->m_data += iplen; 594 if (carp_cksum(m, len - iplen)) { 595 carpstats.carps_badsum++; 596 CARP_LOG(LOG_INFO, sc, ("checksum failed on %s", 597 m->m_pkthdr.rcvif->if_xname)); 598 m_freem(m); 599 return; 600 } 601 m->m_data -= iplen; 602 603 carp_proto_input_c(m, ch, ismulti, AF_INET); 604 } 605 606 #ifdef INET6 607 int 608 carp6_proto_input(struct mbuf **mp, int *offp, int proto) 609 { 610 struct mbuf *m = *mp; 611 struct carp_softc *sc = NULL; 612 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); 613 struct carp_header *ch; 614 u_int len; 615 616 carpstats.carps_ipackets6++; 617 618 if (!carp_opts[CARPCTL_ALLOW]) { 619 m_freem(m); 620 return (IPPROTO_DONE); 621 } 622 623 /* check if received on a valid carp interface */ 624 if (m->m_pkthdr.rcvif->if_type != IFT_CARP) { 625 carpstats.carps_badif++; 626 CARP_LOG(LOG_INFO, sc, ("packet received on non-carp interface: %s", 627 m->m_pkthdr.rcvif->if_xname)); 628 m_freem(m); 629 return (IPPROTO_DONE); 630 } 631 632 /* verify that the IP TTL is 255 */ 633 if (ip6->ip6_hlim != CARP_DFLTTL) { 634 carpstats.carps_badttl++; 635 CARP_LOG(LOG_NOTICE, sc, ("received ttl %d != %d on %s", ip6->ip6_hlim, 636 CARP_DFLTTL, m->m_pkthdr.rcvif->if_xname)); 637 m_freem(m); 638 return (IPPROTO_DONE); 639 } 640 641 /* verify that we have a complete carp packet */ 642 len = m->m_len; 643 IP6_EXTHDR_GET(ch, struct carp_header *, m, *offp, sizeof(*ch)); 644 if (ch == NULL) { 645 carpstats.carps_badlen++; 646 CARP_LOG(LOG_INFO, sc, ("packet size %u too small", len)); 647 return (IPPROTO_DONE); 648 } 649 650 651 /* verify the CARP checksum */ 652 m->m_data += *offp; 653 if (carp_cksum(m, sizeof(*ch))) { 654 carpstats.carps_badsum++; 655 CARP_LOG(LOG_INFO, sc, ("checksum failed, on %s", 656 m->m_pkthdr.rcvif->if_xname)); 657 m_freem(m); 658 return (IPPROTO_DONE); 659 } 660 m->m_data -= *offp; 661 662 carp_proto_input_c(m, ch, 1, AF_INET6); 663 return (IPPROTO_DONE); 664 } 665 #endif /* INET6 */ 666 667 void 668 carp_proto_input_c(struct mbuf *m, struct carp_header *ch, int ismulti, 669 sa_family_t af) 670 { 671 struct ifnet *ifp = m->m_pkthdr.rcvif; 672 struct carp_softc *sc; 673 struct carp_vhost_entry *vhe; 674 struct timeval sc_tv, ch_tv; 675 struct carp_if *cif; 676 677 if (ifp->if_type == IFT_CARP) 678 cif = (struct carp_if *)ifp->if_carpdev->if_carp; 679 else 680 cif = (struct carp_if *)ifp->if_carp; 681 682 TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) { 683 if (af == AF_INET && 684 ismulti != IN_MULTICAST(sc->sc_peer.s_addr)) 685 continue; 686 LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) { 687 if (vhe->vhid == ch->carp_vhid) 688 goto found; 689 } 690 } 691 found: 692 693 if (!sc || (sc->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) != 694 (IFF_UP|IFF_RUNNING)) { 695 carpstats.carps_badvhid++; 696 m_freem(m); 697 return; 698 } 699 700 getmicrotime(&sc->sc_if.if_lastchange); 701 sc->sc_if.if_ipackets++; 702 sc->sc_if.if_ibytes += m->m_pkthdr.len; 703 704 /* verify the CARP version. */ 705 if (ch->carp_version != CARP_VERSION) { 706 carpstats.carps_badver++; 707 sc->sc_if.if_ierrors++; 708 CARP_LOG(LOG_NOTICE, sc, ("invalid version %d != %d", 709 ch->carp_version, CARP_VERSION)); 710 m_freem(m); 711 return; 712 } 713 714 /* verify the hash */ 715 if (carp_hmac_verify(vhe, ch->carp_counter, ch->carp_md)) { 716 carpstats.carps_badauth++; 717 sc->sc_if.if_ierrors++; 718 CARP_LOG(LOG_INFO, sc, ("incorrect hash")); 719 m_freem(m); 720 return; 721 } 722 723 if (!bcmp(&vhe->vhe_replay_cookie, ch->carp_counter, 724 sizeof(ch->carp_counter))) { 725 /* Do not log duplicates from non simplex interfaces */ 726 if (sc->sc_carpdev->if_flags & IFF_SIMPLEX) { 727 carpstats.carps_badauth++; 728 sc->sc_if.if_ierrors++; 729 CARP_LOG(LOG_WARNING, sc, 730 ("replay or network loop detected")); 731 } 732 m_freem(m); 733 return; 734 } 735 736 sc_tv.tv_sec = sc->sc_advbase; 737 if (carp_group_demote_count(sc) && vhe->advskew < 240) 738 sc_tv.tv_usec = 240 * 1000000 / 256; 739 else 740 sc_tv.tv_usec = vhe->advskew * 1000000 / 256; 741 ch_tv.tv_sec = ch->carp_advbase; 742 ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256; 743 744 switch (vhe->state) { 745 case INIT: 746 break; 747 case MASTER: 748 /* 749 * If we receive an advertisement from a master who's going to 750 * be more frequent than us, go into BACKUP state. 751 */ 752 if (timercmp(&sc_tv, &ch_tv, >) || 753 (timercmp(&sc_tv, &ch_tv, ==) && 754 ch->carp_demote <= 755 (carp_group_demote_count(sc) & 0xff))) { 756 timeout_del(&vhe->ad_tmo); 757 carp_set_state(vhe, BACKUP); 758 carp_setrun(vhe, 0); 759 if (vhe->vhe_leader) 760 carp_setroute(sc, RTM_DELETE); 761 } 762 break; 763 case BACKUP: 764 /* 765 * If we're pre-empting masters who advertise slower than us, 766 * and this one claims to be slower, treat him as down. 767 */ 768 if (carp_opts[CARPCTL_PREEMPT] && timercmp(&sc_tv, &ch_tv, <)) { 769 carp_master_down(vhe); 770 break; 771 } 772 773 /* 774 * Take over masters advertising with a higher demote count, 775 * regardless of CARPCTL_PREEMPT. 776 */ 777 if (ch->carp_demote > (carp_group_demote_count(sc) & 0xff)) { 778 carp_master_down(vhe); 779 break; 780 } 781 782 /* 783 * If the master is going to advertise at such a low frequency 784 * that he's guaranteed to time out, we'd might as well just 785 * treat him as timed out now. 786 */ 787 sc_tv.tv_sec = sc->sc_advbase * 3; 788 if (timercmp(&sc_tv, &ch_tv, <)) { 789 carp_master_down(vhe); 790 break; 791 } 792 793 /* 794 * Otherwise, we reset the counter and wait for the next 795 * advertisement. 796 */ 797 carp_setrun(vhe, af); 798 break; 799 } 800 801 m_freem(m); 802 return; 803 } 804 805 int 806 carp_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp, 807 size_t newlen) 808 { 809 /* All sysctl names at this level are terminal. */ 810 if (namelen != 1) 811 return (ENOTDIR); 812 813 switch (name[0]) { 814 case CARPCTL_STATS: 815 if (newp != NULL) 816 return (EPERM); 817 return (sysctl_struct(oldp, oldlenp, newp, newlen, 818 &carpstats, sizeof(carpstats))); 819 default: 820 if (name[0] <= 0 || name[0] >= CARPCTL_MAXID) 821 return (ENOPROTOOPT); 822 return sysctl_int(oldp, oldlenp, newp, newlen, 823 &carp_opts[name[0]]); 824 } 825 } 826 827 /* 828 * Interface side of the CARP implementation. 829 */ 830 831 /* ARGSUSED */ 832 void 833 carpattach(int n) 834 { 835 struct ifg_group *ifg; 836 837 if ((ifg = if_creategroup("carp")) != NULL) 838 ifg->ifg_refcnt++; /* keep around even if empty */ 839 if_clone_attach(&carp_cloner); 840 } 841 842 int 843 carp_clone_create(ifc, unit) 844 struct if_clone *ifc; 845 int unit; 846 { 847 struct carp_softc *sc; 848 struct ifnet *ifp; 849 850 sc = malloc(sizeof(*sc), M_DEVBUF, M_NOWAIT); 851 if (!sc) 852 return (ENOMEM); 853 bzero(sc, sizeof(*sc)); 854 855 LIST_INIT(&sc->carp_vhosts); 856 sc->sc_vhe_count = 0; 857 if (carp_new_vhost(sc, 0, 0)) { 858 free(sc, M_DEVBUF); 859 return (ENOMEM); 860 } 861 862 sc->sc_suppress = 0; 863 sc->sc_advbase = CARP_DFLTINTV; 864 sc->sc_naddrs = sc->sc_naddrs6 = 0; 865 #ifdef INET6 866 sc->sc_im6o.im6o_multicast_hlim = CARP_DFLTTL; 867 #endif /* INET6 */ 868 sc->sc_imo.imo_membership = (struct in_multi **)malloc( 869 (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_IPMOPTS, 870 M_WAITOK|M_ZERO); 871 sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS; 872 873 LIST_INIT(&sc->carp_mc_listhead); 874 ifp = &sc->sc_if; 875 ifp->if_softc = sc; 876 snprintf(ifp->if_xname, sizeof ifp->if_xname, "%s%d", ifc->ifc_name, 877 unit); 878 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 879 ifp->if_ioctl = carp_ioctl; 880 ifp->if_start = carp_start; 881 ifp->if_output = carp_output; 882 ifp->if_type = IFT_CARP; 883 ifp->if_addrlen = ETHER_ADDR_LEN; 884 ifp->if_hdrlen = ETHER_HDR_LEN; 885 ifp->if_mtu = ETHERMTU; 886 IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen); 887 IFQ_SET_READY(&ifp->if_snd); 888 if_attach(ifp); 889 890 if_alloc_sadl(ifp); 891 LIST_INIT(&sc->sc_ac.ac_multiaddrs); 892 #if NBPFILTER > 0 893 bpfattach(&ifp->if_bpf, ifp, DLT_EN10MB, ETHER_HDR_LEN); 894 #endif 895 return (0); 896 } 897 898 int 899 carp_new_vhost(struct carp_softc *sc, int vhid, int advskew) 900 { 901 struct carp_vhost_entry *vhe, *vhe0; 902 903 vhe = malloc(sizeof(*vhe), M_DEVBUF, M_NOWAIT | M_ZERO); 904 if (vhe == NULL) 905 return (ENOMEM); 906 907 vhe->parent_sc = sc; 908 vhe->vhid = vhid; 909 vhe->advskew = advskew; 910 timeout_set(&vhe->ad_tmo, carp_send_ad, vhe); 911 timeout_set(&vhe->md_tmo, carp_master_down, vhe); 912 timeout_set(&vhe->md6_tmo, carp_master_down, vhe); 913 914 /* mark the first vhe as leader */ 915 if (LIST_EMPTY(&sc->carp_vhosts)) { 916 vhe->vhe_leader = 1; 917 LIST_INSERT_HEAD(&sc->carp_vhosts, vhe, vhost_entries); 918 sc->sc_vhe_count = 1; 919 return (0); 920 } 921 922 LIST_FOREACH(vhe0, &sc->carp_vhosts, vhost_entries) 923 if (LIST_NEXT(vhe0, vhost_entries) == NULL) 924 break; 925 LIST_INSERT_AFTER(vhe0, vhe, vhost_entries); 926 sc->sc_vhe_count++; 927 928 return (0); 929 } 930 931 int 932 carp_clone_destroy(struct ifnet *ifp) 933 { 934 struct carp_softc *sc = ifp->if_softc; 935 936 carpdetach(sc); 937 ether_ifdetach(ifp); 938 if_detach(ifp); 939 carp_destroy_vhosts(ifp->if_softc); 940 free(sc->sc_imo.imo_membership, M_IPMOPTS); 941 free(sc, M_DEVBUF); 942 943 return (0); 944 } 945 946 void 947 carp_del_all_timeouts(struct carp_softc *sc) 948 { 949 struct carp_vhost_entry *vhe; 950 951 LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) { 952 timeout_del(&vhe->ad_tmo); 953 timeout_del(&vhe->md_tmo); 954 timeout_del(&vhe->md6_tmo); 955 } 956 } 957 958 void 959 carpdetach(struct carp_softc *sc) 960 { 961 struct carp_if *cif; 962 int s; 963 964 carp_del_all_timeouts(sc); 965 966 if (sc->sc_suppress) 967 carp_group_demote_adj(&sc->sc_if, -1); 968 sc->sc_suppress = 0; 969 970 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS(sc)) 971 carp_group_demote_adj(&sc->sc_if, -1); 972 sc->sc_sendad_errors = 0; 973 974 carp_set_state_all(sc, INIT); 975 sc->sc_if.if_flags &= ~IFF_UP; 976 carp_setrun_all(sc, 0); 977 carp_multicast_cleanup(sc); 978 979 s = splnet(); 980 if (sc->sc_carpdev != NULL) { 981 if (sc->lh_cookie != NULL) 982 hook_disestablish(sc->sc_carpdev->if_linkstatehooks, 983 sc->lh_cookie); 984 cif = (struct carp_if *)sc->sc_carpdev->if_carp; 985 TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list); 986 if (!--cif->vhif_nvrs) { 987 ifpromisc(sc->sc_carpdev, 0); 988 sc->sc_carpdev->if_carp = NULL; 989 free(cif, M_IFADDR); 990 } 991 } 992 sc->sc_carpdev = NULL; 993 splx(s); 994 } 995 996 /* Detach an interface from the carp. */ 997 void 998 carp_ifdetach(struct ifnet *ifp) 999 { 1000 struct carp_softc *sc, *nextsc; 1001 struct carp_if *cif = (struct carp_if *)ifp->if_carp; 1002 1003 for (sc = TAILQ_FIRST(&cif->vhif_vrs); sc; sc = nextsc) { 1004 nextsc = TAILQ_NEXT(sc, sc_list); 1005 carpdetach(sc); 1006 } 1007 } 1008 1009 void 1010 carp_destroy_vhosts(struct carp_softc *sc) 1011 { 1012 /* XXX bow out? */ 1013 struct carp_vhost_entry *vhe, *nvhe; 1014 1015 for (vhe = LIST_FIRST(&sc->carp_vhosts); 1016 vhe != LIST_END(&sc->carp_vhosts); vhe = nvhe) { 1017 nvhe = LIST_NEXT(vhe, vhost_entries); 1018 free(vhe, M_DEVBUF); 1019 } 1020 LIST_INIT(&sc->carp_vhosts); 1021 sc->sc_vhe_count = 0; 1022 } 1023 1024 int 1025 carp_prepare_ad(struct mbuf *m, struct carp_vhost_entry *vhe, 1026 struct carp_header *ch) 1027 { 1028 if (!vhe->vhe_replay_cookie) { 1029 arc4random_buf(&vhe->vhe_replay_cookie, 1030 sizeof(vhe->vhe_replay_cookie)); 1031 } 1032 1033 bcopy(&vhe->vhe_replay_cookie, ch->carp_counter, 1034 sizeof(ch->carp_counter)); 1035 1036 /* 1037 * For the time being, do not include the IPv6 linklayer addresses 1038 * in the HMAC. 1039 */ 1040 carp_hmac_generate(vhe, ch->carp_counter, ch->carp_md, HMAC_NOV6LL); 1041 1042 return (0); 1043 } 1044 1045 void 1046 carp_send_ad_all(void) 1047 { 1048 struct ifnet *ifp; 1049 struct carp_if *cif; 1050 struct carp_softc *vh; 1051 1052 TAILQ_FOREACH(ifp, &ifnet, if_list) { 1053 if (ifp->if_carp == NULL || ifp->if_type == IFT_CARP) 1054 continue; 1055 1056 cif = (struct carp_if *)ifp->if_carp; 1057 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1058 if ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) == 1059 (IFF_UP|IFF_RUNNING)) { 1060 carp_vhe_send_ad_all(vh); 1061 } 1062 } 1063 } 1064 } 1065 1066 void 1067 carp_vhe_send_ad_all(struct carp_softc *sc) 1068 { 1069 struct carp_vhost_entry *vhe; 1070 1071 LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) { 1072 if (vhe->state == MASTER) 1073 carp_send_ad(vhe); 1074 } 1075 } 1076 1077 void 1078 carp_send_ad(void *v) 1079 { 1080 struct carp_header ch; 1081 struct timeval tv; 1082 struct carp_vhost_entry *vhe = v; 1083 struct carp_softc *sc = vhe->parent_sc; 1084 struct carp_header *ch_ptr; 1085 1086 struct mbuf *m; 1087 int error, len, advbase, advskew, s; 1088 struct ifaddr *ifa; 1089 struct sockaddr sa; 1090 1091 if (sc->sc_carpdev == NULL) { 1092 sc->sc_if.if_oerrors++; 1093 return; 1094 } 1095 1096 s = splsoftnet(); 1097 1098 /* bow out if we've gone to backup (the carp interface is going down) */ 1099 if (sc->sc_bow_out) { 1100 advbase = 255; 1101 advskew = 255; 1102 } else { 1103 advbase = sc->sc_advbase; 1104 if (!carp_group_demote_count(sc) || vhe->advskew > 240) 1105 advskew = vhe->advskew; 1106 else 1107 advskew = 240; 1108 tv.tv_sec = advbase; 1109 tv.tv_usec = advskew * 1000000 / 256; 1110 } 1111 1112 ch.carp_version = CARP_VERSION; 1113 ch.carp_type = CARP_ADVERTISEMENT; 1114 ch.carp_vhid = vhe->vhid; 1115 ch.carp_demote = carp_group_demote_count(sc) & 0xff; 1116 ch.carp_advbase = advbase; 1117 ch.carp_advskew = advskew; 1118 ch.carp_authlen = 7; /* XXX DEFINE */ 1119 ch.carp_cksum = 0; 1120 1121 sc->cur_vhe = vhe; /* we need the vhe later on the output path */ 1122 1123 #ifdef INET 1124 if (sc->sc_naddrs) { 1125 struct ip *ip; 1126 1127 MGETHDR(m, M_DONTWAIT, MT_HEADER); 1128 if (m == NULL) { 1129 sc->sc_if.if_oerrors++; 1130 carpstats.carps_onomem++; 1131 /* XXX maybe less ? */ 1132 goto retry_later; 1133 } 1134 len = sizeof(*ip) + sizeof(ch); 1135 m->m_pkthdr.len = len; 1136 m->m_pkthdr.rcvif = NULL; 1137 m->m_len = len; 1138 MH_ALIGN(m, m->m_len); 1139 ip = mtod(m, struct ip *); 1140 ip->ip_v = IPVERSION; 1141 ip->ip_hl = sizeof(*ip) >> 2; 1142 ip->ip_tos = IPTOS_LOWDELAY; 1143 ip->ip_len = htons(len); 1144 ip->ip_id = htons(ip_randomid()); 1145 ip->ip_off = htons(IP_DF); 1146 ip->ip_ttl = CARP_DFLTTL; 1147 ip->ip_p = IPPROTO_CARP; 1148 ip->ip_sum = 0; 1149 1150 bzero(&sa, sizeof(sa)); 1151 sa.sa_family = AF_INET; 1152 ifa = ifaof_ifpforaddr(&sa, sc->sc_carpdev); 1153 if (ifa == NULL) 1154 ip->ip_src.s_addr = 0; 1155 else 1156 ip->ip_src.s_addr = 1157 ifatoia(ifa)->ia_addr.sin_addr.s_addr; 1158 ip->ip_dst.s_addr = sc->sc_peer.s_addr; 1159 if (IN_MULTICAST(ip->ip_dst.s_addr)) 1160 m->m_flags |= M_MCAST; 1161 1162 ch_ptr = (void *)ip + sizeof(*ip); 1163 bcopy(&ch, ch_ptr, sizeof(ch)); 1164 if (carp_prepare_ad(m, vhe, ch_ptr)) 1165 goto retry_later; 1166 1167 m->m_data += sizeof(*ip); 1168 ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip)); 1169 m->m_data -= sizeof(*ip); 1170 1171 getmicrotime(&sc->sc_if.if_lastchange); 1172 sc->sc_if.if_opackets++; 1173 sc->sc_if.if_obytes += len; 1174 carpstats.carps_opackets++; 1175 1176 error = ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, 1177 NULL); 1178 if (error) { 1179 if (error == ENOBUFS) 1180 carpstats.carps_onomem++; 1181 else 1182 CARP_LOG(LOG_WARNING, sc, 1183 ("ip_output failed: %d", error)); 1184 sc->sc_if.if_oerrors++; 1185 if (sc->sc_sendad_errors < INT_MAX) 1186 sc->sc_sendad_errors++; 1187 if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS(sc)) 1188 carp_group_demote_adj(&sc->sc_if, 1); 1189 sc->sc_sendad_success = 0; 1190 } else { 1191 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS(sc)) { 1192 if (++sc->sc_sendad_success >= 1193 CARP_SENDAD_MIN_SUCCESS(sc)) { 1194 carp_group_demote_adj(&sc->sc_if, -1); 1195 sc->sc_sendad_errors = 0; 1196 } 1197 } else 1198 sc->sc_sendad_errors = 0; 1199 } 1200 if (vhe->vhe_leader) { 1201 if (sc->sc_delayed_arp > 0) 1202 sc->sc_delayed_arp--; 1203 if (sc->sc_delayed_arp == 0) { 1204 carp_send_arp(sc); 1205 sc->sc_delayed_arp = -1; 1206 } 1207 } 1208 } 1209 #endif /* INET */ 1210 #ifdef INET6 1211 if (sc->sc_naddrs6) { 1212 struct ip6_hdr *ip6; 1213 1214 MGETHDR(m, M_DONTWAIT, MT_HEADER); 1215 if (m == NULL) { 1216 sc->sc_if.if_oerrors++; 1217 carpstats.carps_onomem++; 1218 /* XXX maybe less ? */ 1219 goto retry_later; 1220 } 1221 len = sizeof(*ip6) + sizeof(ch); 1222 m->m_pkthdr.len = len; 1223 m->m_pkthdr.rcvif = NULL; 1224 m->m_len = len; 1225 MH_ALIGN(m, m->m_len); 1226 m->m_flags |= M_MCAST; 1227 ip6 = mtod(m, struct ip6_hdr *); 1228 bzero(ip6, sizeof(*ip6)); 1229 ip6->ip6_vfc |= IPV6_VERSION; 1230 ip6->ip6_hlim = CARP_DFLTTL; 1231 ip6->ip6_nxt = IPPROTO_CARP; 1232 1233 /* set the source address */ 1234 bzero(&sa, sizeof(sa)); 1235 sa.sa_family = AF_INET6; 1236 ifa = ifaof_ifpforaddr(&sa, sc->sc_carpdev); 1237 if (ifa == NULL) /* This should never happen with IPv6 */ 1238 bzero(&ip6->ip6_src, sizeof(struct in6_addr)); 1239 else 1240 bcopy(ifatoia6(ifa)->ia_addr.sin6_addr.s6_addr, 1241 &ip6->ip6_src, sizeof(struct in6_addr)); 1242 /* set the multicast destination */ 1243 1244 ip6->ip6_dst.s6_addr8[0] = 0xff; 1245 ip6->ip6_dst.s6_addr8[1] = 0x02; 1246 ip6->ip6_dst.s6_addr8[15] = 0x12; 1247 1248 ch_ptr = (void *)ip6 + sizeof(*ip6); 1249 bcopy(&ch, ch_ptr, sizeof(ch)); 1250 if (carp_prepare_ad(m, vhe, ch_ptr)) 1251 goto retry_later; 1252 1253 m->m_data += sizeof(*ip6); 1254 ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip6)); 1255 m->m_data -= sizeof(*ip6); 1256 1257 getmicrotime(&sc->sc_if.if_lastchange); 1258 sc->sc_if.if_opackets++; 1259 sc->sc_if.if_obytes += len; 1260 carpstats.carps_opackets6++; 1261 1262 error = ip6_output(m, NULL, NULL, 0, &sc->sc_im6o, NULL, NULL); 1263 if (error) { 1264 if (error == ENOBUFS) 1265 carpstats.carps_onomem++; 1266 else 1267 CARP_LOG(LOG_WARNING, sc, 1268 ("ip6_output failed: %d", error)); 1269 sc->sc_if.if_oerrors++; 1270 if (sc->sc_sendad_errors < INT_MAX) 1271 sc->sc_sendad_errors++; 1272 if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS(sc)) 1273 carp_group_demote_adj(&sc->sc_if, 1); 1274 sc->sc_sendad_success = 0; 1275 } else { 1276 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS(sc)) { 1277 if (++sc->sc_sendad_success >= 1278 CARP_SENDAD_MIN_SUCCESS(sc)) { 1279 carp_group_demote_adj(&sc->sc_if, -1); 1280 sc->sc_sendad_errors = 0; 1281 } 1282 } else 1283 sc->sc_sendad_errors = 0; 1284 } 1285 } 1286 #endif /* INET6 */ 1287 1288 retry_later: 1289 sc->cur_vhe = NULL; 1290 splx(s); 1291 if (advbase != 255 || advskew != 255) 1292 timeout_add(&vhe->ad_tmo, tvtohz(&tv)); 1293 } 1294 1295 /* 1296 * Broadcast a gratuitous ARP request containing 1297 * the virtual router MAC address for each IP address 1298 * associated with the virtual router. 1299 */ 1300 void 1301 carp_send_arp(struct carp_softc *sc) 1302 { 1303 struct ifaddr *ifa; 1304 in_addr_t in; 1305 int s = splsoftnet(); 1306 1307 TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) { 1308 1309 if (ifa->ifa_addr->sa_family != AF_INET) 1310 continue; 1311 1312 in = ifatoia(ifa)->ia_addr.sin_addr.s_addr; 1313 arprequest(sc->sc_carpdev, &in, &in, sc->sc_ac.ac_enaddr); 1314 DELAY(1000); /* XXX */ 1315 } 1316 splx(s); 1317 } 1318 1319 #ifdef INET6 1320 void 1321 carp_send_na(struct carp_softc *sc) 1322 { 1323 struct ifaddr *ifa; 1324 struct in6_addr *in6; 1325 static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT; 1326 int s = splsoftnet(); 1327 1328 TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) { 1329 1330 if (ifa->ifa_addr->sa_family != AF_INET6) 1331 continue; 1332 1333 in6 = &ifatoia6(ifa)->ia_addr.sin6_addr; 1334 nd6_na_output(sc->sc_carpdev, &mcast, in6, 1335 ND_NA_FLAG_OVERRIDE, 1, NULL); 1336 DELAY(1000); /* XXX */ 1337 } 1338 splx(s); 1339 } 1340 #endif /* INET6 */ 1341 1342 /* 1343 * Based on bridge_hash() in if_bridge.c 1344 */ 1345 #define mix(a,b,c) \ 1346 do { \ 1347 a -= b; a -= c; a ^= (c >> 13); \ 1348 b -= c; b -= a; b ^= (a << 8); \ 1349 c -= a; c -= b; c ^= (b >> 13); \ 1350 a -= b; a -= c; a ^= (c >> 12); \ 1351 b -= c; b -= a; b ^= (a << 16); \ 1352 c -= a; c -= b; c ^= (b >> 5); \ 1353 a -= b; a -= c; a ^= (c >> 3); \ 1354 b -= c; b -= a; b ^= (a << 10); \ 1355 c -= a; c -= b; c ^= (b >> 15); \ 1356 } while (0) 1357 1358 u_int32_t 1359 carp_hash(struct carp_softc *sc, u_char *src) 1360 { 1361 u_int32_t a = 0x9e3779b9, b = sc->sc_hashkey[0], c = sc->sc_hashkey[1]; 1362 1363 c += sc->sc_key[3] << 24; 1364 c += sc->sc_key[2] << 16; 1365 c += sc->sc_key[1] << 8; 1366 c += sc->sc_key[0]; 1367 b += src[5] << 8; 1368 b += src[4]; 1369 a += src[3] << 24; 1370 a += src[2] << 16; 1371 a += src[1] << 8; 1372 a += src[0]; 1373 1374 mix(a, b, c); 1375 return (c); 1376 } 1377 1378 void 1379 carp_update_lsmask(struct carp_softc *sc) 1380 { 1381 struct carp_vhost_entry *vhe; 1382 int count; 1383 1384 if (!sc->sc_balancing) 1385 return; 1386 1387 sc->sc_lsmask = 0; 1388 count = 0; 1389 1390 LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) { 1391 if (vhe->state == MASTER && count < sizeof(sc->sc_lsmask) * 8) 1392 sc->sc_lsmask |= 1 << count; 1393 count++; 1394 } 1395 sc->sc_lscount = count; 1396 CARP_LOG(LOG_DEBUG, sc, ("carp_update_lsmask: %x", sc->sc_lsmask)); 1397 } 1398 1399 int 1400 carp_iamatch(struct in_ifaddr *ia, u_char *src, u_int8_t **sha, 1401 u_int8_t **ether_shost) 1402 { 1403 struct carp_softc *sc = ia->ia_ifp->if_softc; 1404 struct carp_vhost_entry *vhe = LIST_FIRST(&sc->carp_vhosts); 1405 1406 if (sc->sc_balancing == CARP_BAL_ARP) { 1407 int lshash; 1408 /* 1409 * We use the source MAC address to decide which virtual host 1410 * should handle the request. If we're master of that virtual 1411 * host, then we respond, otherwise, just drop the arp packet 1412 * on the floor. 1413 */ 1414 1415 if (sc->sc_lscount == 0) /* just to be safe */ 1416 return (0); 1417 lshash = carp_hash(sc, src) % sc->sc_lscount; 1418 if ((1 << lshash) & sc->sc_lsmask) { 1419 int i = 0; 1420 LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) { 1421 if (i++ == lshash) 1422 break; 1423 } 1424 if (vhe == NULL) 1425 return (0); 1426 *sha = vhe->vhe_enaddr; 1427 return (1); 1428 } 1429 } else if (sc->sc_balancing == CARP_BAL_IPSTEALTH || 1430 sc->sc_balancing == CARP_BAL_IP) { 1431 if (vhe->state == MASTER) { 1432 *ether_shost = ((struct arpcom *)sc->sc_carpdev)-> 1433 ac_enaddr; 1434 return (1); 1435 } 1436 } else { 1437 if (vhe->state == MASTER) 1438 return (1); 1439 } 1440 1441 return (0); 1442 } 1443 1444 #ifdef INET6 1445 int 1446 carp_iamatch6(struct ifnet *ifp, u_char *src, struct sockaddr_dl **sdl) 1447 { 1448 struct carp_softc *sc = ifp->if_softc; 1449 struct carp_vhost_entry *vhe = LIST_FIRST(&sc->carp_vhosts); 1450 1451 if (sc->sc_balancing == CARP_BAL_ARP) { 1452 int lshash; 1453 /* 1454 * We use the source MAC address to decide which virtual host 1455 * should handle the request. If we're master of that virtual 1456 * host, then we respond, otherwise, just drop the ndp packet 1457 * on the floor. 1458 */ 1459 1460 /* can happen if optional src lladdr is not provided */ 1461 if (src == NULL) 1462 return (0); 1463 if (sc->sc_lscount == 0) /* just to be safe */ 1464 return (0); 1465 lshash = carp_hash(sc, src) % sc->sc_lscount; 1466 if ((1 << lshash) & sc->sc_lsmask) { 1467 int i = 0; 1468 LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) { 1469 if (i++ == lshash) 1470 break; 1471 } 1472 if (vhe == NULL) 1473 return (0); 1474 *sdl = &vhe->vhe_sdl; 1475 return (1); 1476 } 1477 } else { 1478 if (vhe->state == MASTER) 1479 return (1); 1480 } 1481 1482 return (0); 1483 } 1484 #endif /* INET6 */ 1485 1486 struct ifnet * 1487 carp_ourether(void *v, struct ether_header *eh, int src) 1488 { 1489 struct carp_if *cif = (struct carp_if *)v; 1490 struct carp_softc *vh; 1491 u_int8_t *ena; 1492 1493 if (src) 1494 ena = (u_int8_t *)&eh->ether_shost; 1495 else 1496 ena = (u_int8_t *)&eh->ether_dhost; 1497 1498 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1499 struct carp_vhost_entry *vhe; 1500 if ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) != 1501 (IFF_UP|IFF_RUNNING)) 1502 continue; 1503 if (vh->sc_balancing == CARP_BAL_ARP) { 1504 LIST_FOREACH(vhe, &vh->carp_vhosts, vhost_entries) 1505 if (vhe->state == MASTER && 1506 !bcmp(ena, vhe->vhe_enaddr, ETHER_ADDR_LEN)) 1507 return (&vh->sc_if); 1508 } else { 1509 vhe = LIST_FIRST(&vh->carp_vhosts); 1510 if ((vhe->state == MASTER || 1511 vh->sc_balancing >= CARP_BAL_IP) && 1512 !bcmp(ena, vh->sc_ac.ac_enaddr, ETHER_ADDR_LEN)) 1513 return (&vh->sc_if); 1514 } 1515 } 1516 return (NULL); 1517 } 1518 1519 void 1520 carp_rewrite_lladdr(struct ifnet *ifp, u_int8_t *s_enaddr) 1521 { 1522 struct carp_softc *sc = ifp->if_softc; 1523 1524 if (sc->sc_balancing != CARP_BAL_IPSTEALTH && 1525 sc->sc_balancing != CARP_BAL_IP && sc->cur_vhe) { 1526 if (sc->cur_vhe->vhe_leader) 1527 bcopy((caddr_t)sc->sc_ac.ac_enaddr, 1528 (caddr_t)s_enaddr, ETHER_ADDR_LEN); 1529 else 1530 bcopy((caddr_t)sc->cur_vhe->vhe_enaddr, 1531 (caddr_t)s_enaddr, ETHER_ADDR_LEN); 1532 } 1533 } 1534 1535 int 1536 carp_our_mcastaddr(struct ifnet *ifp, u_int8_t *d_enaddr) 1537 { 1538 struct carp_softc *sc = ifp->if_softc; 1539 1540 if (sc->sc_balancing != CARP_BAL_IP) 1541 return (0); 1542 1543 return(!bcmp(sc->sc_ac.ac_enaddr, d_enaddr, ETHER_ADDR_LEN)); 1544 } 1545 1546 1547 int 1548 carp_input(struct mbuf *m, u_int8_t *shost, u_int8_t *dhost, u_int16_t etype) 1549 { 1550 struct ether_header eh; 1551 struct carp_if *cif = (struct carp_if *)m->m_pkthdr.rcvif->if_carp; 1552 struct ifnet *ifp; 1553 1554 bcopy(shost, &eh.ether_shost, sizeof(eh.ether_shost)); 1555 bcopy(dhost, &eh.ether_dhost, sizeof(eh.ether_dhost)); 1556 eh.ether_type = etype; 1557 1558 if ((ifp = carp_ourether(cif, &eh, 0))) 1559 ; 1560 else if (m->m_flags & (M_BCAST|M_MCAST)) { 1561 struct carp_softc *vh; 1562 struct mbuf *m0; 1563 1564 /* 1565 * XXX Should really check the list of multicast addresses 1566 * for each CARP interface _before_ copying. 1567 */ 1568 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1569 m0 = m_copym2(m, 0, M_COPYALL, M_DONTWAIT); 1570 if (m0 == NULL) 1571 continue; 1572 m0->m_pkthdr.rcvif = &vh->sc_if; 1573 ether_input(&vh->sc_if, &eh, m0); 1574 } 1575 return (1); 1576 } 1577 1578 if (ifp == NULL) 1579 return (1); 1580 1581 m->m_pkthdr.rcvif = ifp; 1582 1583 #if NBPFILTER > 0 1584 if (ifp->if_bpf) 1585 bpf_mtap_hdr(ifp->if_bpf, (char *)&eh, ETHER_HDR_LEN, m, 1586 BPF_DIRECTION_IN); 1587 #endif 1588 ifp->if_ipackets++; 1589 ether_input(ifp, &eh, m); 1590 1591 return (0); 1592 } 1593 1594 int 1595 carp_lsdrop(struct mbuf *m, sa_family_t af, u_int32_t *src, u_int32_t *dst) 1596 { 1597 struct carp_softc *sc = m->m_pkthdr.rcvif->if_softc; 1598 int match; 1599 u_int32_t fold; 1600 1601 if (sc->sc_balancing < CARP_BAL_IP) 1602 return (0); 1603 /* 1604 * Never drop carp advertisements. 1605 * XXX Bad idea to pass all broadcast / multicast traffic? 1606 */ 1607 if (m->m_flags & (M_BCAST|M_MCAST)) 1608 return (0); 1609 1610 fold = src[0] ^ dst[0]; 1611 #ifdef INET6 1612 if (af == AF_INET6) { 1613 int i; 1614 for (i = 1; i < 4; i++) 1615 fold ^= src[i] ^ dst[i]; 1616 } 1617 #endif 1618 if (sc->sc_lscount == 0) /* just to be safe */ 1619 return (1); 1620 match = (1 << (ntohl(fold) % sc->sc_lscount)) & sc->sc_lsmask; 1621 1622 return (!match); 1623 } 1624 1625 void 1626 carp_master_down(void *v) 1627 { 1628 struct carp_vhost_entry *vhe = v; 1629 struct carp_softc *sc = vhe->parent_sc; 1630 1631 switch (vhe->state) { 1632 case INIT: 1633 printf("%s: master_down event in INIT state\n", 1634 sc->sc_if.if_xname); 1635 break; 1636 case MASTER: 1637 break; 1638 case BACKUP: 1639 carp_set_state(vhe, MASTER); 1640 carp_send_ad(vhe); 1641 if (sc->sc_balancing == CARP_BAL_NONE && vhe->vhe_leader) { 1642 carp_send_arp(sc); 1643 /* Schedule a delayed ARP to deal w/ some L3 switches */ 1644 sc->sc_delayed_arp = 2; 1645 #ifdef INET6 1646 carp_send_na(sc); 1647 #endif /* INET6 */ 1648 } 1649 carp_setrun(vhe, 0); 1650 if (vhe->vhe_leader) 1651 carp_setroute(sc, RTM_ADD); 1652 carpstats.carps_preempt++; 1653 break; 1654 } 1655 } 1656 1657 void 1658 carp_setrun_all(struct carp_softc *sc, sa_family_t af) 1659 { 1660 struct carp_vhost_entry *vhe; 1661 LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) { 1662 carp_setrun(vhe, af); 1663 } 1664 } 1665 1666 /* 1667 * When in backup state, af indicates whether to reset the master down timer 1668 * for v4 or v6. If it's set to zero, reset the ones which are already pending. 1669 */ 1670 void 1671 carp_setrun(struct carp_vhost_entry *vhe, sa_family_t af) 1672 { 1673 struct timeval tv; 1674 struct carp_softc *sc = vhe->parent_sc; 1675 1676 if (sc->sc_carpdev == NULL) { 1677 sc->sc_if.if_flags &= ~IFF_RUNNING; 1678 carp_set_state_all(sc, INIT); 1679 return; 1680 } 1681 1682 if (sc->sc_if.if_flags & IFF_UP && vhe->vhid > 0 && 1683 (sc->sc_naddrs || sc->sc_naddrs6) && !sc->sc_suppress) { 1684 sc->sc_if.if_flags |= IFF_RUNNING; 1685 } else { 1686 sc->sc_if.if_flags &= ~IFF_RUNNING; 1687 if (vhe->vhe_leader) 1688 carp_setroute(sc, RTM_DELETE); 1689 return; 1690 } 1691 1692 switch (vhe->state) { 1693 case INIT: 1694 carp_set_state(vhe, BACKUP); 1695 if (vhe->vhe_leader) 1696 carp_setroute(sc, RTM_DELETE); 1697 carp_setrun(vhe, 0); 1698 break; 1699 case BACKUP: 1700 timeout_del(&vhe->ad_tmo); 1701 tv.tv_sec = 3 * sc->sc_advbase; 1702 tv.tv_usec = vhe->advskew * 1000000 / 256; 1703 if (vhe->vhe_leader) 1704 sc->sc_delayed_arp = -1; 1705 switch (af) { 1706 #ifdef INET 1707 case AF_INET: 1708 timeout_add(&vhe->md_tmo, tvtohz(&tv)); 1709 break; 1710 #endif /* INET */ 1711 #ifdef INET6 1712 case AF_INET6: 1713 timeout_add(&vhe->md6_tmo, tvtohz(&tv)); 1714 break; 1715 #endif /* INET6 */ 1716 default: 1717 if (sc->sc_naddrs) 1718 timeout_add(&vhe->md_tmo, tvtohz(&tv)); 1719 if (sc->sc_naddrs6) 1720 timeout_add(&vhe->md6_tmo, tvtohz(&tv)); 1721 break; 1722 } 1723 break; 1724 case MASTER: 1725 tv.tv_sec = sc->sc_advbase; 1726 tv.tv_usec = vhe->advskew * 1000000 / 256; 1727 timeout_add(&vhe->ad_tmo, tvtohz(&tv)); 1728 break; 1729 } 1730 } 1731 1732 void 1733 carp_multicast_cleanup(struct carp_softc *sc) 1734 { 1735 struct ip_moptions *imo = &sc->sc_imo; 1736 #ifdef INET6 1737 struct ip6_moptions *im6o = &sc->sc_im6o; 1738 #endif 1739 u_int16_t n = imo->imo_num_memberships; 1740 1741 /* Clean up our own multicast memberships */ 1742 while (n-- > 0) { 1743 if (imo->imo_membership[n] != NULL) { 1744 in_delmulti(imo->imo_membership[n]); 1745 imo->imo_membership[n] = NULL; 1746 } 1747 } 1748 imo->imo_num_memberships = 0; 1749 imo->imo_multicast_ifp = NULL; 1750 1751 #ifdef INET6 1752 while (!LIST_EMPTY(&im6o->im6o_memberships)) { 1753 struct in6_multi_mship *imm = 1754 LIST_FIRST(&im6o->im6o_memberships); 1755 1756 LIST_REMOVE(imm, i6mm_chain); 1757 in6_leavegroup(imm); 1758 } 1759 im6o->im6o_multicast_ifp = NULL; 1760 #endif 1761 1762 /* And any other multicast memberships */ 1763 carp_ether_purgemulti(sc); 1764 } 1765 1766 int 1767 carp_set_ifp(struct carp_softc *sc, struct ifnet *ifp) 1768 { 1769 struct carp_if *cif, *ncif = NULL; 1770 struct carp_softc *vr, *after = NULL; 1771 int myself = 0, error = 0; 1772 int s; 1773 1774 if (ifp == sc->sc_carpdev) 1775 return (0); 1776 1777 if (ifp != NULL) { 1778 if ((ifp->if_flags & IFF_MULTICAST) == 0) 1779 return (EADDRNOTAVAIL); 1780 1781 if (ifp->if_type == IFT_CARP) 1782 return (EINVAL); 1783 1784 if (ifp->if_carp == NULL) { 1785 ncif = malloc(sizeof(*cif), M_IFADDR, M_NOWAIT); 1786 if (ncif == NULL) 1787 return (ENOBUFS); 1788 if ((error = ifpromisc(ifp, 1))) { 1789 free(ncif, M_IFADDR); 1790 return (error); 1791 } 1792 1793 ncif->vhif_ifp = ifp; 1794 TAILQ_INIT(&ncif->vhif_vrs); 1795 } else { 1796 cif = (struct carp_if *)ifp->if_carp; 1797 if (carp_check_dup_vhids(sc, cif, NULL)) 1798 return (EINVAL); 1799 } 1800 1801 /* detach from old interface */ 1802 if (sc->sc_carpdev != NULL) 1803 carpdetach(sc); 1804 1805 /* join multicast groups */ 1806 if (sc->sc_naddrs < 0 && 1807 (error = carp_join_multicast(sc)) != 0) { 1808 if (ncif != NULL) 1809 free(ncif, M_IFADDR); 1810 return (error); 1811 } 1812 1813 #ifdef INET6 1814 if (sc->sc_naddrs6 < 0 && 1815 (error = carp_join_multicast6(sc)) != 0) { 1816 if (ncif != NULL) 1817 free(ncif, M_IFADDR); 1818 carp_multicast_cleanup(sc); 1819 return (error); 1820 } 1821 #endif 1822 1823 /* attach carp interface to physical interface */ 1824 if (ncif != NULL) 1825 ifp->if_carp = (caddr_t)ncif; 1826 sc->sc_carpdev = ifp; 1827 cif = (struct carp_if *)ifp->if_carp; 1828 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) { 1829 if (vr == sc) 1830 myself = 1; 1831 if (LIST_FIRST(&vr->carp_vhosts)->vhid < 1832 LIST_FIRST(&sc->carp_vhosts)->vhid) 1833 after = vr; 1834 } 1835 1836 if (!myself) { 1837 /* We're trying to keep things in order */ 1838 if (after == NULL) { 1839 TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list); 1840 } else { 1841 TAILQ_INSERT_AFTER(&cif->vhif_vrs, after, 1842 sc, sc_list); 1843 } 1844 cif->vhif_nvrs++; 1845 } 1846 if (sc->sc_naddrs || sc->sc_naddrs6) 1847 sc->sc_if.if_flags |= IFF_UP; 1848 carp_set_enaddr(sc); 1849 s = splnet(); 1850 sc->lh_cookie = hook_establish(ifp->if_linkstatehooks, 1, 1851 carp_carpdev_state, ifp); 1852 carp_carpdev_state(ifp); 1853 splx(s); 1854 } else { 1855 carpdetach(sc); 1856 sc->sc_if.if_flags &= ~(IFF_UP|IFF_RUNNING); 1857 } 1858 return (0); 1859 } 1860 1861 void 1862 carp_set_vhe_enaddr(struct carp_vhost_entry *vhe) 1863 { 1864 struct carp_softc *sc = vhe->parent_sc; 1865 1866 if (vhe->vhid != 0 && sc->sc_carpdev) { 1867 if (vhe->vhe_leader && sc->sc_balancing == CARP_BAL_IP) 1868 vhe->vhe_enaddr[0] = 1; 1869 else 1870 vhe->vhe_enaddr[0] = 0; 1871 vhe->vhe_enaddr[1] = 0; 1872 vhe->vhe_enaddr[2] = 0x5e; 1873 vhe->vhe_enaddr[3] = 0; 1874 vhe->vhe_enaddr[4] = 1; 1875 vhe->vhe_enaddr[5] = vhe->vhid; 1876 1877 vhe->vhe_sdl.sdl_family = AF_LINK; 1878 vhe->vhe_sdl.sdl_alen = ETHER_ADDR_LEN; 1879 bcopy(vhe->vhe_enaddr, vhe->vhe_sdl.sdl_data, ETHER_ADDR_LEN); 1880 } else 1881 bzero(vhe->vhe_enaddr, ETHER_ADDR_LEN); 1882 } 1883 1884 void 1885 carp_set_enaddr(struct carp_softc *sc) 1886 { 1887 struct carp_vhost_entry *vhe; 1888 1889 LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) 1890 carp_set_vhe_enaddr(vhe); 1891 1892 vhe = LIST_FIRST(&sc->carp_vhosts); 1893 1894 /* 1895 * Use the carp lladdr if the running one isn't manually set. 1896 * Only compare static parts of the lladdr. 1897 */ 1898 if ((bcmp(sc->sc_ac.ac_enaddr + 1, vhe->vhe_enaddr + 1, 1899 ETHER_ADDR_LEN - 2) == 0) || 1900 (!sc->sc_ac.ac_enaddr[0] && !sc->sc_ac.ac_enaddr[1] && 1901 !sc->sc_ac.ac_enaddr[2] && !sc->sc_ac.ac_enaddr[3] && 1902 !sc->sc_ac.ac_enaddr[4] && !sc->sc_ac.ac_enaddr[5])) 1903 bcopy(vhe->vhe_enaddr, sc->sc_ac.ac_enaddr, ETHER_ADDR_LEN); 1904 1905 /* Make sure the enaddr has changed before further twiddling. */ 1906 if (bcmp(sc->sc_ac.ac_enaddr, sc->sc_curlladdr, ETHER_ADDR_LEN) != 0) { 1907 bcopy(sc->sc_ac.ac_enaddr, LLADDR(sc->sc_if.if_sadl), 1908 ETHER_ADDR_LEN); 1909 bcopy(sc->sc_ac.ac_enaddr, sc->sc_curlladdr, ETHER_ADDR_LEN); 1910 #ifdef INET6 1911 /* 1912 * (re)attach a link-local address which matches 1913 * our new MAC address. 1914 */ 1915 in6_ifattach_linklocal(&sc->sc_if, NULL); 1916 #endif 1917 carp_set_state_all(sc, INIT); 1918 carp_setrun_all(sc, 0); 1919 } 1920 } 1921 1922 void 1923 carp_addr_updated(void *v) 1924 { 1925 struct carp_softc *sc = (struct carp_softc *) v; 1926 struct ifaddr *ifa; 1927 int new_naddrs = 0, new_naddrs6 = 0; 1928 1929 TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) { 1930 if (ifa->ifa_addr->sa_family == AF_INET) 1931 new_naddrs++; 1932 else if (ifa->ifa_addr->sa_family == AF_INET6 && 1933 !IN6_IS_ADDR_LINKLOCAL(&ifatoia6(ifa)->ia_addr.sin6_addr)) 1934 new_naddrs6++; 1935 } 1936 1937 /* Handle a callback after SIOCDIFADDR */ 1938 if (new_naddrs < sc->sc_naddrs || new_naddrs6 < sc->sc_naddrs6) { 1939 struct in_addr mc_addr; 1940 struct in_multi *inm; 1941 1942 sc->sc_naddrs = new_naddrs; 1943 sc->sc_naddrs6 = new_naddrs6; 1944 1945 /* Re-establish multicast membership removed by in_control */ 1946 if (IN_MULTICAST(sc->sc_peer.s_addr)) { 1947 mc_addr.s_addr = sc->sc_peer.s_addr; 1948 IN_LOOKUP_MULTI(mc_addr, &sc->sc_if, inm); 1949 if (inm == NULL) { 1950 struct in_multi **imm = 1951 sc->sc_imo.imo_membership; 1952 u_int16_t maxmem = 1953 sc->sc_imo.imo_max_memberships; 1954 1955 bzero(&sc->sc_imo, sizeof(sc->sc_imo)); 1956 sc->sc_imo.imo_membership = imm; 1957 sc->sc_imo.imo_max_memberships = maxmem; 1958 1959 if (sc->sc_carpdev != NULL && sc->sc_naddrs > 0) 1960 carp_join_multicast(sc); 1961 } 1962 } 1963 1964 if (sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0) { 1965 sc->sc_if.if_flags &= ~IFF_UP; 1966 carp_set_state_all(sc, INIT); 1967 } else 1968 carp_hmac_prepare(sc); 1969 } 1970 1971 carp_setrun_all(sc, 0); 1972 } 1973 1974 int 1975 carp_set_addr(struct carp_softc *sc, struct sockaddr_in *sin) 1976 { 1977 struct ifnet *ifp = sc->sc_carpdev; 1978 struct in_ifaddr *ia, *ia_if; 1979 int error = 0; 1980 1981 if (sin->sin_addr.s_addr == 0) { 1982 if (!(sc->sc_if.if_flags & IFF_UP)) 1983 carp_set_state_all(sc, INIT); 1984 if (sc->sc_naddrs) 1985 sc->sc_if.if_flags |= IFF_UP; 1986 carp_setrun_all(sc, 0); 1987 return (0); 1988 } 1989 1990 /* we have to do this by hand to ensure we don't match on ourselves */ 1991 ia_if = NULL; 1992 for (ia = TAILQ_FIRST(&in_ifaddr); ia; 1993 ia = TAILQ_NEXT(ia, ia_list)) { 1994 1995 /* and, yeah, we need a multicast-capable iface too */ 1996 if (ia->ia_ifp != &sc->sc_if && 1997 ia->ia_ifp->if_type != IFT_CARP && 1998 (ia->ia_ifp->if_flags & IFF_MULTICAST) && 1999 (sin->sin_addr.s_addr & ia->ia_subnetmask) == 2000 ia->ia_subnet) { 2001 if (!ia_if) 2002 ia_if = ia; 2003 } 2004 } 2005 2006 if (ia_if) { 2007 ia = ia_if; 2008 if (ifp) { 2009 if (ifp != ia->ia_ifp) 2010 return (EADDRNOTAVAIL); 2011 } else { 2012 ifp = ia->ia_ifp; 2013 } 2014 } 2015 2016 if ((error = carp_set_ifp(sc, ifp))) 2017 return (error); 2018 2019 if (sc->sc_carpdev == NULL) 2020 return (EADDRNOTAVAIL); 2021 2022 if (sc->sc_naddrs == 0 && (error = carp_join_multicast(sc)) != 0) 2023 return (error); 2024 2025 sc->sc_naddrs++; 2026 if (sc->sc_carpdev != NULL) 2027 sc->sc_if.if_flags |= IFF_UP; 2028 2029 carp_set_state_all(sc, INIT); 2030 2031 /* 2032 * Hook if_addrhooks so that we get a callback after in_ifinit has run, 2033 * to correct any inappropriate routes that it inserted. 2034 */ 2035 if (sc->ah_cookie == NULL) 2036 sc->ah_cookie = hook_establish(sc->sc_if.if_addrhooks, 0, 2037 carp_addr_updated, sc); 2038 2039 return (0); 2040 } 2041 2042 int 2043 carp_join_multicast(struct carp_softc *sc) 2044 { 2045 struct ip_moptions *imo = &sc->sc_imo; 2046 struct in_multi *imm; 2047 struct in_addr addr; 2048 2049 if (!IN_MULTICAST(sc->sc_peer.s_addr)) 2050 return (0); 2051 2052 addr.s_addr = sc->sc_peer.s_addr; 2053 if ((imm = in_addmulti(&addr, &sc->sc_if)) == NULL) 2054 return (ENOBUFS); 2055 2056 imo->imo_membership[0] = imm; 2057 imo->imo_num_memberships = 1; 2058 imo->imo_multicast_ifp = &sc->sc_if; 2059 imo->imo_multicast_ttl = CARP_DFLTTL; 2060 imo->imo_multicast_loop = 0; 2061 return (0); 2062 } 2063 2064 2065 #ifdef INET6 2066 int 2067 carp_set_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6) 2068 { 2069 struct ifnet *ifp = sc->sc_carpdev; 2070 struct in6_ifaddr *ia, *ia_if; 2071 int error = 0; 2072 2073 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { 2074 if (!(sc->sc_if.if_flags & IFF_UP)) 2075 carp_set_state_all(sc, INIT); 2076 if (sc->sc_naddrs6) 2077 sc->sc_if.if_flags |= IFF_UP; 2078 carp_setrun_all(sc, 0); 2079 return (0); 2080 } 2081 2082 /* we have to do this by hand to ensure we don't match on ourselves */ 2083 ia_if = NULL; 2084 for (ia = in6_ifaddr; ia; ia = ia->ia_next) { 2085 int i; 2086 2087 for (i = 0; i < 4; i++) { 2088 if ((sin6->sin6_addr.s6_addr32[i] & 2089 ia->ia_prefixmask.sin6_addr.s6_addr32[i]) != 2090 (ia->ia_addr.sin6_addr.s6_addr32[i] & 2091 ia->ia_prefixmask.sin6_addr.s6_addr32[i])) 2092 break; 2093 } 2094 /* and, yeah, we need a multicast-capable iface too */ 2095 if (ia->ia_ifp != &sc->sc_if && 2096 ia->ia_ifp->if_type != IFT_CARP && 2097 (ia->ia_ifp->if_flags & IFF_MULTICAST) && 2098 (i == 4)) { 2099 if (!ia_if) 2100 ia_if = ia; 2101 } 2102 } 2103 2104 if (ia_if) { 2105 ia = ia_if; 2106 if (sc->sc_carpdev) { 2107 if (sc->sc_carpdev != ia->ia_ifp) 2108 return (EADDRNOTAVAIL); 2109 } else { 2110 ifp = ia->ia_ifp; 2111 } 2112 } 2113 2114 if ((error = carp_set_ifp(sc, ifp))) 2115 return (error); 2116 2117 if (sc->sc_carpdev == NULL) 2118 return (EADDRNOTAVAIL); 2119 2120 if (sc->sc_naddrs6 == 0 && (error = carp_join_multicast6(sc)) != 0) 2121 return (error); 2122 2123 if (!IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) 2124 sc->sc_naddrs6++; 2125 if (sc->sc_carpdev != NULL && sc->sc_naddrs6) 2126 sc->sc_if.if_flags |= IFF_UP; 2127 carp_set_state_all(sc, INIT); 2128 carp_setrun_all(sc, 0); 2129 2130 return (0); 2131 } 2132 2133 int 2134 carp_join_multicast6(struct carp_softc *sc) 2135 { 2136 struct in6_multi_mship *imm, *imm2; 2137 struct ip6_moptions *im6o = &sc->sc_im6o; 2138 struct sockaddr_in6 addr6; 2139 int error; 2140 2141 /* Join IPv6 CARP multicast group */ 2142 bzero(&addr6, sizeof(addr6)); 2143 addr6.sin6_family = AF_INET6; 2144 addr6.sin6_len = sizeof(addr6); 2145 addr6.sin6_addr.s6_addr16[0] = htons(0xff02); 2146 addr6.sin6_addr.s6_addr16[1] = htons(sc->sc_if.if_index); 2147 addr6.sin6_addr.s6_addr8[15] = 0x12; 2148 if ((imm = in6_joingroup(&sc->sc_if, 2149 &addr6.sin6_addr, &error)) == NULL) { 2150 return (error); 2151 } 2152 /* join solicited multicast address */ 2153 bzero(&addr6.sin6_addr, sizeof(addr6.sin6_addr)); 2154 addr6.sin6_addr.s6_addr16[0] = htons(0xff02); 2155 addr6.sin6_addr.s6_addr16[1] = htons(sc->sc_if.if_index); 2156 addr6.sin6_addr.s6_addr32[1] = 0; 2157 addr6.sin6_addr.s6_addr32[2] = htonl(1); 2158 addr6.sin6_addr.s6_addr32[3] = 0; 2159 addr6.sin6_addr.s6_addr8[12] = 0xff; 2160 if ((imm2 = in6_joingroup(&sc->sc_if, 2161 &addr6.sin6_addr, &error)) == NULL) { 2162 in6_leavegroup(imm); 2163 return (error); 2164 } 2165 2166 /* apply v6 multicast membership */ 2167 im6o->im6o_multicast_ifp = &sc->sc_if; 2168 if (imm) 2169 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, 2170 i6mm_chain); 2171 if (imm2) 2172 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm2, 2173 i6mm_chain); 2174 2175 return (0); 2176 } 2177 2178 #endif /* INET6 */ 2179 2180 int 2181 carp_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr) 2182 { 2183 struct proc *p = curproc; /* XXX */ 2184 struct carp_softc *sc = ifp->if_softc; 2185 struct carp_vhost_entry *vhe; 2186 struct carpreq carpr; 2187 struct ifaddr *ifa = (struct ifaddr *)addr; 2188 struct ifreq *ifr = (struct ifreq *)addr; 2189 struct ifnet *cdev = NULL; 2190 int i, error = 0; 2191 2192 switch (cmd) { 2193 case SIOCSIFADDR: 2194 switch (ifa->ifa_addr->sa_family) { 2195 #ifdef INET 2196 case AF_INET: 2197 sc->sc_if.if_flags |= IFF_UP; 2198 /* 2199 * emulate arp_ifinit() without doing a gratious arp 2200 * request so that the routes are setup correctly. 2201 */ 2202 ifa->ifa_rtrequest = arp_rtrequest; 2203 ifa->ifa_flags |= RTF_CLONING; 2204 2205 error = carp_set_addr(sc, satosin(ifa->ifa_addr)); 2206 break; 2207 #endif /* INET */ 2208 #ifdef INET6 2209 case AF_INET6: 2210 sc->sc_if.if_flags |= IFF_UP; 2211 error = carp_set_addr6(sc, satosin6(ifa->ifa_addr)); 2212 break; 2213 #endif /* INET6 */ 2214 default: 2215 error = EAFNOSUPPORT; 2216 break; 2217 } 2218 break; 2219 2220 case SIOCSIFFLAGS: 2221 vhe = LIST_FIRST(&sc->carp_vhosts); 2222 if (vhe->state != INIT && !(ifr->ifr_flags & IFF_UP)) { 2223 carp_del_all_timeouts(sc); 2224 2225 /* we need the interface up to bow out */ 2226 sc->sc_if.if_flags |= IFF_UP; 2227 sc->sc_bow_out = 1; 2228 carp_vhe_send_ad_all(sc); 2229 sc->sc_bow_out = 0; 2230 2231 sc->sc_if.if_flags &= ~IFF_UP; 2232 carp_set_state_all(sc, INIT); 2233 carp_setrun_all(sc, 0); 2234 } else if (vhe->state == INIT && (ifr->ifr_flags & IFF_UP)) { 2235 sc->sc_if.if_flags |= IFF_UP; 2236 carp_setrun_all(sc, 0); 2237 } 2238 break; 2239 2240 case SIOCSVH: 2241 vhe = LIST_FIRST(&sc->carp_vhosts); 2242 if ((error = suser(p, p->p_acflag)) != 0) 2243 break; 2244 if ((error = copyin(ifr->ifr_data, &carpr, sizeof carpr))) 2245 break; 2246 error = 1; 2247 if (carpr.carpr_carpdev[0] != '\0' && 2248 (cdev = ifunit(carpr.carpr_carpdev)) == NULL) 2249 return (EINVAL); 2250 if (carpr.carpr_peer.s_addr == 0) 2251 sc->sc_peer.s_addr = INADDR_CARP_GROUP; 2252 else 2253 sc->sc_peer.s_addr = carpr.carpr_peer.s_addr; 2254 if ((error = carp_set_ifp(sc, cdev))) 2255 return (error); 2256 if (vhe->state != INIT && carpr.carpr_state != vhe->state) { 2257 switch (carpr.carpr_state) { 2258 case BACKUP: 2259 timeout_del(&vhe->ad_tmo); 2260 carp_set_state_all(sc, BACKUP); 2261 carp_setrun_all(sc, 0); 2262 carp_setroute(sc, RTM_DELETE); 2263 break; 2264 case MASTER: 2265 LIST_FOREACH(vhe, &sc->carp_vhosts, 2266 vhost_entries) 2267 carp_master_down(vhe); 2268 break; 2269 default: 2270 break; 2271 } 2272 } 2273 if ((error = carp_vhids_ioctl(sc, &carpr))) 2274 return (error); 2275 if (carpr.carpr_advbase > 0) { 2276 if (carpr.carpr_advbase > 255) { 2277 error = EINVAL; 2278 break; 2279 } 2280 sc->sc_advbase = carpr.carpr_advbase; 2281 error--; 2282 } 2283 if (bcmp(sc->sc_advskews, carpr.carpr_advskews, 2284 sizeof(sc->sc_advskews))) { 2285 i = 0; 2286 LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) 2287 vhe->advskew = carpr.carpr_advskews[i++]; 2288 bcopy(carpr.carpr_advskews, sc->sc_advskews, 2289 sizeof(sc->sc_advskews)); 2290 } 2291 if (sc->sc_balancing != carpr.carpr_balancing) { 2292 if (carpr.carpr_balancing > CARP_BAL_MAXID) { 2293 error = EINVAL; 2294 break; 2295 } 2296 sc->sc_balancing = carpr.carpr_balancing; 2297 carp_set_enaddr(sc); 2298 carp_update_lsmask(sc); 2299 } 2300 bcopy(carpr.carpr_key, sc->sc_key, sizeof(sc->sc_key)); 2301 if (error > 0) 2302 error = EINVAL; 2303 else { 2304 error = 0; 2305 carp_setrun_all(sc, 0); 2306 } 2307 break; 2308 2309 case SIOCGVH: 2310 bzero(&carpr, sizeof(carpr)); 2311 if (sc->sc_carpdev != NULL) 2312 strlcpy(carpr.carpr_carpdev, sc->sc_carpdev->if_xname, 2313 IFNAMSIZ); 2314 i = 0; 2315 LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) { 2316 carpr.carpr_vhids[i] = vhe->vhid; 2317 carpr.carpr_advskews[i] = vhe->advskew; 2318 carpr.carpr_states[i] = vhe->state; 2319 i++; 2320 } 2321 carpr.carpr_advbase = sc->sc_advbase; 2322 carpr.carpr_balancing = sc->sc_balancing; 2323 if (suser(p, p->p_acflag) == 0) 2324 bcopy(sc->sc_key, carpr.carpr_key, 2325 sizeof(carpr.carpr_key)); 2326 carpr.carpr_peer.s_addr = sc->sc_peer.s_addr; 2327 error = copyout(&carpr, ifr->ifr_data, sizeof(carpr)); 2328 break; 2329 2330 case SIOCADDMULTI: 2331 error = carp_ether_addmulti(sc, ifr); 2332 break; 2333 2334 case SIOCDELMULTI: 2335 error = carp_ether_delmulti(sc, ifr); 2336 break; 2337 case SIOCAIFGROUP: 2338 case SIOCDIFGROUP: 2339 if (sc->sc_suppress) 2340 carp_ifgroup_ioctl(ifp, cmd, addr); 2341 break; 2342 case SIOCSIFGATTR: 2343 carp_ifgattr_ioctl(ifp, cmd, addr); 2344 break; 2345 default: 2346 error = ENOTTY; 2347 } 2348 2349 if (bcmp(sc->sc_ac.ac_enaddr, sc->sc_curlladdr, ETHER_ADDR_LEN) != 0) 2350 carp_set_enaddr(sc); 2351 carp_hmac_prepare(sc); 2352 return (error); 2353 } 2354 2355 int 2356 carp_check_dup_vhids(struct carp_softc *sc, struct carp_if *cif, 2357 struct carpreq *carpr) 2358 { 2359 struct carp_softc *vr; 2360 struct carp_vhost_entry *vhe, *vhe0; 2361 int i; 2362 2363 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) { 2364 if (vr == sc) 2365 continue; 2366 LIST_FOREACH(vhe, &vr->carp_vhosts, vhost_entries) { 2367 if (carpr) { 2368 for (i = 0; carpr->carpr_vhids[i]; i++) { 2369 if (vhe->vhid == carpr->carpr_vhids[i]) 2370 return (EINVAL); 2371 } 2372 } 2373 LIST_FOREACH(vhe0, &sc->carp_vhosts, vhost_entries) { 2374 if (vhe->vhid == vhe0->vhid) 2375 return (EINVAL); 2376 } 2377 } 2378 } 2379 return (0); 2380 } 2381 2382 int 2383 carp_vhids_ioctl(struct carp_softc *sc, struct carpreq *carpr) 2384 { 2385 int i, j; 2386 u_int8_t taken_vhids[256]; 2387 2388 if (carpr->carpr_vhids[0] == 0 || 2389 !bcmp(sc->sc_vhids, carpr->carpr_vhids, sizeof(sc->sc_vhids))) 2390 return (0); 2391 2392 bzero(taken_vhids, sizeof(taken_vhids)); 2393 for (i = 0; carpr->carpr_vhids[i]; i++) { 2394 if (taken_vhids[carpr->carpr_vhids[i]]) 2395 return (EINVAL); 2396 taken_vhids[carpr->carpr_vhids[i]] = 1; 2397 2398 if (sc->sc_carpdev) { 2399 struct carp_if *cif; 2400 cif = (struct carp_if *)sc->sc_carpdev->if_carp; 2401 if (carp_check_dup_vhids(sc, cif, carpr)) 2402 return (EINVAL); 2403 } 2404 if (carpr->carpr_advskews[i] >= 255) 2405 return (EINVAL); 2406 } 2407 /* set sane balancing defaults */ 2408 if (i <= 1) 2409 carpr->carpr_balancing = CARP_BAL_NONE; 2410 else if (carpr->carpr_balancing == CARP_BAL_NONE && 2411 sc->sc_balancing == CARP_BAL_NONE) 2412 carpr->carpr_balancing = CARP_BAL_IP; 2413 2414 /* destroy all */ 2415 carp_del_all_timeouts(sc); 2416 carp_destroy_vhosts(sc); 2417 bzero(sc->sc_vhids, sizeof(sc->sc_vhids)); 2418 2419 /* sort vhosts list by vhid */ 2420 for (j = 1; j <= 255; j++) { 2421 for (i = 0; carpr->carpr_vhids[i]; i++) { 2422 if (carpr->carpr_vhids[i] != j) 2423 continue; 2424 if (carp_new_vhost(sc, carpr->carpr_vhids[i], 2425 carpr->carpr_advskews[i])) 2426 return (ENOMEM); 2427 sc->sc_vhids[i] = carpr->carpr_vhids[i]; 2428 sc->sc_advskews[i] = carpr->carpr_advskews[i]; 2429 } 2430 } 2431 carp_set_enaddr(sc); 2432 carp_set_state_all(sc, INIT); 2433 return (0); 2434 } 2435 2436 void 2437 carp_ifgroup_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr) 2438 { 2439 struct ifgroupreq *ifgr = (struct ifgroupreq *)addr; 2440 struct ifg_list *ifgl; 2441 2442 if (!strcmp(ifgr->ifgr_group, IFG_ALL)) 2443 return; 2444 TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) 2445 if (!strcmp(ifgl->ifgl_group->ifg_group, ifgr->ifgr_group)) { 2446 if (cmd == SIOCAIFGROUP) 2447 ifgl->ifgl_group->ifg_carp_demoted++; 2448 else if (cmd == SIOCDIFGROUP && 2449 ifgl->ifgl_group->ifg_carp_demoted) 2450 ifgl->ifgl_group->ifg_carp_demoted--; 2451 } 2452 } 2453 2454 void 2455 carp_ifgattr_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr) 2456 { 2457 struct ifgroupreq *ifgr = (struct ifgroupreq *)addr; 2458 struct carp_softc *sc = ifp->if_softc; 2459 2460 if (ifgr->ifgr_attrib.ifg_carp_demoted > 0 && (sc->sc_if.if_flags & 2461 (IFF_UP|IFF_RUNNING)) == (IFF_UP|IFF_RUNNING)) 2462 carp_vhe_send_ad_all(sc); 2463 } 2464 2465 /* 2466 * Start output on carp interface. This function should never be called. 2467 */ 2468 void 2469 carp_start(struct ifnet *ifp) 2470 { 2471 #ifdef DEBUG 2472 printf("%s: start called\n", ifp->if_xname); 2473 #endif 2474 } 2475 2476 int 2477 carp_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa, 2478 struct rtentry *rt) 2479 { 2480 struct carp_softc *sc = ((struct carp_softc *)ifp->if_softc); 2481 struct carp_vhost_entry *vhe; 2482 2483 vhe = sc->cur_vhe ? sc->cur_vhe : LIST_FIRST(&sc->carp_vhosts); 2484 2485 if (sc->sc_carpdev != NULL && 2486 (sc->sc_balancing || vhe->state == MASTER)) 2487 return (sc->sc_carpdev->if_output(ifp, m, sa, rt)); 2488 else { 2489 m_freem(m); 2490 return (ENETUNREACH); 2491 } 2492 } 2493 2494 void 2495 carp_set_state_all(struct carp_softc *sc, int state) 2496 { 2497 struct carp_vhost_entry *vhe; 2498 2499 LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) 2500 carp_set_state(vhe, state); 2501 } 2502 2503 void 2504 carp_set_state(struct carp_vhost_entry *vhe, int state) 2505 { 2506 struct carp_softc *sc = vhe->parent_sc; 2507 static const char *carp_states[] = { CARP_STATES }; 2508 int loglevel; 2509 2510 if (vhe->state == state) 2511 return; 2512 if (vhe->state == INIT || state == INIT) 2513 loglevel = LOG_WARNING; 2514 else 2515 loglevel = LOG_CRIT; 2516 2517 if (sc->sc_vhe_count > 1) 2518 CARP_LOG(loglevel, sc, 2519 ("state transition (vhid %d): %s -> %s", vhe->vhid, 2520 carp_states[vhe->state], carp_states[state])); 2521 else 2522 CARP_LOG(loglevel, sc, 2523 ("state transition: %s -> %s", 2524 carp_states[vhe->state], carp_states[state])); 2525 2526 vhe->state = state; 2527 carp_update_lsmask(sc); 2528 2529 /* only the master vhe creates link state messages */ 2530 if (!vhe->vhe_leader) 2531 return; 2532 2533 switch (state) { 2534 case BACKUP: 2535 sc->sc_if.if_link_state = LINK_STATE_DOWN; 2536 break; 2537 case MASTER: 2538 sc->sc_if.if_link_state = LINK_STATE_UP; 2539 break; 2540 default: 2541 sc->sc_if.if_link_state = LINK_STATE_UNKNOWN; 2542 break; 2543 } 2544 if_link_state_change(&sc->sc_if); 2545 } 2546 2547 void 2548 carp_group_demote_adj(struct ifnet *ifp, int adj) 2549 { 2550 struct ifg_list *ifgl; 2551 int *dm; 2552 struct carp_softc *nil = NULL; 2553 2554 TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) { 2555 if (!strcmp(ifgl->ifgl_group->ifg_group, IFG_ALL)) 2556 continue; 2557 dm = &ifgl->ifgl_group->ifg_carp_demoted; 2558 2559 if (*dm + adj >= 0) 2560 *dm += adj; 2561 else 2562 *dm = 0; 2563 2564 if (adj > 0 && *dm == 1) 2565 carp_send_ad_all(); 2566 CARP_LOG(LOG_INFO, nil, ("%s demoted group %s to %d", ifp->if_xname, 2567 ifgl->ifgl_group->ifg_group, *dm)); 2568 } 2569 } 2570 2571 int 2572 carp_group_demote_count(struct carp_softc *sc) 2573 { 2574 struct ifg_list *ifgl; 2575 int count = 0; 2576 2577 TAILQ_FOREACH(ifgl, &sc->sc_if.if_groups, ifgl_next) 2578 count += ifgl->ifgl_group->ifg_carp_demoted; 2579 2580 return (count > 255 ? 255 : count); 2581 } 2582 2583 void 2584 carp_carpdev_state(void *v) 2585 { 2586 struct carp_if *cif; 2587 struct carp_softc *sc; 2588 struct ifnet *ifp = v; 2589 2590 if (ifp->if_type == IFT_CARP) 2591 return; 2592 2593 cif = (struct carp_if *)ifp->if_carp; 2594 2595 TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) { 2596 int suppressed = sc->sc_suppress; 2597 2598 if (sc->sc_carpdev->if_link_state == LINK_STATE_DOWN || 2599 !(sc->sc_carpdev->if_flags & IFF_UP)) { 2600 sc->sc_if.if_flags &= ~IFF_RUNNING; 2601 carp_del_all_timeouts(sc); 2602 carp_set_state_all(sc, INIT); 2603 sc->sc_suppress = 1; 2604 carp_setrun_all(sc, 0); 2605 if (!suppressed) 2606 carp_group_demote_adj(&sc->sc_if, 1); 2607 } else { 2608 carp_set_state_all(sc, INIT); 2609 sc->sc_suppress = 0; 2610 carp_setrun_all(sc, 0); 2611 if (suppressed) 2612 carp_group_demote_adj(&sc->sc_if, -1); 2613 } 2614 } 2615 } 2616 2617 int 2618 carp_ether_addmulti(struct carp_softc *sc, struct ifreq *ifr) 2619 { 2620 struct ifnet *ifp; 2621 struct carp_mc_entry *mc; 2622 u_int8_t addrlo[ETHER_ADDR_LEN], addrhi[ETHER_ADDR_LEN]; 2623 int error; 2624 2625 ifp = sc->sc_carpdev; 2626 if (ifp == NULL) 2627 return (EINVAL); 2628 2629 error = ether_addmulti(ifr, (struct arpcom *)&sc->sc_ac); 2630 if (error != ENETRESET) 2631 return (error); 2632 2633 /* 2634 * This is new multicast address. We have to tell parent 2635 * about it. Also, remember this multicast address so that 2636 * we can delete them on unconfigure. 2637 */ 2638 mc = malloc(sizeof(struct carp_mc_entry), M_DEVBUF, M_NOWAIT); 2639 if (mc == NULL) { 2640 error = ENOMEM; 2641 goto alloc_failed; 2642 } 2643 2644 /* 2645 * As ether_addmulti() returns ENETRESET, following two 2646 * statement shouldn't fail. 2647 */ 2648 (void)ether_multiaddr(&ifr->ifr_addr, addrlo, addrhi); 2649 ETHER_LOOKUP_MULTI(addrlo, addrhi, &sc->sc_ac, mc->mc_enm); 2650 memcpy(&mc->mc_addr, &ifr->ifr_addr, ifr->ifr_addr.sa_len); 2651 LIST_INSERT_HEAD(&sc->carp_mc_listhead, mc, mc_entries); 2652 2653 error = (*ifp->if_ioctl)(ifp, SIOCADDMULTI, (caddr_t)ifr); 2654 if (error != 0) 2655 goto ioctl_failed; 2656 2657 return (error); 2658 2659 ioctl_failed: 2660 LIST_REMOVE(mc, mc_entries); 2661 free(mc, M_DEVBUF); 2662 alloc_failed: 2663 (void)ether_delmulti(ifr, (struct arpcom *)&sc->sc_ac); 2664 2665 return (error); 2666 } 2667 2668 int 2669 carp_ether_delmulti(struct carp_softc *sc, struct ifreq *ifr) 2670 { 2671 struct ifnet *ifp; 2672 struct ether_multi *enm; 2673 struct carp_mc_entry *mc; 2674 u_int8_t addrlo[ETHER_ADDR_LEN], addrhi[ETHER_ADDR_LEN]; 2675 int error; 2676 2677 ifp = sc->sc_carpdev; 2678 if (ifp == NULL) 2679 return (EINVAL); 2680 2681 /* 2682 * Find a key to lookup carp_mc_entry. We have to do this 2683 * before calling ether_delmulti for obvious reason. 2684 */ 2685 if ((error = ether_multiaddr(&ifr->ifr_addr, addrlo, addrhi)) != 0) 2686 return (error); 2687 ETHER_LOOKUP_MULTI(addrlo, addrhi, &sc->sc_ac, enm); 2688 if (enm == NULL) 2689 return (EINVAL); 2690 2691 LIST_FOREACH(mc, &sc->carp_mc_listhead, mc_entries) 2692 if (mc->mc_enm == enm) 2693 break; 2694 2695 /* We won't delete entries we didn't add */ 2696 if (mc == NULL) 2697 return (EINVAL); 2698 2699 error = ether_delmulti(ifr, (struct arpcom *)&sc->sc_ac); 2700 if (error != ENETRESET) 2701 return (error); 2702 2703 /* We no longer use this multicast address. Tell parent so. */ 2704 error = (*ifp->if_ioctl)(ifp, SIOCDELMULTI, (caddr_t)ifr); 2705 if (error == 0) { 2706 /* And forget about this address. */ 2707 LIST_REMOVE(mc, mc_entries); 2708 free(mc, M_DEVBUF); 2709 } else 2710 (void)ether_addmulti(ifr, (struct arpcom *)&sc->sc_ac); 2711 return (error); 2712 } 2713 2714 /* 2715 * Delete any multicast address we have asked to add from parent 2716 * interface. Called when the carp is being unconfigured. 2717 */ 2718 void 2719 carp_ether_purgemulti(struct carp_softc *sc) 2720 { 2721 struct ifnet *ifp = sc->sc_carpdev; /* Parent. */ 2722 struct carp_mc_entry *mc; 2723 union { 2724 struct ifreq ifreq; 2725 struct { 2726 char ifr_name[IFNAMSIZ]; 2727 struct sockaddr_storage ifr_ss; 2728 } ifreq_storage; 2729 } u; 2730 struct ifreq *ifr = &u.ifreq; 2731 2732 if (ifp == NULL) 2733 return; 2734 2735 memcpy(ifr->ifr_name, ifp->if_xname, IFNAMSIZ); 2736 while ((mc = LIST_FIRST(&sc->carp_mc_listhead)) != NULL) { 2737 memcpy(&ifr->ifr_addr, &mc->mc_addr, mc->mc_addr.ss_len); 2738 (void)(*ifp->if_ioctl)(ifp, SIOCDELMULTI, (caddr_t)ifr); 2739 LIST_REMOVE(mc, mc_entries); 2740 free(mc, M_DEVBUF); 2741 } 2742 } 2743