1 /* $OpenBSD: ip_carp.c,v 1.190 2011/09/06 16:00:22 mpf Exp $ */ 2 3 /* 4 * Copyright (c) 2002 Michael Shalayeff. All rights reserved. 5 * Copyright (c) 2003 Ryan McBride. All rights reserved. 6 * Copyright (c) 2006-2008 Marco Pfatschbacher. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, 21 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 22 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 23 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 25 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 26 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 27 * THE POSSIBILITY OF SUCH DAMAGE. 
28 */ 29 30 /* 31 * TODO: 32 * - iface reconfigure 33 * - support for hardware checksum calculations; 34 * 35 */ 36 37 #include "ether.h" 38 39 #include <sys/param.h> 40 #include <sys/proc.h> 41 #include <sys/systm.h> 42 #include <sys/mbuf.h> 43 #include <sys/socket.h> 44 #include <sys/socketvar.h> 45 #include <sys/ioctl.h> 46 #include <sys/errno.h> 47 #include <sys/device.h> 48 #include <sys/kernel.h> 49 #include <sys/sysctl.h> 50 #include <sys/syslog.h> 51 52 #include <machine/cpu.h> 53 54 #include <net/if.h> 55 #include <net/if_types.h> 56 #include <net/if_llc.h> 57 #include <net/route.h> 58 #include <net/netisr.h> 59 60 /* for arc4random() */ 61 #include <dev/rndvar.h> 62 63 #if NFDDI > 0 64 #include <net/if_fddi.h> 65 #endif 66 67 #include <crypto/sha1.h> 68 69 #ifdef INET 70 #include <netinet/in.h> 71 #include <netinet/in_systm.h> 72 #include <netinet/in_var.h> 73 #include <netinet/ip.h> 74 #include <netinet/ip_var.h> 75 #include <netinet/if_ether.h> 76 #include <netinet/ip_ipsp.h> 77 78 #include <net/if_enc.h> 79 #include <net/if_dl.h> 80 #endif 81 82 #ifdef INET6 83 #include <netinet/icmp6.h> 84 #include <netinet/ip6.h> 85 #include <netinet6/ip6_var.h> 86 #include <netinet6/nd6.h> 87 #include <netinet6/in6_ifattach.h> 88 #endif 89 90 #include "bpfilter.h" 91 #if NBPFILTER > 0 92 #include <net/bpf.h> 93 #endif 94 95 #include <netinet/ip_carp.h> 96 97 struct carp_mc_entry { 98 LIST_ENTRY(carp_mc_entry) mc_entries; 99 union { 100 struct ether_multi *mcu_enm; 101 } mc_u; 102 struct sockaddr_storage mc_addr; 103 }; 104 #define mc_enm mc_u.mcu_enm 105 106 enum { HMAC_ORIG=0, HMAC_NOV6LL=1, HMAC_MAX=2 }; 107 108 struct carp_vhost_entry { 109 LIST_ENTRY(carp_vhost_entry) vhost_entries; 110 struct carp_softc *parent_sc; 111 int vhe_leader; 112 int vhid; 113 int advskew; 114 enum { INIT = 0, BACKUP, MASTER } state; 115 struct timeout ad_tmo; /* advertisement timeout */ 116 struct timeout md_tmo; /* master down timeout */ 117 struct timeout md6_tmo; /* master down 
timeout */ 118 119 u_int64_t vhe_replay_cookie; 120 121 /* authentication */ 122 #define CARP_HMAC_PAD 64 123 unsigned char vhe_pad[CARP_HMAC_PAD]; 124 SHA1_CTX vhe_sha1[HMAC_MAX]; 125 126 u_int8_t vhe_enaddr[ETHER_ADDR_LEN]; 127 struct sockaddr_dl vhe_sdl; /* for IPv6 ndp balancing */ 128 }; 129 130 struct carp_softc { 131 struct arpcom sc_ac; 132 #define sc_if sc_ac.ac_if 133 #define sc_carpdev sc_ac.ac_if.if_carpdev 134 void *ah_cookie; 135 void *lh_cookie; 136 struct ip_moptions sc_imo; 137 #ifdef INET6 138 struct ip6_moptions sc_im6o; 139 #endif /* INET6 */ 140 TAILQ_ENTRY(carp_softc) sc_list; 141 142 int sc_suppress; 143 int sc_bow_out; 144 int sc_demote_cnt; 145 146 int sc_sendad_errors; 147 #define CARP_SENDAD_MAX_ERRORS(sc) (3 * (sc)->sc_vhe_count) 148 int sc_sendad_success; 149 #define CARP_SENDAD_MIN_SUCCESS(sc) (3 * (sc)->sc_vhe_count) 150 151 char sc_curlladdr[ETHER_ADDR_LEN]; 152 153 LIST_HEAD(__carp_vhosthead, carp_vhost_entry) carp_vhosts; 154 int sc_vhe_count; 155 u_int8_t sc_vhids[CARP_MAXNODES]; 156 u_int8_t sc_advskews[CARP_MAXNODES]; 157 u_int8_t sc_balancing; 158 159 int sc_naddrs; 160 int sc_naddrs6; 161 int sc_advbase; /* seconds */ 162 163 /* authentication */ 164 unsigned char sc_key[CARP_KEY_LEN]; 165 166 u_int32_t sc_hashkey[2]; 167 u_int32_t sc_lsmask; /* load sharing mask */ 168 int sc_lscount; /* # load sharing interfaces (max 32) */ 169 int sc_delayed_arp; /* delayed ARP request countdown */ 170 171 struct in_addr sc_peer; 172 173 LIST_HEAD(__carp_mchead, carp_mc_entry) carp_mc_listhead; 174 struct carp_vhost_entry *cur_vhe; /* current active vhe */ 175 }; 176 177 int carp_opts[CARPCTL_MAXID] = { 0, 1, 0, LOG_CRIT }; /* XXX for now */ 178 struct carpstats carpstats; 179 180 struct carp_if { 181 TAILQ_HEAD(, carp_softc) vhif_vrs; 182 int vhif_nvrs; 183 184 struct ifnet *vhif_ifp; 185 }; 186 187 #define CARP_LOG(l, sc, s) \ 188 do { \ 189 if (carp_opts[CARPCTL_LOG] >= l) { \ 190 if (sc) \ 191 log(l, "%s: ", \ 192 (sc)->sc_if.if_xname); 
\ 193 else \ 194 log(l, "carp: "); \ 195 addlog s; \ 196 addlog("\n"); \ 197 } \ 198 } while (0) 199 200 void carp_hmac_prepare(struct carp_softc *); 201 void carp_hmac_prepare_ctx(struct carp_vhost_entry *, u_int8_t); 202 void carp_hmac_generate(struct carp_vhost_entry *, u_int32_t *, 203 unsigned char *, u_int8_t); 204 int carp_hmac_verify(struct carp_vhost_entry *, u_int32_t *, 205 unsigned char *); 206 void carp_setroute(struct carp_softc *, int); 207 void carp_proto_input_c(struct mbuf *, struct carp_header *, int, 208 sa_family_t); 209 void carpattach(int); 210 void carpdetach(struct carp_softc *); 211 int carp_prepare_ad(struct mbuf *, struct carp_vhost_entry *, 212 struct carp_header *); 213 void carp_send_ad_all(void); 214 void carp_vhe_send_ad_all(struct carp_softc *); 215 void carp_send_ad(void *); 216 void carp_send_arp(struct carp_softc *); 217 void carp_master_down(void *); 218 int carp_ioctl(struct ifnet *, u_long, caddr_t); 219 int carp_vhids_ioctl(struct carp_softc *, struct carpreq *); 220 int carp_check_dup_vhids(struct carp_softc *, struct carp_if *, 221 struct carpreq *); 222 void carp_ifgroup_ioctl(struct ifnet *, u_long, caddr_t); 223 void carp_ifgattr_ioctl(struct ifnet *, u_long, caddr_t); 224 void carp_start(struct ifnet *); 225 void carp_setrun_all(struct carp_softc *, sa_family_t); 226 void carp_setrun(struct carp_vhost_entry *, sa_family_t); 227 void carp_set_state_all(struct carp_softc *, int); 228 void carp_set_state(struct carp_vhost_entry *, int); 229 void carp_multicast_cleanup(struct carp_softc *); 230 int carp_set_ifp(struct carp_softc *, struct ifnet *); 231 void carp_set_enaddr(struct carp_softc *); 232 void carp_set_vhe_enaddr(struct carp_vhost_entry *); 233 void carp_addr_updated(void *); 234 u_int32_t carp_hash(struct carp_softc *, u_char *); 235 int carp_set_addr(struct carp_softc *, struct sockaddr_in *); 236 int carp_join_multicast(struct carp_softc *); 237 #ifdef INET6 238 void carp_send_na(struct carp_softc *); 239 int 
carp_set_addr6(struct carp_softc *, struct sockaddr_in6 *); 240 int carp_join_multicast6(struct carp_softc *); 241 #endif 242 int carp_clone_create(struct if_clone *, int); 243 int carp_clone_destroy(struct ifnet *); 244 int carp_ether_addmulti(struct carp_softc *, struct ifreq *); 245 int carp_ether_delmulti(struct carp_softc *, struct ifreq *); 246 void carp_ether_purgemulti(struct carp_softc *); 247 int carp_group_demote_count(struct carp_softc *); 248 void carp_update_lsmask(struct carp_softc *); 249 int carp_new_vhost(struct carp_softc *, int, int); 250 void carp_destroy_vhosts(struct carp_softc *); 251 void carp_del_all_timeouts(struct carp_softc *); 252 253 struct if_clone carp_cloner = 254 IF_CLONE_INITIALIZER("carp", carp_clone_create, carp_clone_destroy); 255 256 #define carp_cksum(_m, _l) ((u_int16_t)in_cksum((_m), (_l))) 257 #define CARP_IFQ_PRIO 6 258 259 void 260 carp_hmac_prepare(struct carp_softc *sc) 261 { 262 struct carp_vhost_entry *vhe; 263 u_int8_t i; 264 265 LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) { 266 for (i = 0; i < HMAC_MAX; i++) { 267 carp_hmac_prepare_ctx(vhe, i); 268 } 269 } 270 } 271 272 void 273 carp_hmac_prepare_ctx(struct carp_vhost_entry *vhe, u_int8_t ctx) 274 { 275 struct carp_softc *sc = vhe->parent_sc; 276 277 u_int8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT; 278 u_int8_t vhid = vhe->vhid & 0xff; 279 SHA1_CTX sha1ctx; 280 u_int32_t kmd[5]; 281 struct ifaddr *ifa; 282 int i, found; 283 struct in_addr last, cur, in; 284 #ifdef INET6 285 struct in6_addr last6, cur6, in6; 286 #endif /* INET6 */ 287 288 /* compute ipad from key */ 289 bzero(vhe->vhe_pad, sizeof(vhe->vhe_pad)); 290 bcopy(sc->sc_key, vhe->vhe_pad, sizeof(sc->sc_key)); 291 for (i = 0; i < sizeof(vhe->vhe_pad); i++) 292 vhe->vhe_pad[i] ^= 0x36; 293 294 /* precompute first part of inner hash */ 295 SHA1Init(&vhe->vhe_sha1[ctx]); 296 SHA1Update(&vhe->vhe_sha1[ctx], vhe->vhe_pad, sizeof(vhe->vhe_pad)); 297 SHA1Update(&vhe->vhe_sha1[ctx], (void 
*)&version, sizeof(version)); 298 SHA1Update(&vhe->vhe_sha1[ctx], (void *)&type, sizeof(type)); 299 300 /* generate a key for the arpbalance hash, before the vhid is hashed */ 301 if (vhe->vhe_leader) { 302 bcopy(&vhe->vhe_sha1[ctx], &sha1ctx, sizeof(sha1ctx)); 303 SHA1Final((unsigned char *)kmd, &sha1ctx); 304 sc->sc_hashkey[0] = kmd[0] ^ kmd[1]; 305 sc->sc_hashkey[1] = kmd[2] ^ kmd[3]; 306 } 307 308 /* the rest of the precomputation */ 309 if (vhe->vhe_leader && bcmp(sc->sc_ac.ac_enaddr, vhe->vhe_enaddr, 310 ETHER_ADDR_LEN) != 0) 311 SHA1Update(&vhe->vhe_sha1[ctx], sc->sc_ac.ac_enaddr, 312 ETHER_ADDR_LEN); 313 314 SHA1Update(&vhe->vhe_sha1[ctx], (void *)&vhid, sizeof(vhid)); 315 316 /* Hash the addresses from smallest to largest, not interface order */ 317 #ifdef INET 318 cur.s_addr = 0; 319 do { 320 found = 0; 321 last = cur; 322 cur.s_addr = 0xffffffff; 323 TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) { 324 if (ifa->ifa_addr->sa_family != AF_INET) 325 continue; 326 in.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr; 327 if (ntohl(in.s_addr) > ntohl(last.s_addr) && 328 ntohl(in.s_addr) < ntohl(cur.s_addr)) { 329 cur.s_addr = in.s_addr; 330 found++; 331 } 332 } 333 if (found) 334 SHA1Update(&vhe->vhe_sha1[ctx], 335 (void *)&cur, sizeof(cur)); 336 } while (found); 337 #endif /* INET */ 338 #ifdef INET6 339 memset(&cur6, 0x00, sizeof(cur6)); 340 do { 341 found = 0; 342 last6 = cur6; 343 memset(&cur6, 0xff, sizeof(cur6)); 344 TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) { 345 if (ifa->ifa_addr->sa_family != AF_INET6) 346 continue; 347 in6 = ifatoia6(ifa)->ia_addr.sin6_addr; 348 if (IN6_IS_SCOPE_EMBED(&in6)) { 349 if (ctx == HMAC_NOV6LL) 350 continue; 351 in6.s6_addr16[1] = 0; 352 } 353 if (memcmp(&in6, &last6, sizeof(in6)) > 0 && 354 memcmp(&in6, &cur6, sizeof(in6)) < 0) { 355 cur6 = in6; 356 found++; 357 } 358 } 359 if (found) 360 SHA1Update(&vhe->vhe_sha1[ctx], 361 (void *)&cur6, sizeof(cur6)); 362 } while (found); 363 #endif /* INET6 */ 364 365 /* 
convert ipad to opad */ 366 for (i = 0; i < sizeof(vhe->vhe_pad); i++) 367 vhe->vhe_pad[i] ^= 0x36 ^ 0x5c; 368 } 369 370 void 371 carp_hmac_generate(struct carp_vhost_entry *vhe, u_int32_t counter[2], 372 unsigned char md[20], u_int8_t ctx) 373 { 374 SHA1_CTX sha1ctx; 375 376 /* fetch first half of inner hash */ 377 bcopy(&vhe->vhe_sha1[ctx], &sha1ctx, sizeof(sha1ctx)); 378 379 SHA1Update(&sha1ctx, (void *)counter, sizeof(vhe->vhe_replay_cookie)); 380 SHA1Final(md, &sha1ctx); 381 382 /* outer hash */ 383 SHA1Init(&sha1ctx); 384 SHA1Update(&sha1ctx, vhe->vhe_pad, sizeof(vhe->vhe_pad)); 385 SHA1Update(&sha1ctx, md, 20); 386 SHA1Final(md, &sha1ctx); 387 } 388 389 int 390 carp_hmac_verify(struct carp_vhost_entry *vhe, u_int32_t counter[2], 391 unsigned char md[20]) 392 { 393 unsigned char md2[20]; 394 u_int8_t i; 395 396 for (i = 0; i < HMAC_MAX; i++) { 397 carp_hmac_generate(vhe, counter, md2, i); 398 if (!timingsafe_bcmp(md, md2, sizeof(md2))) 399 return (0); 400 } 401 return (1); 402 } 403 404 void 405 carp_setroute(struct carp_softc *sc, int cmd) 406 { 407 struct ifaddr *ifa; 408 int s; 409 410 /* XXX this mess needs fixing */ 411 412 s = splsoftnet(); 413 TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) { 414 switch (ifa->ifa_addr->sa_family) { 415 case AF_INET: { 416 int error; 417 struct sockaddr sa; 418 struct rtentry *rt; 419 struct radix_node_head *rnh; 420 struct radix_node *rn; 421 struct rt_addrinfo info; 422 int hr_otherif, nr_ourif; 423 struct sockaddr_rtlabel sa_rl; 424 const char *label; 425 426 /* Remove the existing host route, if any */ 427 bzero(&info, sizeof(info)); 428 info.rti_info[RTAX_DST] = ifa->ifa_addr; 429 info.rti_flags = RTF_HOST; 430 error = rtrequest1(RTM_DELETE, &info, RTP_CONNECTED, 431 NULL, sc->sc_if.if_rdomain); 432 rt_missmsg(RTM_DELETE, &info, info.rti_flags, NULL, 433 error, sc->sc_if.if_rdomain); 434 435 /* Check for our address on another interface */ 436 /* XXX cries for proper API */ 437 rnh = 
rt_gettable(ifa->ifa_addr->sa_family, 0); 438 rn = rnh->rnh_matchaddr(ifa->ifa_addr, rnh); 439 rt = (struct rtentry *)rn; 440 hr_otherif = (rt && rt->rt_ifp != &sc->sc_if && 441 rt->rt_flags & (RTF_CLONING|RTF_CLONED)); 442 443 /* Check for a network route on our interface */ 444 bcopy(ifa->ifa_addr, &sa, sizeof(sa)); 445 satosin(&sa)->sin_addr.s_addr = satosin(ifa->ifa_netmask 446 )->sin_addr.s_addr & satosin(&sa)->sin_addr.s_addr; 447 rt = (struct rtentry *)rt_lookup(&sa, 448 ifa->ifa_netmask, sc->sc_if.if_rdomain); 449 nr_ourif = (rt && rt->rt_ifp == &sc->sc_if); 450 451 /* Restore the route label */ 452 bzero(&sa_rl, sizeof(sa_rl)); 453 if (rt && rt->rt_labelid) { 454 sa_rl.sr_len = sizeof(sa_rl); 455 sa_rl.sr_family = AF_UNSPEC; 456 label = rtlabel_id2name(rt->rt_labelid); 457 if (label != NULL) 458 strlcpy(sa_rl.sr_label, label, 459 sizeof(sa_rl.sr_label)); 460 } 461 462 switch (cmd) { 463 case RTM_ADD: 464 if (hr_otherif) { 465 ifa->ifa_rtrequest = NULL; 466 ifa->ifa_flags &= ~RTF_CLONING; 467 bzero(&info, sizeof(info)); 468 info.rti_info[RTAX_DST] = ifa->ifa_addr; 469 info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr; 470 info.rti_flags = RTF_UP | RTF_HOST; 471 error = rtrequest1(RTM_ADD, &info, 472 RTP_CONNECTED, NULL, 473 sc->sc_if.if_rdomain); 474 rt_missmsg(RTM_ADD, &info, 475 info.rti_flags, &sc->sc_if, 476 error, sc->sc_if.if_rdomain); 477 } 478 if (!hr_otherif || nr_ourif || !rt) { 479 if (nr_ourif && !(rt->rt_flags & 480 RTF_CLONING)) { 481 bzero(&info, sizeof(info)); 482 info.rti_info[RTAX_DST] = &sa; 483 info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask; 484 error = rtrequest1(RTM_DELETE, 485 &info, RTP_CONNECTED, NULL, 486 sc->sc_if.if_rdomain); 487 rt_missmsg(RTM_DELETE, &info, info.rti_flags, NULL, 488 error, sc->sc_if.if_rdomain); 489 } 490 491 ifa->ifa_rtrequest = arp_rtrequest; 492 ifa->ifa_flags |= RTF_CLONING; 493 494 bzero(&info, sizeof(info)); 495 info.rti_info[RTAX_DST] = &sa; 496 info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr; 497 
info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask; 498 info.rti_info[RTAX_LABEL] = 499 (struct sockaddr *)&sa_rl; 500 error = rtrequest1(RTM_ADD, &info, 501 RTP_CONNECTED, NULL, 502 sc->sc_if.if_rdomain); 503 if (error == 0) 504 ifa->ifa_flags |= IFA_ROUTE; 505 rt_missmsg(RTM_ADD, &info, info.rti_flags, 506 &sc->sc_if, error, sc->sc_if.if_rdomain); 507 } 508 break; 509 case RTM_DELETE: 510 break; 511 default: 512 break; 513 } 514 break; 515 } 516 517 #ifdef INET6 518 case AF_INET6: 519 if (sc->sc_balancing >= CARP_BAL_IP) 520 continue; 521 if (cmd == RTM_ADD) 522 in6_ifaddloop(ifa); 523 else 524 in6_ifremloop(ifa); 525 break; 526 #endif /* INET6 */ 527 default: 528 break; 529 } 530 } 531 splx(s); 532 } 533 534 /* 535 * process input packet. 536 * we have rearranged checks order compared to the rfc, 537 * but it seems more efficient this way or not possible otherwise. 538 */ 539 void 540 carp_proto_input(struct mbuf *m, ...) 541 { 542 struct ip *ip = mtod(m, struct ip *); 543 struct ifnet *ifp = m->m_pkthdr.rcvif; 544 struct carp_softc *sc = NULL; 545 struct carp_header *ch; 546 int iplen, len, hlen, ismulti; 547 va_list ap; 548 549 va_start(ap, m); 550 hlen = va_arg(ap, int); 551 va_end(ap); 552 553 carpstats.carps_ipackets++; 554 555 if (!carp_opts[CARPCTL_ALLOW]) { 556 m_freem(m); 557 return; 558 } 559 560 ismulti = IN_MULTICAST(ip->ip_dst.s_addr); 561 562 /* check if received on a valid carp interface */ 563 if (!((ifp->if_type == IFT_CARP && ismulti) || 564 (ifp->if_type != IFT_CARP && !ismulti && ifp->if_carp != NULL))) { 565 carpstats.carps_badif++; 566 CARP_LOG(LOG_INFO, sc, ("packet received on non-carp interface: %s", 567 m->m_pkthdr.rcvif->if_xname)); 568 m_freem(m); 569 return; 570 } 571 572 /* verify that the IP TTL is 255. 
*/
	if (ip->ip_ttl != CARP_DFLTTL) {
		carpstats.carps_badttl++;
		CARP_LOG(LOG_NOTICE, sc, ("received ttl %d != %d on %s", ip->ip_ttl,
		    CARP_DFLTTL, m->m_pkthdr.rcvif->if_xname));
		m_freem(m);
		return;
	}

	/*
	 * verify that the received packet is long enough to hold
	 * the IP header followed by a complete CARP header
	 */
	iplen = ip->ip_hl << 2;
	len = iplen + sizeof(*ch);
	if (len > m->m_pkthdr.len) {
		carpstats.carps_badlen++;
		CARP_LOG(LOG_INFO, sc, ("packet too short %d on %s", m->m_pkthdr.len,
		    m->m_pkthdr.rcvif->if_xname));
		m_freem(m);
		return;
	}

	/*
	 * make both headers contiguous; m is not freed here on failure,
	 * so m_pullup() is expected to have disposed of the chain
	 */
	if ((m = m_pullup(m, len)) == NULL) {
		carpstats.carps_hdrops++;
		return;
	}
	/* the chain may have changed, so reload the header pointers */
	ip = mtod(m, struct ip *);
	ch = (struct carp_header *)(mtod(m, caddr_t) + iplen);

	/*
	 * verify the CARP checksum: m_data is moved past the IP header
	 * for the duration of the call and restored afterwards
	 */
	m->m_data += iplen;
	if (carp_cksum(m, len - iplen)) {
		carpstats.carps_badsum++;
		CARP_LOG(LOG_INFO, sc, ("checksum failed on %s",
		    m->m_pkthdr.rcvif->if_xname));
		m_freem(m);
		return;
	}
	m->m_data -= iplen;

	carp_proto_input_c(m, ch, ismulti, AF_INET);
}

#ifdef INET6
/*
 * IPv6 entry point for received CARP packets.
 *
 * Applies the same admission checks as the IPv4 path (CARPCTL_ALLOW,
 * receiving interface must be a carp interface, hop limit must be
 * CARP_DFLTTL, complete header, valid checksum) and then hands the packet
 * to carp_proto_input_c() as a multicast AF_INET6 advertisement.
 * Returns IPPROTO_DONE on every path.
 */
int
carp6_proto_input(struct mbuf **mp, int *offp, int proto)
{
	struct mbuf *m = *mp;
	struct carp_softc *sc = NULL;	/* stays NULL; only used by CARP_LOG */
	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
	struct carp_header *ch;
	u_int len;

	carpstats.carps_ipackets6++;

	if (!carp_opts[CARPCTL_ALLOW]) {
		m_freem(m);
		return (IPPROTO_DONE);
	}

	/* check if received on a valid carp interface */
	if (m->m_pkthdr.rcvif->if_type != IFT_CARP) {
		carpstats.carps_badif++;
		CARP_LOG(LOG_INFO, sc, ("packet received on non-carp interface: %s",
		    m->m_pkthdr.rcvif->if_xname));
		m_freem(m);
		return (IPPROTO_DONE);
	}

	/* verify that the IP TTL is 255 */
	if (ip6->ip6_hlim != CARP_DFLTTL) {
		carpstats.carps_badttl++;
		CARP_LOG(LOG_NOTICE, sc, ("received ttl %d != %d on %s", ip6->ip6_hlim,
		    CARP_DFLTTL, m->m_pkthdr.rcvif->if_xname));
		m_freem(m);
		return (IPPROTO_DONE);
	}

	/* verify that we have a complete carp packet */
	len = m->m_len;		/* saved for the log message below */
	IP6_EXTHDR_GET(ch, struct carp_header *, m, *offp, sizeof(*ch));
	if (ch == NULL) {
		/* m is not freed here; presumably the macro already did */
		carpstats.carps_badlen++;
		CARP_LOG(LOG_INFO, sc, ("packet size %u too small", len));
		return (IPPROTO_DONE);
	}


	/* verify the CARP checksum, with m_data temporarily at the header */
	m->m_data += *offp;
	if (carp_cksum(m, sizeof(*ch))) {
		carpstats.carps_badsum++;
		CARP_LOG(LOG_INFO, sc, ("checksum failed, on %s",
		    m->m_pkthdr.rcvif->if_xname));
		m_freem(m);
		return (IPPROTO_DONE);
	}
	m->m_data -= *offp;

	carp_proto_input_c(m, ch, 1, AF_INET6);
	return (IPPROTO_DONE);
}
#endif /* INET6 */

/*
 * Address-family independent part of advertisement processing.
 *
 * Finds the carp interface and vhost entry whose vhid matches the header,
 * validates the version, HMAC and replay counter, and then drives the
 * vhost state machine (INIT / MASTER / BACKUP) from the advertised
 * advbase, advskew and demotion counter.  The mbuf is freed on every path.
 */
void
carp_proto_input_c(struct mbuf *m, struct carp_header *ch, int ismulti,
    sa_family_t af)
{
	struct ifnet *ifp = m->m_pkthdr.rcvif;
	struct carp_softc *sc;
	struct carp_vhost_entry *vhe;
	struct timeval sc_tv, ch_tv;
	struct carp_if *cif;

	/* the carp_if hangs off the parent device, not the carp interface */
	if (ifp->if_type == IFT_CARP)
		cif = (struct carp_if *)ifp->if_carpdev->if_carp;
	else
		cif = (struct carp_if *)ifp->if_carp;

	TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) {
		/* for IPv4, unicast/multicast must match the peer setting */
		if (af == AF_INET &&
		    ismulti != IN_MULTICAST(sc->sc_peer.s_addr))
			continue;
		LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) {
			if (vhe->vhid == ch->carp_vhid)
				goto found;
		}
	}
found:

	/* sc is NULL here when the search ran off the end of the list */
	if (!sc || (sc->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) !=
	    (IFF_UP|IFF_RUNNING)) {
		carpstats.carps_badvhid++;
		m_freem(m);
		return;
	}

	getmicrotime(&sc->sc_if.if_lastchange);
	sc->sc_if.if_ipackets++;
	sc->sc_if.if_ibytes += m->m_pkthdr.len;

	/* verify the CARP version. */
	if (ch->carp_version != CARP_VERSION) {
		carpstats.carps_badver++;
		sc->sc_if.if_ierrors++;
		CARP_LOG(LOG_NOTICE, sc, ("invalid version %d != %d",
		    ch->carp_version, CARP_VERSION));
		m_freem(m);
		return;
	}

	/* verify the hash (carp_hmac_verify() returns non-zero on mismatch) */
	if (carp_hmac_verify(vhe, ch->carp_counter, ch->carp_md)) {
		carpstats.carps_badauth++;
		sc->sc_if.if_ierrors++;
		CARP_LOG(LOG_INFO, sc, ("incorrect hash"));
		m_freem(m);
		return;
	}

	/* a counter equal to our own replay cookie is our own packet */
	if (!bcmp(&vhe->vhe_replay_cookie, ch->carp_counter,
	    sizeof(ch->carp_counter))) {
		/* Do not log duplicates from non simplex interfaces */
		if (sc->sc_carpdev->if_flags & IFF_SIMPLEX) {
			carpstats.carps_badauth++;
			sc->sc_if.if_ierrors++;
			CARP_LOG(LOG_WARNING, sc,
			    ("replay or network loop detected"));
		}
		m_freem(m);
		return;
	}

	/* advertisement intervals: ours (sc_tv) and the sender's (ch_tv) */
	sc_tv.tv_sec = sc->sc_advbase;
	sc_tv.tv_usec = vhe->advskew * 1000000 / 256;
	ch_tv.tv_sec = ch->carp_advbase;
	ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256;

	switch (vhe->state) {
	case INIT:
		break;
	case MASTER:
		/*
		 * If we receive an advertisement from a master who's going to
		 * be more frequent than us, and whose demote count is not higher
		 * than ours, go into BACKUP state. If his demote count is lower,
		 * also go into BACKUP.
		 */
		if (((timercmp(&sc_tv, &ch_tv, >) ||
		    timercmp(&sc_tv, &ch_tv, ==)) &&
		    (ch->carp_demote <= carp_group_demote_count(sc))) ||
		    ch->carp_demote < carp_group_demote_count(sc)) {
			timeout_del(&vhe->ad_tmo);
			carp_set_state(vhe, BACKUP);
			carp_setrun(vhe, 0);
			if (vhe->vhe_leader)
				carp_setroute(sc, RTM_DELETE);
		}
		break;
	case BACKUP:
		/*
		 * If we're pre-empting masters who advertise slower than us,
		 * and do not have a better demote count, treat them as down.
		 *
		 */
		if (carp_opts[CARPCTL_PREEMPT] &&
		    timercmp(&sc_tv, &ch_tv, <) &&
		    ch->carp_demote >= carp_group_demote_count(sc)) {
			carp_master_down(vhe);
			break;
		}

		/*
		 * Take over masters advertising with a higher demote count,
		 * regardless of CARPCTL_PREEMPT.
		 */
		if (ch->carp_demote > carp_group_demote_count(sc)) {
			carp_master_down(vhe);
			break;
		}

		/*
		 * If the master is going to advertise at such a low frequency
		 * that he's guaranteed to time out, we'd might as well just
		 * treat him as timed out now.
		 */
		sc_tv.tv_sec = sc->sc_advbase * 3;
		if (sc->sc_advbase && timercmp(&sc_tv, &ch_tv, <)) {
			carp_master_down(vhe);
			break;
		}

		/*
		 * Otherwise, we reset the counter and wait for the next
		 * advertisement.
		 */
		carp_setrun(vhe, af);
		break;
	}

	m_freem(m);
	return;
}

/*
 * sysctl handler for the carp node.
 *
 * CARPCTL_STATS exposes the carpstats structure read-only (EPERM when a
 * new value is supplied).  Any other name strictly between 0 and
 * CARPCTL_MAXID reads or writes the matching carp_opts[] integer.
 * Returns ENOTDIR when more than one name component is given and
 * ENOPROTOOPT for names outside that range.
 */
int
carp_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
    size_t newlen)
{
	/* All sysctl names at this level are terminal. */
	if (namelen != 1)
		return (ENOTDIR);

	switch (name[0]) {
	case CARPCTL_STATS:
		if (newp != NULL)
			return (EPERM);
		return (sysctl_struct(oldp, oldlenp, newp, newlen,
		    &carpstats, sizeof(carpstats)));
	default:
		if (name[0] <= 0 || name[0] >= CARPCTL_MAXID)
			return (ENOPROTOOPT);
		return sysctl_int(oldp, oldlenp, newp, newlen,
		    &carp_opts[name[0]]);
	}
}

/*
 * Interface side of the CARP implementation.
841 */ 842 843 /* ARGSUSED */ 844 void 845 carpattach(int n) 846 { 847 struct ifg_group *ifg; 848 849 if ((ifg = if_creategroup("carp")) != NULL) 850 ifg->ifg_refcnt++; /* keep around even if empty */ 851 if_clone_attach(&carp_cloner); 852 } 853 854 int 855 carp_clone_create(ifc, unit) 856 struct if_clone *ifc; 857 int unit; 858 { 859 struct carp_softc *sc; 860 struct ifnet *ifp; 861 862 sc = malloc(sizeof(*sc), M_DEVBUF, M_NOWAIT|M_ZERO); 863 if (!sc) 864 return (ENOMEM); 865 866 LIST_INIT(&sc->carp_vhosts); 867 sc->sc_vhe_count = 0; 868 if (carp_new_vhost(sc, 0, 0)) { 869 free(sc, M_DEVBUF); 870 return (ENOMEM); 871 } 872 873 sc->sc_suppress = 0; 874 sc->sc_advbase = CARP_DFLTINTV; 875 sc->sc_naddrs = sc->sc_naddrs6 = 0; 876 #ifdef INET6 877 sc->sc_im6o.im6o_multicast_hlim = CARP_DFLTTL; 878 #endif /* INET6 */ 879 sc->sc_imo.imo_membership = (struct in_multi **)malloc( 880 (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_IPMOPTS, 881 M_WAITOK|M_ZERO); 882 sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS; 883 884 LIST_INIT(&sc->carp_mc_listhead); 885 ifp = &sc->sc_if; 886 ifp->if_softc = sc; 887 snprintf(ifp->if_xname, sizeof ifp->if_xname, "%s%d", ifc->ifc_name, 888 unit); 889 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 890 ifp->if_ioctl = carp_ioctl; 891 ifp->if_start = carp_start; 892 ifp->if_output = carp_output; 893 ifp->if_type = IFT_CARP; 894 ifp->if_addrlen = ETHER_ADDR_LEN; 895 ifp->if_hdrlen = ETHER_HDR_LEN; 896 ifp->if_mtu = ETHERMTU; 897 IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen); 898 IFQ_SET_READY(&ifp->if_snd); 899 if_attach(ifp); 900 901 if_alloc_sadl(ifp); 902 LIST_INIT(&sc->sc_ac.ac_multiaddrs); 903 #if NBPFILTER > 0 904 bpfattach(&ifp->if_bpf, ifp, DLT_EN10MB, ETHER_HDR_LEN); 905 #endif 906 907 /* Hook carp_addr_updated to cope with address and route changes. 
*/ 908 sc->ah_cookie = hook_establish(sc->sc_if.if_addrhooks, 0, 909 carp_addr_updated, sc); 910 carp_set_state_all(sc, INIT); 911 912 return (0); 913 } 914 915 int 916 carp_new_vhost(struct carp_softc *sc, int vhid, int advskew) 917 { 918 struct carp_vhost_entry *vhe, *vhe0; 919 920 vhe = malloc(sizeof(*vhe), M_DEVBUF, M_NOWAIT | M_ZERO); 921 if (vhe == NULL) 922 return (ENOMEM); 923 924 vhe->parent_sc = sc; 925 vhe->vhid = vhid; 926 vhe->advskew = advskew; 927 timeout_set(&vhe->ad_tmo, carp_send_ad, vhe); 928 timeout_set(&vhe->md_tmo, carp_master_down, vhe); 929 timeout_set(&vhe->md6_tmo, carp_master_down, vhe); 930 931 /* mark the first vhe as leader */ 932 if (LIST_EMPTY(&sc->carp_vhosts)) { 933 vhe->vhe_leader = 1; 934 LIST_INSERT_HEAD(&sc->carp_vhosts, vhe, vhost_entries); 935 sc->sc_vhe_count = 1; 936 return (0); 937 } 938 939 LIST_FOREACH(vhe0, &sc->carp_vhosts, vhost_entries) 940 if (LIST_NEXT(vhe0, vhost_entries) == NULL) 941 break; 942 LIST_INSERT_AFTER(vhe0, vhe, vhost_entries); 943 sc->sc_vhe_count++; 944 945 return (0); 946 } 947 948 int 949 carp_clone_destroy(struct ifnet *ifp) 950 { 951 struct carp_softc *sc = ifp->if_softc; 952 953 carpdetach(sc); 954 ether_ifdetach(ifp); 955 if_detach(ifp); 956 carp_destroy_vhosts(ifp->if_softc); 957 free(sc->sc_imo.imo_membership, M_IPMOPTS); 958 free(sc, M_DEVBUF); 959 960 return (0); 961 } 962 963 void 964 carp_del_all_timeouts(struct carp_softc *sc) 965 { 966 struct carp_vhost_entry *vhe; 967 968 LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) { 969 timeout_del(&vhe->ad_tmo); 970 timeout_del(&vhe->md_tmo); 971 timeout_del(&vhe->md6_tmo); 972 } 973 } 974 975 void 976 carpdetach(struct carp_softc *sc) 977 { 978 struct carp_if *cif; 979 int s; 980 981 carp_del_all_timeouts(sc); 982 983 if (sc->sc_demote_cnt) 984 carp_group_demote_adj(&sc->sc_if, sc->sc_demote_cnt, "detach"); 985 sc->sc_suppress = 0; 986 sc->sc_sendad_errors = 0; 987 988 carp_set_state_all(sc, INIT); 989 sc->sc_if.if_flags &= ~IFF_UP; 990 
carp_setrun_all(sc, 0); 991 carp_multicast_cleanup(sc); 992 993 s = splnet(); 994 if (sc->ah_cookie != NULL) 995 hook_disestablish(sc->sc_if.if_addrhooks, sc->ah_cookie); 996 if (sc->sc_carpdev != NULL) { 997 if (sc->lh_cookie != NULL) 998 hook_disestablish(sc->sc_carpdev->if_linkstatehooks, 999 sc->lh_cookie); 1000 cif = (struct carp_if *)sc->sc_carpdev->if_carp; 1001 TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list); 1002 if (!--cif->vhif_nvrs) { 1003 ifpromisc(sc->sc_carpdev, 0); 1004 sc->sc_carpdev->if_carp = NULL; 1005 free(cif, M_IFADDR); 1006 } 1007 } 1008 sc->sc_carpdev = NULL; 1009 splx(s); 1010 } 1011 1012 /* Detach an interface from the carp. */ 1013 void 1014 carp_ifdetach(struct ifnet *ifp) 1015 { 1016 struct carp_softc *sc, *nextsc; 1017 struct carp_if *cif = (struct carp_if *)ifp->if_carp; 1018 1019 for (sc = TAILQ_FIRST(&cif->vhif_vrs); sc; sc = nextsc) { 1020 nextsc = TAILQ_NEXT(sc, sc_list); 1021 carpdetach(sc); 1022 } 1023 } 1024 1025 void 1026 carp_destroy_vhosts(struct carp_softc *sc) 1027 { 1028 /* XXX bow out? */ 1029 struct carp_vhost_entry *vhe, *nvhe; 1030 1031 for (vhe = LIST_FIRST(&sc->carp_vhosts); 1032 vhe != LIST_END(&sc->carp_vhosts); vhe = nvhe) { 1033 nvhe = LIST_NEXT(vhe, vhost_entries); 1034 free(vhe, M_DEVBUF); 1035 } 1036 LIST_INIT(&sc->carp_vhosts); 1037 sc->sc_vhe_count = 0; 1038 } 1039 1040 int 1041 carp_prepare_ad(struct mbuf *m, struct carp_vhost_entry *vhe, 1042 struct carp_header *ch) 1043 { 1044 if (!vhe->vhe_replay_cookie) { 1045 arc4random_buf(&vhe->vhe_replay_cookie, 1046 sizeof(vhe->vhe_replay_cookie)); 1047 } 1048 1049 bcopy(&vhe->vhe_replay_cookie, ch->carp_counter, 1050 sizeof(ch->carp_counter)); 1051 1052 /* 1053 * For the time being, do not include the IPv6 linklayer addresses 1054 * in the HMAC. 
1055 */ 1056 carp_hmac_generate(vhe, ch->carp_counter, ch->carp_md, HMAC_NOV6LL); 1057 1058 return (0); 1059 } 1060 1061 void 1062 carp_send_ad_all(void) 1063 { 1064 struct ifnet *ifp; 1065 struct carp_if *cif; 1066 struct carp_softc *vh; 1067 1068 TAILQ_FOREACH(ifp, &ifnet, if_list) { 1069 if (ifp->if_carp == NULL || ifp->if_type == IFT_CARP) 1070 continue; 1071 1072 cif = (struct carp_if *)ifp->if_carp; 1073 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1074 if ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) == 1075 (IFF_UP|IFF_RUNNING)) { 1076 carp_vhe_send_ad_all(vh); 1077 } 1078 } 1079 } 1080 } 1081 1082 void 1083 carp_vhe_send_ad_all(struct carp_softc *sc) 1084 { 1085 struct carp_vhost_entry *vhe; 1086 1087 LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) { 1088 if (vhe->state == MASTER) 1089 carp_send_ad(vhe); 1090 } 1091 } 1092 1093 void 1094 carp_send_ad(void *v) 1095 { 1096 struct carp_header ch; 1097 struct timeval tv; 1098 struct carp_vhost_entry *vhe = v; 1099 struct carp_softc *sc = vhe->parent_sc; 1100 struct carp_header *ch_ptr; 1101 1102 struct mbuf *m; 1103 int error, len, advbase, advskew, s; 1104 struct ifaddr *ifa; 1105 struct sockaddr sa; 1106 1107 if (sc->sc_carpdev == NULL) { 1108 sc->sc_if.if_oerrors++; 1109 return; 1110 } 1111 1112 s = splsoftnet(); 1113 1114 /* bow out if we've gone to backup (the carp interface is going down) */ 1115 if (sc->sc_bow_out) { 1116 advbase = 255; 1117 advskew = 255; 1118 } else { 1119 advbase = sc->sc_advbase; 1120 advskew = vhe->advskew; 1121 tv.tv_sec = advbase; 1122 if (advbase == 0 && advskew == 0) 1123 tv.tv_usec = 1 * 1000000 / 256; 1124 else 1125 tv.tv_usec = advskew * 1000000 / 256; 1126 } 1127 1128 ch.carp_version = CARP_VERSION; 1129 ch.carp_type = CARP_ADVERTISEMENT; 1130 ch.carp_vhid = vhe->vhid; 1131 ch.carp_demote = carp_group_demote_count(sc) & 0xff; 1132 ch.carp_advbase = advbase; 1133 ch.carp_advskew = advskew; 1134 ch.carp_authlen = 7; /* XXX DEFINE */ 1135 ch.carp_cksum = 0; 1136 1137 
sc->cur_vhe = vhe; /* we need the vhe later on the output path */ 1138 1139 #ifdef INET 1140 if (sc->sc_naddrs) { 1141 struct ip *ip; 1142 1143 MGETHDR(m, M_DONTWAIT, MT_HEADER); 1144 if (m == NULL) { 1145 sc->sc_if.if_oerrors++; 1146 carpstats.carps_onomem++; 1147 /* XXX maybe less ? */ 1148 goto retry_later; 1149 } 1150 len = sizeof(*ip) + sizeof(ch); 1151 m->m_pkthdr.len = len; 1152 m->m_pkthdr.rcvif = NULL; 1153 m->m_pkthdr.rdomain = sc->sc_if.if_rdomain; 1154 m->m_pkthdr.pf.prio = CARP_IFQ_PRIO; 1155 m->m_len = len; 1156 MH_ALIGN(m, m->m_len); 1157 ip = mtod(m, struct ip *); 1158 ip->ip_v = IPVERSION; 1159 ip->ip_hl = sizeof(*ip) >> 2; 1160 ip->ip_tos = IPTOS_LOWDELAY; 1161 ip->ip_len = htons(len); 1162 ip->ip_id = htons(ip_randomid()); 1163 ip->ip_off = htons(IP_DF); 1164 ip->ip_ttl = CARP_DFLTTL; 1165 ip->ip_p = IPPROTO_CARP; 1166 ip->ip_sum = 0; 1167 1168 bzero(&sa, sizeof(sa)); 1169 sa.sa_family = AF_INET; 1170 ifa = ifaof_ifpforaddr(&sa, sc->sc_carpdev); 1171 if (ifa == NULL) 1172 ip->ip_src.s_addr = 0; 1173 else 1174 ip->ip_src.s_addr = 1175 ifatoia(ifa)->ia_addr.sin_addr.s_addr; 1176 ip->ip_dst.s_addr = sc->sc_peer.s_addr; 1177 if (IN_MULTICAST(ip->ip_dst.s_addr)) 1178 m->m_flags |= M_MCAST; 1179 1180 ch_ptr = (struct carp_header *)(ip + 1); 1181 bcopy(&ch, ch_ptr, sizeof(ch)); 1182 if (carp_prepare_ad(m, vhe, ch_ptr)) 1183 goto retry_later; 1184 1185 m->m_data += sizeof(*ip); 1186 ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip)); 1187 m->m_data -= sizeof(*ip); 1188 1189 getmicrotime(&sc->sc_if.if_lastchange); 1190 sc->sc_if.if_opackets++; 1191 sc->sc_if.if_obytes += len; 1192 carpstats.carps_opackets++; 1193 1194 error = ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, 1195 NULL); 1196 if (error) { 1197 if (error == ENOBUFS) 1198 carpstats.carps_onomem++; 1199 else 1200 CARP_LOG(LOG_WARNING, sc, 1201 ("ip_output failed: %d", error)); 1202 sc->sc_if.if_oerrors++; 1203 if (sc->sc_sendad_errors < INT_MAX) 1204 sc->sc_sendad_errors++; 1205 if 
(sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS(sc)) 1206 carp_group_demote_adj(&sc->sc_if, 1, 1207 "> snderrors"); 1208 sc->sc_sendad_success = 0; 1209 } else { 1210 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS(sc)) { 1211 if (++sc->sc_sendad_success >= 1212 CARP_SENDAD_MIN_SUCCESS(sc)) { 1213 carp_group_demote_adj(&sc->sc_if, -1, 1214 "< snderrors"); 1215 sc->sc_sendad_errors = 0; 1216 } 1217 } else 1218 sc->sc_sendad_errors = 0; 1219 } 1220 if (vhe->vhe_leader) { 1221 if (sc->sc_delayed_arp > 0) 1222 sc->sc_delayed_arp--; 1223 if (sc->sc_delayed_arp == 0) { 1224 carp_send_arp(sc); 1225 sc->sc_delayed_arp = -1; 1226 } 1227 } 1228 } 1229 #endif /* INET */ 1230 #ifdef INET6 1231 if (sc->sc_naddrs6) { 1232 struct ip6_hdr *ip6; 1233 1234 MGETHDR(m, M_DONTWAIT, MT_HEADER); 1235 if (m == NULL) { 1236 sc->sc_if.if_oerrors++; 1237 carpstats.carps_onomem++; 1238 /* XXX maybe less ? */ 1239 goto retry_later; 1240 } 1241 len = sizeof(*ip6) + sizeof(ch); 1242 m->m_pkthdr.len = len; 1243 m->m_pkthdr.rcvif = NULL; 1244 m->m_pkthdr.pf.prio = CARP_IFQ_PRIO; 1245 /* XXX m->m_pkthdr.rdomain = sc->sc_if.if_rdomain; */ 1246 m->m_len = len; 1247 MH_ALIGN(m, m->m_len); 1248 m->m_flags |= M_MCAST; 1249 ip6 = mtod(m, struct ip6_hdr *); 1250 bzero(ip6, sizeof(*ip6)); 1251 ip6->ip6_vfc |= IPV6_VERSION; 1252 ip6->ip6_hlim = CARP_DFLTTL; 1253 ip6->ip6_nxt = IPPROTO_CARP; 1254 1255 /* set the source address */ 1256 bzero(&sa, sizeof(sa)); 1257 sa.sa_family = AF_INET6; 1258 ifa = ifaof_ifpforaddr(&sa, sc->sc_carpdev); 1259 if (ifa == NULL) /* This should never happen with IPv6 */ 1260 bzero(&ip6->ip6_src, sizeof(struct in6_addr)); 1261 else 1262 bcopy(ifatoia6(ifa)->ia_addr.sin6_addr.s6_addr, 1263 &ip6->ip6_src, sizeof(struct in6_addr)); 1264 /* set the multicast destination */ 1265 1266 ip6->ip6_dst.s6_addr16[0] = htons(0xff02); 1267 ip6->ip6_dst.s6_addr16[1] = htons(sc->sc_carpdev->if_index); 1268 ip6->ip6_dst.s6_addr8[15] = 0x12; 1269 1270 ch_ptr = (struct carp_header *)(ip6 + 1); 
1271 bcopy(&ch, ch_ptr, sizeof(ch)); 1272 if (carp_prepare_ad(m, vhe, ch_ptr)) 1273 goto retry_later; 1274 1275 m->m_data += sizeof(*ip6); 1276 ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip6)); 1277 m->m_data -= sizeof(*ip6); 1278 1279 getmicrotime(&sc->sc_if.if_lastchange); 1280 sc->sc_if.if_opackets++; 1281 sc->sc_if.if_obytes += len; 1282 carpstats.carps_opackets6++; 1283 1284 error = ip6_output(m, NULL, NULL, 0, &sc->sc_im6o, NULL, NULL); 1285 if (error) { 1286 if (error == ENOBUFS) 1287 carpstats.carps_onomem++; 1288 else 1289 CARP_LOG(LOG_WARNING, sc, 1290 ("ip6_output failed: %d", error)); 1291 sc->sc_if.if_oerrors++; 1292 if (sc->sc_sendad_errors < INT_MAX) 1293 sc->sc_sendad_errors++; 1294 if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS(sc)) 1295 carp_group_demote_adj(&sc->sc_if, 1, 1296 "> snd6errors"); 1297 sc->sc_sendad_success = 0; 1298 } else { 1299 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS(sc)) { 1300 if (++sc->sc_sendad_success >= 1301 CARP_SENDAD_MIN_SUCCESS(sc)) { 1302 carp_group_demote_adj(&sc->sc_if, -1, 1303 "< snd6errors"); 1304 sc->sc_sendad_errors = 0; 1305 } 1306 } else 1307 sc->sc_sendad_errors = 0; 1308 } 1309 } 1310 #endif /* INET6 */ 1311 1312 retry_later: 1313 sc->cur_vhe = NULL; 1314 splx(s); 1315 if (advbase != 255 || advskew != 255) 1316 timeout_add(&vhe->ad_tmo, tvtohz(&tv)); 1317 } 1318 1319 /* 1320 * Broadcast a gratuitous ARP request containing 1321 * the virtual router MAC address for each IP address 1322 * associated with the virtual router. 
1323 */ 1324 void 1325 carp_send_arp(struct carp_softc *sc) 1326 { 1327 struct ifaddr *ifa; 1328 in_addr_t in; 1329 int s = splsoftnet(); 1330 1331 TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) { 1332 1333 if (ifa->ifa_addr->sa_family != AF_INET) 1334 continue; 1335 1336 in = ifatoia(ifa)->ia_addr.sin_addr.s_addr; 1337 arprequest(sc->sc_carpdev, &in, &in, sc->sc_ac.ac_enaddr); 1338 DELAY(1000); /* XXX */ 1339 } 1340 splx(s); 1341 } 1342 1343 #ifdef INET6 1344 void 1345 carp_send_na(struct carp_softc *sc) 1346 { 1347 struct ifaddr *ifa; 1348 struct in6_addr *in6; 1349 static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT; 1350 int s = splsoftnet(); 1351 1352 TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) { 1353 1354 if (ifa->ifa_addr->sa_family != AF_INET6) 1355 continue; 1356 1357 in6 = &ifatoia6(ifa)->ia_addr.sin6_addr; 1358 nd6_na_output(sc->sc_carpdev, &mcast, in6, 1359 ND_NA_FLAG_OVERRIDE, 1, NULL); 1360 DELAY(1000); /* XXX */ 1361 } 1362 splx(s); 1363 } 1364 #endif /* INET6 */ 1365 1366 /* 1367 * Based on bridge_hash() in if_bridge.c 1368 */ 1369 #define mix(a,b,c) \ 1370 do { \ 1371 a -= b; a -= c; a ^= (c >> 13); \ 1372 b -= c; b -= a; b ^= (a << 8); \ 1373 c -= a; c -= b; c ^= (b >> 13); \ 1374 a -= b; a -= c; a ^= (c >> 12); \ 1375 b -= c; b -= a; b ^= (a << 16); \ 1376 c -= a; c -= b; c ^= (b >> 5); \ 1377 a -= b; a -= c; a ^= (c >> 3); \ 1378 b -= c; b -= a; b ^= (a << 10); \ 1379 c -= a; c -= b; c ^= (b >> 15); \ 1380 } while (0) 1381 1382 u_int32_t 1383 carp_hash(struct carp_softc *sc, u_char *src) 1384 { 1385 u_int32_t a = 0x9e3779b9, b = sc->sc_hashkey[0], c = sc->sc_hashkey[1]; 1386 1387 c += sc->sc_key[3] << 24; 1388 c += sc->sc_key[2] << 16; 1389 c += sc->sc_key[1] << 8; 1390 c += sc->sc_key[0]; 1391 b += src[5] << 8; 1392 b += src[4]; 1393 a += src[3] << 24; 1394 a += src[2] << 16; 1395 a += src[1] << 8; 1396 a += src[0]; 1397 1398 mix(a, b, c); 1399 return (c); 1400 } 1401 1402 void 1403 carp_update_lsmask(struct carp_softc 
*sc) 1404 { 1405 struct carp_vhost_entry *vhe; 1406 int count; 1407 1408 if (!sc->sc_balancing) 1409 return; 1410 1411 sc->sc_lsmask = 0; 1412 count = 0; 1413 1414 LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) { 1415 if (vhe->state == MASTER && count < sizeof(sc->sc_lsmask) * 8) 1416 sc->sc_lsmask |= 1 << count; 1417 count++; 1418 } 1419 sc->sc_lscount = count; 1420 CARP_LOG(LOG_DEBUG, sc, ("carp_update_lsmask: %x", sc->sc_lsmask)); 1421 } 1422 1423 int 1424 carp_iamatch(struct in_ifaddr *ia, u_char *src, u_int8_t **sha, 1425 u_int8_t **ether_shost) 1426 { 1427 struct carp_softc *sc = ia->ia_ifp->if_softc; 1428 struct carp_vhost_entry *vhe = LIST_FIRST(&sc->carp_vhosts); 1429 1430 if (sc->sc_balancing == CARP_BAL_ARP) { 1431 int lshash; 1432 /* 1433 * We use the source MAC address to decide which virtual host 1434 * should handle the request. If we're master of that virtual 1435 * host, then we respond, otherwise, just drop the arp packet 1436 * on the floor. 1437 */ 1438 1439 if (sc->sc_lscount == 0) /* just to be safe */ 1440 return (0); 1441 lshash = carp_hash(sc, src) % sc->sc_lscount; 1442 if ((1 << lshash) & sc->sc_lsmask) { 1443 int i = 0; 1444 LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) { 1445 if (i++ == lshash) 1446 break; 1447 } 1448 if (vhe == NULL) 1449 return (0); 1450 *sha = vhe->vhe_enaddr; 1451 return (1); 1452 } 1453 } else if (sc->sc_balancing == CARP_BAL_IPSTEALTH || 1454 sc->sc_balancing == CARP_BAL_IP) { 1455 if (vhe->state == MASTER) { 1456 *ether_shost = ((struct arpcom *)sc->sc_carpdev)-> 1457 ac_enaddr; 1458 return (1); 1459 } 1460 } else { 1461 if (vhe->state == MASTER) 1462 return (1); 1463 } 1464 1465 return (0); 1466 } 1467 1468 #ifdef INET6 1469 int 1470 carp_iamatch6(struct ifnet *ifp, u_char *src, struct sockaddr_dl **sdl) 1471 { 1472 struct carp_softc *sc = ifp->if_softc; 1473 struct carp_vhost_entry *vhe = LIST_FIRST(&sc->carp_vhosts); 1474 1475 if (sc->sc_balancing == CARP_BAL_ARP) { 1476 int lshash; 1477 /* 1478 * We 
use the source MAC address to decide which virtual host 1479 * should handle the request. If we're master of that virtual 1480 * host, then we respond, otherwise, just drop the ndp packet 1481 * on the floor. 1482 */ 1483 1484 /* can happen if optional src lladdr is not provided */ 1485 if (src == NULL) 1486 return (0); 1487 if (sc->sc_lscount == 0) /* just to be safe */ 1488 return (0); 1489 lshash = carp_hash(sc, src) % sc->sc_lscount; 1490 if ((1 << lshash) & sc->sc_lsmask) { 1491 int i = 0; 1492 LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) { 1493 if (i++ == lshash) 1494 break; 1495 } 1496 if (vhe == NULL) 1497 return (0); 1498 *sdl = &vhe->vhe_sdl; 1499 return (1); 1500 } 1501 } else { 1502 if (vhe->state == MASTER) 1503 return (1); 1504 } 1505 1506 return (0); 1507 } 1508 #endif /* INET6 */ 1509 1510 struct ifnet * 1511 carp_ourether(void *v, struct ether_header *eh, int src) 1512 { 1513 struct carp_if *cif = (struct carp_if *)v; 1514 struct carp_softc *vh; 1515 u_int8_t *ena; 1516 1517 if (src) 1518 ena = (u_int8_t *)&eh->ether_shost; 1519 else 1520 ena = (u_int8_t *)&eh->ether_dhost; 1521 1522 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1523 struct carp_vhost_entry *vhe; 1524 if ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) != 1525 (IFF_UP|IFF_RUNNING)) 1526 continue; 1527 if (vh->sc_balancing == CARP_BAL_ARP) { 1528 LIST_FOREACH(vhe, &vh->carp_vhosts, vhost_entries) 1529 if (vhe->state == MASTER && 1530 !bcmp(ena, vhe->vhe_enaddr, ETHER_ADDR_LEN)) 1531 return (&vh->sc_if); 1532 } else { 1533 vhe = LIST_FIRST(&vh->carp_vhosts); 1534 if ((vhe->state == MASTER || 1535 vh->sc_balancing >= CARP_BAL_IP) && 1536 !bcmp(ena, vh->sc_ac.ac_enaddr, ETHER_ADDR_LEN)) 1537 return (&vh->sc_if); 1538 } 1539 } 1540 return (NULL); 1541 } 1542 1543 void 1544 carp_rewrite_lladdr(struct ifnet *ifp, u_int8_t *s_enaddr) 1545 { 1546 struct carp_softc *sc = ifp->if_softc; 1547 1548 if (sc->sc_balancing != CARP_BAL_IPSTEALTH && 1549 sc->sc_balancing != CARP_BAL_IP && 
sc->cur_vhe) { 1550 if (sc->cur_vhe->vhe_leader) 1551 bcopy((caddr_t)sc->sc_ac.ac_enaddr, 1552 (caddr_t)s_enaddr, ETHER_ADDR_LEN); 1553 else 1554 bcopy((caddr_t)sc->cur_vhe->vhe_enaddr, 1555 (caddr_t)s_enaddr, ETHER_ADDR_LEN); 1556 } 1557 } 1558 1559 int 1560 carp_our_mcastaddr(struct ifnet *ifp, u_int8_t *d_enaddr) 1561 { 1562 struct carp_softc *sc = ifp->if_softc; 1563 1564 if (sc->sc_balancing != CARP_BAL_IP) 1565 return (0); 1566 1567 return(!bcmp(sc->sc_ac.ac_enaddr, d_enaddr, ETHER_ADDR_LEN)); 1568 } 1569 1570 1571 int 1572 carp_input(struct mbuf *m, u_int8_t *shost, u_int8_t *dhost, u_int16_t etype) 1573 { 1574 struct ether_header eh; 1575 struct carp_if *cif = (struct carp_if *)m->m_pkthdr.rcvif->if_carp; 1576 struct ifnet *ifp; 1577 1578 bcopy(shost, &eh.ether_shost, sizeof(eh.ether_shost)); 1579 bcopy(dhost, &eh.ether_dhost, sizeof(eh.ether_dhost)); 1580 eh.ether_type = etype; 1581 1582 if ((ifp = carp_ourether(cif, &eh, 0))) 1583 ; 1584 else if (m->m_flags & (M_BCAST|M_MCAST)) { 1585 struct carp_softc *vh; 1586 struct mbuf *m0; 1587 1588 /* 1589 * XXX Should really check the list of multicast addresses 1590 * for each CARP interface _before_ copying. 
1591 */ 1592 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1593 if (!(vh->sc_if.if_flags & IFF_UP)) 1594 continue; 1595 m0 = m_copym2(m, 0, M_COPYALL, M_DONTWAIT); 1596 if (m0 == NULL) 1597 continue; 1598 m0->m_pkthdr.rcvif = &vh->sc_if; 1599 #if NBPFILTER > 0 1600 if (vh->sc_if.if_bpf) 1601 bpf_mtap_hdr(vh->sc_if.if_bpf, (char *)&eh, 1602 ETHER_HDR_LEN, m0, BPF_DIRECTION_IN); 1603 #endif 1604 vh->sc_if.if_ipackets++; 1605 ether_input(&vh->sc_if, &eh, m0); 1606 } 1607 return (1); 1608 } 1609 1610 if (ifp == NULL) 1611 return (1); 1612 1613 m->m_pkthdr.rcvif = ifp; 1614 1615 #if NBPFILTER > 0 1616 if (ifp->if_bpf) 1617 bpf_mtap_hdr(ifp->if_bpf, (char *)&eh, ETHER_HDR_LEN, m, 1618 BPF_DIRECTION_IN); 1619 #endif 1620 ifp->if_ipackets++; 1621 ether_input(ifp, &eh, m); 1622 1623 return (0); 1624 } 1625 1626 int 1627 carp_lsdrop(struct mbuf *m, sa_family_t af, u_int32_t *src, u_int32_t *dst) 1628 { 1629 struct carp_softc *sc = m->m_pkthdr.rcvif->if_softc; 1630 int match; 1631 u_int32_t fold; 1632 1633 if (sc->sc_balancing < CARP_BAL_IP) 1634 return (0); 1635 /* 1636 * Never drop carp advertisements. 1637 * XXX Bad idea to pass all broadcast / multicast traffic? 
1638 */ 1639 if (m->m_flags & (M_BCAST|M_MCAST)) 1640 return (0); 1641 1642 fold = src[0] ^ dst[0]; 1643 #ifdef INET6 1644 if (af == AF_INET6) { 1645 int i; 1646 for (i = 1; i < 4; i++) 1647 fold ^= src[i] ^ dst[i]; 1648 } 1649 #endif 1650 if (sc->sc_lscount == 0) /* just to be safe */ 1651 return (1); 1652 match = (1 << (ntohl(fold) % sc->sc_lscount)) & sc->sc_lsmask; 1653 1654 return (!match); 1655 } 1656 1657 void 1658 carp_master_down(void *v) 1659 { 1660 struct carp_vhost_entry *vhe = v; 1661 struct carp_softc *sc = vhe->parent_sc; 1662 1663 switch (vhe->state) { 1664 case INIT: 1665 printf("%s: master_down event in INIT state\n", 1666 sc->sc_if.if_xname); 1667 break; 1668 case MASTER: 1669 break; 1670 case BACKUP: 1671 carp_set_state(vhe, MASTER); 1672 carp_send_ad(vhe); 1673 if (sc->sc_balancing == CARP_BAL_NONE && vhe->vhe_leader) { 1674 carp_send_arp(sc); 1675 /* Schedule a delayed ARP to deal w/ some L3 switches */ 1676 sc->sc_delayed_arp = 2; 1677 #ifdef INET6 1678 carp_send_na(sc); 1679 #endif /* INET6 */ 1680 } 1681 carp_setrun(vhe, 0); 1682 if (vhe->vhe_leader) 1683 carp_setroute(sc, RTM_ADD); 1684 carpstats.carps_preempt++; 1685 break; 1686 } 1687 } 1688 1689 void 1690 carp_setrun_all(struct carp_softc *sc, sa_family_t af) 1691 { 1692 struct carp_vhost_entry *vhe; 1693 LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) { 1694 carp_setrun(vhe, af); 1695 } 1696 } 1697 1698 /* 1699 * When in backup state, af indicates whether to reset the master down timer 1700 * for v4 or v6. If it's set to zero, reset the ones which are already pending. 
1701 */ 1702 void 1703 carp_setrun(struct carp_vhost_entry *vhe, sa_family_t af) 1704 { 1705 struct timeval tv; 1706 struct carp_softc *sc = vhe->parent_sc; 1707 1708 if (sc->sc_carpdev == NULL) { 1709 sc->sc_if.if_flags &= ~IFF_RUNNING; 1710 carp_set_state_all(sc, INIT); 1711 return; 1712 } 1713 1714 if (sc->sc_if.if_flags & IFF_UP && vhe->vhid > 0 && 1715 (sc->sc_naddrs || sc->sc_naddrs6) && !sc->sc_suppress) { 1716 sc->sc_if.if_flags |= IFF_RUNNING; 1717 } else { 1718 sc->sc_if.if_flags &= ~IFF_RUNNING; 1719 if (vhe->vhe_leader) 1720 carp_setroute(sc, RTM_DELETE); 1721 return; 1722 } 1723 1724 switch (vhe->state) { 1725 case INIT: 1726 carp_set_state(vhe, BACKUP); 1727 if (vhe->vhe_leader) 1728 carp_setroute(sc, RTM_DELETE); 1729 carp_setrun(vhe, 0); 1730 break; 1731 case BACKUP: 1732 timeout_del(&vhe->ad_tmo); 1733 tv.tv_sec = 3 * sc->sc_advbase; 1734 if (sc->sc_advbase == 0 && vhe->advskew == 0) 1735 tv.tv_usec = 3 * 1000000 / 256; 1736 else 1737 tv.tv_usec = vhe->advskew * 1000000 / 256; 1738 if (vhe->vhe_leader) 1739 sc->sc_delayed_arp = -1; 1740 switch (af) { 1741 #ifdef INET 1742 case AF_INET: 1743 timeout_add(&vhe->md_tmo, tvtohz(&tv)); 1744 break; 1745 #endif /* INET */ 1746 #ifdef INET6 1747 case AF_INET6: 1748 timeout_add(&vhe->md6_tmo, tvtohz(&tv)); 1749 break; 1750 #endif /* INET6 */ 1751 default: 1752 if (sc->sc_naddrs) 1753 timeout_add(&vhe->md_tmo, tvtohz(&tv)); 1754 if (sc->sc_naddrs6) 1755 timeout_add(&vhe->md6_tmo, tvtohz(&tv)); 1756 break; 1757 } 1758 break; 1759 case MASTER: 1760 tv.tv_sec = sc->sc_advbase; 1761 if (sc->sc_advbase == 0 && vhe->advskew == 0) 1762 tv.tv_usec = 1 * 1000000 / 256; 1763 else 1764 tv.tv_usec = vhe->advskew * 1000000 / 256; 1765 timeout_add(&vhe->ad_tmo, tvtohz(&tv)); 1766 break; 1767 } 1768 } 1769 1770 void 1771 carp_multicast_cleanup(struct carp_softc *sc) 1772 { 1773 struct ip_moptions *imo = &sc->sc_imo; 1774 #ifdef INET6 1775 struct ip6_moptions *im6o = &sc->sc_im6o; 1776 #endif 1777 u_int16_t n = 
imo->imo_num_memberships; 1778 1779 /* Clean up our own multicast memberships */ 1780 while (n-- > 0) { 1781 if (imo->imo_membership[n] != NULL) { 1782 in_delmulti(imo->imo_membership[n]); 1783 imo->imo_membership[n] = NULL; 1784 } 1785 } 1786 imo->imo_num_memberships = 0; 1787 imo->imo_multicast_ifp = NULL; 1788 1789 #ifdef INET6 1790 while (!LIST_EMPTY(&im6o->im6o_memberships)) { 1791 struct in6_multi_mship *imm = 1792 LIST_FIRST(&im6o->im6o_memberships); 1793 1794 LIST_REMOVE(imm, i6mm_chain); 1795 in6_leavegroup(imm); 1796 } 1797 im6o->im6o_multicast_ifp = NULL; 1798 #endif 1799 1800 /* And any other multicast memberships */ 1801 carp_ether_purgemulti(sc); 1802 } 1803 1804 int 1805 carp_set_ifp(struct carp_softc *sc, struct ifnet *ifp) 1806 { 1807 struct carp_if *cif, *ncif = NULL; 1808 struct carp_softc *vr, *after = NULL; 1809 int myself = 0, error = 0; 1810 int s; 1811 1812 if (ifp == sc->sc_carpdev) 1813 return (0); 1814 1815 if (ifp != NULL) { 1816 if ((ifp->if_flags & IFF_MULTICAST) == 0) 1817 return (EADDRNOTAVAIL); 1818 1819 if (ifp->if_type == IFT_CARP) 1820 return (EINVAL); 1821 1822 if (ifp->if_carp == NULL) { 1823 ncif = malloc(sizeof(*cif), M_IFADDR, M_NOWAIT|M_ZERO); 1824 if (ncif == NULL) 1825 return (ENOBUFS); 1826 if ((error = ifpromisc(ifp, 1))) { 1827 free(ncif, M_IFADDR); 1828 return (error); 1829 } 1830 1831 ncif->vhif_ifp = ifp; 1832 TAILQ_INIT(&ncif->vhif_vrs); 1833 } else { 1834 cif = (struct carp_if *)ifp->if_carp; 1835 if (carp_check_dup_vhids(sc, cif, NULL)) 1836 return (EINVAL); 1837 } 1838 1839 /* detach from old interface */ 1840 if (sc->sc_carpdev != NULL) 1841 carpdetach(sc); 1842 1843 /* join multicast groups */ 1844 if (sc->sc_naddrs < 0 && 1845 (error = carp_join_multicast(sc)) != 0) { 1846 if (ncif != NULL) 1847 free(ncif, M_IFADDR); 1848 return (error); 1849 } 1850 1851 #ifdef INET6 1852 if (sc->sc_naddrs6 < 0 && 1853 (error = carp_join_multicast6(sc)) != 0) { 1854 if (ncif != NULL) 1855 free(ncif, M_IFADDR); 1856 
carp_multicast_cleanup(sc); 1857 return (error); 1858 } 1859 #endif 1860 1861 /* attach carp interface to physical interface */ 1862 if (ncif != NULL) 1863 ifp->if_carp = (caddr_t)ncif; 1864 sc->sc_carpdev = ifp; 1865 cif = (struct carp_if *)ifp->if_carp; 1866 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) { 1867 if (vr == sc) 1868 myself = 1; 1869 if (LIST_FIRST(&vr->carp_vhosts)->vhid < 1870 LIST_FIRST(&sc->carp_vhosts)->vhid) 1871 after = vr; 1872 } 1873 1874 if (!myself) { 1875 /* We're trying to keep things in order */ 1876 if (after == NULL) { 1877 TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list); 1878 } else { 1879 TAILQ_INSERT_AFTER(&cif->vhif_vrs, after, 1880 sc, sc_list); 1881 } 1882 cif->vhif_nvrs++; 1883 } 1884 if (sc->sc_naddrs || sc->sc_naddrs6) 1885 sc->sc_if.if_flags |= IFF_UP; 1886 carp_set_enaddr(sc); 1887 s = splnet(); 1888 sc->lh_cookie = hook_establish(ifp->if_linkstatehooks, 1, 1889 carp_carpdev_state, ifp); 1890 carp_carpdev_state(ifp); 1891 splx(s); 1892 } else { 1893 carpdetach(sc); 1894 sc->sc_if.if_flags &= ~(IFF_UP|IFF_RUNNING); 1895 } 1896 return (0); 1897 } 1898 1899 void 1900 carp_set_vhe_enaddr(struct carp_vhost_entry *vhe) 1901 { 1902 struct carp_softc *sc = vhe->parent_sc; 1903 1904 if (vhe->vhid != 0 && sc->sc_carpdev) { 1905 if (vhe->vhe_leader && sc->sc_balancing == CARP_BAL_IP) 1906 vhe->vhe_enaddr[0] = 1; 1907 else 1908 vhe->vhe_enaddr[0] = 0; 1909 vhe->vhe_enaddr[1] = 0; 1910 vhe->vhe_enaddr[2] = 0x5e; 1911 vhe->vhe_enaddr[3] = 0; 1912 vhe->vhe_enaddr[4] = 1; 1913 vhe->vhe_enaddr[5] = vhe->vhid; 1914 1915 vhe->vhe_sdl.sdl_family = AF_LINK; 1916 vhe->vhe_sdl.sdl_alen = ETHER_ADDR_LEN; 1917 bcopy(vhe->vhe_enaddr, vhe->vhe_sdl.sdl_data, ETHER_ADDR_LEN); 1918 } else 1919 bzero(vhe->vhe_enaddr, ETHER_ADDR_LEN); 1920 } 1921 1922 void 1923 carp_set_enaddr(struct carp_softc *sc) 1924 { 1925 struct carp_vhost_entry *vhe; 1926 1927 LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) 1928 carp_set_vhe_enaddr(vhe); 1929 1930 vhe = 
LIST_FIRST(&sc->carp_vhosts); 1931 1932 /* 1933 * Use the carp lladdr if the running one isn't manually set. 1934 * Only compare static parts of the lladdr. 1935 */ 1936 if ((bcmp(sc->sc_ac.ac_enaddr + 1, vhe->vhe_enaddr + 1, 1937 ETHER_ADDR_LEN - 2) == 0) || 1938 (!sc->sc_ac.ac_enaddr[0] && !sc->sc_ac.ac_enaddr[1] && 1939 !sc->sc_ac.ac_enaddr[2] && !sc->sc_ac.ac_enaddr[3] && 1940 !sc->sc_ac.ac_enaddr[4] && !sc->sc_ac.ac_enaddr[5])) 1941 bcopy(vhe->vhe_enaddr, sc->sc_ac.ac_enaddr, ETHER_ADDR_LEN); 1942 1943 /* Make sure the enaddr has changed before further twiddling. */ 1944 if (bcmp(sc->sc_ac.ac_enaddr, sc->sc_curlladdr, ETHER_ADDR_LEN) != 0) { 1945 bcopy(sc->sc_ac.ac_enaddr, LLADDR(sc->sc_if.if_sadl), 1946 ETHER_ADDR_LEN); 1947 bcopy(sc->sc_ac.ac_enaddr, sc->sc_curlladdr, ETHER_ADDR_LEN); 1948 #ifdef INET6 1949 /* 1950 * (re)attach a link-local address which matches 1951 * our new MAC address. 1952 */ 1953 in6_ifattach_linklocal(&sc->sc_if, NULL); 1954 #endif 1955 carp_set_state_all(sc, INIT); 1956 carp_setrun_all(sc, 0); 1957 } 1958 } 1959 1960 void 1961 carp_addr_updated(void *v) 1962 { 1963 struct carp_softc *sc = (struct carp_softc *) v; 1964 struct ifaddr *ifa; 1965 int new_naddrs = 0, new_naddrs6 = 0; 1966 1967 TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) { 1968 if (ifa->ifa_addr->sa_family == AF_INET) 1969 new_naddrs++; 1970 else if (ifa->ifa_addr->sa_family == AF_INET6 && 1971 !IN6_IS_ADDR_LINKLOCAL(&ifatoia6(ifa)->ia_addr.sin6_addr)) 1972 new_naddrs6++; 1973 } 1974 1975 /* We received address changes from if_addrhooks callback */ 1976 if (new_naddrs != sc->sc_naddrs || new_naddrs6 != sc->sc_naddrs6) { 1977 struct in_addr mc_addr; 1978 struct in_multi *inm; 1979 1980 sc->sc_naddrs = new_naddrs; 1981 sc->sc_naddrs6 = new_naddrs6; 1982 1983 /* Re-establish multicast membership removed by in_control */ 1984 if (IN_MULTICAST(sc->sc_peer.s_addr)) { 1985 mc_addr.s_addr = sc->sc_peer.s_addr; 1986 IN_LOOKUP_MULTI(mc_addr, &sc->sc_if, inm); 1987 if (inm 
== NULL) { 1988 struct in_multi **imm = 1989 sc->sc_imo.imo_membership; 1990 u_int16_t maxmem = 1991 sc->sc_imo.imo_max_memberships; 1992 1993 bzero(&sc->sc_imo, sizeof(sc->sc_imo)); 1994 sc->sc_imo.imo_membership = imm; 1995 sc->sc_imo.imo_max_memberships = maxmem; 1996 1997 if (sc->sc_carpdev != NULL && sc->sc_naddrs > 0) 1998 carp_join_multicast(sc); 1999 } 2000 } 2001 2002 if (sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0) { 2003 sc->sc_if.if_flags &= ~IFF_UP; 2004 carp_set_state_all(sc, INIT); 2005 } else 2006 carp_hmac_prepare(sc); 2007 } 2008 2009 carp_setrun_all(sc, 0); 2010 } 2011 2012 int 2013 carp_set_addr(struct carp_softc *sc, struct sockaddr_in *sin) 2014 { 2015 struct ifnet *ifp = sc->sc_carpdev; 2016 struct in_ifaddr *ia, *ia_if; 2017 int error = 0; 2018 2019 /* XXX is this necessary? */ 2020 if (sin->sin_addr.s_addr == 0) { 2021 if (!(sc->sc_if.if_flags & IFF_UP)) 2022 carp_set_state_all(sc, INIT); 2023 if (sc->sc_naddrs) 2024 sc->sc_if.if_flags |= IFF_UP; 2025 carp_setrun_all(sc, 0); 2026 return (0); 2027 } 2028 2029 /* we have to do this by hand to ensure we don't match on ourselves */ 2030 ia_if = NULL; 2031 for (ia = TAILQ_FIRST(&in_ifaddr); ia; 2032 ia = TAILQ_NEXT(ia, ia_list)) { 2033 2034 /* and, yeah, we need a multicast-capable iface too */ 2035 if (ia->ia_ifp != &sc->sc_if && 2036 ia->ia_ifp->if_type != IFT_CARP && 2037 (ia->ia_ifp->if_flags & IFF_MULTICAST) && 2038 ia->ia_ifp->if_rdomain == sc->sc_if.if_rdomain && 2039 (sin->sin_addr.s_addr & ia->ia_netmask) == 2040 ia->ia_net) { 2041 if (!ia_if) 2042 ia_if = ia; 2043 } 2044 } 2045 2046 if (ia_if) { 2047 ia = ia_if; 2048 if (ifp) { 2049 if (ifp != ia->ia_ifp) 2050 return (EADDRNOTAVAIL); 2051 } else { 2052 ifp = ia->ia_ifp; 2053 } 2054 } 2055 2056 if ((error = carp_set_ifp(sc, ifp))) 2057 return (error); 2058 2059 if (sc->sc_carpdev == NULL) 2060 return (EADDRNOTAVAIL); 2061 2062 if (sc->sc_naddrs == 0 && (error = carp_join_multicast(sc)) != 0) 2063 return (error); 2064 2065 if 
(sc->sc_carpdev != NULL) 2066 sc->sc_if.if_flags |= IFF_UP; 2067 2068 carp_set_state_all(sc, INIT); 2069 2070 return (0); 2071 } 2072 2073 int 2074 carp_join_multicast(struct carp_softc *sc) 2075 { 2076 struct ip_moptions *imo = &sc->sc_imo; 2077 struct in_multi *imm; 2078 struct in_addr addr; 2079 2080 if (!IN_MULTICAST(sc->sc_peer.s_addr)) 2081 return (0); 2082 2083 addr.s_addr = sc->sc_peer.s_addr; 2084 if ((imm = in_addmulti(&addr, &sc->sc_if)) == NULL) 2085 return (ENOBUFS); 2086 2087 imo->imo_membership[0] = imm; 2088 imo->imo_num_memberships = 1; 2089 imo->imo_multicast_ifp = &sc->sc_if; 2090 imo->imo_multicast_ttl = CARP_DFLTTL; 2091 imo->imo_multicast_loop = 0; 2092 return (0); 2093 } 2094 2095 2096 #ifdef INET6 2097 int 2098 carp_set_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6) 2099 { 2100 struct ifnet *ifp = sc->sc_carpdev; 2101 struct in6_ifaddr *ia, *ia_if; 2102 int error = 0; 2103 2104 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { 2105 if (!(sc->sc_if.if_flags & IFF_UP)) 2106 carp_set_state_all(sc, INIT); 2107 if (sc->sc_naddrs6) 2108 sc->sc_if.if_flags |= IFF_UP; 2109 carp_setrun_all(sc, 0); 2110 return (0); 2111 } 2112 2113 /* we have to do this by hand to ensure we don't match on ourselves */ 2114 ia_if = NULL; 2115 for (ia = in6_ifaddr; ia; ia = ia->ia_next) { 2116 int i; 2117 2118 for (i = 0; i < 4; i++) { 2119 if ((sin6->sin6_addr.s6_addr32[i] & 2120 ia->ia_prefixmask.sin6_addr.s6_addr32[i]) != 2121 (ia->ia_addr.sin6_addr.s6_addr32[i] & 2122 ia->ia_prefixmask.sin6_addr.s6_addr32[i])) 2123 break; 2124 } 2125 /* and, yeah, we need a multicast-capable iface too */ 2126 if (ia->ia_ifp != &sc->sc_if && 2127 ia->ia_ifp->if_type != IFT_CARP && 2128 (ia->ia_ifp->if_flags & IFF_MULTICAST) && 2129 (i == 4)) { 2130 if (!ia_if) 2131 ia_if = ia; 2132 } 2133 } 2134 2135 if (ia_if) { 2136 ia = ia_if; 2137 if (sc->sc_carpdev) { 2138 if (sc->sc_carpdev != ia->ia_ifp) 2139 return (EADDRNOTAVAIL); 2140 } else { 2141 ifp = ia->ia_ifp; 2142 } 2143 } 
2144 2145 if ((error = carp_set_ifp(sc, ifp))) 2146 return (error); 2147 2148 if (sc->sc_carpdev == NULL) 2149 return (EADDRNOTAVAIL); 2150 2151 if (sc->sc_naddrs6 == 0 && (error = carp_join_multicast6(sc)) != 0) 2152 return (error); 2153 2154 if (sc->sc_carpdev != NULL) 2155 sc->sc_if.if_flags |= IFF_UP; 2156 2157 carp_set_state_all(sc, INIT); 2158 2159 return (0); 2160 } 2161 2162 int 2163 carp_join_multicast6(struct carp_softc *sc) 2164 { 2165 struct in6_multi_mship *imm, *imm2; 2166 struct ip6_moptions *im6o = &sc->sc_im6o; 2167 struct sockaddr_in6 addr6; 2168 int error; 2169 2170 /* Join IPv6 CARP multicast group */ 2171 bzero(&addr6, sizeof(addr6)); 2172 addr6.sin6_family = AF_INET6; 2173 addr6.sin6_len = sizeof(addr6); 2174 addr6.sin6_addr.s6_addr16[0] = htons(0xff02); 2175 addr6.sin6_addr.s6_addr16[1] = htons(sc->sc_if.if_index); 2176 addr6.sin6_addr.s6_addr8[15] = 0x12; 2177 if ((imm = in6_joingroup(&sc->sc_if, 2178 &addr6.sin6_addr, &error)) == NULL) { 2179 return (error); 2180 } 2181 /* join solicited multicast address */ 2182 bzero(&addr6.sin6_addr, sizeof(addr6.sin6_addr)); 2183 addr6.sin6_addr.s6_addr16[0] = htons(0xff02); 2184 addr6.sin6_addr.s6_addr16[1] = htons(sc->sc_if.if_index); 2185 addr6.sin6_addr.s6_addr32[1] = 0; 2186 addr6.sin6_addr.s6_addr32[2] = htonl(1); 2187 addr6.sin6_addr.s6_addr32[3] = 0; 2188 addr6.sin6_addr.s6_addr8[12] = 0xff; 2189 if ((imm2 = in6_joingroup(&sc->sc_if, 2190 &addr6.sin6_addr, &error)) == NULL) { 2191 in6_leavegroup(imm); 2192 return (error); 2193 } 2194 2195 /* apply v6 multicast membership */ 2196 im6o->im6o_multicast_ifp = &sc->sc_if; 2197 if (imm) 2198 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, 2199 i6mm_chain); 2200 if (imm2) 2201 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm2, 2202 i6mm_chain); 2203 2204 return (0); 2205 } 2206 2207 #endif /* INET6 */ 2208 2209 int 2210 carp_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr) 2211 { 2212 struct proc *p = curproc; /* XXX */ 2213 struct carp_softc *sc = 
ifp->if_softc; 2214 struct carp_vhost_entry *vhe; 2215 struct carpreq carpr; 2216 struct ifaddr *ifa = (struct ifaddr *)addr; 2217 struct ifreq *ifr = (struct ifreq *)addr; 2218 struct ifnet *cdev = NULL; 2219 int i, error = 0; 2220 2221 switch (cmd) { 2222 case SIOCSIFADDR: 2223 switch (ifa->ifa_addr->sa_family) { 2224 #ifdef INET 2225 case AF_INET: 2226 sc->sc_if.if_flags |= IFF_UP; 2227 /* 2228 * emulate arp_ifinit() without doing a gratious arp 2229 * request so that the routes are setup correctly. 2230 */ 2231 ifa->ifa_rtrequest = arp_rtrequest; 2232 ifa->ifa_flags |= RTF_CLONING; 2233 2234 error = carp_set_addr(sc, satosin(ifa->ifa_addr)); 2235 break; 2236 #endif /* INET */ 2237 #ifdef INET6 2238 case AF_INET6: 2239 sc->sc_if.if_flags |= IFF_UP; 2240 error = carp_set_addr6(sc, satosin6(ifa->ifa_addr)); 2241 break; 2242 #endif /* INET6 */ 2243 default: 2244 error = EAFNOSUPPORT; 2245 break; 2246 } 2247 break; 2248 2249 case SIOCSIFFLAGS: 2250 vhe = LIST_FIRST(&sc->carp_vhosts); 2251 if (vhe->state != INIT && !(ifr->ifr_flags & IFF_UP)) { 2252 carp_del_all_timeouts(sc); 2253 2254 /* we need the interface up to bow out */ 2255 sc->sc_if.if_flags |= IFF_UP; 2256 sc->sc_bow_out = 1; 2257 carp_vhe_send_ad_all(sc); 2258 sc->sc_bow_out = 0; 2259 2260 sc->sc_if.if_flags &= ~IFF_UP; 2261 carp_set_state_all(sc, INIT); 2262 carp_setrun_all(sc, 0); 2263 } else if (vhe->state == INIT && (ifr->ifr_flags & IFF_UP)) { 2264 sc->sc_if.if_flags |= IFF_UP; 2265 carp_setrun_all(sc, 0); 2266 } 2267 break; 2268 2269 case SIOCSVH: 2270 vhe = LIST_FIRST(&sc->carp_vhosts); 2271 if ((error = suser(p, p->p_acflag)) != 0) 2272 break; 2273 if ((error = copyin(ifr->ifr_data, &carpr, sizeof carpr))) 2274 break; 2275 error = 1; 2276 if (carpr.carpr_carpdev[0] != '\0' && 2277 (cdev = ifunit(carpr.carpr_carpdev)) == NULL) 2278 return (EINVAL); 2279 if (carpr.carpr_peer.s_addr == 0) 2280 sc->sc_peer.s_addr = INADDR_CARP_GROUP; 2281 else 2282 sc->sc_peer.s_addr = carpr.carpr_peer.s_addr; 2283 if 
((error = carp_set_ifp(sc, cdev))) 2284 return (error); 2285 if (vhe->state != INIT && carpr.carpr_state != vhe->state) { 2286 switch (carpr.carpr_state) { 2287 case BACKUP: 2288 timeout_del(&vhe->ad_tmo); 2289 carp_set_state_all(sc, BACKUP); 2290 carp_setrun_all(sc, 0); 2291 carp_setroute(sc, RTM_DELETE); 2292 break; 2293 case MASTER: 2294 LIST_FOREACH(vhe, &sc->carp_vhosts, 2295 vhost_entries) 2296 carp_master_down(vhe); 2297 break; 2298 default: 2299 break; 2300 } 2301 } 2302 if ((error = carp_vhids_ioctl(sc, &carpr))) 2303 return (error); 2304 if (carpr.carpr_advbase >= 0) { 2305 if (carpr.carpr_advbase > 255) { 2306 error = EINVAL; 2307 break; 2308 } 2309 sc->sc_advbase = carpr.carpr_advbase; 2310 error--; 2311 } 2312 if (bcmp(sc->sc_advskews, carpr.carpr_advskews, 2313 sizeof(sc->sc_advskews))) { 2314 i = 0; 2315 LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) 2316 vhe->advskew = carpr.carpr_advskews[i++]; 2317 bcopy(carpr.carpr_advskews, sc->sc_advskews, 2318 sizeof(sc->sc_advskews)); 2319 } 2320 if (sc->sc_balancing != carpr.carpr_balancing) { 2321 if (carpr.carpr_balancing > CARP_BAL_MAXID) { 2322 error = EINVAL; 2323 break; 2324 } 2325 sc->sc_balancing = carpr.carpr_balancing; 2326 carp_set_enaddr(sc); 2327 carp_update_lsmask(sc); 2328 } 2329 bcopy(carpr.carpr_key, sc->sc_key, sizeof(sc->sc_key)); 2330 if (error > 0) 2331 error = EINVAL; 2332 else { 2333 error = 0; 2334 carp_hmac_prepare(sc); 2335 carp_setrun_all(sc, 0); 2336 } 2337 break; 2338 2339 case SIOCGVH: 2340 bzero(&carpr, sizeof(carpr)); 2341 if (sc->sc_carpdev != NULL) 2342 strlcpy(carpr.carpr_carpdev, sc->sc_carpdev->if_xname, 2343 IFNAMSIZ); 2344 i = 0; 2345 LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) { 2346 carpr.carpr_vhids[i] = vhe->vhid; 2347 carpr.carpr_advskews[i] = vhe->advskew; 2348 carpr.carpr_states[i] = vhe->state; 2349 i++; 2350 } 2351 carpr.carpr_advbase = sc->sc_advbase; 2352 carpr.carpr_balancing = sc->sc_balancing; 2353 if (suser(p, p->p_acflag) == 0) 2354 
bcopy(sc->sc_key, carpr.carpr_key, 2355 sizeof(carpr.carpr_key)); 2356 carpr.carpr_peer.s_addr = sc->sc_peer.s_addr; 2357 error = copyout(&carpr, ifr->ifr_data, sizeof(carpr)); 2358 break; 2359 2360 case SIOCADDMULTI: 2361 error = carp_ether_addmulti(sc, ifr); 2362 break; 2363 2364 case SIOCDELMULTI: 2365 error = carp_ether_delmulti(sc, ifr); 2366 break; 2367 case SIOCAIFGROUP: 2368 case SIOCDIFGROUP: 2369 if (sc->sc_demote_cnt) 2370 carp_ifgroup_ioctl(ifp, cmd, addr); 2371 break; 2372 case SIOCSIFGATTR: 2373 carp_ifgattr_ioctl(ifp, cmd, addr); 2374 break; 2375 default: 2376 error = ENOTTY; 2377 } 2378 2379 if (bcmp(sc->sc_ac.ac_enaddr, sc->sc_curlladdr, ETHER_ADDR_LEN) != 0) 2380 carp_set_enaddr(sc); 2381 return (error); 2382 } 2383 2384 int 2385 carp_check_dup_vhids(struct carp_softc *sc, struct carp_if *cif, 2386 struct carpreq *carpr) 2387 { 2388 struct carp_softc *vr; 2389 struct carp_vhost_entry *vhe, *vhe0; 2390 int i; 2391 2392 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) { 2393 if (vr == sc) 2394 continue; 2395 LIST_FOREACH(vhe, &vr->carp_vhosts, vhost_entries) { 2396 if (carpr) { 2397 for (i = 0; carpr->carpr_vhids[i]; i++) { 2398 if (vhe->vhid == carpr->carpr_vhids[i]) 2399 return (EINVAL); 2400 } 2401 } 2402 LIST_FOREACH(vhe0, &sc->carp_vhosts, vhost_entries) { 2403 if (vhe->vhid == vhe0->vhid) 2404 return (EINVAL); 2405 } 2406 } 2407 } 2408 return (0); 2409 } 2410 2411 int 2412 carp_vhids_ioctl(struct carp_softc *sc, struct carpreq *carpr) 2413 { 2414 int i, j; 2415 u_int8_t taken_vhids[256]; 2416 2417 if (carpr->carpr_vhids[0] == 0 || 2418 !bcmp(sc->sc_vhids, carpr->carpr_vhids, sizeof(sc->sc_vhids))) 2419 return (0); 2420 2421 bzero(taken_vhids, sizeof(taken_vhids)); 2422 for (i = 0; carpr->carpr_vhids[i]; i++) { 2423 if (taken_vhids[carpr->carpr_vhids[i]]) 2424 return (EINVAL); 2425 taken_vhids[carpr->carpr_vhids[i]] = 1; 2426 2427 if (sc->sc_carpdev) { 2428 struct carp_if *cif; 2429 cif = (struct carp_if *)sc->sc_carpdev->if_carp; 2430 if 
(carp_check_dup_vhids(sc, cif, carpr)) 2431 return (EINVAL); 2432 } 2433 if (carpr->carpr_advskews[i] >= 255) 2434 return (EINVAL); 2435 } 2436 /* set sane balancing defaults */ 2437 if (i <= 1) 2438 carpr->carpr_balancing = CARP_BAL_NONE; 2439 else if (carpr->carpr_balancing == CARP_BAL_NONE && 2440 sc->sc_balancing == CARP_BAL_NONE) 2441 carpr->carpr_balancing = CARP_BAL_IP; 2442 2443 /* destroy all */ 2444 carp_del_all_timeouts(sc); 2445 carp_destroy_vhosts(sc); 2446 bzero(sc->sc_vhids, sizeof(sc->sc_vhids)); 2447 2448 /* sort vhosts list by vhid */ 2449 for (j = 1; j <= 255; j++) { 2450 for (i = 0; carpr->carpr_vhids[i]; i++) { 2451 if (carpr->carpr_vhids[i] != j) 2452 continue; 2453 if (carp_new_vhost(sc, carpr->carpr_vhids[i], 2454 carpr->carpr_advskews[i])) 2455 return (ENOMEM); 2456 sc->sc_vhids[i] = carpr->carpr_vhids[i]; 2457 sc->sc_advskews[i] = carpr->carpr_advskews[i]; 2458 } 2459 } 2460 carp_set_enaddr(sc); 2461 carp_set_state_all(sc, INIT); 2462 return (0); 2463 } 2464 2465 void 2466 carp_ifgroup_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr) 2467 { 2468 struct ifgroupreq *ifgr = (struct ifgroupreq *)addr; 2469 struct ifg_list *ifgl; 2470 int *dm, adj; 2471 2472 if (!strcmp(ifgr->ifgr_group, IFG_ALL)) 2473 return; 2474 adj = ((struct carp_softc *)ifp->if_softc)->sc_demote_cnt; 2475 if (cmd == SIOCDIFGROUP) 2476 adj = adj * -1; 2477 2478 TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) 2479 if (!strcmp(ifgl->ifgl_group->ifg_group, ifgr->ifgr_group)) { 2480 dm = &ifgl->ifgl_group->ifg_carp_demoted; 2481 if (*dm + adj >= 0) 2482 *dm += adj; 2483 else 2484 *dm = 0; 2485 } 2486 } 2487 2488 void 2489 carp_ifgattr_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr) 2490 { 2491 struct ifgroupreq *ifgr = (struct ifgroupreq *)addr; 2492 struct carp_softc *sc = ifp->if_softc; 2493 2494 if (ifgr->ifgr_attrib.ifg_carp_demoted > 0 && (sc->sc_if.if_flags & 2495 (IFF_UP|IFF_RUNNING)) == (IFF_UP|IFF_RUNNING)) 2496 carp_vhe_send_ad_all(sc); 2497 } 2498 2499 /* 2500 
* Start output on carp interface. This function should never be called. 2501 */ 2502 void 2503 carp_start(struct ifnet *ifp) 2504 { 2505 #ifdef DEBUG 2506 printf("%s: start called\n", ifp->if_xname); 2507 #endif 2508 } 2509 2510 int 2511 carp_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa, 2512 struct rtentry *rt) 2513 { 2514 struct carp_softc *sc = ((struct carp_softc *)ifp->if_softc); 2515 struct carp_vhost_entry *vhe; 2516 2517 vhe = sc->cur_vhe ? sc->cur_vhe : LIST_FIRST(&sc->carp_vhosts); 2518 2519 if (sc->sc_carpdev != NULL && 2520 (sc->sc_balancing || vhe->state == MASTER)) 2521 return (sc->sc_carpdev->if_output(ifp, m, sa, rt)); 2522 else { 2523 m_freem(m); 2524 return (ENETUNREACH); 2525 } 2526 } 2527 2528 void 2529 carp_set_state_all(struct carp_softc *sc, int state) 2530 { 2531 struct carp_vhost_entry *vhe; 2532 2533 LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) 2534 carp_set_state(vhe, state); 2535 } 2536 2537 void 2538 carp_set_state(struct carp_vhost_entry *vhe, int state) 2539 { 2540 struct carp_softc *sc = vhe->parent_sc; 2541 static const char *carp_states[] = { CARP_STATES }; 2542 int loglevel; 2543 2544 if (vhe->state == state) 2545 return; 2546 if (vhe->state == INIT || state == INIT) 2547 loglevel = LOG_WARNING; 2548 else 2549 loglevel = LOG_CRIT; 2550 2551 if (sc->sc_vhe_count > 1) 2552 CARP_LOG(loglevel, sc, 2553 ("state transition (vhid %d): %s -> %s", vhe->vhid, 2554 carp_states[vhe->state], carp_states[state])); 2555 else 2556 CARP_LOG(loglevel, sc, 2557 ("state transition: %s -> %s", 2558 carp_states[vhe->state], carp_states[state])); 2559 2560 vhe->state = state; 2561 carp_update_lsmask(sc); 2562 2563 /* only the master vhe creates link state messages */ 2564 if (!vhe->vhe_leader) 2565 return; 2566 2567 switch (state) { 2568 case BACKUP: 2569 sc->sc_if.if_link_state = LINK_STATE_DOWN; 2570 break; 2571 case MASTER: 2572 sc->sc_if.if_link_state = LINK_STATE_UP; 2573 break; 2574 default: 2575 sc->sc_if.if_link_state = 
LINK_STATE_INVALID; 2576 break; 2577 } 2578 if_link_state_change(&sc->sc_if); 2579 } 2580 2581 void 2582 carp_group_demote_adj(struct ifnet *ifp, int adj, char *reason) 2583 { 2584 struct ifg_list *ifgl; 2585 int *dm; 2586 struct carp_softc *nil = NULL; 2587 2588 if (ifp->if_type == IFT_CARP) { 2589 dm = &((struct carp_softc *)ifp->if_softc)->sc_demote_cnt; 2590 if (*dm + adj >= 0) 2591 *dm += adj; 2592 else 2593 *dm = 0; 2594 } 2595 2596 TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) { 2597 if (!strcmp(ifgl->ifgl_group->ifg_group, IFG_ALL)) 2598 continue; 2599 dm = &ifgl->ifgl_group->ifg_carp_demoted; 2600 2601 if (*dm + adj >= 0) 2602 *dm += adj; 2603 else 2604 *dm = 0; 2605 2606 if (adj > 0 && *dm == 1) 2607 carp_send_ad_all(); 2608 CARP_LOG(LOG_ERR, nil, 2609 ("%s demoted group %s by %d to %d (%s)", 2610 ifp->if_xname, ifgl->ifgl_group->ifg_group, 2611 adj, *dm, reason)); 2612 } 2613 } 2614 2615 int 2616 carp_group_demote_count(struct carp_softc *sc) 2617 { 2618 struct ifg_list *ifgl; 2619 int count = 0; 2620 2621 TAILQ_FOREACH(ifgl, &sc->sc_if.if_groups, ifgl_next) 2622 count += ifgl->ifgl_group->ifg_carp_demoted; 2623 2624 if (count == 0 && sc->sc_demote_cnt) 2625 count = sc->sc_demote_cnt; 2626 2627 return (count > 255 ? 
255 : count); 2628 } 2629 2630 void 2631 carp_carpdev_state(void *v) 2632 { 2633 struct carp_if *cif; 2634 struct carp_softc *sc; 2635 struct ifnet *ifp = v; 2636 2637 if (ifp->if_type == IFT_CARP) 2638 return; 2639 2640 cif = (struct carp_if *)ifp->if_carp; 2641 2642 TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) { 2643 int suppressed = sc->sc_suppress; 2644 2645 if (sc->sc_carpdev->if_link_state == LINK_STATE_DOWN || 2646 !(sc->sc_carpdev->if_flags & IFF_UP)) { 2647 sc->sc_if.if_flags &= ~IFF_RUNNING; 2648 carp_del_all_timeouts(sc); 2649 carp_set_state_all(sc, INIT); 2650 sc->sc_suppress = 1; 2651 carp_setrun_all(sc, 0); 2652 if (!suppressed) 2653 carp_group_demote_adj(&sc->sc_if, 1, "carpdev"); 2654 } else if (suppressed) { 2655 carp_set_state_all(sc, INIT); 2656 sc->sc_suppress = 0; 2657 carp_setrun_all(sc, 0); 2658 carp_group_demote_adj(&sc->sc_if, -1, "carpdev"); 2659 } 2660 } 2661 } 2662 2663 int 2664 carp_ether_addmulti(struct carp_softc *sc, struct ifreq *ifr) 2665 { 2666 struct ifnet *ifp; 2667 struct carp_mc_entry *mc; 2668 u_int8_t addrlo[ETHER_ADDR_LEN], addrhi[ETHER_ADDR_LEN]; 2669 int error; 2670 2671 ifp = sc->sc_carpdev; 2672 if (ifp == NULL) 2673 return (EINVAL); 2674 2675 error = ether_addmulti(ifr, (struct arpcom *)&sc->sc_ac); 2676 if (error != ENETRESET) 2677 return (error); 2678 2679 /* 2680 * This is new multicast address. We have to tell parent 2681 * about it. Also, remember this multicast address so that 2682 * we can delete them on unconfigure. 2683 */ 2684 mc = malloc(sizeof(struct carp_mc_entry), M_DEVBUF, M_NOWAIT); 2685 if (mc == NULL) { 2686 error = ENOMEM; 2687 goto alloc_failed; 2688 } 2689 2690 /* 2691 * As ether_addmulti() returns ENETRESET, following two 2692 * statement shouldn't fail. 
2693 */ 2694 (void)ether_multiaddr(&ifr->ifr_addr, addrlo, addrhi); 2695 ETHER_LOOKUP_MULTI(addrlo, addrhi, &sc->sc_ac, mc->mc_enm); 2696 memcpy(&mc->mc_addr, &ifr->ifr_addr, ifr->ifr_addr.sa_len); 2697 LIST_INSERT_HEAD(&sc->carp_mc_listhead, mc, mc_entries); 2698 2699 error = (*ifp->if_ioctl)(ifp, SIOCADDMULTI, (caddr_t)ifr); 2700 if (error != 0) 2701 goto ioctl_failed; 2702 2703 return (error); 2704 2705 ioctl_failed: 2706 LIST_REMOVE(mc, mc_entries); 2707 free(mc, M_DEVBUF); 2708 alloc_failed: 2709 (void)ether_delmulti(ifr, (struct arpcom *)&sc->sc_ac); 2710 2711 return (error); 2712 } 2713 2714 int 2715 carp_ether_delmulti(struct carp_softc *sc, struct ifreq *ifr) 2716 { 2717 struct ifnet *ifp; 2718 struct ether_multi *enm; 2719 struct carp_mc_entry *mc; 2720 u_int8_t addrlo[ETHER_ADDR_LEN], addrhi[ETHER_ADDR_LEN]; 2721 int error; 2722 2723 ifp = sc->sc_carpdev; 2724 if (ifp == NULL) 2725 return (EINVAL); 2726 2727 /* 2728 * Find a key to lookup carp_mc_entry. We have to do this 2729 * before calling ether_delmulti for obvious reason. 2730 */ 2731 if ((error = ether_multiaddr(&ifr->ifr_addr, addrlo, addrhi)) != 0) 2732 return (error); 2733 ETHER_LOOKUP_MULTI(addrlo, addrhi, &sc->sc_ac, enm); 2734 if (enm == NULL) 2735 return (EINVAL); 2736 2737 LIST_FOREACH(mc, &sc->carp_mc_listhead, mc_entries) 2738 if (mc->mc_enm == enm) 2739 break; 2740 2741 /* We won't delete entries we didn't add */ 2742 if (mc == NULL) 2743 return (EINVAL); 2744 2745 error = ether_delmulti(ifr, (struct arpcom *)&sc->sc_ac); 2746 if (error != ENETRESET) 2747 return (error); 2748 2749 /* We no longer use this multicast address. Tell parent so. */ 2750 error = (*ifp->if_ioctl)(ifp, SIOCDELMULTI, (caddr_t)ifr); 2751 if (error == 0) { 2752 /* And forget about this address. 
*/ 2753 LIST_REMOVE(mc, mc_entries); 2754 free(mc, M_DEVBUF); 2755 } else 2756 (void)ether_addmulti(ifr, (struct arpcom *)&sc->sc_ac); 2757 return (error); 2758 } 2759 2760 /* 2761 * Delete any multicast address we have asked to add from parent 2762 * interface. Called when the carp is being unconfigured. 2763 */ 2764 void 2765 carp_ether_purgemulti(struct carp_softc *sc) 2766 { 2767 struct ifnet *ifp = sc->sc_carpdev; /* Parent. */ 2768 struct carp_mc_entry *mc; 2769 union { 2770 struct ifreq ifreq; 2771 struct { 2772 char ifr_name[IFNAMSIZ]; 2773 struct sockaddr_storage ifr_ss; 2774 } ifreq_storage; 2775 } u; 2776 struct ifreq *ifr = &u.ifreq; 2777 2778 if (ifp == NULL) 2779 return; 2780 2781 memcpy(ifr->ifr_name, ifp->if_xname, IFNAMSIZ); 2782 while ((mc = LIST_FIRST(&sc->carp_mc_listhead)) != NULL) { 2783 memcpy(&ifr->ifr_addr, &mc->mc_addr, mc->mc_addr.ss_len); 2784 (void)(*ifp->if_ioctl)(ifp, SIOCDELMULTI, (caddr_t)ifr); 2785 LIST_REMOVE(mc, mc_entries); 2786 free(mc, M_DEVBUF); 2787 } 2788 } 2789